#!/usr/bin/perl -wT
# Perl regex extracting domain name from URL code example code.
#
# For every URL in @urls this script prints the URL, then the host ("domain")
# part of that URL, then a blank line.  The sample output of a run is kept
# after the __END__ marker at the bottom of the file.

use strict;
use warnings;
use Data::Dumper;

# code from:
# (http://www.willmaster.com/blog/perl/extracting-domain-name-from-url.php)

# test links taken from the front page of news.ycombinator.com on 2Oct2013
my @urls = (
    'http://www.reuters.com/article/2013/10/02/crime-silkroad-raid-idUSL1N0HS12C20131002',
    'http://allthingsd.com/20131002/u-s-opposes-tech-companies-requests-to-disclose-surveillance/',
    'http://krebsonsecurity.com/wp-content/uploads/2013/10/UlbrichtCriminalComplaint.pdf',
    'http://customerdevlabs.com/2013/09/24/google-news-api-mturk-press/#',
    'http://www.postgresqlstudio.org/',
    'http://www.uscourts.gov/uscourts/courts/fisc/motion-declaratory-judgement-131002.pdf',
    'http://www.slate.com/blogs/future_tense/2013/10/02/silk_road_s_dread_pirate_ross_ulbricht_asked_stack_overflow_question_under.html',
    'http://www.nytimes.com/interactive/2013/10/01/dining/nacho-graphic.html/#',
    'http://blog.parse.com/2013/10/02/parse-developer-day-video-series-how-to-design-great-apis/',
    'http://www.phonearena.com/news/Nexus-5-to-be-first-smartphone-with-MEMS-camera-fastest-on-a-phone-Lytro-like-functionality_id47900',
    'http://www.airpair.com/pair-programming/our-future',
    'http://blogs.computerworlduk.com/open-enterprise/2013/10/richard-stallman-on-the-painful-birth-of-gnu/index.htm',
    'https://groups.google.com/forum/#!topic/mechanical-sympathy/1TMjVjyyMmA',
    'http://raml.org/',
    'http://gemal.dk/blog/2013/10/02/html5_flash_player_shumway_landed',
    'http://hover.ie/',
    'http://simberon.blogspot.nl/2013/01/swimming-with-fish.html',
    'http://techcrunch.com/2013/10/02/cardflight-1-6-million/',
    'http://www.phoronix.com/scan.php?page=news_item&px=MTQ3NDU',
    'http://www.forbes.com/sites/andygreenberg/2013/10/02/end-of-the-silk-road-fbi-busts-the-webs-biggest-anonymous-drug-black-market/',
    'http://www.fsf.org/blogs/community/global-celebration-for-the-gnu-systems-30th-anniversary',
    'http://google-latlong.blogspot.ch/2013/09/street-view-arrives-at-cern.html',
    'https://www.neckbeardrepublic.com/screencasts/pandas-vincent',
    'http://www.listia.com/jobs',
    'http://www.slate.com/articles/health_and_science/science/2013/09/children_s_language_development_talk_and_listen_to_them_from_birth.html',
    'http://pauv.org/paper.pdf',
    'http://www.valdyas.org/fading/index.cgi/kde/krita_10_years.html',
    'http://techcrunch.com/2013/10/02/monsanto-acquires-weather-big-data-company-climate-corporation-for-930m/',
    'http://mariocaropreso.com/post/62811446044/nonlinearities-and-success',
);

# print Dumper(@urls);

# extract_domain($url)
#
# Returns the host part of $url as a string:
#   * a leading "http://" or "https://" (any letter case) is removed;
#   * everything from the first "/", "?" or "#" onward is removed;
#   * userinfo ("user:password@") is removed;
#   * a ":port" suffix is removed; a bracketed IPv6 literal such as "[::1]"
#     is returned intact, brackets included;
#   * a leading "www." is removed, but only when a scheme was present.
# The letter case of the host is left as given.  Returns the empty string
# when $url is undefined.
sub extract_domain {
    my ($url) = @_;
    return '' unless defined $url;

    my $host = $url;

    # s/// returns true when it replaced something, i.e. a scheme was present.
    my $had_scheme = ( $host =~ s{\Ahttps?://}{}i );

    # The authority component ends at the first "/", "?" or "#".
    $host =~ s{[/?\#].*}{}s;

    # Drop userinfo first, so the ":" inside "user:password@" is not mistaken
    # for the port separator.
    $host =~ s{\A.*\@}{}s;

    if ( $host =~ m{\A(\[[^\]]*\])} ) {
        # Bracketed IPv6 literal: the colons inside the brackets belong to
        # the address, so keep the bracketed part and drop any port after it.
        $host = $1;
    }
    else {
        # Everything from the first ":" onward is the port.
        $host =~ s{:.*}{}s;
    }

    # Only strip "www." when a scheme was present, so scheme-less input keeps
    # its prefix.
    $host =~ s{\Awww\.}{}i if $had_scheme;

    return $host;
}

# print_domains(@list)
#
# Prints each URL in @list followed by a newline, then its extracted domain
# followed by a blank line.
sub print_domains {
    my @list = @_;

    for my $url (@list) {
        print $url . "\n";
        print extract_domain($url) . "\n\n";
    }

    return;
}

# Run the demo only when executed as a script, so the file can also be loaded
# with require/do to reuse extract_domain() without printing anything.
print_domains(@urls) unless caller;

# Everything after the __END__ marker is ignored by perl; it is the captured
# output of a run of this script, kept for reference.
__END__
code.. output:
http://www.reuters.com/article/2013/10/02/crime-silkroad-raid-idUSL1N0HS12C20131002
reuters.com

http://allthingsd.com/20131002/u-s-opposes-tech-companies-requests-to-disclose-surveillance/
allthingsd.com

http://krebsonsecurity.com/wp-content/uploads/2013/10/UlbrichtCriminalComplaint.pdf
krebsonsecurity.com

http://customerdevlabs.com/2013/09/24/google-news-api-mturk-press/#
customerdevlabs.com

http://www.postgresqlstudio.org/
postgresqlstudio.org

http://www.uscourts.gov/uscourts/courts/fisc/motion-declaratory-judgement-131002.pdf
uscourts.gov

http://www.slate.com/blogs/future_tense/2013/10/02/silk_road_s_dread_pirate_ross_ulbricht_asked_stack_overflow_question_under.html
slate.com

http://www.nytimes.com/interactive/2013/10/01/dining/nacho-graphic.html/#
nytimes.com

http://blog.parse.com/2013/10/02/parse-developer-day-video-series-how-to-design-great-apis/
blog.parse.com

http://www.phonearena.com/news/Nexus-5-to-be-first-smartphone-with-MEMS-camera-fastest-on-a-phone-Lytro-like-functionality_id47900
phonearena.com

http://www.airpair.com/pair-programming/our-future
airpair.com

http://blogs.computerworlduk.com/open-enterprise/2013/10/richard-stallman-on-the-painful-birth-of-gnu/index.htm
blogs.computerworlduk.com https://groups.google.com/forum/#!topic/mechanical-sympathy/1TMjVjyyMmA groups.google.com http://raml.org/ raml.org http://gemal.dk/blog/2013/10/02/html5_flash_player_shumway_landed gemal.dk http://hover.ie/ hover.ie http://simberon.blogspot.nl/2013/01/swimming-with-fish.html simberon.blogspot.nl http://techcrunch.com/2013/10/02/cardflight-1-6-million/ techcrunch.com http://www.phoronix.com/scan.php?page=news_item&px=MTQ3NDU phoronix.com http://www.forbes.com/sites/andygreenberg/2013/10/02/end-of-the-silk-road-fbi-busts-the-webs-biggest-anonymous-drug-black-market/ forbes.com http://www.fsf.org/blogs/community/global-celebration-for-the-gnu-systems-30th-anniversary fsf.org http://google-latlong.blogspot.ch/2013/09/street-view-arrives-at-cern.html google-latlong.blogspot.ch https://www.neckbeardrepublic.com/screencasts/pandas-vincent neckbeardrepublic.com http://www.listia.com/jobs listia.com http://www.slate.com/articles/health_and_science/science/2013/09/children_s_language_development_talk_and_listen_to_them_from_birth.html slate.com http://pauv.org/paper.pdf pauv.org http://www.valdyas.org/fading/index.cgi/kde/krita_10_years.html valdyas.org http://techcrunch.com/2013/10/02/monsanto-acquires-weather-big-data-company-climate-corporation-for-930m/ techcrunch.com http://mariocaropreso.com/post/62811446044/nonlinearities-and-success mariocaropreso.com #perl - #programming - #regex