# Parsing RSS with RSSLite
# Simple script that parses an RSS file using the XML::RSSLite module.
use strict;
use IO::File;
use LWP::Simple;
use HTML::Entities;
use XML::RSSLite;
use warnings;

# Fetch one RSS feed and print its items as "title - domain" lines, with a
# date heading emitted each time the (truncated) item date changes.

# Location of the feed to fetch.
my $feed_url = "http://someurl.com/file.rss";

# Responses shorter than this many characters are not parsed at all.
my $min_feed_length = 1000;

# Number of leading characters of an item's date used as the group heading,
# e.g. the "Mon, 06 Sep 2009" part of an RFC 822 style pubDate.
# NOTE(review): for an ISO 8601 style dc:date the first 16 characters also
# include hours and minutes, so such items group per minute rather than per
# day -- confirm whether that is intended.
my $date_key_length = 16;

my $xml = get($feed_url);
die "Could not retrieve $feed_url" unless $xml;

# Debugging aid: uncomment to dump the raw feed and stop.
# print $xml;
# exit;

if ( length($xml) < $min_feed_length ) {
    # Say why we are giving up instead of exiting silently.
    warn "Feed from $feed_url is only " . length($xml)
       . " characters long; not parsing it\n";
    exit 1;
}

# NOTE(review): this script used to run `$xml =~ s|.*?||gs;` twice here.
# With /g, Perl forbids a zero-length match directly after another zero-length
# match, so `.*?` alternates between matching nothing and matching a single
# character -- the substitution therefore deleted the whole document and no
# item was ever printed.  The patterns presumably once named elements to strip
# from the feed, but that text is not recoverable from this file; reinstate
# them with explicit delimiters if such stripping is still wanted.

# parseRSS is handed references to the result hash and to the feed text; the
# loop below reads the parsed items back out of $result{'item'}.
my %result;
parseRSS(\%result, \$xml);

my $prevdate = "";
foreach my $item ( @{ $result{'item'} || [] } ) {
    my $title  = defined $item->{'title'} ? $item->{'title'} : '';
    my $domain = _domain_of( $item->{'link'} );
    my $date   = substr( _item_date($item), 0, $date_key_length );

    # Emit a heading only when the date differs from the previous item's.
    if ( $date ne $prevdate ) {
        print "\n$date\n\n";
        $prevdate = $date;
    }
    print "$title - $domain\n\n";
}
exit 0;

# Return the host part of the first http:// or https:// URL found in $link,
# with a leading "www." removed; returns '' when $link is undefined or
# contains no such URL.
sub _domain_of {
    my ($link) = @_;
    return '' unless defined $link;
    # Stop at '/', '?', '#' or whitespace so that links without a trailing
    # slash after the host still yield a domain.
    return '' unless $link =~ m{https?://([^/?\#\s]+)}i;
    my $host = $1;
    $host =~ s{^www\.}{}i;
    return $host;
}

# Return the item's date string: 'pubDate' if it has a non-empty value,
# otherwise 'dc:date', otherwise ''.
sub _item_date {
    my ($item) = @_;
    foreach my $field ( 'pubDate', 'dc:date' ) {
        my $value = $item->{$field};
        return $value if defined $value && length $value;
    }
    return '';
}
# code=yes