X-Git-Url: http://git.vanrenterghem.biz/git.ikiwiki.info.git/blobdiff_plain/dae0f48e91304afcb6ebe0936360e51b22a56548..04a9dbfe7daa9c352ae4e9af17df8134248f3806:/IkiWiki/Plugin/aggregate.pm
diff --git a/IkiWiki/Plugin/aggregate.pm b/IkiWiki/Plugin/aggregate.pm
index ac55dcb1d..2a3f36fce 100644
--- a/IkiWiki/Plugin/aggregate.pm
+++ b/IkiWiki/Plugin/aggregate.pm
@@ -9,6 +9,7 @@ use HTML::Entities;
use HTML::Parser;
use HTML::Tagset;
use URI;
+use open qw{:utf8 :std};
my %feeds;
my %guids;
@@ -24,6 +25,7 @@ sub import { #{{{
sub getopt () { #{{{
eval q{use Getopt::Long};
+ error($@) if $@; # $@ is non-empty when the eval'd "use Getopt::Long" above failed; pass that message to error()
Getopt::Long::Configure('pass_through');
GetOptions("aggregate" => \$config{aggregate});
} #}}}
@@ -34,6 +36,7 @@ sub checkconfig () { #{{{
if ($config{aggregate}) {
IkiWiki::loadindex();
aggregate();
+ expire();
savestate();
}
IkiWiki::unlockwiki();
@@ -79,10 +82,12 @@ sub preprocess (@) { #{{{
$feed->{expireage}=defined $params{expireage} ? $params{expireage} : 0;
$feed->{expirecount}=defined $params{expirecount} ? $params{expirecount} : 0;
delete $feed->{remove};
+ delete $feed->{expired};
$feed->{lastupdate}=0 unless defined $feed->{lastupdate};
$feed->{numposts}=0 unless defined $feed->{numposts};
$feed->{newposts}=0 unless defined $feed->{newposts};
$feed->{message}="new feed" unless defined $feed->{message};
+ $feed->{error}=0 unless defined $feed->{error};
$feed->{tags}=[];
while (@_) {
my $key=shift;
@@ -93,7 +98,9 @@ sub preprocess (@) { #{{{
}
return "{url}."\">".$feed->{name}.": ".
- "".$feed->{message}." (".$feed->{numposts}." posts".
+ ($feed->{error} ? "" : "").$feed->{message}.
+ ($feed->{error} ? "" : "").
+ " (".$feed->{numposts}." posts".
($feed->{newposts} ? "; ".$feed->{newposts}." new" : "").
")";
} # }}}
@@ -144,7 +151,7 @@ sub loadstate () { #{{{
sub savestate () { #{{{
eval q{use HTML::Entities};
- die $@ if $@;
+ error($@) if $@;
open (OUT, ">$config{wikistatedir}/aggregate" ||
die "$config{wikistatedir}/aggregate: $!");
foreach my $data (values %feeds, values %guids) {
@@ -161,6 +168,11 @@ sub savestate () { #{{{
}
next;
}
+ elsif ($data->{expired} && exists $data->{page}) {
+ unlink pagefile($data->{page});
+ delete $data->{page};
+ delete $data->{md5};
+ }
my @line;
foreach my $field (keys %$data) {
@@ -180,11 +192,37 @@ sub savestate () { #{{{
close OUT;
} #}}}
+sub expire () { #{{{
+	# Flag aggregated items as expired according to each feed's limits.
+	#
+	# For every feed with a nonzero expireage and/or expirecount, the
+	# feed's items that still have a page (and a known creation time in
+	# %IkiWiki::pagectime) are visited newest first. An item gets
+	# {expired}=1 when it is more than expireage days old, or when
+	# expirecount newer items have already been kept.
+	#
+	# This sub only sets the flag and logs via debug(); it removes nothing.
+	foreach my $feed (values %feeds) {
+		next unless $feed->{expireage} || $feed->{expirecount};
+
+		# Items of this feed, newest first, so that the count limit
+		# keeps the most recent ones.
+		my @items=sort {
+				$IkiWiki::pagectime{$b->{page}} <=>
+				$IkiWiki::pagectime{$a->{page}}
+			}
+			grep {
+				exists $_->{page} &&
+				$_->{feed} eq $feed->{name} &&
+				$IkiWiki::pagectime{$_->{page}}
+			}
+			values %guids;
+
+		my $count=0; # number of items kept so far for this feed
+		foreach my $item (@items) {
+			if ($feed->{expireage}) {
+				my $days_old = (time - $IkiWiki::pagectime{$item->{page}}) / 60 / 60 / 24;
+				if ($days_old > $feed->{expireage}) {
+					debug("expiring ".$item->{page}." ($days_old days old)");
+					$item->{expired}=1;
+					next;
+				}
+			}
+
+			# Checked independently of the age limit: chaining this
+			# as an elsif meant a feed that set expireage never
+			# reached the count check (or the counter increment),
+			# so expirecount was ignored whenever both were set.
+			if ($feed->{expirecount} &&
+			    $count >= $feed->{expirecount}) {
+				debug("expiring ".$item->{page});
+				$item->{expired}=1;
+				next;
+			}
+
+			# The item survives both limits, so it counts against
+			# expirecount.
+			$count++;
+		}
+	}
+} #}}}
+
sub aggregate () { #{{{
eval q{use XML::Feed};
- die $@ if $@;
+ error($@) if $@;
eval q{use HTML::Entities};
- die $@ if $@;
+ error($@) if $@;
foreach my $feed (values %feeds) {
next unless $config{rebuild} ||
@@ -199,6 +237,7 @@ sub aggregate () { #{{{
my @urls=XML::Feed->find_feeds($feed->{url});
if (! @urls) {
$feed->{message}="could not find feed at ".$feed->{feedurl};
+ $feed->{error}=1;
debug($feed->{message});
next;
}
@@ -207,11 +246,13 @@ sub aggregate () { #{{{
my $f=eval{XML::Feed->parse(URI->new($feed->{feedurl}))};
if ($@) {
$feed->{message}="feed crashed XML::Feed! $@";
+ $feed->{error}=1;
debug($feed->{message});
next;
}
if (! $f) {
$feed->{message}=XML::Feed->errstr;
+ $feed->{error}=1;
debug($feed->{message});
next;
}
@@ -229,9 +270,8 @@ sub aggregate () { #{{{
$feed->{message}="processed ok at ".
displaytime($feed->{lastupdate});
+ $feed->{error}=0;
}
-
- # TODO: expiry
} #}}}
sub add_page (@) { #{{{
@@ -243,6 +283,7 @@ sub add_page (@) { #{{{
if (exists $guids{$params{guid}}) {
# updating an existing post
$guid=$guids{$params{guid}};
+ return if $guid->{expired};
}
else {
# new post
@@ -276,6 +317,7 @@ sub add_page (@) { #{{{
# to avoid unneccessary rebuilding. The mtime from rss cannot be
# trusted; let's use a digest.
eval q{use Digest::MD5 'md5_hex'};
+ error($@) if $@;
require Encode;
my $digest=md5_hex(Encode::encode_utf8($params{content}));
return unless ! exists $guid->{md5} || $guid->{md5} ne $digest || $config{rebuild};