document that checkcontent behaves differently for comments
[git.ikiwiki.info.git] / IkiWiki / Plugin / aggregate.pm
index 419b40fdb115e221b18876ef11aa6252b6501f21..2069a352ede751eff1597da5d13e960401e5da48 100644 (file)
@@ -16,7 +16,8 @@ my %guids;
 sub import {
        hook(type => "getopt", id => "aggregate", call => \&getopt);
        hook(type => "getsetup", id => "aggregate", call => \&getsetup);
-       hook(type => "checkconfig", id => "aggregate", call => \&checkconfig);
+       hook(type => "checkconfig", id => "aggregate", call => \&checkconfig,
+               last => 1);
        hook(type => "needsbuild", id => "aggregate", call => \&needsbuild);
        hook(type => "preprocess", id => "aggregate", call => \&preprocess);
         hook(type => "delete", id => "aggregate", call => \&delete);
@@ -64,6 +65,8 @@ sub checkconfig () {
                $config{aggregateinternal}=1;
        }
 
+       # This is done here rather than in a refresh hook because it
+       # needs to run before the wiki is locked.
        if ($config{aggregate} && ! ($config{post_commit} && 
                                     IkiWiki::commit_hook_enabled())) {
                launchaggregation();
@@ -101,8 +104,7 @@ sub launchaggregation () {
        my @feeds=needsaggregate();
        return unless @feeds;
        if (! lockaggregate()) {
-               debug("an aggregation process is already running");
-               return;
+               error("an aggregation process is already running");
        }
        # force a later rebuild of source pages
        $IkiWiki::forcerebuild{$_->{sourcepage}}=1
@@ -189,7 +191,7 @@ sub migrate_to_internal {
                if (-e $oldoutput) {
                        require IkiWiki::Render;
                        debug("removing output file $oldoutput");
-                       IkiWiki::prune($oldoutput);
+                       IkiWiki::prune($oldoutput, $config{destdir});
                }
        }
        
@@ -486,6 +488,7 @@ sub needsaggregate () {
 }
 
 sub aggregate (@) {
+       eval q{use Net::INET6Glue::INET_is_INET6}; # may not be available
        eval q{use XML::Feed};
        error($@) if $@;
        eval q{use URI::Fetch};
@@ -510,7 +513,11 @@ sub aggregate (@) {
                        }
                        $feed->{feedurl}=pop @urls;
                }
-               my $res=URI::Fetch->fetch($feed->{feedurl});
+               # Using the for_url parameter makes sure we crash if used
+               # with an older IkiWiki.pm that didn't automatically try
+               # to use LWPx::ParanoidAgent.
+               my $ua=useragent(for_url => $feed->{feedurl});
+               my $res=URI::Fetch->fetch($feed->{feedurl}, UserAgent=>$ua);
                if (! $res) {
                        $feed->{message}=URI::Fetch->errstr;
                        $feed->{error}=1;
@@ -528,6 +535,14 @@ sub aggregate (@) {
                        next;
                }
                my $content=$res->content;
+
+               # This is a hack to support the media:content extension
+               # to RSS. XML::Feed does not support it, but it's the same
+               # as an enclosure, so converting it to that tag will let it
+               # parse.
+               $content=~s/<media:content/<enclosure/g;
+               $content=~s/<\/media:content/<\/enclosure/g;
+
                my $f=eval{XML::Feed->parse(\$content)};
                if ($@) {
                        # One common cause of XML::Feed crashing is a feed
@@ -549,7 +564,9 @@ sub aggregate (@) {
                        };
                }
                if ($@) {
-                       $feed->{message}=gettext("feed crashed XML::Feed!")." ($@)";
+                       # gettext can clobber $@
+                       my $error = $@;
+                       $feed->{message}=gettext("feed crashed XML::Feed!")." ($error)";
                        $feed->{error}=1;
                        debug($feed->{message});
                        next;
@@ -565,7 +582,9 @@ sub aggregate (@) {
                        # XML::Feed doesn't work around XML::Atom's bizarre
                        # API, so we will. Real unicode strings? Yes please.
                        # See [[bugs/Aggregated_Atom_feeds_are_double-encoded]]
+                       no warnings 'once';
                        local $XML::Atom::ForceUnicode = 1;
+                       use warnings;
 
                        my $c=$entry->content;
                        # atom feeds may have no content, only a summary
@@ -577,7 +596,12 @@ sub aggregate (@) {
                                feed => $feed,
                                copyright => $f->copyright,
                                title => defined $entry->title ? decode_entities($entry->title) : "untitled",
+                               author => defined $entry->author ? decode_entities($entry->author) : "",
                                link => $entry->link,
+                               enclosureurl => defined $entry->enclosure ? $entry->enclosure->url : "",
+                               enclosureimage => (defined $entry->enclosure && $entry->enclosure->type =~ m/image\//) ? "1" : "",
+                               enclosureaudio => (defined $entry->enclosure && $entry->enclosure->type =~ m/audio\//) ? "1" : "",
+                               enclosurevideo => (defined $entry->enclosure && $entry->enclosure->type =~ m/video\//) ? "1" : "",
                                content => (defined $c && defined $c->body) ? $c->body : "",
                                guid => defined $entry->id ? $entry->id : time."_".$feed->{name},
                                ctime => $entry->issued ? ($entry->issued->epoch || time) : time,
@@ -597,6 +621,7 @@ sub add_page (@) {
                # updating an existing post
                $guid=$guids{$params{guid}};
                return if $guid->{expired};
+               write_page($feed, $guid, $mtime, \%params);
        }
        else {
                # new post
@@ -611,35 +636,46 @@ sub add_page (@) {
                # escape slashes and periods in title so it doesn't specify
                # directory name or trigger ".." disallowing code.
                $page=~s!([/.])!"__".ord($1)."__"!eg;
-               $page=$feed->{dir}."/".$page;
-               ($page)=$page=~/$config{wiki_file_regexp}/;
                if (! defined $page || ! length $page) {
                        $page=$feed->{dir}."/item";
                }
+               $page=$feed->{dir}."/".$page;
+               ($page)=$page=~/$config{wiki_file_regexp}/;
                my $c="";
                while (exists $IkiWiki::pagecase{lc $page.$c} ||
                       -e $IkiWiki::Plugin::transient::transientdir."/".htmlfn($page.$c) ||
                       -e "$config{srcdir}/".htmlfn($page.$c)) {
                        $c++
                }
+               $page=$page.$c;
 
-               # Make sure that the file name isn't too long. 
-               # NB: This doesn't check for path length limits.
-               my $max=POSIX::pathconf($config{srcdir}, &POSIX::_PC_NAME_MAX);
-               if (defined $max && length(htmlfn($page)) >= $max) {
+               $guid->{page}=$page;
+               eval { write_page($feed, $guid, $mtime, \%params) };
+               if ($@) {
+                       # assume the failure was due to an overly long filename
                        $c="";
                        $page=$feed->{dir}."/item";
                        while (exists $IkiWiki::pagecase{lc $page.$c} ||
                              -e $IkiWiki::Plugin::transient::transientdir."/".htmlfn($page.$c) ||
-
-                              -e "$config{srcdir}/".htmlfn($page.$c)) {
+                             -e "$config{srcdir}/".htmlfn($page.$c)) {
                                $c++
                        }
+                       $page=$page.$c;
+
+                       $guid->{page}=$page;
+                       write_page($feed, $guid, $mtime, \%params);
                }
 
-               $guid->{page}=$page;
                debug(sprintf(gettext("creating new page %s"), $page));
        }
+}
+
+sub write_page ($$$$$) {
+       my $feed=shift;
+       my $guid=shift;
+       my $mtime=shift;
+       my %params=%{shift()};
+
        $guid->{feed}=$feed->{name};
        
        # To write or not to write? Need to avoid writing unchanged pages
@@ -658,11 +694,16 @@ sub add_page (@) {
                $template=template($feed->{template}, blind_cache => 1);
        };
        if ($@) {
-               print STDERR gettext("failed to process template:")." $@";
+               # gettext can clobber $@
+               my $error = $@;
+               print STDERR gettext("failed to process template:")." $error";
                return;
        }
        $template->param(title => $params{title})
                if defined $params{title} && length($params{title});
+       $template->param(author => $params{author})
+               if defined $params{author} && length($params{author}
+                       && $params{author} ne $feed->{name});
        $template->param(content => wikiescape(htmlabs($params{content},
                defined $params{base} ? $params{base} : $feed->{feedurl})));
        $template->param(name => $feed->{name});
@@ -671,6 +712,14 @@ sub add_page (@) {
                if defined $params{copyright} && length $params{copyright};
        $template->param(permalink => IkiWiki::urlabs($params{link}, $feed->{feedurl}))
                if defined $params{link};
+       $template->param(enclosureurl => $params{enclosureurl})
+               if defined $params{enclosureurl} && length $params{enclosureurl};
+       $template->param(enclosureimage => $params{enclosureimage})
+               if defined $params{enclosureimage} && length $params{enclosureimage};
+       $template->param(enclosureaudio => $params{enclosureaudio})
+               if defined $params{enclosureaudio} && length $params{enclosureaudio};
+       $template->param(enclosurevideo => $params{enclosurevideo})
+               if defined $params{enclosurevideo} && length $params{enclosurevideo};
        if (ref $feed->{tags}) {
                $template->param(tags => [map { tag => $_ }, @{$feed->{tags}}]);
        }