web commit by joey

[git.ikiwiki.info.git] / IkiWiki / Render.pm
diff --git a/IkiWiki/Render.pm b/IkiWiki/Render.pm

index 1c56677ba162ed87f7da802a17612959a126878a..f90f16335ead61cf034b4f78da8872b775fd6dd2 100644 (file)
--- a/IkiWiki/Render.pm
+++ b/IkiWiki/Render.pm
@@ -1,20 +1,58 @@
+#!/usr/bin/perl
+
  package IkiWiki;
  
  use warnings;
  use strict;
  use File::Spec;
+use IkiWiki;
  
  sub linkify ($$) { #{{{
         my $content=shift;
         my $page=shift;
  
         $content =~ s{(\\?)$config{wiki_link_regexp}}{
-               $1 ? "[[$2]]" : htmllink($page, $2)
+               $2 ? ( $1 ? "[[$2|$3]]" : htmllink($page, titlepage($3), 0, 0, pagetitle($2)))
+                  : ( $1 ? "[[$3]]" :    htmllink($page, titlepage($3)))
         }eg;
         
         return $content;
  } #}}}
  
+my $_scrubber;
+sub scrubber { #{{{
+       return $_scrubber if defined $_scrubber;
+       
+       eval q{use HTML::Scrubber};
+       # Lists based on http://feedparser.org/docs/html-sanitization.html
+       $_scrubber = HTML::Scrubber->new(
+               allow => [qw{
+                       a abbr acronym address area b big blockquote br
+                       button caption center cite code col colgroup dd del
+                       dfn dir div dl dt em fieldset font form h1 h2 h3 h4
+                       h5 h6 hr i img input ins kbd label legend li map
+                       menu ol optgroup option p pre q s samp select small
+                       span strike strong sub sup table tbody td textarea
+                       tfoot th thead tr tt u ul var
+               }],
+               default => [undef, { map { $_ => 1 } qw{
+                       abbr accept accept-charset accesskey action
+                       align alt axis border cellpadding cellspacing
+                       char charoff charset checked cite class
+                       clear cols colspan color compact coords
+                       datetime dir disabled enctype for frame
+                       headers height href hreflang hspace id ismap
+                       label lang longdesc maxlength media method
+                       multiple name nohref noshade nowrap prompt
+                       readonly rel rev rows rowspan rules scope
+                       selected shape size span src start summary
+                       tabindex target title type usemap valign
+                       value vspace width
+               }}],
+       );
+       return $_scrubber;
+} # }}}
+
  sub htmlize ($$) { #{{{
         my $type=shift;
         my $content=shift;
@@ -27,11 +65,17 @@ sub htmlize ($$) { #{{{
         }
         
         if ($type eq '.mdwn') {
-               return Markdown::Markdown($content);
+               $content=Markdown::Markdown($content);
         }
         else {
                 error("htmlization of $type not supported");
         }
+
+       if ($config{sanitize}) {
+               $content=scrubber()->scrub($content);
+       }
+       
+       return $content;
  } #}}}
  
  sub backlinks ($) { #{{{
@@ -79,10 +123,64 @@ sub parentlinks ($) { #{{{
         return @ret;
  } #}}}
  
-sub rsspage ($) { #{{{
+sub preprocess ($$) { #{{{
         my $page=shift;
+       my $content=shift;
  
-       return $page.".rss";
+       my $handle=sub {
+               my $escape=shift;
+               my $command=shift;
+               my $params=shift;
+               if (length $escape) {
+                       return "[[$command $params]]";
+               }
+               elsif (exists $hooks{preprocess}{$command}) {
+                       my %params;
+                       while ($params =~ /(\w+)=\"([^"]+)"(\s+|$)/g) {
+                               $params{$1}=$2;
+                       }
+                       return $hooks{preprocess}{$command}{call}->(page => $page, %params);
+               }
+               else {
+                       return "[[$command not processed]]";
+               }
+       };
+       
+       $content =~ s{(\\?)$config{wiki_processor_regexp}}{$handle->($1, $2, $3)}eg;
+       return $content;
+} #}}}
+
+sub add_depends ($$) { #{{{
+       my $page=shift;
+       my $globlist=shift;
+       
+       if (! exists $depends{$page}) {
+               $depends{$page}=$globlist;
+       }
+       else {
+               $depends{$page}=globlist_merge($depends{$page}, $globlist);
+       }
+} # }}}
+
+sub globlist_merge ($$) { #{{{
+       my $a=shift;
+       my $b=shift;
+
+       my $ret="";
+       # Only add negated globs if they are not matched by the other globlist.
+       foreach my $i ((map { [ $a, $_ ] } split(" ", $b)), 
+                      (map { [ $b, $_ ] } split(" ", $a))) {
+               if ($i->[1]=~/^!(.*)/) {
+                       if (! globlist_match($1, $i->[0])) {
+                               $ret.=" ".$i->[1];
+                       }
+               }
+               else {
+                       $ret.=" ".$i->[1];
+               }
+       }
+       
+       return $ret;
  } #}}}
  
  sub genpage ($$$) { #{{{
@@ -96,10 +194,10 @@ sub genpage ($$$) { #{{{
                 filename => "$config{templatedir}/page.tmpl");
         
         if (length $config{cgiurl}) {
-               $template->param(editurl => "$config{cgiurl}?do=edit&page=$page");
-               $template->param(prefsurl => "$config{cgiurl}?do=prefs");
+               $template->param(editurl => cgiurl(do => "edit", page => $page));
+               $template->param(prefsurl => cgiurl(do => "prefs"));
                 if ($config{rcs}) {
-                       $template->param(recentchangesurl => "$config{cgiurl}?do=recentchanges");
+                       $template->param(recentchangesurl => cgiurl(do => "recentchanges"));
                 }
         }
  
@@ -108,11 +206,10 @@ sub genpage ($$$) { #{{{
                 $u=~s/\[\[file\]\]/$pagesources{$page}/g;
                 $template->param(historyurl => $u);
         }
-
-       if ($config{rss}) {
-               $template->param(rssurl => rsspage($page));
+       if ($config{hyperestraier}) {
+               $template->param(hyperestraierurl => cgiurl());
         }
-       
+
         $template->param(
                 title => $title,
                 wikiname => $config{wikiname},
@@ -121,59 +218,7 @@ sub genpage ($$$) { #{{{
                 backlinks => [backlinks($page)],
                 discussionlink => htmllink($page, "Discussion", 1, 1),
                 mtime => scalar(gmtime($mtime)),
-       );
-       
-       return $template->output;
-} #}}}
-
-sub date_822 ($) { #{{{
-       my $time=shift;
-
-       eval q{use POSIX};
-       return POSIX::strftime("%a, %d %b %Y %H:%M:%S %z", localtime($time));
-} #}}}
-
-sub absolute_urls ($$) { #{{{
-       my $content=shift;
-       my $url=shift;
-
-       $url=~s/[^\/]+$//;
-       
-       $content=~s{<a\s+href="([^"]+)"}{
-               "<a href=\"$url$1\""
-       }ieg;
-       $content=~s{<img\s+src="([^"]+)"}{
-               "<img src=\"$url$1\""
-       }ieg;
-       return $content;
-} #}}}
-
-sub genrss ($$$) { #{{{
-       my $content=shift;
-       my $page=shift;
-       my $mtime=shift;
-
-       my $url="$config{url}/".htmlpage($page);
-       
-       my $template=HTML::Template->new(blind_cache => 1,
-               filename => "$config{templatedir}/rsspage.tmpl");
-       
-       # Regular page gets a feed that is updated every time the
-       # page is changed, so the mtime is encoded in the guid.
-       my @items=(
-               {
-                       itemtitle => pagetitle(basename($page)),
-                       itemguid => "$url?mtime=$mtime",
-                       itemurl => $url,
-                       itempubdate => date_822($mtime),
-                       itemcontent => absolute_urls($content, $url), # rss sucks
-               },
-       );
-       
-       $template->param(
-               title => $config{wikiname},
-               pageurl => $url,
-               items => \@items,
+               styleurl => styleurl($page),
         );
         
         return $template->output;
@@ -194,9 +239,9 @@ sub check_overwrite ($$) { #{{{
  } #}}}
  
  sub mtime ($) { #{{{
-       my $page=shift;
+       my $file=shift;
         
-       return (stat($page))[9];
+       return (stat($file))[9];
  } #}}}
  
  sub findlinks ($$) { #{{{
@@ -205,7 +250,7 @@ sub findlinks ($$) { #{{{
  
         my @links;
         while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
-               push @links, lc($1);
+               push @links, titlepage($2);
         }
         # Discussion links are a special case since they're not in the text
         # of the page, but on its template.
@@ -216,33 +261,30 @@ sub render ($) { #{{{
         my $file=shift;
         
         my $type=pagetype($file);
-       my $content=readfile("$config{srcdir}/$file");
+       my $srcfile=srcfile($file);
         if ($type ne 'unknown') {
+               my $content=readfile($srcfile);
                 my $page=pagename($file);
                 
                 $links{$page}=[findlinks($content, $page)];
+               delete $depends{$page};
                 
                 $content=linkify($content, $page);
+               $content=preprocess($page, $content);
                 $content=htmlize($type, $content);
                 
                 check_overwrite("$config{destdir}/".htmlpage($page), $page);
-               writefile("$config{destdir}/".htmlpage($page),
-                       genpage($content, $page, mtime("$config{srcdir}/$file")));              
+               writefile(htmlpage($page), $config{destdir},
+                       genpage($content, $page, mtime($srcfile)));
                 $oldpagemtime{$page}=time;
                 $renderedfiles{$page}=htmlpage($page);
-
-               # TODO: should really add this to renderedfiles and call
-               # check_overwrite, as above, but currently renderedfiles
-               # only supports listing one file per page.
-               if ($config{rss}) {
-                       writefile("$config{destdir}/".rsspage($page),
-                               genrss($content, $page, mtime("$config{srcdir}/$file")));
-               }
         }
         else {
+               my $content=readfile($srcfile, 1);
                 $links{$file}=[];
+               delete $depends{$file};
                 check_overwrite("$config{destdir}/$file", $file);
-               writefile("$config{destdir}/$file", $content);
+               writefile($file, $config{destdir}, $content, 1);
                 $oldpagemtime{$file}=time;
                 $renderedfiles{$file}=$file;
         }
@@ -258,6 +300,57 @@ sub prune ($) { #{{{
         }
  } #}}}
  
+sub estcfg () { #{{{
+       my $estdir="$config{wikistatedir}/hyperestraier";
+       my $cgi=basename($config{cgiurl});
+       $cgi=~s/\..*$//;
+       open(TEMPLATE, ">$estdir/$cgi.tmpl") ||
+               error("write $estdir/$cgi.tmpl: $!");
+       print TEMPLATE misctemplate("search", 
+               "<!--ESTFORM-->\n\n<!--ESTRESULT-->\n\n<!--ESTINFO-->\n\n");
+       close TEMPLATE;
+       open(TEMPLATE, ">$estdir/$cgi.conf") ||
+               error("write $estdir/$cgi.conf: $!");
+       my $template=HTML::Template->new(
+               filename => "$config{templatedir}/estseek.conf"
+       );
+       eval q{use Cwd 'abs_path'};
+       $template->param(
+               index => $estdir,
+               tmplfile => "$estdir/$cgi.tmpl",
+               destdir => abs_path($config{destdir}),
+               url => $config{url},
+       );
+       print TEMPLATE $template->output;
+       close TEMPLATE;
+       $cgi="$estdir/".basename($config{cgiurl});
+       unlink($cgi);
+       symlink("/usr/lib/estraier/estseek.cgi", $cgi) ||
+               error("symlink $cgi: $!");
+} # }}}
+
+sub estcmd ($;@) { #{{{
+       my @params=split(' ', shift);
+       push @params, "-cl", "$config{wikistatedir}/hyperestraier";
+       if (@_) {
+               push @params, "-";
+       }
+       
+       my $pid=open(CHILD, "|-");
+       if ($pid) {
+               # parent
+               foreach (@_) {
+                       print CHILD "$_\n";
+               }
+               close(CHILD) || error("estcmd @params exited nonzero: $?");
+       }
+       else {
+               # child
+               open(STDOUT, "/dev/null"); # shut it up (closing won't work)
+               exec("estcmd", @params) || error("can't run estcmd");
+       }
+} #}}}
+
  sub refresh () { #{{{
         # find existing pages
         my %exists;
@@ -267,9 +360,7 @@ sub refresh () { #{{{
                 no_chdir => 1,
                 wanted => sub {
                         if (/$config{wiki_file_prune_regexp}/) {
-                               no warnings 'once';
                                 $File::Find::prune=1;
-                               use warnings "all";
                         }
                         elsif (! -d $_ && ! -l $_) {
                                 my ($f)=/$config{wiki_file_regexp}/; # untaint
@@ -284,6 +375,30 @@ sub refresh () { #{{{
                         }
                 },
         }, $config{srcdir});
+       find({
+               no_chdir => 1,
+               wanted => sub {
+                       if (/$config{wiki_file_prune_regexp}/) {
+                               $File::Find::prune=1;
+                       }
+                       elsif (! -d $_ && ! -l $_) {
+                               my ($f)=/$config{wiki_file_regexp}/; # untaint
+                               if (! defined $f) {
+                                       warn("skipping bad filename $_\n");
+                               }
+                               else {
+                                       # Don't add files that are in the
+                                       # srcdir.
+                                       $f=~s/^\Q$config{underlaydir}\E\/?//;
+                                       if (! -e "$config{srcdir}/$f" && 
+                                           ! -l "$config{srcdir}/$f") {
+                                               push @files, $f;
+                                               $exists{pagename($f)}=1;
+                                       }
+                               }
+                       }
+               },
+       }, $config{underlaydir});
  
         my %rendered;
  
@@ -292,10 +407,12 @@ sub refresh () { #{{{
         foreach my $file (@files) {
                 my $page=pagename($file);
                 if (! $oldpagemtime{$page}) {
-                       debug("new page $page");
+                       debug("new page $page") unless exists $pagectime{$page};
                         push @add, $file;
                         $links{$page}=[];
                         $pagesources{$page}=$file;
+                       $pagectime{$page}=mtime(srcfile($file))
+                               unless exists $pagectime{$page};
                 }
         }
         my @del;
@@ -315,7 +432,7 @@ sub refresh () { #{{{
                 my $page=pagename($file);
                 
                 if (! exists $oldpagemtime{$page} ||
-                   mtime("$config{srcdir}/$file") > $oldpagemtime{$page}) {
+                   mtime(srcfile($file)) > $oldpagemtime{$page}) {
                         debug("rendering changed file $file");
                         render($file);
                         $rendered{$file}=1;
@@ -323,7 +440,7 @@ sub refresh () { #{{{
         }
         
         # if any files were added or removed, check to see if each page
-       # needs an update due to linking to them
+       # needs an update due to linking to them or inlining them.
         # TODO: inefficient; pages may get rendered above and again here;
         # problem is the bestlink may have changed and we won't know until
         # now
@@ -344,20 +461,37 @@ FILE:             foreach my $file (@files) {
                 }
         }
  
-       # handle backlinks; if a page has added/removed links, update the
-       # pages it links to
+       # Handle backlinks; if a page has added/removed links, update the
+       # pages it links to. Also handles rebuilding dependent pages.
         # TODO: inefficient; pages may get rendered above and again here;
         # problem is the backlinks could be wrong in the first pass render
         # above
-       if (%rendered) {
+       if (%rendered || @del) {
+               foreach my $f (@files) {
+                       my $p=pagename($f);
+                       if (exists $depends{$p}) {
+                               foreach my $file (keys %rendered, @del) {
+                                       next if $f eq $file;
+                                       my $page=pagename($file);
+                                       if (globlist_match($page, $depends{$p})) {
+                                               debug("rendering $f, which depends on $page");
+                                               render($f);
+                                               $rendered{$f}=1;
+                                               last;
+                                       }
+                               }
+                       }
+               }
+               
                 my %linkchanged;
                 foreach my $file (keys %rendered, @del) {
                         my $page=pagename($file);
+                       
                         if (exists $links{$page}) {
                                 foreach my $link (map { bestlink($page, $_) } @{$links{$page}}) {
                                         if (length $link &&
-                                           ! exists $oldlinks{$page} ||
-                                           ! grep { $_ eq $link } @{$oldlinks{$page}}) {
+                                           (! exists $oldlinks{$page} ||
+                                            ! grep { bestlink($page, $_) eq $link } @{$oldlinks{$page}})) {
                                                 $linkchanged{$link}=1;
                                         }
                                 }
@@ -365,8 +499,8 @@ FILE:               foreach my $file (@files) {
                         if (exists $oldlinks{$page}) {
                                 foreach my $link (map { bestlink($page, $_) } @{$oldlinks{$page}}) {
                                         if (length $link &&
-                                           ! exists $links{$page} ||
-                                           ! grep { $_ eq $link } @{$links{$page}}) {
+                                           (! exists $links{$page} || 
+                                            ! grep { bestlink($page, $_) eq $link } @{$links{$page}})) {
                                                 $linkchanged{$link}=1;
                                         }
                                 }
@@ -377,9 +511,25 @@ FILE:              foreach my $file (@files) {
                         if (defined $linkfile) {
                                 debug("rendering $linkfile, to update its backlinks");
                                 render($linkfile);
+                               $rendered{$linkfile}=1;
                         }
                 }
         }
+
+       if ($config{hyperestraier} && (%rendered || @del)) {
+               debug("updating hyperestraier search index");
+               if (%rendered) {
+                       estcmd("gather -cm -bc -cl -sd", 
+                               map { $config{destdir}."/".$renderedfiles{pagename($_)} }
+                               keys %rendered);
+               }
+               if (@del) {
+                       estcmd("purge -cl");
+               }
+               
+               debug("generating hyperestraier cgi config");
+               estcfg();
+       }
  } #}}}
  
  1