+#!/usr/bin/perl
+
package IkiWiki;
use warnings;
use strict;
use File::Spec;
+use IkiWiki;
sub linkify ($$) { #{{{
+	# Replaces every wiki link in $content (as matched by
+	# $config{wiki_link_regexp}) and returns the resulting text.
+	#   $content - text to scan for wiki links
+	#   $page    - passed to htmllink() as its first argument
+	# In the new replacement, $2 is used as the optional link text and
+	# $3 as the link target (run through titlepage()). A match preceded
+	# by a backslash ($1) is written back as a literal link, without the
+	# backslash, instead of being converted.
+	# The link text is tested with defined/length rather than plain
+	# truth, so that a text of "0" is kept instead of being discarded.
my $content=shift;
my $page=shift;
$content =~ s{(\\?)$config{wiki_link_regexp}}{
- $1 ? "[[$2]]" : htmllink($page, $2)
+	(defined $2 && length $2)
+		? ( $1 ? "[[$2|$3]]" : htmllink($page, titlepage($3), 0, 0, pagetitle($2)))
+		: ( $1 ? "[[$3]]" : htmllink($page, titlepage($3)))
}eg;
return $content;
} #}}}
+# Cached HTML::Scrubber instance, built on the first call to scrubber().
+my $_scrubber;
+sub scrubber { #{{{
+	# Returns the shared HTML::Scrubber object, constructing it on first
+	# use with the element and attribute lists below. Takes no arguments.
+	# Calls error() if HTML::Scrubber cannot be loaded.
+	return $_scrubber if defined $_scrubber;
+
+	# The module is loaded lazily; report a load failure directly rather
+	# than letting the constructor call below fail with an obscure
+	# "can't locate object method" message.
+	eval q{use HTML::Scrubber};
+	error($@) if $@;
+	# Lists based on http://feedparser.org/docs/html-sanitization.html
+	$_scrubber = HTML::Scrubber->new(
+		allow => [qw{
+			a abbr acronym address area b big blockquote br
+			button caption center cite code col colgroup dd del
+			dfn dir div dl dt em fieldset font form h1 h2 h3 h4
+			h5 h6 hr i img input ins kbd label legend li map
+			menu ol optgroup option p pre q s samp select small
+			span strike strong sub sup table tbody td textarea
+			tfoot th thead tr tt u ul var
+		}],
+		# NOTE(review): presumably [default tag rule, attribute rules];
+		# confirm against the HTML::Scrubber documentation.
+		default => [undef, { map { $_ => 1 } qw{
+			abbr accept accept-charset accesskey action
+			align alt axis border cellpadding cellspacing
+			char charoff charset checked cite class
+			clear cols colspan color compact coords
+			datetime dir disabled enctype for frame
+			headers height href hreflang hspace id ismap
+			label lang longdesc maxlength media method
+			multiple name nohref noshade nowrap prompt
+			readonly rel rev rows rowspan rules scope
+			selected shape size span src start summary
+			tabindex target title type usemap valign
+			value vspace width
+		}}],
+	);
+	return $_scrubber;
+} # }}}
+
sub htmlize ($$) { #{{{
my $type=shift;
my $content=shift;
}
if ($type eq '.mdwn') {
- return Markdown::Markdown($content);
+ $content=Markdown::Markdown($content);
}
else {
error("htmlization of $type not supported");
}
+
+ if ($config{sanitize}) {
+ $content=scrubber()->scrub($content);
+ }
+
+ return $content;
} #}}}
sub backlinks ($) { #{{{
return @ret;
} #}}}
-sub rsspage ($) { #{{{
+sub preprocess ($$) { #{{{
+	# Expands preprocessor directives in $content and returns the text.
+	#   $page    - page name, handed to each hook as page => $page
+	#   $content - text scanned with $config{wiki_processor_regexp}
+	# Each match is replaced by the return value of the closure below.
my $page=shift;
+	my $content=shift;
- return $page.".rss";
+	my $handle=sub {
+		my ($escape, $command, $params)=@_;
+
+		# An escaped directive is written back literally, minus the
+		# escape character.
+		return "[[$command $params]]" if length $escape;
+
+		# A command with no registered preprocess hook is flagged in
+		# the output instead of being expanded.
+		return "[[$command not processed]]"
+			unless exists $hooks{preprocess}{$command};
+
+		# Collect key="value" pairs from the parameter string.
+		my %params;
+		while ($params =~ /(\w+)=\"([^"]+)"(\s+|$)/g) {
+			$params{$1}=$2;
+		}
+		return $hooks{preprocess}{$command}{call}->(page => $page, %params);
+	};
+
+	$content =~ s{(\\?)$config{wiki_processor_regexp}}{$handle->($1, $2, $3)}eg;
+	return $content;
+} #}}}
+
+sub add_depends ($$) { #{{{
+	# Records that $page depends on the globs in $globlist.
+	#   $page     - key into %depends
+	#   $globlist - globlist string to store or merge
+	# The first globlist for a page is stored as given; later ones are
+	# combined with the stored value through globlist_merge().
+	my ($page, $globlist)=@_;
+
+	my $merged=exists $depends{$page}
+		? globlist_merge($depends{$page}, $globlist)
+		: $globlist;
+	$depends{$page}=$merged;
+} # }}}
+
+sub globlist_merge ($$) { #{{{
+	# Merges two whitespace-separated globlists and returns the result
+	# as one string in which every kept glob is preceded by a space.
+	# The globs of the second list come first, then those of the first.
+	# The lexicals are deliberately not named $a and $b: declaring those
+	# with "my" masks the package variables that sort blocks rely on.
+	my $first=shift;
+	my $second=shift;
+
+	my $ret="";
+	# Only add negated globs if they are not matched by the other globlist.
+	# Each entry pairs a glob with the other list it is checked against.
+	foreach my $i ((map { [ $first, $_ ] } split(" ", $second)),
+	               (map { [ $second, $_ ] } split(" ", $first))) {
+		my ($other, $glob)=@$i;
+		if ($glob=~/^!(.*)/) {
+			# Copy the capture before the call; $1 is a global that
+			# any pattern match run inside the callee could change.
+			my $negated=$1;
+			if (! globlist_match($negated, $other)) {
+				$ret.=" ".$glob;
+			}
+		}
+		else {
+			$ret.=" ".$glob;
+		}
+	}
+
+	return $ret;
} #}}}
sub genpage ($$$) { #{{{
filename => "$config{templatedir}/page.tmpl");
if (length $config{cgiurl}) {
- $template->param(editurl => "$config{cgiurl}?do=edit&page=$page");
- $template->param(prefsurl => "$config{cgiurl}?do=prefs");
+ $template->param(editurl => cgiurl(do => "edit", page => $page));
+ $template->param(prefsurl => cgiurl(do => "prefs"));
if ($config{rcs}) {
- $template->param(recentchangesurl => "$config{cgiurl}?do=recentchanges");
+ $template->param(recentchangesurl => cgiurl(do => "recentchanges"));
}
}
$u=~s/\[\[file\]\]/$pagesources{$page}/g;
$template->param(historyurl => $u);
}
-
- if ($config{rss}) {
- $template->param(rssurl => rsspage($page));
+ if ($config{hyperestraier}) {
+ $template->param(hyperestraierurl => cgiurl());
}
-
+
$template->param(
title => $title,
wikiname => $config{wikiname},
backlinks => [backlinks($page)],
discussionlink => htmllink($page, "Discussion", 1, 1),
mtime => scalar(gmtime($mtime)),
- );
-
- return $template->output;
-} #}}}
-
-sub date_822 ($) { #{{{
- my $time=shift;
-
- eval q{use POSIX};
- return POSIX::strftime("%a, %d %b %Y %H:%M:%S %z", localtime($time));
-} #}}}
-
-sub absolute_urls ($$) { #{{{
- my $content=shift;
- my $url=shift;
-
- $url=~s/[^\/]+$//;
-
- $content=~s{<a\s+href="([^"]+)"}{
- "<a href=\"$url$1\""
- }ieg;
- $content=~s{<img\s+src="([^"]+)"}{
- "<img src=\"$url$1\""
- }ieg;
- return $content;
-} #}}}
-
-sub genrss ($$$) { #{{{
- my $content=shift;
- my $page=shift;
- my $mtime=shift;
-
- my $url="$config{url}/".htmlpage($page);
-
- my $template=HTML::Template->new(blind_cache => 1,
- filename => "$config{templatedir}/rsspage.tmpl");
-
- # Regular page gets a feed that is updated every time the
- # page is changed, so the mtime is encoded in the guid.
- my @items=(
- {
- itemtitle => pagetitle(basename($page)),
- itemguid => "$url?mtime=$mtime",
- itemurl => $url,
- itempubdate => date_822($mtime),
- itemcontent => absolute_urls($content, $url), # rss sucks
- },
- );
-
- $template->param(
- title => $config{wikiname},
- pageurl => $url,
- items => \@items,
+ styleurl => styleurl($page),
);
return $template->output;
} #}}}
sub mtime ($) { #{{{
- my $page=shift;
+	# Returns the modification time of $file, i.e. element 9 of the
+	# list returned by stat(). When stat() fails that list is empty, so
+	# the slice is empty too (undef in scalar context).
+	my ($file)=@_;
- return (stat($page))[9];
+	return (stat($file))[9];
} #}}}
sub findlinks ($$) { #{{{
my @links;
while ($content =~ /(?<!\\)$config{wiki_link_regexp}/g) {
- push @links, lc($1);
+ push @links, titlepage($2);
}
# Discussion links are a special case since they're not in the text
# of the page, but on its template.
my $file=shift;
my $type=pagetype($file);
- my $content=readfile("$config{srcdir}/$file");
+ my $srcfile=srcfile($file);
if ($type ne 'unknown') {
+ my $content=readfile($srcfile);
my $page=pagename($file);
$links{$page}=[findlinks($content, $page)];
+ delete $depends{$page};
$content=linkify($content, $page);
+ $content=preprocess($page, $content);
$content=htmlize($type, $content);
check_overwrite("$config{destdir}/".htmlpage($page), $page);
- writefile("$config{destdir}/".htmlpage($page),
- genpage($content, $page, mtime("$config{srcdir}/$file")));
+ writefile(htmlpage($page), $config{destdir},
+ genpage($content, $page, mtime($srcfile)));
$oldpagemtime{$page}=time;
$renderedfiles{$page}=htmlpage($page);
-
- # TODO: should really add this to renderedfiles and call
- # check_overwrite, as above, but currently renderedfiles
- # only supports listing one file per page.
- if ($config{rss}) {
- writefile("$config{destdir}/".rsspage($page),
- genrss($content, $page, mtime("$config{srcdir}/$file")));
- }
}
else {
+ my $content=readfile($srcfile, 1);
$links{$file}=[];
+ delete $depends{$file};
check_overwrite("$config{destdir}/$file", $file);
- writefile("$config{destdir}/$file", $content);
+ writefile($file, $config{destdir}, $content, 1);
$oldpagemtime{$file}=time;
$renderedfiles{$file}=$file;
}
}
} #}}}
+sub estcfg () { #{{{
+	# Generates the files the hyperestraier search CGI needs under
+	# $config{wikistatedir}/hyperestraier:
+	#   <cgi>.tmpl - output of misctemplate("search", ...) holding the
+	#                ESTFORM/ESTRESULT/ESTINFO placeholder comments
+	#   <cgi>.conf - filled in from $config{templatedir}/estseek.conf
+	#   a symlink named after the cgiurl basename, pointing at
+	#   /usr/lib/estraier/estseek.cgi
+	# <cgi> is the basename of $config{cgiurl} with everything from the
+	# first dot removed. Takes no arguments; calls error() on failure.
+	my $estdir="$config{wikistatedir}/hyperestraier";
+	my $cgi=basename($config{cgiurl});
+	$cgi=~s/\..*$//;
+
+	# Lexical handles and three-argument open keep the file name from
+	# being interpreted as an open mode. close is checked because
+	# buffered write errors only show up there.
+	my $tmplfile="$estdir/$cgi.tmpl";
+	open(my $tmplfh, '>', $tmplfile) ||
+		error("write $tmplfile: $!");
+	print {$tmplfh} misctemplate("search",
+		"<!--ESTFORM-->\n\n<!--ESTRESULT-->\n\n<!--ESTINFO-->\n\n");
+	close($tmplfh) || error("write $tmplfile: $!");
+
+	my $conffile="$estdir/$cgi.conf";
+	open(my $conffh, '>', $conffile) ||
+		error("write $conffile: $!");
+	my $template=HTML::Template->new(
+		filename => "$config{templatedir}/estseek.conf"
+	);
+	# Cwd is loaded lazily; report a load failure before abs_path is used.
+	eval q{use Cwd 'abs_path'};
+	error($@) if $@;
+	$template->param(
+		index => $estdir,
+		tmplfile => $tmplfile,
+		destdir => abs_path($config{destdir}),
+		url => $config{url},
+	);
+	print {$conffh} $template->output;
+	close($conffh) || error("write $conffile: $!");
+
+	# Replace any previous link; unlink's result is ignored on purpose
+	# because the link may not exist yet. symlink reports real problems.
+	$cgi="$estdir/".basename($config{cgiurl});
+	unlink($cgi);
+	symlink("/usr/lib/estraier/estseek.cgi", $cgi) ||
+		error("symlink $cgi: $!");
+} # }}}
+
+sub estcmd ($;@) { #{{{
+	# Runs the external "estcmd" program on the hyperestraier index.
+	#   first argument  - string of estcmd arguments, split on whitespace
+	#   remaining args  - optional lines written to estcmd's stdin, one
+	#                     per line; when present a "-" argument is added
+	# "-cl" and the index directory under $config{wikistatedir} are
+	# always appended to the arguments. Calls error() if the fork fails,
+	# if estcmd cannot be run, or if closing the pipe reports a failure.
+	my @params=split(' ', shift);
+	push @params, "-cl", "$config{wikistatedir}/hyperestraier";
+	if (@_) {
+		push @params, "-";
+	}
+
+	my $pid=open(my $child, "|-");
+	if (! defined $pid) {
+		# Without this check a failed fork would fall into the child
+		# branch and replace the calling process with estcmd.
+		error("estcmd @params: cannot fork: $!");
+	}
+	elsif ($pid) {
+		# parent
+		foreach my $line (@_) {
+			print {$child} "$line\n";
+		}
+		close($child) || error("estcmd @params exited nonzero: $?");
+	}
+	else {
+		# child
+		# shut it up (closing won't work); opened for writing so that
+		# estcmd's output is discarded rather than failing to write
+		open(STDOUT, '>', "/dev/null");
+		exec("estcmd", @params) || error("can't run estcmd");
+	}
+} #}}}
+
sub refresh () { #{{{
# find existing pages
my %exists;
no_chdir => 1,
wanted => sub {
if (/$config{wiki_file_prune_regexp}/) {
- no warnings 'once';
$File::Find::prune=1;
- use warnings "all";
}
elsif (! -d $_ && ! -l $_) {
my ($f)=/$config{wiki_file_regexp}/; # untaint
}
},
}, $config{srcdir});
+ find({
+ no_chdir => 1,
+ wanted => sub {
+ if (/$config{wiki_file_prune_regexp}/) {
+ $File::Find::prune=1;
+ }
+ elsif (! -d $_ && ! -l $_) {
+ my ($f)=/$config{wiki_file_regexp}/; # untaint
+ if (! defined $f) {
+ warn("skipping bad filename $_\n");
+ }
+ else {
+ # Don't add files that are in the
+ # srcdir.
+ $f=~s/^\Q$config{underlaydir}\E\/?//;
+ if (! -e "$config{srcdir}/$f" &&
+ ! -l "$config{srcdir}/$f") {
+ push @files, $f;
+ $exists{pagename($f)}=1;
+ }
+ }
+ }
+ },
+ }, $config{underlaydir});
my %rendered;
foreach my $file (@files) {
my $page=pagename($file);
if (! $oldpagemtime{$page}) {
- debug("new page $page");
+ debug("new page $page") unless exists $pagectime{$page};
push @add, $file;
$links{$page}=[];
$pagesources{$page}=$file;
+ $pagectime{$page}=mtime(srcfile($file))
+ unless exists $pagectime{$page};
}
}
my @del;
my $page=pagename($file);
if (! exists $oldpagemtime{$page} ||
- mtime("$config{srcdir}/$file") > $oldpagemtime{$page}) {
+ mtime(srcfile($file)) > $oldpagemtime{$page}) {
debug("rendering changed file $file");
render($file);
$rendered{$file}=1;
}
# if any files were added or removed, check to see if each page
- # needs an update due to linking to them
+ # needs an update due to linking to them or inlining them.
# TODO: inefficient; pages may get rendered above and again here;
# problem is the bestlink may have changed and we won't know until
# now
}
}
- # handle backlinks; if a page has added/removed links, update the
- # pages it links to
+ # Handle backlinks; if a page has added/removed links, update the
+ # pages it links to. Also handles rebuilding dependent pages.
# TODO: inefficient; pages may get rendered above and again here;
# problem is the backlinks could be wrong in the first pass render
# above
- if (%rendered) {
+ if (%rendered || @del) {
+ foreach my $f (@files) {
+ my $p=pagename($f);
+ if (exists $depends{$p}) {
+ foreach my $file (keys %rendered, @del) {
+ next if $f eq $file;
+ my $page=pagename($file);
+ if (globlist_match($page, $depends{$p})) {
+ debug("rendering $f, which depends on $page");
+ render($f);
+ $rendered{$f}=1;
+ last;
+ }
+ }
+ }
+ }
+
my %linkchanged;
foreach my $file (keys %rendered, @del) {
my $page=pagename($file);
+
if (exists $links{$page}) {
foreach my $link (map { bestlink($page, $_) } @{$links{$page}}) {
if (length $link &&
- ! exists $oldlinks{$page} ||
- ! grep { $_ eq $link } @{$oldlinks{$page}}) {
+ (! exists $oldlinks{$page} ||
+ ! grep { bestlink($page, $_) eq $link } @{$oldlinks{$page}})) {
$linkchanged{$link}=1;
}
}
if (exists $oldlinks{$page}) {
foreach my $link (map { bestlink($page, $_) } @{$oldlinks{$page}}) {
if (length $link &&
- ! exists $links{$page} ||
- ! grep { $_ eq $link } @{$links{$page}}) {
+ (! exists $links{$page} ||
+ ! grep { bestlink($page, $_) eq $link } @{$links{$page}})) {
$linkchanged{$link}=1;
}
}
if (defined $linkfile) {
debug("rendering $linkfile, to update its backlinks");
render($linkfile);
+ $rendered{$linkfile}=1;
}
}
}
+
+ if ($config{hyperestraier} && (%rendered || @del)) {
+ debug("updating hyperestraier search index");
+ if (%rendered) {
+ estcmd("gather -cm -bc -cl -sd",
+ map { $config{destdir}."/".$renderedfiles{pagename($_)} }
+ keys %rendered);
+ }
+ if (@del) {
+ estcmd("purge -cl");
+ }
+
+ debug("generating hyperestraier cgi config");
+ estcfg();
+ }
} #}}}
1