X-Git-Url: http://git.vanrenterghem.biz/git.ikiwiki.info.git/blobdiff_plain/cbde8520e9cc5512bd2726e2fa26089db0ddc7ab..fa1aebc747ece026608908527187d3f403135f53:/IkiWiki.pm?ds=inline diff --git a/IkiWiki.pm b/IkiWiki.pm index 75c957932..efb48293a 100644 --- a/IkiWiki.pm +++ b/IkiWiki.pm @@ -5,6 +5,7 @@ package IkiWiki; use warnings; use strict; use Encode; +use Fcntl q{:flock}; use URI::Escape q{uri_escape_utf8}; use POSIX (); use Storable; @@ -14,7 +15,7 @@ use vars qw{%config %links %oldlinks %pagemtime %pagectime %pagecase %pagestate %wikistate %renderedfiles %oldrenderedfiles %pagesources %delpagesources %destsources %depends %depends_simple @mass_depends %hooks %forcerebuild %loaded_plugins %typedlinks - %oldtypedlinks %autofiles}; + %oldtypedlinks %autofiles @underlayfiles $lastrev $phase}; use Exporter q{import}; our @EXPORT = qw(hook debug error htmlpage template template_depends @@ -34,6 +35,11 @@ our $DEPEND_CONTENT=1; our $DEPEND_PRESENCE=2; our $DEPEND_LINKS=4; +# Phases of processing. +sub PHASE_SCAN () { 0 } +sub PHASE_RENDER () { 1 } +$phase = PHASE_SCAN; + # Optimisation. use Memoize; memoize("abs2rel"); @@ -103,6 +109,14 @@ sub getsetup () { safe => 1, rebuild => 1, }, + reverse_proxy => { + type => "boolean", + default => 0, + description => "do not adjust cgiurl if CGI is accessed via different URL", + advanced => 0, + safe => 1, + rebuild => 0, # only affects CGI requests + }, cgi_wrapper => { type => "string", default => '', @@ -134,6 +148,13 @@ sub getsetup () { safe => 1, rebuild => 0, }, + only_committed_changes => { + type => "boolean", + default => 0, + description => "enable optimization of only refreshing committed changes?", + safe => 1, + rebuild => 0, + }, rcs => { type => "string", default => '', @@ -145,7 +166,8 @@ sub getsetup () { type => "internal", default => [qw{mdwn link inline meta htmlscrubber passwordauth openid signinedit lockedit conditional - recentchanges parentlinks editpage}], + recentchanges parentlinks editpage + templatebody}], description => "plugins to enable by default", safe => 0, rebuild => 1, @@ -253,7 +275,7 @@ sub getsetup () { html5 => { type => "boolean", default => 0, - description => "generate HTML5?", + description => "use elements new in HTML5 like
?", advanced => 0, safe => 1, rebuild => 1, @@ -336,11 +358,20 @@ sub getsetup () { safe => 0, # paranoia rebuild => 0, }, + libdirs => { + type => "string", + default => [], + example => ["$ENV{HOME}/.local/share/ikiwiki"], + description => "extra library and plugin directories", + advanced => 1, + safe => 0, # directory + rebuild => 0, + }, libdir => { type => "string", default => "", example => "$ENV{HOME}/.ikiwiki/", - description => "extra library and plugin directory", + description => "extra library and plugin directory (searched after libdirs)", advanced => 1, safe => 0, # directory rebuild => 0, @@ -515,12 +546,45 @@ sub getsetup () { }, cookiejar => { type => "string", - default => "$ENV{HOME}/.ikiwiki/cookies", + default => { file => "$ENV{HOME}/.ikiwiki/cookies" }, description => "cookie control", - example => { file => "$ENV{HOME}/.ikiwiki/cookies" }, safe => 0, # hooks into perl module internals rebuild => 0, }, + useragent => { + type => "string", + default => "ikiwiki/$version", + example => "Wget/1.13.4 (linux-gnu)", + description => "set custom user agent string for outbound HTTP requests e.g. when fetching aggregated RSS feeds", + safe => 0, + rebuild => 0, + }, + responsive_layout => { + type => "boolean", + default => 1, + description => "theme has a responsive layout? (mobile-optimized)", + safe => 1, + rebuild => 1, + }, + deterministic => { + type => "boolean", + default => 0, + description => "try harder to produce deterministic output", + safe => 1, + rebuild => 1, + advanced => 1, + }, +} + +sub getlibdirs () { + my @libdirs; + if ($config{libdirs}) { + @libdirs = @{$config{libdirs}}; + } + if (length $config{libdir}) { + push @libdirs, $config{libdir}; + } + return @libdirs; } sub defaultconfig () { @@ -563,9 +627,20 @@ sub checkconfig () { if (defined $config{timezone} && length $config{timezone}) { $ENV{TZ}=$config{timezone}; } - else { + elsif (defined $ENV{TZ} && length $ENV{TZ}) { $config{timezone}=$ENV{TZ}; } + else { + eval q{use Config qw()}; + error($@) if $@; + + if ($Config::Config{d_gnulibc} && -e '/etc/localtime') { + $config{timezone}=$ENV{TZ}=':/etc/localtime'; + } + else { + $config{timezone}=$ENV{TZ}='GMT'; + } + } if ($config{w3mmode}) { eval q{use Cwd q{abs_path}}; @@ -593,12 +668,39 @@ sub checkconfig () { $local_cgiurl = $cgiurl->path; - if ($cgiurl->scheme ne $baseurl->scheme or - $cgiurl->authority ne $baseurl->authority) { + if ($cgiurl->scheme eq 'https' && + $baseurl->scheme eq 'http') { + # We assume that the same content is available + # over both http and https, because if it + # wasn't, accessing the static content + # from the CGI would be mixed-content, + # which would be a security flaw. + + if ($cgiurl->authority ne $baseurl->authority) { + # use protocol-relative URL for + # static content + $local_url = "$config{url}/"; + $local_url =~ s{^http://}{//}; + } + # else use host-relative URL for static content + + # either way, CGI needs to be absolute + $local_cgiurl = $config{cgiurl}; + } + elsif ($cgiurl->scheme ne $baseurl->scheme) { # too far apart, fall back to absolute URLs $local_url = "$config{url}/"; $local_cgiurl = $config{cgiurl}; } + elsif ($cgiurl->authority ne $baseurl->authority) { + # slightly too far apart, fall back to + # protocol-relative URLs + $local_url = "$config{url}/"; + $local_url =~ s{^https?://}{//}; + $local_cgiurl = $config{cgiurl}; + $local_cgiurl =~ s{^https?://}{//}; + } + # else keep host-relative URLs } $local_url =~ s{//$}{/}; @@ -638,14 +740,14 @@ sub checkconfig () { sub listplugins () { my %ret; - foreach my $dir (@INC, $config{libdir}) { + foreach my $dir (@INC, getlibdirs()) { next unless defined $dir && length $dir; foreach my $file (glob("$dir/IkiWiki/Plugin/*.pm")) { my ($plugin)=$file=~/.*\/(.*)\.pm$/; $ret{$plugin}=1; } } - foreach my $dir ($config{libdir}, "$installdir/lib/ikiwiki") { + foreach my $dir (getlibdirs(), "$installdir/lib/ikiwiki") { next unless defined $dir && length $dir; foreach my $file (glob("$dir/plugins/*")) { $ret{basename($file)}=1 if -x $file; @@ -656,8 +758,8 @@ sub listplugins () { } sub loadplugins () { - if (defined $config{libdir} && length $config{libdir}) { - unshift @INC, possibly_foolish_untaint($config{libdir}); + foreach my $dir (getlibdirs()) { + unshift @INC, possibly_foolish_untaint($dir); } foreach my $plugin (@{$config{default_plugins}}, @{$config{add_plugins}}) { @@ -690,8 +792,8 @@ sub loadplugin ($;$) { return if ! $force && grep { $_ eq $plugin} @{$config{disable_plugins}}; - foreach my $dir (defined $config{libdir} ? possibly_foolish_untaint($config{libdir}) : undef, - "$installdir/lib/ikiwiki") { + foreach my $possiblytainteddir (getlibdirs(), "$installdir/lib/ikiwiki") { + my $dir = possibly_foolish_untaint($possiblytainteddir); if (defined $dir && -x "$dir/plugins/$plugin") { eval { require IkiWiki::Plugin::external }; if ($@) { @@ -729,6 +831,7 @@ sub debug ($) { } my $log_open=0; +my $log_failed=0; sub log_message ($$) { my $type=shift; @@ -739,9 +842,17 @@ sub log_message ($$) { Sys::Syslog::openlog('ikiwiki', '', 'user'); $log_open=1; } - return eval { - Sys::Syslog::syslog($type, "[$config{wikiname}] %s", join(" ", @_)); + eval { + my $message = "[$config{wikiname}] ".join(" ", @_); + utf8::encode($message); + Sys::Syslog::syslog($type, "%s", $message); }; + if ($@) { + print STDERR "failed to syslog: $@" unless $log_failed; + $log_failed=1; + print STDERR "@_\n"; + } + return $@; } elsif (! $config{cgi}) { return print "@_\n"; @@ -1113,7 +1224,7 @@ sub cgiurl (@) { } return $cgiurl."?". - join("&", map $_."=".uri_escape_utf8($params{$_}), keys %params); + join("&", map $_."=".uri_escape_utf8($params{$_}), sort(keys %params)); } sub cgiurl_abs (@) { @@ -1121,6 +1232,19 @@ sub cgiurl_abs (@) { URI->new_abs(cgiurl(@_), $config{cgiurl}); } +# Same as cgiurl_abs, but when the user connected using https, +# will be a https url even if the cgiurl is normally a http url. +# +# This should be used for anything involving emailing a login link, +# because a https session cookie will not be sent over http. +sub cgiurl_abs_samescheme (@) { + my $u=cgiurl_abs(@_); + if (($ENV{HTTPS} && lc $ENV{HTTPS} ne "off")) { + $u=~s/^http:/https:/i; + } + return $u +} + sub baseurl (;$) { my $page=shift; @@ -1182,14 +1306,20 @@ sub formattime ($;$) { my $strftime_encoding; sub strftime_utf8 { - # strftime doesn't know about encodings, so make sure + # strftime didn't know about encodings in older Perl, so make sure # its output is properly treated as utf8. # Note that this does not handle utf-8 in the format string. + my $result = POSIX::strftime(@_); + + if (Encode::is_utf8($result)) { + return $result; + } + ($strftime_encoding) = POSIX::setlocale(&POSIX::LC_TIME) =~ m#\.([^@]+)# unless defined $strftime_encoding; $strftime_encoding - ? Encode::decode($strftime_encoding, POSIX::strftime(@_)) - : POSIX::strftime(@_); + ? Encode::decode($strftime_encoding, $result) + : $result; } sub date_3339 ($) { @@ -1326,6 +1456,7 @@ sub userpage ($) { return length $config{userdir} ? "$config{userdir}/$user" : $user; } +# Username to display for openid accounts. sub openiduser ($) { my $user=shift; @@ -1360,6 +1491,36 @@ sub openiduser ($) { return; } +# Username to display for emailauth accounts. +sub emailuser ($) { + my $user=shift; + if (defined $user && $user =~ m/(.+)@/) { + my $nick=$1; + # remove any characters from not allowed in wiki files + # support use w/o %config set + my $chars = defined $config{wiki_file_chars} ? $config{wiki_file_chars} : "-[:alnum:]+/.:_"; + $nick=~s/[^$chars]/_/g; + return $nick; + } + return; +} + +# Some user information should not be exposed in commit metadata, etc. +# This generates a cloaked form of such information. +sub cloak ($) { + my $user=shift; + # cloak email address using http://xmlns.com/foaf/spec/#term_mbox_sha1sum + if ($user=~m/(.+)@/) { + my $nick=$1; + eval q{use Digest::SHA}; + return $user if $@; + return $nick.'@'.Digest::SHA::sha1_hex("mailto:$user"); + } + else { + return $user; + } +} + sub htmlize ($$$$) { my $page=shift; my $destpage=shift; @@ -1484,7 +1645,7 @@ sub preprocess ($$$;$$) { push @params, $val, ''; } } - if ($preprocessing{$page}++ > 3) { + if ($preprocessing{$page}++ > 8) { # Avoid loops of preprocessed pages preprocessing # other pages that preprocess them, etc. return "[[!$command ". @@ -1505,6 +1666,11 @@ sub preprocess ($$$;$$) { if ($@) { my $error=$@; chomp $error; + eval q{use HTML::Entities}; + # Also encode most ASCII punctuation + # as entities so that error messages + # are not interpreted as Markdown etc. + $error = encode_entities($error, '^-A-Za-z0-9+_,./:;= '."'"); $ret="[[!$command ". gettext("Error").": $error"."]]"; } @@ -1682,7 +1848,7 @@ sub check_canchange (@) { $file=possibly_foolish_untaint($file); if (! defined $file || ! length $file || file_pruned($file)) { - error(gettext("bad file name %s"), $file); + error(sprintf(gettext("bad file name %s"), $file)); } my $type=pagetype($file); @@ -1741,8 +1907,11 @@ sub lockwiki () { } open($wikilock, '>', "$config{wikistatedir}/lockfile") || error ("cannot write to $config{wikistatedir}/lockfile: $!"); - if (! flock($wikilock, 2)) { # LOCK_EX - error("failed to get lock"); + if (! flock($wikilock, LOCK_EX | LOCK_NB)) { + debug("failed to get lock; waiting..."); + if (! flock($wikilock, LOCK_EX)) { + error("failed to get lock"); + } } return 1; } @@ -1782,7 +1951,8 @@ sub enable_commit_hook () { sub loadindex () { %oldrenderedfiles=%pagectime=(); - if (! $config{rebuild}) { + my $rebuild=$config{rebuild}; + if (! $rebuild) { %pagesources=%pagemtime=%oldlinks=%links=%depends= %destsources=%renderedfiles=%pagecase=%pagestate= %depends_simple=%typedlinks=%oldtypedlinks=(); @@ -1794,7 +1964,8 @@ sub loadindex () { open ($in, "<", "$config{wikistatedir}/indexdb") || return; } else { - $config{gettime}=1; # first build + # gettime on first build + $config{gettime}=1 unless defined $config{gettime}; return; } } @@ -1822,10 +1993,16 @@ sub loadindex () { foreach my $src (keys %$pages) { my $d=$pages->{$src}; - my $page=pagename($src); + my $page; + if (exists $d->{page} && ! $rebuild) { + $page=$d->{page}; + } + else { + $page=pagename($src); + } $pagectime{$page}=$d->{ctime}; $pagesources{$page}=$src; - if (! $config{rebuild}) { + if (! $rebuild) { $pagemtime{$page}=$d->{mtime}; $renderedfiles{$page}=$d->{dest}; if (exists $d->{links} && ref $d->{links}) { @@ -1875,6 +2052,8 @@ sub loadindex () { foreach my $page (keys %renderedfiles) { $destsources{$_}=$page foreach @{$renderedfiles{$page}}; } + $lastrev=$index->{lastrev}; + @underlayfiles=@{$index->{underlayfiles}} if ref $index->{underlayfiles}; return close($in); } @@ -1896,6 +2075,7 @@ sub saveindex () { my $src=$pagesources{$page}; $index{page}{$src}={ + page => $page, ctime => $pagectime{$page}, mtime => $pagemtime{$page}, dest => $renderedfiles{$page}, @@ -1915,11 +2095,7 @@ sub saveindex () { } if (exists $pagestate{$page}) { - foreach my $id (@plugins) { - foreach my $key (keys %{$pagestate{$page}{$id}}) { - $index{page}{$src}{state}{$id}{$key}=$pagestate{$page}{$id}{$key}; - } - } + $index{page}{$src}{state}=$pagestate{$page}; } } @@ -1931,6 +2107,9 @@ sub saveindex () { } } + $index{lastrev}=$lastrev; + $index{underlayfiles}=\@underlayfiles; + $index{version}="3"; my $ret=Storable::nstore_fd(\%index, $out); return if ! defined $ret || ! $ret; @@ -1988,11 +2167,19 @@ sub template_depends ($$;@) { if (defined $page && defined $tpage) { add_depends($page, $tpage); } - + my @opts=( filter => sub { my $text_ref = shift; ${$text_ref} = decode_utf8(${$text_ref}); + run_hooks(readtemplate => sub { + ${$text_ref} = shift->( + id => $name, + page => $tpage, + content => ${$text_ref}, + untrusted => $untrusted, + ); + }); }, loop_context_vars => 1, die_on_bad_params => 0, @@ -2282,11 +2469,131 @@ sub add_autofile ($$$) { $autofiles{$file}{generator}=$generator; } -sub useragent () { - return LWP::UserAgent->new( +sub useragent (@) { + my %params = @_; + my $for_url = delete $params{for_url}; + # Fail safe, in case a plugin calling this function is relying on + # a future parameter to make the UA more strict + foreach my $key (keys %params) { + error "Internal error: useragent(\"$key\" => ...) not understood"; + } + + eval q{use LWP}; + error($@) if $@; + + my %args = ( + agent => $config{useragent}, cookie_jar => $config{cookiejar}, - env_proxy => 1, # respect proxy env vars + env_proxy => 0, + protocols_allowed => [qw(http https)], ); + my %proxies; + + if (defined $for_url) { + # We know which URL we're going to fetch, so we can choose + # whether it's going to go through a proxy or not. + # + # We reimplement http_proxy, https_proxy and no_proxy here, so + # that we are not relying on LWP implementing them exactly the + # same way we do. + + eval q{use URI}; + error($@) if $@; + + my $proxy; + my $uri = URI->new($for_url); + + if ($uri->scheme eq 'http') { + $proxy = $ENV{http_proxy}; + # HTTP_PROXY is deliberately not implemented + # because the HTTP_* namespace is also used by CGI + } + elsif ($uri->scheme eq 'https') { + $proxy = $ENV{https_proxy}; + $proxy = $ENV{HTTPS_PROXY} unless defined $proxy; + } + else { + $proxy = undef; + } + + foreach my $var (qw(no_proxy NO_PROXY)) { + my $no_proxy = $ENV{$var}; + if (defined $no_proxy) { + foreach my $domain (split /\s*,\s*/, $no_proxy) { + if ($domain =~ s/^\*?\.//) { + # no_proxy="*.example.com" or + # ".example.com": match suffix + # against .example.com + if ($uri->host =~ m/(^|\.)\Q$domain\E$/i) { + $proxy = undef; + } + } + else { + # no_proxy="example.com": + # match exactly example.com + if (lc $uri->host eq lc $domain) { + $proxy = undef; + } + } + } + } + } + + if (defined $proxy) { + $proxies{$uri->scheme} = $proxy; + # Paranoia: make sure we can't bypass the proxy + $args{protocols_allowed} = [$uri->scheme]; + } + } + else { + # The plugin doesn't know yet which URL(s) it's going to + # fetch, so we have to make some conservative assumptions. + my $http_proxy = $ENV{http_proxy}; + my $https_proxy = $ENV{https_proxy}; + $https_proxy = $ENV{HTTPS_PROXY} unless defined $https_proxy; + + # We don't respect no_proxy here: if we are not using the + # paranoid user-agent, then we need to give the proxy the + # opportunity to reject undesirable requests. + + # If we have one, we need the other: otherwise, neither + # LWPx::ParanoidAgent nor the proxy would have the + # opportunity to filter requests for the other protocol. + if (defined $https_proxy && defined $http_proxy) { + %proxies = (http => $http_proxy, https => $https_proxy); + } + elsif (defined $https_proxy) { + %proxies = (http => $https_proxy, https => $https_proxy); + } + elsif (defined $http_proxy) { + %proxies = (http => $http_proxy, https => $http_proxy); + } + + } + + if (scalar keys %proxies) { + # The configured proxy is responsible for deciding which + # URLs are acceptable to fetch and which URLs are not. + my $ua = LWP::UserAgent->new(%args); + foreach my $scheme (@{$ua->protocols_allowed}) { + unless ($proxies{$scheme}) { + error "internal error: $scheme is allowed but has no proxy"; + } + } + # We can't pass the proxies in %args because that only + # works since LWP 6.24. + foreach my $scheme (keys %proxies) { + $ua->proxy($scheme, $proxies{$scheme}); + } + return $ua; + } + + eval q{use LWPx::ParanoidAgent}; + if ($@) { + print STDERR "warning: installing LWPx::ParanoidAgent is recommended\n"; + return LWP::UserAgent->new(%args); + } + return LWPx::ParanoidAgent->new(%args); } sub sortspec_translate ($$) { @@ -2425,6 +2732,19 @@ sub pagespec_match ($$;@) { return $sub->($page, @params); } +# e.g. @pages = sort_pages("title", \@pages, reverse => "yes") +# +# Not exported yet, but could be in future if it is generally useful. +# Note that this signature is not the same as IkiWiki::SortSpec::sort_pages, +# which is "more internal". +sub sort_pages ($$;@) { + my $sort = shift; + my $list = shift; + my %params = @_; + $sort = sortspec_translate($sort, $params{reverse}); + return IkiWiki::SortSpec::sort_pages($sort, @$list); +} + sub pagespec_match_list ($$;@) { my $page=shift; my $pagespec=shift; @@ -2530,21 +2850,48 @@ our @ISA = 'IkiWiki::SuccessReason'; package IkiWiki::SuccessReason; +# A blessed array-ref: +# +# [0]: human-readable reason for success (or, in FailReason subclass, failure) +# [1]{""}: +# - if absent or false, the influences of this evaluation are "static", +# see the influences_static method +# - if true, they are dynamic (not static) +# [1]{any other key}: +# the dependency types of influences, as returned by the influences method + use overload ( + # in string context, it's the human-readable reason '""' => sub { $_[0][0] }, + # in boolean context, SuccessReason is 1 and FailReason is 0 '0+' => sub { 1 }, + # negating a result gives the opposite result with the same influences '!' => sub { bless $_[0], 'IkiWiki::FailReason'}, + # A & B = (A ? B : A) with the influences of both '&' => sub { $_[1]->merge_influences($_[0], 1); $_[1] }, + # A | B = (A ? A : B) with the influences of both '|' => sub { $_[0]->merge_influences($_[1]); $_[0] }, fallback => 1, ); +# SuccessReason->new("human-readable reason", page => deptype, ...) + sub new { my $class = shift; my $value = shift; return bless [$value, {@_}], $class; } +# influences(): return a reference to a copy of the hash +# { page => dependency type } describing the pages that indirectly influenced +# this result, but would not cause a dependency through ikiwiki's core +# dependency logic. +# +# See [[todo/dependency_types]] for extensive discussion of what this means. +# +# influences(page => deptype, ...): remove all influences, replace them +# with the arguments, and return a reference to a copy of the new influences. + sub influences { my $this=shift; $this->[1]={@_} if @_; @@ -2553,15 +2900,46 @@ sub influences { return \%i; } +# True if this result has the same influences whichever page it matches, +# For instance, whether bar matches backlink(foo) is influenced only by +# the set of links in foo, so its only influence is { foo => DEPEND_LINKS }, +# which does not mention bar anywhere. +# +# False if this result would have different influences when matching +# different pages. For instance, when testing whether link(foo) matches bar, +# { bar => DEPEND_LINKS } is an influence on that result, because changing +# bar's links could change the outcome; so its influences are not the same +# as when testing whether link(foo) matches baz. +# +# Static influences are one of the things that make pagespec_match_list +# more efficient than repeated calls to pagespec_match. + sub influences_static { return ! $_[0][1]->{""}; } +# Change the influences of $this to be the influences of "$this & $other" +# or "$this | $other". +# +# If both $this and $other are either successful or have influences, +# or this is an "or" operation, the result has all the influences from +# either of the arguments. It has dynamic influences if either argument +# has dynamic influences. +# +# If this is an "and" operation, and at least one argument is a +# FailReason with no influences, the result has no influences, and they +# are not dynamic. For instance, link(foo) matching bar is influenced +# by bar, but enabled(ddate) has no influences. Suppose ddate is disabled; +# then (link(foo) and enabled(ddate)) not matching bar is not influenced by +# bar, because it would be false however often you edit bar. + sub merge_influences { my $this=shift; my $other=shift; my $anded=shift; + # This "if" is odd because it needs to avoid negating $this + # or $other, which would alter the objects in-place. Be careful. if (! $anded || (($this || %{$this->[1]}) && ($other || %{$other->[1]}))) { foreach my $influence (keys %{$other->[1]}) { @@ -2574,6 +2952,8 @@ sub merge_influences { } } +# Change $this so it is not considered to be influenced by $torm. + sub remove_influence { my $this=shift; my $torm=shift;