use warnings;
use strict;
-use IkiWiki 2.00;
+use IkiWiki 3.00;
-sub import { #{{{
- hook(type => "sanitize", id => "htmlscrubber", call => \&sanitize);
-} # }}}
+# This regexp matches urls that are in a known safe scheme.
+# Feel free to use it from other plugins.
+our $safe_url_regexp;
-sub sanitize (@) { #{{{
- my %params=@_;
- return scrubber()->scrub($params{content});
-} # }}}
+# Registers the getsetup and sanitize hooks under the id "htmlscrubber",
+# then builds $safe_url_regexp from the list of allowed URI schemes.
+sub import {
+ hook(type => "getsetup", id => "htmlscrubber", call => \&getsetup);
+ hook(type => "sanitize", id => "htmlscrubber", call => \&sanitize);
-my $_scrubber;
-sub scrubber { #{{{
- return $_scrubber if defined $_scrubber;
-
# Only known uri schemes are allowed to avoid all the ways of
# embedding javascript.
# List at http://en.wikipedia.org/wiki/URI_scheme
- my $uri_schemes=join("|",
+ my $uri_schemes=join("|", map quotemeta,
# IANA registered schemes
"http", "https", "ftp", "mailto", "file", "telnet", "gopher",
"aaa", "aaas", "acap", "cap", "cid", "crid",
"aim", "callto", "cvs", "ed2k", "feed", "fish", "gg",
"irc", "ircs", "lastfm", "ldaps", "magnet", "mms",
"msnim", "notes", "rsync", "secondlife", "skype", "ssh",
- "sftp", "sms", "steam", "webcal", "ymsgr",
+ "sftp", "smb", "sms", "snews", "webcal", "ymsgr",
+ "bitcoin", "git", "svn", "bzr", "darcs", "hg"
);
- # data is a special case. Allow data:image/*, but
- # disallow data:text/javascript and everything else.
- my $link=qr/^(?:(?:$uri_schemes):|data:image\/|[^:]+$)/i;
+ # data is a special case. Allow a few data:image/ types,
+ # but disallow data:text/javascript and everything else.
+ # The pattern accepts, case-insensitively, a value that starts with:
+ #  - one of the listed schemes followed by ":", or
+ #  - "data:image/" followed by png, jpeg or gif, or
+ #  - one or more non-":" characters running to the end of the
+ #    value or up to a "/", "?" or "#", or
+ #  - a "#".
+ $safe_url_regexp=qr/^(?:(?:$uri_schemes):|data:image\/(?:png|jpeg|gif)|[^:]+(?:$|[\/\?#]))|^#/i;
+}
+
+# getsetup hook callback (registered in import). Returns a key/value list
+# with two entries: "plugin", carrying this plugin's own metadata, and
+# "htmlscrubber_skip", describing a pagespec-typed setting for pages
+# that should not be scrubbed.
+sub getsetup () {
+ return
+ plugin => {
+ safe => 1,
+ rebuild => undef,
+ section => "core",
+ },
+ htmlscrubber_skip => {
+ type => "pagespec",
+ example => "!*/Discussion",
+ description => "PageSpec specifying pages not to scrub",
+ link => "ikiwiki/PageSpec",
+ safe => 1,
+ rebuild => undef,
+ },
+}
+
+# sanitize hook callback (registered in import). Takes named parameters;
+# reads "content" and, when present, "page". Returns the content either
+# untouched (page matches the htmlscrubber_skip pagespec) or passed
+# through the scrubber.
+sub sanitize (@) {
+ my %params=@_;
+
+ # Skip scrubbing only when the setting exists and is non-empty, a
+ # page name was supplied, and that page matches the pagespec.
+ if (exists $config{htmlscrubber_skip} &&
+ length $config{htmlscrubber_skip} &&
+ exists $params{page} &&
+ pagespec_match($params{page}, $config{htmlscrubber_skip})) {
+ return $params{content};
+ }
+
+ return scrubber()->scrub($params{content});
+}
+
+# Cached HTML::Scrubber instance; built on the first scrubber() call.
+my $_scrubber;
+# Returns the shared HTML::Scrubber object, constructing it on first use.
+# The URL attribute rules below read $safe_url_regexp, which import()
+# assigns, so import() needs to have run before the first call.
+sub scrubber {
+ return $_scrubber if defined $_scrubber;
+ # Load HTML::Scrubber at run time; a load failure is passed to error().
eval q{use HTML::Scrubber};
error($@) if $@;
# Lists based on http://feedparser.org/docs/html-sanitization.html
- # With html 5 video and audio tags added.
+ # With html5 tags added.
$_scrubber = HTML::Scrubber->new(
allow => [qw{
a abbr acronym address area b big blockquote br br/
menu ol optgroup option p p/ pre q s samp select small
span strike strong sub sup table tbody td textarea
tfoot th thead tr tt u ul var
- video audio
+
+ video audio source section nav article aside hgroup
+ header footer figure figcaption time mark canvas
+ datalist progress meter ruby rt rp details summary
}],
default => [undef, { (
map { $_ => 1 } qw{
selected shape size span start summary
tabindex target title type valign
value vspace width
- autoplay loopstart loopend end
- playcount controls
+
+ autofocus autoplay preload loopstart
+ loopend end playcount controls pubdate
+ placeholder min max step low high optimum
+ form required autocomplete novalidate pattern
+ list formenctype formmethod formnovalidate
+ formtarget reversed spellcheck open hidden
} ),
"/" => 1, # emit proper <hr /> XHTML
- href => $link,
- src => $link,
- action => $link,
- cite => $link,
- longdesc => $link,
- poster => $link,
- usemap => $link,
+ # URL-valued attributes are given the safe-URL pattern as their
+ # rule instead of a plain 1 (presumably HTML::Scrubber keeps the
+ # attribute only when its value matches; confirm in its docs).
+ href => $safe_url_regexp,
+ src => $safe_url_regexp,
+ action => $safe_url_regexp,
+ formaction => $safe_url_regexp,
+ cite => $safe_url_regexp,
+ longdesc => $safe_url_regexp,
+ poster => $safe_url_regexp,
+ usemap => $safe_url_regexp,
}],
);
return $_scrubber;
-} # }}}
+}
1