# Package variable holding the compiled URL pattern; it is assigned
# inside import() below.
our $safe_url_regexp;
# Registers this plugin's hooks (all under the id "htmlscrubber") and
# compiles the package-level $safe_url_regexp from a list of URI schemes.
sub import { #{{{
+ hook(type => "getsetup", id => "htmlscrubber", call => \&getsetup);
hook(type => "sanitize", id => "htmlscrubber", call => \&sanitize);
# Only known uri schemes are allowed to avoid all the ways of
# embedding javascript.
# List at http://en.wikipedia.org/wiki/URI_scheme
# The scheme names are joined with "|" to form a regexp alternation.
# The added join() line passes each name through quotemeta, which is
# why the added "z39.50r"/"z39.50s" entries carry no hand-written
# backslash before the dot.
- my $uri_schemes=join("|",
+ my $uri_schemes=join("|", map quotemeta,
# IANA registered schemes
"http", "https", "ftp", "mailto", "file", "telnet", "gopher",
"aaa", "aaas", "acap", "cap", "cid", "crid",
"dav", "dict", "dns", "fax", "go", "h323", "im", "imap",
"ldap", "mid", "news", "nfs", "nntp", "pop", "pres",
"sip", "sips", "snmp", "tel", "urn", "wais", "xmpp",
- "z39\.50r", "z39\.50s",
+ "z39.50r", "z39.50s",
# Selected unofficial schemes
"aim", "callto", "cvs", "ed2k", "feed", "fish", "gg",
"irc", "ircs", "lastfm", "ldaps", "magnet", "mms",
"msnim", "notes", "rsync", "secondlife", "skype", "ssh",
- "sftp", "sms", "steam", "webcal", "ymsgr",
+ "sftp", "smb", "sms", "snews", "webcal", "ymsgr",
);
# data is a special case. Allow data:image/*, but
# disallow data:text/javascript and everything else.
# The pattern is case-insensitive and anchored at the start. It accepts
# a listed scheme followed by ":", a "data:image/" prefix, or a leading
# run of non-colon characters. The removed version requires that run to
# reach the end of the string; the added version also accepts the run
# when it is followed by "/".
- $safe_url_regexp=qr/^(?:(?:$uri_schemes):|data:image\/|[^:]+$)/i;
+ $safe_url_regexp=qr/^(?:(?:$uri_schemes):|data:image\/|[^:]+(?:$|\/))/i;
} # }}}
# Callback registered for the "getsetup" hook in import(). Takes no
# arguments and returns a flat key/value list with two entries:
# "plugin" and "htmlscrubber_skip", each mapped to a hash reference of
# attributes.
# NOTE(review): the meaning of the "safe" and "rebuild" attributes is
# defined by the caller of this hook and is not visible here -- confirm
# against the IkiWiki plugin documentation.
+sub getsetup () { #{{{
+ return
+ plugin => {
+ safe => 1,
+ rebuild => undef,
+ },
# Describes the htmlscrubber_skip setting that sanitize() reads from
# %config: a PageSpec naming pages that should not be scrubbed.
+ htmlscrubber_skip => {
+ type => "pagespec",
+ example => "!*/Discussion",
+ description => "PageSpec specifying pages not to scrub",
+ link => "ikiwiki/PageSpec",
+ safe => 1,
+ rebuild => undef,
+ },
+} #}}}
+
# Callback registered for the "sanitize" hook in import().
# Takes named parameters; this block reads "content" and, when present,
# "destpage". Returns the content, either untouched or passed through
# scrubber()->scrub().
sub sanitize (@) { #{{{
my %params=@_;
+
# Skip scrubbing only when all of these hold: the htmlscrubber_skip
# setting exists and is non-empty, a destpage was supplied, and that
# destpage matches the configured PageSpec. In that case the content is
# returned unmodified.
+ if (exists $config{htmlscrubber_skip} &&
+ length $config{htmlscrubber_skip} &&
+ exists $params{destpage} &&
+ pagespec_match($params{destpage}, $config{htmlscrubber_skip})) {
+ return $params{content};
+ }
+
# Default path: scrub the content.
return scrubber()->scrub($params{content});
} # }}}