]> git.vanrenterghem.biz Git - git.ikiwiki.info.git/blobdiff - IkiWiki/Plugin/htmlscrubber.pm
po: reorder nearly all of the module code
[git.ikiwiki.info.git] / IkiWiki / Plugin / htmlscrubber.pm
index d795da3a93565929e5468672705662e259f812f0..7398c84784632048c41eab1976436578a8bf7027 100644 (file)
@@ -10,32 +10,57 @@ use IkiWiki 2.00;
 our $safe_url_regexp;
 
 sub import { #{{{
 our $safe_url_regexp;
 
 sub import { #{{{
+       hook(type => "getsetup", id => "htmlscrubber", call => \&getsetup);
        hook(type => "sanitize", id => "htmlscrubber", call => \&sanitize);
 
        # Only known uri schemes are allowed to avoid all the ways of
        # embedding javascrpt.
        # List at http://en.wikipedia.org/wiki/URI_scheme
        hook(type => "sanitize", id => "htmlscrubber", call => \&sanitize);
 
        # Only known uri schemes are allowed to avoid all the ways of
        # embedding javascrpt.
        # List at http://en.wikipedia.org/wiki/URI_scheme
-       my $uri_schemes=join("|",
+       my $uri_schemes=join("|", map quotemeta,
                # IANA registered schemes
                "http", "https", "ftp", "mailto", "file", "telnet", "gopher",
                "aaa", "aaas", "acap",  "cap", "cid", "crid", 
                "dav", "dict", "dns", "fax", "go", "h323", "im", "imap",
                "ldap", "mid", "news", "nfs", "nntp", "pop", "pres",
                "sip", "sips", "snmp", "tel", "urn", "wais", "xmpp",
                # IANA registered schemes
                "http", "https", "ftp", "mailto", "file", "telnet", "gopher",
                "aaa", "aaas", "acap",  "cap", "cid", "crid", 
                "dav", "dict", "dns", "fax", "go", "h323", "im", "imap",
                "ldap", "mid", "news", "nfs", "nntp", "pop", "pres",
                "sip", "sips", "snmp", "tel", "urn", "wais", "xmpp",
-               "z39\.50r", "z39\.50s",
+               "z39.50r", "z39.50s",
                # Selected unofficial schemes
                "aim", "callto", "cvs", "ed2k", "feed", "fish", "gg",
                "irc", "ircs", "lastfm", "ldaps", "magnet", "mms",
                "msnim", "notes", "rsync", "secondlife", "skype", "ssh",
                # Selected unofficial schemes
                "aim", "callto", "cvs", "ed2k", "feed", "fish", "gg",
                "irc", "ircs", "lastfm", "ldaps", "magnet", "mms",
                "msnim", "notes", "rsync", "secondlife", "skype", "ssh",
-               "sftp", "sms", "snews", "webcal", "ymsgr",
+               "sftp", "smb", "sms", "snews", "webcal", "ymsgr",
        );
        # data is a special case. Allow data:image/*, but
        # disallow data:text/javascript and everything else.
        );
        # data is a special case. Allow data:image/*, but
        # disallow data:text/javascript and everything else.
-       $safe_url_regexp=qr/^(?:(?:$uri_schemes):|data:image\/|[^:]+$)/i;
+       $safe_url_regexp=qr/^(?:(?:$uri_schemes):|data:image\/|[^:]+(?:$|\/))/i;
 } # }}}
 
 } # }}}
 
+sub getsetup () { #{{{
+       return
+               plugin => {
+                       safe => 1,
+                       rebuild => undef,
+               },
+               htmlscrubber_skip => {
+                       type => "pagespec",
+                       example => "!*/Discussion",
+                       description => "PageSpec specifying pages not to scrub",
+                       link => "ikiwiki/PageSpec",
+                       safe => 1,
+                       rebuild => undef,
+               },
+} #}}}
+
 sub sanitize (@) { #{{{
        my %params=@_;
 sub sanitize (@) { #{{{
        my %params=@_;
+
+       if (exists $config{htmlscrubber_skip} &&
+           length $config{htmlscrubber_skip} &&
+           exists $params{destpage} &&
+           pagespec_match($params{destpage}, $config{htmlscrubber_skip})) {
+               return $params{content};
+       }
+
        return scrubber()->scrub($params{content});
 } # }}}
 
        return scrubber()->scrub($params{content});
 } # }}}