]> git.vanrenterghem.biz Git - git.ikiwiki.info.git/blobdiff - IkiWiki/Plugin/htmlscrubber.pm
Use $a and $b for SortSpec cmp callbacks
[git.ikiwiki.info.git] / IkiWiki / Plugin / htmlscrubber.pm
index 53d4635d554736c449a2220d43336a7ef5ebf07a..26e18ffc753c537158ceb86cca80d03f5b1cca2e 100644 (file)
@@ -3,44 +3,70 @@ package IkiWiki::Plugin::htmlscrubber;
 
 use warnings;
 use strict;
-use IkiWiki 2.00;
+use IkiWiki 3.00;
 
 # This regexp matches urls that are in a known safe scheme.
 # Feel free to use it from other plugins.
 our $safe_url_regexp;
 
-sub import { #{{{
+sub import {
+       hook(type => "getsetup", id => "htmlscrubber", call => \&getsetup);
        hook(type => "sanitize", id => "htmlscrubber", call => \&sanitize);
 
        # Only known uri schemes are allowed to avoid all the ways of
        # embedding javascrpt.
        # List at http://en.wikipedia.org/wiki/URI_scheme
-       my $uri_schemes=join("|",
+       my $uri_schemes=join("|", map quotemeta,
                # IANA registered schemes
                "http", "https", "ftp", "mailto", "file", "telnet", "gopher",
                "aaa", "aaas", "acap",  "cap", "cid", "crid", 
                "dav", "dict", "dns", "fax", "go", "h323", "im", "imap",
                "ldap", "mid", "news", "nfs", "nntp", "pop", "pres",
                "sip", "sips", "snmp", "tel", "urn", "wais", "xmpp",
-               "z39\.50r", "z39\.50s",
+               "z39.50r", "z39.50s",
                # Selected unofficial schemes
                "aim", "callto", "cvs", "ed2k", "feed", "fish", "gg",
                "irc", "ircs", "lastfm", "ldaps", "magnet", "mms",
                "msnim", "notes", "rsync", "secondlife", "skype", "ssh",
-               "sftp", "sms", "steam", "webcal", "ymsgr",
+               "sftp", "smb", "sms", "snews", "webcal", "ymsgr",
        );
-       # data is a special case. Allow data:image/*, but
-       # disallow data:text/javascript and everything else.
-       $safe_url_regexp=qr/^(?:(?:$uri_schemes):|data:image\/|[^:]+$)/i;
-} # }}}
+       # data is a special case. Allow a few data:image/ types,
+       # but disallow data:text/javascript and everything else.
+       $safe_url_regexp=qr/^(?:(?:$uri_schemes):|data:image\/(?:png|jpeg|gif)|[^:]+(?:$|\/))/i;
+}
 
-sub sanitize (@) { #{{{
+sub getsetup () {
+       return
+               plugin => {
+                       safe => 1,
+                       rebuild => undef,
+                       section => "core",
+               },
+               htmlscrubber_skip => {
+                       type => "pagespec",
+                       example => "!*/Discussion",
+                       description => "PageSpec specifying pages not to scrub",
+                       link => "ikiwiki/PageSpec",
+                       safe => 1,
+                       rebuild => undef,
+               },
+}
+
+sub sanitize (@) {
        my %params=@_;
+
+       if (exists $config{htmlscrubber_skip} &&
+           length $config{htmlscrubber_skip} &&
+           exists $params{destpage} &&
+           pagespec_match($params{destpage}, $config{htmlscrubber_skip})) {
+               return $params{content};
+       }
+
        return scrubber()->scrub($params{content});
-} # }}}
+}
 
 my $_scrubber;
-sub scrubber { #{{{
+sub scrubber {
        return $_scrubber if defined $_scrubber;
 
        eval q{use HTML::Scrubber};
@@ -86,6 +112,6 @@ sub scrubber { #{{{
                }],
        );
        return $_scrubber;
-} # }}}
+}
 
 1