]> git.vanrenterghem.biz Git - git.ikiwiki.info.git/blob - IkiWiki/Plugin/blogspam.pm
useragent: Don't allow non-HTTP protocols to be used
[git.ikiwiki.info.git] / IkiWiki / Plugin / blogspam.pm
1 #!/usr/bin/perl
2 package IkiWiki::Plugin::blogspam;
4 use warnings;
5 use strict;
6 use IkiWiki 3.00;
7 use Encode;
9 my $defaulturl='http://test.blogspam.net:9999/';
10 my $client;
12 sub import {
13         hook(type => "getsetup", id => "blogspam",  call => \&getsetup);
14         hook(type => "checkconfig", id => "blogspam", call => \&checkconfig);
15         hook(type => "checkcontent", id => "blogspam", call => \&checkcontent);
16 }
18 sub getsetup () {
19         return
20                 plugin => {
21                         safe => 1,
22                         rebuild => 0,
23                         section => "auth",
24                 },
25                 blogspam_pagespec => {
26                         type => 'pagespec',
27                         example => 'postcomment(*)',
28                         description => 'PageSpec of pages to check for spam',
29                         link => 'ikiwiki/PageSpec',
30                         safe => 1,
31                         rebuild => 0,
32                 },
33                 blogspam_options => {
34                         type => "string",
35                         example => "blacklist=1.2.3.4,blacklist=8.7.6.5,max-links=10",
36                         description => "options to send to blogspam server",
37                         link => "http://blogspam.net/api/2.0/testComment.html#options",
38                         safe => 1,
39                         rebuild => 0,
40                 },
41                 blogspam_server => {
42                         type => "string",
43                         default => $defaulturl,
44                         description => "blogspam server JSON url",
45                         safe => 1,
46                         rebuild => 0,
47                 },
48 }
50 sub checkconfig () {
51         # This is done at checkconfig time because printing an error
52         # if the module is missing when a spam is posted would not
53         # let the admin know about the problem.
54         eval q{
55                 use JSON;
56                 use HTTP::Request;
57         };
58         error $@ if $@;
60         eval q{use LWPx::ParanoidAgent};
61         if (!$@) {
62                 $client=LWPx::ParanoidAgent->new(agent => $config{useragent});
63         }
64         else {
65                 eval q{use LWP};
66                 if ($@) {
67                         error $@;
68                         return;
69                 }
70                 $client=useragent();
71         }
72 }
74 sub checkcontent (@) {
75         my %params=@_;
76         my $session=$params{session};
77         
78         my $spec='!admin()';
79         if (exists $config{blogspam_pagespec} &&
80             length $config{blogspam_pagespec}) {
81                 $spec.=" and (".$config{blogspam_pagespec}.")";
82         }
84         my $user=$session->param("name");
85         return undef unless pagespec_match($params{page}, $spec,
86                 (defined $user ? (user => $user) : ()),
87                 (defined $session->remote_addr() ? (ip => $session->remote_addr()) : ()),
88                 location => $params{page});
90         my $url=$defaulturl;
91         $url = $config{blogspam_server} if exists $config{blogspam_server};
93         my @options = split(",", $config{blogspam_options})
94                 if exists $config{blogspam_options};
96         # Allow short comments and whitespace-only edits, unless the user
97         # has overridden min-words themselves.
98         push @options, "min-words=0"
99                 unless grep /^min-words=/i, @options;
100         # Wiki pages can have a lot of urls, unless the user specifically
101         # wants to limit them.
102         push @options, "exclude=lotsaurls"
103                 unless grep /^max-links/i, @options;
104         # Unless the user specified a size check, disable such checking.
105         push @options, "exclude=size"
106                 unless grep /^(?:max|min)-size/i, @options;
107         # This test has absurd false positives on words like "alpha"
108         # and "buy".
109         push @options, "exclude=stopwords";
111         my %req=(
112                 ip => $session->remote_addr(),
113                 comment => encode_utf8(defined $params{diff} ? $params{diff} : $params{content}),
114                 subject => encode_utf8(defined $params{subject} ? $params{subject} : ""),
115                 name => encode_utf8(defined $params{author} ? $params{author} : ""),
116                 link => encode_utf8(exists $params{url} ? $params{url} : ""),
117                 options => join(",", @options),
118                 site => encode_utf8($config{url}),
119                 version => "ikiwiki ".$IkiWiki::version,
120         );
121         eval q{use JSON; use HTTP::Request}; # errors handled in checkconfig()
122         my $res = $client->request(
123                 HTTP::Request->new(
124                         'POST',
125                         $url,
126                         [ 'Content-Type' => 'application/json' ],
127                         to_json(\%req),
128                 ),
129         );
131         if (! ref $res || ! $res->is_success()) {
132                 debug("failed to get response from blogspam server ($url)");
133                 return undef;
134         }
135         my $details = from_json($res->content);
136         if ($details->{result} eq 'SPAM') {
137                 eval q{use Data::Dumper};
138                 debug("blogspam server reports $details->{reason}: ".Dumper(\%req));
139                 return gettext("Sorry, but that looks like spam to <a href=\"http://blogspam.net/\">blogspam</a>: ").$details->{reason};
140         }
141         elsif ($details->{result} ne 'OK') {
142                 debug("blogspam server failure: ".$res->content);
143                 return undef;
144         }
145         else {
146                 return undef;
147         }