]> git.vanrenterghem.biz Git - git.ikiwiki.info.git/blob - IkiWiki/Plugin/blogspam.pm
useragent: Automatically choose whether to use LWPx::ParanoidAgent
[git.ikiwiki.info.git] / IkiWiki / Plugin / blogspam.pm
1 #!/usr/bin/perl
2 package IkiWiki::Plugin::blogspam;
4 use warnings;
5 use strict;
6 use IkiWiki 3.00;
7 use Encode;
9 my $defaulturl='http://test.blogspam.net:9999/';
10 my $client;
12 sub import {
13         hook(type => "getsetup", id => "blogspam",  call => \&getsetup);
14         hook(type => "checkconfig", id => "blogspam", call => \&checkconfig);
15         hook(type => "checkcontent", id => "blogspam", call => \&checkcontent);
16 }
18 sub getsetup () {
19         return
20                 plugin => {
21                         safe => 1,
22                         rebuild => 0,
23                         section => "auth",
24                 },
25                 blogspam_pagespec => {
26                         type => 'pagespec',
27                         example => 'postcomment(*)',
28                         description => 'PageSpec of pages to check for spam',
29                         link => 'ikiwiki/PageSpec',
30                         safe => 1,
31                         rebuild => 0,
32                 },
33                 blogspam_options => {
34                         type => "string",
35                         example => "blacklist=1.2.3.4,blacklist=8.7.6.5,max-links=10",
36                         description => "options to send to blogspam server",
37                         link => "http://blogspam.net/api/2.0/testComment.html#options",
38                         safe => 1,
39                         rebuild => 0,
40                 },
41                 blogspam_server => {
42                         type => "string",
43                         default => $defaulturl,
44                         description => "blogspam server JSON url",
45                         safe => 1,
46                         rebuild => 0,
47                 },
48 }
50 sub checkconfig () {
51         # This is done at checkconfig time because printing an error
52         # if the module is missing when a spam is posted would not
53         # let the admin know about the problem.
54         eval q{
55                 use JSON;
56                 use HTTP::Request;
57         };
58         error $@ if $@;
60         # Using the for_url parameter makes sure we crash if used
61         # with an older IkiWiki.pm that didn't automatically try
62         # to use LWPx::ParanoidAgent.
63         $client=useragent(for_url => $config{blogspam_server});
64 }
66 sub checkcontent (@) {
67         my %params=@_;
68         my $session=$params{session};
69         
70         my $spec='!admin()';
71         if (exists $config{blogspam_pagespec} &&
72             length $config{blogspam_pagespec}) {
73                 $spec.=" and (".$config{blogspam_pagespec}.")";
74         }
76         my $user=$session->param("name");
77         return undef unless pagespec_match($params{page}, $spec,
78                 (defined $user ? (user => $user) : ()),
79                 (defined $session->remote_addr() ? (ip => $session->remote_addr()) : ()),
80                 location => $params{page});
82         my $url=$defaulturl;
83         $url = $config{blogspam_server} if exists $config{blogspam_server};
85         my @options = split(",", $config{blogspam_options})
86                 if exists $config{blogspam_options};
88         # Allow short comments and whitespace-only edits, unless the user
89         # has overridden min-words themselves.
90         push @options, "min-words=0"
91                 unless grep /^min-words=/i, @options;
92         # Wiki pages can have a lot of urls, unless the user specifically
93         # wants to limit them.
94         push @options, "exclude=lotsaurls"
95                 unless grep /^max-links/i, @options;
96         # Unless the user specified a size check, disable such checking.
97         push @options, "exclude=size"
98                 unless grep /^(?:max|min)-size/i, @options;
99         # This test has absurd false positives on words like "alpha"
100         # and "buy".
101         push @options, "exclude=stopwords";
103         my %req=(
104                 ip => $session->remote_addr(),
105                 comment => encode_utf8(defined $params{diff} ? $params{diff} : $params{content}),
106                 subject => encode_utf8(defined $params{subject} ? $params{subject} : ""),
107                 name => encode_utf8(defined $params{author} ? $params{author} : ""),
108                 link => encode_utf8(exists $params{url} ? $params{url} : ""),
109                 options => join(",", @options),
110                 site => encode_utf8($config{url}),
111                 version => "ikiwiki ".$IkiWiki::version,
112         );
113         eval q{use JSON; use HTTP::Request}; # errors handled in checkconfig()
114         my $res = $client->request(
115                 HTTP::Request->new(
116                         'POST',
117                         $url,
118                         [ 'Content-Type' => 'application/json' ],
119                         to_json(\%req),
120                 ),
121         );
123         if (! ref $res || ! $res->is_success()) {
124                 debug("failed to get response from blogspam server ($url)");
125                 return undef;
126         }
127         my $details = from_json($res->content);
128         if ($details->{result} eq 'SPAM') {
129                 eval q{use Data::Dumper};
130                 debug("blogspam server reports $details->{reason}: ".Dumper(\%req));
131                 return gettext("Sorry, but that looks like spam to <a href=\"http://blogspam.net/\">blogspam</a>: ").$details->{reason};
132         }
133         elsif ($details->{result} ne 'OK') {
134                 debug("blogspam server failure: ".$res->content);
135                 return undef;
136         }
137         else {
138                 return undef;
139         }