$autofiles{$file}{generator}=$generator;
}
-sub useragent () {
+# Construct the user-agent object used for outgoing HTTP/HTTPS requests.
+#
+# Named parameters:
+#   for_url - the URL about to be fetched, if the caller already knows
+#             it; http_proxy/https_proxy/no_proxy are then evaluated for
+#             that specific URL. Optional.
+# Any other parameter is reported through error(), so that a caller
+# relying on a future, stricter option never silently gets a laxer
+# user-agent.
+#
+# Returns a plain LWP::UserAgent with explicit proxies when a proxy
+# applies; otherwise an LWPx::ParanoidAgent if that module can be
+# loaded, or (after a warning on STDERR) a plain LWP::UserAgent.
+sub useragent (@) {
+ my %params = @_;
+ my $for_url = delete $params{for_url};
+ # Fail safe, in case a plugin calling this function is relying on
+ # a future parameter to make the UA more strict
+ foreach my $key (keys %params) {
+ error "Internal error: useragent(\"$key\" => ...) not understood";
+ }
+
eval q{use LWP};
error($@) if $@;
- return LWP::UserAgent->new(
- cookie_jar => $config{cookiejar},
- env_proxy => 1, # respect proxy env vars
+ my %args = (
agent => $config{useragent},
+ cookie_jar => $config{cookiejar},
+ env_proxy => 0,
protocols_allowed => [qw(http https)],
);
+ my %proxies;
+
+ # Return the value of the first *defined* environment variable among
+ # those named, or undef. A defined but false value (empty string or
+ # "0") is treated as "no proxy": storing it in %proxies would make
+ # the "allowed but has no proxy" check below abort with a spurious
+ # internal error.
+ my $proxy_from_env = sub {
+ foreach my $var (@_) {
+ next unless defined $ENV{$var};
+ return $ENV{$var} ? $ENV{$var} : undef;
+ }
+ return;
+ };
+
+ if (defined $for_url) {
+ # We know which URL we're going to fetch, so we can choose
+ # whether it's going to go through a proxy or not.
+ #
+ # We reimplement http_proxy, https_proxy and no_proxy here, so
+ # that we are not relying on LWP implementing them exactly the
+ # same way we do.
+
+ eval q{use URI};
+ error($@) if $@;
+
+ my $proxy;
+ my $uri = URI->new($for_url);
+ # A relative URL has no scheme: compare against '' instead of
+ # undef so that it simply selects no proxy.
+ my $scheme = $uri->scheme;
+ $scheme = '' unless defined $scheme;
+
+ if ($scheme eq 'http') {
+ $proxy = $proxy_from_env->('http_proxy');
+ # HTTP_PROXY is deliberately not implemented
+ # because the HTTP_* namespace is also used by CGI
+ }
+ elsif ($scheme eq 'https') {
+ $proxy = $proxy_from_env->(qw(https_proxy HTTPS_PROXY));
+ }
+
+ # no_proxy can only cancel a proxy, so it is consulted only when
+ # one was selected. That also means host() is only ever called
+ # on http/https URIs, and never matched while undefined.
+ my $host = defined $proxy ? $uri->host : undef;
+
+ if (defined $host) {
+ foreach my $var (qw(no_proxy NO_PROXY)) {
+ my $no_proxy = $ENV{$var};
+ if (defined $no_proxy) {
+ foreach my $domain (split /\s*,\s*/, $no_proxy) {
+ if ($domain =~ s/^\*?\.//) {
+ # no_proxy="*.example.com" or
+ # ".example.com": match suffix
+ # against .example.com
+ if ($host =~ m/(^|\.)\Q$domain\E$/i) {
+ $proxy = undef;
+ }
+ }
+ else {
+ # no_proxy="example.com":
+ # match exactly example.com
+ if (lc $host eq lc $domain) {
+ $proxy = undef;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (defined $proxy) {
+ $proxies{$scheme} = $proxy;
+ # Paranoia: make sure we can't bypass the proxy
+ $args{protocols_allowed} = [$scheme];
+ }
+ }
+ else {
+ # The plugin doesn't know yet which URL(s) it's going to
+ # fetch, so we have to make some conservative assumptions.
+ my $http_proxy = $proxy_from_env->('http_proxy');
+ my $https_proxy = $proxy_from_env->(qw(https_proxy HTTPS_PROXY));
+
+ # We don't respect no_proxy here: if we are not using the
+ # paranoid user-agent, then we need to give the proxy the
+ # opportunity to reject undesirable requests.
+
+ # If we have one, we need the other: otherwise, neither
+ # LWPx::ParanoidAgent nor the proxy would have the
+ # opportunity to filter requests for the other protocol.
+ if (defined $https_proxy && defined $http_proxy) {
+ %proxies = (http => $http_proxy, https => $https_proxy);
+ }
+ elsif (defined $https_proxy) {
+ %proxies = (http => $https_proxy, https => $https_proxy);
+ }
+ elsif (defined $http_proxy) {
+ %proxies = (http => $http_proxy, https => $http_proxy);
+ }
+
+ }
+
+ if (scalar keys %proxies) {
+ # The configured proxy is responsible for deciding which
+ # URLs are acceptable to fetch and which URLs are not.
+ my $ua = LWP::UserAgent->new(%args);
+ # Every scheme the agent may fetch must have a proxy, otherwise
+ # requests for that scheme would go out unfiltered.
+ foreach my $scheme (@{$ua->protocols_allowed}) {
+ unless ($proxies{$scheme}) {
+ error "internal error: $scheme is allowed but has no proxy";
+ }
+ }
+ # We can't pass the proxies in %args because that only
+ # works since LWP 6.24.
+ foreach my $scheme (keys %proxies) {
+ $ua->proxy($scheme, $proxies{$scheme});
+ }
+ return $ua;
+ }
+
+ # No proxy applies: prefer the paranoid agent when it is installed.
+ eval q{use LWPx::ParanoidAgent};
+ if ($@) {
+ print STDERR "warning: installing LWPx::ParanoidAgent is recommended\n";
+ return LWP::UserAgent->new(%args);
+ }
+ return LWPx::ParanoidAgent->new(%args);
}
sub sortspec_translate ($$) {
--- /dev/null
+#!/usr/bin/perl
+use warnings;
+use strict;
+use Test::More; # this script checks which agent class, allowed protocols and proxies useragent() picks for various proxy environment variables
+
+my $have_paranoid_agent; # true when LWPx::ParanoidAgent could be loaded in BEGIN below
+BEGIN { # skip the whole script when LWP cannot be loaded
+ plan(skip_all => 'LWP not available')
+ unless eval q{
+ use LWP qw(); 1;
+ };
+ use_ok("IkiWiki");
+ $have_paranoid_agent = eval q{
+ use LWPx::ParanoidAgent qw(); 1;
+ };
+}
+
+eval { useragent(future_feature => 1); }; # an unrecognised named parameter must make useragent() fail
+ok($@, 'future features should cause useragent to fail');
+
+diag "==== No proxy ===="; # all proxy variables unset: expect http+https allowed and no proxy configured
+delete $ENV{http_proxy};
+delete $ENV{https_proxy};
+delete $ENV{no_proxy};
+delete $ENV{HTTPS_PROXY};
+delete $ENV{NO_PROXY};
+
+diag "---- Unspecified URL ----";
+my $ua = useragent(for_url => undef);
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef, 'No http proxy');
+is($ua->proxy('https'), undef, 'No https proxy');
+
+diag "---- Specified URL ----";
+$ua = useragent(for_url => 'http://example.com');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef, 'No http proxy');
+is($ua->proxy('https'), undef, 'No https proxy');
+
+diag "==== Proxy for everything ===="; # both lower-case proxy variables set, no exclusions
+$ENV{http_proxy} = 'http://proxy:8080';
+$ENV{https_proxy} = 'http://sproxy:8080';
+delete $ENV{no_proxy};
+delete $ENV{HTTPS_PROXY};
+delete $ENV{NO_PROXY};
+
+diag "---- Unspecified URL ----";
+$ua = useragent(for_url => undef);
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
+$ua = useragent(for_url => 'http://example.com'); # known http URL: only http is expected to be allowed
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+# We don't care what $ua->proxy('https') is, because it won't be used
+$ua = useragent(for_url => 'https://example.com'); # known https URL: only https is expected to be allowed
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
+# We don't care what $ua->proxy('http') is, because it won't be used
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
+
+diag "==== Selective proxy ===="; # proxies plus a lower-case no_proxy list with wildcard, exact and leading-dot entries
+$ENV{http_proxy} = 'http://proxy:8080';
+$ENV{https_proxy} = 'http://sproxy:8080';
+$ENV{no_proxy} = '*.example.net,example.com,.example.org';
+delete $ENV{HTTPS_PROXY};
+delete $ENV{NO_PROXY};
+
+diag "---- Unspecified URL ----"; # without a URL the proxies are expected even though no_proxy is set
+$ua = useragent(for_url => undef);
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
+
+diag "---- Exact match for no_proxy ----";
+$ua = useragent(for_url => 'http://example.com');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- Subdomain of exact domain in no_proxy ----"; # a plain "example.com" entry is expected not to cover sub.example.com
+$ua = useragent(for_url => 'http://sub.example.com');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+
+diag "---- example.net matches *.example.net ----";
+$ua = useragent(for_url => 'https://example.net');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- sub.example.net matches *.example.net ----";
+$ua = useragent(for_url => 'https://sub.example.net');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- badexample.net does not match *.example.net ----"; # suffix matching must respect the label boundary
+$ua = useragent(for_url => 'https://badexample.net');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
+
+diag "---- example.org matches .example.org ----";
+$ua = useragent(for_url => 'https://example.org');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- sub.example.org matches .example.org ----";
+$ua = useragent(for_url => 'https://sub.example.org');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- badexample.org does not match .example.org ----";
+$ua = useragent(for_url => 'https://badexample.org');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
+
+diag "==== Selective proxy (alternate variables) ===="; # same expectations as the previous section, using upper-case HTTPS_PROXY and NO_PROXY
+$ENV{http_proxy} = 'http://proxy:8080';
+delete $ENV{https_proxy};
+$ENV{HTTPS_PROXY} = 'http://sproxy:8080';
+delete $ENV{no_proxy};
+$ENV{NO_PROXY} = '*.example.net,example.com,.example.org';
+
+diag "---- Unspecified URL ----";
+$ua = useragent(for_url => undef);
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
+
+diag "---- Exact match for no_proxy ----";
+$ua = useragent(for_url => 'http://example.com');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- Subdomain of exact domain in no_proxy ----";
+$ua = useragent(for_url => 'http://sub.example.com');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+
+diag "---- example.net matches *.example.net ----";
+$ua = useragent(for_url => 'https://example.net');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- sub.example.net matches *.example.net ----";
+$ua = useragent(for_url => 'https://sub.example.net');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- badexample.net does not match *.example.net ----";
+$ua = useragent(for_url => 'https://badexample.net');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
+
+diag "---- example.org matches .example.org ----";
+$ua = useragent(for_url => 'https://example.org');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- sub.example.org matches .example.org ----";
+$ua = useragent(for_url => 'https://sub.example.org');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- badexample.org does not match .example.org ----";
+$ua = useragent(for_url => 'https://badexample.org');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
+
+diag "==== Selective proxy (many variables) ===="; # lower- and upper-case variables set at the same time
+$ENV{http_proxy} = 'http://proxy:8080';
+$ENV{https_proxy} = 'http://sproxy:8080';
+# This one should be ignored in favour of https_proxy
+$ENV{HTTPS_PROXY} = 'http://not.preferred.proxy:3128';
+# These two should be merged
+$ENV{no_proxy} = '*.example.net,example.com';
+$ENV{NO_PROXY} = '.example.org';
+
+diag "---- Unspecified URL ----";
+$ua = useragent(for_url => undef);
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use CONNECT proxy');
+
+diag "---- Exact match for no_proxy ----";
+$ua = useragent(for_url => 'http://example.com');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- Subdomain of exact domain in no_proxy ----";
+$ua = useragent(for_url => 'http://sub.example.com');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+
+diag "---- example.net matches *.example.net ----";
+$ua = useragent(for_url => 'https://example.net');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- sub.example.net matches *.example.net ----";
+$ua = useragent(for_url => 'https://sub.example.net');
+SKIP: {
+ skip 'paranoid agent not available', 1 unless $have_paranoid_agent;
+ ok($ua->isa('LWPx::ParanoidAgent'), 'uses ParanoidAgent if possible');
+}
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), undef);
+is($ua->proxy('https'), undef);
+
+diag "---- badexample.net does not match *.example.net ----";
+$ua = useragent(for_url => 'https://badexample.net');
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(https)]);
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
+
+diag "==== One but not the other ====\n"; # only one proxy variable set and no URL given: that proxy is expected for both schemes
+$ENV{http_proxy} = 'http://proxy:8080';
+delete $ENV{https_proxy};
+delete $ENV{HTTPS_PROXY};
+delete $ENV{no_proxy};
+delete $ENV{NO_PROXY};
+$ua = useragent(for_url => undef);
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), 'http://proxy:8080', 'should use proxy');
+is($ua->proxy('https'), 'http://proxy:8080', 'should use proxy');
+
+delete $ENV{http_proxy}; # now the reverse: only https_proxy is set
+$ENV{https_proxy} = 'http://sproxy:8080';
+delete $ENV{HTTPS_PROXY};
+delete $ENV{no_proxy};
+delete $ENV{NO_PROXY};
+$ua = useragent(for_url => undef);
+ok(! $ua->isa('LWPx::ParanoidAgent'), 'should use proxy instead of ParanoidAgent');
+is_deeply([sort @{$ua->protocols_allowed}], [sort qw(http https)]);
+is($ua->proxy('http'), 'http://sproxy:8080', 'should use proxy');
+is($ua->proxy('https'), 'http://sproxy:8080', 'should use proxy');
+
+done_testing;