- # what is the non path part of the url?
- my $top_uri = URI->new($url);
- $top_uri->path_query(""); # reset the path
- my $urltop = $top_uri->as_string;
-
- $content=~s/(<a(?:\s+(?:class|id)\s*="?\w+"?)?)\s+href=\s*"(#[^"]+)"/$1 href="$baseurl$2"/mig;
- # relative to another wiki page
- $content=~s/(<a(?:\s+(?:class|id)\s*="?\w+"?)?)\s+href=\s*"(?!\w+:)([^\/][^"]*)"/$1 href="$url$2"/mig;
- $content=~s/(<img(?:\s+(?:class|id|width|height)\s*="?\w+"?)*)\s+src=\s*"(?!\w+:)([^\/][^"]*)"/$1 src="$url$2"/mig;
- # relative to the top of the site
- $content=~s/(<a(?:\s+(?:class|id)\s*="?\w+"?)?)\s+href=\s*"(?!\w+:)(\/[^"]*)"/$1 href="$urltop$2"/mig;
- $content=~s/(<img(?:\s+(?:class|id|width|height)\s*="?\w+"?)*)\s+src=\s*"(?!\w+:)(\/[^"]*)"/$1 src="$urltop$2"/mig;
- return $content;
+ eval q{use HTML::Parser; use HTML::Tagset};
+ die $@ if $@;
+ my $p = HTML::Parser->new(api_version => 3);
+ $p->handler(default => sub { $ret.=join("", @_) }, "text");
+ $p->handler(start => sub {
+ my ($tagname, $pos, $text) = @_;
+ if (ref $HTML::Tagset::linkElements{$tagname}) {
+ while (4 <= @$pos) {
+ # use attribute sets from right to left
+ # to avoid invalidating the offsets
+ # when replacing the values
+ my ($k_offset, $k_len, $v_offset, $v_len) =
+ splice(@$pos, -4);
+ my $attrname = lc(substr($text, $k_offset, $k_len));
+ next unless grep { $_ eq $attrname } @{$HTML::Tagset::linkElements{$tagname}};
+ next unless $v_offset; # 0 v_offset means no value
+ my $v = substr($text, $v_offset, $v_len);
+ $v =~ s/^([\'\"])(.*)\1$/$2/;
+ eval q{use HTML::Entities};
+ my $dv = decode_entities($v);
+ if ($dv=~/^#/) {
+ $v=$baseurl.$v; # anchor
+ }
+ elsif ($dv=~/^(?!\w+:)[^\/]/) {
+ $v=$url.$v; # relative url
+ }
+ elsif ($dv=~/^\//) {
+ if (! defined $urltop) {
+ # what is the non path part of the url?
+ my $top_uri = URI->new($url);
+ $top_uri->path_query(""); # reset the path
+ $urltop = $top_uri->as_string;
+ }
+ $v=$urltop.$v; # url relative to top of site
+ }
+ $v =~ s/\"/"/g; # since we quote with ""
+ substr($text, $v_offset, $v_len) = qq("$v");
+ }
+ }
+ $ret.=$text;
+ }, "tagname, tokenpos, text");
+ $p->parse($html);
+ $p->eof;
+
+ return $ret;