+ my $urltop; # calculated if needed
+
+ my $ret="";
+
+ eval q{use HTML::Parser; use HTML::Tagset};
+ die $@ if $@;
+ my $p = HTML::Parser->new(api_version => 3);
+ $p->handler(default => sub { $ret.=join("", @_) }, "text");
+ $p->handler(start => sub {
+ my ($tagname, $pos, $text) = @_;
+ if (ref $HTML::Tagset::linkElements{$tagname}) {
+ while (4 <= @$pos) {
+ # use attribute sets from right to left
+ # to avoid invalidating the offsets
+ # when replacing the values
+ my ($k_offset, $k_len, $v_offset, $v_len) =
+ splice(@$pos, -4);
+ my $attrname = lc(substr($text, $k_offset, $k_len));
+ next unless grep { $_ eq $attrname } @{$HTML::Tagset::linkElements{$tagname}};
+ next unless $v_offset; # 0 v_offset means no value
+ my $v = substr($text, $v_offset, $v_len);
+ $v =~ s/^([\'\"])(.*)\1$/$2/;
+ if ($v=~/^#/) {
+ $v=$baseurl.$v; # anchor
+ }
+ elsif ($v=~/^(?!\w+:)[^\/]/) {
+ $v=$url.$v; # relative url
+ }
+ elsif ($v=~/^\//) {
+ if (! defined $urltop) {
+ # what is the non path part of the url?
+ my $top_uri = URI->new($url);
+ $top_uri->path_query(""); # reset the path
+ $urltop = $top_uri->as_string;
+ }
+ $v=$urltop.$v; # url relative to top of site
+ }
+ $v =~ s/\"/"/g; # since we quote with ""
+ substr($text, $v_offset, $v_len) = qq("$v");
+ }
+ }
+ $ret.=$text;
+ }, "tagname, tokenpos, text");
+ $p->parse($html);
+ $p->eof;
+
+ return $ret;
+}