#!/usr/local/bin/perl
# Fetch an HTML page and print it with every <a href> and <img src>
# rewritten as an absolute URL.
use 5.010;
use strict;
use warnings;

use LWP::UserAgent;
use XML::LibXML;
use URI;

my $link
    = 'http://japan.cnet.com/news/business/story/0,3800104746,20416479-0,00.htm';

my $ua  = LWP::UserAgent->new;
my $res = $ua->get($link);
die $res->status_line unless $res->is_success;

# Resolve relative links against the response's own base URI instead of the
# URL we asked for: HTTP::Response::base accounts for redirects and for a
# base declared by the document or its headers, falling back to the request
# URI when neither is present.
say fixlinx( $res->content, $res->base );

# fixlinx( $html, $base )
#
# Parse $html leniently as HTML and rewrite the link attributes of <a> (href)
# and <img> (src) elements so that they are absolute, resolved against $base.
#
#   $html - the HTML document as a string
#   $base - base URL (string or URI object) used to resolve relative links
#
# Returns the modified document serialized back to HTML.
sub fixlinx {
    my ( $html, $base ) = @_;

    local $SIG{__WARN__} = sub { };    # to keep LibXML quiet

    my $parser = XML::LibXML->new(
        suppress_errors   => 1,
        suppress_warnings => 1,
        recover           => 2,
    );
    my $dom = $parser->parse_html_string($html);

    # Element name => attribute that carries the URL to absolutize.
    my @link_attrs = ( [ a => 'href' ], [ img => 'src' ] );

    for my $link_attr (@link_attrs) {
        my ( $tag, $attr ) = @{$link_attr};

        for my $node ( $dom->getElementsByTagName($tag) ) {
            my $value = $node->getAttribute($attr);

            # Skip only missing or empty attributes. A plain truthiness test
            # would also skip the valid relative reference "0".
            next unless defined $value && length $value;

            $node->setAttribute(
                $attr => URI->new_abs( $value, $base )->as_string );
        }
    }

    return $dom->toStringHTML;
}