#!/usr/bin/perl -s
use strict;
use warnings;

# These are filled in by perl's -s switch processing (see the shebang line):
# -b=URL sets $main::b and -f=URL sets $main::f. They have to be package
# variables, not lexicals, for -s to be able to set them.
our ( $b, $f );

use HTML::SimpleLinkExtor;

# Hand the base URL to the constructor only when -b was supplied; otherwise
# call new() with an empty argument list.
my $extor = HTML::SimpleLinkExtor->new( defined $b ? $b : () );

my $html = defined $f ? fetch_url( $f ) : slurp_input();

unless( defined $html ) {
    die "No input!\n";
}

$extor->parse( $html );

{
    # With the output field separator set to a newline, the trailing empty
    # string makes the last link end with a newline as well.
    local $, = "\n";
    print $extor->links, '';
}

# Fetch $url and return its content. LWP::Simple is loaded lazily so the
# script does not need it unless -f is used. Returns whatever
# LWP::Simple::get() returns (undef when the fetch fails).
sub fetch_url {
    my( $url ) = @_;

    require LWP::Simple;
    return LWP::Simple::get( $url );
}

# Read every file named on the command line (or standard input when there
# are none) and return the concatenated contents.
#
# In slurp mode each scalar-context read of <> returns one whole file, so a
# single read would silently ignore every file after the first. Keep reading
# until <> is exhausted. Returns undef when nothing could be read at all.
sub slurp_input {
    local $/;

    my $content;
    while( defined( my $chunk = <> ) ) {
        $content .= $chunk;
    }

    return $content;
}

=pod

=head1 NAME

linktractor - extract links from HTML

=head1 SYNOPSIS

    % linktractor fileA.html fileB.html

    % linktractor -f=http://www.perl.com

    % lwp-request http://www.example.com | linktractor

    % lwp-request http://www.example.com | linktractor -b=http://www.example.com

=head1 DESCRIPTION

This is a small script that uses HTML::SimpleLinkExtor to pull
all the HTML links out of the input HTML. It can take input from
files you specify on the command line (or standard input), or
fetch a URL.

=head2 Options

=over 4

=item -b=base

The C<-b> switch sets the base URL to resolve relative URLs in
the input.

=item -f=fetch_url

Instead of reading from files specified on the command line or
standard input, fetch this URL and use it as input.

=back

=head1 SOURCE AVAILABILITY

This source is part of a SourceForge project which always has the
latest sources in CVS, as well as all of the previous releases.

    http://sourceforge.net/projects/brian-d-foy/

If, for some reason, I disappear from the world, one of the other
members of the project can shepherd this module appropriately.

=head1 AUTHORS

brian d foy, C<< >>

=head1 COPYRIGHT

Copyright (c) 2007 brian d foy. All rights reserved.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 LICENSE

You may use HTML::SimpleLinkExtor under the same terms as Perl itself.

=cut

1;