52 lines
901 B
Perl
52 lines
901 B
Perl
|
#!/usr/bin/perl

# Prompt for a URL, download the page, extract every HTML table found in it,
# and for each table row emit "<column 2>/<column 3>" on its own line, both
# to STDOUT and to the file tracklist.txt in the current directory.
# Rows with fewer than three columns produce a shorter (possibly empty) line.

use strict;
use warnings;
use HTML::TableExtract;
use LWP::Simple;
use feature "unicode_strings";

# ':std' applies the UTF-8 encoding layer to STDIN/STDOUT/STDERR as well, so
# no separate binmode call on STDOUT is needed.
use open ':std', ':encoding(UTF-8)';

my $output_file = 'tracklist.txt';

# Return a copy of the given cell text with leading and trailing whitespace
# stripped. Cells without content arrive as undef; map those to the empty
# string so that later string operations do not warn.
sub _trim_cell {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/^\s+|\s+$//g;
    return $text;
}

print "Enter a webpage to extract:\n";
my $url = <STDIN>;

# <STDIN> yields undef on EOF and otherwise keeps the trailing newline.
die "No URL given\n" unless defined $url;
$url = _trim_cell($url);
die "No URL given\n" if $url eq '';

# Fetch first, so that a failed download does not truncate an existing
# output file.
my $content = get($url);
die "Couldn't get URL" unless defined $content;

open( my $fh, '>:encoding(UTF-8)', $output_file )
    or die "Couldn't open $output_file for writing: $!";

my $te = HTML::TableExtract->new();
$te->parse($content);

for my $table ( $te->tables() ) {
    for my $row ( $table->rows() ) {
        my @cells = map { _trim_cell($_) } @$row;

        # Only the second and third columns (indices 1 and 2) are emitted;
        # the first column and anything past the third are ignored.
        my $line = '';
        $line .= "$cells[1]/" if @cells > 1;
        $line .= $cells[2]    if @cells > 2;
        $line .= "\n";

        print $line;
        print {$fh} $line;
    }
}

# Buffered write errors only surface at close time, so check it.
close $fh or die "Couldn't close $output_file: $!";
|
||
|
|
||
|
|