From 65fde136f9b4cf5c15fc5cc59f31b6aaf66f7342 Mon Sep 17 00:00:00 2001
From: h264
Date: Sun, 26 May 2024 01:40:13 -0400
Subject: [PATCH] stable script

---
 .gitignore     |  1 +
 DDtracklist.pl | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md      |  9 +++++++++
 3 files changed, 60 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 DDtracklist.pl
 create mode 100644 README.md

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4f1e0c9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+tracklist.txt
diff --git a/DDtracklist.pl b/DDtracklist.pl
new file mode 100644
index 0000000..b5c3af0
--- /dev/null
+++ b/DDtracklist.pl
@@ -0,0 +1,50 @@
+#!/usr/bin/perl
+# Fetch a web page, extract every HTML table on it, and dump the second and
+# third cell of each row as "cell1/cell2" to STDOUT and to tracklist.txt.
+# The output file is overwritten on every run.
+use strict;
+use warnings;
+use HTML::TableExtract;
+use LWP::Simple;
+use feature "unicode_strings";
+# ':std' applies the UTF-8 layer to STDIN/STDOUT/STDERR as well, so no
+# separate binmode on STDOUT is needed.
+use open ':std', ':encoding(UTF-8)';
+
+my $outfile = 'tracklist.txt';
+
+print "Enter a webpage to extract:\n";
+my $url = <STDIN>;
+die "No URL given\n" unless defined $url;    # EOF on STDIN
+$url =~ s/^\s+|\s+$//g;                      # strip the newline from the prompt input
+die "No URL given\n" unless length $url;
+
+my $content = get( $url );
+die "Couldn't get URL" unless defined $content;
+
+open( my $fh, '>:encoding(UTF-8)', $outfile )
+    or die "Couldn't open $outfile for writing: $!\n";
+
+my $te = HTML::TableExtract->new();
+$te->parse( $content );
+
+foreach my $ts ( $te->tables() )
+{
+    foreach my $row ( $ts->rows() )
+    {
+        # Cells may be undef; treat those as empty, then trim leading and
+        # trailing whitespace from each cell.
+        my @cells = map { my $c = $_ // ''; $c =~ s/^\s+|\s+$//g; $c } @$row;
+
+        # Build "second/third" from whatever cells exist; rows with fewer
+        # cells still emit a (possibly empty) line.
+        my $line = '';
+        $line .= "$cells[1]/" if @cells > 1;
+        $line .= $cells[2]    if @cells > 2;
+
+        print "$line\n";
+        print $fh "$line\n";
+    }
+}
+
+close $fh or die "Couldn't close $outfile: $!\n";
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..fbdc642
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+A script for pulling the tracklist of albums I purchase from diverse.direct.
+
+Run the script using the perl interpreter and enter the full url to the page of the CD whose tracklist info you wish to dump.
+
+The output is dumped into the tracklist file.
+
+The file is overwritten on each run.
+
+I added some text here to test something...