#!/usr/bin/perl -w
#
# Build the database of info (in plain text format so it can be easily
# edited by hand) about all the Doctor Who episodes.
#
# Use the -f option to force updating time and size of all video files
# when doing the update.
#
use strict;

my $topdir="/huge/vids";
my $whodir="$topdir/DoctorWho";
my $dbdir="$whodir/.data";
my $dbfile="$dbdir/allinfo.txt";

my %db;
my %dirdb;

# Get PATH set to include this script's directory and other useful bits
my $newpath=`dirname $0`;
chomp($newpath);
$newpath=`$newpath/echo-path`;
chomp($newpath);
$ENV{'PATH'}=$newpath;

my $force = (scalar(@ARGV) > 0) && ($ARGV[0] eq "-f");

# Utility routine to check file1 to see if it is the same as file2
# (i.e. same dev and inode).
#
sub samefile {
    my $file1 = shift;
    my $file2 = shift;
    my @f1 = stat($file1);
    my @f2 = stat($file2);
    return ((scalar(@f1) > 1) && (scalar(@f2) > 1)
         && ($f1[1] == $f2[1]) && ($f1[0] == $f2[0]));
}

# Utility routine to check file1 to see if it is newer than file2.
# Always returns 1 if either file does not exist.
#
sub newerthan {
    my $file1 = shift;
    my $file2 = shift;
    my @f1 = stat($file1);
    my @f2 = stat($file2);
    return (scalar(@f1) < 10) || (scalar(@f2) < 10) || ($f1[9] > $f2[9]);
}

# Utility routine to run midentify on video file and return length
# in seconds.
#
sub getvidseconds {
    my $vidfile = shift;
    my $avh;
    my $seconds;
    if (open($avh, '-|', 'midentify', "$vidfile")) {
        while (<$avh>) {
            if (/^ID_LENGTH=(.+)$/) {
                $seconds = $1;
            }
        }
        close($avh);
    }
    return $seconds;
}

# Read the info.txt file with the html table data copied from the
# original TiVo directory web page. Return the episode description
# with various formatting cleaned up.
#
sub getdescription {
    my $dirname = shift;
    my $inf;
    my $description;
    if (open($inf, '<', "$whodir/$dirname/info.txt")) {
        while (<$inf>) {
            if (/Doctor Who:/) {
                chomp;
                # Remove boring and repetitive copyright notice at end.
                s/Copyright.*\<\/td\>/\<\/td\>/;
                # Remove leading and trailing table data html tags
                s/^\s*\<td[^\>]*\>//;
                s/\<\/td\>//;
                # And leading and trailing spaces.
                s/^\s+//;
                s/\s+$//;
                $description = $_;
                last;
            }
        }
        close($inf);
    }
    return $description;
}

# Check for the .edl file generated by running comskip. If it exists and the
# first commercial skip starts within 2 seconds of the beginning of the mpg
# file, then it is "good", otherwise it is "bad".
#
# I now convert the .edl into a .keep, and if I've created a .keep with
# no guess- prefix, then there are no edl errors (because I manually fixed
# them in the .keep file).
#
sub edlstatus {
    my $dirname = shift;
    my $basefile = shift;
    my $inf;
    my $edl='bad';
    if (-f "$whodir/$dirname/$basefile.keep") {
        $edl = 'good';
    } else {
        if (open($inf, '<', "$whodir/$dirname/$basefile.edl")) {
            my $line = <$inf>;
            chomp($line);
            my @ed = split(' ', $line);
            if (scalar(@ed) == 3) {
                if ($ed[0] <= 2.0) {
                    $edl='good';
                }
            }
        }
    }
    return $edl;
}
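# For reference, each line of a comskip .edl file describes one
# commercial break as "start end action" (times in seconds, action 0
# meaning skip, per the usual mplayer EDL convention). So the first
# line of a good detection run looks something like this (values here
# are illustrative, not from a real capture):
#
#     0.00    212.35  0
#
# edlstatus() above only examines the first field of the first line.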
# What with marking some directories bad and asking to re-record
# episodes (or the TiVo just re-recording them by itself), I might
# have multiple directories with copies of the full .mpg file. This
# routine records the directory info for each directory under a
# top level hash indexed by the basename. I can sort through any
# duplicates and decide which directory has the best copy to include
# in the final database file.
#
sub accumdirinfo {
    my $basefile = shift;
    my $recdate = shift;
    my $dirname = shift;
    my $goodir = shift;
    my $topr = $dirdb{$basefile};
    if (! defined($topr)) {
        $topr = {};
        $dirdb{$basefile} = $topr;
    }
    my $r = {};
    $topr->{$dirname} = $r;
    my $mainr = $db{$basefile};
    if (! defined($mainr)) {
        $db{$basefile} = $r;
    }
    $r->{'score'} = 0;
    if (defined($mainr)
     && exists($mainr->{'mpgdirname'})
     && ($mainr->{'mpgdirname'} eq $dirname)
     && (! newerthan("$whodir/$dirname/$basefile.mpg", $dbfile))) {
        if (exists($mainr->{'edl'})) {
            $r->{'edl'} = $mainr->{'edl'};
        }
        if (exists($mainr->{'mpgseconds'})) {
            $r->{'mpgseconds'} = $mainr->{'mpgseconds'};
        }
        if (exists($mainr->{'description'})) {
            $r->{'description'} = $mainr->{'description'};
        }
        if (exists($mainr->{'mpgsize'})) {
            $r->{'mpgsize'} = $mainr->{'mpgsize'};
        }
        if (exists($mainr->{'mpg'})) {
            $r->{'mpg'} = $mainr->{'mpg'};
        }
        $r->{'score'} += 2000;
    }
    $r->{'mpgdirname'} = $dirname;
    $r->{'recdate'} = $recdate;
    if (! $goodir) {
        $r->{'score'} -= 1000;
    }
    if (! exists($r->{'edl'})) {
        $r->{'edl'} = edlstatus($dirname, $basefile);
        if ($r->{'edl'} eq 'bad') {
            $r->{'score'} -= 100;
        }
    }
    if ($force || (! exists($r->{'mpgseconds'}))) {
        my $seconds = getvidseconds("$whodir/$dirname/$basefile.mpg");
        if (defined($seconds)) {
            $r->{'mpgseconds'} = $seconds;
            if ($seconds >= (61.9*60)) {
                $r->{'score'} += 200;
            } elsif ($seconds < (60.1*60)) {
                $r->{'score'} -= 200;
            }
        } else {
            $r->{'score'} -= 500;
        }
    }
    if (! exists($r->{'description'})) {
        my $description = getdescription($dirname);
        if (defined($description)) {
            $r->{'description'} = $description;
        }
    }
    if ($force || (! exists($r->{'mpgsize'}))) {
        my @mpstat = stat("$whodir/$dirname/$basefile.mpg");
        if (scalar(@mpstat) > 7) {
            $r->{'mpgsize'} = $mpstat[7];
            $r->{'mpg'} = 1;
        }
    }
    if (! exists($r->{'mpg'})) {
        if (-f "$whodir/$dirname/$basefile.mpg") {
            $r->{'mpg'} = 1;
        }
    }
    if (defined($mainr)) {
        my $airdate = $mainr->{'airdate'};
        if (defined($airdate)
         && samefile("$whodir/$airdate-$basefile.avi",
                     "$whodir/$dirname/$basefile.avi")) {
            $r->{'score'} += 1000000;
        }
    }
}

# Read in the existing database (if any) to start with known data
# (which may no longer exist anywhere else). Do not record the 'avi'
# and 'mpg' flags - need to verify the existence of such files
# again during the rebuild of the database.

my $fh;
my $r;
if (open($fh, '<', $dbfile)) {
    while (<$fh>) {
        chomp;
        if (/^\[(.+)\]$/) {
            my $basename = $1;
            $r = {};
            $db{$basename} = $r;
        } elsif (/^([A-Za-z0-9_]+)=(.+)$/) {
            if (defined($r)) {
                my $key = $1;
                my $val = $2;
                if (! (($key eq 'avi') || ($key eq 'mpg'))) {
                    $r->{$key} = $val;
                }
            }
        }
    }
    close($fh);
    undef($fh);
    undef($r);
}
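# The scans below assume the naming conventions used in $whodir, which
# the regexes encode. Roughly (episode names and dates here are made
# up for illustration):
#
#     2007-06-09-Blink.avi       final copy, named by original airdate
#     2007-06-16-1-Blink/        TiVo download dir: recording date,
#                                recording number, basename
#     bad-2007-06-16-2-Blink/    a download dir marked as a bad copy
#
# where each download dir may hold Blink.mpg, Blink.edl, Blink.keep,
# Blink.avi and info.txt.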
# Read the list of .avi files in $whodir to find any basenames and
# airdates that might not already be in the database. If the .avi file
# is newer than the database file, also update the length info in
# the database.

my $dh;
my @whodirnames;
if (opendir($dh, $whodir)) {
    @whodirnames = readdir($dh);
    closedir($dh);
    undef $dh;
}

my $n;
foreach $n (@whodirnames) {
    if ($n=~/^(\d+-\d+-\d+)-([A-Za-z0-9_]+)\.avi$/) {
        my $airdate = $1;
        my $basename = $2;
        $r = $db{$basename};
        if (! defined($r)) {
            $r = {};
            $db{$basename} = $r;
        }
        if (! exists($r->{'airdate'})) {
            $r->{'airdate'} = $airdate;
        }
        my $newavi = newerthan("$whodir/$n",$dbfile);
        if ((! exists($r->{'aviseconds'})) || $newavi || $force) {
            my $aviseconds = getvidseconds("$whodir/$n");
            if (defined($aviseconds)) {
                $r->{'aviseconds'} = $aviseconds;
            }
        }
        if ((! exists($r->{'avisize'})) || $newavi || $force) {
            $r->{'avisize'} = (stat("$whodir/$n"))[7];
        }
        $r->{'avi'} = 1;
    }
}

# Now read the directories to find any original .mpg files and other
# info stashed in the download directories.

foreach $n (@whodirnames) {
    if ($n=~/^(\d+-\d+-\d+)-\d+-([A-Za-z0-9_]+)$/) {
        accumdirinfo($2, $1, $n, 1);
    } elsif ($n=~/^bad-(\d+-\d+-\d+)-\d+-([A-Za-z0-9_]+)$/) {
        accumdirinfo($2, $1, $n, 0);
    }
}

# Go through all the info gathered from directories and pick the
# highest score directory to copy info into main database.

my $bn;
foreach $bn (keys(%dirdb)) {
    my $topr = $dirdb{$bn};
    my $dirname;
    my $topscore;
    my $topref;
    foreach $dirname (keys(%{$topr})) {
        my $r = $topr->{$dirname};
        if ((! defined($topscore)) || ($topscore < $r->{'score'})) {
            $topscore = $r->{'score'};
            $topref = $r;
        }
    }
    my $mainr = $db{$bn};
    if (defined($topref) && defined($mainr)) {
        my $key;
        foreach $key (keys(%{$topref})) {
            if ($key ne 'score') {
                my $val = $topref->{$key};
                $mainr->{$key} = $val;
            }
        }
    }
}

# Save new db (keeping backup)

sub compare_airdate {
    my $ada = $db{$a}->{'airdate'};
    my $adb = $db{$b}->{'airdate'};
    if (! defined($ada)) {
        $ada='';
    }
    if (! defined($adb)) {
        $adb='';
    }
    my $rval = $ada cmp $adb;
    if ($rval == 0) {
        $rval = $a cmp $b;
    }
    return $rval;
}

my $dbtemp="$dbfile.$$";
my $dbh;
my $missing_airdate = 0;
if (open($dbh, '>', $dbtemp)) {
    foreach $bn (sort compare_airdate keys %db) {
        $r = $db{$bn};
        if (! exists($r->{'airdate'})) {
            $missing_airdate = 1;
        }
        print $dbh "\n[$bn]\n";
        my $key;
        my $val;
        foreach $key (sort(keys(%{$r}))) {
            $val = $r->{$key};
            print $dbh "$key=$val\n";
        }
    }
    close($dbh);
    unlink("$dbfile.bak");
    link($dbfile,"$dbfile.bak");
    unlink($dbfile);
    link($dbtemp,$dbfile);
    unlink($dbtemp);
}

if ($missing_airdate) {
    # I'm missing airdates for some episodes, update the database
    # from the Doctor Who wiki page to fill in any new airdates
    # discovered since the last time I did this.
    system("update-airdates");
}
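__END__

The database written above is one block per episode basename, sorted
by airdate, with each record's keys in sorted order. Based on the keys
the script stores, a typical entry should look roughly like this (all
values below are invented for illustration, not real data):

[Blink]
airdate=2007-06-09
avi=1
aviseconds=2710.48
avisize=733306880
description=Sally Sparrow receives a message from the Doctor...
edl=good
mpg=1
mpgdirname=2007-06-16-1-Blink
mpgseconds=3725.36
mpgsize=4460742344
recdate=2007-06-16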