#!/usr/bin/perl -w
#
# Build the database of info (in plain text format so it can be easily
# edited by hand) about all the doctor who episodes.
#
# Use -f option to force updating time and size of all video files when
# doing the update.
#
use strict;
use warnings;
use File::Basename qw(dirname);

# Filesystem layout: everything lives under the DoctorWho tree, with the
# hand-editable database file kept in a hidden .data subdirectory.
my $topdir = "/huge/vids";
my $whodir = "$topdir/DoctorWho";
my $dbdir  = "$whodir/.data";
my $dbfile = "$dbdir/allinfo.txt";

# %db holds the final per-episode records, keyed by basename.
# %dirdb holds per-directory candidate records (also keyed by basename)
# so duplicate recordings can be scored against each other.
my %db;
my %dirdb;

# Get PATH set to include this script's directory and other useful bits.
# Use core File::Basename instead of shelling out to dirname(1); the
# echo-path helper script beside this one prints the full PATH to use.
my $newpath = dirname($0);
$newpath = `$newpath/echo-path`;
chomp($newpath);
$ENV{'PATH'} = $newpath;

# -f forces re-reading the length and size of every video file.
my $force = (scalar(@ARGV) > 0) && ($ARGV[0] eq "-f");
# Utility routine to check whether file1 and file2 are the same file
# (i.e. same device and inode). Returns false if either does not exist.
#
# True when both paths resolve to the same underlying file: both must
# exist and share the same device and inode numbers.
sub samefile {
    my ($path_a, $path_b) = @_;
    my @st_a = stat($path_a);
    my @st_b = stat($path_b);
    # stat() returns an empty list on failure; require both stats to
    # succeed, then compare inode ([1]) and device ([0]) fields.
    return ((scalar(@st_a) > 1) && (scalar(@st_b) > 1) &&
            ($st_a[1] == $st_b[1]) && ($st_a[0] == $st_b[0]));
}
# Utility routine to check file1 to see if it is newer than file2
# (by modification time); always returns 1 if either file does not exist.
#
# True when file1's mtime is strictly greater than file2's. If either
# stat fails (fewer than 10 fields back), treat file1 as newer.
sub newerthan {
    my ($path_a, $path_b) = @_;
    my @st_a = stat($path_a);
    my @st_b = stat($path_b);
    return 1 if scalar(@st_a) < 10;
    return 1 if scalar(@st_b) < 10;
    # Field [9] is the modification time.
    return ($st_a[9] > $st_b[9]);
}
# Utility routine to run midentify on a video file and return its length
# in seconds, or undef if midentify produced no ID_LENGTH line (or could
# not be started).
#
sub getvidseconds {
    my $vidfile = shift;
    my $seconds;
    my $pipe;
    # List-form pipe open avoids the shell; midentify emits KEY=VALUE
    # lines and we keep the last ID_LENGTH value seen.
    if (open($pipe, '-|', 'midentify', "$vidfile")) {
        while (my $line = <$pipe>) {
            $seconds = $1 if $line =~ /^ID_LENGTH=(.+)$/;
        }
        close($pipe);
    }
    return $seconds;
}
# Read the info.txt file with the html table data copied from the
# original TiVo directory web page. Return the episode description
# with various formatting cleaned up, or undef if no "Doctor Who:"
# line was found (or the file could not be opened).
#
# Argument: directory name relative to $whodir.
#
sub getdescription {
    my $dirname = shift;
    my $inf;
    my $description;
    if (open($inf, '<', "$whodir/$dirname/info.txt")) {
        while (<$inf>) {
            # The description is the table cell naming the series.
            if (/Doctor Who:/) {
                chomp;
                # Remove boring and repetitive copyright notice at end.
                s/Copyright.*\<\/td\>/\<\/td\>/;
                # Remove leading and trailing table data html tags.
                # (Fixed: this substitution was garbled across two lines;
                # it strips a leading "<td ...>" opening tag.)
                s/^\s*\<td[^\>]*\>//;
                s/\<\/td\>//;
                # And leading and trailing spaces.
                s/^\s+//;
                s/\s+$//;
                $description = $_;
                last;
            }
        }
        close($inf);
    }
    return $description;
}
# Check for the .edl file generated by running comskip. If it exists and the
# first commercial skip starts within 2 seconds of the beginning of the mpg
# file, then it is "good", otherwise it is "bad".
#
# I now convert the .edl into a .keep, and if I've created a .keep with
# no guess- prefix, then there are no edl errors (because I manually fixed
# them in the .keep file).
#
# Arguments: directory name relative to $whodir, and the episode basename.
# Returns the string 'good' or 'bad'.
#
sub edlstatus {
    my $dirname = shift;
    my $basefile = shift;
    my $inf;
    my $edl = 'bad';
    if (-f "$whodir/$dirname/$basefile.keep") {
        # A hand-verified .keep file means the edit list is trusted.
        $edl = 'good';
    } else {
        if (open($inf, '<', "$whodir/$dirname/$basefile.edl")) {
            # Only the first line matters: start, end, action.
            my $line = <$inf>;
            if (defined($line)) {   # guard: an empty .edl yields undef here
                chomp($line);
                my @ed = split(' ', $line);
                if (scalar(@ed) == 3) {
                    if ($ed[0] <= 2.0) {
                        $edl = 'good';
                    }
                }
            }
            close($inf);
        }
    }
    return $edl;
}
# What with marking some directories bad and asking to re-record
# episodes (or the TiVo just re-recording them by itself), I might
# have multiple directories with copies of the full .mpg file. This
# routine records the directory info for each directory under a
# top level hash indexed by the basename. I can sort through any
# duplicates and decide which directory has the best copy to include
# in the final database file.
#
# Arguments: episode basename, recording date (from the directory name),
# directory name relative to $whodir, and a flag that is false when the
# directory carries the "bad-" prefix.
#
# Side effects: stores a scored record in $dirdb{basename}{dirname}, and
# seeds $db{basename} with that same record when no main entry exists.
#
sub accumdirinfo {
    my $basefile = shift;
    my $recdate = shift;
    my $dirname = shift;
    my $goodir = shift;
    # Look up (or create) the per-basename hash of candidate directories.
    my $topr = $dirdb{$basefile};
    if (! defined($topr)) {
        $topr = {};
        $dirdb{$basefile} = $topr;
    }
    # Fresh record for this particular directory.
    my $r = {};
    $topr->{$dirname} = $r;
    # If this basename has never been seen, the record also becomes the
    # main database entry.
    my $mainr = $db{$basefile};
    if (! defined($mainr)) {
        $db{$basefile} = $r;
    }
    $r->{'score'} = 0;
    # If the existing db entry already points at this directory and the
    # .mpg has not changed since the db file was written, carry the
    # cached values forward (avoids re-running the slow midentify scan)
    # and award a large bonus for being the previously chosen copy.
    if (defined($mainr) &&
        exists($mainr->{'mpgdirname'}) &&
        ($mainr->{'mpgdirname'} eq $dirname) &&
        (! newerthan("$whodir/$dirname/$basefile.mpg", $dbfile))) {
        if (exists($mainr->{'edl'})) {
            $r->{'edl'} = $mainr->{'edl'};
        }
        if (exists($mainr->{'mpgseconds'})) {
            $r->{'mpgseconds'} = $mainr->{'mpgseconds'};
        }
        if (exists($mainr->{'description'})) {
            $r->{'description'} = $mainr->{'description'};
        }
        if (exists($mainr->{'mpgsize'})) {
            $r->{'mpgsize'} = $mainr->{'mpgsize'};
        }
        if (exists($mainr->{'mpg'})) {
            $r->{'mpg'} = $mainr->{'mpg'};
        }
        $r->{'score'} += 2000;
    }
    $r->{'mpgdirname'} = $dirname;
    $r->{'recdate'} = $recdate;
    # Directories explicitly marked bad are heavily penalized.
    if (! $goodir) {
        $r->{'score'} -= 1000;
    }
    # Commercial-skip status: a bad edit list costs points.
    if (! exists($r->{'edl'})) {
        $r->{'edl'} = edlstatus($dirname, $basefile);
        if ($r->{'edl'} eq 'bad') {
            $r->{'score'} -= 100;
        }
    }
    # Video length: reward full-length recordings (>= ~62 min), penalize
    # short ones (< ~60 min), and penalize hard when midentify fails.
    if ($force || (! exists($r->{'mpgseconds'}))) {
        my $seconds = getvidseconds("$whodir/$dirname/$basefile.mpg");
        if (defined($seconds)) {
            $r->{'mpgseconds'} = $seconds;
            if ($seconds >= (61.9*60)) {
                $r->{'score'} += 200;
            } elsif ($seconds < (60.1*60)) {
                $r->{'score'} -= 200;
            }
        } else {
            $r->{'score'} -= 500;
        }
    }
    # Pull the episode description from info.txt if not already cached.
    if (! exists($r->{'description'})) {
        my $description = getdescription($dirname);
        if (defined($description)) {
            $r->{'description'} = $description;
        }
    }
    # Record the .mpg size (field [7] of stat) and existence flag.
    if ($force || (! exists($r->{'mpgsize'}))) {
        my @mpstat = stat("$whodir/$dirname/$basefile.mpg");
        if (scalar(@mpstat) > 7) {
            $r->{'mpgsize'} = $mpstat[7];
            $r->{'mpg'} = 1;
        }
    }
    if (! exists($r->{'mpg'})) {
        if (-f "$whodir/$dirname/$basefile.mpg") {
            $r->{'mpg'} = 1;
        }
    }
    # If this directory's .avi is hard-linked to the top-level dated .avi,
    # it is the copy actually in use: give it an overwhelming bonus.
    if (defined($mainr)) {
        my $airdate = $mainr->{'airdate'};
        if (defined($airdate) && samefile("$whodir/$airdate-$basefile.avi",
            "$whodir/$dirname/$basefile.avi")) {
            $r->{'score'} += 1000000;
        }
    }
}
# Read in the existing database (if any) to start with known data
# (which may no longer exist anywhere else). Do not record the 'avi'
# and 'mpg' flags - need to verify the existence of such files
# again during the rebuild of the database.
#
# ($r is intentionally file-scoped: later loops below reuse it.)
my $fh;
my $r;
if (open($fh, '<', $dbfile)) {
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /^\[(.+)\]$/) {
            # Section header: start a fresh record for this basename.
            $r = {};
            $db{$1} = $r;
        } elsif ($line =~ /^([A-Za-z0-9_]+)=(.+)$/) {
            # key=value line; ignore anything before the first section.
            if (defined($r)) {
                my ($key, $val) = ($1, $2);
                unless (($key eq 'avi') || ($key eq 'mpg')) {
                    $r->{$key} = $val;
                }
            }
        }
    }
    close($fh);
    undef($fh);
    undef($r);
}
# Read the list of .avi files in $whodir to find any basenames and
# airdates that might not already be in the database. If the .avi file
# is newer than the database file, also update the length info in
# the database.
my $dh;
my @whodirnames;
if (opendir($dh, $whodir)) {
    @whodirnames = readdir($dh);
    closedir($dh);
    undef $dh;
}
# ($n is file-scoped: the directory-scanning loop below reuses it.)
my $n;
foreach $n (@whodirnames) {
    # Top-level avi names look like YYYY-MM-DD-BaseName.avi
    next unless $n =~ /^(\d+-\d+-\d+)-([A-Za-z0-9_]+)\.avi$/;
    my ($airdate, $basename) = ($1, $2);
    $r = $db{$basename};
    unless (defined($r)) {
        $r = {};
        $db{$basename} = $r;
    }
    # Keep any airdate already recorded in the database.
    unless (exists($r->{'airdate'})) {
        $r->{'airdate'} = $airdate;
    }
    my $avipath = "$whodir/$n";
    my $newavi = newerthan($avipath, $dbfile);
    # Refresh length and size when missing, stale, or forced by -f.
    if ($force || $newavi || (! exists($r->{'aviseconds'}))) {
        my $len = getvidseconds($avipath);
        $r->{'aviseconds'} = $len if defined($len);
    }
    if ($force || $newavi || (! exists($r->{'avisize'}))) {
        $r->{'avisize'} = (stat($avipath))[7];
    }
    $r->{'avi'} = 1;
}
# Now read the directories to find any original .mpg files and other
# info stashed in the download directories. Normal directories look like
# YYYY-MM-DD-NNN-BaseName; ones I marked bad carry a "bad-" prefix and
# get a false "good directory" flag.
foreach $n (@whodirnames) {
    if ($n =~ /^(\d+-\d+-\d+)-\d+-([A-Za-z0-9_]+)$/) {
        accumdirinfo($2, $1, $n, 1);
    }
    elsif ($n =~ /^bad-(\d+-\d+-\d+)-\d+-([A-Za-z0-9_]+)$/) {
        accumdirinfo($2, $1, $n, 0);
    }
}
# Go through all the info gathered from directories and pick the
# highest score directory to copy info into main database.
# ($bn is file-scoped: the save loop below reuses it.)
my $bn;
foreach $bn (keys(%dirdb)) {
    my $candidates = $dirdb{$bn};
    my $best;
    my $bestscore;
    # Scan every candidate directory record, keeping the top scorer.
    foreach my $d (keys(%{$candidates})) {
        my $cand = $candidates->{$d};
        if ((! defined($bestscore)) || ($cand->{'score'} > $bestscore)) {
            $bestscore = $cand->{'score'};
            $best = $cand;
        }
    }
    # Merge the winner's fields (except the transient score) into the
    # main database record.
    my $mainr = $db{$bn};
    if (defined($best) && defined($mainr)) {
        foreach my $key (keys(%{$best})) {
            $mainr->{$key} = $best->{$key} unless $key eq 'score';
        }
    }
}
# Save new db (keeping backup)
#
# sort() comparator: order basenames by airdate (missing airdates sort
# first as empty strings), breaking ties by basename.
sub compare_airdate {
    my $date_a = $db{$a}->{'airdate'};
    my $date_b = $db{$b}->{'airdate'};
    $date_a = '' unless defined($date_a);
    $date_b = '' unless defined($date_b);
    # cmp yields -1/0/1; fall through to the name comparison on ties.
    return ($date_a cmp $date_b) || ($a cmp $b);
}
# Write the new database to a temp file, then rotate it into place,
# keeping the previous version as a .bak hard link.
my $dbtemp = "$dbfile.$$";
my $dbh;
my $missing_airdate = 0;
if (open($dbh, '>', $dbtemp)) {
    foreach $bn (sort compare_airdate keys %db) {
        $r = $db{$bn};
        # Remember whether any episode still lacks an airdate.
        $missing_airdate = 1 unless exists($r->{'airdate'});
        print $dbh "\n[$bn]\n";
        foreach my $key (sort(keys(%{$r}))) {
            my $val = $r->{$key};
            print $dbh "$key=$val\n";
        }
    }
    close($dbh);
    # Rotate: current db becomes .bak, temp file becomes current.
    unlink("$dbfile.bak");
    link($dbfile, "$dbfile.bak");
    unlink($dbfile);
    link($dbtemp, $dbfile);
    unlink($dbtemp);
}
if ($missing_airdate) {
    # I'm missing airdates for some episodes, update the database
    # from the Doctor Who wiki page to fill in any new airdates
    # discovered since the last time I did this.
    system("update-airdates");
}