#!/usr/bin/perl
#
# Find directories in database which have .edl files, but do not have
# .keep files, and convert the .edl to .keep format. Stick a guess-
# prefix on the .keep file to indicate it is just an automatically
# generated guess and not yet manually verified.
#
# If a .edl file argument is given, then just convert that .edl file,
# and don't fool with database and scanning all directories.
#
use strict;
use warnings;

use File::Basename qw(basename dirname);
use Scalar::Util qw(looks_like_number);

my $topdir = "/huge/vids";
my $whodir = "$topdir/DoctorWho";
my $dbdir  = "$whodir/.data";
my $dbfile = "$dbdir/allinfo.txt";
my %db;

# Get PATH set to include this script's directory and other useful bits.
# The echo-path helper living next to this script prints the PATH to use.
# If it produces nothing, keep the inherited PATH instead of blanking it.
my $scriptdir = dirname($0);
my $newpath   = `"$scriptdir/echo-path"`;
if (defined($newpath)) {
    chomp($newpath);
}
if (defined($newpath) && length($newpath)) {
    $ENV{'PATH'} = $newpath;
}
else {
    warn "$0: $scriptdir/echo-path gave no output, leaving PATH unchanged\n";
}

# Utility routine to run midentify on video file and return length
# in seconds.
#
# Argument: path of the video file.
# Returns:  the value of the last ID_LENGTH= line printed by midentify,
#           or undef if midentify could not be run or printed no such line.
#
sub getvidseconds {
    my $vidfile = shift;
    my $seconds;
    my $avh;
    # List form of open: the file name never passes through a shell.
    if (open($avh, '-|', 'midentify', $vidfile)) {
        while (my $line = <$avh>) {
            if ($line =~ /^ID_LENGTH=(.+)$/) {
                $seconds = $1;
            }
        }
        close($avh);
    }
    return $seconds;
}

# Core converter routine
#
# Arguments: path of the .edl file to read,
#            length of the video in seconds,
#            path of the .keep file to write.
# Returns:   1 if a .keep file was written, 0 otherwise.
#
# Only .edl lines of the form "start end 0" are used; each one is a range
# to throw away. The .keep file gets one "start<TAB>end" line per range
# to keep. Nothing is written when the video length is unknown, when the
# .edl file has no usable lines, or when no range is left to keep.
#
sub do_convert {
    my $edlfile     = shift;
    my $mpegseconds = shift;
    my $keepfile    = shift;

    # Without the video length the end of the last range to keep cannot
    # be worked out, so do not write a guess that silently loses it.
    if (!defined($mpegseconds)) {
        warn "$edlfile: video length unknown, not writing $keepfile\n";
        return 0;
    }

    my $edh;
    if (!open($edh, '<', $edlfile)) {
        warn "$edlfile: cannot open for reading: $!\n";
        return 0;
    }
    my @times;
    while (my $line = <$edh>) {
        chomp($line);
        my @flds = split(' ', $line);
        next if (scalar(@flds) != 3);
        # Ignore lines whose fields are not numbers (comments, junk).
        next if (grep { !looks_like_number($_) } @flds);
        if ($flds[2] == 0) {
            push(@times, $flds[0], $flds[1]);
        }
    }
    close($edh);

    # No ranges to throw away were found, so there is nothing to convert.
    if (scalar(@times) == 0) {
        return 0;
    }

    # We now have a list of times which, if taken in pairs, are the
    # ranges to throw away, but we want the ranges to keep, so shift
    # things around a bit.
    if ($times[0] <= 2.0) {
        # Current list says to throw away video real near beginning
        # of file, so delete that start time so we'll now have a list
        # that begins with the end of the initial chunk of commercials.
        shift(@times);
    }
    else {
        # Current list starts after the beginning of the recording
        # so prepend 0.0 to the list to say we want to keep the
        # beginning.
        unshift(@times, 0.0);
    }

    my $lastime = $times[-1];
    if (($lastime >= $mpegseconds) || (($mpegseconds - $lastime) <= 2.0)) {
        # Current list says to throw away video all the way to real
        # near the end, so just pop that end time off the list.
        pop(@times);
    }
    else {
        # Last commercial ends before end time of video so add
        # end time of video as the end of the last range to keep.
        push(@times, $mpegseconds);
    }

    # Now I have a list of pairs that give ranges of times to keep.
    # Just make sure I do have a non-zero even number in the list before
    # writing them out in .keep file format.
    if ((scalar(@times) < 2) || ((scalar(@times) % 2) != 0)) {
        return 0;
    }

    my $kfh;
    if (!open($kfh, '>', $keepfile)) {
        warn "$keepfile: cannot open for writing: $!\n";
        return 0;
    }
    while (scalar(@times) > 0) {
        my $start = shift(@times);
        my $end   = shift(@times);
        print $kfh "$start\t$end\n";
    }
    # Buffered write errors only show up at close time.
    if (!close($kfh)) {
        warn "$keepfile: error while writing: $!\n";
        return 0;
    }
    return 1;
}

# Process argv to get arguments for do_convert
#
# Argument: path of a .edl file.
# Changes the current directory to the one holding the .edl file, takes
# the video length from the .mpg file with the same base name, and writes
# guess-<base>.keep in that directory.
# Returns what do_convert returns, or 0 if the directory cannot be entered.
#
sub convert_one_edl {
    my $edlfile  = shift;
    my $edlname  = basename($edlfile);
    my $basefile = basename($edlfile, '.edl');
    my $edldir   = dirname($edlfile);

    if (!chdir($edldir)) {
        warn "$edldir: cannot change directory: $!\n";
        return 0;
    }

    my $mpgfile     = "$basefile.mpg";
    my $mpegseconds = getvidseconds($mpgfile);
    # We are now inside the .edl directory, so open the file by its bare
    # name; the path given by the caller may have been relative.
    return do_convert($edlname, $mpegseconds, "guess-$basefile.keep");
}

if (scalar(@ARGV) == 1) {
    convert_one_edl($ARGV[0]);
    exit(0);
}

# Read in the existing database (if any) to start with known data
# so this update won't discard any information.
#
# The file is a series of "[basename]" header lines, each followed by
# "key=value" lines that belong to that basename.
my $dbh;
if (open($dbh, '<', $dbfile)) {
    my $record;
    while (my $line = <$dbh>) {
        chomp($line);
        if ($line =~ /^\[(.+)\]$/) {
            $record = {};
            $db{$1} = $record;
        }
        elsif ($line =~ /^([A-Za-z0-9_]+)=(.+)$/) {
            # key=value lines before the first header have no record
            # to go into and are dropped.
            if (defined($record)) {
                $record->{$1} = $2;
            }
        }
    }
    close($dbh);
}

foreach my $basename (keys(%db)) {
    my $record  = $db{$basename};
    my $dirname = $record->{'mpgdirname'};
    # A record without mpgdirname is looked up directly under $whodir.
    if (!defined($dirname)) {
        $dirname = '';
    }
    my $prefix = "$whodir/$dirname";

    # Only directories with a .edl file are of interest.
    next if (!(-f "$prefix/$basename.edl"));
    # Leave alone anything that already has a .keep file, guessed or not.
    next if (-f "$prefix/$basename.keep");
    next if (-f "$prefix/guess-$basename.keep");

    # There is no .keep file, build one from the .edl.
    my $mpegseconds = $record->{'mpgseconds'};
    if (!defined($mpegseconds)) {
        $mpegseconds = getvidseconds("$prefix/$basename.mpg");
    }
    do_convert("$prefix/$basename.edl", $mpegseconds,
               "$prefix/guess-$basename.keep");
}