#!/usr/bin/perl
#
# Since calibre conversion seems to ignore the perfectly valid .ncx file as
# far as splitting chapters properly, this script is designed to add markup
# to the html that mirrors the info in the .ncx file so I can tell the
# converter to recognize the new markup to auto generate the toc. (Sigh...)
#
# One argument is the .ncx file.
#
# This is a total hack - it does no xml parsing, but simply uses text
# matching and assumes the <text> tag always comes before the matching
# <content> tag (cheesy, but easy :-).
#
# When converting, use this XPath to match headers that are chapters,
# and Bob's 'yer Uncle!
#
# //*[(name()='h1' or name()='h2' or name()='h3') and @class = 'chapter']
#
# And to get multi-level chapters right, use these:
#
# //h:h1[re:test(@class, "chapter", "i")]
# //h:h2[re:test(@class, "chapter", "i")]
# //h:h3[re:test(@class, "chapter", "i")]

use strict;
use warnings;
use File::Basename;

# html file name (as written in the .ncx) => { marker => info }, where info
# is a hash with the keys 'indent', 'text' and 'count'.
my %files;

# Indent width of a <text> line in the .ncx => header tag name ("h1", ...).
my %hdrs;

# read_ncx($ncxfile)
#
# Scan the navMap section of $ncxfile line by line and fill in %files and
# %hdrs.  Each <text>...</text> line supplies a label and its leading
# whitespace width; the next <content src="file#marker"> line says where
# that label belongs.  Distinct indent widths are then mapped, narrowest
# first, to h1, h2, h3, ...
#
# Dies if the file cannot be read or if the <text>/<content> pairing is
# broken.
#
# NOTE(review): the patterns for <navMap>, <text> and <content> are
# reconstructed from the NCX format; confirm against real input files.
sub read_ncx {
    my $ncxfile = shift;
    my $indent;
    my $text;

    open(my $fh, '<', $ncxfile) or die "Cannot read $ncxfile : $!\n";

    # Skip everything before the navMap (e.g. the docTitle <text>).
    while (my $line = <$fh>) {
        last if ($line =~ m{<navMap\b});
    }

    while (my $line = <$fh>) {
        if ($line =~ m{^(\s*)<text>(.*)</text>}) {
            if (defined($text)) {
                die "Apparently missed a matching content prior to text $2\n";
            }
            $indent = length($1);
            $text   = $2;
        }
        elsif ($line =~ m{<content\b[^>]*?\bsrc="([^"#]*)\#([^"]*)"}) {
            my ($file, $marker) = ($1, $2);
            if (!defined($text)) {
                die "Found content $file#$marker without a preceding text\n";
            }
            $hdrs{$indent} = 1;
            $files{$file}{$marker} = {
                'indent' => $indent,
                'text'   => $text,
                'count'  => 0,
            };
            undef($indent);
            undef($text);
        }
        elsif ($line =~ m{<content\b[^>]*?\bsrc="([^"]*)"}) {
            # No "#marker" part: there is no anchor to hang a header on,
            # so report it and drop the pending label.
            print "Hey! content $1 has no marker, skipping it\n";
            undef($indent);
            undef($text);
        }
        elsif ($line =~ m{</navMap>}) {
            last;
        }
    }
    close($fh);

    my $level = 1;
    foreach my $width (sort { $a <=> $b } (keys(%hdrs))) {
        print "indent $width assigned header h$level\n";
        $hdrs{$width} = "h$level";
        ++$level;
    }
    return;
}

# _wrap_anchor($anchor, $marker, $r)
#
# Return the replacement text for one empty anchor element.  $anchor is the
# anchor exactly as found in the html, $marker its name/id value and $r the
# marker map for the current file.  An anchor listed in $r is wrapped, the
# first time it is seen, in a header carrying class="chapter" and the .ncx
# label as its title attribute; any other anchor is returned unchanged.
sub _wrap_anchor {
    my ($anchor, $marker, $r) = @_;

    return $anchor unless (exists($r->{$marker}));

    my $info = $r->{$marker};
    if (++$info->{'count'} > 1) {
        print "Hey! $marker is defined more than once!\n";
        return $anchor;
    }

    my $hdr   = $hdrs{ $info->{'indent'} };
    my $title = $info->{'text'};
    # A literal double quote would end the title attribute early.
    $title =~ s/"/&quot;/g;
    return qq{<$hdr class="chapter" title="$title">$anchor</$hdr>};
}

# fix_one_file($file, $r)
#
# Rewrite the html file $file, wrapping every empty anchor whose name/id is
# a key of the hash ref $r in a chapter header (see _wrap_anchor).  The
# result is written to "$file.new" and then renamed over $file.
#
# Dies on any read, write or rename failure; $file is left untouched
# unless the rename succeeds.
#
# NOTE(review): the anchor pattern is reconstructed; it accepts either a
# name= or an id= attribute on an empty <a></a> element - confirm that this
# matches the html being processed.
sub fix_one_file {
    my $file    = shift;
    my $r       = shift;
    my $newfile = "$file.new";

    open(my $in,  '<', $file)    or die "Cannot read $file : $!\n";
    open(my $out, '>', $newfile) or die "Cannot write $newfile : $!\n";

    while (my $line = <$in>) {
        $line =~ s{(<a\b[^>]*?\b(?:name|id)="([^"]*)"[^>]*>\s*</a>)}
                  {_wrap_anchor($1, $2, $r)}gie;
        print {$out} $line or die "Cannot write $newfile : $!\n";
    }

    close($in);
    # Buffered write errors only show up when the handle is closed.
    close($out) or die "Cannot write $newfile : $!\n";

    # rename() replaces the target itself; unlinking first would lose the
    # original if the rename then failed.
    rename($newfile, $file)
        or die "Cannot rename $newfile to $file : $!\n";
    return;
}

# main($ncxfile)
#
# Read the .ncx file, fix every html file it refers to (paths are taken
# relative to the directory of the .ncx file) and finally report markers
# that were listed in the .ncx but never found in the html.  Returns 0.
sub main {
    my @args = @_;

    if (@args < 1) {
        die "Usage: $0 file.ncx\n";
    }
    my $ncxfile = $args[0];

    read_ncx($ncxfile);

    my $dir = dirname($ncxfile);
    foreach my $file (sort(keys(%files))) {
        fix_one_file($dir . "/" . $file, $files{$file});
    }

    foreach my $file (sort(keys(%files))) {
        my $r = $files{$file};
        foreach my $marker (sort(keys(%$r))) {
            if ($r->{$marker}{'count'} == 0) {
                print "Hey! marker $marker in file $file was never defined!\n";
            }
        }
    }
    return 0;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without touching anything.
exit(main(@ARGV)) unless caller;

1;