#! /usr/bin/perl
#
# Copyright (c) 2005-2006  Motoyuki Kasahara
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of the project nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

#
# html-split -- split an HTML file.
#
# Usage:
#     html-split [option...] input-file
#
# `html-split' splits an HTML file with heading tags (<hN>...</hN>).
# Suppose that `input-file' is `foo.html', HTML files splitted by
# `html-split' are `foo-0.html', `foo-1.html', and so on.
#
# Options:
#     -Z             do not add `-0' to the first split.
#     -l LEVEL       split with this heading level
#                    (default: h2)
#     -p PREFIX      prefix of splitted HTML files.
#     -s SUFFIX      suffix of splitted HTML files.
#                    (default: html)
#     -w WIDTH       minimum width of split number.
#                    (default: 1)
#     -t TOC         fragment name of `Table of Contents'.
#                    (default: toc)
#
# The input is expected to have `<body>', `</body>' and the heading tags
# at the very beginning of a line; everything before the `<body>' line is
# copied to the top of every output file.
#

# Three-argument open() and lexical file handles need Perl 5.6.
require 5.006;
use strict;
use warnings;
use Getopt::Std;
use File::Basename;

# NOTE: `use utf8' is deliberately not enabled.  The link labels below are
# written to the output as the same bytes they have in this source file.

#
# Usage
#
my $usage = "Usage: $0 [option...] input-file\n";

#
# Variables
#
my $in_file;
my $out_prefix;
my $out_suffix;
my $counter_width = 1;
my $split_level = 2;
my $toc_tag = 'toc';
my $suppress_zero_flag = 0;
my @preamble = ();          # Raw lines that precede the `<body>' line.
my @body = ();              # Chomped lines between `<body>' and `</body>'.
my @toc = ();               # Heading line that opens each page.
my %page_of_tag = ();       # Anchor name => index of the page holding it.
my $toc_page = 0;           # Page that holds the table of contents anchor.

#
# Parse command line arguments.
#
my %options;
getopts('Zl:w:p:s:t:', \%options) or die $usage;
die $usage if (@ARGV != 1);

$in_file = $ARGV[0];

if (defined($options{l})) {
    # Accept both `h2' and `2'.
    (my $level = $options{l}) =~ s/^h//;
    if ($level !~ /^[1-6]$/) {
        die "$0: invalid heading level: $options{l}\n", $usage;
    }
    $split_level = $level;
}
if (defined($options{w})) {
    # The width is used as a sprintf() field width, so it must be a
    # plain non-negative integer.
    if ($options{w} !~ /^\d+$/) {
        die "$0: invalid width: $options{w}\n", $usage;
    }
    $counter_width = $options{w};
}
$suppress_zero_flag = defined($options{Z});
$toc_tag = $options{t} if (defined($options{t}));

if (defined($options{p})) {
    $out_prefix = $options{p};
} else {
    $out_prefix = basename($in_file, '.htm', '.html');
}

if (defined($options{s})) {
    $out_suffix = $options{s};
} elsif ($in_file =~ m|\.htm$|) {
    $out_suffix = 'htm';
} else {
    $out_suffix = 'html';
}

#
# Read an HTML file.
#
# The file is read once.  The body lines are kept in memory because a link
# may point to an anchor that appears later in the file, so the anchor
# index must be complete before any page is written.
#
open(my $in, '<', $in_file)
    or die "$0: failed to open the file, $!: $in_file\n";

my $toc_found = 0;
my $current_page = 0;

while (my $line = <$in>) {
    last if ($line =~ m|^<body>|);
    push(@preamble, $line);
}

while (my $line = <$in>) {
    chomp($line);
    last if ($line =~ m|^</body>|);
    push(@body, $line);

    if ($line =~ m|^<h([1-6])>| && $1 <= $split_level) {
        # The first heading opens page 0; each later one starts a new page.
        $current_page++ if (@toc > 0);
        push(@toc, $line);
    }

    # Index every anchor on the line, not only the first one.
    while ($line =~ m|<a name="([^"]+)">|g) {
        my $tag = $1;
        if ($tag eq $toc_tag) {
            $toc_page = $current_page;
            $toc_found = 1;
        }
        # When a name is defined twice, the first definition wins.
        $page_of_tag{$tag} = $current_page
            unless (exists($page_of_tag{$tag}));
    }
}
close($in);

if (!$toc_found) {
    die "$0: <a name=\"$toc_tag\"> not found\n";
}
if (@toc == 0) {
    warn "$0: no heading of level h$split_level or above: $in_file\n";
}

#
# Generate splitted HTML files.
#
my $next_line = 0;          # Index of the first unread element of @body.

for (my $page = 0; $page < @toc; $page++) {
    # Navigation bar: previous page, next page, table of contents.
    my $bar = '';
    if ($page > 0) {
        $bar .= sprintf("[<a href=\"%s\">前へ</a>] ",
            splitted_file_name($page - 1));
    }
    if ($page + 1 < @toc) {
        $bar .= sprintf("[<a href=\"%s\">次へ</a>] ",
            splitted_file_name($page + 1));
    }
    $bar .= sprintf("[<a href=\"%s#%s\">目次</a>] ",
        splitted_file_name($toc_page), $toc_tag);

    my $out_file = splitted_file_name($page);
    open(my $out, '>', $out_file)
        or die "$0: failed to open the file, $!: $out_file\n";

    print {$out} @preamble;
    print {$out} "<body>\n";
    print {$out} "<p>\n", $bar, "\n</p>\n<hr>\n";

    # The heading that opens this page was consumed while the previous
    # page was being written (or, for page 0, is skipped below), so it is
    # emitted here.
    print {$out} $toc[$page], "\n";

    while ($next_line < @body) {
        my $line = $body[$next_line++];
        if ($line =~ m|^<h([1-6])>| && $1 <= $split_level) {
            # Page 0 has already printed its own heading above.
            next if ($page == 0 && $line eq $toc[$page]);
            last;
        }
        # Single pass: a replacement is never scanned again.
        $line =~ s|<a href="#([^"]+)">|rewrite_href($1)|ge;
        print {$out} $line, "\n";
    }

    print {$out} "<hr>\n<p>\n", $bar, "\n</p>\n";
    print {$out} "</body>\n";
    print {$out} "</html>\n";

    # Buffered write errors (e.g. disk full) are reported at close time.
    close($out)
        or die "$0: failed to close the file, $!: $out_file\n";
}

#
# Return n'th splitted file name.
#
# With `-Z', page 0 is named `PREFIX.SUFFIX'; every other page is named
# `PREFIX-N.SUFFIX', where N is zero-padded to at least WIDTH digits.
#
sub splitted_file_name {
    my ($n) = @_;

    if ($n == 0 && $suppress_zero_flag) {
        return sprintf("%s.%s", $out_prefix, $out_suffix);
    }
    # `*' takes the field width from the argument list.
    return sprintf("%s-%0*d.%s", $out_prefix, $counter_width, $n,
        $out_suffix);
}

#
# Rewrite <a href="#TAG">.
#
# Return the opening `<a>' tag that points to TAG in the splitted file
# which holds the anchor.  An unknown TAG is reported on standard error.
#
sub rewrite_href {
    my ($tag) = @_;

    if (exists($page_of_tag{$tag})) {
        return sprintf("<a href=\"%s#%s\">",
            splitted_file_name($page_of_tag{$tag}), $tag);
    }

    warn "$0: unknown tag \`$tag'\n";
    # NOTE(review): the link is left as it was for an unknown tag; confirm
    # that this is the wanted fallback.
    return "<a href=\"#$tag\">";
}