#!/usr/bin/perl -w
# change the above to the location of the Perl binary on your system

# mysql-xmlcvt.pl

# Parse XML errata sheet for "MySQL" book and generate HTML.
# To use:
#   mysql-xmlcvt.pl html mysql-book-errata.xml > mysql-book-errata.html

# Paul DuBois
# paul@snake.net
# 2000-03-01

# 2000-03-01
# - Wrote first version, based on xmlfaq.pl:
#*********************************************************************#
# xmlfaq.pl Copyright 1998 Jonathan Eisenzopf, All rights reserved    #
# AUTHOR: Jonathan Eisenzopf <eisen@pobox.com>                        #
# DESCRIPTION: A short Perl script that parses my Perl XML FAQ and    #
# converts it into HTML.                                              #
#*********************************************************************#
# 2000-03-03
# - Rewrote with switchbox architecture to allow output formats in
#   addition to HTML.
# 2000-03-20
# - Add <font> tags to <pre> output in HTML mode to force code listings
#   to courier.  This avoids the problem of listings displaying in
#   proportional font when output is embedded in a page that sets its
#   own font, as happens on the www.mysql.com Web site.

########
# MAIN #
########
use strict;
use XML::Parser;

# Work out the program's base name (used in the usage message) and read
# the command-line arguments.
#
# format_type is optional: the usage text has always said that html is
# the default, so a single argument is taken to be the XML file and the
# format falls back to "html".  The two-argument form behaves as before.
my ($prog, $usage);
($prog = $0) =~ s|.*/||;		# strip leading directories from $0
$usage = "Usage: $prog [format_type] xml-file

format_type specifies the output format.  Allowable specifiers are:
html		HTML output (this is the default)
troff-ms	troff -ms output (send into troff -ms or groff -ms)";

die "$usage\n" unless @ARGV == 1 || @ARGV == 2;

# @ARGV itself is left untouched; only the local copies get the default.
my ($fmt_type, $filename) = (@ARGV == 2) ? @ARGV : ("html", $ARGV[0]);

# Switchbox: maps an output format name to a per-tag action table.
#
# In each action table the key is an XML tag name and the value is a
# two-element array: [ action for the open tag, action for the close tag ].
# An action is one of:
#   - a string:          the tag handler prints it
#   - a code reference:  the tag handler calls it
#   - undef:             the tag handler does nothing
# A tag with no entry at all in the table is echoed as <tag ...> / </tag>
# (with its attributes), so arbitrary markup passes through untouched.

my %switchbox = (
    'html' => {
        'faq'       => [ '<html>', "</body>\n</html>\n" ],
        'header'    => [ undef, \&html_header_ ],
        'title'     => [ undef, undef ],
        'version'   => [ undef, undef ],
        'overview'  => [ '<hr><h2>Overview</h2>', '<p>' ],
        'erratum'   => [ '<hr>', undef ],
        'location'  => [ '<strong>', '</strong><br>' ],
        'date'      => [ 'Reported on: ', '<br>' ],
        'submitter' => [ 'Reported by: ', '<br>' ],
        'br'        => [ '<br>', undef ],
        'pre'       => [ '<pre><font face="courier">', '</font></pre>' ],
        ''          => [ undef, undef ],    # end placeholder
    },

    # (this doesn't really work very well yet)
    'troff-ms' => {
        'faq'       => [ undef, undef ],
        'header'    => [ undef, \&troff_ms_header_ ],
        'title'     => [ undef, undef ],
        'version'   => [ undef, undef ],
        'overview'  => [ "\n.SH\nOverview\n", undef ],
        'erratum'   => [ "\n.sp\n----------\n.br\n", undef ],
        'location'  => [ "\n.br\n.ft B\n", "\n.ft R\n.br\n" ],
        'date'      => [ "\n.br\nReported on: ", "\n.ft R\n.br\n" ],
        'submitter' => [ "\n.br\nReported by: ", "\n.ft R\n.br\n" ],
        'br'        => [ "\n.br\n", undef ],
        'p'         => [ "\n.LP\n", undef ],
        'pre'       => [ "\n.DS\n", ".DE\n" ],
        ''          => [ undef, undef ],    # end placeholder
    },
);

# Select the action table for the requested output format; if there is
# none, stop and list the formats the switchbox knows about.
my $handler = $switchbox{$fmt_type};
defined ($handler)
    or die "Unknown format: $fmt_type\nAllowable formats: "
        . join (" ", sort (keys (%switchbox))) . "\n";

# Header fields (title, version) gathered by handle_char and printed by
# the format's header function when the header close tag is seen.
my %header = ();

# Create the parser and register the callbacks:
#   Start - called for each opening tag        (handle_start)
#   End   - called for each closing tag        (handle_end)
#   Char  - called for each run of text        (handle_char)
# The constructor is called as Class->new rather than with the
# indirect-object form ("new Class (...)"), which Perl has to guess how
# to parse and which perlobj advises against.
my $parser = XML::Parser->new(Handlers => {Start => \&handle_start,
                                           End   => \&handle_end,
                                           Char  => \&handle_char});

# Parse the file named on the command line.  All output is produced by
# the callbacks as parsing proceeds.
$parser->parsefile($filename);

# Begin-tag handler.
#
# Arguments (as passed by XML::Parser to a Start handler):
#   $p    - the parser object (not used here)
#   $tag  - name of the element being opened
#   @attr - flat list of attribute name/value pairs
#
# Looks the tag up in the action table for the current output format
# ($handler) and acts on the open-tag slot (index 0) of its entry:
#   no entry       - echo the tag and its attributes as <tag a="v" ...>
#   undef          - ignore the tag
#   code reference - call it (with no arguments)
#   anything else  - print it as a string

sub handle_start {
    my ($p, $tag, @attr) = @_;
    my $action_ref = $handler->{$tag};

    if (!defined ($action_ref))                 # no handler; print tag and attributes
    {
        print "<$tag";
        while (@attr)
        {
            my $name  = shift (@attr);
            my $value = shift (@attr);

            # The parser hands over attribute values with entities already
            # decoded.  Re-escape the characters that would otherwise end
            # the quoted value early or start bogus markup in the output.
            $value =~ s/&/&amp;/g;
            $value =~ s/</&lt;/g;
            $value =~ s/"/&quot;/g;

            print " $name=\"$value\"";
        }
        print ">";
    }
    elsif (!defined ($action_ref->[0]))         # ignore tag
    {
        # do nothing
    }
    elsif (ref ($action_ref->[0]) eq "CODE")    # invoke function to handle tag
    {
        $action_ref->[0]->();
    }
    else                                        # print string to handle tag
    {
        print $action_ref->[0];
    }
}

# End-tag handler.
#
# Called with the parser object and the name of the element being closed.
# The close-tag slot (index 1) of the tag's entry in the current action
# table ($handler) decides what happens: a missing entry echoes </tag>,
# undef does nothing, a code reference is called, and anything else is
# printed as a string.

sub handle_end {
    my ($p, $tag) = @_;        # $p (the parser object) is not used
    my $entry = $handler->{$tag};

    # Tag unknown to this output format: pass the closing tag through.
    unless (defined ($entry))
    {
        print "</$tag>";
        return;
    }

    my $on_close = $entry->[1];

    # Nothing to emit for this closing tag.
    return unless defined ($on_close);

    if (ref ($on_close) eq "CODE")
    {
        $on_close->();         # function handles the tag
    }
    else
    {
        print $on_close;       # plain string handles the tag
    }
}

# Character-data handler.
#
# Arguments (as passed by XML::Parser to a Char handler):
#   $p    - the parser object; used to ask which element we are inside
#   $data - a piece of text from the document
#
# Text inside <title> or <version> is saved in %header so that the header
# function can print it later; all other text is printed immediately.
#
# Two details matter here:
# - The parser may deliver one run of text in several calls, so header
#   text is appended to what has been collected so far instead of
#   replacing it (plain assignment kept only the last fragment).
# - The parser delivers text with entities already decoded ("&lt;" has
#   become "<").  For HTML output the markup-significant characters are
#   escaped again so they show up as text rather than being taken as
#   tags.  Other output formats get the text as is.

sub handle_char
{
    my ($p, $data) = @_;
    my $element = $p->current_element;

    if ($fmt_type eq 'html') {
        $data =~ s/&/&amp;/g;       # must come first
        $data =~ s/</&lt;/g;
        $data =~ s/>/&gt;/g;
    }

    if ($element eq 'title' || $element eq 'version') {
        $header{$element} .= $data;
    } else {
        print $data;
    }
}

# </header> handler

# Print the HTML document head and the start of the body, using the title
# and version collected in %header.  Takes no arguments.  A field that was
# never seen in the input is printed as an empty string, which avoids
# "uninitialized value" warnings under -w.

sub html_header_
{
    my $title   = defined ($header{'title'})   ? $header{'title'}   : "";
    my $version = defined ($header{'version'}) ? $header{'version'} : "";

    print "<head><title>$title $version</title></head>\n";
    print "<body>\n";
    print "<h2>$title</h2><h3>Version $version</h3>\n";
    print "<br><p>\n";
}

# Print the troff -ms title block (.TL) from the title and version
# collected in %header, then start a paragraph (.LP).  Takes no
# arguments.  A field that was never seen in the input is printed as an
# empty string, which avoids "uninitialized value" warnings under -w.

sub troff_ms_header_
{
    my $title   = defined ($header{'title'})   ? $header{'title'}   : "";
    my $version = defined ($header{'version'}) ? $header{'version'} : "";

    print "\n.TL\n$title\n.br\n$version\n";
    print ".LP\n";
}
