#!/usr/bin/perl -w
#
# dtd2latex
#
# Convert an XML DTD to LaTeX source for printing.
#
# Usage: dtd2latex [filenames...]
#
# With no filenames, or with the filename '-', the DTD is read from
# standard input.
#
# Bugs: Doesn't deal with characters which are legal in DTDs but not
# in LaTeX.  Also if you have a DTD containing '\end{verbatim}'
# there might be a problem :-)
#
# This program is in the public domain.  Use at your own risk.
# .
#
# Version 0.1.2
#
# -- Ed Avis, epa98@doc.ic.ac.uk, 2001-01-22
#

use strict;
use warnings;
use File::Basename;

# Paper size.
my $PAPER = 'a4paper'; # or 'letterpaper', etc.

# Minimum suspicion level before treating a comment line as example
# XML.  A higher value will treat fewer lines as example XML, a lower
# value will be more lenient.
#
my $SUSP_LEVEL = 2;

# Subroutine prototypes
sub parse_dtd($);
sub quote($);

# Run the converter only when executed as a program, so that the
# subroutines below can be loaded (e.g. by a test) without side effects.
main() unless caller;

# main
#
# Write a complete LaTeX document to standard output: one unnumbered
# section per named input file, each containing the file's comments as
# running text and its declarations as verbatim blocks.
#
# Parameters: none (reads @ARGV)
# Returns: nothing
#
sub main {
    print "\\documentclass[$PAPER]{article}\n";
    print '\begin{document}', "\n";

    my @files = @ARGV ? @ARGV : ('-');
    foreach my $file (@files) {
        if ($file ne '-') {
            my $title = quote(basename($file));
            print "\\section*{$title}\n";
        }

        my $last_type = '';
        foreach my $chunk (parse_dtd(_slurp($file))) {
            my ($type, $text) = @$chunk;
            $text =~ s/^\s+//;
            $text =~ s/\s+$//;
            next if $text eq '';

            if ($type eq 'C') {
                # Small space between consecutive comment blocks
                print '\smallskip', "\n" if $last_type eq 'C';
                _print_comment($text);
            }
            elsif ($type eq 'B') {
                print '\begin{verbatim}', "\n";
                print "$text\n";
                print '\end{verbatim}', "\n";
            }
            else {
                die "unknown chunk type '$type'";
            }
            $last_type = $type;
        }
    }

    print '\end{document}', "\n";
    return;
}

# _slurp
#
# Read the whole of one input into a string.
#
# Parameters: filename, or '-' for standard input
# Returns: the contents ('' if there is nothing to read)
#
# Dies if the file cannot be opened or closed.
#
sub _slurp {
    my ($file) = @_;
    local $/;    # slurp mode, restored on return

    my $text;
    if ($file eq '-') {
        $text = <STDIN>;
    }
    else {
        # Three-argument open: the filename is never parsed for a mode.
        open(my $fh, '<', $file) or die "cannot open $file: $!";
        $text = <$fh>;
        close($fh) or die "cannot close $file: $!";
    }
    return defined $text ? $text : '';
}

# _print_comment
#
# Print the text of one DTD comment.  Ordinary lines are quoted for
# LaTeX; runs of lines that look like example XML are wrapped in a
# small verbatim environment and printed untouched.
#
# Parameters: comment text (delimiters already stripped)
# Returns: nothing
#
sub _print_comment {
    my ($text) = @_;

    my $in_verbatim = 0;
    foreach my $line (split /\n/, $text) {
        if (not $in_verbatim) {
            # Sniff out example bits of XML: count how many XML-ish
            # features the line has.
            my $suspicion = 0;
            $suspicion++ if $line =~ /<\w/;
            $suspicion++ if $line =~ />/;
            $suspicion++ if $line =~ /\w=/;
            if ($suspicion >= $SUSP_LEVEL) {
                # Bit of XML found
                print '{ \small \begin{verbatim}', "\n";
                $in_verbatim = 1;
            }
        }
        elsif ($line =~ /^\s*$/) {
            # Blank lines always end bits of XML
            print '\end{verbatim} }', "\n";
            $in_verbatim = 0;
        }

        # FIXME: maybe we should not quote special characters in
        # comments, so that people can use LaTeX commands.
        #
        my $out = $in_verbatim ? $line : quote($line);
        print "$out\n";
    }

    # Finish the verbatim environment now the comment has finished.
    print '\end{verbatim} }', "\n" if $in_verbatim;

    print "\n"; # Paragraph end after each comment
    return;
}

# parse_dtd
#
# Turn a DTD into a list of either ['B', text] or ['C', comment]
# pairs.
#
# Parameters:
#   text of DTD
#
# Returns: list of pairs - either ['C', comment] for a comment, or
# ['B', text] for an actual bit of DTD.  The comment delimiters will
# be stripped, but apart from that the text is unchanged.  Comments
# and bits of DTD will never be split into more than one chunk.
#
# Dies if a comment is opened but never closed.
#
# I have no idea whether this will always work, but it works for me.
#
sub parse_dtd($) {
    die 'usage: parse_dtd(string)' if @_ != 1;
    my $rest = shift;

    my @r;
    while ($rest ne '') {
        my $idx = index $rest, '<!--';
        if ($idx == -1) {
            # NOTE(review): this branch was reconstructed; presumably
            # everything after the last comment is one bit of DTD.
            push @r, ['B', $rest];
            last;
        }
        elsif ($idx == 0) {
            # Comment at the front: strip it off, keeping the body.
            $rest =~ s/^<!--(.*?)-->//s
              or die 'parse_dtd: unterminated comment';
            push @r, ['C', $1];
        }
        else {
            # DTD text up to the next comment; four-argument substr
            # removes it from $rest as it returns it.
            push @r, ['B', substr($rest, 0, $idx, '')];
        }
    }
    return @r;
}

# quote()
#
# Quote at least some characters which do funny things in LaTeX
#
# Parameters: string to quote
# Returns: quoted version
#
sub quote($) {
    die 'usage: quote(string)' if @_ != 1;
    my $s = shift;

    # Backslashes first, so that the backslashes added below are not
    # themselves quoted.
    $s =~ s/\\/\\(\\backslash\\)/g;

    # Characters that only need a backslash in front
    $s =~ s/([#\$%&_{}])/\\$1/g;

    # Characters that are only available in maths mode
    $s =~ s/([<>])/\\($1\\)/g;
    $s =~ s/~/\\(\\sim\\)/g;
    $s =~ s/\^/\\(\\hat{}\\)/g;

    # Lines of dots
    $s =~ s/\.{3,}\s*$/\\dotfill/mg;

    return $s;
}

1;