#!/usr/bin/perl -w
#
# dtd2latex
#
# Convert an XML DTD to LaTeX source for printing.
#
# Usage: dtd2latex [filenames...]
#
# Bugs: Doesn't deal with characters which are legal in DTDs but not
# in LaTeX. Also if you have a DTD containing '\end{verbatim}'
# there might be a problem :-)
#
# This program is in the public domain. Use at your own risk.
# .
#
# Version 0.1.2
#
# -- Ed Avis, epa98@doc.ic.ac.uk, 2001-01-22
#
use strict;
use File::Basename;
# Paper size.
my $PAPER = 'a4paper'; # or 'letterpaper', etc.

# Minimum suspicion level before treating a comment line as example
# XML. A higher value will treat fewer lines as example XML, a lower
# value will be more lenient.
#
my $SUSP_LEVEL = 2;

# Each pattern that matches a comment line adds one to that line's
# suspicion score: an opening tag, a closing angle bracket, and an
# attribute assignment.
#
my @XML_HINTS = (qr/<\w/, qr/>/, qr/\w=/);

# Subroutine prototypes
sub parse_dtd($);
sub quote($);

print "\\documentclass[$PAPER]{article}\n";
print '\begin{document}', "\n";

# With no arguments, read a single DTD from standard input.
@ARGV = '-' if not @ARGV;

foreach my $filename (@ARGV) {
    my $fh;
    if ($filename eq '-') {
        # '-' means standard input; it gets no section heading.
        $fh = \*STDIN;
    }
    else {
        my $title = quote(basename($filename));
        print "\\section*{$title}\n";

        # Three-argument open: the name is taken literally, so an
        # argument such as '|cmd' or '>file' cannot run a command or
        # clobber a file.
        open($fh, '<', $filename) or die "cannot open $filename: $!";
    }

    # Slurp the whole input; parse_dtd() needs the full text because
    # comments may span several lines.
    my $dtd = do { local $/; <$fh> };
    $dtd = '' unless defined $dtd;
    close $fh unless $filename eq '-';

    my $last_type = '';
    foreach my $chunk (parse_dtd($dtd)) {
        my ($type, $text) = @$chunk;
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        next if $text eq '';

        if ($type eq 'C') {
            # Small space between consecutive comment blocks
            print '\smallskip', "\n" if $last_type eq 'C';

            # Sniff out example bits of XML in comments
            my $in_verbatim = 0;
            foreach my $line (split /\n/, $text) {
                if (not $in_verbatim) {
                    my $suspicion = grep { $line =~ $_ } @XML_HINTS;
                    if ($suspicion >= $SUSP_LEVEL) {
                        # Bit of XML found
                        print '{ \small \begin{verbatim}', "\n";
                        $in_verbatim = 1;
                    }
                }
                elsif ($line =~ /^\s*$/) {
                    # Blank lines always end bits of XML
                    print '\end{verbatim} }', "\n";
                    $in_verbatim = 0;
                }

                # FIXME: maybe we should not quote special characters in
                # comments, so that people can use LaTeX commands.
                #
                $line = quote($line) unless $in_verbatim;
                print "$line\n";
            }

            # Finish the verbatim environment now the comment has
            # finished.
            print '\end{verbatim} }', "\n" if $in_verbatim;

            print "\n"; # Paragraph end after each comment
        }
        elsif ($type eq 'B') {
            # Real DTD text is always printed verbatim.
            print '\begin{verbatim}', "\n";
            print "$text\n";
            print '\end{verbatim}', "\n";
        }
        else {
            die "unexpected chunk type '$type' from parse_dtd";
        }
        $last_type = $type;
    }
}

print '\end{document}', "\n";
# parse_dtd
#
# Turn a DTD into a list of either ['B', text] or ['C', comment]
# pairs.
#
# Parameters:
# text of DTD
#
# Returns: list of array references in document order - either
# ['C', comment] for a comment, or ['B', text] for an actual bit of
# DTD. The comment delimiters '<!--' and '-->' are stripped, but apart
# from that the text is unchanged. Comments and bits of DTD will never
# be split into more than one chunk. An empty string gives an empty
# list.
#
# Dies if the argument count is wrong or if a comment is opened but
# never closed.
#
# Limitation: this is a plain text scan, not an XML parser, so a
# '<!--' inside a quoted literal is treated as the start of a comment.
#
sub parse_dtd($) {
    die 'usage: parse_dtd(string)' if @_ != 1;
    my $dtd = shift;
    my @chunks;

    # Repeatedly bite the next chunk off the front of $dtd.
    while ($dtd ne '') {
        my $idx = index $dtd, '<!--';
        if ($idx == -1) {
            # No more comments: everything left is DTD text.
            push @chunks, ['B', $dtd];
            last;
        }
        elsif ($idx == 0) {
            # A comment starts here. The non-greedy match with /s
            # stops at the first '-->' even across newlines.
            $dtd =~ s/\A<!--(.*?)-->//s
                or die 'parse_dtd: unterminated comment in DTD';
            push @chunks, ['C', $1];
        }
        else {
            # DTD text up to the next comment; four-argument substr
            # removes it from $dtd and returns it.
            push @chunks, ['B', substr($dtd, 0, $idx, '')];
        }
    }
    return @chunks;
}
# quote()
#
# Quote at least some characters which do funny things in LaTeX, so
# that the text is typeset literally.
#
# Parameters: string to quote
# Returns: quoted version
#
# What is handled:
#   \              becomes \(\backslash\)
#   _ # % { } & $  are prefixed with a backslash
#   < >            are wrapped in inline maths, e.g. \(<\)
#   ~ ^            become \(\sim\) and \(\hat{}\)
#   three or more dots at the end of a line become \dotfill
#
# Every occurrence of each character is quoted, not just the first.
#
sub quote($) {
    die 'usage: quote(string)' if @_ != 1;
    my $text = shift;

    # Backslashes must go first, otherwise the backslashes added by
    # the later substitutions would themselves be quoted.
    $text =~ s/\\/\\(\\backslash\\)/g;

    # \Q...\E so that '{' and friends are matched literally rather
    # than being interpreted as regex syntax.
    foreach my $ch ('_', '#', '%', '{', '}', '&') {
        $text =~ s/\Q$ch\E/\\$ch/g;
    }
    $text =~ s/\$/\\\$/g;

    foreach my $ch ('<', '>') {
        $text =~ s/\Q$ch\E/\\($ch\\)/g;
    }

    # These come after the brace quoting above so that the braces in
    # \hat{} are left alone.
    $text =~ s/~/\\(\\sim\\)/g;
    $text =~ s/\^/\\(\\hat{}\\)/g;

    # Lines of dots
    $text =~ s/\.{3,}\s*$/\\dotfill/mg;

    return $text;
}