#!/usr/cs/bin/perl
# File: tbl2html
# Author: Anand Natrajan
# Contact: anand@virginia.edu, http://www.cs.virginia.edu/~an4m/
# Task: Converts embedded tbl file formats to HTML.
# I/P and O/P: STDIN and STDOUT
use strict;
use warnings;

# Filter: copy STDIN (or the files named on the command line) to STDOUT,
# replacing every tbl table -- the lines from a ".TS" line through the
# matching ".TE" line -- with an HTML table.  All other lines are passed
# through unchanged.
#
# Inside a table, the leading lines are format lines (the last one ends
# with a period); every following line is one table row whose columns are
# separated by tabs.  Format keys understood:
#   l r c   left / right / centred text
#   n       number, aligned on the decimal point
#   s       the previous column spans into this one (colspan)
#   L R C N as above, and consecutive rows holding the same value in that
#           field are merged into one cell (rowspan)
#
# NOTE(review): the HTML tags and the "&nbsp;" padding in the string
# literals below were missing from the reviewed copy of this script and
# have been reconstructed from the surrounding logic -- confirm the exact
# spelling against the upstream script.

my @lines  = <>;
my @output = ();

while (@lines) {
    my $line = shift @lines;

    if ($line =~ /^\.TS/) {
        # convert_table() consumes everything up to and including ".TE".
        push @output, convert_table(\@lines);
    }
    else {
        push @output, $line;
    }
}

print @output;

# Convert one table.  $lines is a reference to the remaining input lines,
# positioned just after the ".TS" line; the table's lines (including the
# closing ".TE", if present) are removed from it.  Returns the list of
# HTML output lines for the table.  Dies if a format line starts with a
# span ("s") key.
sub convert_table {
    my ($lines) = @_;

    my @rows       = ("<table>\n");
    my @formats    = ();    # format lines not yet used up
    my $in_formats = 1;     # still reading the format section?
    my @max_after  = ();    # per field: most digits after the decimal point
    my @prev_value = ();    # per field: value of the cell that opened the
                            #   current row span
    my @prev_mark  = ();    # per field: mark number given to that cell
    my @span_count = ();    # per mark:  number of rows the cell covers
    my $next_mark  = 0;

    while (@$lines && $lines->[0] !~ /^\.TE/) {
        my $text = shift @$lines;
        chomp $text;        # chomp, not chop: the last line may lack "\n"

        if ($in_formats) {
            # The format section ends with the line that ends in a period.
            $in_formats = 0 if $text =~ s/\.\s*$//;
            push @formats, $text;
            next;
        }

        # Assume one row per line and tab-separated columns.
        my @specs = split ' ', $formats[0];
        die "First column cannot be a span specification"
            if @specs && $specs[0] =~ /^s/;

        # Split into a named array: perl 5.12 and later no longer split
        # into @_ implicitly.
        my @cells = split /\t/, $text;

        push @rows, "\t<tr>\n" if @cells;
        for my $i (0 .. $#cells) {
            my $cell = $cells[$i];

            # Take this field's key plus any span keys that follow it.
            my $spec    = @specs ? shift @specs : '';
            my $colspan = 1;
            while (@specs && $specs[0] =~ /^s/) {
                $colspan++;
                shift @specs;
            }

            my $attrs = '';
            if ($spec =~ /^[LRCN]/) {    # row-spanning field
                if (defined $prev_value[$i] && $prev_value[$i] eq $cell) {
                    # Same value as the row above: widen that cell and
                    # emit nothing for this one.
                    $span_count[ $prev_mark[$i] ]++;
                    next;
                }
                # Put a temporary mark; the real count is known only once
                # the whole table has been read.
                $attrs                  = " rowspan=#$next_mark";
                $prev_mark[$i]          = $next_mark;
                $span_count[$next_mark] = 1;
                $prev_value[$i]         = $cell;
                $next_mark++;
            }

            if    ($spec =~ /^[lL]/) { $attrs .= " align=left"; }
            elsif ($spec =~ /^[rR]/) { $attrs .= " align=right"; }
            elsif ($spec =~ /^[cC]/) { $attrs .= " align=center"; }
            elsif ($spec =~ /^[nN]/) {
                if (is_number($cell)) {
                    my $decimals = count_decimals($cell);
                    $max_after[$i] = $decimals
                        if !defined $max_after[$i]
                        || $decimals > $max_after[$i];
                    # Temporary mark carrying the field number and the
                    # number of digits after the decimal point.
                    $attrs .= " align=numeric${i}.${decimals}";
                }
                else {
                    # Text in a numeric column (e.g. a heading) is kept
                    # verbatim instead of being reformatted as a number.
                    $attrs .= " align=right";
                }
            }

            $attrs .= " colspan=$colspan" if $colspan > 1;
            push @rows, "\t\t<td$attrs>$cell</td>\n";
        }

        # The last format line applies to all remaining rows.
        shift @formats if @formats > 1;
        push @rows, "\t</tr>\n" if @cells;
    }
    shift @$lines if @$lines;    # drop the ".TE" line
    push @rows, "</table>\n";

    # Second pass over this table only: resolve the temporary marks.
    for my $row (@rows) {
        if ($row =~ s/^(\t\t<td[^>]*?) align=numeric(\d+)\.(\d+)/$1 align=right/) {
            my ($field, $decimals) = ($2, $3);

            # Right-align and pad the end so decimal points line up.
            my $padding = 0;
            if ($max_after[$field] > 0) {
                $padding = $max_after[$field] - $decimals;
                $padding++ if $decimals == 0;    # no decimal point printed
            }

            my $start  = index($row, ">") + 1;
            my $number = substr($row, $start,
                                index($row, "</td>", $start) - $start);
            # Need fixed-width font for numbers.
            substr($row, $start) = "<tt>"
                . sprintf("%.${decimals}f", $number)
                . ("&nbsp;" x $padding)
                . "</tt></td>\n";
        }

        if ($row =~ /^\t\t<td rowspan=#(\d+)/) {
            # Replace row span marks with actual counts.
            my $count = $span_count[$1];
            if ($count == 1) { $row =~ s/ rowspan=#\d+//; }
            else             { $row =~ s/rowspan=#\d+/rowspan=$count/; }
        }
    }

    return @rows;
}

# True if $text looks like a decimal number, optionally with an exponent.
sub is_number {
    my ($text) = @_;
    return $text =~ /^\s*[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?\s*$/;
}

# Number of digits needed after the decimal point to print $number in
# plain (non-exponent) notation, ignoring trailing zeros.
sub count_decimals {
    my ($number) = @_;

    $number =~ s/^\s+//;
    $number =~ s/\s+$//;
    my ($mantissa, $exponent) = split /[eE]/, $number, 2;
    $exponent = 0 if !defined $exponent;

    my $fraction = ($mantissa =~ /\.(\d*)/) ? $1 : "";
    $fraction =~ s/0+$//;

    # If an exponent is used, expanding it moves the decimal point.
    my $decimals = length($fraction) - $exponent;
    return $decimals > 0 ? $decimals : 0;
}
=head1 NAME
tbl2html - a script that converts tbl tables into HTML
=head1 DESCRIPTION
This script takes tables formatted using tbl from STDIN and outputs
equivalent HTML tables on STDOUT. All tbl directives are processed,
including numbers. Uppercase versions of tbl directives introduce
row-spanning.
=head1 README
tbl2html is a tbl to HTML converter. tbl is a preprocessor for troff
that takes table descriptions and outputs troff directives. tbl's input
format is very succinct, as opposed to HTML, so I decided to write a
converter from one to the other. My converter passes through untouched
everything tbl would have ignored and changes the rest to HTML. The HTML
tables are written to STDOUT when the tbl source is fed to tbl2html on
STDIN. Special mention must be made of the fact that numbers are aligned
properly. As an additional feature, I allow row-spanning to be specified
by capitalising the field specifications.
=pod SCRIPT CATEGORIES
CPAN/Administrative
Fun/Educational
=cut