# use perl
# Shell bootstrap.  If a Bourne-style shell ends up interpreting this file,
# the eval line re-executes the script under perl (located through PATH by
# -S) with the original arguments.  Perl itself reads the two lines below as
# one "eval ... if COND" statement and skips the exec when COND is false.
# NOTE(review): $runnning_under_some_shell (three n's) is presumably never
# assigned, which is what keeps the condition false under perl -- confirm
# before "correcting" the spelling.
eval 'exec perl -S $0 "$@"'
if $runnning_under_some_shell;
#
# txt2html.pl
# Convert raw text to something with a little HTML formatting
#
# Written by Seth Golub
//;
$line_action |= $HRULE;
}
}
# shortline: act on a line that is shorter than $short_line_length.
# The test reads $mode, $line, $nextline, $line_length, $short_line_length
# and $line_action; the body edits the end of $line and sets the $PAR bit in
# $line_action.
# NOTE(review): this sub is damaged and does not parse as written:
#   - "&is;_blank" is presumably a mangled "&is_blank" call;
#   - the s/$/.../ statement is terminated with '";' instead of '/;', so
#     part of its replacement text appears to be missing;
#   - the brace closing the sub is missing before the next "sub".
# Restore it from a clean copy of the script rather than guessing.
sub shortline
{
if (!($mode & $PRE) &&
!&is;_blank($line) &&
($line_length < $short_line_length) &&
!&is;_blank($nextline) &&
!($line_action & ($HEADER | $HRULE | $BREAK | $LIST)))
{
$line =~ s/$/ \n";
$line_action |= $PAR;
}
# listprefix($line)
# Decide whether $line opens a list item and, if so, describe its prefix.
#
# Returns a three element list ($prefix, $number, $rawprefix):
#   $rawprefix - leading whitespace, the bullet or label, and the single
#                character that follows it
#   $number    - the label of a numbered/lettered item; 0 for an "o" bullet,
#                undefined for the other bullets
#   $prefix    - $rawprefix with the label removed for numbered items, so
#                that "1." and "2." compare equal; same as $rawprefix for
#                bullets
# Returns (0,0,0) when the line does not look like a list item.
sub listprefix
{
    local($line) = @_;
    local($prefix, $number, $rawprefix);

    # A list item is a bullet (- = * o) or a number/letter followed by one
    # of . ) ] : -- in both cases followed by whitespace and some text.
    unless (($line =~ /^\s*[-=\*o]\s+\S/ ) ||
            ($line =~ /^\s*(\d+|[a-zA-Z])[\.\)\]:]\s+\S/ ))
    {
        return (0,0,0);
    }

    ($number) = $line =~ /^\s*(\d+|[a-zA-Z])/;

    # That slippery exception of "o" as a bullet: the pattern above takes
    # it for a letter label, so undo that here.
    # (This ought to be determined more through the context of what lists
    # we have in progress, but this will probably work well enough.)
    $number = 0 if ($line =~ /^\s*o\s/);

    if (!$number)
    {
        # Bulleted item: the prefix is used as found.
        ($rawprefix) = $line =~ /^(\s*[-=o\*].)/;
        $prefix = $rawprefix;
    }
    else
    {
        # Numbered item: take the number out of the comparable prefix.
        ($rawprefix) = $line =~ /^(\s*(\d+|[a-zA-Z]).)/;
        ($prefix = $rawprefix) =~ s/(\d+|[a-zA-Z])//;
    }

    return ($prefix, $number, $rawprefix);
}
# startlist($prefix, $number, $rawprefix)
# Open one more level of list nesting.
#
# Records $prefix in @listprefix for the current depth, appends a line to
# $prev, stores the list type ($OL or $UL) in @list, increments $listnum,
# recomputes $list_indent and sets the $LIST bit in both $line_action and
# $mode.  A numbered list is only opened when its first label is 1, "a" or
# "A"; otherwise the sub returns after recording the prefix.
# NOTE(review): both branches append exactly the same text to $prev; if an
# opening tag is expected there it may have been lost -- confirm against a
# clean copy.
sub startlist
{
    local($prefix, $number, $rawprefix) = @_;

    $listprefix[$listnum] = $prefix;

    if (!$number)
    {
        $prev .= "$list_indent\n";
        $list[$listnum] = $UL;
    }
    elsif (($number == 1) || ($number eq "a") || ($number eq "A"))
    {
        $prev .= "$list_indent\n";
        $list[$listnum] = $OL;
    }
    else
    {
        # It doesn't start with 1,a,A. Let's not screw with it.
        return;
    }

    $listnum++;
    $list_indent = " " x $listnum x $indent_width;
    $mode |= $LIST;
    $line_action |= $LIST;
}
# endlist($n)
# Close the $n innermost open lists.
#
# For each list closed, $list_indent is set to the indent of the enclosing
# level and a line is appended to $prev according to the type stored in
# @list; an unrecognised type is reported on STDERR.  $listnum is decremented
# once per list.  Afterwards $END is set in $line_action, and the $LIST bit
# is cleared from $mode once no list remains open.
sub endlist # End N lists
{
    local($n) = @_;
    local($kind);

    while ($n > 0)
    {
        $list_indent = " " x ($listnum-1) x $indent_width;
        $kind = $list[$listnum-1];

        if ($kind == $UL)
        {
            $prev .= "$list_indent\n";
        }
        elsif ($kind == $OL)
        {
            $prev .= "$list_indent\n";
        }
        else
        {
            print STDERR "Encountered list of unknown type\n";
        }

        $n--;
        $listnum--;
    }

    $line_action |= $END;
    unless ($listnum)
    {
        $mode ^= ($LIST & $mode);
    }
}
# continuelist
# Rewrite the item marker at the start of $line for the innermost open list
# and set the $LIST bit in $line_action.  Arguments passed by callers are
# not used; the sub works on $line, @list, $listnum and $list_indent.
#   $UL list: a leading "-" bullet is replaced by "$list_indent - "
#   $OL list: a leading number/letter plus the character after it is
#             replaced by "$list_indent - "
sub continuelist
{
    local($kind) = $list[$listnum-1];

    if ($kind == $UL)
    {
        $line =~ s/^\s*[-]\s*/$list_indent - /;
    }
    if ($kind == $OL)
    {
        $line =~ s/^\s*(\d+|[a-zA-Z]).\s*/$list_indent - /;
    }

    $line_action |= $LIST;
}
# liststuff
# Drive list handling for the current $line: decide whether it continues
# the current list, returns to an enclosing one, opens a nested one, or
# ends all lists, and call endlist/startlist/continuelist accordingly.
#
# Works on the globals $line, $prev, $mode, $listnum and @listprefix.
#
# Fixes relative to the damaged text:
#   * The helper calls were written "&name;(args)".  Perl reads that as an
#     argument-less "&name;" call followed by a discarded expression, so the
#     helpers never saw their arguments and the trailing "if" no longer
#     guarded continuelist.  "&is;_blank" likewise called a sub named "is".
#     All calls now pass their arguments directly.
#   * The search for a matching prefix tested $listprefix[$i-1] before
#     checking $i, so at $i == 0 it compared against $listprefix[-1] (the
#     last array element, possibly left over from an earlier list).  The
#     bound is now checked first.
sub liststuff
{
    local($i);
    local($prefix, $number, $rawprefix) = &listprefix($line);

    $i = $listnum;
    if (!$prefix)
    {
        return if !&is_blank($prev);   # inside a list item

        # This ain't no list. We'll want to end all of them.
        return if !($mode & $LIST);    # This just speeds up the inevitable
        $i = 0;
    }
    else
    {
        # Maybe we're going back up to a previous list: find the deepest
        # open level whose prefix matches.  $i ends up one past that level.
        $i-- while (($i > 0) && ($prefix ne $listprefix[$i-1]));

        # No open list uses this prefix; -1 selects "start a new list"
        # in the test below.
        $i = -1 if ($i == 0);
    }

    if (($i >= 0) && ($i != $listnum))
    {
        &endlist($listnum - $i);
    }
    elsif (!$listnum || $i != $listnum)
    {
        &startlist($prefix, $number, $rawprefix);
    }

    &continuelist($prefix, $number, $rawprefix) if ($mode & $LIST);
}
# endpreformat
# Leave preformatted mode when the text stops looking preformatted.
#
# A line "looks preformatted" when it contains a run of at least
# $preformat_whitespace_min whitespace characters followed by non-space
# text.  Preformatting ends when $line does not look preformatted and
# either $endpreformat_trigger_lines is 1 or $nextline does not look
# preformatted either.  On ending, a newline is appended to $prev, the $PRE
# bit is cleared from $mode and $END is set in $line_action.
sub endpreformat
{
    unless (($line =~ /\s{$preformat_whitespace_min,}\S+/) ||
            ($endpreformat_trigger_lines != 1 &&
             $nextline =~ /\s{$preformat_whitespace_min,}\S+/))
    {
        $line_action |= $END;
        $mode ^= ($PRE & $mode);
        $prev =~ s#$#\n#;
    }
}
# preformat
# Enter preformatted mode when the text starts looking preformatted.
#
# Triggers unconditionally when $preformat_trigger_lines is 0.  Otherwise
# $line must contain at least $preformat_whitespace_min whitespace
# characters followed by non-space text, and, unless
# $preformat_trigger_lines is 1, $nextline must as well.  When triggered, a
# newline is put in front of $line, the first space is removed from $prev,
# and the $PRE bit is set in both $mode and $line_action.
sub preformat
{
    local($trigger) = ($preformat_trigger_lines == 0);

    if (!$trigger && ($line =~ /\s{$preformat_whitespace_min,}\S+/))
    {
        $trigger = ($preformat_trigger_lines == 1 ||
                    $nextline =~ /\s{$preformat_whitespace_min,}\S+/);
    }

    if ($trigger)
    {
        $line =~ s/^/\n/;
        $prev =~ s/ //;
        $mode |= $PRE;
        $line_action |= $PRE;
    }
}
# make_new_anchor
# Advance the global $anchor counter and return its new value.
# Any arguments supplied by the caller are ignored.
sub make_new_anchor
{
    return ++$anchor;
}
# anchor_mail: wrap a mail header line in an anchor.
# Captures the text that follows the "name:" part of $line into $text,
# asks for a fresh anchor value, and then rewrites $line.
# NOTE(review): this sub is damaged and does not parse as written:
#   - "&make;_new_anchor" is presumably a mangled "&make_new_anchor" call;
#   - the s/(.*)/ statement has no replacement or closing delimiter, and
#     the sub has no closing brace; what follows in the file is revision
#     history text.
# Restore it from a clean copy of the script rather than guessing.
sub anchor_mail
{
local($text) = $line =~ /\S+: *(.*) *$/;
local($anchor) = &make;_new_anchor($text);
$line =~ s/(.*)/
# * moved usage subroutine up top so people who look through code see
# it sooner
#
# Revision 1.6 94/10/28 12:43:46 12:43:46 seth (Seth Golub)
# * Creates anchors at each heading
#
# Revision 1.5 94/07/14 17:43:59 17:43:59 seth (Seth Golub)
# * Fixed minor bug in Headers
# * Preformatting can be set to only start/stop when TWO lines of
# [non]formatted-looking-text are encountered. Old behavior is still
# possible through command line options (-pb 1 -pe 1).
# * Can preformat entire document (-pb 0) or disable preformatting
# completely (-pe 0).
# * Fixed minor bug in CAPS handling (paragraph breaks broke)
# * Puts paragraph tags *before* paragraphs, not just between them.
#
# Revision 1.4 94/06/20 16:42:55 16:42:55 seth (Seth Golub)
# * Allow ':' for numbered lists (e.g. "1: Figs")
# * Whitespace at end of line will not start or end preformatting
# * Mailmode is now off by default
# * Doesn't break short lines if they are the first line in a list
# item. It *should* break them anyway if the next line is a
# continuation of the list item, but I haven't dealt with this yet.
# * Added action on lines that are all capital letters. You can change
# how these lines get tagged, as well as the minimum number of
# consecutive capital letters required to fire off this action.
#
# Revision 1.3 94/05/17 15:58:58 15:58:58 seth (Seth Golub)
# * Tiny bugfix in unhyphenation
#
# Revision 1.2 94/05/16 18:15:16 18:15:16 seth (Seth Golub)
# * Added unhyphenation
#
# Revision 1.1 94/05/16 16:19:03 16:19:03 seth (Seth Golub)
# Initial revision
#
#
# 1.02 Allow '-' in mail headers
# Added handling for multiline mail headers
#
#
#
# Oscar Nierstrasz has a nice script for hypertextifying URLs.
# It is available at:
# http://cui_www.unige.ch/ftp/PUBLIC/oscar/scripts/html.pl
#
#########################
# Configurable options
#
# [-s
# [-p
# <= 0 : Preformat entire document
# 1 : one line triggers
# >= 2 : two lines trigger
# [-pe
# <= 0 : Never preformat within document
# 1 : one line triggers
# >= 2 : two lines trigger
# NOTE for --prebegin and --preend:
# A zero takes precedence. If one is zero, the other is ignored.
# If both are zero, entire document is preformatted.
# [-r
\n/g; # Linefeeds become horizontal rules
}
# hrule: recognise a line that is meant as a horizontal rule.
# When $line consists only of characters from the set - _ ~ = * (each
# optionally followed by whitespace), repeated at least $hrule_min times,
# $line is emptied, $prev is edited and $BREAK is set in $line_action.
# NOTE(review): the "$prev =~ s/" statement is split over two lines and has
# only two delimiters, so it is not a complete substitution as written;
# its pattern and/or replacement appear to have been lost.  The empty
# string assigned to $line may be missing text for the same reason --
# confirm against a clean copy of the script.
sub hrule
{
if ($line =~ /^\s*([-_~=\*]\s*){$hrule_min,}$/)
{
$line = "";
$prev =~ s/
/;
$line_action |= $BREAK;
}
}
# mailstuff
# Give mail-style lines (quoted text and headers) their own line break.
#
# Works on the globals $line, $prev, $nextline and $previous_action and
# records what it did in $line_action:
#   * quoted text ("FF> ..." or "Igor| ...")       -> $BREAK | $MAILQUOTE
#   * "Some-Header: value" following a blank line
#     or another header                            -> $BREAK | $MAILHEADER
#   * indented continuation of a header            -> $BREAK | $MAILHEADER
# In each case a newline is added at the end of $line, and nothing happens
# when the following line is blank.  The first header of a run is also
# handed to anchor_mail.
#
# Fix relative to the damaged text: the helper calls were written
# "&is;_blank(...)" and "&anchor;_mail", which put a stray semicolon inside
# the conditions and called subs named "is" and "anchor".  They now call
# is_blank and anchor_mail.
# NOTE(review): the text added to $line is a bare newline; given that the
# $BREAK flag is set, a line-break tag may originally have been part of it
# -- confirm against a clean copy of the script.
sub mailstuff
{
    if ((($line =~ /^\w*>/) ||            # Handle "FF> Werewolves."
         ($line =~ /^\w*\|/)) &&          # Handle "Igor| There wolves."
        !&is_blank($nextline))
    {
        $line =~ s/$/\n/;
        $line_action |= $BREAK | $MAILQUOTE;
    }
    elsif (($line =~ /^[\w\-]*:/)         # Handle "Some-Header: blah"
           && (($previous_action & $MAILHEADER) || &is_blank($prev))
           && !&is_blank($nextline))
    {
        # Only the first header of a block gets an anchor.
        &anchor_mail() if !($previous_action & $MAILHEADER);
        $line =~ s/$/\n/;
        $line_action |= $BREAK | $MAILHEADER;
    }
    elsif (($line =~ /^\s+\S/) &&         # Handle multi-line mail headers
           ($previous_action & $MAILHEADER) &&
           !&is_blank($nextline))
    {
        $line =~ s/$/\n/;
        $line_action |= $BREAK | $MAILHEADER;
    }
}
# paragraph: appends text to $prev.
# NOTE(review): the body is damaged and does not parse as written: the
# string literal opened with '"' is never closed, and the statement ends
# like the tail of an s/// replacement ("...<\/h2>/;"), so it looks like
# pieces of two different statements joined together.  Restore it from a
# clean copy of the script rather than guessing.
sub paragraph
{
$prev .= "$1<\/h2>/;
}
# anchor_heading($heading)
# Obtains a fresh anchor value for $heading and then rewrites $line.
# NOTE(review): this sub is damaged and does not parse as written:
#   - "&make;_new_anchor" is presumably a mangled "&make_new_anchor" call;
#   - the s/// statement is split over two lines with an unbalanced "(" in
#     its pattern and no third delimiter, so part of it appears to be
#     missing.
# Restore it from a clean copy of the script rather than guessing.
sub anchor_heading
{
local($heading) = @_;
local($anchor) = &make;_new_anchor($heading);
$line =~ s/(
$1/;
}
# heading: treat $line as a heading when $nextline underlines it.
#
# $line is split into its leading whitespace and its text; $nextline must
# consist of a single run of non-space characters (the underline).  The sub
# returns without doing anything when the heading text is longer than the
# underline by more than $underline_tolerance_short, or shorter than it by
# more than $underline_tolerance_long.  The first underline character then
# selects the heading level:  * = 1,  = = 2,  + = 3,  - = 4,  ~ = 5,  . = 6;
# any other character means "not a heading".
# NOTE(review): the end of this sub is damaged and does not parse as
# written: the "$nextline = ..." statement ends like the tail of an s///
# replacement, and there is one closing brace too many.  Whatever used
# $hlevel to tag the line appears to have been lost -- restore it from a
# clean copy of the script rather than guessing.
sub heading
{
local($hindent, $heading) = $line =~ /^(\s*)(.+)$/;
$hindent = 0; # This isn't used yet, but Perl warns of
# "possible typo" if I declare a var
# and never reference it.
# This is now taken care of in main()
# $heading =~ s/\s+$//; # get rid of trailing whitespace.
local($underline) = $nextline =~ /^\s*(\S+)\s*$/;
# Give up unless the underline is about as long as the heading text.
if((length($heading) > (length($underline) + $underline_tolerance_short))
|| (length($heading) < (length($underline) -$underline_tolerance_long)))
{
return;
}
# $underline =~ s/(^.).*/$1/; # Could I do this any less efficiently?
# Only the first underline character decides the level.
$underline = substr($underline,0,1);
local($hlevel);
$hlevel = 1 if $underline eq "*";
$hlevel = 2 if $underline eq "=";
$hlevel = 3 if $underline eq "+";
$hlevel = 4 if $underline eq "-";
$hlevel = 5 if $underline eq "~";
$hlevel = 6 if $underline eq ".";
return if !$hlevel;
$nextline = $1<\/H2>\n/;
}
}
# caps
# Tag a line that is written in capital letters.
#
# $line qualifies when it contains no lower-case letter and no "<", and has
# a run of at least $min_caps_length consecutive capitals.  Such a line is
# passed to tagline with $caps_tag, and $CAPS is set in $line_action.
#
# Fix relative to the damaged text: the call was written
# "&tagline;($caps_tag)", which Perl reads as an argument-less "&tagline;"
# call followed by a discarded expression, so the tag never reached
# tagline.  The argument is now passed directly.
sub caps
{
    if ($line =~ /^[^a-z<]*[A-Z]{$min_caps_length,}[^a-z<]*$/)
    {
        &tagline($caps_tag);
        $line_action |= $CAPS;
    }
}
sub main
{
&deal;_with_options;
if(!$extract)
{
print "\n";
# It'd be nice if we could guess a title from the first header,
# but even that would be too late if we're doing this in one pass.
}
$prev = "";
$line =
if ($line =~ /(.*)\s*$/)
# $line =~ s///;
{
$title2 =~ s///;
$title2 = $line;
print "$title2
\n";
$line =~ s/(.*)//;
}
if ($line =~ /^(.*)rmb.simplenet.com(.*)\s*$/)
{
$line =~ s/rmb.simplenet.com/rmb.simplenet.com/;
}
{
$line =~ s/(.*)//;
}
{
$line =~ s/(.*)//;
}
{
$line =~ s/(.*)//;
}
{
$line =~ s/(.*)//;
}
{
$line =~ s/(.*)//;
}
{
$line =~ s/(.*)//;
}
{
$line =~ s/(.*)//;
}
{
$line =~ s/(.*)//;
}
if ($line =~ /From.awp\@(.*)$/)
{
$line =~ s/(.*)//;
}
print $prev;
if (!&is;_blank($nextline))
{
$previous_action = $line_action;
$line_action = $NONE;
}
$prev = $line;
$line = $nextline;
$nextline =