#!/usr/bin/perl -w
#
# $1.13 2007/09/16$
# setext -> LaTeX converter
#
# (C) 2001 Erik Oliver
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
# USA.
#
# Usage:
#   script infile.etx outfile.tex   explicit input and output names
#   script basename                 reads basename.etx, writes basename.tex
#
# Processing happens in three passes over the slurped input (@data):
#   1. collect ".. _key URL" href definitions into %href and hide literal
#      double backticks behind a "+++" placeholder;
#   2. recognise typotags line by line, rewriting @data in place (lines
#      that were folded into an earlier line are blanked);
#   3. print the LaTeX preamble, every non-blank line (escaped by texify
#      unless it carries a verbatim environment) and the trailer.

use strict;
use Carp;

my $numargs = @ARGV;
my $infile;
my $outfile;
my %href;    # href key => footnote text, filled by pass 1, read by styleize

### indexing and break options
# Both arrays are indexed by heading level: 1 for "===", 2 for "---".
my @command = ('', 'section', 'subsection*');    # LaTeX sectioning command
my @breaks  = (0, 1, 0);                         # true => \newpage before it

if ($numargs == 1) {
    $infile  = "$ARGV[0].etx";
    $outfile = "$ARGV[0].tex";
}
elsif ($numargs == 2) {
    $infile  = $ARGV[0];
    $outfile = $ARGV[1];
}
else {
    print STDERR "usage: $0 infile.etx outfile.tex\n";
    print STDERR "usage: $0 inoutfile\n";
    exit -1;
}

if (! -e $infile) {
    print STDERR "error: Input, $infile, does not exist\n";
    exit -1;
}

if (! -r $infile) {
    print STDERR "error: Input, $infile, not readable\n";
    exit -1;
}

# Low-precedence "or" is required here: with "||" the die would bind to the
# file name string and could never run.
open my $in_fh, '<', $infile
    or die "Could not open $infile for reading, $!";
my @data = <$in_fh>;    # slurp input
chomp @data;            # strip newlines
close $in_fh;

open my $out_fh, '>', $outfile
    or die "Could not open $outfile for writing, $!";

my ($latextitle, $latextitleset,
    $latexauthor, $latexauthorset,
    $latexdate, $latexdateset) = ("Title", 0, "Author", 0, "\\today", 0);

# Pass 1: href locating
for my $i (0 .. $#data) {
    next if is_blank($data[$i]);    # skip blank lines

    # href-tt finder: .. _href URL
    if ($data[$i] =~ /^\.\.\s+_([\S_]*)\s+(.*)\s*/) {
        my $key = $1;
        lookahead('href-tt', $i);    # reduces the line to its (folded) URL text
        $href{$key} = $data[$i];
        $data[$i] = "";
    }

    # Hide literal double backticks so the quote handling in pass 2 does not
    # see them; they are turned back into a backtick just before output.
    $data[$i] =~ s/``/+++/g;
}

# Pass 2: typotag recognition
for my $i (0 .. $#data) {
    next if is_blank($data[$i]);    # skip blank lines

    my $text = $data[$i];

    # title-tt / subhead-tt / subject-tt search
    # quote-tt / include-tt / indent-tt / bullet-tt handling
    if ($text =~ /^\.\. /) {
        $data[$i] = "";
    }
    elsif ($text =~ /^Subject: (.*$)/) {
        if (!$latextitleset) {
            $latextitle = texify($1);
            $latextitleset++;
        }
        $data[$i] = "";
    }
    elsif ($text =~ /^From: (.*$)/) {
        if (!$latexauthorset) {
            $latexauthor = texify($1);
            $latexauthorset++;
        }
        $data[$i] = "";
    }
    elsif ($text =~ /^Date: (.*$)/) {
        if (!$latexdateset) {
            $latexdate = texify($1);
            $latexdateset++;
        }
        $data[$i] = "";
    }
    elsif ($text =~ /^===/) {
        toc(1, $i);
        $data[$i] = "";
    }
    elsif ($text =~ /^---/) {
        toc(2, $i);
        $data[$i] = "";
    }
    elsif ($text =~ /^\s{0,2}\* /) {
        lookahead('list', $i);
    }
    elsif ($text =~ /^\s{0,2}>\s*/) {
        lookahead('include', $i);
    }
    # NOTE(review): the literal-space patterns in this file are reproduced
    # exactly as received (one space).  If the file passed through a
    # whitespace-collapsing step they may originally have required more
    # indentation -- confirm against a pristine copy.
    elsif ($text =~ /^ ([^ ])/) {
        lookahead('indent', $i);
    }
    elsif ($text =~ /`/) {
        # if nothing else and has a `, assume
        # multiline verbatim environment
        lookahead('quote', $i);
    }
    elsif ($text =~ /\$\$/) {
        next;
    }
    elsif ($text =~ /^\s*$/) {
        $data[$i] = "";
        next;
    }
    else {
        # possibility of being here if next line
        # is === or ---
        next if ($i == $#data);
        next if ($data[$i + 1] =~ /^===/);
        next if ($data[$i + 1] =~ /^---/);
        carp "Unhandled typotag, line = $i, \"$text\"\n";
    }
}

# Pass 3: output.  The preamble is kept one command per line because the
# "%" comments in it run to end of line in LaTeX.
print {$out_fh} q|
\documentclass[12pt]{article}

\newif\ifpdf
\ifx\pdfoutput\undefined
\pdffalse % we are not running PDFLaTeX
\else
\pdfoutput=1 % we are running PDFLaTeX
\pdftrue
\fi

\ifpdf
\usepackage[pdftex]{graphicx}
\else
\usepackage{graphicx}
\fi

\textwidth = 6.5 in
\textheight = 9 in
\oddsidemargin = 0.0 in
\evensidemargin = 0.0 in
\topmargin = 0.0 in
\headheight = 0.0 in
\parskip = 0.1 in
\parindent = 0.0in
|;

print {$out_fh} qq|
\\title{$latextitle}
\\author{$latexauthor}
\\date{$latexdate}

\\begin{document}
|;

print {$out_fh} q|
\ifpdf
\DeclareGraphicsExtensions{.pdf, .jpg}
\else
\DeclareGraphicsExtensions{.eps, .jpg}
\fi

\maketitle
\tableofcontents
|;

for my $i (0 .. $#data) {
    next if is_blank($data[$i]);

    my $text = $data[$i];
    $text =~ s/\+\+\+/`/g;    # undo the pass-1 placeholder

    if ($text =~ /\{verbatim\}/) {
        # verbatim environments must reach LaTeX unescaped
        print {$out_fh} $text, "\n";
    }
    else {
        print {$out_fh} texify($text), "\n";
    }
}

print {$out_fh} q|
\end{document}
\end
|;

# Buffered write errors only surface at close time.
close $out_fh
    or die "Could not close $outfile after writing, $!";

# is_blank($text)
# Returns true when $text is undefined or the empty string.  A plain
# truthiness test is not used because it would also discard a line that
# consists of the single character "0".
sub is_blank {
    my $text = shift;
    return (!defined $text || $text eq '');
}

# toc($level, $line)
# Turns the line *before* a "===" ($level 1) or "---" ($level 2) underline
# into a LaTeX sectioning command.
#   $level - index into @command / @breaks
#   $line  - index in @data of the underline itself
# Side effects: rewrites $data[$line-1]; when no title has been set yet the
# heading text also becomes the document title.
sub toc {
    my $level = shift;
    my $line  = shift;

    # An underline on the very first line has no heading above it; without
    # this guard $data[-1] (the last line of the file) would be used.
    if ($line < 1) {
        carp "Heading underline on line $line has no preceding title line\n";
        return;
    }

    my $actualline = $data[$line - 1];

    # first === or --- becomes title if none was found in subject:
    if (!$latextitleset) {
        # The title is interpolated into \title{} without passing through
        # the output loop, so it is escaped here like the Subject: title is.
        $latextitle = texify($actualline);
        $latextitleset++;
    }

    $actualline = "\\" . $command[$level] . "{" . $actualline . "}\n";

    if ($breaks[$level]) {
        $actualline = "\\newpage\n" . $actualline;
    }

    $data[$line - 1] = $actualline;
    return;
}

# lookahead($type, $line)
# Strips the typotag marker from $data[$line], folds the following lines
# that continue the same construct into it (blanking them in @data), runs
# the result through styleize and wraps it in the matching LaTeX
# environment.
#   $type - one of 'indent', 'include', 'quote', 'list', 'href-tt'
#   $line - index in @data of the first line of the construct
# An unknown $type is reported with carp; the line is then only styleized.
sub lookahead {
    my $type = shift;
    my $line = shift;
    my $pos  = $line + 1;

    # Remove the marker from the opening line.
    if ($type eq 'indent') {
        $data[$line] =~ s/^ //;
    }
    elsif ($type eq 'include') {
        $data[$line] =~ s/^\s{0,2}>\s*//;
    }
    elsif ($type eq 'quote') {
        $data[$line] =~ s/`//;
    }
    elsif ($type eq 'list') {
        $data[$line] =~ s/^\s*\* /\\item /;
    }
    elsif ($type eq 'href-tt') {
        $data[$line] =~ s/^\.\.\s+_([\S_]*)\s+(.*)\s*/$2/;
    }
    else {
        carp "unhandled type in lookahead $type, $line\n";
    }

    # Fold continuation lines.  Each branch performs its substitution as
    # part of the test, so a line that does not continue the construct falls
    # through to the final "last".
    while ($pos <= $#data) {
        if ($type eq 'indent' && $data[$pos] =~ s/^ ([^ \*>])/$1/) {
            $data[$line] .= " $data[$pos]";
            $data[$pos] = "";
            $pos++;
            next;
        }
        elsif ($type eq 'include' && $data[$pos] =~ s/^\s{0,2}>\s*//) {
            if ($data[$pos] eq "") {
                # a bare ">" line separates paragraphs inside the quotation
                $data[$line] .= "\n\n\\vskip 10pt\\noindent ";
                $pos++;
            }
            else {
                $data[$line] .= " $data[$pos]";
                $data[$pos] = "";
                $pos++;
            }
            next;
        }
        elsif ($type eq 'quote') {
            if ($data[$pos] =~ s/`//) {
                # closing backtick: keep whatever else is on the line, stop
                if ($data[$pos] ne '') {
                    $data[$line] .= "\n$data[$pos]";
                    $data[$pos] = "";
                }
                $pos++;
                last;
            }
            else {
                $data[$line] .= "\n$data[$pos]";
                $data[$pos] = "";
                $pos++;
                next;
            }
        }
        elsif ($type eq 'list') {
            # next item
            if ($data[$pos] =~ s/^\s{0,2}\* /\\item /) {
                $data[$line] .= "\n$data[$pos]";
                $data[$pos] = "";
                $pos++;
                next;
            }
            # continuation of the current item
            elsif ($data[$pos] =~ s/^ ([^ *])/$1/) {
                $data[$line] .= " $data[$pos]";
                $data[$pos] = "";
                $pos++;
                next;
            }
            else {
                last;
            }
        }
        elsif ($type eq 'href-tt') {
            # is this line prefixed with '.. ' but not '.. _'
            # then fold in
            if ($data[$pos] =~ s/^\.\. ([^_])/$1/) {
                $data[$line] .= " $data[$pos]";
                $data[$pos] = "";
                $pos++;
                next;
            }
            else {
                last;
            }
        }
        else {
            last;
        }
    }

    $data[$line] = styleize($data[$line]);

    if ($type eq 'include') {
        $data[$line] = "\\begin{quotation}\n\\noindent $data[$line]\n\\end{quotation}\n";
    }
    elsif ($type eq 'indent') {
        $data[$line] .= "\n";
    }
    elsif ($type eq 'quote') {
        $data[$line] = "\\begin{verbatim}\n$data[$line]\n\\end{verbatim}\n";
    }
    elsif ($type eq 'list') {
        $data[$line] = "\\begin{itemize}\n$data[$line]\n\\end{itemize}\n";
    }
    return;
}

# texify($text)
# Returns a copy of $text with LaTeX-special characters escaped and plain
# ASCII quotes converted to TeX quote ligatures.  Backslashes and braces are
# left alone, so LaTeX commands already present in the text survive.
sub texify {
    local $_ = shift;

    # $ is normal (must run first: the < and > rules below add $ signs)
    s/\$/\\\$/g;

    # protect < and >
    s/</\$<\$/g;
    s/>/\$>\$/g;

    # prevent % from being treated as a comment
    s/%/\\%/g;

    # prevent # from being treated as a macro
    s/#/\\#/g;

    # handle &
    s/&/\\&/g;

    # handle ~
    s/~/\\~{}/g;

    # handle ^
    s/\^/\\^{}/g;

    # handle _
    s/_/\\_/g;

    # (c)
    s/\(c\) /\\copyright /g;

    ## contraction fix attempt: park apostrophes that sit between two
    ## alphanumerics behind "(((" so the quote pairing below ignores them
    s/([a-zA-Z0-9])'([a-zA-Z0-9])/$1((($2/g;

    ## basic balanced quote fix
    s/'([^']+)'/`$1'/g;
    s/"([^"]+)"/``$1''/g;

    # restore the parked apostrophes
    s/\(\(\(/'/g;

    return $_;
}

# styleize($text)
# Returns a copy of $text with setext inline markup converted to LaTeX:
#   **bold**  -> {\bf bold}
#   ~italic~  -> {\em italic}
#   _under_   -> {\em under}   (inner underscores become spaces)
#   word_     -> word, plus \footnote{...} when %href has an entry for it
# The first `backtick-quoted` span is shielded from these rules and is put
# back without its backticks.
sub styleize {
    local $_ = shift;
    my @literals;
    my $counter = 0;

    # search for a single ` pair and replace it by a numbered placeholder
    if (s/`([^`]*)`/`$counter`/) {
        $literals[$counter] = $1;
        $counter++;
    }

    # bold-tt
    s#\*\*([^\*]*)\*\*#{\\bf $1}#g;

    # italic-tt
    s#~([^ ]*)~#{\\em $1}#g;

    # underline-tt
    s{\b_(\S*)_\b}{
        my $label = $1;
        $label =~ tr/_/ /;
        "{\\em $label}";
    }eg;

    # hot-tt
    s{\b([^\s]+)_\b}{
        my $key    = $1;
        my $target = $href{$key};
        my $label  = $key;
        $label =~ tr/_/ /;
        $target ? "$label\\footnote{$target}" : "$label";
    }eg;

    # put the shielded literal text back in place of its placeholder
    for my $n (0 .. $#literals) {
        s/`$n`/$literals[$n]/;
    }

    return $_;
}