#!/usr/bin/perl
#
# Most of the files of Project Gutenberg are written in plain ASCII.
# They are easy to read on a screen, but the quality of a print-out
# is usually sub-optimal.  The aim of this script is to convert such
# a text automatically into a LaTeX file for high quality typesetting.
#
# Usage: the script is a filter; it reads the files named on the
# command line (or standard input) and writes LaTeX to standard output.
#
# There are some drawbacks, though: there is little consistency in the
# way PG texts are marked up; even guessing the author and title of a
# book requires some work.  Furthermore, it is sometimes possible to
# find mistakes such as inverted commas which have never been closed,
# markings which do not correspond to each other, etc.
#
# NOTE(review): the copy of this script that was reviewed had lost the
# text between the "standard Epilog" comment and the "\else" line of
# the LaTeX prolog.  Every piece that had to be reconstructed is marked
# with NOTE(review) below and must be confirmed against a pristine copy.

use strict;
use warnings;

# Conversion state, carried from one input line to the next.
my $quo      = 0;     # true while a double quote is open
my $ital     = 0;     # true while an _italic_ span is open
my $bold     = 0;     # not referenced by the code visible in this file
my $text     = 0;     # true while inside the body of the book
my $chapter  = 0;     # number of chapter headings seen so far
my $gottitle = 0;     # not referenced by the code visible in this file
my $chcg     = 0;     # true right after a chapter heading was consumed
my $title    = '';    # book title, once one has been guessed
my $author   = '';    # book author, once one has been guessed

# NOTE(review): the original start-of-text test was lost.  These are the
# markers that close the legal header of old and new PG files - TODO
# confirm against the original script.
my $START_OF_TEXT = qr/\*END\*THE SMALL PRINT|\*\*\*\s*START OF/;

# Run as a filter only when executed as a script, so that the helper
# subroutines below can also be loaded and exercised on their own.
main() unless caller;

# Read the input line by line and print the LaTeX translation.
sub main {
  LINE:
    while ( my $line = <> ) {
        $line = escape_tex($line);

        # Deduce title and author from the heading (if present).
        if (   $line =~ /Etext of (.*) by (.*)/
            || $line =~ /Gutenberg Etext, (.*) by (.*)/ )
        {
            my ( $found_title, $found_author ) = ( $1, $2 );
            print "% Title algorithm 1\n";
            $title  = trim_nonword($found_title);
            $author = $found_author;
            $author =~ s/\W+$//;
        }

        # Sometimes we are given title and author in a nice table...
        # (the capture takes everything after the label, newline included)
        if ( !$title && $line =~ /Title: (.*)/s ) {
            $title = $1;
            print "%Title algorithm 2\n";
        }
        if ( !$author && $line =~ /Author: (.*)/s ) {
            $author = $1;
        }

        # ... but this is not always the case...
        if ( !( $title || $author || $text ) && $line =~ /(.*), by (.*)/ ) {
            my ( $found_title, $found_author ) = ( $1, $2 );
            print "%Title algorithm 3\n";
            $title  = trim_nonword($found_title);
            $author = $found_author;
            $author =~ s/\W+$//;
        }

        # Begin a new chapter: skip the blank lines that follow the
        # heading, then open the chapter with a dropped capital.
        if ($chcg) {
            next LINE if $line =~ /^\n/;
            print "\\chapter{}\n";
            if ( $line =~ /^(\s*)\w/ ) {

                # \cappar grabs the first letter of the line by itself.
                print "\\par\\cappar ";
            }
            else {

                # The line opens with punctuation (e.g. a quote): hand
                # punctuation plus first letter to \docappar explicitly.
                print "\\par";
                $line =~ s/^(\s*)([^\w]*\w)/\\def\\a\{$2\}\\docappar /;
            }
            $chcg = 0;
        }

        if ($text) {

            # Replacements for the text.
            ( $line, $quo )  = convert_quotes( $line, $quo );
            ( $line, $ital ) = convert_italics( $line, $ital );

            # The heading line itself is dropped; \chapter{} is emitted
            # when the first non-blank line of the chapter arrives.
            if ( is_chapter_heading($line) ) {
                print "\\tableofcontents\n" if $chapter == 0;
                $chapter++;
                $chcg = 1;
                next LINE;
            }

            # Close quotes when paragraph ends
            # (I would advise to kill the editor, but this is
            # a voluntary effort, hence...)
            if ( $line =~ /^$/ && $quo ) {
                print "\\,''\n\n";
                $quo = 0;
            }

            if ( $line =~ /End (.*) Gutenberg/ ) {
                $text = 0;

                # Standard epilog.
                # NOTE(review): reconstructed - the original epilog
                # heredoc was lost; TODO confirm its contents.
                print "\\end{document}\n";
                next LINE;
            }

            # NOTE(review): reconstructed - the statement that emitted
            # the converted line was in the lost part of the script.
            print $line;
        }
        elsif ( $line =~ $START_OF_TEXT ) {
            print latex_prolog( $title, $author );
            $text = 1;
        }
    }

    # The end marker never showed up: close the document anyway so that
    # the output can still be compiled.
    print "\\end{document}\n" if $text;
    return;
}

# Apply the replacements needed on every line: drop carriage returns,
# turn ASCII dashes and dots into their TeX forms, escape &, % and $,
# and add a TeX inter-word space after abbreviations.
# Takes one line, returns the converted line.
sub escape_tex {
    my ($line) = @_;
    $line =~ s/\r//g;                                   # Remove CR
    $line =~ s/--/ --- /g;                 # TeX uses --- as punctuation
    $line =~ s/(\w)-(\w)/$1--$2/g;         # and -- as inter-word hyphen
    $line =~ s/\. \. \./\\dots /g;                      # dots
    $line =~ s/\.\.\./\\dots /g;    # dots (sometimes they are like this)
    $line =~ s/\&/\\\&/g;                               # &
    $line =~ s/\%/\\\%/g;                               # %
    $line =~ s/\$/\\\$/g;                               # $
    $line =~ s/([A-Z][a-z]*)\.\s([A-Z])/$1.\\ $2/mg;    # Mr., Dr., etc.
    return $line;
}

# Strip the non-word characters that surround a guessed title.
sub trim_nonword {
    my ($string) = @_;
    $string =~ s/^\W+//;
    $string =~ s/\W+$//;
    return $string;
}

# Replace every ASCII double quote of $line by TeX opening or closing
# quotes.  $open says whether a quote was left open by an earlier line.
# Returns the converted line and the new open/closed state.
sub convert_quotes {
    my ( $line, $open ) = @_;
    while ( $line =~ /"/ ) {
        if ($open) {
            $line =~ s/"/''/;

            # A single quote right before the closing quotes needs a
            # thin space, or TeX would mis-pair the three apostrophes.
            $line =~ s/'''/'\\,''/;
            $open = 0;
        }
        else {
            $line =~ s/"/``/;
            $open = 1;
        }
    }
    return ( $line, $open );
}

# Replace every underscore of $line by the start or the end of an
# {\it ...} group.  $open says whether a span was left open by an
# earlier line.  Returns the converted line and the new state.
sub convert_italics {
    my ( $line, $open ) = @_;
    while ( $line =~ /_/ ) {
        if ($open) {

            # Always close at the FIRST underscore; when white space
            # follows, it is replaced by the italic correction.
            $line =~ s/_(\s?)/$1 eq '' ? '}' : '}\\, '/e;
            $open = 0;
        }
        else {
            $line =~ s/_/\{\\it /;
            $open = 1;
        }
    }
    return ( $line, $open );
}

# Chapter marking is a mess: sometimes there is a number, sometimes
# there is not; the capitalisation might vary as well.
# Returns 1 when $line looks like a chapter heading, 0 otherwise.
# "Chapter" must be followed by a blank, a digit or a roman numeral, so
# that ordinary words such as "Chapters" are not taken for a heading.
sub is_chapter_heading {
    my ($line) = @_;
    return ( $line =~ /^CHAPTER/ || $line =~ /^\s*Chapter[IVXLCDM 0-9]/ )
      ? 1
      : 0;
}

# Build the LaTeX preamble, title page set-up and \begin{document} for
# a book with the given title and author.  Returns it as one string.
sub latex_prolog {
    my ( $title, $author ) = @_;

    # NOTE(review): this head is reconstructed; only the part from
    # "\else" onwards survived.  It holds the minimum the surviving
    # part relies on: a class providing \chapter, the geometry package,
    # \title and \author for \maketitle, and the conditional closed by
    # the surviving \else ... \fi.  TODO confirm against the original.
    my $head = <<"ENDHEAD";
\\documentclass{book}
\\usepackage{geometry}
\\title{$title}
\\author{$author}
\\ifx\\pdfoutput\\undefined
\\geometry{dvips}
ENDHEAD

    my $rest = <<"ENDPROLOG";
\\else
%\\usepackage{times}
%% Document info
\\pdfinfo{
  /Title ($title)
  /Author ($author)
}
\\pdfcompresslevel 9
\\geometry{pdftex}
\\fi
\\usepackage[british]{babel}
\\usepackage{fancyhdr}
\\usepackage{calc}
%% SIZES
\\geometry{a4paper,twoside}
\\geometry{tmargin=2cm,bmargin=4cm}
\\geometry{lmargin=3cm,rmargin=4cm}
%\\baselineskip=.8cm
\\setcounter{tocdepth}{4}
%%% DEFINE HEADINGS\n
\\pagestyle{fancy}
\\fancyhead[LE]{\\slshape \\thepage}
\\fancyhead[RE]{\\slshape \\MakeUppercase{$title}}
\\fancyhead[LO]{\\slshape \\leftmark}
\\fancyhead[RO]{\\slshape \\thepage}
\\fancyfoot[C]{}
\\makeatletter
% Fix-up Chapter (use empty pagestyle)
\\renewcommand\\\@chapapp{}
\\renewcommand\\chapter{\\if\@openright\\cleardoublepage%
                    \\else\\clearpage\\fi
                    \\thispagestyle{empty}%
                    \\global\\\@topnum\\z\@
                    \\\@afterindentfalse
                    \\secdef\\\@chapter\\\@schapter}
\\renewcommand\\thechapter{Chapter \\\@arabic\\c\@chapter}
\\makeatother
%% DROPPED CAPITALS (From TeX FAQ)
\\newcommand\\capfont{\\fontsize{55}{60}\\selectfont} %FONT (T1)
\\newbox\\capbox \\newcount\\capl \\def\\a{A} %BOX
\\def\\docappar{\\medbreak\\noindent\\setbox\\capbox\\hbox{%
\\capfont\\a}\\hskip-0.18em%
\\hangindent=\\wd\\capbox%
\\capl=\\ht\\capbox\\divide\\capl by\\baselineskip\\advance\\capl by1%
\\hangafter=-\\capl%
\\raise2pt\\hbox{\\vbox to6pt{\\hbox to0pt{\\hss\\box\\capbox}\\vss}}}
\\def\\cappar{\\afterassignment\\docappar\\noexpand\\let\\a }
%% End DROPPED CAPITALS
%% Title support
\\makeatletter
\\newcommand{\\HRule}{\\rule{\\linewidth}{1mm}}
\\renewcommand\\maketitle{\\begin{titlepage}%
  \\let\\footnotesize\\small
  \\let\\footnoterule\\relax
  \\let \\footnote \\thanks
  \\vspace*{\\stretch{1}}
  \\noindent\\HRule
  \\vskip.6cm
  \\begin{flushright}
  {\\Huge \\\@author} \\\\[.5cm]
  {\\Huge \\\@title} \\\\[.6cm]
  \\end{flushright}
  \\noindent\\HRule
  \\vspace*{\\stretch{2}}
  \\begin{center}
  {\\large Project Gutenberg e--text \\par}%
  \\end{center}\\par
  \\end{titlepage}%
  \\setcounter{footnote}{0}%
  \\global\\let\\thanks\\relax
  \\global\\let\\maketitle\\relax
  \\global\\let\\\@thanks\\\@empty
  \\global\\let\\\@author\\\@empty
  \\global\\let\\\@date\\\@empty
  \\global\\let\\\@title\\\@empty
  \\global\\let\\title\\relax
  \\global\\let\\author\\relax
  \\global\\let\\date\\relax
  \\global\\let\\and\\relax
}
\\makeatother
\\begin{document}
\\maketitle
ENDPROLOG

    return $head . $rest;
}