""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.

   Written by Raymond D. Hettinger <python at rcn.com>
   Copyright (c) 2003 Python Software Foundation.  All rights reserved.

Designed to catch common markup errors including:
* Unbalanced or mismatched parentheses, brackets, and braces.
* Unbalanced or mismatched \\begin and \\end blocks.
* Misspelled or invalid LaTeX commands.
* Use of forward slashes instead of backslashes for commands.
* Table line size mismatches.

Sample command line usage:
    python texcheck.py -k chapterheading -m lib/librandomtex *.tex

Options:
    -m:         Munge parentheses and brackets.  [0,n) would normally mismatch.
    -k keyword: Keyword is a valid LaTeX command.  Do not include the backslash.
    -d:         Delimiter check only (useful for non-LaTeX files).
    -h:         Help
    -s lineno:  Start at lineno (useful for skipping complex sections).
    -v:         Verbose.  Trace the matching of \\begin and \\end blocks.
"""
import getopt
import glob
import re
import sys
from itertools import izip, count, islice
# Whitespace-separated list of LaTeX commands (each with its leading
# backslash) that the checker accepts as valid.  checkit() splits this
# string into a set, so ordering and repeated entries do not matter.
cmdstr = r"""
\section \module \declaremodule \modulesynopsis \moduleauthor
\sectionauthor \versionadded \code \class \method \begin
\optional \var \ref \end \subsection \lineiii \hline \label
\indexii \textrm \ldots \keyword \stindex \index \item \note
\withsubitem \ttindex \footnote \citetitle \samp \opindex
\noindent \exception \strong \dfn \ctype \obindex \character
\indexiii \function \bifuncindex \refmodule \refbimodindex
\subsubsection \nodename \member \chapter \emph \ASCII \UNIX
\regexp \program \production \token \productioncont \term
\grammartoken \lineii \seemodule \file \EOF \documentclass
\usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
\tableofcontents \kbd \programopt \envvar \refstmodindex
\cfunction \constant \NULL \moreargs \cfuncline \cdata
\textasciicircum \n \ABC \setindexsubitem \versionchanged
\deprecated \seetext \newcommand \POSIX \pep \warning \rfc
\verbatiminput \methodline \textgreater \seetitle \lineiv
\funclineni \ulink \manpage \funcline \dataline \unspecified
\textbackslash \mimetype \mailheader \seepep \textunderscore
\longprogramopt \infinity \plusminus \shortversion \version
\refmodindex \seerfc \makeindex \makemodindex \renewcommand
\indexname \appendix \protect \indexiv \mbox \textasciitilde
\platform \seeurl \leftmargin \labelwidth \localmoduletable
\LaTeX \copyright \memberline \backslash \pi \centerline
\caption \vspace \textwidth \menuselection \textless
\makevar \csimplemacro \menuselection \bfcode \sub \release
\email \kwindex \refexmodindex \filenq \e \menuselection
\exindex \linev \newsgroup \verbatim \setshortversion
\author \authoraddress \paragraph \subparagraph \cmemberline
\textbar \C \seelink
"""
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that closing delimiter matches most recent opening delimiter.

    c_lineno, c_symbol -- line number and text of the closing delimiter
        (a punctuation character or an environment name).
    openers -- stack (list) of (lineno, symbol) tuples for the pending
        open delimiters; the most recent entry is popped, whether or not
        it turns out to match.
    pairmap -- maps a closing punctuation character to the opener(s) that
        may precede it.  A closer missing from the map (an environment
        name) must be matched by an opener with the same text.

    Problems are reported on standard output; nothing is returned.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        # Single-argument call form prints the same text whether "print"
        # is a statement (Python 2) or a function (Python 3).
        print("\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol))
        return
    if o_symbol in pairmap.get(c_symbol, [c_symbol]):
        return
    print("\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno))
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -d          delimiters only checking
        -v          verbose trace of delimiter matching
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    All findings are printed to standard output.  The return value is
    always 0.
    """
    texcmd = re.compile(r'\\[A-Za-z]+')
    falsetexcmd = re.compile(r'\/([A-Za-z]+)')      # Mismarked with forward slash

    validcmds = set(cmdstr.split())
    for cmd in morecmds:
        validcmds.add('\\' + cmd)

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}            # Munged openers
    else:
        pairmap = {']': '[', ')': '('}              # Normal opener for a given closer
    openpunct = set('([')                           # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')
    # The class must be A-Za-z: the range "A-z" would also take in the
    # punctuation characters that sit between 'Z' and 'a' in ASCII.
    doubledwords = re.compile(r'(\b[A-Za-z]+\b) \b\1\b')
    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')

    openers = []                                    # Stack of pending open delimiters
    bracestack = []                                 # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    startline = int(opts.get('-s', '1'))
    lineno = 0                                      # Stays 0 when nothing is scanned

    # Number the lines from startline while skipping the ones before it.
    for lineno, line in enumerate(islice(source, startline - 1, None), startline):
        line = line.rstrip()

        # Check balancing of open/close parenthesis, brackets, and begin/end blocks
        for begend, name, punct in delimiters.findall(line):
            if '-v' in opts:
                # First half of the trace line; the second half is printed
                # once the delimiter has been processed.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and '-d' not in opts:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and '-d' not in opts:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if '-v' in opts:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opening, closing in braces.findall(line):
            if opening == '{':
                bracestack.append(lineno)
            if closing == '}':
                try:
                    bracestack.pop()
                except IndexError:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))

        # Optionally, skip LaTeX specific checks
        if '-d' in opts:
            continue

        # Warn whenever forward slashes encountered with a LaTeX command
        for cmd in falsetexcmd.findall(line):
            if '822' in line or '.html' in line:
                continue    # Ignore false positives for urls and for /rfc822
            if '\\' + cmd in validcmds:
                print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd))

        # Check for markup requiring {} for correct spacing
        for cmd in spacingmarkup.findall(line):
            print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno))

        # Validate commands
        nc = line.find(r'\newcommand')
        if nc != -1:
            start = line.find('{', nc)
            end = line.find('}', start)
            # Register the new command only when both braces were found;
            # a -1 from find() would otherwise add an unrelated slice.
            if -1 < start < end:
                validcmds.add(line[start+1:end])
        for cmd in texcmd.findall(line):
            if cmd not in validcmds:
                print(r'Warning, unknown tex cmd on line %d: \%s' % (lineno, cmd))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

        # Style guide warnings
        if 'e.g.' in line or 'i.e.' in line:
            print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,))

        for dw in doubledwords.findall(line):
            print(r'Doubled word warning. "%s" on line %d' % (dw, lineno))

    lastline = lineno
    # Anything still on the stacks was opened but never closed.
    for o_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, o_lineno))
    for b_lineno in bracestack:
        print("Unmatched { on line %d" % (b_lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0
def main(args=None):
    """Run the checker from the command line.

    args -- list of command line arguments; defaults to sys.argv[1:].

    Returns the exit status:
        0  help was shown (-h or no arguments at all)
        1  no file was named, or the wildcard patterns matched nothing
        2  a file could not be opened (remaining files are not checked)
    Otherwise the largest value returned by checkit() for the files
    checked.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or args == []:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a fresh list.  Splicing the matches into
    # arglist while iterating over it skips the entry that follows any
    # pattern with no matches.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)
    if not filenames:
        # Without this check max() below would fail on an empty list.
        print('No files match the given patterns')
        return 1

    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            # Name the file that actually failed, not the first argument.
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)


if __name__ == '__main__':
    sys.exit(main())