#-----------------------------------------------------------------------------
# ply: lex.py
#
# Author: David M. Beazley (dave@dabeaz.com)
#
# Copyright (C) 2001-2006, David M. Beazley
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# See the file COPYING for a complete copy of the LGPL.
#-----------------------------------------------------------------------------
__version__ = "2.2"
import re, sys, types
from . import util
# Regular expression used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
# Available instance types. This is used when parsers are defined by a class.
# In Python3 the InstanceType and ObjectType are no more, they've passed, ceased
# to be, they are ex-classes along with old-style classes
try:
_INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
_INSTANCETYPE = object
# Exception raised when an invalid token is encountered and no default error
# handler is defined.
class LexError(Exception):
def __init__(self,message,s):
self.args = (message,)
self.text = s
# Token class
class LexToken(object):
def __str__(self):
return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
def __repr__(self):
return str(self)
def skip(self,n):
self.lexer.skip(n)
# -----------------------------------------------------------------------------
# Lexer class
#
# This class encapsulates all of the methods and data associated with a lexer.
#
# input() - Store a new string in the lexer
# token() - Get the next token
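#
# A typical scanning loop, sketched as a comment (the 'data' string and the
# printing are illustrative):
#
#    lexer.input(data)          # feed a string to the lexer
#    while 1:
#        tok = lexer.token()    # next LexToken, or None at end of input
#        if not tok: break
#        print(tok.type, tok.value, tok.lineno, tok.lexpos)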
# -----------------------------------------------------------------------------
class Lexer:
def __init__(self):
self.lexre = None # Master regular expression. This is a list of
# tuples (re,findex) where re is a compiled
# regular expression and findex is a list
# mapping regex group numbers to rules
self.lexretext = None # Current regular expression strings
self.lexstatere = {} # Dictionary mapping lexer states to master regexs
self.lexstateretext = {} # Dictionary mapping lexer states to regex strings
self.lexstate = "INITIAL" # Current lexer state
self.lexstatestack = [] # Stack of lexer states
self.lexstateinfo = None # State information
self.lexstateignore = {} # Dictionary of ignored characters for each state
self.lexstateerrorf = {} # Dictionary of error functions for each state
self.lexreflags = 0 # Optional re compile flags
self.lexdata = None # Actual input data (as a string)
self.lexpos = 0 # Current position in input text
self.lexlen = 0 # Length of the input text
self.lexerrorf = None # Error rule (if any)
        self.lextokens = None         # Dictionary of valid token names
self.lexignore = "" # Ignored characters
self.lexliterals = "" # Literal characters that can be passed through
self.lexmodule = None # Module
self.lineno = 1 # Current line number
self.lexdebug = 0 # Debugging mode
self.lexoptimize = 0 # Optimized mode
def clone(self,object=None):
c = Lexer()
c.lexstatere = self.lexstatere
c.lexstateinfo = self.lexstateinfo
c.lexstateretext = self.lexstateretext
c.lexstate = self.lexstate
c.lexstatestack = self.lexstatestack
c.lexstateignore = self.lexstateignore
c.lexstateerrorf = self.lexstateerrorf
c.lexreflags = self.lexreflags
c.lexdata = self.lexdata
c.lexpos = self.lexpos
c.lexlen = self.lexlen
c.lextokens = self.lextokens
c.lexdebug = self.lexdebug
c.lineno = self.lineno
c.lexoptimize = self.lexoptimize
c.lexliterals = self.lexliterals
c.lexmodule = self.lexmodule
# If the object parameter has been supplied, it means we are attaching the
# lexer to a new object. In this case, we have to rebind all methods in
# the lexstatere and lexstateerrorf tables.
if object:
newtab = { }
for key, ritem in self.lexstatere.items():
newre = []
for cre, findex in ritem:
newfindex = []
for f in findex:
if not f or not f[0]:
newfindex.append(f)
continue
newfindex.append((getattr(object,f[0].__name__),f[1]))
newre.append((cre,newfindex))
newtab[key] = newre
c.lexstatere = newtab
c.lexstateerrorf = { }
for key, ef in self.lexstateerrorf.items():
c.lexstateerrorf[key] = getattr(object,ef.__name__)
c.lexmodule = object
# Set up other attributes
c.begin(c.lexstate)
return c
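    # A sketch of typical use (the sub-scan shown is illustrative): a clone
    # shares the compiled rules but can scan a different string independently.
    #
    #   sublexer = lexer.clone()
    #   sublexer.input("x = 1")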
# ------------------------------------------------------------
# writetab() - Write lexer information to a table file
# ------------------------------------------------------------
def writetab(self,tabfile):
tf = open(tabfile+".py","w")
tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
tf.write("_lextokens = %s\n" % repr(self.lextokens))
tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
tabre = { }
for key, lre in self.lexstatere.items():
titem = []
for i in range(len(lre)):
titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1])))
tabre[key] = titem
tf.write("_lexstatere = %s\n" % repr(tabre))
tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
taberr = { }
for key, ef in self.lexstateerrorf.items():
if ef:
taberr[key] = ef.__name__
else:
taberr[key] = None
tf.write("_lexstateerrorf = %s\n" % repr(taberr))
tf.close()
# ------------------------------------------------------------
# readtab() - Read lexer information from a tab file
# ------------------------------------------------------------
def readtab(self,tabfile,fdict):
exec("import %s as lextab" % tabfile)
self.lextokens = lextab._lextokens
self.lexreflags = lextab._lexreflags
self.lexliterals = lextab._lexliterals
self.lexstateinfo = lextab._lexstateinfo
self.lexstateignore = lextab._lexstateignore
self.lexstatere = { }
self.lexstateretext = { }
for key,lre in lextab._lexstatere.items():
titem = []
txtitem = []
for i in range(len(lre)):
titem.append((re.compile(lre[i][0],lextab._lexreflags),_names_to_funcs(lre[i][1],fdict)))
txtitem.append(lre[i][0])
self.lexstatere[key] = titem
self.lexstateretext[key] = txtitem
self.lexstateerrorf = { }
for key,ef in lextab._lexstateerrorf.items():
self.lexstateerrorf[key] = fdict[ef]
self.begin('INITIAL')
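    # Note: readtab()/writetab() are driven by lex(optimize=1, lextab=...).
    # The table file is written once by writetab() and re-imported here on
    # later runs, bypassing rule validation (see lex() below).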
# ------------------------------------------------------------
# input() - Push a new string into the lexer
# ------------------------------------------------------------
def input(self,s):
if not (isinstance(s,util.bytes_type) or isinstance(s, util.string_type)):
raise ValueError("Expected a string")
self.lexdata = s
self.lexpos = 0
self.lexlen = len(s)
# ------------------------------------------------------------
# begin() - Changes the lexing state
# ------------------------------------------------------------
def begin(self,state):
if state not in self.lexstatere:
raise ValueError("Undefined state")
self.lexre = self.lexstatere[state]
self.lexretext = self.lexstateretext[state]
self.lexignore = self.lexstateignore.get(state,"")
self.lexerrorf = self.lexstateerrorf.get(state,None)
self.lexstate = state
# ------------------------------------------------------------
# push_state() - Changes the lexing state and saves old on stack
# ------------------------------------------------------------
def push_state(self,state):
self.lexstatestack.append(self.lexstate)
self.begin(state)
# ------------------------------------------------------------
# pop_state() - Restores the previous state
# ------------------------------------------------------------
def pop_state(self):
self.begin(self.lexstatestack.pop())
# ------------------------------------------------------------
# current_state() - Returns the current lexing state
# ------------------------------------------------------------
def current_state(self):
return self.lexstate
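    # ------------------------------------------------------------
    # Example of state handling in rule functions (a sketch; assumes
    # the calling module declared states = (('ccode','exclusive'),)
    # and the rule names shown are illustrative):
    #
    #   def t_lbrace(t):
    #       r'\{'
    #       t.lexer.push_state('ccode')   # enter the exclusive state
    #
    #   def t_ccode_rbrace(t):
    #       r'\}'
    #       t.lexer.pop_state()           # return to the previous state
    # ------------------------------------------------------------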
# ------------------------------------------------------------
# skip() - Skip ahead n characters
# ------------------------------------------------------------
def skip(self,n):
self.lexpos += n
# ------------------------------------------------------------
# token() - Return the next token from the Lexer
#
# Note: This function has been carefully implemented to be as fast
# as possible. Don't make changes unless you really know what
# you are doing
# ------------------------------------------------------------
def token(self):
# Make local copies of frequently referenced attributes
lexpos = self.lexpos
lexlen = self.lexlen
lexignore = self.lexignore
lexdata = self.lexdata
while lexpos < lexlen:
            # Short-circuit the common case of whitespace, tabs, and other ignored characters
if lexdata[lexpos] in lexignore:
lexpos += 1
continue
# Look for a regular expression match
for lexre,lexindexfunc in self.lexre:
m = lexre.match(lexdata,lexpos)
if not m: continue
# Set last match in lexer so that rules can access it if they want
self.lexmatch = m
# Create a token for return
tok = LexToken()
tok.value = m.group()
tok.lineno = self.lineno
tok.lexpos = lexpos
tok.lexer = self
lexpos = m.end()
i = m.lastindex
func,tok.type = lexindexfunc[i]
self.lexpos = lexpos
if not func:
# If no token type was set, it's an ignored token
if tok.type: return tok
break
                # If func is not callable, it's an ignored token
                if not callable(func):
break
# If token is processed by a function, call it
newtok = func(tok)
                # Every function must return a token; if it returns nothing, we move on to the next token
if not newtok:
lexpos = self.lexpos # This is here in case user has updated lexpos.
break
# Verify type of the token. If not in the token map, raise an error
if not self.lexoptimize:
if newtok.type not in self.lextokens:
raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
func.__code__.co_filename, func.__code__.co_firstlineno,
func.__name__, newtok.type),lexdata[lexpos:])
return newtok
else:
# No match, see if in literals
if lexdata[lexpos] in self.lexliterals:
tok = LexToken()
tok.value = lexdata[lexpos]
tok.lineno = self.lineno
tok.lexer = self
tok.type = tok.value
tok.lexpos = lexpos
self.lexpos = lexpos + 1
return tok
# No match. Call t_error() if defined.
if self.lexerrorf:
tok = LexToken()
tok.value = self.lexdata[lexpos:]
tok.lineno = self.lineno
tok.type = "error"
tok.lexer = self
tok.lexpos = lexpos
self.lexpos = lexpos
newtok = self.lexerrorf(tok)
if lexpos == self.lexpos:
# Error method didn't change text position at all. This is an error.
raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
lexpos = self.lexpos
if not newtok: continue
return newtok
self.lexpos = lexpos
raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
self.lexpos = lexpos + 1
if self.lexdata is None:
raise RuntimeError("No input string given with input()")
return None
# -----------------------------------------------------------------------------
# _validate_file()
#
# This checks to see if there are duplicated t_rulename() functions or strings
# in the lexer input file. This is done using a simple regular expression
# match on each line of the file.
# -----------------------------------------------------------------------------
def _validate_file(filename):
import os.path
base,ext = os.path.splitext(filename)
if ext != '.py': return 1 # No idea what the file is. Return OK
try:
f = open(filename)
lines = f.readlines()
f.close()
except IOError:
return 1 # Oh well
fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
counthash = { }
linen = 1
noerror = 1
for l in lines:
m = fre.match(l)
if not m:
m = sre.match(l)
if m:
name = m.group(1)
prev = counthash.get(name)
if not prev:
counthash[name] = linen
else:
print("%s:%d: Rule %s redefined. Previously defined on line %d" % (filename,linen,name,prev))
noerror = 0
linen += 1
return noerror
# -----------------------------------------------------------------------------
# _funcs_to_names()
#
# Given a list of regular expression functions, this converts it to a list
# suitable for output to a table file
# -----------------------------------------------------------------------------
def _funcs_to_names(funclist):
result = []
for f in funclist:
if f and f[0]:
result.append((f[0].__name__,f[1]))
else:
result.append(f)
return result
# -----------------------------------------------------------------------------
# _names_to_funcs()
#
# Given a list of regular expression function names, this converts it back to
# functions.
# -----------------------------------------------------------------------------
def _names_to_funcs(namelist,fdict):
result = []
for n in namelist:
if n and n[0]:
result.append((fdict[n[0]],n[1]))
else:
result.append(n)
return result
# -----------------------------------------------------------------------------
# _form_master_re()
#
# This function takes a list of all of the regex components and attempts to
# form the master regular expression. Given limitations in the Python re
# module (older versions limit a pattern to 100 named groups), it may be
# necessary to break the master regex into separate expressions.
# -----------------------------------------------------------------------------
def _form_master_re(relist,reflags,ldict):
if not relist: return []
regex = "|".join(relist)
try:
lexre = re.compile(regex,re.VERBOSE | reflags)
# Build the index to function map for the matching engine
lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
for f,i in lexre.groupindex.items():
handle = ldict.get(f,None)
if type(handle) in (types.FunctionType, types.MethodType):
lexindexfunc[i] = (handle,handle.__name__[2:])
elif handle is not None:
# If rule was specified as a string, we build an anonymous
# callback function to carry out the action
if f.find("ignore_") > 0:
lexindexfunc[i] = (None,None)
print("IGNORE", f)
else:
lexindexfunc[i] = (None, f[2:])
return [(lexre,lexindexfunc)],[regex]
except Exception as e:
m = int(len(relist)/2)
if m == 0: m = 1
llist, lre = _form_master_re(relist[:m],reflags,ldict)
rlist, rre = _form_master_re(relist[m:],reflags,ldict)
return llist+rlist, lre+rre
# -----------------------------------------------------------------------------
# def _statetoken(s,names)
#
# Given a declaration name s of the form "t_rulename" and a dictionary whose keys are
# state names, this function returns a tuple (states,tokenname) where states
# is a tuple of state names and tokenname is the name of the token. For example,
# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
# -----------------------------------------------------------------------------
def _statetoken(s,names):
nonstate = 1
parts = s.split("_")
for i in range(1,len(parts)):
if parts[i] not in names and parts[i] != 'ANY': break
if i > 1:
states = tuple(parts[1:i])
else:
states = ('INITIAL',)
if 'ANY' in states:
states = tuple(names.keys())
tokenname = "_".join(parts[i:])
return (states,tokenname)
# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
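#
# A minimal rule module, sketched as a comment (the names and patterns are
# illustrative; see the PLY documentation for the full conventions):
#
#    tokens = ('NUMBER','PLUS')
#    t_PLUS   = r'\+'
#    t_ignore = ' \t'
#
#    def t_NUMBER(t):
#        r'\d+'
#        t.value = int(t.value)
#        return t
#
#    def t_error(t):
#        t.lexer.skip(1)
#
#    lexer = lex()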
# -----------------------------------------------------------------------------
def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0):
global lexer
ldict = None
stateinfo = { 'INITIAL' : 'inclusive'}
error = 0
files = { }
lexobj = Lexer()
lexobj.lexdebug = debug
lexobj.lexoptimize = optimize
global token,input
if nowarn: warn = 0
else: warn = 1
if object: module = object
if module:
# User supplied a module object.
if isinstance(module, types.ModuleType):
ldict = module.__dict__
elif isinstance(module, _INSTANCETYPE):
_items = [(k,getattr(module,k)) for k in dir(module)]
ldict = { }
for (i,v) in _items:
ldict[i] = v
else:
raise ValueError("Expected a module or instance")
lexobj.lexmodule = module
else:
# No module given. We might be able to get information from the caller.
try:
raise RuntimeError
except RuntimeError:
e,b,t = sys.exc_info()
f = t.tb_frame
f = f.f_back # Walk out to our calling function
ldict = f.f_globals # Grab its globals dictionary
if optimize and lextab:
try:
lexobj.readtab(lextab,ldict)
token = lexobj.token
input = lexobj.input
lexer = lexobj
return lexobj
except ImportError:
pass
# Get the tokens, states, and literals variables (if any)
if (module and isinstance(module,_INSTANCETYPE)):
tokens = getattr(module,"tokens",None)
states = getattr(module,"states",None)
literals = getattr(module,"literals","")
else:
tokens = ldict.get("tokens",None)
states = ldict.get("states",None)
literals = ldict.get("literals","")
if not tokens:
raise SyntaxError("lex: module does not define 'tokens'")
if not (isinstance(tokens,list) or isinstance(tokens,tuple)):
raise SyntaxError("lex: tokens must be a list or tuple.")
# Build a dictionary of valid token names
lexobj.lextokens = { }
if not optimize:
for n in tokens:
if not _is_identifier.match(n):
print("lex: Bad token name '%s'" % n)
error = 1
if warn and n in lexobj.lextokens:
print("lex: Warning. Token '%s' multiply defined." % n)
lexobj.lextokens[n] = None
else:
for n in tokens: lexobj.lextokens[n] = None
if debug:
print("lex: tokens = '%s'" % list(lexobj.lextokens.keys()))
try:
for c in literals:
if not (isinstance(c,util.bytes_type) or isinstance(c, util.string_type)) or len(c) > 1:
print("lex: Invalid literal %s. Must be a single character" % repr(c))
error = 1
continue
except TypeError:
print("lex: Invalid literals specification. literals must be a sequence of characters.")
error = 1
lexobj.lexliterals = literals
# Build statemap
if states:
if not (isinstance(states,tuple) or isinstance(states,list)):
print("lex: states must be defined as a tuple or list.")
error = 1
else:
for s in states:
if not isinstance(s,tuple) or len(s) != 2:
print("lex: invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')" % repr(s))
error = 1
continue
name, statetype = s
if isinstance(name, util.string_type):
original_name = name
name = util.encode_input(name)
if not isinstance(name,util.bytes_type) or len(original_name) != len(name):
print("lex: state name %s must be a byte string" % repr(original_name))
error = 1
continue
if not (statetype == 'inclusive' or statetype == 'exclusive'):
print("lex: state type for state %s must be 'inclusive' or 'exclusive'" % name)
error = 1
continue
if name in stateinfo:
print("lex: state '%s' already defined." % name)
error = 1
continue
stateinfo[name] = statetype
    # Get a list of symbols with the t_ prefix
tsymbols = [f for f in ldict.keys() if f[:2] == 't_' ]
# Now build up a list of functions and a list of strings
funcsym = { } # Symbols defined as functions
strsym = { } # Symbols defined as strings
toknames = { } # Mapping of symbols to token names
for s in stateinfo.keys():
funcsym[s] = []
strsym[s] = []
ignore = { } # Ignore strings by state
errorf = { } # Error functions by state
if len(tsymbols) == 0:
raise SyntaxError("lex: no rules of the form t_rulename are defined.")
for f in tsymbols:
t = ldict[f]
states, tokname = _statetoken(f,stateinfo)
toknames[f] = tokname
        if callable(t):
for s in states: funcsym[s].append((f,t))
elif (isinstance(t, util.bytes_type) or isinstance(t,util.string_type)):
for s in states: strsym[s].append((f,t))
else:
print("lex: %s not defined as a function or string" % f)
error = 1
# Sort the functions by line number
for f in funcsym.values():
f.sort(key=lambda x: x[1].__code__.co_firstlineno)
# Sort the strings by regular expression length
for s in strsym.values():
s.sort(key=lambda x: len(x[1]))
regexs = { }
# Build the master regular expressions
for state in stateinfo.keys():
regex_list = []
# Add rules defined by functions first
for fname, f in funcsym[state]:
line = f.__code__.co_firstlineno
file = f.__code__.co_filename
files[file] = None
tokname = toknames[fname]
ismethod = isinstance(f, types.MethodType)
if not optimize:
nargs = f.__code__.co_argcount
if ismethod:
reqargs = 2
else:
reqargs = 1
if nargs > reqargs:
print("%s:%d: Rule '%s' has too many arguments." % (file,line,f.__name__))
error = 1
continue
if nargs < reqargs:
print("%s:%d: Rule '%s' requires an argument." % (file,line,f.__name__))
error = 1
continue
if tokname == 'ignore':
print("%s:%d: Rule '%s' must be defined as a string." % (file,line,f.__name__))
error = 1
continue
if tokname == 'error':
errorf[state] = f
continue
if f.__doc__:
if not optimize:
try:
c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags)
if c.match(""):
print("%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__))
error = 1
continue
except re.error as e:
print("%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e))
if '#' in f.__doc__:
print("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__))
error = 1
continue
if debug:
print("lex: Adding rule %s -> '%s' (state '%s')" % (f.__name__,f.__doc__, state))
# Okay. The regular expression seemed okay. Let's append it to the master regular
# expression we're building
regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__))
else:
print("%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__))
# Now add all of the simple rules
for name,r in strsym[state]:
tokname = toknames[name]
if tokname == 'ignore':
ignore[state] = r
continue
if not optimize:
if tokname == 'error':
raise SyntaxError("lex: Rule '%s' must be defined as a function" % name)
error = 1
continue
if tokname not in lexobj.lextokens and tokname.find("ignore_") < 0:
print("lex: Rule '%s' defined for an unspecified token %s." % (name,tokname))
error = 1
continue
try:
c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags)
if (c.match("")):
print("lex: Regular expression for rule '%s' matches empty string." % name)
error = 1
continue
except re.error as e:
print("lex: Invalid regular expression for rule '%s'. %s" % (name,e))
if '#' in r:
print("lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name)
error = 1
continue
if debug:
print("lex: Adding rule %s -> '%s' (state '%s')" % (name,r,state))
regex_list.append("(?P<%s>%s)" % (name,r))
if not regex_list:
print("lex: No rules defined for state '%s'" % state)
error = 1
regexs[state] = regex_list
if not optimize:
for f in files.keys():
if not _validate_file(f):
error = 1
if error:
raise SyntaxError("lex: Unable to build lexer.")
# From this point forward, we're reasonably confident that we can build the lexer.
# No more errors will be generated, but there might be some warning messages.
# Build the master regular expressions
for state in regexs.keys():
lexre, re_text = _form_master_re(regexs[state],reflags,ldict)
lexobj.lexstatere[state] = lexre
lexobj.lexstateretext[state] = re_text
if debug:
for i in range(len(re_text)):
print("lex: state '%s'. regex[%d] = '%s'" % (state, i, re_text[i]))
# For inclusive states, we need to add the INITIAL state
for state,type in stateinfo.items():
if state != "INITIAL" and type == 'inclusive':
lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
lexobj.lexstateinfo = stateinfo
lexobj.lexre = lexobj.lexstatere["INITIAL"]
lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
# Set up ignore variables
lexobj.lexstateignore = ignore
lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")
# Set up error functions
lexobj.lexstateerrorf = errorf
lexobj.lexerrorf = errorf.get("INITIAL",None)
if warn and not lexobj.lexerrorf:
print("lex: Warning. no t_error rule is defined.")
# Check state information for ignore and error rules
for s,stype in stateinfo.items():
if stype == 'exclusive':
if warn and s not in errorf:
print("lex: Warning. no error rule is defined for exclusive state '%s'" % s)
if warn and s not in ignore and lexobj.lexignore:
print("lex: Warning. no ignore rule is defined for exclusive state '%s'" % s)
elif stype == 'inclusive':
if s not in errorf:
errorf[s] = errorf.get("INITIAL",None)
if s not in ignore:
ignore[s] = ignore.get("INITIAL","")
# Create global versions of the token() and input() functions
token = lexobj.token
input = lexobj.input
lexer = lexobj
# If in optimize mode, we write the lextab
if lextab and optimize:
lexobj.writetab(lextab)
return lexobj
# -----------------------------------------------------------------------------
# runmain()
#
# This runs the lexer as a main program
# -----------------------------------------------------------------------------
def runmain(lexer=None,data=None):
if not data:
try:
filename = sys.argv[1]
f = open(filename)
data = f.read()
f.close()
except IndexError:
print("Reading from standard input (type EOF to end):")
data = sys.stdin.read()
if lexer:
_input = lexer.input
else:
_input = input
_input(data)
if lexer:
_token = lexer.token
else:
_token = token
while 1:
tok = _token()
if not tok: break
print("(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos))
# -----------------------------------------------------------------------------
# @TOKEN(regex)
#
# This decorator function can be used to set the regular expression on a function
# when its docstring might need to be set in an alternative way
# -----------------------------------------------------------------------------
def TOKEN(r):
def set_doc(f):
f.__doc__ = r
return f
return set_doc
# Alternative spelling of the TOKEN decorator
Token = TOKEN
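# A sketch of typical use (the identifier pattern is illustrative): @TOKEN
# attaches a regular expression that is built at runtime and therefore cannot
# be written as a literal docstring.
#
#    digit      = r'([0-9])'
#    nondigit   = r'([_A-Za-z])'
#    identifier = r'(' + nondigit + r'(' + digit + r'|' + nondigit + r')*)'
#
#    @TOKEN(identifier)
#    def t_ID(t):
#        return t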