Improved usage of string interning. This should fix the problems with jython.
--HG--
branch : trunk
diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index 6124396..39aac9f 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -34,34 +34,82 @@
float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')
+# intern the tokens and keep references to them
+TOKEN_ADD = intern('add')
+TOKEN_ASSIGN = intern('assign')
+TOKEN_COLON = intern('colon')
+TOKEN_COMMA = intern('comma')
+TOKEN_DIV = intern('div')
+TOKEN_DOT = intern('dot')
+TOKEN_EQ = intern('eq')
+TOKEN_FLOORDIV = intern('floordiv')
+TOKEN_GT = intern('gt')
+TOKEN_GTEQ = intern('gteq')
+TOKEN_LBRACE = intern('lbrace')
+TOKEN_LBRACKET = intern('lbracket')
+TOKEN_LPAREN = intern('lparen')
+TOKEN_LT = intern('lt')
+TOKEN_LTEQ = intern('lteq')
+TOKEN_MOD = intern('mod')
+TOKEN_MUL = intern('mul')
+TOKEN_NE = intern('ne')
+TOKEN_PIPE = intern('pipe')
+TOKEN_POW = intern('pow')
+TOKEN_RBRACE = intern('rbrace')
+TOKEN_RBRACKET = intern('rbracket')
+TOKEN_RPAREN = intern('rparen')
+TOKEN_SEMICOLON = intern('semicolon')
+TOKEN_SUB = intern('sub')
+TOKEN_TILDE = intern('tilde')
+TOKEN_WHITESPACE = intern('whitespace')
+TOKEN_FLOAT = intern('float')
+TOKEN_INTEGER = intern('integer')
+TOKEN_NAME = intern('name')
+TOKEN_STRING = intern('string')
+TOKEN_OPERATOR = intern('operator')
+TOKEN_BLOCK_BEGIN = intern('block_begin')
+TOKEN_BLOCK_END = intern('block_end')
+TOKEN_VARIABLE_BEGIN = intern('variable_begin')
+TOKEN_VARIABLE_END = intern('variable_end')
+TOKEN_RAW_BEGIN = intern('raw_begin')
+TOKEN_RAW_END = intern('raw_end')
+TOKEN_COMMENT_BEGIN = intern('comment_begin')
+TOKEN_COMMENT_END = intern('comment_end')
+TOKEN_COMMENT = intern('comment')
+TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
+TOKEN_LINESTATEMENT_END = intern('linestatement_end')
+TOKEN_DATA = intern('data')
+TOKEN_INITIAL = intern('initial')
+TOKEN_EOF = intern('eof')
+
# bind operators to token types
operators = {
- '+': 'add',
- '-': 'sub',
- '/': 'div',
- '//': 'floordiv',
- '*': 'mul',
- '%': 'mod',
- '**': 'pow',
- '~': 'tilde',
- '[': 'lbracket',
- ']': 'rbracket',
- '(': 'lparen',
- ')': 'rparen',
- '{': 'lbrace',
- '}': 'rbrace',
- '==': 'eq',
- '!=': 'ne',
- '>': 'gt',
- '>=': 'gteq',
- '<': 'lt',
- '<=': 'lteq',
- '=': 'assign',
- '.': 'dot',
- ':': 'colon',
- '|': 'pipe',
- ',': 'comma',
- ';': 'semicolon'
+ '+': TOKEN_ADD,
+ '-': TOKEN_SUB,
+ '/': TOKEN_DIV,
+ '//': TOKEN_FLOORDIV,
+ '*': TOKEN_MUL,
+ '%': TOKEN_MOD,
+ '**': TOKEN_POW,
+ '~': TOKEN_TILDE,
+ '[': TOKEN_LBRACKET,
+ ']': TOKEN_RBRACKET,
+ '(': TOKEN_LPAREN,
+ ')': TOKEN_RPAREN,
+ '{': TOKEN_LBRACE,
+ '}': TOKEN_RBRACE,
+ '==': TOKEN_EQ,
+ '!=': TOKEN_NE,
+ '>': TOKEN_GT,
+ '>=': TOKEN_GTEQ,
+ '<': TOKEN_LT,
+ '<=': TOKEN_LTEQ,
+ '=': TOKEN_ASSIGN,
+ '.': TOKEN_DOT,
+ ':': TOKEN_COLON,
+ '|': TOKEN_PIPE,
+ ',': TOKEN_COMMA,
+ ';': TOKEN_SEMICOLON
}
reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
@@ -146,7 +194,7 @@
def next(self):
token = self.stream.current
- if token.type == 'eof':
+ if token.type is TOKEN_EOF:
self.stream.close()
raise StopIteration()
self.stream.next()
@@ -165,7 +213,7 @@
self.name = name
self.filename = filename
self.closed = False
- self.current = Token(1, 'initial', '')
+ self.current = Token(1, TOKEN_INITIAL, '')
self.next()
def __iter__(self):
@@ -173,7 +221,7 @@
def __nonzero__(self):
"""Are we at the end of the stream?"""
- return bool(self._pushed) or self.current.type != 'eof'
+ return bool(self._pushed) or self.current.type is not TOKEN_EOF
eos = property(lambda x: not x.__nonzero__(), doc=__nonzero__.__doc__)
@@ -210,7 +258,7 @@
rv = self.current
if self._pushed:
self.current = self._pushed.popleft()
- elif self.current.type != 'eof':
+ elif self.current.type is not TOKEN_EOF:
try:
self.current = self._next()
except StopIteration:
@@ -219,7 +267,7 @@
def close(self):
"""Close the stream."""
- self.current = Token(self.current.lineno, 'eof', '')
+ self.current = Token(self.current.lineno, TOKEN_EOF, '')
self._next = None
self.closed = True
@@ -230,7 +278,7 @@
if not self.current.test(expr):
if ':' in expr:
expr = expr.split(':')[1]
- if self.current.type == 'eof':
+ if self.current.type is TOKEN_EOF:
raise TemplateSyntaxError('unexpected end of template, '
'expected %r.' % expr,
self.current.lineno,
@@ -278,12 +326,12 @@
# lexing rules for tags
tag_rules = [
- (whitespace_re, 'whitespace', None),
- (float_re, 'float', None),
- (integer_re, 'integer', None),
- (name_re, 'name', None),
- (string_re, 'string', None),
- (operator_re, 'operator', None)
+ (whitespace_re, TOKEN_WHITESPACE, None),
+ (float_re, TOKEN_FLOAT, None),
+ (integer_re, TOKEN_INTEGER, None),
+ (name_re, TOKEN_NAME, None),
+ (string_re, TOKEN_STRING, None),
+ (operator_re, TOKEN_OPERATOR, None)
]
# assamble the root lexing rule. because "|" is ungreedy
@@ -326,48 +374,48 @@
)] + [
'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
for n, r in root_tag_rules
- ])), ('data', '#bygroup'), '#bygroup'),
+ ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
# data
(c('.+'), 'data', None)
],
# comments
- 'comment_begin': [
+ TOKEN_COMMENT_BEGIN: [
(c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
e(environment.comment_end_string),
e(environment.comment_end_string),
block_suffix_re
- )), ('comment', 'comment_end'), '#pop'),
+ )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
(c('(.)'), (Failure('Missing end of comment tag'),), None)
],
# blocks
- 'block_begin': [
+ TOKEN_BLOCK_BEGIN: [
(c('(?:\-%s\s*|%s)%s' % (
e(environment.block_end_string),
e(environment.block_end_string),
block_suffix_re
- )), 'block_end', '#pop'),
+ )), TOKEN_BLOCK_END, '#pop'),
] + tag_rules,
# variables
- 'variable_begin': [
+ TOKEN_VARIABLE_BEGIN: [
(c('\-%s\s*|%s' % (
e(environment.variable_end_string),
e(environment.variable_end_string)
- )), 'variable_end', '#pop')
+ )), TOKEN_VARIABLE_END, '#pop')
] + tag_rules,
# raw block
- 'raw_begin': [
+ TOKEN_RAW_BEGIN: [
(c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
e(environment.block_start_string),
e(environment.block_start_string),
e(environment.block_end_string),
e(environment.block_end_string),
block_suffix_re
- )), ('data', 'raw_end'), '#pop'),
+ )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
(c('(.)'), (Failure('Missing end of raw directive'),), None)
],
# line statements
- 'linestatement_begin': [
- (c(r'\s*(\n|$)'), 'linestatement_end', '#pop')
+ TOKEN_LINESTATEMENT_BEGIN: [
+ (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
] + tag_rules
}