| Index: third_party/jinja2/lexer.py
|
| diff --git a/third_party/jinja2/lexer.py b/third_party/jinja2/lexer.py
|
| index 0d3f69617159938700a52957ea339dde5378fb7a..a50128507bb98ac6bc57a76afe8a0776a2df2c49 100644
|
| --- a/third_party/jinja2/lexer.py
|
| +++ b/third_party/jinja2/lexer.py
|
| @@ -15,10 +15,13 @@
|
| :license: BSD, see LICENSE for more details.
|
| """
|
| import re
|
| +
|
| from operator import itemgetter
|
| from collections import deque
|
| from jinja2.exceptions import TemplateSyntaxError
|
| -from jinja2.utils import LRUCache, next
|
| +from jinja2.utils import LRUCache
|
| +from jinja2._compat import next, iteritems, implements_iterator, text_type, \
|
| + intern
|
|
|
|
|
| # cache for the lexers. Exists in order to be able to have multiple
|
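The compatibility helpers imported above live in the new jinja2._compat module, which is not part of this file. As a rough sketch only (the real module ships with Jinja2 2.7 and may differ in detail), a py2/py3 shim exposing these names typically looks like this:

    # Hypothetical sketch of a shim providing the names imported above.
    import sys

    PY2 = sys.version_info[0] == 2
    next = next                      # builtin on 2.6+ and 3.x, re-exported

    if PY2:
        text_type = unicode          # the py2 builtin
        intern = intern              # py2 builtin, re-exported

        def iteritems(d):
            return d.iteritems()

        def implements_iterator(cls):
            # py2 iterators expose next(); alias the py3-style __next__ to it
            cls.next = cls.__next__
            del cls.__next__
            return cls
    else:
        text_type = str
        from sys import intern

        def iteritems(d):
            return iter(d.items())

        def implements_iterator(cls):
            # nothing to do: __next__ is already the py3 protocol method
            return cls

The decorator is why the next()/__next__() renames further down stay usable from Python 2 callers.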
| @@ -126,7 +129,7 @@ operators = {
|
| ';': TOKEN_SEMICOLON
|
| }
|
|
|
| -reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
|
| +reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
|
| assert len(operators) == len(reverse_operators), 'operators dropped'
|
| operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
|
| sorted(operators, key=lambda x: -len(x))))
|
| @@ -197,7 +200,7 @@ def compile_rules(environment):
|
|
|
| if environment.line_statement_prefix is not None:
|
| rules.append((len(environment.line_statement_prefix), 'linestatement',
|
| - r'^\s*' + e(environment.line_statement_prefix)))
|
| + r'^[ \t\v]*' + e(environment.line_statement_prefix)))
|
| if environment.line_comment_prefix is not None:
|
| rules.append((len(environment.line_comment_prefix), 'linecomment',
|
| r'(?:^|(?<=\S))[^\S\r\n]*' +
|
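The change from r'^\s*' to r'^[ \t\v]*' is behavioral, not cosmetic: \s also matches newlines, and these rules are compiled with re.M, so the old prefix could swallow blank lines in front of a line statement. A standalone illustration with the re module, using '#' as an example line-statement prefix:

    import re

    text = "\n\n# for item in seq"

    old = re.compile(r'^\s*' + re.escape('#'), re.M)       # matches from position 0
    new = re.compile(r'^[ \t\v]*' + re.escape('#'), re.M)  # stays on the '#' line

    print(old.search(text).start())   # 0 -> the two newlines are consumed too
    print(new.search(text).start())   # 2 -> only same-line whitespace may precede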
| @@ -262,6 +265,7 @@ class Token(tuple):
|
| )
|
|
|
|
|
| +@implements_iterator
|
| class TokenStreamIterator(object):
|
| """The iterator for tokenstreams. Iterate over the stream
|
| until the eof token is reached.
|
| @@ -273,7 +277,7 @@ class TokenStreamIterator(object):
|
| def __iter__(self):
|
| return self
|
|
|
| - def next(self):
|
| + def __next__(self):
|
| token = self.stream.current
|
| if token.type is TOKEN_EOF:
|
| self.stream.close()
|
| @@ -282,6 +286,7 @@ class TokenStreamIterator(object):
|
| return token
|
|
|
|
|
| +@implements_iterator
|
| class TokenStream(object):
|
| """A token stream is an iterable that yields :class:`Token`\s. The
|
| parser however does not iterate over it but calls :meth:`next` to go
|
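Renaming next() to __next__() and applying @implements_iterator is the usual single-codebase iterator pattern: Python 3 looks for __next__, and on Python 2 the decorator is expected to alias it back to next. A stripped-down stand-in for the decorated stream classes, with the alias written out by hand:

    class ToyStream(object):
        """Toy stand-in for TokenStreamIterator: stops at a sentinel 'eof' value."""

        def __init__(self, items):
            self._iter = iter(items)
            self.current = next(self._iter)

        def __iter__(self):
            return self

        def __next__(self):                     # what Python 3 calls
            rv = self.current
            if rv == 'eof':
                raise StopIteration()
            self.current = next(self._iter, 'eof')
            return rv

        next = __next__                         # manual py2 alias, the decorator's job

    print(list(ToyStream(['a', 'b', 'eof'])))   # ['a', 'b']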
| @@ -289,7 +294,7 @@ class TokenStream(object):
|
| """
|
|
|
| def __init__(self, generator, name, filename):
|
| - self._next = iter(generator).next
|
| + self._iter = iter(generator)
|
| self._pushed = deque()
|
| self.name = name
|
| self.filename = filename
|
| @@ -300,8 +305,9 @@ class TokenStream(object):
|
| def __iter__(self):
|
| return TokenStreamIterator(self)
|
|
|
| - def __nonzero__(self):
|
| + def __bool__(self):
|
| return bool(self._pushed) or self.current.type is not TOKEN_EOF
|
| + __nonzero__ = __bool__ # py2
|
|
|
| eos = property(lambda x: not x, doc="Are we at the end of the stream?")
|
|
|
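The __bool__/__nonzero__ pair exists because truth-testing dispatches to different methods on the two interpreters: bool() calls __bool__ on Python 3 and __nonzero__ on Python 2. The alias keeps `if stream:` and the eos property working unchanged, as in this minimal example:

    class Flag(object):
        def __init__(self, value):
            self.value = value

        def __bool__(self):            # consulted by Python 3
            return bool(self.value)
        __nonzero__ = __bool__         # consulted by Python 2; harmless extra on 3

    print(bool(Flag(0)), bool(Flag(1)))   # False True
    print(not Flag(0))                    # True, which is what eos computes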
| @@ -319,7 +325,7 @@ class TokenStream(object):
|
|
|
| def skip(self, n=1):
|
| """Got n tokens ahead."""
|
| - for x in xrange(n):
|
| + for x in range(n):
|
| next(self)
|
|
|
| def next_if(self, expr):
|
| @@ -333,14 +339,14 @@ class TokenStream(object):
|
| """Like :meth:`next_if` but only returns `True` or `False`."""
|
| return self.next_if(expr) is not None
|
|
|
| - def next(self):
|
| + def __next__(self):
|
| """Go one token ahead and return the old one"""
|
| rv = self.current
|
| if self._pushed:
|
| self.current = self._pushed.popleft()
|
| elif self.current.type is not TOKEN_EOF:
|
| try:
|
| - self.current = self._next()
|
| + self.current = next(self._iter)
|
| except StopIteration:
|
| self.close()
|
| return rv
|
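Storing the iterator itself (self._iter) instead of its bound .next method keeps this portable, since the builtin next() works on any iterator in both Python versions. The _pushed deque is what gives the parser lookahead; a reduced version of that pattern:

    from collections import deque

    class Pushback(object):
        """Minimal pushback wrapper mirroring TokenStream's _pushed/_iter split."""

        def __init__(self, iterable):
            self._iter = iter(iterable)
            self._pushed = deque()

        def push(self, item):
            self._pushed.append(item)

        def __iter__(self):
            return self

        def __next__(self):
            if self._pushed:
                return self._pushed.popleft()
            return next(self._iter)     # StopIteration propagates naturally

    s = Pushback([1, 2, 3])
    first = next(s)      # 1
    s.push(first)        # put it back
    print(list(s))       # [1, 2, 3]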
| @@ -348,7 +354,7 @@ class TokenStream(object):
|
| def close(self):
|
| """Close the stream."""
|
| self.current = Token(self.current.lineno, TOKEN_EOF, '')
|
| - self._next = None
|
| + self._iter = None
|
| self.closed = True
|
|
|
| def expect(self, expr):
|
| @@ -383,7 +389,9 @@ def get_lexer(environment):
|
| environment.line_statement_prefix,
|
| environment.line_comment_prefix,
|
| environment.trim_blocks,
|
| - environment.newline_sequence)
|
| + environment.lstrip_blocks,
|
| + environment.newline_sequence,
|
| + environment.keep_trailing_newline)
|
| lexer = _lexer_cache.get(key)
|
| if lexer is None:
|
| lexer = Lexer(environment)
|
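Adding lstrip_blocks and keep_trailing_newline to the cache key matters because lexers are shared per configuration: every option that changes how the regexes are assembled has to distinguish cache entries, or two environments could wrongly share a lexer. A simplified sketch of that idea (the real code uses LRUCache and more key fields):

    _lexer_cache = {}

    def get_lexer_for(block_start, trim_blocks, lstrip_blocks, keep_trailing_newline):
        # every option that influences lexer construction is part of the key
        key = (block_start, trim_blocks, lstrip_blocks, keep_trailing_newline)
        lexer = _lexer_cache.get(key)
        if lexer is None:
            lexer = 'lexer-for-%r' % (key,)   # stand-in for Lexer(environment)
            _lexer_cache[key] = lexer
        return lexer

    a = get_lexer_for('{%', False, True, False)
    b = get_lexer_for('{%', False, False, False)
    print(a is b)   # False: environments differing in lstrip_blocks get distinct lexers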
| @@ -414,7 +422,7 @@ class Lexer(object):
|
| (operator_re, TOKEN_OPERATOR, None)
|
| ]
|
|
|
| - # assamble the root lexing rule. because "|" is ungreedy
|
| + # assemble the root lexing rule. because "|" is ungreedy
|
| # we have to sort by length so that the lexer continues working
|
| # as expected when we have parsing rules like <% for block and
|
| # <%= for variables. (if someone wants asp like syntax)
|
| @@ -425,7 +433,44 @@ class Lexer(object):
|
| # block suffix if trimming is enabled
|
| block_suffix_re = environment.trim_blocks and '\\n?' or ''
|
|
|
| + # strip leading spaces if lstrip_blocks is enabled
|
| + prefix_re = {}
|
| + if environment.lstrip_blocks:
|
| + # use '{%+' to manually disable lstrip_blocks behavior
|
| + no_lstrip_re = e('+')
|
| + # detect overlap between block and variable or comment strings
|
| + block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
|
| + # make sure we don't mistake a block for a variable or a comment
|
| + m = block_diff.match(environment.comment_start_string)
|
| + no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
|
| + m = block_diff.match(environment.variable_start_string)
|
| + no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
|
| +
|
| + # detect overlap between comment and variable strings
|
| + comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
|
| + m = comment_diff.match(environment.variable_start_string)
|
| + no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''
|
| +
|
| + lstrip_re = r'^[ \t]*'
|
| + block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
|
| + lstrip_re,
|
| + e(environment.block_start_string),
|
| + no_lstrip_re,
|
| + e(environment.block_start_string),
|
| + )
|
| + comment_prefix_re = r'%s%s%s|%s\+?' % (
|
| + lstrip_re,
|
| + e(environment.comment_start_string),
|
| + no_variable_re,
|
| + e(environment.comment_start_string),
|
| + )
|
| + prefix_re['block'] = block_prefix_re
|
| + prefix_re['comment'] = comment_prefix_re
|
| + else:
|
| + block_prefix_re = '%s' % e(environment.block_start_string)
|
| +
|
| self.newline_sequence = environment.newline_sequence
|
| + self.keep_trailing_newline = environment.keep_trailing_newline
|
|
|
| # global lexing rules
|
| self.rules = {
|
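All of the prefix_re machinery above exists to implement lstrip_blocks: horizontal whitespace in front of a block tag is stripped, and '{%+' opts a single tag back out. Assuming a Jinja2 release (2.7 or newer) where the option exists, the observable effect is roughly:

    from jinja2 import Environment

    tmpl = "    {% if True %}x{% endif %}"

    plain  = Environment().from_string(tmpl).render()
    lstrip = Environment(lstrip_blocks=True).from_string(tmpl).render()
    print(repr(plain))    # '    x'  -- indentation before the tag is kept
    print(repr(lstrip))   # 'x'      -- lstrip_blocks removes it

    # '{%+' disables the stripping for one tag when lstrip_blocks is enabled
    keep = Environment(lstrip_blocks=True).from_string(
        "    {%+ if True %}y{% endif %}").render()
    print(repr(keep))     # '    y'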
| @@ -434,11 +479,11 @@ class Lexer(object):
|
| (c('(.*?)(?:%s)' % '|'.join(
|
| [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
|
| e(environment.block_start_string),
|
| - e(environment.block_start_string),
|
| + block_prefix_re,
|
| e(environment.block_end_string),
|
| e(environment.block_end_string)
|
| )] + [
|
| - r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
|
| + r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
|
| for n, r in root_tag_rules
|
| ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
|
| # data
|
| @@ -472,7 +517,7 @@ class Lexer(object):
|
| TOKEN_RAW_BEGIN: [
|
| (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
|
| e(environment.block_start_string),
|
| - e(environment.block_start_string),
|
| + block_prefix_re,
|
| e(environment.block_end_string),
|
| e(environment.block_end_string),
|
| block_suffix_re
|
| @@ -491,7 +536,7 @@ class Lexer(object):
|
| }
|
|
|
| def _normalize_newlines(self, value):
|
| - """Called for strings and template data to normlize it to unicode."""
|
| + """Called for strings and template data to normalize it to unicode."""
|
| return newline_re.sub(self.newline_sequence, value)
|
|
|
| def tokenize(self, source, name=None, filename=None, state=None):
|
| @@ -526,7 +571,7 @@ class Lexer(object):
|
| value = self._normalize_newlines(value[1:-1]) \
|
| .encode('ascii', 'backslashreplace') \
|
| .decode('unicode-escape')
|
| - except Exception, e:
|
| + except Exception as e:
|
| msg = str(e).split(':')[-1].strip()
|
| raise TemplateSyntaxError(msg, lineno, name, filename)
|
| # if we can express it as bytestring (ascii only)
|
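Only the except syntax changes here; the surrounding context decodes the raw text of a template string literal via 'unicode-escape', roughly like this:

    raw = r'hi\n\u00e9'   # the characters between the quotes, escapes still literal
    value = raw.encode('ascii', 'backslashreplace').decode('unicode-escape')
    print(repr(value))    # 'hi\né' -- escapes resolved to a real newline and U+00E9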
| @@ -549,7 +594,14 @@ class Lexer(object):
|
| """This method tokenizes the text and returns the tokens in a
|
| generator. Use this method if you just want to tokenize a template.
|
| """
|
| - source = '\n'.join(unicode(source).splitlines())
|
| + source = text_type(source)
|
| + lines = source.splitlines()
|
| + if self.keep_trailing_newline and source:
|
| + for newline in ('\r\n', '\r', '\n'):
|
| + if source.endswith(newline):
|
| + lines.append('')
|
| + break
|
| + source = '\n'.join(lines)
|
| pos = 0
|
| lineno = 1
|
| stack = ['root']
|
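The added block compensates for splitlines() dropping a final newline: when keep_trailing_newline is set, an empty entry is appended so the rejoined source keeps it, while the historical default (a single trailing newline is removed) stays intact. Assuming Jinja2 2.7 or newer:

    from jinja2 import Environment

    tmpl = "hello\n"
    default = Environment().from_string(tmpl).render()
    keeping = Environment(keep_trailing_newline=True).from_string(tmpl).render()
    print(repr(default))   # 'hello'    -- trailing newline stripped, as before
    print(repr(keeping))   # 'hello\n'  -- preserved with the new option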
| @@ -571,7 +623,7 @@ class Lexer(object):
|
| if m is None:
|
| continue
|
|
|
| - # we only match blocks and variables if brances / parentheses
|
| + # we only match blocks and variables if braces / parentheses
|
| # are balanced. continue parsing with the lower rule which
|
| # is the operator rule. do this only if the end tags look
|
| # like operators
|
| @@ -590,7 +642,7 @@ class Lexer(object):
|
| # yield for the current token the first named
|
| # group that matched
|
| elif token == '#bygroup':
|
| - for key, value in m.groupdict().iteritems():
|
| + for key, value in iteritems(m.groupdict()):
|
| if value is not None:
|
| yield lineno, key, value
|
| lineno += value.count('\n')
|
| @@ -647,7 +699,7 @@ class Lexer(object):
|
| stack.pop()
|
| # resolve the new state by group checking
|
| elif new_state == '#bygroup':
|
| - for key, value in m.groupdict().iteritems():
|
| + for key, value in iteritems(m.groupdict()):
|
| if value is not None:
|
| stack.append(key)
|
| break
|
| @@ -669,7 +721,7 @@ class Lexer(object):
|
| # publish new function and start again
|
| pos = pos2
|
| break
|
| - # if loop terminated without break we havn't found a single match
|
| + # if loop terminated without break we haven't found a single match
|
| # either we are at the end of the file or we have a problem
|
| else:
|
| # end of text
|
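From a caller's point of view the token stream API is unchanged by this patch: Environment.lex() still yields (lineno, token_type, value) tuples. A quick way to watch the ported lexer (and lstrip_blocks) in action, again assuming Jinja2 2.7 or newer:

    from jinja2 import Environment

    env = Environment(lstrip_blocks=True)
    for lineno, token_type, value in env.lex("  {% if x %}{{ x }}{% endif %}"):
        # the two leading spaces produce no data token because of lstrip_blocks
        print(lineno, token_type, repr(value))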
|
|