Added Lexer state change

Author: David Beazley
Date:   2017-09-01 06:31:51 -05:00
Parent: 636197b9fd
Commit: d8903d8301


@@ -32,7 +32,7 @@
 # -----------------------------------------------------------------------------
 
 __version__ = '0.1'
-__all__ = ['Lexer']
+__all__ = ['Lexer', 'LexerStateChange']
 
 import re
 from collections import OrderedDict
@@ -62,6 +62,14 @@ class LexerBuildError(Exception):
     '''
     pass
 
+class LexerStateChange(Exception):
+    '''
+    Exception raised to force a lexing state change
+    '''
+    def __init__(self, newstate, tok=None):
+        self.newstate = newstate
+        self.tok = tok
+
 class Token(object):
     '''
     Representation of a single token.
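LexerStateChange itself is only a message object: it names the lexer class to switch to and can optionally carry a token to emit at the switch point. Below is a minimal, self-contained sketch of raising and catching it; the OtherLexer stand-in and the string used as a token value are hypothetical, and the exception class is re-declared locally only so the snippet runs on its own.

class LexerStateChange(Exception):
    '''Exception raised to force a lexing state change'''
    def __init__(self, newstate, tok=None):
        self.newstate = newstate   # lexer class to switch to
        self.tok = tok             # optional token to emit at the switch

class OtherLexer:
    '''Hypothetical stand-in for another Lexer subclass.'''

try:
    # A token rule would raise this to request a switch to OtherLexer,
    # optionally handing back a token to yield before switching.
    raise LexerStateChange(OtherLexer, tok='opening-quote token')
except LexerStateChange as e:
    assert e.newstate is OtherLexer
    assert e.tok == 'opening-quote token'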
@@ -192,65 +200,71 @@ class Lexer(metaclass=LexerMeta):
                 raise LexerBuildError('literals must be specified as strings')
 
     def tokenize(self, text, lineno=1, index=0):
-        # Local copies of frequently used values
-        _ignored_tokens = self._ignored_tokens
-        _master_re = self._master_re
-        _ignore = self.ignore
-        _token_funcs = self._token_funcs
-        _literals = self._literals
-
-        self.text = text
-        try:
-            while True:
-                try:
-                    if text[index] in _ignore:
-                        index += 1
-                        continue
-                except IndexError:
-                    break
-
-                tok = Token()
-                tok.lineno = lineno
-                tok.index = index
-                m = _master_re.match(text, index)
-                if m:
-                    index = m.end()
-                    tok.value = m.group()
-                    tok.type = m.lastgroup
-                    if tok.type in _token_funcs:
-                        self.index = index
-                        self.lineno = lineno
-                        tok = _token_funcs[tok.type](self, tok)
-                        index = self.index
-                        lineno = self.lineno
-                        if not tok:
-                            continue
-                    if tok.type in _ignored_tokens:
-                        continue
-                    yield tok
-                else:
-                    # No match, see if the character is in literals
-                    if text[index] in _literals:
-                        tok.value = text[index]
-                        tok.type = tok.value
-                        index += 1
-                        yield tok
-                    else:
-                        # A lexing error
-                        self.index = index
-                        self.lineno = lineno
-                        self.error(text[index:])
-                        index = self.index
-                        lineno = self.lineno
-
-        # Set the final state of the lexer before exiting (even if exception)
-        finally:
-            self.text = text
-            self.index = index
-            self.lineno = lineno
+        while True:
+            # Local copies of frequently used values
+            _ignored_tokens = self._ignored_tokens
+            _master_re = self._master_re
+            _ignore = self.ignore
+            _token_funcs = self._token_funcs
+            _literals = self._literals
+
+            self.text = text
+            try:
+                while True:
+                    try:
+                        if text[index] in _ignore:
+                            index += 1
+                            continue
+                    except IndexError:
+                        return
+
+                    tok = Token()
+                    tok.lineno = lineno
+                    tok.index = index
+                    m = _master_re.match(text, index)
+                    if m:
+                        index = m.end()
+                        tok.value = m.group()
+                        tok.type = m.lastgroup
+                        if tok.type in _token_funcs:
+                            self.index = index
+                            self.lineno = lineno
+                            tok = _token_funcs[tok.type](self, tok)
+                            index = self.index
+                            lineno = self.lineno
+                            if not tok:
+                                continue
+                        if tok.type in _ignored_tokens:
+                            continue
+                        yield tok
+                    else:
+                        # No match, see if the character is in literals
+                        if text[index] in _literals:
+                            tok.value = text[index]
+                            tok.type = tok.value
+                            index += 1
+                            yield tok
+                        else:
+                            # A lexing error
+                            self.index = index
+                            self.lineno = lineno
+                            self.error(text[index:])
+                            index = self.index
+                            lineno = self.lineno
+
+            except LexerStateChange as e:
+                self.__class__ = e.newstate
+                if e.tok:
+                    yield e.tok
+
+            # Set the final state of the lexer before exiting (even if exception)
+            finally:
+                self.text = text
+                self.index = index
+                self.lineno = lineno
 
     # Default implementations of the error handler. May be changed in subclasses
     def error(self, value):
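The interesting part of the rewritten tokenize() is the pairing of the new outer while True: loop with the except LexerStateChange handler: assigning e.newstate to self.__class__ re-binds the instance to another lexer class, and the next pass of the outer loop re-reads ignore, _master_re, and the token functions from that class, so lexing resumes at the current position under the new rules. The sketch below is a stripped-down, self-contained illustration of that class-swap pattern; the UpperMode/LowerMode classes and their handle() hook are hypothetical and only mirror the structure of the real method, and LexerStateChange is re-declared locally so the snippet runs on its own.

class LexerStateChange(Exception):
    '''Re-declared locally so the sketch is self-contained.'''
    def __init__(self, newstate, tok=None):
        self.newstate = newstate
        self.tok = tok

class ModeBase:
    def tokenize(self, text, index=0):
        self.index = index
        while True:                             # outer loop: restarted on a state change
            try:
                while self.index < len(text):
                    ch = text[self.index]
                    self.index += 1
                    yield from self.handle(ch)  # may raise LexerStateChange
                return
            except LexerStateChange as e:
                self.__class__ = e.newstate     # re-bind the instance to the new mode
                if e.tok:
                    yield e.tok                 # emit the token carried by the exception

class UpperMode(ModeBase):
    '''Hypothetical mode: uppercases input until it sees '!'.'''
    def handle(self, ch):
        if ch == '!':
            # Switch to LowerMode, emitting the '!' that triggered the change.
            raise LexerStateChange(LowerMode, ch)
        yield ch.upper()

class LowerMode(ModeBase):
    '''Hypothetical mode: lowercases everything.'''
    def handle(self, ch):
        yield ch.lower()

print(list(UpperMode().tokenize('ab!CD')))      # ['A', 'B', '!', 'c', 'd']

Note that, as in the commit, the handler uses a plain truth test (if e.tok:), so a falsy token value would be silently dropped rather than yielded.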