Cleanup. Improvements to lexer pattern decorator
parent 36cf652eae
commit 7bfadaaab3
@@ -86,11 +86,12 @@ class CalcParser(Parser):
 
 if __name__ == '__main__':
+    lexer = CalcLexer()
     parser = CalcParser()
     while True:
         try:
-            s = input('calc > ')
+            text = input('calc > ')
         except EOFError:
             break
-        if s:
-            parser.parse(CalcLexer(s))
+        if text:
+            parser.parse(lexer.tokenize(text))
 
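Not part of the diff, but a hedged sketch of the reworked API this hunk adopts: one lexer instance is reused and its tokenize() method yields Token objects (CalcLexer is assumed to define its tokens as in the example file; the attribute names match Token.__repr__ below):

lexer = CalcLexer()
for tok in lexer.tokenize('3 + 4 * 5'):
    # Each tok carries .type, .value, .lineno, .index
    print(tok.type, tok.value)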
setup.py (new executable file, 28 lines)
@@ -0,0 +1,28 @@
+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
+tests_require = ['pytest']
+
+setup(name = "sly",
+      description="SLY - Sly Lex Yacc",
+      long_description = """
+SLY is an implementation of lex and yacc for Python 3.
+""",
+      license="""BSD""",
+      version = "0.0",
+      author = "David Beazley",
+      author_email = "dave@dabeaz.com",
+      maintainer = "David Beazley",
+      maintainer_email = "dave@dabeaz.com",
+      url = "https://github.com/dabeaz/sly",
+      packages = ['sly'],
+      tests_require = tests_require,
+      extras_require = {
+          'test': tests_require,
+          },
+      classifiers = [
+          'Programming Language :: Python :: 3',
+          ]
+      )
sly/_meta.py
@@ -7,7 +7,7 @@ class NoDupeDict(OrderedDict):
         super().__setitem__(key, value)
 
 class RuleMeta(type):
-    @staticmethod
+    @classmethod
     def __prepare__(meta, *args, **kwargs):
         d = NoDupeDict()
         def _(rule):
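For readers unfamiliar with __prepare__: Python looks it up on the metaclass and calls it before the class body executes, and the mapping it returns becomes the namespace the body populates — which is how RuleMeta injects '_' and rejects duplicates. A standalone sketch (illustrative names, not sly's API):

class LoggingDict(dict):
    def __setitem__(self, key, value):
        print('defining %s' % key)      # observe every binding in the body
        super().__setitem__(key, value)

class Meta(type):
    @classmethod
    def __prepare__(meta, name, bases, **kwargs):
        return LoggingDict()            # used while the class body executes

    def __new__(meta, name, bases, namespace):
        # __new__ then receives the custom namespace built above
        return super().__new__(meta, name, bases, dict(namespace))

class Demo(metaclass=Meta):
    x = 1    # prints 'defining x' (plus the implicit dunder entries)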
sly/lex.py (92 changes)
@@ -37,8 +37,6 @@ __all__ = ['Lexer']
 import re
 from collections import OrderedDict
 
-from ._meta import RuleMeta
-
 class LexError(Exception):
     '''
     Exception raised if an invalid character is encountered and no default
@@ -70,12 +68,43 @@ class Token(object):
     def __repr__(self):
         return 'Token(%s, %r, %d, %d)' % (self.type, self.value, self.lineno, self.index)
 
-class Lexer(metaclass=RuleMeta):
+class NoDupeDict(OrderedDict):
     '''
-    Representation of a single lexing state. This class is automatically constructed
-    by the RuleDict during class definition.
+    Special dictionary that prohibits duplicate definitions.
     '''
+    def __setitem__(self, key, value):
+        if key in self and not isinstance(value, property):
+            raise AttributeError('Name %s redefined' % (key))
+        super().__setitem__(key, value)
+
+class LexerMeta(type):
+    '''
+    Metaclass for collecting lexing rules
+    '''
+    @classmethod
+    def __prepare__(meta, *args, **kwargs):
+        d = NoDupeDict()
+        def _(pattern):
+            def decorate(func):
+                if hasattr(func, 'pattern'):
+                    if isinstance(pattern, str):
+                        func.pattern = ''.join(['(', pattern, ')|(', func.pattern, ')'])
+                    else:
+                        func.pattern = b''.join([b'(', pattern, b')|(', func.pattern, b')'])
+                else:
+                    func.pattern = pattern
+                return func
+            return decorate
+        d['_'] = _
+        return d
+
+    def __new__(meta, clsname, bases, attributes):
+        del attributes['_']
+        cls = super().__new__(meta, clsname, bases, attributes)
+        cls._build(list(attributes.items()))
+        return cls
+
+class Lexer(metaclass=LexerMeta):
     # These attributes may be defined in subclasses
     tokens = set()
     literals = set()
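The '_' decorator above is the commit's headline change: applying it more than once to the same function now folds the patterns into a single regex alternation (with a parallel bytes branch for byte patterns). A standalone sketch of the string case — the NUMBER function and its patterns are invented for illustration:

def _(pattern):
    def decorate(func):
        if hasattr(func, 'pattern'):
            func.pattern = ''.join(['(', pattern, ')|(', func.pattern, ')'])
        else:
            func.pattern = pattern
        return func
    return decorate

@_(r'0x[0-9a-fA-F]+')    # applied second: wrapped around the existing pattern
@_(r'\d+')               # applied first: sets the initial pattern
def NUMBER(tok):
    return tok

print(NUMBER.pattern)    # (0x[0-9a-fA-F]+)|(\d+)

Since decorators apply bottom-up, the pattern listed last in the source is matched last in the alternation.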
@@ -90,11 +119,6 @@ class Lexer(metaclass=RuleMeta):
     _ignored_tokens = set()
     _input_type = str
 
-    def __init__(self, text, lineno=1, index=0):
-        self.text = text
-        self.lineno = lineno
-        self.index = index
-
     @classmethod
     def _collect_rules(cls, definitions):
         '''
@@ -102,7 +126,7 @@ class Lexer(metaclass=RuleMeta):
         '''
         rules = []
         for key, value in definitions:
-            if (key in cls.tokens) or key.startswith('ignore_') or hasattr(value, 'rule'):
+            if (key in cls.tokens) or key.startswith('ignore_') or hasattr(value, 'pattern'):
                 rules.append((key, value))
         return rules
 
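As _collect_rules() now shows, a class-body name qualifies as a rule in one of three ways. A hedged illustration (the lexer, token names, and patterns are invented; only the three qualifying conditions come from the diff):

class MyLexer(Lexer):
    tokens = {'NUMBER', 'PLUS'}
    ignore = ' \t'

    NUMBER = r'\d+'             # collected: the name appears in tokens
    ignore_comment = r'\#.*'    # collected: the name starts with 'ignore_'

    @_(r'\+')
    def PLUS(self, t):          # collected: the function carries a .pattern
        return t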
@@ -130,7 +154,7 @@ class Lexer(metaclass=RuleMeta):
             pattern = value
 
         elif callable(value):
-            pattern = value.rule
+            pattern = value.pattern
             cls._token_funcs[tokname] = value
 
         # Form the regular expression component
@@ -178,46 +202,55 @@ class Lexer(metaclass=RuleMeta):
             raise LexerBuildError("literals specifier not using same type as tokens (%s)" %
                                   cls._input_type.__name__)
 
-    def __iter__(self):
-        text = self.text
-        index = self.index
+    def tokenize(self, text, lineno=1, index=0):
+        # Local copies of frequently used values
+        _ignored_tokens = self._ignored_tokens
+        _master_re = self._master_re
+        _ignore = self.ignore
+        _token_funcs = self._token_funcs
+        _literals = self._literals
+
+        self.text = text
+        try:
             while True:
                 try:
-                    if text[index] in self.ignore:
+                    if text[index] in _ignore:
                         index += 1
                         continue
                 except IndexError:
                     if self.eof:
                         text = self.eof()
                         if text is not None:
-                            self.text = text
-                            self.index = 0
                             index = 0
                             continue
                     break
 
                 tok = Token()
-                tok.lineno = self.lineno
+                tok.lineno = lineno
                 tok.index = index
-                m = self._master_re.match(text, index)
+                m = _master_re.match(text, index)
                 if m:
                     index = m.end()
                     tok.value = m.group()
                     tok.type = m.lastgroup
-                    if tok.type in self._token_funcs:
+                    if tok.type in _token_funcs:
                         self.index = index
-                        tok = self._token_funcs[tok.type](self, tok)
+                        self.lineno = lineno
+                        tok = _token_funcs[tok.type](self, tok)
                         index = self.index
+                        lineno = self.lineno
                         if not tok:
                             continue
 
-                    if tok.type in self._ignored_tokens:
+                    if tok.type in _ignored_tokens:
                         continue
 
                     yield tok
 
                 else:
                     # No match, see if the character is in literals
-                    if text[index] in self._literals:
+                    if text[index] in _literals:
                         tok.value = text[index]
                         tok.type = tok.value
                         index += 1
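The rewritten loop calls self.eof() whenever the text runs out and, if it returns more text, resumes lexing from index 0. A hedged sketch of a lexer that feeds input in chunks this way (ChunkLexer and its attributes are invented; the import path assumes the package layout from setup.py above, and the eof hook behaves as the loop shows):

from sly.lex import Lexer

class ChunkLexer(Lexer):
    tokens = {'NUMBER'}
    ignore = ' \t\n'
    NUMBER = r'\d+'

    def __init__(self, chunks):
        self._chunks = iter(chunks)

    def eof(self):
        # Called by tokenize() when text is exhausted; returning a string
        # resumes lexing on it, returning None ends the token stream.
        return next(self._chunks, None)

lexer = ChunkLexer(['12 34 ', '5 678'])
for tok in lexer.tokenize(''):
    print(tok.type, tok.value)    # NUMBER 12, NUMBER 34, NUMBER 5, NUMBER 678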
@@ -225,9 +258,18 @@ class Lexer(metaclass=RuleMeta):
                     else:
                         # A lexing error
                         self.index = index
-                        self.error(self.text[self.index:])
+                        self.lineno = lineno
+                        self.error(text[index:])
                         index = self.index
+                        lineno = self.lineno
+
+        # Set the final state of the lexer before exiting (even if exception)
+        finally:
+            self.text = text
+            self.index = index
+            self.lineno = lineno
 
+    # Default implementations of methods that may be subclassed by users
     def error(self, value):
         raise LexError("Illegal character %r at index %d" % (value[0], self.index), value)
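Because tokenize() now re-reads self.index (and self.lineno) after calling error(), a subclass can recover from bad input instead of raising, e.g. by skipping past the offending character. A hedged sketch (ForgivingLexer and its token are invented):

class ForgivingLexer(Lexer):
    tokens = {'NUMBER'}
    ignore = ' \t'
    NUMBER = r'\d+'

    def error(self, value):
        print('Skipping illegal character %r' % value[0])
        self.index += 1    # tokenize() resumes from the updated index

With input like '12 $ 34' this yields two NUMBER tokens and prints one skip message for the '$'.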