Initial work on lexer states (in progress)
parent d0e34417bc
commit 08988d2798

sly/lex.py (47 changed lines)

@@ -79,9 +79,10 @@ class Token(object):

 class TokenStr(str):
     @staticmethod
-    def __new__(cls, value):
+    def __new__(cls, value, before=None):
         self = super().__new__(cls, value)
         self.remap = { }
+        self.before = before
         return self

     def __setitem__(self, key, value):
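
TokenStr is a str subclass, so a token pattern can carry metadata while still being usable anywhere a plain pattern string is expected. A minimal sketch of what the new before argument records (the pattern and token name below are hypothetical):

    t = TokenStr(r'\d+', before='NAME')
    isinstance(t, str)   # True -- still behaves as the pattern string r'\d+'
    t.before             # 'NAME': the existing rule this one should be inserted before
    t.remap              # {}: filled in by __setitem__ for the ID['if'] = IF remapping form
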
@@ -89,12 +90,15 @@ class TokenStr(str):

 class LexerMetaDict(dict):
     '''
-    Special dictionary that prohits duplicate definitions in lexer specifications.
+    Special dictionary that prohibits duplicate definitions in lexer specifications.
     '''
     def __setitem__(self, key, value):
         if isinstance(value, str):
             value = TokenStr(value)
+
+        elif isinstance(value, tuple) and len(value) == 2:
+            value = TokenStr(*value)
+
         if key in self and not isinstance(value, property):
             prior = self[key]
             if isinstance(prior, str):
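
Since LexerMetaDict serves as the class namespace during lexer definition, this makes a two-item tuple in a lexer class body shorthand for TokenStr(pattern, before). A sketch of the intended spelling, assuming the quoted token-name style and a hypothetical lexer (the interface is still in progress per the commit message):

    from sly import Lexer

    class MyLexer(Lexer):
        tokens = { 'NAME', 'NUMBER', 'HEXNUMBER' }
        NAME      = r'[a-zA-Z_][a-zA-Z0-9_]*'
        NUMBER    = r'\d+'
        # Tuple form: (pattern, before) -- collect this rule ahead of NUMBER
        HEXNUMBER = (r'0x[0-9a-fA-F]+', 'NUMBER')
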
@@ -136,11 +140,12 @@ class LexerMeta(type):
             del attributes['_']

         remapping = { key: val.remap for key, val in attributes.items()
                       if getattr(val, 'remap', None) }
-        attributes = { key: str(val) if isinstance(val, TokenStr) else val
+        clsattributes = { key: str(val) if isinstance(val, TokenStr) else val
                        for key, val in attributes.items() }
-        cls = super().__new__(meta, clsname, bases, attributes)
+        cls = super().__new__(meta, clsname, bases, clsattributes)
         cls._remapping = remapping
-        cls._build(list(attributes.items()))
+        cls._attributes = attributes
+        cls._build()
         return cls

 class Lexer(metaclass=LexerMeta):
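
The metaclass now keeps two views of the class body: the class itself is created from clsattributes, in which every TokenStr has been flattened back to a plain str, while the raw namespace (with its before/remap metadata intact) is stashed on cls._attributes for _collect_rules to walk later. Roughly, for the hypothetical MyLexer above:

    MyLexer.NUMBER                  # plain str pattern; TokenStr stripped at class creation
    MyLexer._attributes['NUMBER']   # the original TokenStr, .before and .remap intact
    MyLexer._remapping              # {token: remap dict} for rules using ID['if'] = IF
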
@@ -158,20 +163,37 @@ class Lexer(metaclass=LexerMeta):
     _ignored_tokens = set()

     @classmethod
-    def _collect_rules(cls, definitions):
+    def _collect_rules(cls):
         '''
         Collect all of the rules from class definitions that look like tokens
         '''
+        definitions = list(cls._attributes.items())
         rules = []
+
+        for base in cls.__bases__:
+            if isinstance(base, LexerMeta):
+                rules.extend(base._collect_rules())
+
         for key, value in definitions:
             if (key in cls.tokens) or key.startswith('ignore_') or hasattr(value, 'pattern'):
-                rules.append((key, value))
+                # Check existing rules
+                for n, (rkey, _) in enumerate(rules):
+                    if rkey == key:
+                        rules[n] = (key, value)
+                        break
+                    elif isinstance(value, TokenStr) and value.before == rkey:
+                        rules.insert(n, (key, value))
+                        break
+                else:
+                    rules.append((key, value))
+                # rules.append((key, value))
             elif isinstance(value, str) and not key.startswith('_') and key not in {'ignore'}:
                 raise LexerBuildError(f'{key} does not match a name in tokens')

         return rules

     @classmethod
-    def _build(cls, definitions):
+    def _build(cls):
         '''
         Build the lexer object from the collected tokens and regular expressions.
         Validate the rules to make sure they look sane.
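
Because _collect_rules now recurses through the base classes, a subclass can redefine a parent rule in place (keeping its position in the match order) or splice a new rule in front of an inherited one via before. A sketch of the behavior this appears to enable, with hypothetical token names and the in-progress tuple syntax from above:

    from sly import Lexer

    class BaseLexer(Lexer):
        tokens = { 'NAME', 'NUMBER' }
        NAME   = r'[a-zA-Z_][a-zA-Z0-9_]*'
        NUMBER = r'\d+'

    class ChildLexer(BaseLexer):
        tokens = { 'NAME', 'NUMBER', 'HEXNUMBER' }
        NUMBER    = r'\d+[lL]?'                    # replaces inherited NUMBER in place
        HEXNUMBER = (r'0x[0-9a-fA-F]+', 'NUMBER')  # spliced in just before NUMBER

    # Collected order: NAME, HEXNUMBER, NUMBER
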
@@ -179,6 +201,8 @@ class Lexer(metaclass=LexerMeta):
         if 'tokens' not in vars(cls):
             raise LexerBuildError(f'{cls.__qualname__} class does not define a tokens attribute')

+        # Inherit token names, literals, ignored tokens, and other details
+        # from parent class (if any)
         cls._token_names = cls._token_names | set(cls.tokens)
         cls._literals = cls._literals | set(cls.literals)
         cls._ignored_tokens = set(cls._ignored_tokens)
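
The unions here mean a subclass only has to declare what it adds; token names and literals accumulate down the hierarchy. A rough sketch, assuming the remaining build checks pass (classes hypothetical):

    class BaseLexer(Lexer):
        tokens = { 'NAME' }
        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'

    class ChildLexer(BaseLexer):
        tokens = { 'NUMBER' }      # only the new token is declared here
        NUMBER = r'\d+'

    # ChildLexer._token_names == {'NAME', 'NUMBER'}
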
@@ -195,7 +219,7 @@ class Lexer(metaclass=LexerMeta):
             raise LexerBuildError(f'{missing} not included in token(s)')

         parts = []
-        for tokname, value in cls._collect_rules(definitions):
+        for tokname, value in cls._collect_rules():
             if tokname.startswith('ignore_'):
                 tokname = tokname[7:]
                 cls._ignored_tokens.add(tokname)
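
For context, the ignore_ handling visible here strips the seven-character prefix and registers the remainder as an ignored token, e.g. (hypothetical rule):

    class MyLexer(Lexer):
        tokens = { 'NAME' }
        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
        ignore_comment = r'\#.*'   # collected as ignored token 'comment'
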
@@ -228,8 +252,9 @@ class Lexer(metaclass=LexerMeta):
             return

         # Form the master regular expression
-        previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
-        cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
+        #previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
+        # cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
+        cls._master_re = re.compile('|'.join(parts), cls.reflags)

         # Verify that that ignore and literals specifiers match the input type
         if not isinstance(cls.ignore, str):
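
With inherited rules now flowing through _collect_rules, the master regular expression is rebuilt from the complete rule list rather than being chained onto the parent class's compiled pattern (the old approach, left commented out above). Assuming parts holds one named group per rule, as elsewhere in SLY, the result is an ordered alternation in which the first matching group wins, e.g. for the hypothetical ChildLexer above:

    (?P<NAME>[a-zA-Z_][a-zA-Z0-9_]*)|(?P<HEXNUMBER>0x[0-9a-fA-F]+)|(?P<NUMBER>\d+[lL]?)
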