Initial work on lexer states (in progress)
This commit is contained in:
parent d0e34417bc
commit 08988d2798
sly/lex.py (47 changed lines)
@@ -79,9 +79,10 @@ class Token(object):
 
 class TokenStr(str):
     @staticmethod
-    def __new__(cls, value):
+    def __new__(cls, value, before=None):
         self = super().__new__(cls, value)
         self.remap = { }
+        self.before = before
         return self
 
     def __setitem__(self, key, value):
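A `TokenStr` still behaves like the plain pattern string it wraps; the new `before` attribute just records the name of an existing rule this pattern should be tried ahead of. A quick sketch of the intended behavior (illustrative only, since the commit is marked in progress):

    # TokenStr acts like a normal str but carries lexer metadata.
    t = TokenStr(r'\d+\.\d+', before='NUMBER')
    print(isinstance(t, str), t.remap, t.before)   # True {} NUMBER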
@@ -89,12 +90,15 @@ class TokenStr(str):
 
 class LexerMetaDict(dict):
     '''
-    Special dictionary that prohits duplicate definitions in lexer specifications.
+    Special dictionary that prohibits duplicate definitions in lexer specifications.
     '''
     def __setitem__(self, key, value):
         if isinstance(value, str):
             value = TokenStr(value)
 
+        elif isinstance(value, tuple) and len(value) == 2:
+            value = TokenStr(*value)
+
         if key in self and not isinstance(value, property):
             prior = self[key]
             if isinstance(prior, str):
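`LexerMetaDict` is the namespace used for a lexer class body, so this new `elif` is what makes a two-element tuple spelling such as `FLOAT = (r'\d+\.\d+', 'NUMBER')` work: the second element becomes the `before` marker. A small sketch of the normalization, assuming this revision of `sly.lex` is importable:

    from sly.lex import LexerMetaDict

    d = LexerMetaDict()
    d['PLUS']  = r'\+'                    # plain str -> TokenStr, before=None
    d['FLOAT'] = (r'\d+\.\d+', 'NUMBER')  # 2-tuple   -> TokenStr with before set
    print(type(d['FLOAT']).__name__, d['FLOAT'].before)   # TokenStr NUMBER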
@@ -136,11 +140,12 @@ class LexerMeta(type):
         del attributes['_']
         remapping = { key: val.remap for key, val in attributes.items()
                       if getattr(val, 'remap', None) }
-        attributes = { key: str(val) if isinstance(val, TokenStr) else val
-                       for key, val in attributes.items() }
-        cls = super().__new__(meta, clsname, bases, attributes)
+        clsattributes = { key: str(val) if isinstance(val, TokenStr) else val
+                          for key, val in attributes.items() }
+        cls = super().__new__(meta, clsname, bases, clsattributes)
         cls._remapping = remapping
-        cls._build(list(attributes.items()))
+        cls._attributes = attributes
+        cls._build()
         return cls
 
 class Lexer(metaclass=LexerMeta):
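The class namespace handed to `type.__new__` now holds plain `str` values (`clsattributes`), while the raw definitions, `TokenStr` metadata included, are stashed on `cls._attributes` so `_collect_rules` can consult them later, including from subclasses. The resulting split, sketched with a hypothetical minimal lexer (assuming this in-progress revision still builds standalone lexers as before):

    from sly import Lexer

    class DemoLexer(Lexer):
        tokens = { 'NUMBER' }
        NUMBER = r'\d+'

    print(type(DemoLexer.NUMBER))                   # plain str in the class dict
    print(type(DemoLexer._attributes['NUMBER']))    # TokenStr, .before/.remap intact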
@@ -158,20 +163,37 @@ class Lexer(metaclass=LexerMeta):
     _ignored_tokens = set()
 
     @classmethod
-    def _collect_rules(cls, definitions):
+    def _collect_rules(cls):
         '''
         Collect all of the rules from class definitions that look like tokens
         '''
+        definitions = list(cls._attributes.items())
         rules = []
+
+        for base in cls.__bases__:
+            if isinstance(base, LexerMeta):
+                rules.extend(base._collect_rules())
+
         for key, value in definitions:
             if (key in cls.tokens) or key.startswith('ignore_') or hasattr(value, 'pattern'):
-                rules.append((key, value))
+                # Check existing rules
+                for n, (rkey, _) in enumerate(rules):
+                    if rkey == key:
+                        rules[n] = (key, value)
+                        break
+                    elif isinstance(value, TokenStr) and value.before == rkey:
+                        rules.insert(n, (key, value))
+                        break
+                else:
+                    rules.append((key, value))
+                # rules.append((key, value))
             elif isinstance(value, str) and not key.startswith('_') and key not in {'ignore'}:
                 raise LexerBuildError(f'{key} does not match a name in tokens')
+
         return rules
 
     @classmethod
-    def _build(cls, definitions):
+    def _build(cls):
         '''
         Build the lexer object from the collected tokens and regular expressions.
         Validate the rules to make sure they look sane.
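The rewritten `_collect_rules` first pulls in rules from any `Lexer` base classes, then walks the subclass's own definitions: a redefinition of an inherited name replaces that rule in place, a `before` marker splices the new rule in front of the named one, and anything else is appended. A hypothetical usage sketch (invented class and token names; the commit is in progress, so details may still change):

    from sly import Lexer

    class CalcLexer(Lexer):
        tokens = { 'NAME', 'NUMBER' }
        ignore = ' \t'

        NAME   = r'[a-zA-Z_][a-zA-Z0-9_]*'
        NUMBER = r'\d+'

    class ExtendedLexer(CalcLexer):
        tokens = { 'NAME', 'NUMBER', 'FLOAT' }

        # Tuple form: try FLOAT before the inherited NUMBER rule so that
        # '3.14' lexes as one FLOAT token rather than NUMBER '.' NUMBER.
        FLOAT = (r'\d+\.\d+', 'NUMBER')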
@@ -179,6 +201,8 @@ class Lexer(metaclass=LexerMeta):
         if 'tokens' not in vars(cls):
             raise LexerBuildError(f'{cls.__qualname__} class does not define a tokens attribute')
 
+        # Inherit token names, literals, ignored tokens, and other details
+        # from parent class (if any)
         cls._token_names = cls._token_names | set(cls.tokens)
         cls._literals = cls._literals | set(cls.literals)
         cls._ignored_tokens = set(cls._ignored_tokens)
@@ -195,7 +219,7 @@ class Lexer(metaclass=LexerMeta):
             raise LexerBuildError(f'{missing} not included in token(s)')
 
         parts = []
-        for tokname, value in cls._collect_rules(definitions):
+        for tokname, value in cls._collect_rules():
             if tokname.startswith('ignore_'):
                 tokname = tokname[7:]
                 cls._ignored_tokens.add(tokname)
@@ -228,8 +252,9 @@ class Lexer(metaclass=LexerMeta):
             return
 
         # Form the master regular expression
-        previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
-        cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
+        #previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
+        # cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
+        cls._master_re = re.compile('|'.join(parts), cls.reflags)
 
         # Verify that that ignore and literals specifiers match the input type
         if not isinstance(cls.ignore, str):
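Chaining the inherited `_master_re` onto the new parts is no longer needed (hence the commented-out lines) because inherited rules now arrive through `_collect_rules`, already merged and ordered, and each class compiles its master pattern from scratch. For context, the master regex is an alternation of named groups, one per rule, with `lastgroup` identifying the matched token, roughly:

    import re

    # Simplified sketch of the master-regex technique used by the lexer.
    parts = [r'(?P<NUMBER>\d+)', r'(?P<NAME>[a-zA-Z_][a-zA-Z0-9_]*)']
    master_re = re.compile('|'.join(parts))

    m = master_re.match('count')
    print(m.lastgroup, m.group())   # NAME count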