Work in progress. Changes to parser production API

parent 9a1899fa69
commit 05a709aaea

docs/sly.rst | 1800 changed lines
@@ -44,46 +44,45 @@ class CalcParser(Parser):
     @_('NAME "=" expression')
     def statement(self, p):
-        self.names[p[1]] = p[3]
+        self.names[p.NAME] = p.expression

     @_('expression')
     def statement(self, p):
-        print(p[1])
+        print(p.expression)

     @_('expression "+" expression',
        'expression "-" expression',
        'expression "*" expression',
        'expression "/" expression')
     def expression(self, p):
-        if p[2] == '+':
-            p[0] = p[1] + p[3]
-        elif p[2] == '-':
-            p[0] = p[1] - p[3]
-        elif p[2] == '*':
-            p[0] = p[1] * p[3]
-        elif p[2] == '/':
-            p[0] = p[1] / p[3]
+        if p[1] == '+':
+            return p.expression0 + p.expression1
+        elif p[1] == '-':
+            return p.expression0 - p.expression1
+        elif p[1] == '*':
+            return p.expression0 * p.expression1
+        elif p[1] == '/':
+            return p.expression0 / p.expression1

     @_('"-" expression %prec UMINUS')
     def expression(self, p):
-        p[0] = -p[2]
+        return -p.expression

     @_('"(" expression ")"')
     def expression(self, p):
-        p[0] = p[2]
+        return p.expression

     @_('NUMBER')
     def expression(self, p):
-        p[0] = p[1]
+        return p.NUMBER

     @_('NAME')
     def expression(self, p):
         try:
-            p[0] = self.names[p[1]]
+            return self.names[p.NAME]
         except LookupError:
-            print("Undefined name '%s'" % p[1])
-            p[0] = 0
+            print("Undefined name '%s'" % p.NAME)
+            return 0

 if __name__ == '__main__':
     lexer = CalcLexer()
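The docs change above captures the new production API: a rule returns its value instead of assigning to p[0], right-hand-side symbols are indexed from 0, and symbols can be addressed by name, with repeated symbols getting numeric suffixes (expression0, expression1). A minimal usage sketch against this API follows; the grammar, precedence spelling, and token names mirror the docs example, but this is an illustration, not guaranteed to run unchanged against this work-in-progress commit:

from sly import Lexer, Parser

class NumLexer(Lexer):
    tokens = { 'NUMBER' }
    literals = { '+' }
    ignore = ' \t'
    NUMBER = r'\d+'

class NumParser(Parser):
    tokens = NumLexer.tokens
    precedence = (('left', '+'),)

    @_('expression "+" expression')
    def expression(self, p):
        # Two 'expression' symbols become expression0/expression1
        return p.expression0 + p.expression1

    @_('NUMBER')
    def expression(self, p):
        return int(p.NUMBER)

result = NumParser().parse(NumLexer().tokenize('1 + 2 + 3'))
print(result)   # 6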
sly/lex.py | 70 changed lines

@@ -68,9 +68,9 @@ class Token(object):
     def __repr__(self):
         return 'Token(%s, %r, %d, %d)' % (self.type, self.value, self.lineno, self.index)

-class NoDupeDict(OrderedDict):
+class LexerMetaDict(OrderedDict):
     '''
-    Special dictionary that prohibits duplicate definitions.
+    Special dictionary that prohibits duplicate definitions in lexer specifications.
     '''
     def __setitem__(self, key, value):
         if key in self and not isinstance(value, property):
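LexerMetaDict (formerly NoDupeDict) guards the lexer class body against accidental redefinition of a token name. The diff truncates __setitem__ after the duplicate check, so the sketch below guesses the failure mode; only the check itself is from the lines shown:

from collections import OrderedDict

class LexerMetaDict(OrderedDict):
    def __setitem__(self, key, value):
        # Check as shown in the diff; the exception raised is an assumption.
        if key in self and not isinstance(value, property):
            raise AttributeError('Name %s redefined' % key)
        super().__setitem__(key, value)

d = LexerMetaDict()
d['NUMBER'] = r'\d+'
d['NUMBER'] = r'\d*'    # would raise: Name NUMBER redefined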
@@ -83,17 +83,15 @@ class LexerMeta(type):
     '''
     @classmethod
     def __prepare__(meta, *args, **kwargs):
-        d = NoDupeDict()
-        def _(*patterns):
+        d = LexerMetaDict()
+        def _(pattern, *extra):
+            patterns = [pattern, *extra]
             def decorate(func):
-                for pattern in patterns:
-                    if hasattr(func, 'pattern'):
-                        if isinstance(pattern, str):
-                            func.pattern = ''.join(['(', pattern, ')|(', func.pattern, ')'])
-                        else:
-                            func.pattern = b''.join([b'(', pattern, b')|(', func.pattern, b')'])
-                    else:
-                        func.pattern = pattern
+                pattern = '|'.join('(%s)' % pat for pat in patterns)
+                if hasattr(func, 'pattern'):
+                    func.pattern = pattern + '|' + func.pattern
+                else:
+                    func.pattern = pattern
                 return func
             return decorate
         d['_'] = _
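The rewritten _ decorator now requires at least one pattern and joins all of them into a single alternation up front, instead of wrapping pairwise in a loop. A standalone sketch of that combining logic (the name combine is illustrative, not part of SLY):

# Illustrative reimplementation of the pattern combining shown above.
def combine(patterns, existing=None):
    # Each pattern becomes its own regex group, joined as alternatives.
    pattern = '|'.join('(%s)' % pat for pat in patterns)
    # A decorator applied later stacks its alternation before an earlier one.
    return pattern if existing is None else pattern + '|' + existing

print(combine([r'\+', r'-']))             # (\+)|(-)
print(combine([r'\*'], existing='(/)'))   # (\*)|(/)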
@@ -109,7 +107,7 @@ class Lexer(metaclass=LexerMeta):
     # These attributes may be defined in subclasses
     tokens = set()
     literals = set()
-    ignore = None
+    ignore = ''
     reflags = 0

     # These attributes are constructed automatically by the associated metaclass
@@ -118,7 +116,6 @@ class Lexer(metaclass=LexerMeta):
     _literals = set()
     _token_funcs = { }
     _ignored_tokens = set()
-    _input_type = str

     @classmethod
     def _collect_rules(cls, definitions):
@@ -151,7 +148,7 @@ class Lexer(metaclass=LexerMeta):
                 tokname = tokname[7:]
                 cls._ignored_tokens.add(tokname)

-            if isinstance(value, (str, bytes)):
+            if isinstance(value, str):
                 pattern = value

             elif callable(value):
@@ -159,10 +156,7 @@ class Lexer(metaclass=LexerMeta):
                 cls._token_funcs[tokname] = value

             # Form the regular expression component
-            if isinstance(pattern, str):
-                part = '(?P<%s>%s)' % (tokname, pattern)
-            else:
-                part = b'(?P<%s>%s)' % (tokname.encode('ascii'), pattern)
+            part = '(?P<%s>%s)' % (tokname, pattern)

             # Make sure the individual regex compiles properly
             try:
@@ -171,38 +165,24 @@ class Lexer(metaclass=LexerMeta):
                 raise PatternError('Invalid regex for token %s' % tokname) from e

             # Verify that the pattern doesn't match the empty string
-            if cpat.match(type(pattern)()):
+            if cpat.match(''):
                 raise PatternError('Regex for token %s matches empty input' % tokname)

             parts.append(part)

         # If no parts collected, then no rules to process
         if not parts:
             return

-        # Verify that all of the patterns are of the same type
-        if not all(type(part) == type(parts[0]) for part in parts):
-            raise LexerBuildError('Tokens are specified using both bytes and strings.')
-
         # Form the master regular expression
-        if parts and isinstance(parts[0], bytes):
-            previous = (b'|' + cls._master_re.pattern) if cls._master_re else b''
-            cls._master_re = re.compile(b'|'.join(parts) + previous, cls.reflags)
-            cls._input_type = bytes
-        else:
-            previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
-            cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
-            cls._input_type = str
+        previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
+        cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)

-        # Verify that that ignore and literals specifiers match the input type
-        if cls.ignore is not None and not isinstance(cls.ignore, cls._input_type):
-            raise LexerBuildError("ignore specifier type doesn't match token types (%s)" %
-                                  cls._input_type.__name__)
+        if not isinstance(cls.ignore, str):
+            raise LexerBuildError('ignore specifier must be a string')

-        if not all(isinstance(lit, cls._input_type) for lit in cls.literals):
-            raise LexerBuildError("literals specifier not using same type as tokens (%s)" %
-                                  cls._input_type.__name__)
+        if not all(isinstance(lit, str) for lit in cls.literals):
+            raise LexerBuildError("literals must be specified as strings")

     def tokenize(self, text, lineno=1, index=0):
         # Local copies of frequently used values
@@ -220,11 +200,6 @@ class Lexer(metaclass=LexerMeta):
                     index += 1
                     continue
             except IndexError:
-                if self.eof:
-                    text = self.eof()
-                    if text:
-                        index = 0
-                        continue
                 break

             tok = Token()
@@ -270,9 +245,6 @@ class Lexer(metaclass=LexerMeta):
         self.index = index
         self.lineno = lineno

-    # Default implementations of methods that may be subclassed by users
+    # Default implementations of the error handler. May be changed in subclasses
     def error(self, value):
         raise LexError("Illegal character %r at index %d" % (value[0], self.index), value)
-
-    def eof(self):
-        pass
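Taken together, the lex.py changes drop bytes support and the eof() hook: token patterns, the ignore specifier, and literals must now all be str. A minimal lexer consistent with those constraints (a sketch against SLY's public Lexer API at this point in its history):

from sly import Lexer

class CalcLexer(Lexer):
    tokens = { 'NAME', 'NUMBER' }
    literals = { '=', '+', '-', '*', '/', '(', ')' }
    ignore = ' \t'     # must be a str now; None is no longer accepted

    NAME   = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'

for tok in CalcLexer().tokenize('x = 3 + 4'):
    print(tok)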
sly/yacc.py | 148 changed lines

@@ -33,7 +33,7 @@

 import sys
 import inspect
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict

 __version__ = '0.0'
 __all__ = [ 'Parser' ]
@@ -104,31 +104,39 @@ class YaccSymbol:

 class YaccProduction:
     def __init__(self, s, stack=None):
-        self.slice = s
-        self.stack = stack
+        self._slice = s
+        self._stack = stack
+        self._namemap = { }

     def __getitem__(self, n):
-        if isinstance(n, slice):
-            return [s.value for s in self.slice[n]]
-        elif n >= 0:
-            return self.slice[n].value
+        if n >= 0:
+            return self._slice[n].value
         else:
-            return self.stack[n].value
+            return self._stack[n].value

     def __setitem__(self, n, v):
-        self.slice[n].value = v
+        self._slice[n].value = v

     def __len__(self):
-        return len(self.slice)
+        return len(self._slice)

     def lineno(self, n):
-        return getattr(self.slice[n], 'lineno', 0)
+        return getattr(self._slice[n], 'lineno', 0)

     def set_lineno(self, n, lineno):
-        self.slice[n].lineno = lineno
+        self._slice[n].lineno = lineno

     def index(self, n):
-        return getattr(self.slice[n], 'index', 0)
+        return getattr(self._slice[n], 'index', 0)

+    def __getattr__(self, name):
+        return self._slice[self._namemap[name]].value
+
+    def __setattr__(self, name, value):
+        if name[0:1] == '_' or name not in self._namemap:
+            super().__setattr__(name, value)
+        else:
+            self._slice[self._namemap[name]].value = value
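The new __getattr__/__setattr__ pair is what makes p.NAME and p.expression work: any attribute not starting with an underscore is routed through _namemap into the symbol slice. A rough model of that routing (Sym is a hypothetical stand-in for SLY's internal YaccSymbol, and the map keys shown are illustrative):

class Sym:
    def __init__(self, value):
        self.value = value

# For the rule  statement : NAME "=" expression  the slice holds the
# right-hand-side symbols and the name map gives their 0-based positions.
slice_ = [Sym('x'), Sym('='), Sym(42)]
namemap = {'NAME': 0, '=': 1, 'expression': 2}

# p.NAME       ->  slice_[namemap['NAME']].value        == 'x'
# p.expression ->  slice_[namemap['expression']].value  == 42
print(slice_[namemap['NAME']].value, slice_[namemap['expression']].value)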
||||
# -----------------------------------------------------------------------------
|
||||
# === Grammar Representation ===
|
||||
@ -171,17 +179,29 @@ class Production(object):
|
||||
self.file = file
|
||||
self.line = line
|
||||
self.prec = precedence
|
||||
|
||||
|
||||
# Internal settings used during table construction
|
||||
|
||||
self.len = len(self.prod) # Length of the production
|
||||
|
||||
# Create a list of unique production symbols used in the production
|
||||
self.usyms = []
|
||||
for s in self.prod:
|
||||
symmap = defaultdict(list)
|
||||
for n, s in enumerate(self.prod):
|
||||
symmap[s].append(n)
|
||||
if s not in self.usyms:
|
||||
self.usyms.append(s)
|
||||
|
||||
# Create a dict mapping symbol names to indices
|
||||
m = {}
|
||||
for key, indices in symmap.items():
|
||||
if len(indices) == 1:
|
||||
m[key] = indices[0]
|
||||
else:
|
||||
for n, index in enumerate(indices):
|
||||
m[key+str(n)] = index
|
||||
|
||||
self.namemap = m
|
||||
|
||||
# List of all LR items for the production
|
||||
self.lr_items = []
|
||||
self.lr_next = None
|
||||
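How Production.namemap comes out in practice: a symbol that occurs once maps straight to its position, while repeated symbols get numeric suffixes. The helper below just re-runs the mapping logic from the hunk above on a sample right-hand side:

from collections import defaultdict

def make_namemap(prod):
    # Same construction as in Production.__init__ above.
    symmap = defaultdict(list)
    for n, s in enumerate(prod):
        symmap[s].append(n)
    m = {}
    for key, indices in symmap.items():
        if len(indices) == 1:
            m[key] = indices[0]
        else:
            for n, index in enumerate(indices):
                m[key + str(n)] = index
    return m

print(make_namemap(['expression', '+', 'expression']))
# -> {'expression0': 0, 'expression1': 2, '+': 1}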
@@ -1512,9 +1532,10 @@ def _collect_grammar_rules(func):
         else:
             grammar.append((func, filename, lineno, prodname, syms))
         func = getattr(func, 'next_func', None)

     return grammar

-class OverloadDict(OrderedDict):
+class ParserMetaDict(OrderedDict):
     '''
     Dictionary that allows decorated grammar rule functions to be overloaded
     '''
@@ -1526,13 +1547,11 @@ class OverloadDict(OrderedDict):
 class ParserMeta(type):
     @classmethod
     def __prepare__(meta, *args, **kwargs):
-        d = OverloadDict()
-        def _(*rules):
+        d = ParserMetaDict()
+        def _(rule, *extra):
+            rules = [rule, *extra]
             def decorate(func):
-                if hasattr(func, 'rules'):
-                    func.rules.extend(rules[::-1])
-                else:
-                    func.rules = list(rules[::-1])
+                func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
                 return func
             return decorate
         d['_'] = _
@@ -1788,9 +1807,9 @@ class Parser(metaclass=ParserMeta):
         self.statestack.append(0)
         self.state = 0

-    def parse(self, lexer):
+    def parse(self, tokens):
         '''
-        Parse the given input text. lexer is a Lexer object that produces tokens
+        Parse the given input tokens.
         '''
         lookahead = None            # Current lookahead symbol
         lookaheadstack = []         # Stack of lookahead symbols
@@ -1801,10 +1820,6 @@ class Parser(metaclass=ParserMeta):
         pslice = YaccProduction(None)   # Production object passed to grammar rules
         errorcount = 0                  # Used during error recovery

-        # Save a local reference of the lexer being used
-        self.lexer = lexer
-        tokens = iter(self.lexer)
-
         # Set up the state and symbol stacks
         self.statestack = statestack = []   # Stack of parsing states
         self.symstack = symstack = []       # Stack of grammar symbols
@@ -1816,7 +1831,6 @@ class Parser(metaclass=ParserMeta):
             # Get the next symbol on the input. If a lookahead symbol
             # is already set, we just use that. Otherwise, we'll pull
             # the next token off of the lookaheadstack or from the lexer
-
             if self.state not in defaulted_states:
                 if not lookahead:
                     if not lookaheadstack:
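With this change, parse() takes any iterable of tokens instead of holding a reference to a Lexer object. The call pattern, assuming the CalcLexer/CalcParser pair from the docs diff above:

lexer = CalcLexer()
parser = CalcParser()

# Any iterable or generator of tokens works; the parser no longer
# stores the lexer itself.
result = parser.parse(lexer.tokenize('3 + 4 * 5'))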
@@ -1852,74 +1866,22 @@ class Parser(metaclass=ParserMeta):
                     self.production = p = prod[-t]
                     pname = p.name
                     plen = p.len
+                    pslice._namemap = p.namemap

                     # Call the production function
-                    sym = YaccSymbol()
-                    sym.type = pname       # Production name
-                    sym.value = None
-
+                    pslice._slice = symstack[-plen:] if plen else []
                     if plen:
-                        targ = symstack[-plen-1:]
-                        targ[0] = sym
-
-                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-                        # The code enclosed in this section is duplicated
-                        # below as a performance optimization. Make sure
-                        # changes get made in both locations.
-
-                        pslice.slice = targ
-
-                        try:
-                            # Call the grammar rule with our special slice object
-                            del symstack[-plen:]
-                            p.func(self, pslice)
-                            del statestack[-plen:]
-                            symstack.append(sym)
-                            self.state = goto[statestack[-1]][pname]
-                            statestack.append(self.state)
-                        except SyntaxError:
-                            # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            symstack.extend(targ[1:-1])
-                            statestack.pop()
-                            self.state = statestack[-1]
-                            sym.type = 'error'
-                            sym.value = 'error'
-                            lookahead = sym
-                            errorcount = ERROR_COUNT
-                            self.errorok = False
-                        continue
-                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-                    else:
-
-                        targ = [sym]
-
-                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-                        # The code enclosed in this section is duplicated
-                        # above as a performance optimization. Make sure
-                        # changes get made in both locations.
-
-                        pslice.slice = targ
-
-                        try:
-                            # Call the grammar rule with our special slice object
-                            p.func(self, pslice)
-                            symstack.append(sym)
-                            self.state = goto[statestack[-1]][pname]
-                            statestack.append(self.state)
-                        except SyntaxError:
-                            # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            statestack.pop()
-                            self.state = statestack[-1]
-                            sym.type = 'error'
-                            sym.value = 'error'
-                            lookahead = sym
-                            errorcount = ERROR_COUNT
-                            self.errorok = False
-                        continue
-                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+                        del symstack[-plen:]
+                        del statestack[-plen:]
+
+                    sym = YaccSymbol()
+                    sym.type = pname
+                    sym.value = p.func(self, pslice)
+                    symstack.append(sym)
+                    self.state = goto[statestack[-1]][pname]
+                    statestack.append(self.state)
+                    continue

                 if t == 0:
                     n = symstack[-1]
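This rewritten reduce step is the core of the API change: the rule function's return value becomes the value of the newly pushed symbol, where the old code expected the rule to store into p[0]. Note also that the try/except SyntaxError recovery is simply dropped here, consistent with the commit's work-in-progress title. A toy model of the new flow (Sym and reduce are illustrative, not SLY internals):

# Toy model of the new reduce step shown above.
class Sym:
    def __init__(self, type, value=None):
        self.type, self.value = type, value

def reduce(symstack, plen, pname, func):
    rhs = symstack[-plen:] if plen else []     # plays the role of pslice._slice
    if plen:
        del symstack[-plen:]
    symstack.append(Sym(pname, func(rhs)))     # sym.value = p.func(self, pslice)

stack = [Sym('NUMBER', 3), Sym('+', '+'), Sym('NUMBER', 4)]
reduce(stack, 3, 'expression', lambda rhs: rhs[0].value + rhs[2].value)
print(stack[0].type, stack[0].value)    # expression 7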