Work in progress. Changes to parser production API

parent 9a1899fa69
commit 05a709aaea

docs/sly.rst | 1800
(full file diff suppressed because it is too large; one hunk is shown below)
@@ -44,46 +44,45 @@ class CalcParser(Parser):
 
     @_('NAME "=" expression')
     def statement(self, p):
-        self.names[p[1]] = p[3]
+        self.names[p.NAME] = p.expression
 
     @_('expression')
     def statement(self, p):
-        print(p[1])
+        print(p.expression)
 
     @_('expression "+" expression',
        'expression "-" expression',
        'expression "*" expression',
        'expression "/" expression')
     def expression(self, p):
-        if p[2] == '+':
-            p[0] = p[1] + p[3]
-        elif p[2] == '-':
-            p[0] = p[1] - p[3]
-        elif p[2] == '*':
-            p[0] = p[1] * p[3]
-        elif p[2] == '/':
-            p[0] = p[1] / p[3]
+        if p[1] == '+':
+            return p.expression0 + p.expression1
+        elif p[1] == '-':
+            return p.expression0 - p.expression1
+        elif p[1] == '*':
+            return p.expression0 * p.expression1
+        elif p[1] == '/':
+            return p.expression0 / p.expression1
 
     @_('"-" expression %prec UMINUS')
     def expression(self, p):
-        p[0] = -p[2]
+        return -p.expression
 
     @_('"(" expression ")"')
     def expression(self, p):
-        p[0] = p[2]
+        return p.expression
 
     @_('NUMBER')
     def expression(self, p):
-        p[0] = p[1]
+        return p.NUMBER
 
     @_('NAME')
     def expression(self, p):
         try:
-            p[0] = self.names[p[1]]
+            return self.names[p.NAME]
         except LookupError:
-            print("Undefined name '%s'" % p[1])
-            p[0] = 0
+            print("Undefined name '%s'" % p.NAME)
+            return 0
 
-
 if __name__ == '__main__':
     lexer = CalcLexer()
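Taken together, this hunk moves grammar rules from positional access (p[1], p[3], assignment through p[0]) to named attribute access with plain return values. A minimal sketch of a parser written against the new API; the CalcLexer/CalcParser names follow the example in docs/sly.rst, and details may still shift since this commit is marked work in progress:

    from sly import Lexer, Parser

    class CalcLexer(Lexer):
        tokens = {'NAME', 'NUMBER'}
        ignore = ' \t'
        literals = {'=', '+'}
        NAME = r'[a-zA-Z_]\w*'

        @_(r'\d+')
        def NUMBER(self, t):
            t.value = int(t.value)
            return t

    class CalcParser(Parser):
        tokens = CalcLexer.tokens
        precedence = (('left', '+'),)

        def __init__(self):
            self.names = {}

        @_('NAME "=" expression')
        def statement(self, p):
            self.names[p.NAME] = p.expression    # was: self.names[p[1]] = p[3]

        @_('expression "+" expression')
        def expression(self, p):
            # repeated symbols get numbered names: expression0, expression1
            return p.expression0 + p.expression1

        @_('NUMBER')
        def expression(self, p):
            return p.NUMBER                      # was: p[0] = p[1]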
sly/lex.py | 70
@@ -68,9 +68,9 @@ class Token(object):
     def __repr__(self):
         return 'Token(%s, %r, %d, %d)' % (self.type, self.value, self.lineno, self.index)
 
-class NoDupeDict(OrderedDict):
+class LexerMetaDict(OrderedDict):
     '''
-    Special dictionary that prohibits duplicate definitions.
+    Special dictionary that prohibits duplicate definitions in lexer specifications.
     '''
     def __setitem__(self, key, value):
         if key in self and not isinstance(value, property):
@@ -83,17 +83,15 @@ class LexerMeta(type):
     '''
     @classmethod
     def __prepare__(meta, *args, **kwargs):
-        d = NoDupeDict()
-        def _(*patterns):
+        d = LexerMetaDict()
+        def _(pattern, *extra):
+            patterns = [pattern, *extra]
             def decorate(func):
-                for pattern in patterns:
-                    if hasattr(func, 'pattern'):
-                        if isinstance(pattern, str):
-                            func.pattern = ''.join(['(', pattern, ')|(', func.pattern, ')'])
-                        else:
-                            func.pattern = b''.join([b'(', pattern, b')|(', func.pattern, b')'])
-                    else:
-                        func.pattern = pattern
+                pattern = '|'.join('(%s)' % pat for pat in patterns)
+                if hasattr(func, 'pattern'):
+                    func.pattern = pattern + '|' + func.pattern
+                else:
+                    func.pattern = pattern
                 return func
             return decorate
         d['_'] = _
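With this change, _() no longer loops over patterns at decoration time; it folds all of the supplied patterns into a single alternation up front. Roughly (the hex/decimal rule below is an illustrative assumption, not part of this commit):

    patterns = [r'0x[0-9a-fA-F]+', r'\d+']
    merged = '|'.join('(%s)' % pat for pat in patterns)
    # merged == '(0x[0-9a-fA-F]+)|(\\d+)'

    # so a multi-pattern token rule such as:
    #
    #     @_(r'0x[0-9a-fA-F]+', r'\d+')
    #     def NUMBER(self, t):
    #         t.value = int(t.value, 0)
    #         return t
    #
    # now carries one combined regex in NUMBER.pattern.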
@@ -109,7 +107,7 @@ class Lexer(metaclass=LexerMeta):
     # These attributes may be defined in subclasses
     tokens = set()
     literals = set()
-    ignore = None
+    ignore = ''
     reflags = 0
 
     # These attributes are constructed automatically by the associated metaclass
@@ -118,7 +116,6 @@ class Lexer(metaclass=LexerMeta):
     _literals = set()
     _token_funcs = { }
     _ignored_tokens = set()
-    _input_type = str
 
     @classmethod
     def _collect_rules(cls, definitions):
@@ -151,7 +148,7 @@ class Lexer(metaclass=LexerMeta):
                 tokname = tokname[7:]
                 cls._ignored_tokens.add(tokname)
 
-            if isinstance(value, (str, bytes)):
+            if isinstance(value, str):
                 pattern = value
 
             elif callable(value):
@@ -159,10 +156,7 @@ class Lexer(metaclass=LexerMeta):
                 cls._token_funcs[tokname] = value
 
             # Form the regular expression component
-            if isinstance(pattern, str):
-                part = '(?P<%s>%s)' % (tokname, pattern)
-            else:
-                part = b'(?P<%s>%s)' % (tokname.encode('ascii'), pattern)
+            part = '(?P<%s>%s)' % (tokname, pattern)
 
             # Make sure the individual regex compiles properly
             try:
@@ -171,38 +165,24 @@ class Lexer(metaclass=LexerMeta):
                 raise PatternError('Invalid regex for token %s' % tokname) from e
 
             # Verify that the pattern doesn't match the empty string
-            if cpat.match(type(pattern)()):
+            if cpat.match(''):
                 raise PatternError('Regex for token %s matches empty input' % tokname)
 
             parts.append(part)
 
-        # If no parts collected, then no rules to process
         if not parts:
             return
 
-        # Verify that all of the patterns are of the same type
-        if not all(type(part) == type(parts[0]) for part in parts):
-            raise LexerBuildError('Tokens are specified using both bytes and strings.')
-
         # Form the master regular expression
-        if parts and isinstance(parts[0], bytes):
-            previous = (b'|' + cls._master_re.pattern) if cls._master_re else b''
-            cls._master_re = re.compile(b'|'.join(parts) + previous, cls.reflags)
-            cls._input_type = bytes
-        else:
-            previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
-            cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
-            cls._input_type = str
+        previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
+        cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
 
         # Verify that the ignore and literals specifiers match the input type
-        if cls.ignore is not None and not isinstance(cls.ignore, cls._input_type):
-            raise LexerBuildError("ignore specifier type doesn't match token types (%s)" %
-                                  cls._input_type.__name__)
+        if not isinstance(cls.ignore, str):
+            raise LexerBuildError('ignore specifier must be a string')
 
-        if not all(isinstance(lit, cls._input_type) for lit in cls.literals):
-            raise LexerBuildError("literals specifier not using same type as tokens (%s)" %
-                                  cls._input_type.__name__)
+        if not all(isinstance(lit, str) for lit in cls.literals):
+            raise LexerBuildError("literals must be specified as strings")
-
 
     def tokenize(self, text, lineno=1, index=0):
         # Local copies of frequently used values
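The surviving str-only path builds one master regex out of per-token named groups, so the group name of a match identifies which token rule fired. A standalone sketch of the technique (the token names here are illustrative):

    import re

    parts = [r'(?P<NAME>[a-zA-Z_]\w*)', r'(?P<NUMBER>\d+)']
    master = re.compile('|'.join(parts))

    m = master.match('count')
    print(m.lastgroup)   # 'NAME' -- the named group that matched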
@@ -220,11 +200,6 @@ class Lexer(metaclass=LexerMeta):
                     index += 1
                     continue
             except IndexError:
-                if self.eof:
-                    text = self.eof()
-                    if text:
-                        index = 0
-                        continue
                 break
 
             tok = Token()
@@ -270,9 +245,6 @@ class Lexer(metaclass=LexerMeta):
         self.index = index
         self.lineno = lineno
 
-    # Default implementations of methods that may be subclassed by users
+    # Default implementation of the error handler. May be changed in subclasses
     def error(self, value):
         raise LexError("Illegal character %r at index %d" % (value[0], self.index), value)
-
-    def eof(self):
-        pass
sly/yacc.py | 148
@@ -33,7 +33,7 @@
 
 import sys
 import inspect
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 
 __version__ = '0.0'
 __all__ = [ 'Parser' ]
@@ -104,31 +104,39 @@ class YaccSymbol:
 
 class YaccProduction:
     def __init__(self, s, stack=None):
-        self.slice = s
-        self.stack = stack
+        self._slice = s
+        self._stack = stack
+        self._namemap = { }
 
     def __getitem__(self, n):
-        if isinstance(n, slice):
-            return [s.value for s in self.slice[n]]
-        elif n >= 0:
-            return self.slice[n].value
+        if n >= 0:
+            return self._slice[n].value
         else:
-            return self.stack[n].value
+            return self._stack[n].value
 
     def __setitem__(self, n, v):
-        self.slice[n].value = v
+        self._slice[n].value = v
 
     def __len__(self):
-        return len(self.slice)
+        return len(self._slice)
 
     def lineno(self, n):
-        return getattr(self.slice[n], 'lineno', 0)
+        return getattr(self._slice[n], 'lineno', 0)
 
     def set_lineno(self, n, lineno):
-        self.slice[n].lineno = lineno
+        self._slice[n].lineno = lineno
 
     def index(self, n):
-        return getattr(self.slice[n], 'index', 0)
+        return getattr(self._slice[n], 'index', 0)
 
+    def __getattr__(self, name):
+        return self._slice[self._namemap[name]].value
+
+    def __setattr__(self, name, value):
+        if name[0:1] == '_' or name not in self._namemap:
+            super().__setattr__(name, value)
+        else:
+            self._slice[self._namemap[name]].value = value
 
 # -----------------------------------------------------------------------------
 # === Grammar Representation ===
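The new __getattr__/__setattr__ pair routes attribute access through _namemap, which maps a symbol name to a position in the slice of the parsing stack. A stripped-down sketch of just the lookup (Sym and Prod are stand-ins, not the real classes):

    class Sym:
        def __init__(self, value):
            self.value = value

    class Prod:
        def __init__(self, slice_, namemap):
            self._slice = slice_       # leading '_' keeps these out of the remapping
            self._namemap = namemap

        def __getattr__(self, name):
            # only called for names not found normally, e.g. p.expression0
            return self._slice[self._namemap[name]].value

    p = Prod([Sym(2), Sym('+'), Sym(3)], {'expression0': 0, 'expression1': 2})
    print(p.expression0, p.expression1)   # 2 3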
@@ -171,17 +179,29 @@ class Production(object):
         self.file = file
         self.line = line
         self.prec = precedence
 
         # Internal settings used during table construction
 
         self.len = len(self.prod)   # Length of the production
 
         # Create a list of unique production symbols used in the production
         self.usyms = []
-        for s in self.prod:
+        symmap = defaultdict(list)
+        for n, s in enumerate(self.prod):
+            symmap[s].append(n)
             if s not in self.usyms:
                 self.usyms.append(s)
 
+        # Create a dict mapping symbol names to indices
+        m = {}
+        for key, indices in symmap.items():
+            if len(indices) == 1:
+                m[key] = indices[0]
+            else:
+                for n, index in enumerate(indices):
+                    m[key+str(n)] = index
+
+        self.namemap = m
 
         # List of all LR items for the production
         self.lr_items = []
         self.lr_next = None
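A worked example of the namemap construction above, for a rule like expression : expression "+" expression (the exact spelling of the literal symbol is illustrative): a symbol that appears once maps under its own name, while repeated symbols get numbered suffixes.

    from collections import defaultdict

    prod = ['expression', '"+"', 'expression']    # right-hand-side symbols

    symmap = defaultdict(list)
    for n, s in enumerate(prod):
        symmap[s].append(n)

    m = {}
    for key, indices in symmap.items():
        if len(indices) == 1:
            m[key] = indices[0]
        else:
            for n, index in enumerate(indices):
                m[key + str(n)] = index

    print(m)   # {'expression0': 0, 'expression1': 2, '"+"': 1}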
@@ -1512,9 +1532,10 @@ def _collect_grammar_rules(func):
         else:
             grammar.append((func, filename, lineno, prodname, syms))
         func = getattr(func, 'next_func', None)
 
     return grammar
 
-class OverloadDict(OrderedDict):
+
+class ParserMetaDict(OrderedDict):
     '''
     Dictionary that allows decorated grammar rule functions to be overloaded
     '''
@@ -1526,13 +1547,11 @@ class OverloadDict(OrderedDict):
 class ParserMeta(type):
     @classmethod
     def __prepare__(meta, *args, **kwargs):
-        d = OverloadDict()
-        def _(*rules):
+        d = ParserMetaDict()
+        def _(rule, *extra):
+            rules = [rule, *extra]
             def decorate(func):
-                if hasattr(func, 'rules'):
-                    func.rules.extend(rules[::-1])
-                else:
-                    func.rules = list(rules[::-1])
+                func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
                 return func
             return decorate
         d['_'] = _
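The parser-side _() gets the same reshaping as the lexer version, and the new one-liner still lets stacked decorations accumulate rules on the function. A quick illustration of the accumulation order, extracted outside of any class:

    def _(rule, *extra):
        rules = [rule, *extra]
        def decorate(func):
            func.rules = [*getattr(func, 'rules', []), *rules[::-1]]
            return func
        return decorate

    @_('expression "+" expression')
    @_('expression "-" expression')
    def expression(p):
        pass

    # decorators apply bottom-up, and each batch is reversed:
    print(expression.rules)
    # ['expression "-" expression', 'expression "+" expression']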
@@ -1788,9 +1807,9 @@ class Parser(metaclass=ParserMeta):
         self.statestack.append(0)
         self.state = 0
 
-    def parse(self, lexer):
+    def parse(self, tokens):
         '''
-        Parse the given input text. lexer is a Lexer object that produces tokens
+        Parse the given input tokens.
         '''
         lookahead = None                         # Current lookahead symbol
         lookaheadstack = []                      # Stack of lookahead symbols
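Under the new signature, parse() consumes a token stream instead of holding a reference to the lexer itself. Presumably usage becomes something like the following (CalcLexer/CalcParser as in the docs example):

    lexer = CalcLexer()
    parser = CalcParser()

    # Old: parser.parse(lexer), with parse() calling iter() internally.
    # New: pass the token stream explicitly.
    result = parser.parse(lexer.tokenize('x = 2 + 3'))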
@@ -1801,10 +1820,6 @@ class Parser(metaclass=ParserMeta):
         pslice = YaccProduction(None)            # Production object passed to grammar rules
         errorcount = 0                           # Used during error recovery
 
-        # Save a local reference of the lexer being used
-        self.lexer = lexer
-        tokens = iter(self.lexer)
-
         # Set up the state and symbol stacks
         self.statestack = statestack = []        # Stack of parsing states
         self.symstack = symstack = []            # Stack of grammar symbols
@@ -1816,7 +1831,6 @@ class Parser(metaclass=ParserMeta):
             # Get the next symbol on the input.  If a lookahead symbol
             # is already set, we just use that. Otherwise, we'll pull
             # the next token off of the lookaheadstack or from the lexer
-
             if self.state not in defaulted_states:
                 if not lookahead:
                     if not lookaheadstack:
@@ -1852,74 +1866,22 @@ class Parser(metaclass=ParserMeta):
                 self.production = p = prod[-t]
                 pname = p.name
                 plen  = p.len
+                pslice._namemap = p.namemap
 
                 # Call the production function
-                sym = YaccSymbol()
-                sym.type = pname       # Production name
-                sym.value = None
-
+                pslice._slice = symstack[-plen:] if plen else []
                 if plen:
-                    targ = symstack[-plen-1:]
-                    targ[0] = sym
-
-                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-                    # The code enclosed in this section is duplicated
-                    # below as a performance optimization. Make sure
-                    # changes get made in both locations.
-
-                    pslice.slice = targ
-
-                    try:
-                        # Call the grammar rule with our special slice object
-                        del symstack[-plen:]
-                        p.func(self, pslice)
-                        del statestack[-plen:]
-                        symstack.append(sym)
-                        self.state = goto[statestack[-1]][pname]
-                        statestack.append(self.state)
-                    except SyntaxError:
-                        # If an error was set. Enter error recovery state
-                        lookaheadstack.append(lookahead)
-                        symstack.extend(targ[1:-1])
-                        statestack.pop()
-                        self.state = statestack[-1]
-                        sym.type = 'error'
-                        sym.value = 'error'
-                        lookahead = sym
-                        errorcount = ERROR_COUNT
-                        self.errorok = False
-                    continue
-                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-                else:
-
-                    targ = [sym]
-
-                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-                    # The code enclosed in this section is duplicated
-                    # above as a performance optimization. Make sure
-                    # changes get made in both locations.
-
-                    pslice.slice = targ
-
-                    try:
-                        # Call the grammar rule with our special slice object
-                        p.func(self, pslice)
-                        symstack.append(sym)
-                        self.state = goto[statestack[-1]][pname]
-                        statestack.append(self.state)
-                    except SyntaxError:
-                        # If an error was set. Enter error recovery state
-                        lookaheadstack.append(lookahead)
-                        statestack.pop()
-                        self.state = statestack[-1]
-                        sym.type = 'error'
-                        sym.value = 'error'
-                        lookahead = sym
-                        errorcount = ERROR_COUNT
-                        self.errorok = False
-                    continue
-                    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+                    del symstack[-plen:]
+                    del statestack[-plen:]
+
+                sym = YaccSymbol()
+                sym.type = pname
+                sym.value = p.func(self, pslice)
+                symstack.append(sym)
+                self.state = goto[statestack[-1]][pname]
+                statestack.append(self.state)
+                continue
 
                 if t == 0:
                     n = symstack[-1]