Experimental EBNF features added
parent 9944b6239c
commit a2cdf52d0f

CHANGES | 29
@@ -1,3 +1,32 @@
+Version 0.5
+-----------
+03/06/2020 Added experimental support for EBNF repetition and optional
+           syntax. For example, here is a rule for a comma-separated
+           expression list:
+
+               @('expr { COMMA expr }')
+               def exprlist(p):
+                   return [ p.expr ] + [e.expr for e in p[1]]
+
+           In this code, the { ... } means zero-or-more repetitions.
+           It produces a list of matches that must be accessed by
+           position index (p[1] in this example. p[0] is 'expr').
+           The elements of the list are named tuples with attribute
+           names that match the enclosed grammar symbols (e.g., e.expr
+           in the example).
+
+           An optional value can be enclosed in brackets like this:
+
+               @('NAME LPAREN [ exprlist ] RPAREN')
+               def function_call(p):
+                   args = p[2] if p[2] else []
+                   name = p.NAME
+                   print('Calling:', name, args)
+
+           In this case, p[2] contains the optional value. If not present,
+           the value is None. If present, it is a tuple of values
+           or a single value (if only one symbol).
+
 Version 0.4
 -----------
 04/09/2019 Fixed very mysterious error message that resulted if you
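
For context, here is how the two changelog examples above fit together in a
complete parser. This sketch is not part of the commit; the lexer and the
NUMBER-only expr rule are invented here so the example runs standalone:

    from sly import Lexer, Parser

    class FuncLexer(Lexer):
        tokens = { NAME, NUMBER, COMMA, LPAREN, RPAREN }
        ignore = ' \t'
        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
        NUMBER = r'\d+'
        COMMA = r','
        LPAREN = r'\('
        RPAREN = r'\)'

    class FuncParser(Parser):
        tokens = FuncLexer.tokens

        # '[ exprlist ]' matches zero or one exprlist; p[2] is None when absent
        @_('NAME LPAREN [ exprlist ] RPAREN')
        def function_call(self, p):
            args = p[2] if p[2] else []
            return (p.NAME, args)

        # '{ COMMA expr }' matches zero or more groups; each element of p[1]
        # is a named tuple, so e.expr picks out the repeated expression
        @_('expr { COMMA expr }')
        def exprlist(self, p):
            return [p.expr, *[e.expr for e in p[1]]]

        @_('NUMBER')
        def expr(self, p):
            return int(p.NUMBER)

    lexer = FuncLexer()
    parser = FuncParser()
    print(parser.parse(lexer.tokenize('f(1, 2, 3)')))   # ('f', [1, 2, 3])
    print(parser.parse(lexer.tokenize('f()')))          # ('f', [])
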
sly/lex.py | 21
@@ -360,6 +360,7 @@ class Lexer(metaclass=LexerMeta):
     def tokenize(self, text, lineno=1, index=0):
         _ignored_tokens = _master_re = _ignore = _token_funcs = _literals = _remapping = None
+
         # --- Support for state changes
         def _set_state(cls):
             nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping
             _ignored_tokens = cls._ignored_tokens
@@ -371,8 +372,26 @@ class Lexer(metaclass=LexerMeta):

         self.__set_state = _set_state
         _set_state(type(self))
-        self.text = text
+
+        # --- Support for backtracking
+        _mark_stack = []
+        def _mark():
+            _mark_stack.append((type(self), index, lineno))
+        self.mark = _mark
+
+        def _accept():
+            _mark_stack.pop()
+        self.accept = _accept
+
+        def _reject():
+            nonlocal index, lineno
+            cls, index, lineno = _mark_stack[-1]
+            _set_state(cls)
+        self.reject = _reject
+
+
         # --- Main tokenization function
+        self.text = text
         try:
             while True:
                 try:
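
The tokenize() changes above install three experimental hooks, mark(),
accept(), and reject(), on the lexer instance while tokenization is in
progress, so a consumer can checkpoint the input position and later roll back
to it. The commit does not document them; the sketch below shows how they
appear intended to be used (the WordLexer class and input string are invented
for illustration). Note the hooks only exist once the token generator has
started running:

    from sly import Lexer

    class WordLexer(Lexer):
        tokens = { WORD, NUMBER }
        ignore = ' '
        WORD = r'[a-zA-Z]+'
        NUMBER = r'\d+'

    lexer = WordLexer()
    stream = lexer.tokenize('abc 123 def')

    print(next(stream).value)   # 'abc'
    lexer.mark()                # checkpoint: saves (state, index, lineno)
    print(next(stream).value)   # '123'
    lexer.reject()              # roll back to the most recent checkpoint
    print(next(stream).value)   # '123' again, re-lexed from the mark
    lexer.accept()              # discard the checkpoint

reject() restores the saved position without popping it, so a backtracking
consumer can retry the same alternative more than once; accept() is what
finally discards the mark.
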
sly/yacc.py | 175
@@ -33,7 +33,7 @@

 import sys
 import inspect
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict, defaultdict, namedtuple

 __all__ = [ 'Parser' ]

@@ -1551,14 +1551,166 @@ def _collect_grammar_rules(func):
         lineno = unwrapped.__code__.co_firstlineno
         for rule, lineno in zip(func.rules, range(lineno+len(func.rules)-1, 0, -1)):
             syms = rule.split()
+            ebnf_prod = []
+            while ('{' in syms) or ('[' in syms):
+                for s in syms:
+                    if s == '[':
+                        syms, prod = _replace_ebnf_optional(syms)
+                        ebnf_prod.extend(prod)
+                        break
+                    elif s == '{':
+                        syms, prod = _replace_ebnf_repeat(syms)
+                        ebnf_prod.extend(prod)
+                        break
+
             if syms[1:2] == [':'] or syms[1:2] == ['::=']:
                 grammar.append((func, filename, lineno, syms[0], syms[2:]))
             else:
                 grammar.append((func, filename, lineno, prodname, syms))
+            grammar.extend(ebnf_prod)

         func = getattr(func, 'next_func', None)

     return grammar
+
+# Replace EBNF repetition
+def _replace_ebnf_repeat(syms):
+    syms = list(syms)
+    first = syms.index('{')
+    end = syms.index('}', first)
+    symname, prods = _generate_repeat_rules(syms[first+1:end])
+    syms[first:end+1] = [symname]
+    return syms, prods
+
+def _replace_ebnf_optional(syms):
+    syms = list(syms)
+    first = syms.index('[')
+    end = syms.index(']', first)
+    symname, prods = _generate_optional_rules(syms[first+1:end])
+    syms[first:end+1] = [symname]
+    return syms, prods
+
+# Generate grammar rules for repeated items
+_gencount = 0
+
+def _unique_names(names):
+    from collections import defaultdict, Counter
+    counts = Counter(names)
+    indices = defaultdict(int)
+    newnames = []
+    for name in names:
+        if counts[name] == 1:
+            newnames.append(name)
+        else:
+            newnames.append(f'{name}{indices[name]}')
+            indices[name] += 1
+    return newnames
+
+def _generate_repeat_rules(symbols):
+    '''
+    Symbols is a list of grammar symbols [ symbols ]. This
+    generates code corresponding to the following grammar construction:
+
+       @('repeat : many')
+       def repeat(self, p):
+           return p.many
+
+       @('repeat :')
+       def repeat(self, p):
+           return []
+
+       @('many : many symbols')
+       def many(self, p):
+           p.many.append(symbols)
+           return p.many
+
+       @('many : symbols')
+       def many(self, p):
+           return [ p.symbols ]
+    '''
+    global _gencount
+    _gencount += 1
+    name = f'_{_gencount}_repeat'
+    oname = f'_{_gencount}_items'
+    iname = f'_{_gencount}_item'
+    symtext = ' '.join(symbols)
+
+    productions = [ ]
+    _ = _decorator
+
+    @_(f'{name} : {oname}')
+    def repeat(self, p):
+        return getattr(p, oname)
+
+    @_(f'{name} : ')
+    def repeat2(self, p):
+        return []
+    productions.extend(_collect_grammar_rules(repeat))
+    productions.extend(_collect_grammar_rules(repeat2))
+
+    @_(f'{oname} : {oname} {iname}')
+    def many(self, p):
+        items = getattr(p, oname)
+        items.append(getattr(p, iname))
+        return items
+
+    @_(f'{oname} : {iname}')
+    def many2(self, p):
+        return [ getattr(p, iname) ]
+
+    productions.extend(_collect_grammar_rules(many))
+    productions.extend(_collect_grammar_rules(many2))
+
+    utuple = namedtuple('syms', _unique_names(symbols))
+
+    @_(f'{iname} : {symtext}')
+    def item(self, p):
+        if len(p) == 1:
+            return p[0]
+        else:
+            return utuple(*p)
+
+    productions.extend(_collect_grammar_rules(item))
+    return name, productions
+
+def _generate_optional_rules(symbols):
+    '''
+    Symbols is a list of grammar symbols [ symbols ]. This
+    generates code corresponding to the following grammar construction:
+
+       @('optional : symbols')
+       def optional(self, p):
+           return p.symbols
+
+       @('optional :')
+       def optional(self, p):
+           return None
+    '''
+    global _gencount
+    _gencount += 1
+    name = f'_{_gencount}_optional'
+    symtext = ' '.join(symbols)
+
+    productions = [ ]
+    _ = _decorator
+
+    utuple = namedtuple('syms', _unique_names(symbols))
+
+    @_(f'{name} : {symtext}')
+    def optional(self, p):
+        if len(p) == 1:
+            return p[0]
+        else:
+            return utuple(*p)
+
+    @_(f'{name} : ')
+    def optional2(self, p):
+        return None
+
+    productions.extend(_collect_grammar_rules(optional))
+    productions.extend(_collect_grammar_rules(optional2))
+    return name, productions

 class ParserMetaDict(dict):
     '''
     Dictionary that allows decorated grammar rule functions to be overloaded
@@ -1576,17 +1728,24 @@ class ParserMetaDict(dict):
         else:
             return super().__getitem__(key)

+def _decorator(rule, *extra):
+    rules = [rule, *extra]
+    def decorate(func):
+        func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
+        return func
+    return decorate
+
 class ParserMeta(type):
     @classmethod
     def __prepare__(meta, *args, **kwargs):
         d = ParserMetaDict()
-        def _(rule, *extra):
-            rules = [rule, *extra]
-            def decorate(func):
-                func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
-                return func
-            return decorate
-        d['_'] = _
+        # def _(rule, *extra):
+        #     rules = [rule, *extra]
+        #     def decorate(func):
+        #         func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
+        #         return func
+        #     return decorate
+        d['_'] = _decorator
        return d

     def __new__(meta, clsname, bases, attributes):
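
To make the rewriting above concrete: when _collect_grammar_rules() encounters
a rule such as 'arglist : expr { COMMA expr }', the braces are spliced out and
replaced by a generated nonterminal, and _generate_repeat_rules() contributes
the supporting productions. Ignoring the exact _gencount numbering, the
resulting grammar is roughly:

    arglist   : expr _1_repeat

    _1_repeat : _1_items          # yields the collected list
    _1_repeat :                   # empty repetition yields []
    _1_items  : _1_items _1_item
    _1_items  : _1_item
    _1_item   : COMMA expr        # one named tuple per iteration

Each _1_item with more than one enclosed symbol reduces to a namedtuple whose
fields come from _unique_names(): distinct symbols keep their own names (so
e.COMMA and e.expr both work), while duplicated symbols are numbered, e.g.
'expr expr' becomes fields expr0 and expr1. An item with a single enclosed
symbol is returned directly rather than wrapped in a tuple. A bracketed group
goes through _generate_optional_rules() the same way, except that the empty
production returns None instead of [].
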
@@ -3,7 +3,7 @@ from sly import Lexer, Parser

 class CalcLexer(Lexer):
     # Set of token names.   This is always required
-    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN }
+    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, COMMA }
     literals = { '(', ')' }

     # String containing ignored characters between tokens
@@ -16,6 +16,7 @@ class CalcLexer(Lexer):
     TIMES = r'\*'
     DIVIDE = r'/'
     ASSIGN = r'='
+    COMMA = r','

     @_(r'\d+')
     def NUMBER(self, t):
@@ -53,6 +54,14 @@ class CalcParser(Parser):
     def statement(self, p):
         self.names[p.ID] = p.expr

+    @_('ID "(" [ arglist ] ")"')
+    def statement(self, p):
+        return (p.ID, p[2])
+
+    @_('expr { COMMA expr }')
+    def arglist(self, p):
+        return [p.expr, *[e.expr for e in p[1]]]
+
     @_('expr')
     def statement(self, p):
         return p.expr
@@ -109,6 +118,18 @@ def test_simple():
     result = parser.parse(lexer.tokenize('3 + 4 * (5 + 6)'))
     assert result == 47

+def test_ebnf():
+    lexer = CalcLexer()
+    parser = CalcParser()
+    result = parser.parse(lexer.tokenize('a()'))
+    assert result == ('a', None)
+
+    result = parser.parse(lexer.tokenize('a(2+3)'))
+    assert result == ('a', [5])
+
+    result = parser.parse(lexer.tokenize('a(2+3, 4+5)'))
+    assert result == ('a', [5, 9])
+
 def test_parse_error():
     lexer = CalcLexer()
     parser = CalcParser()
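
One behavioral detail worth noting in test_ebnf(): 'a()' yields ('a', None)
rather than ('a', []), because the absent optional group [ arglist ] produces
None and the statement rule returns p[2] unchanged; arglist is only reduced
when at least one expression is present. If a list in all cases were wanted,
the rule could normalize as in the changelog's function_call example (a
hypothetical variant, not what this commit's test asserts):

    @_('ID "(" [ arglist ] ")"')
    def statement(self, p):
        return (p.ID, p[2] if p[2] else [])
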