Refinement of EBNF extensions

This commit is contained in:
David Beazley 2020-03-07 06:28:19 -06:00
parent a2cdf52d0f
commit 39ffd0361a
4 changed files with 92 additions and 39 deletions

29
CHANGES
View File

@ -6,26 +6,27 @@ Version 0.5
@('expr { COMMA expr }')
def exprlist(p):
return [ p.expr ] + [e.expr for e in p[1]]
return [ p.expr0 ] + p.expr1
In this code, the { ... } means zero-or-more repetitions.
It produces a list of matches that must be accessed by
position index (p[1] in this example. p[0] is 'expr').
The elements of the list are named tuples with attribute
names that match the enclosed grammar symbols (e.g., e.expr
in the example).
It turns all symbols inside into lists. So, instead of
representing a single value, p.expr1 is now a list of
values.
An optional value can be enclosed in brackets like this:
@('NAME LPAREN [ exprlist ] RPAREN')
def function_call(p):
args = p[2] if p[2] else []
name = p.NAME
print('Calling:', name, args)
@('VAR NAME [ EQUAL expr ] SEMI')
def variable_declaration(p):
print(f"Defining {p.NAME}. Initial value={p.expr}")
In this case, p[2] contains the optional value. If not present,
the value is None. If present, it is a tuple of values
or a single value (if only one symbol).
In this case, all symbols inside [ ... ] either have a value
if present or are assigned to None if missing.
In both cases, you continue to use the same name indexing
scheme used by the rest of SLY. For example, in the first
example above, you use "expr0" and "expr1" to refer to the
different "expr" symbols since that name appears in more
than one place.
Version 0.4
-----------

View File

@ -871,6 +871,38 @@ string. However,writing an "empty" rule and using "empty" to denote an
empty production may be easier to read and more clearly state your
intention.
EBNF Features (Optionals and Repeats)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Certain grammar features occur with some frequency. For example, suppose you want to
have an optional item as shown in the previous section. An alternate way to specify
it is to enclose one or more symbols in [ ] like this::
@_('[ item ] grok')
def spam(self, p):
if p.item is not None:
print("item was given and has value", p.item)
else:
print("item was not given")
@_('whatever')
def item(self, p):
...
In this case, the value of ``p.item`` is set to ``None`` if the value wasn't supplied.
Otherwise, it will have the value returned by the ``item`` rule below.
You can also encode repetitions. For example, a common construction is a
list of comma separated expressions. To parse that, you could write::
@_('expr { COMMA expr }')
def exprlist(self, p):
return [p.expr0] + p.expr1
In this example, the ``{ COMMA expr }`` represents zero or more repetitions
of a rule. The value of all symbols inside is now a list. So, ``p.expr1``
is a list of all expressions matched inside the braces, while ``p.expr0`` is the first expression.
Dealing With Ambiguous Grammars
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -33,7 +33,7 @@
import sys
import inspect
from collections import OrderedDict, defaultdict, namedtuple
from collections import OrderedDict, defaultdict, Counter
__all__ = [ 'Parser' ]
@ -145,7 +145,7 @@ class YaccProduction:
def __getattr__(self, name):
if name in self._namemap:
return self._slice[self._namemap[name]].value
return self._namemap[name](self._slice)
else:
nameset = '{' + ', '.join(self._namemap) + '}'
raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')
@ -209,16 +209,36 @@ class Production(object):
if s not in self.usyms:
self.usyms.append(s)
# Create a dict mapping symbol names to indices
m = {}
for key, indices in symmap.items():
if len(indices) == 1:
m[key] = indices[0]
else:
for n, index in enumerate(indices):
m[key+str(n)] = index
# Create a name mapping
# First determine (in advance) if there are duplicate names
namecount = defaultdict(int)
for key in self.prod:
namecount[key] += 1
if key in _name_aliases:
for key in _name_aliases[key]:
namecount[key] += 1
self.namemap = m
# Now, walk through the names and generate accessor functions
nameuse = defaultdict(int)
namemap = { }
for index, key in enumerate(self.prod):
if namecount[key] > 1:
k = f'{key}{nameuse[key]}'
nameuse[key] += 1
else:
k = key
namemap[k] = lambda s,i=index: s[i].value
if key in _name_aliases:
for n, alias in enumerate(_name_aliases[key]):
if namecount[alias] > 1:
k = f'{alias}{nameuse[alias]}'
nameuse[alias] += 1
else:
k = alias
# The value is either a list (for repetition) or a tuple for optional
namemap[k] = lambda s,i=index,n=n: ([x[n] for x in s[i].value]) if isinstance(s[i].value, list) else s[i].value[n]
self.namemap = namemap
# List of all LR items for the production
self.lr_items = []
@ -1606,6 +1626,10 @@ def _unique_names(names):
indices[name] += 1
return newnames
# Dictionary mapping name aliases generated by EBNF rules.
_name_aliases = { }
def _generate_repeat_rules(symbols):
'''
Symbols is a list of grammar symbols [ symbols ]. This
@ -1635,6 +1659,8 @@ def _generate_repeat_rules(symbols):
iname = f'_{_gencount}_item'
symtext = ' '.join(symbols)
_name_aliases[name] = symbols
productions = [ ]
_ = _decorator
@ -1661,14 +1687,9 @@ def _generate_repeat_rules(symbols):
productions.extend(_collect_grammar_rules(many))
productions.extend(_collect_grammar_rules(many2))
utuple = namedtuple('syms', _unique_names(symbols))
@_(f'{iname} : {symtext}')
def item(self, p):
if len(p) == 1:
return p[0]
else:
return utuple(*p)
return tuple(p)
productions.extend(_collect_grammar_rules(item))
return name, productions
@ -1691,21 +1712,20 @@ def _generate_optional_rules(symbols):
name = f'_{_gencount}_optional'
symtext = ' '.join(symbols)
_name_aliases[name] = symbols
productions = [ ]
_ = _decorator
utuple = namedtuple('syms', _unique_names(symbols))
no_values = (None,) * len(symbols)
@_(f'{name} : {symtext}')
def optional(self, p):
if len(p) == 1:
return p[0]
else:
return utuple(*p)
return tuple(p)
@_(f'{name} : ')
def optional2(self, p):
return None
return no_values
productions.extend(_collect_grammar_rules(optional))
productions.extend(_collect_grammar_rules(optional2))

View File

@ -56,11 +56,11 @@ class CalcParser(Parser):
@_('ID "(" [ arglist ] ")"')
def statement(self, p):
return (p.ID, p[2])
return (p.ID, p.arglist)
@_('expr { COMMA expr }')
def arglist(self, p):
return [p.expr, *[e.expr for e in p[1]]]
return [p.expr0, *p.expr1]
@_('expr')
def statement(self, p):