Refinement of EBNF extensions
This commit is contained in:
parent
a2cdf52d0f
commit
39ffd0361a
29
CHANGES
29
CHANGES
@ -6,26 +6,27 @@ Version 0.5
|
||||
|
||||
@_('expr { COMMA expr }')
|
||||
def exprlist(p):
|
||||
return [ p.expr ] + [e.expr for e in p[1]]
|
||||
return [ p.expr0 ] + p.expr1
|
||||
|
||||
In this code, the { ... } means zero-or-more repetitions.
|
||||
It produces a list of matches that must be accessed by
|
||||
position index (p[1] in this example. p[0] is 'expr').
|
||||
The elements of the list are named tuples with attribute
|
||||
names that match the enclosed grammar symbols (e.g., e.expr
|
||||
in the example).
|
||||
It turns all symbols inside into lists. So, instead of
|
||||
representing a single value, p.expr1 is now a list of
|
||||
values.
|
||||
|
||||
An optional value can be enclosed in brackets like this:
|
||||
|
||||
@_('NAME LPAREN [ exprlist ] RPAREN')
|
||||
def function_call(p):
|
||||
args = p[2] if p[2] else []
|
||||
name = p.NAME
|
||||
print('Calling:', name, args)
|
||||
@_('VAR NAME [ EQUAL expr ] SEMI')
|
||||
def variable_declaration(p):
|
||||
print(f"Defining {p.NAME}. Initial value={p.expr}")
|
||||
|
||||
In this case, p[2] contains the optional value. If not present,
|
||||
the value is None. If present, it is a tuple of values
|
||||
or a single value (if only one symbol).
|
||||
In this case, all symbols inside [ ... ] either have a value
|
||||
if present or are assigned to None if missing.
|
||||
|
||||
In both cases, you continue to use the same name indexing
|
||||
scheme used by the rest of SLY. For example, in the first
|
||||
example above, you use "expr0" and "expr1" to refer to the
|
||||
different "expr" symbols since that name appears in more
|
||||
than one place.
|
||||
|
||||
Version 0.4
|
||||
-----------
|
||||
|
32
docs/sly.rst
32
docs/sly.rst
@ -871,6 +871,38 @@ string. However,writing an "empty" rule and using "empty" to denote an
|
||||
empty production may be easier to read and more clearly state your
|
||||
intention.
|
||||
|
||||
EBNF Features (Optionals and Repeats)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Certain grammar features occur with some frequency. For example, suppose you want to
|
||||
have an optional item as shown in the previous section. An alternate way to specify
|
||||
it is to enclose one or more symbols in [ ] like this::
|
||||
|
||||
@_('[ item ] grok')
|
||||
def spam(self, p):
|
||||
if p.item is not None:
|
||||
print("item was given and has value", p.item)
|
||||
else:
|
||||
print("item was not given")
|
||||
|
||||
@_('whatever')
|
||||
def item(self, p):
|
||||
...
|
||||
|
||||
In this case, the value of ``p.item`` is set to ``None`` if the value wasn't supplied.
|
||||
Otherwise, it will have the value returned by the ``item`` rule below.
|
||||
|
||||
You can also encode repetitions. For example, a common construction is a
|
||||
list of comma separated expressions. To parse that, you could write::
|
||||
|
||||
@_('expr { COMMA expr }')
|
||||
def exprlist(self, p):
|
||||
return [p.expr0] + p.expr1
|
||||
|
||||
In this example, the ``{ COMMA expr }`` represents zero or more repetitions
|
||||
of a rule. The value of all symbols inside is now a list. So, ``p.expr1``
|
||||
is a list of all expressions matched.
|
||||
|
||||
Dealing With Ambiguous Grammars
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
66
sly/yacc.py
66
sly/yacc.py
@ -33,7 +33,7 @@
|
||||
|
||||
import sys
|
||||
import inspect
|
||||
from collections import OrderedDict, defaultdict, namedtuple
|
||||
from collections import OrderedDict, defaultdict, Counter
|
||||
|
||||
__all__ = [ 'Parser' ]
|
||||
|
||||
@ -145,7 +145,7 @@ class YaccProduction:
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name in self._namemap:
|
||||
return self._slice[self._namemap[name]].value
|
||||
return self._namemap[name](self._slice)
|
||||
else:
|
||||
nameset = '{' + ', '.join(self._namemap) + '}'
|
||||
raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')
|
||||
@ -209,16 +209,36 @@ class Production(object):
|
||||
if s not in self.usyms:
|
||||
self.usyms.append(s)
|
||||
|
||||
# Create a dict mapping symbol names to indices
|
||||
m = {}
|
||||
for key, indices in symmap.items():
|
||||
if len(indices) == 1:
|
||||
m[key] = indices[0]
|
||||
else:
|
||||
for n, index in enumerate(indices):
|
||||
m[key+str(n)] = index
|
||||
# Create a name mapping
|
||||
# First determine (in advance) if there are duplicate names
|
||||
namecount = defaultdict(int)
|
||||
for key in self.prod:
|
||||
namecount[key] += 1
|
||||
if key in _name_aliases:
|
||||
for key in _name_aliases[key]:
|
||||
namecount[key] += 1
|
||||
|
||||
self.namemap = m
|
||||
# Now, walk through the names and generate accessor functions
|
||||
nameuse = defaultdict(int)
|
||||
namemap = { }
|
||||
for index, key in enumerate(self.prod):
|
||||
if namecount[key] > 1:
|
||||
k = f'{key}{nameuse[key]}'
|
||||
nameuse[key] += 1
|
||||
else:
|
||||
k = key
|
||||
namemap[k] = lambda s,i=index: s[i].value
|
||||
if key in _name_aliases:
|
||||
for n, alias in enumerate(_name_aliases[key]):
|
||||
if namecount[alias] > 1:
|
||||
k = f'{alias}{nameuse[alias]}'
|
||||
nameuse[alias] += 1
|
||||
else:
|
||||
k = alias
|
||||
# The value is either a list (for repetition) or a tuple for optional
|
||||
namemap[k] = lambda s,i=index,n=n: ([x[n] for x in s[i].value]) if isinstance(s[i].value, list) else s[i].value[n]
|
||||
|
||||
self.namemap = namemap
|
||||
|
||||
# List of all LR items for the production
|
||||
self.lr_items = []
|
||||
@ -1606,6 +1626,10 @@ def _unique_names(names):
|
||||
indices[name] += 1
|
||||
return newnames
|
||||
|
||||
# Dictionary mapping name aliases generated by EBNF rules.
|
||||
|
||||
_name_aliases = { }
|
||||
|
||||
def _generate_repeat_rules(symbols):
|
||||
'''
|
||||
Symbols is a list of grammar symbols [ symbols ]. This
|
||||
@ -1635,6 +1659,8 @@ def _generate_repeat_rules(symbols):
|
||||
iname = f'_{_gencount}_item'
|
||||
symtext = ' '.join(symbols)
|
||||
|
||||
_name_aliases[name] = symbols
|
||||
|
||||
productions = [ ]
|
||||
_ = _decorator
|
||||
|
||||
@ -1661,14 +1687,9 @@ def _generate_repeat_rules(symbols):
|
||||
productions.extend(_collect_grammar_rules(many))
|
||||
productions.extend(_collect_grammar_rules(many2))
|
||||
|
||||
utuple = namedtuple('syms', _unique_names(symbols))
|
||||
|
||||
@_(f'{iname} : {symtext}')
|
||||
def item(self, p):
|
||||
if len(p) == 1:
|
||||
return p[0]
|
||||
else:
|
||||
return utuple(*p)
|
||||
return tuple(p)
|
||||
|
||||
productions.extend(_collect_grammar_rules(item))
|
||||
return name, productions
|
||||
@ -1691,21 +1712,20 @@ def _generate_optional_rules(symbols):
|
||||
name = f'_{_gencount}_optional'
|
||||
symtext = ' '.join(symbols)
|
||||
|
||||
_name_aliases[name] = symbols
|
||||
|
||||
productions = [ ]
|
||||
_ = _decorator
|
||||
|
||||
utuple = namedtuple('syms', _unique_names(symbols))
|
||||
no_values = (None,) * len(symbols)
|
||||
|
||||
@_(f'{name} : {symtext}')
|
||||
def optional(self, p):
|
||||
if len(p) == 1:
|
||||
return p[0]
|
||||
else:
|
||||
return utuple(*p)
|
||||
return tuple(p)
|
||||
|
||||
@_(f'{name} : ')
|
||||
def optional2(self, p):
|
||||
return None
|
||||
return no_values
|
||||
|
||||
productions.extend(_collect_grammar_rules(optional))
|
||||
productions.extend(_collect_grammar_rules(optional2))
|
||||
|
@ -56,11 +56,11 @@ class CalcParser(Parser):
|
||||
|
||||
@_('ID "(" [ arglist ] ")"')
|
||||
def statement(self, p):
|
||||
return (p.ID, p[2])
|
||||
return (p.ID, p.arglist)
|
||||
|
||||
@_('expr { COMMA expr }')
|
||||
def arglist(self, p):
|
||||
return [p.expr, *[e.expr for e in p[1]]]
|
||||
return [p.expr0, *p.expr1]
|
||||
|
||||
@_('expr')
|
||||
def statement(self, p):
|
||||
|
Loading…
Reference in New Issue
Block a user