Refinement of EBNF extensions
This commit is contained in:
parent
a2cdf52d0f
commit
39ffd0361a
29
CHANGES
29
CHANGES
@ -6,26 +6,27 @@ Version 0.5
|
|||||||
|
|
||||||
@('expr { COMMA expr }')
|
@('expr { COMMA expr }')
|
||||||
def exprlist(p):
|
def exprlist(p):
|
||||||
return [ p.expr ] + [e.expr for e in p[1]]
|
return [ p.expr0 ] + p.expr1
|
||||||
|
|
||||||
In this code, the { ... } means zero-or-more repetitions.
|
In this code, the { ... } means zero-or-more repetitions.
|
||||||
It produces a list of matches that must be accessed by
|
It turns all symbols inside into lists. So, instead of
|
||||||
position index (p[1] in this example. p[0] is 'expr').
|
representing a single value, p.expr1 is now a list of
|
||||||
The elements of the list are named tuples with attribute
|
values.
|
||||||
names that match the enclosed grammar symbols (e.g., e.expr
|
|
||||||
in the example).
|
|
||||||
|
|
||||||
An optional value can be enclosed in brackets like this:
|
An optional value can be enclosed in brackets like this:
|
||||||
|
|
||||||
@('NAME LPAREN [ exprlist ] RPAREN')
|
@('VAR NAME [ EQUAL expr ] SEMI')
|
||||||
def function_call(p):
|
def variable_declaration(p):
|
||||||
args = p[2] if p[2] else []
|
print(f"Defining {p.NAME}. Initial value={p.expr}")
|
||||||
name = p.NAME
|
|
||||||
print('Calling:', name, args)
|
|
||||||
|
|
||||||
In this case, p[2] contains the optional value. If not present,
|
In this case, all symbols inside [ ... ] either have a value
|
||||||
the value is None. If present, it is a tuple of values
|
if present or are assigned to None if missing.
|
||||||
or a single value (if only one symbol).
|
|
||||||
|
In both cases, you continue to use the same name indexing
|
||||||
|
scheme used by the rest of SLY. For example, in the first
|
||||||
|
example above, you use "expr0" and "expr1" to refer to the
|
||||||
|
different "expr" symbols since that name appears in more
|
||||||
|
than one place.
|
||||||
|
|
||||||
Version 0.4
|
Version 0.4
|
||||||
-----------
|
-----------
|
||||||
|
32
docs/sly.rst
32
docs/sly.rst
@ -871,6 +871,38 @@ string. However,writing an "empty" rule and using "empty" to denote an
|
|||||||
empty production may be easier to read and more clearly state your
|
empty production may be easier to read and more clearly state your
|
||||||
intention.
|
intention.
|
||||||
|
|
||||||
|
EBNF Features (Optionals and Repeats)
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
Certain grammar features occur with some frequency. For example, suppose you want to
|
||||||
|
have an optional item as shown in the previous section. An alternate way to specify
|
||||||
|
it is to enclose one or more symbols in [ ] like this::
|
||||||
|
|
||||||
|
@_('[ item ] grok')
|
||||||
|
def spam(self, p):
|
||||||
|
if p.item is not None:
|
||||||
|
print("item was given and has value", p.item)
|
||||||
|
else:
|
||||||
|
print("item was not given")
|
||||||
|
|
||||||
|
@_('whatever')
|
||||||
|
def item(self, p):
|
||||||
|
...
|
||||||
|
|
||||||
|
In this case, the value of ``p.item`` is set to ``None`` if the value wasn't supplied.
|
||||||
|
Otherwise, it will have the value returned by the ``item`` rule below.
|
||||||
|
|
||||||
|
You can also encode repetitions. For example, a common construction is a
|
||||||
|
list of comma separated expressions. To parse that, you could write::
|
||||||
|
|
||||||
|
@_('expr { COMMA expr }')
|
||||||
|
def exprlist(self, p):
|
||||||
|
return [p.expr0] + p.expr1
|
||||||
|
|
||||||
|
In this example, the ``{ COMMA expr }`` represents zero or more repetitions
|
||||||
|
of a rule. The value of all symbols inside is now a list. So, ``p.expr1``
|
||||||
|
is a list of all expressions matched inside the repetition.
|
||||||
|
|
||||||
Dealing With Ambiguous Grammars
|
Dealing With Ambiguous Grammars
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
66
sly/yacc.py
66
sly/yacc.py
@ -33,7 +33,7 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
import inspect
|
import inspect
|
||||||
from collections import OrderedDict, defaultdict, namedtuple
|
from collections import OrderedDict, defaultdict, Counter
|
||||||
|
|
||||||
__all__ = [ 'Parser' ]
|
__all__ = [ 'Parser' ]
|
||||||
|
|
||||||
@ -145,7 +145,7 @@ class YaccProduction:
|
|||||||
|
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
if name in self._namemap:
|
if name in self._namemap:
|
||||||
return self._slice[self._namemap[name]].value
|
return self._namemap[name](self._slice)
|
||||||
else:
|
else:
|
||||||
nameset = '{' + ', '.join(self._namemap) + '}'
|
nameset = '{' + ', '.join(self._namemap) + '}'
|
||||||
raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')
|
raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')
|
||||||
@ -209,16 +209,36 @@ class Production(object):
|
|||||||
if s not in self.usyms:
|
if s not in self.usyms:
|
||||||
self.usyms.append(s)
|
self.usyms.append(s)
|
||||||
|
|
||||||
# Create a dict mapping symbol names to indices
|
# Create a name mapping
|
||||||
m = {}
|
# First determine (in advance) if there are duplicate names
|
||||||
for key, indices in symmap.items():
|
namecount = defaultdict(int)
|
||||||
if len(indices) == 1:
|
for key in self.prod:
|
||||||
m[key] = indices[0]
|
namecount[key] += 1
|
||||||
else:
|
if key in _name_aliases:
|
||||||
for n, index in enumerate(indices):
|
for key in _name_aliases[key]:
|
||||||
m[key+str(n)] = index
|
namecount[key] += 1
|
||||||
|
|
||||||
self.namemap = m
|
# Now, walk through the names and generate accessor functions
|
||||||
|
nameuse = defaultdict(int)
|
||||||
|
namemap = { }
|
||||||
|
for index, key in enumerate(self.prod):
|
||||||
|
if namecount[key] > 1:
|
||||||
|
k = f'{key}{nameuse[key]}'
|
||||||
|
nameuse[key] += 1
|
||||||
|
else:
|
||||||
|
k = key
|
||||||
|
namemap[k] = lambda s,i=index: s[i].value
|
||||||
|
if key in _name_aliases:
|
||||||
|
for n, alias in enumerate(_name_aliases[key]):
|
||||||
|
if namecount[alias] > 1:
|
||||||
|
k = f'{alias}{nameuse[alias]}'
|
||||||
|
nameuse[alias] += 1
|
||||||
|
else:
|
||||||
|
k = alias
|
||||||
|
# The value is either a list (for repetition) or a tuple for optional
|
||||||
|
namemap[k] = lambda s,i=index,n=n: ([x[n] for x in s[i].value]) if isinstance(s[i].value, list) else s[i].value[n]
|
||||||
|
|
||||||
|
self.namemap = namemap
|
||||||
|
|
||||||
# List of all LR items for the production
|
# List of all LR items for the production
|
||||||
self.lr_items = []
|
self.lr_items = []
|
||||||
@ -1606,6 +1626,10 @@ def _unique_names(names):
|
|||||||
indices[name] += 1
|
indices[name] += 1
|
||||||
return newnames
|
return newnames
|
||||||
|
|
||||||
|
# Dictionary mapping name aliases generated by EBNF rules.
|
||||||
|
|
||||||
|
_name_aliases = { }
|
||||||
|
|
||||||
def _generate_repeat_rules(symbols):
|
def _generate_repeat_rules(symbols):
|
||||||
'''
|
'''
|
||||||
Symbols is a list of grammar symbols [ symbols ]. This
|
Symbols is a list of grammar symbols [ symbols ]. This
|
||||||
@ -1635,6 +1659,8 @@ def _generate_repeat_rules(symbols):
|
|||||||
iname = f'_{_gencount}_item'
|
iname = f'_{_gencount}_item'
|
||||||
symtext = ' '.join(symbols)
|
symtext = ' '.join(symbols)
|
||||||
|
|
||||||
|
_name_aliases[name] = symbols
|
||||||
|
|
||||||
productions = [ ]
|
productions = [ ]
|
||||||
_ = _decorator
|
_ = _decorator
|
||||||
|
|
||||||
@ -1661,14 +1687,9 @@ def _generate_repeat_rules(symbols):
|
|||||||
productions.extend(_collect_grammar_rules(many))
|
productions.extend(_collect_grammar_rules(many))
|
||||||
productions.extend(_collect_grammar_rules(many2))
|
productions.extend(_collect_grammar_rules(many2))
|
||||||
|
|
||||||
utuple = namedtuple('syms', _unique_names(symbols))
|
|
||||||
|
|
||||||
@_(f'{iname} : {symtext}')
|
@_(f'{iname} : {symtext}')
|
||||||
def item(self, p):
|
def item(self, p):
|
||||||
if len(p) == 1:
|
return tuple(p)
|
||||||
return p[0]
|
|
||||||
else:
|
|
||||||
return utuple(*p)
|
|
||||||
|
|
||||||
productions.extend(_collect_grammar_rules(item))
|
productions.extend(_collect_grammar_rules(item))
|
||||||
return name, productions
|
return name, productions
|
||||||
@ -1691,21 +1712,20 @@ def _generate_optional_rules(symbols):
|
|||||||
name = f'_{_gencount}_optional'
|
name = f'_{_gencount}_optional'
|
||||||
symtext = ' '.join(symbols)
|
symtext = ' '.join(symbols)
|
||||||
|
|
||||||
|
_name_aliases[name] = symbols
|
||||||
|
|
||||||
productions = [ ]
|
productions = [ ]
|
||||||
_ = _decorator
|
_ = _decorator
|
||||||
|
|
||||||
utuple = namedtuple('syms', _unique_names(symbols))
|
no_values = (None,) * len(symbols)
|
||||||
|
|
||||||
@_(f'{name} : {symtext}')
|
@_(f'{name} : {symtext}')
|
||||||
def optional(self, p):
|
def optional(self, p):
|
||||||
if len(p) == 1:
|
return tuple(p)
|
||||||
return p[0]
|
|
||||||
else:
|
|
||||||
return utuple(*p)
|
|
||||||
|
|
||||||
@_(f'{name} : ')
|
@_(f'{name} : ')
|
||||||
def optional2(self, p):
|
def optional2(self, p):
|
||||||
return None
|
return no_values
|
||||||
|
|
||||||
productions.extend(_collect_grammar_rules(optional))
|
productions.extend(_collect_grammar_rules(optional))
|
||||||
productions.extend(_collect_grammar_rules(optional2))
|
productions.extend(_collect_grammar_rules(optional2))
|
||||||
|
@ -56,11 +56,11 @@ class CalcParser(Parser):
|
|||||||
|
|
||||||
@_('ID "(" [ arglist ] ")"')
|
@_('ID "(" [ arglist ] ")"')
|
||||||
def statement(self, p):
|
def statement(self, p):
|
||||||
return (p.ID, p[2])
|
return (p.ID, p.arglist)
|
||||||
|
|
||||||
@_('expr { COMMA expr }')
|
@_('expr { COMMA expr }')
|
||||||
def arglist(self, p):
|
def arglist(self, p):
|
||||||
return [p.expr, *[e.expr for e in p[1]]]
|
return [p.expr0, *p.expr1]
|
||||||
|
|
||||||
@_('expr')
|
@_('expr')
|
||||||
def statement(self, p):
|
def statement(self, p):
|
||||||
|
Loading…
Reference in New Issue
Block a user