diff --git a/CHANGES b/CHANGES index 46bf92a..63c8f7e 100644 --- a/CHANGES +++ b/CHANGES @@ -6,26 +6,27 @@ Version 0.5 @('expr { COMMA expr }') def exprlist(p): - return [ p.expr ] + [e.expr for e in p[1]] + return [ p.expr0 ] + p.expr1 In this code, the { ... } means zero-or-more repetitions. - It produces a list of matches that must be accessed by - position index (p[1] in this example. p[0] is 'expr'). - The elements of the list are named tuples with attribute - names that match the enclosed grammar symbols (e.g., e.expr - in the example). + It turns all symbols inside into lists. So, instead of + representing a single value, p.expr1 is now a list of + values. An optional value can be enclosed in brackets like this: - @('NAME LPAREN [ exprlist ] RPAREN') - def function_call(p): - args = p[2] if p[2] else [] - name = p.NAME - print('Calling:', name, args) + @('VAR NAME [ EQUAL expr ] SEMI') + def variable_declaration(p): + print(f"Defining {p.NAME}. Initial value={p.expr}") - In this case, p[2] contains the optional value. If not present, - the value is None. If present, it is a tuple of values - or a single value (if only one symbol). + In this case, all symbols inside [ ... ] either have a value + if present or are assigned to None if missing. + + In both cases, you continue to use the same name indexing + scheme used by the rest of SLY. For example, in the first + example above, you use "expr0" and "expr1" to refer to the + different "expr" symbols since that name appears in more + than one place. Version 0.4 ----------- diff --git a/docs/sly.rst b/docs/sly.rst index 8ee47a0..30e9e3e 100644 --- a/docs/sly.rst +++ b/docs/sly.rst @@ -871,6 +871,38 @@ string. However,writing an "empty" rule and using "empty" to denote an empty production may be easier to read and more clearly state your intention. +EBNF Features (Optionals and Repeats) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Certain grammar features occur with some frequency. 
For example, suppose you want to +have an optional item as shown in the previous section. An alternate way to specify +it is to enclose one or more symbols in [ ] like this:: + + @_('[ item ] grok') + def spam(self, p): + if p.item is not None: + print("item was given and has value", p.item) + else: + print("item was not given") + + @_('whatever') + def item(self, p): + ... + +In this case, the value of ``p.item`` is set to ``None`` if the value wasn't supplied. +Otherwise, it will have the value returned by the ``item`` rule below. + +You can also encode repetitions. For example, a common construction is a +list of comma-separated expressions. To parse that, you could write:: + + @_('expr { COMMA expr }') + def exprlist(self, p): + return [p.expr0] + p.expr1 + +In this example, the ``{ COMMA expr }`` represents zero or more repetitions +of a rule. The value of all symbols inside is now a list. So, ``p.expr1`` +is a list of all expressions matched inside the braces. + Dealing With Ambiguous Grammars ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sly/yacc.py b/sly/yacc.py index 00a9c8d..1bcf7f3 100644 --- a/sly/yacc.py +++ b/sly/yacc.py @@ -33,7 +33,7 @@ import sys import inspect -from collections import OrderedDict, defaultdict, namedtuple +from collections import OrderedDict, defaultdict, Counter __all__ = [ 'Parser' ] @@ -145,7 +145,7 @@ class YaccProduction: def __getattr__(self, name): if name in self._namemap: - return self._slice[self._namemap[name]].value + return self._namemap[name](self._slice) else: nameset = '{' + ', '.join(self._namemap) + '}' raise AttributeError(f'No symbol {name}. 
Must be one of {nameset}.') @@ -209,16 +209,36 @@ class Production(object): if s not in self.usyms: self.usyms.append(s) - # Create a dict mapping symbol names to indices - m = {} - for key, indices in symmap.items(): - if len(indices) == 1: - m[key] = indices[0] - else: - for n, index in enumerate(indices): - m[key+str(n)] = index + # Create a name mapping + # First determine (in advance) if there are duplicate names + namecount = defaultdict(int) + for key in self.prod: + namecount[key] += 1 + if key in _name_aliases: + for key in _name_aliases[key]: + namecount[key] += 1 - self.namemap = m + # Now, walk through the names and generate accessor functions + nameuse = defaultdict(int) + namemap = { } + for index, key in enumerate(self.prod): + if namecount[key] > 1: + k = f'{key}{nameuse[key]}' + nameuse[key] += 1 + else: + k = key + namemap[k] = lambda s,i=index: s[i].value + if key in _name_aliases: + for n, alias in enumerate(_name_aliases[key]): + if namecount[alias] > 1: + k = f'{alias}{nameuse[alias]}' + nameuse[alias] += 1 + else: + k = alias + # The value is either a list (for repetition) or a tuple for optional + namemap[k] = lambda s,i=index,n=n: ([x[n] for x in s[i].value]) if isinstance(s[i].value, list) else s[i].value[n] + + self.namemap = namemap # List of all LR items for the production self.lr_items = [] @@ -1606,6 +1626,10 @@ def _unique_names(names): indices[name] += 1 return newnames +# Dictionary mapping name aliases generated by EBNF rules. + +_name_aliases = { } + def _generate_repeat_rules(symbols): ''' Symbols is a list of grammar symbols [ symbols ]. 
This @@ -1635,6 +1659,8 @@ def _generate_repeat_rules(symbols): iname = f'_{_gencount}_item' symtext = ' '.join(symbols) + _name_aliases[name] = symbols + productions = [ ] _ = _decorator @@ -1661,14 +1687,9 @@ def _generate_repeat_rules(symbols): productions.extend(_collect_grammar_rules(many)) productions.extend(_collect_grammar_rules(many2)) - utuple = namedtuple('syms', _unique_names(symbols)) - @_(f'{iname} : {symtext}') def item(self, p): - if len(p) == 1: - return p[0] - else: - return utuple(*p) + return tuple(p) productions.extend(_collect_grammar_rules(item)) return name, productions @@ -1691,21 +1712,20 @@ def _generate_optional_rules(symbols): name = f'_{_gencount}_optional' symtext = ' '.join(symbols) + _name_aliases[name] = symbols + productions = [ ] _ = _decorator - utuple = namedtuple('syms', _unique_names(symbols)) + no_values = (None,) * len(symbols) @_(f'{name} : {symtext}') def optional(self, p): - if len(p) == 1: - return p[0] - else: - return utuple(*p) + return tuple(p) @_(f'{name} : ') def optional2(self, p): - return None + return no_values productions.extend(_collect_grammar_rules(optional)) productions.extend(_collect_grammar_rules(optional2)) diff --git a/tests/test_parser.py b/tests/test_parser.py index 5751666..f2a6e85 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -56,11 +56,11 @@ class CalcParser(Parser): @_('ID "(" [ arglist ] ")"') def statement(self, p): - return (p.ID, p[2]) + return (p.ID, p.arglist) @_('expr { COMMA expr }') def arglist(self, p): - return [p.expr, *[e.expr for e in p[1]]] + return [p.expr0, *p.expr1] @_('expr') def statement(self, p):