Refinement of EBNF extensions

2020-03-07 06:28:19 -06:00
parent a2cdf52d0f
commit 39ffd0361a
4 changed files with 92 additions and 39 deletions
--- a/29
+++ b/29
@@ -6,26 +6,27 @@ Version 0.5

               @('expr { COMMA expr }')
               def exprlist(p):
-                   return [ p.expr ] + [e.expr for e in p[1]]
+                   return [ p.expr0 ] + p.expr1

           In this code, the { ... } means zero-or-more repetitions.
-           It produces a list of matches that must be accessed by
-           position index (p[1] in this example. p[0] is 'expr').
-	   The elements of the list are named tuples with attribute
-           names that match the enclosed grammar symbols (e.g., e.expr
-           in the example).
+           It turns all symbols inside into lists.  So, instead of
+           representing a single value, p.expr1 is now a list of 
+           values.  

           An optional value can be enclosed in brackets like this:

-              @('NAME LPAREN [ exprlist ] RPAREN')
-              def function_call(p):
-                  args = p[2] if p[2] else []
-                  name = p.NAME
-                  print('Calling:', name, args)
+              @('VAR NAME [ EQUAL expr ] SEMI')
+              def variable_declaration(p):
+                  print(f"Defining {p.NAME}. Initial value={p.expr}")

-           In this case, p[2] contains the optional value.  If not present,
-           the value is None.  If present, it is a tuple of values 
-           or a single value (if only one symbol). 
+           In this case, all symbols inside [ ... ] either have a value
+           if present or are assigned to None if missing.
+
+           In both cases, you continue to use the same name indexing
+           scheme used by the rest of SLY.  For example, in the first
+           example above, you use "expr0" and "expr1" to refer to the
+           different "expr" symbols since that name appears in more
+           than one place.

 Version 0.4
 -----------
--- a/docs/sly.rst
+++ b/docs/sly.rst
@@ -871,6 +871,38 @@ string. However,writing an "empty" rule and using "empty" to denote an
 empty production may be easier to read and more clearly state your
 intention.

+EBNF Features (Optionals and Repeats)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Certain grammar features occur with some frequency.  For example, suppose you want to
+have an optional item as shown in the previous section.  An alternate way to specify
+it is to enclose one or more symbols in [ ] like this::
+
+    @_('[ item ] grok')
+    def spam(self, p):
+        if p.item is not None:
+             print("item was given and has value", p.item)
+        else:
+             print("item was not given")
+
+    @_('whatever')
+    def item(self, p):
+        ...
+
+In this case, the value of ``p.item`` is set to ``None`` if the value wasn't supplied.
+Otherwise, it will have the value returned by the ``item`` rule below.
+
+You can also encode repetitions.  For example, a common construction is a 
+list of comma separated expressions.  To parse that, you could write::
+
+    @_('expr { COMMA expr }')
+    def exprlist(self, p):
+        return [p.expr0] + p.expr1
+
+In this example, the ``{ COMMA expr }`` represents zero or more repetitions
+of a rule.  The value of all symbols inside is now a list.  So, ``p.expr1``
+is a list of all expressions matched inside the repetition.
+
 Dealing With Ambiguous Grammars
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

--- a/sly/yacc.py
+++ b/sly/yacc.py
@@ -33,7 +33,7 @@

 import sys
 import inspect
-from collections import OrderedDict, defaultdict, namedtuple
+from collections import OrderedDict, defaultdict, Counter

 __all__        = [ 'Parser' ]

@@ -145,7 +145,7 @@ class YaccProduction:

    def __getattr__(self, name):
        if name in self._namemap:
-            return self._slice[self._namemap[name]].value
+            return self._namemap[name](self._slice)
        else:
            nameset = '{' + ', '.join(self._namemap) + '}'
            raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')
@@ -209,16 +209,36 @@ class Production(object):
            if s not in self.usyms:
                self.usyms.append(s)

-        # Create a dict mapping symbol names to indices
-        m = {}
-        for key, indices in symmap.items():
-            if len(indices) == 1:
-                m[key] = indices[0]
-            else:
-                for n, index in enumerate(indices):
-                    m[key+str(n)] = index
+        # Create a name mapping
+        # First determine (in advance) if there are duplicate names
+        namecount = defaultdict(int)
+        for key in self.prod:
+            namecount[key] += 1
+            if key in _name_aliases:
+                for key in _name_aliases[key]:
+                    namecount[key] += 1

-        self.namemap = m
+        # Now, walk through the names and generate accessor functions
+        nameuse = defaultdict(int)
+        namemap = { }
+        for index, key in enumerate(self.prod):
+            if namecount[key] > 1:
+                k = f'{key}{nameuse[key]}'
+                nameuse[key] += 1
+            else:
+                k = key
+            namemap[k] = lambda s,i=index: s[i].value
+            if key in _name_aliases:
+                for n, alias in enumerate(_name_aliases[key]):
+                    if namecount[alias] > 1:
+                        k = f'{alias}{nameuse[alias]}'
+                        nameuse[alias] += 1
+                    else:
+                        k = alias
+                    # The value is either a list (for repetition) or a tuple for optional 
+                    namemap[k] = lambda s,i=index,n=n: ([x[n] for x in s[i].value]) if isinstance(s[i].value, list) else s[i].value[n]
+
+        self.namemap = namemap
                
        # List of all LR items for the production
        self.lr_items = []
@@ -1606,6 +1626,10 @@ def _unique_names(names):
            indices[name] += 1
    return newnames

+# Dictionary mapping name aliases generated by EBNF rules.
+
+_name_aliases = { }
+
 def _generate_repeat_rules(symbols):
    '''
    Symbols is a list of grammar symbols [ symbols ]. This
@@ -1635,6 +1659,8 @@ def _generate_repeat_rules(symbols):
    iname = f'_{_gencount}_item'
    symtext = ' '.join(symbols)

+    _name_aliases[name] = symbols
+
    productions = [ ]
    _ = _decorator

@@ -1661,14 +1687,9 @@ def _generate_repeat_rules(symbols):
    productions.extend(_collect_grammar_rules(many))
    productions.extend(_collect_grammar_rules(many2))

-    utuple = namedtuple('syms', _unique_names(symbols))
-
    @_(f'{iname} : {symtext}')
    def item(self, p):
-        if len(p) == 1:
-            return p[0]
-        else:
-            return utuple(*p)
+        return tuple(p)

    productions.extend(_collect_grammar_rules(item))
    return name, productions
@@ -1691,21 +1712,20 @@ def _generate_optional_rules(symbols):
    name = f'_{_gencount}_optional'
    symtext = ' '.join(symbols)
    
+    _name_aliases[name] = symbols
+
    productions = [ ]
    _ = _decorator

-    utuple = namedtuple('syms', _unique_names(symbols))
+    no_values = (None,) * len(symbols)

    @_(f'{name} : {symtext}')
    def optional(self, p):
-        if len(p) == 1:
-            return p[0]
-        else:
-            return utuple(*p)
+        return tuple(p)

    @_(f'{name} : ')
    def optional2(self, p):
-        return None
+        return no_values

    productions.extend(_collect_grammar_rules(optional))
    productions.extend(_collect_grammar_rules(optional2))
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -56,11 +56,11 @@ class CalcParser(Parser):

    @_('ID "(" [ arglist ] ")"')
    def statement(self, p):
-        return (p.ID, p[2])
+        return (p.ID, p.arglist)

    @_('expr { COMMA expr }')
    def arglist(self, p):
-        return [p.expr, *[e.expr for e in p[1]]]
+        return [p.expr0, *p.expr1]

    @_('expr')
    def statement(self, p):