Added EBNF choice handling
This commit is contained in:
parent
ab75a58b10
commit
1f87ddaf39
18
CHANGES
18
CHANGES
@ -1,5 +1,19 @@
|
||||
Version 0.4
|
||||
-----------
|
||||
05/09/2020 Experimental support for EBNF choices. For example:
|
||||
|
||||
@_('term { PLUS|MINUS term }')
|
||||
def expr(self, p):
|
||||
lterm = p.pterm0
|
||||
for op, rterm in p[1]:
|
||||
lterm = BinOp(op, lterm, rterm)
|
||||
|
||||
One issue here is just how one refers to the choice
|
||||
of values. There is no unified name to pick. So,
|
||||
you basically have to do it using a numeric index like p[1].
|
||||
In this case, p[1] is a list of all of the repeated items
|
||||
(represented as tuples).
|
||||
|
||||
05/09/2020 Changed the internal names used for EBNF rules to make them
|
||||
a bit easier to debug in the parser.out file.
|
||||
|
||||
@ -8,7 +22,7 @@ Version 0.4
|
||||
expression list:
|
||||
|
||||
@_('expr { COMMA expr }')
|
||||
def exprlist(p):
|
||||
def exprlist(self, p):
|
||||
return [ p.expr0 ] + p.expr1
|
||||
|
||||
In this code, the { ... } means zero-or-more repetitions.
|
||||
@ -19,7 +33,7 @@ Version 0.4
|
||||
An optional value can be enclosed in brackets like this:
|
||||
|
||||
@_('VAR NAME [ EQUAL expr ] SEMI')
|
||||
def variable_declaration(p):
|
||||
def variable_declaration(self, p):
|
||||
print(f"Defining {p.NAME}. Initial value={p.expr}")
|
||||
|
||||
In this case, all symbols inside [ ... ] either have a value
|
||||
|
@ -3,7 +3,7 @@
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
import sys
|
||||
sys.path.append('../..')
|
||||
sys.path.insert(0, '../..')
|
||||
|
||||
from sly import Lexer, Parser
|
||||
|
||||
|
101
example/calc_ebnf/calc.py
Normal file
101
example/calc_ebnf/calc.py
Normal file
@ -0,0 +1,101 @@
|
||||
# -----------------------------------------------------------------------------
|
||||
# calc.py
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
import sys
|
||||
sys.path.insert(0, '../..')
|
||||
|
||||
from sly import Lexer, Parser
|
||||
|
||||
class CalcLexer(Lexer):
    """Tokenizer for the calculator example.

    NOTE(review): SLY resolves the bare names in ``tokens`` and the
    token regex attributes through its metaclass; the order of the
    string-pattern attributes determines matching priority, so their
    order must not be changed.
    """
    tokens = { NAME, NUMBER, PLUS, TIMES, MINUS, DIVIDE, ASSIGN, LPAREN, RPAREN }
    # Characters skipped between tokens (not inside them)
    ignore = ' \t'

    # Tokens
    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'

    # Special symbols
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    LPAREN = r'\('
    RPAREN = r'\)'

    # Ignored pattern
    ignore_newline = r'\n+'

    # Extra action for newlines
    def ignore_newline(self, t):
        # Keep line numbers accurate across multi-newline runs.
        self.lineno += t.value.count('\n')

    def error(self, t):
        """Report an illegal character and skip past it."""
        print("Illegal character '%s'" % t.value[0])
        self.index += 1
|
||||
|
||||
class CalcParser(Parser):
    """Grammar and evaluator for the calculator example.

    Demonstrates EBNF repetition with choices, e.g.
    ``term { PLUS|MINUS term }``.  NOTE(review): the ``@_`` decorator
    is injected by SLY's metaclass; same-named methods accumulate as
    alternative productions.
    """
    # Token set must match the lexer's
    tokens = CalcLexer.tokens

    def __init__(self):
        # Symbol table mapping variable names to their values
        self.names = { }

    @_('NAME ASSIGN expr')
    def statement(self, p):
        """Store the value of an assignment; produces no result."""
        self.names[p.NAME] = p.expr

    @_('expr')
    def statement(self, p):
        """Evaluate and print a bare expression."""
        print(p.expr)

    @_('term { PLUS|MINUS term }')
    def expr(self, p):
        """Left-associative addition/subtraction.

        p[1] is a list of (operator, term) tuples from the EBNF
        repetition; there is no named attribute for a choice, so the
        numeric index is required.
        """
        lval = p.term0
        for op, rval in p[1]:
            if op == '+':
                lval = lval + rval
            elif op == '-':
                lval = lval - rval
        return lval

    @_('factor { TIMES|DIVIDE factor }')
    def term(self, p):
        """Left-associative multiplication/division (same pattern as expr)."""
        lval = p.factor0
        for op, rval in p[1]:
            if op == '*':
                lval = lval * rval
            elif op == '/':
                lval = lval / rval
        return lval

    @_('MINUS factor')
    def factor(self, p):
        """Unary negation."""
        return -p.factor

    @_('LPAREN expr RPAREN')
    def factor(self, p):
        """Parenthesized subexpression."""
        return p.expr

    @_('NUMBER')
    def factor(self, p):
        """Integer literal."""
        return int(p.NUMBER)

    @_('NAME')
    def factor(self, p):
        """Variable lookup; undefined names evaluate to 0 with a warning."""
        try:
            return self.names[p.NAME]
        except LookupError:
            print(f'Undefined name {p.NAME!r}')
            return 0
|
||||
|
||||
if __name__ == '__main__':
    # Interactive read-eval loop for the calculator; Ctrl-D (EOF) exits.
    calc_lexer = CalcLexer()
    calc_parser = CalcParser()
    while True:
        try:
            line = input('calc > ')
        except EOFError:
            break
        if not line:
            continue
        calc_parser.parse(calc_lexer.tokenize(line))
|
55
sly/yacc.py
55
sly/yacc.py
@ -1582,6 +1582,10 @@ def _collect_grammar_rules(func):
|
||||
syms, prod = _replace_ebnf_repeat(syms)
|
||||
ebnf_prod.extend(prod)
|
||||
break
|
||||
elif '|' in s:
|
||||
syms, prod = _replace_ebnf_choice(syms)
|
||||
ebnf_prod.extend(prod)
|
||||
break
|
||||
|
||||
if syms[1:2] == [':'] or syms[1:2] == ['::=']:
|
||||
grammar.append((func, filename, lineno, syms[0], syms[2:]))
|
||||
@ -1598,9 +1602,17 @@ def _replace_ebnf_repeat(syms):
|
||||
syms = list(syms)
|
||||
first = syms.index('{')
|
||||
end = syms.index('}', first)
|
||||
symname, prods = _generate_repeat_rules(syms[first+1:end])
|
||||
|
||||
# Look for choices inside
|
||||
repeated_syms = syms[first+1:end]
|
||||
if any('|' in sym for sym in repeated_syms):
|
||||
repeated_syms, prods = _replace_ebnf_choice(repeated_syms)
|
||||
else:
|
||||
prods = []
|
||||
|
||||
symname, moreprods = _generate_repeat_rules(repeated_syms)
|
||||
syms[first:end+1] = [symname]
|
||||
return syms, prods
|
||||
return syms, prods + moreprods
|
||||
|
||||
def _replace_ebnf_optional(syms):
|
||||
syms = list(syms)
|
||||
@ -1609,7 +1621,19 @@ def _replace_ebnf_optional(syms):
|
||||
symname, prods = _generate_optional_rules(syms[first+1:end])
|
||||
syms[first:end+1] = [symname]
|
||||
return syms, prods
|
||||
|
||||
|
||||
def _replace_ebnf_choice(syms):
    '''
    Rewrite EBNF choice symbols in a production.  Any symbol that
    contains '|' (e.g. 'PLUS|MINUS') is replaced by the name of a
    generated nonterminal that matches any one of the alternatives.
    Returns the rewritten symbol list along with the extra grammar
    productions that were generated.
    '''
    rewritten = list(syms)
    extra_prods = []
    for idx, sym in enumerate(rewritten):
        if '|' not in sym:
            continue
        choice_name, choice_prods = _generate_choice_rules(sym.split('|'))
        rewritten[idx] = choice_name
        extra_prods.extend(choice_prods)
    return rewritten, extra_prods
|
||||
|
||||
# Generate grammar rules for repeated items
|
||||
_gencount = 0
|
||||
|
||||
@ -1728,6 +1752,31 @@ def _generate_optional_rules(symbols):
|
||||
productions.extend(_collect_grammar_rules(optional))
|
||||
productions.extend(_collect_grammar_rules(optional2))
|
||||
return name, productions
|
||||
|
||||
def _generate_choice_rules(symbols):
    '''
    Symbols is a list of grammar symbols such as [ 'PLUS', 'MINUS' ].
    This generates code corresponding to the following construction:

    @_('PLUS', 'MINUS')
    def choice(self, p):
        return p[0]

    Returns (name, productions) where name is the generated
    nonterminal and productions are its grammar rules.
    '''
    # Monotonically increasing counter makes each generated rule
    # name unique across the module.
    global _gencount
    _gencount += 1
    # Embed the sanitized symbol names so the rule is readable in
    # parser.out when debugging.
    basename = f'_{_gencount}_' + '_'.join(_sanitize_symbols(symbols))
    name = f'{basename}_choice'

    # presumably the same decorator SLY injects as @_ in Parser
    # class bodies — verify against _decorator's definition
    _ = _decorator
    productions = [ ]

    # One production per alternative; the value is whichever
    # alternative matched.
    def choice(self, p):
        return p[0]
    choice.__name__ = name
    choice = _(*symbols)(choice)
    productions.extend(_collect_grammar_rules(choice))
    return name, productions
|
||||
|
||||
class ParserMetaDict(dict):
|
||||
'''
|
||||
|
152
tests/test_ebnf.py
Normal file
152
tests/test_ebnf.py
Normal file
@ -0,0 +1,152 @@
|
||||
import pytest
|
||||
from sly import Lexer, Parser
|
||||
|
||||
class CalcLexer(Lexer):
    """Tokenizer used by the EBNF test suite.

    NOTE(review): SLY resolves the names in ``tokens`` via its
    metaclass, and the order of string-pattern attributes determines
    matching priority — do not reorder.
    """
    # Set of token names. This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, COMMA }
    # Single-character tokens matched literally
    literals = { '(', ')' }

    # String containing ignored characters between tokens
    ignore = ' \t'

    # Regular expression rules for tokens
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    COMMA = r','

    @_(r'\d+')
    def NUMBER(self, t):
        """Convert NUMBER token values to int at lex time."""
        t.value = int(t.value)
        return t

    # Ignored text
    ignore_comment = r'\#.*'

    @_(r'\n+')
    def newline(self, t):
        # Track line numbers across newline runs.
        self.lineno += t.value.count('\n')

    def error(self, t):
        """Record the bad character for test assertions and skip it."""
        self.errors.append(t.value[0])
        self.index += 1

    def __init__(self):
        # Illegal characters seen while tokenizing (inspected by tests)
        self.errors = []
|
||||
|
||||
|
||||
class CalcParser(Parser):
    """Calculator grammar exercising EBNF repetition, choice, and
    optional constructs; used by the tests below.

    NOTE(review): ``@_`` is injected by SLY's metaclass; same-named
    methods accumulate as alternative productions.
    """
    tokens = CalcLexer.tokens

    def __init__(self):
        # Symbol table for assignments
        self.names = { }
        # Tokens passed to error() during parsing (inspected by tests)
        self.errors = [ ]

    @_('ID ASSIGN expr')
    def statement(self, p):
        """Store an assignment; produces no result (None)."""
        self.names[p.ID] = p.expr

    @_('ID "(" [ arglist ] ")"')
    def statement(self, p):
        """Call-style syntax; p.arglist is None when the optional
        [ arglist ] is absent."""
        return (p.ID, p.arglist)

    @_('expr { COMMA expr }')
    def arglist(self, p):
        """Comma-separated expressions; the repetition yields a list."""
        return [p.expr0, *p.expr1]

    @_('expr')
    def statement(self, p):
        """Bare expression statement evaluates to its value."""
        return p.expr

    @_('term { PLUS|MINUS term }')
    def expr(self, p):
        """Left-associative +/-.  p[1] is a list of (op, term) tuples;
        choices have no named attribute, hence the numeric index."""
        lval = p.term0
        for op, rval in p[1]:
            if op == '+':
                lval = lval + rval
            elif op == '-':
                lval = lval - rval
        return lval

    @_('factor { TIMES|DIVIDE factor }')
    def term(self, p):
        """Left-associative * and / (same pattern as expr)."""
        lval = p.factor0
        for op, rval in p[1]:
            if op == '*':
                lval = lval * rval
            elif op == '/':
                lval = lval / rval
        return lval

    @_('MINUS factor')
    def factor(self, p):
        """Unary negation."""
        return -p.factor

    @_("'(' expr ')'")
    def factor(self, p):
        """Parenthesized subexpression (literal parens)."""
        return p.expr

    @_('NUMBER')
    def factor(self, p):
        """Integer literal."""
        return int(p.NUMBER)

    @_('ID')
    def factor(self, p):
        """Variable lookup; undefined names evaluate to 0 with a warning."""
        try:
            return self.names[p.ID]
        except LookupError:
            print(f'Undefined name {p.ID!r}')
            return 0

    def error(self, tok):
        """Record offending tokens so tests can assert on them."""
        self.errors.append(tok)
|
||||
|
||||
|
||||
# Test basic parsing: assignment statements and expression evaluation
|
||||
def test_simple():
    """Assignment stores into parser.names and returns None; a bare
    expression statement returns its evaluated value."""
    lexer = CalcLexer()
    parser = CalcParser()

    # Assignment: no value produced, symbol table updated.
    result = parser.parse(lexer.tokenize('a = 3 + 4 * (5 + 6)'))
    # PEP 8: compare to None with 'is', not '==' (was: result == None)
    assert result is None
    assert parser.names['a'] == 47

    # Expression statement: value returned directly.
    result = parser.parse(lexer.tokenize('3 + 4 * (5 + 6)'))
    assert result == 47
|
||||
|
||||
def test_ebnf():
    """Exercise EBNF optional [ arglist ] and repetition { COMMA expr }
    through the call-style statement."""
    lexer = CalcLexer()
    parser = CalcParser()

    cases = [
        ('a()', ('a', None)),          # optional arglist absent -> None
        ('a(2+3)', ('a', [5])),        # single argument
        ('a(2+3, 4+5)', ('a', [5, 9])),  # repetition collects all args
    ]
    for source, expected in cases:
        assert parser.parse(lexer.tokenize(source)) == expected
|
||||
|
||||
def test_parse_error():
    """A stray NUMBER token is reported through error(); the parser
    recovers and still evaluates the trailing expression."""
    lexer = CalcLexer()
    parser = CalcParser()

    result = parser.parse(lexer.tokenize('a 123 4 + 5'))
    assert result == 9
    assert len(parser.errors) == 1
    bad_tok = parser.errors[0]
    assert bad_tok.type == 'NUMBER'
    assert bad_tok.value == 123
|
||||
|
||||
# TO DO: Add tests
|
||||
# - error productions
|
||||
# - embedded actions
|
||||
# - lineno tracking
|
||||
# - various error cases caught during parser construction
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user