# sly/tests/test_parser.py

import pytest
from sly import Lexer, Parser

class CalcLexer(Lexer):
    # Set of token names. This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, COMMA }
    literals = { '(', ')' }

    # String containing ignored characters between tokens
    ignore = ' \t'

    # Regular expression rules for tokens
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    COMMA = r','

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    # Ignored text
    ignore_comment = r'\#.*'

    @_(r'\n+')
    def newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, t):
        self.errors.append(t.value[0])
        self.index += 1

    def __init__(self):
        self.errors = []


class CalcParser(Parser):
    tokens = CalcLexer.tokens

    precedence = (
        ('left', PLUS, MINUS),
        ('left', TIMES, DIVIDE),
        ('right', UMINUS),
        )

    def __init__(self):
        self.names = { }
        self.errors = [ ]

    @_('ID ASSIGN expr')
    def statement(self, p):
        self.names[p.ID] = p.expr

    @_('ID "(" [ arglist ] ")"')
    def statement(self, p):
        return (p.ID, p.arglist)

    @_('expr { COMMA expr }')
    def arglist(self, p):
        return [p.expr0, *p.expr1]

    @_('expr')
    def statement(self, p):
        return p.expr

    @_('expr PLUS expr')
    def expr(self, p):
        return p.expr0 + p.expr1

    @_('expr MINUS expr')
    def expr(self, p):
        return p.expr0 - p.expr1

    @_('expr TIMES expr')
    def expr(self, p):
        return p.expr0 * p.expr1

    @_('expr DIVIDE expr')
    def expr(self, p):
        return p.expr0 / p.expr1

    @_('MINUS expr %prec UMINUS')
    def expr(self, p):
        return -p.expr

    @_('"(" expr ")"')
    def expr(self, p):
        return p.expr

    @_('NUMBER')
    def expr(self, p):
        return p.NUMBER

    @_('ID')
    def expr(self, p):
        try:
            return self.names[p.ID]
        except LookupError:
            self.errors.append(('undefined', p.ID))
            return 0

    def error(self, tok):
        self.errors.append(tok)


# Test basic recognition of various tokens and literals
def test_simple():
    lexer = CalcLexer()
    parser = CalcParser()

    result = parser.parse(lexer.tokenize('a = 3 + 4 * (5 + 6)'))
    assert result == None
    assert parser.names['a'] == 47

    result = parser.parse(lexer.tokenize('3 + 4 * (5 + 6)'))
    assert result == 47

def test_ebnf():
    lexer = CalcLexer()
    parser = CalcParser()

    result = parser.parse(lexer.tokenize('a()'))
    assert result == ('a', None)

    result = parser.parse(lexer.tokenize('a(2+3)'))
    assert result == ('a', [5])

    result = parser.parse(lexer.tokenize('a(2+3, 4+5)'))
    assert result == ('a', [5, 9])

def test_parse_error():
    lexer = CalcLexer()
    parser = CalcParser()

    result = parser.parse(lexer.tokenize('a 123 4 + 5'))
    assert result == 9
    assert len(parser.errors) == 1
    assert parser.errors[0].type == 'NUMBER'
    assert parser.errors[0].value == 123

# TO DO: Add tests
# - error productions
# - embedded actions
# - lineno tracking
# - various error cases caught during parser construction
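
# A minimal sketch toward the "lineno tracking" item above (a new test, not
# part of the original suite). It assumes SLY stamps each token with the
# lexer's current lineno, which the newline() rule advances on each '\n'.
def test_lineno_tracking_sketch():
    lexer = CalcLexer()
    toks = list(lexer.tokenize('a = 3\nb = 4'))
    # Tokens before the newline should report line 1, tokens after it line 2.
    assert toks[0].lineno == 1
    assert toks[-1].lineno == 2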