diff --git a/tests/test_lex.py b/tests/test_lex.py
new file mode 100644
index 0000000..d1aa218
--- /dev/null
+++ b/tests/test_lex.py
@@ -0,0 +1,93 @@
+import pytest
+from sly import Lexer
+
+class CalcLexer(Lexer):
+    # Set of token names. This is always required
+    tokens = {
+        'ID',
+        'NUMBER',
+        'PLUS',
+        'MINUS',
+        'TIMES',
+        'DIVIDE',
+        'ASSIGN',
+        'LT',
+        'LE',
+    }
+
+    literals = { '(', ')' }
+
+    # String containing ignored characters between tokens
+    ignore = ' \t'
+
+    # Regular expression rules for tokens
+    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
+    PLUS = r'\+'
+    MINUS = r'-'
+    TIMES = r'\*'
+    DIVIDE = r'/'
+    ASSIGN = r'='
+    LE = r'<='
+    LT = r'<'
+
+    @_(r'\d+')
+    def NUMBER(self, t):
+        t.value = int(t.value)
+        return t
+
+    # Ignored text
+    ignore_comment = r'\#.*'
+
+    @_(r'\n+')
+    def newline(self, t):
+        self.lineno += t.value.count('\n')
+
+    # Attached rule
+    def ID(self, t):
+        t.value = t.value.upper()
+        return t
+
+    def error(self, value):
+        self.errors.append(value)
+        self.index += 1
+
+    def __init__(self):
+        self.errors = []
+
+# Test basic recognition of various tokens and literals
+def test_tokens():
+    lexer = CalcLexer()
+    toks = list(lexer.tokenize('abc 123 + - * / = < <= ( )'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
+    assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']
+
+# Test ignored comments and newlines
+def test_ignored():
+    lexer = CalcLexer()
+    toks = list(lexer.tokenize('\n\n# A comment\n123\nabc\n'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    linenos = [t.lineno for t in toks]
+    assert types == ['NUMBER', 'ID']
+    assert vals == [123, 'ABC']
+    assert linenos == [4,5]
+    assert lexer.lineno == 6
+
+# Test error handling
+def test_error():
+    lexer = CalcLexer()
+    toks = list(lexer.tokenize('123 :+-'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    assert types == ['NUMBER', 'PLUS', 'MINUS']
+    assert vals == [123, '+', '-']
+    assert lexer.errors == [ ':+-' ]
+
+
+
+
+
+
+
diff --git a/tests/test_parser.py b/tests/test_parser.py
new file mode 100644
index 0000000..38db4fe
--- /dev/null
+++ b/tests/test_parser.py
@@ -0,0 +1,141 @@
+import pytest
+from sly import Lexer, Parser
+
+class CalcLexer(Lexer):
+    # Set of token names. This is always required
+    tokens = {
+        'ID',
+        'NUMBER',
+        'PLUS',
+        'MINUS',
+        'TIMES',
+        'DIVIDE',
+        'ASSIGN',
+    }
+
+    literals = { '(', ')' }
+
+    # String containing ignored characters between tokens
+    ignore = ' \t'
+
+    # Regular expression rules for tokens
+    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
+    PLUS = r'\+'
+    MINUS = r'-'
+    TIMES = r'\*'
+    DIVIDE = r'/'
+    ASSIGN = r'='
+
+    @_(r'\d+')
+    def NUMBER(self, t):
+        t.value = int(t.value)
+        return t
+
+    # Ignored text
+    ignore_comment = r'\#.*'
+
+    @_(r'\n+')
+    def newline(self, t):
+        self.lineno += t.value.count('\n')
+
+    def error(self, value):
+        self.errors.append(value)
+        self.index += 1
+
+    def __init__(self):
+        self.errors = []
+
+class CalcParser(Parser):
+    tokens = CalcLexer.tokens
+
+    precedence = (
+        ('left', 'PLUS', 'MINUS'),
+        ('left', 'TIMES', 'DIVIDE'),
+        ('right', 'UMINUS'),
+    )
+
+    def __init__(self):
+        self.names = { }
+        self.errors = [ ]
+
+    @_('ID ASSIGN expr')
+    def statement(self, p):
+        self.names[p.ID] = p.expr
+
+    @_('expr')
+    def statement(self, p):
+        return p.expr
+
+    @_('expr PLUS expr')
+    def expr(self, p):
+        return p.expr0 + p.expr1
+
+    @_('expr MINUS expr')
+    def expr(self, p):
+        return p.expr0 - p.expr1
+
+    @_('expr TIMES expr')
+    def expr(self, p):
+        return p.expr0 * p.expr1
+
+    @_('expr DIVIDE expr')
+    def expr(self, p):
+        return p.expr0 / p.expr1
+
+    @_('MINUS expr %prec UMINUS')
+    def expr(self, p):
+        return -p.expr
+
+    @_('"(" expr ")"')
+    def expr(self, p):
+        return p.expr
+
+    @_('NUMBER')
+    def expr(self, p):
+        return p.NUMBER
+
+    @_('ID')
+    def expr(self, p):
+        try:
+            return self.names[p.ID]
+        except LookupError:
+            self.errors.append(('undefined', p.ID))
+            return 0
+
+    def error(self, tok):
+        self.errors.append(tok)
+
+
+# Test basic recognition of various tokens and literals
+def test_simple():
+    lexer = CalcLexer()
+    parser = CalcParser()
+
+    result = parser.parse(lexer.tokenize('a = 3 + 4 * (5 + 6)'))
+    assert result == None
+    assert parser.names['a'] == 47
+
+    result = parser.parse(lexer.tokenize('3 + 4 * (5 + 6)'))
+    assert result == 47
+
+def test_parse_error():
+    lexer = CalcLexer()
+    parser = CalcParser()
+
+    result = parser.parse(lexer.tokenize('a 123 4 + 5'))
+    assert result == 9
+    assert len(parser.errors) == 1
+    assert parser.errors[0].type == 'NUMBER'
+    assert parser.errors[0].value == 123
+
+# TO DO: Add tests
+# - error productions
+# - embedded actions
+# - lineno tracking
+# - various error cases caught during parser construction
+
+
+
+
+
+
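
For reference, a minimal sketch of driving the classes under test by hand, assuming sly is installed and the CalcLexer/CalcParser definitions from tests/test_parser.py above are in scope (e.g. pasted into a REPL); the input strings are illustrative, not taken from the tests:

    lexer = CalcLexer()
    parser = CalcParser()

    # tokenize() yields a token stream; the parser consumes it directly,
    # exactly as the tests do with parser.parse(lexer.tokenize(...)).
    for tok in lexer.tokenize('x = 1 + 2'):
        print(tok.type, tok.value)    # ID x, ASSIGN =, NUMBER 1, PLUS +, NUMBER 2

    # An assignment statement returns None; the value lands in parser.names.
    parser.parse(lexer.tokenize('x = 2 * (3 + 4)'))
    print(parser.names['x'])          # -> 14

The suite itself runs under pytest, e.g. python -m pytest tests/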