From b088d9b2cef949278dc63083b2375265f21eb3a4 Mon Sep 17 00:00:00 2001 From: David Beazley Date: Sat, 27 Jan 2018 15:27:15 -0600 Subject: [PATCH] Changes to token specification. More metamagic --- CHANGES | 59 ++++++++++++++ README.txt | 8 +- docs/index.rst | 4 +- docs/sly.rst | 165 +++++++++++++++++++------------------- example/calc/calc.py | 14 +--- example/calc_prec/calc.py | 6 +- sly/lex.py | 56 +++++++++++-- sly/yacc.py | 20 +++-- tests/test_lex.py | 91 ++++++++++++++++++++- tests/test_parser.py | 21 ++--- 10 files changed, 302 insertions(+), 142 deletions(-) diff --git a/CHANGES b/CHANGES index 988aa6b..0b4e38c 100644 --- a/CHANGES +++ b/CHANGES @@ -1,5 +1,64 @@ Version 0.3 ----------- +1/27/2018 Tokens no longer have to be specified as strings. For example, you + can now write: + + from sly import Lexer + + class TheLexer(Lexer): + tokens = { ID, NUMBER, PLUS, MINUS } + + ID = r'[a-zA-Z_][a-zA-Z0-9_]*' + NUMBER = r'\d+' + PLUS = r'\+' + MINUS = r'-' + + This convention also carries over to the parser for things such + as precedence specifiers: + + from sly import Parser + class TheParser(Parser): + tokens = TheLexer.tokens + + precedence = ( + ('left', PLUS, MINUS), + ('left', TIMES, DIVIDE), + ('right', UMINUS), + ) + ... + + Nevermind the fact that ID, NUMBER, PLUS, and MINUS appear to be + undefined identifiers. It all works. + +1/27/2018 Tokens now allow special-case remapping. For example: + + from sly import Lexer + + class TheLexer(Lexer): + tokens = { ID, IF, ELSE, WHILE, NUMBER, PLUS, MINUS } + + ID = r'[a-zA-Z_][a-zA-Z0-9_]*' + ID['if'] = IF + ID['else'] = ELSE + ID['while'] = WHILE + + NUMBER = r'\d+' + PLUS = r'\+' + MINUS = r'-' + + In this code, the ID rule matches any identifier. However, + special cases have been made for IF, ELSE, and WHILE tokens. + Previously, this had to be handled in a special action method + such as this: + + def ID(self, t): + if t.value in { 'if', 'else', 'while' }: + t.type = t.value.upper() + return t + + Nevermind the fact that the syntax appears to suggest that strings + work as a kind of mutable mapping. + 1/16/2018 Usability improvement on Lexer class. Regular expression rules specified as strings that don't match any name in tokens are now reported as errors. diff --git a/README.txt b/README.txt index b94c0ff..7007383 100644 --- a/README.txt +++ b/README.txt @@ -1,6 +1,6 @@ -SLY (Sly Lex-Yacc) Version 0.2 +SLY (Sly Lex-Yacc) Version 0.3 -Copyright (C) 2016-2017 +Copyright (C) 2016-2018 David M. Beazley (Dabeaz LLC) All rights reserved. @@ -85,9 +85,7 @@ expressions and store variables: from sly import Lexer, Parser class CalcLexer(Lexer): - tokens = { - 'NAME', 'NUMBER', - } + tokens = { NAME, NUMBER } ignore = ' \t' literals = { '=', '+', '-', '*', '/', '(', ')' } diff --git a/docs/index.rst b/docs/index.rst index 29b43c3..d79a586 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -60,9 +60,7 @@ expressions and store variables:: from sly import Lexer, Parser class CalcLexer(Lexer): - tokens = { - 'NAME', 'NUMBER', - } + tokens = { NAME, NUMBER } ignore = ' \t' literals = { '=', '+', '-', '*', '/', '(', ')' } diff --git a/docs/sly.rst b/docs/sly.rst index dc6dd87..9509abd 100644 --- a/docs/sly.rst +++ b/docs/sly.rst @@ -68,17 +68,8 @@ lexer that tokenizes the above text:: class CalcLexer(Lexer): # Set of token names. 
This is always required - tokens = { - 'ID', - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'ASSIGN', - 'LPAREN', - 'RPAREN', - } + tokens = { ID, NUMBER, PLUS, MINUS, TIMES, + DIVIDE, ASSIGN, LPAREN, RPAREN } # String containing ignored characters between tokens ignore = ' \t' @@ -131,19 +122,12 @@ In the example, the following code specified the token names:: class CalcLexer(Lexer): ... # Set of token names. This is always required - tokens = { - 'ID', - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'ASSIGN', - 'LPAREN', - 'RPAREN', - } + tokens = { ID, NUMBER, PLUS, MINUS, TIMES, + DIVIDE, ASSIGN, LPAREN, RPAREN } ... +Token names should be specified using all-caps as shown. + Specification of token match patterns ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -167,7 +151,7 @@ short tokens. For example, if you wanted to have separate tokens for example:: class MyLexer(Lexer): - tokens = {'ASSIGN', 'EQ', ...} + tokens = { ASSIGN, EQ, ...} ... EQ = r'==' # MUST APPEAR FIRST! (LONGER) ASSIGN = r'=' @@ -251,12 +235,10 @@ Instead of using the ``@_()`` decorator, you can also write a method that matches the same name as a token previously specified as a string. For example:: - ID = r'[a-zA-Z_][a-zA-Z0-9_]*' + NUMBER = r'\d+' ... - def ID(self, t): - reserved = { 'if', 'else', 'while', 'for' } - if t.value in reserved: - t.type = t.value.upper() + def NUMBER(self, t): + t.value = int(t.value) return t This is potentially useful trick for debugging a lexer. You can temporarily @@ -264,6 +246,36 @@ attach a method a token and have it execute when the token is encountered. If you later take the method away, the lexer will revert back to its original behavior. +Token Remapping +^^^^^^^^^^^^^^^ + +Occasionally, you might need to remap tokens based on special cases. +Consider the case of matching identifiers such as "abc", "python", or "guido". +Certain identifiers such as "if", "else", and "while" might need to be +treated as special keywords. To handle this, include token remapping rules when +writing the lexer like this:: + + # calclex.py + + from sly import Lexer + + class CalcLexer(Lexer): + tokens = { ID, IF, ELSE, WHILE } + # String containing ignored characters (between tokens) + ignore = ' \t' + + # Base ID rule + ID = r'[a-zA-Z_][a-zA-Z0-9_]*' + + # Special cases + ID['if'] = IF + ID['else'] = ELSE + ID['while'] = WHILE + +When parsing an identifier, the special cases will remap certain matching +values to a new token type. For example, if the value of an identifier is +"if" above, an ``IF`` token will be generated. + Line numbers and position tracking ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -385,26 +397,11 @@ into practice:: from sly import Lexer class CalcLexer(Lexer): - # Set of reserved names (language keywords) - reserved_words = { 'WHILE', 'IF', 'ELSE', 'PRINT' } - # Set of token names. 
This is always required - tokens = { - 'NUMBER', - 'ID', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'ASSIGN', - 'EQ', - 'LT', - 'LE', - 'GT', - 'GE', - 'NE', - *reserved_words, - } + tokens = { NUMBER, ID, WHILE, IF, ELSE, PRINT, + PLUS, MINUS, TIMES, DIVIDE, ASSIGN, + EQ, LT, LE, GT, GE, NE } + literals = { '(', ')', '{', '}', ';' } @@ -429,12 +426,12 @@ into practice:: t.value = int(t.value) return t - @_(r'[a-zA-Z_][a-zA-Z0-9_]*') - def ID(self, t): - # Check if name matches a reserved word (change token type if true) - if t.value.upper() in self.reserved_words: - t.type = t.value.upper() - return t + # Identifiers and keywords + ID = r'[a-zA-Z_][a-zA-Z0-9_]*' + ID['if'] = IF + ID['else'] = ELSE + ID['while'] = WHILE + ID['print'] = PRINT ignore_comment = r'\#.*' @@ -443,8 +440,8 @@ into practice:: def ignore_newline(self, t): self.lineno += t.value.count('\n') - def error(self, value): - print('Line %d: Bad character %r' % (self.lineno, value[0])) + def error(self, t): + print('Line %d: Bad character %r' % (self.lineno, t.value[0])) self.index += 1 if __name__ == '__main__': @@ -462,27 +459,27 @@ into practice:: If you run this code, you'll get output that looks like this:: - Token(ID, 'x', 3, 12) - Token(ASSIGN, '=', 3, 14) - Token(NUMBER, 0, 3, 16) - Token(;, ';', 3, 17) - Token(WHILE, 'while', 4, 19) - Token((, '(', 4, 25) - Token(ID, 'x', 4, 26) - Token(LT, '<', 4, 28) - Token(NUMBER, 10, 4, 30) - Token(), ')', 4, 32) - Token({, '{', 4, 34) - Token(PRINT, 'print', 5, 40) - Token(ID, 'x', 5, 46) + Token(type='ID', value='x', lineno=3, index=20) + Token(type='ASSIGN', value='=', lineno=3, index=22) + Token(type='NUMBER', value=0, lineno=3, index=24) + Token(type=';', value=';', lineno=3, index=25) + Token(type='WHILE', value='while', lineno=4, index=31) + Token(type='(', value='(', lineno=4, index=37) + Token(type='ID', value='x', lineno=4, index=38) + Token(type='LT', value='<', lineno=4, index=40) + Token(type='NUMBER', value=10, lineno=4, index=42) + Token(type=')', value=')', lineno=4, index=44) + Token(type='{', value='{', lineno=4, index=46) + Token(type='PRINT', value='print', lineno=5, index=56) + Token(type='ID', value='x', lineno=5, index=62) Line 5: Bad character ':' - Token(ID, 'x', 6, 53) - Token(ASSIGN, '=', 6, 55) - Token(ID, 'x', 6, 57) - Token(PLUS, '+', 6, 59) - Token(NUMBER, 1, 6, 61) - Token(;, ';', 6, 62) - Token(}, '}', 7, 64) + Token(type='ID', value='x', lineno=6, index=73) + Token(type='ASSIGN', value='=', lineno=6, index=75) + Token(type='ID', value='x', lineno=6, index=77) + Token(type='PLUS', value='+', lineno=6, index=79) + Token(type='NUMBER', value=1, lineno=6, index=81) + Token(type=';', value=';', lineno=6, index=82) + Token(type='}', value='}', lineno=7, index=88) Study this example closely. It might take a bit to digest, but all of the essential parts of writing a lexer are there. Tokens have to be specified @@ -914,8 +911,8 @@ like this:: class CalcParser(Parser): ... precedence = ( - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), + ('left', PLUS, MINUS), + ('left', TIMES, DIVIDE), ) # Rules where precedence is applied @@ -1004,9 +1001,9 @@ like this:: class CalcParser(Parser): ... 
precedence = ( - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), - ('right', 'UMINUS'), # Unary minus operator + ('left', PLUS, MINUS), + ('left', TIMES, DIVIDE), + ('right', UMINUS), # Unary minus operator ) Now, in the grammar file, you write the unary minus rule like this:: @@ -1034,10 +1031,10 @@ operators like ``<`` and ``>`` but you didn't want combinations like class MyParser(Parser): ... precedence = ( - ('nonassoc', 'LESSTHAN', 'GREATERTHAN'), # Nonassociative operators - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), - ('right', 'UMINUS'), # Unary minus operator + ('nonassoc', LESSTHAN, GREATERTHAN), # Nonassociative operators + ('left', PLUS, MINUS), + ('left', TIMES, DIVIDE), + ('right', UMINUS), # Unary minus operator ) If you do this, the occurrence of input text such as ``a < b < c`` diff --git a/example/calc/calc.py b/example/calc/calc.py index e15f80a..f7825ec 100644 --- a/example/calc/calc.py +++ b/example/calc/calc.py @@ -9,28 +9,16 @@ from sly import Lexer, Parser class CalcLexer(Lexer): # Set of token names. This is always required - tokens = { - 'ID', - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'ASSIGN', - 'LPAREN', - 'RPAREN', - } + tokens = { NUMBER, PLUS, MINUS, TIMES, DIVIDE, LPAREN, RPAREN } # String containing ignored characters between tokens ignore = ' \t' # Regular expression rules for tokens - ID = r'[a-zA-Z_][a-zA-Z0-9_]*' PLUS = r'\+' MINUS = r'-' TIMES = r'\*' DIVIDE = r'/' - ASSIGN = r'=' LPAREN = r'\(' RPAREN = r'\)' diff --git a/example/calc_prec/calc.py b/example/calc_prec/calc.py index 0a6261e..597aaa0 100644 --- a/example/calc_prec/calc.py +++ b/example/calc_prec/calc.py @@ -8,9 +8,7 @@ sys.path.insert(0, "../..") from sly import Lexer, Parser class CalcLexer(Lexer): - tokens = { - 'NAME', 'NUMBER', - } + tokens = { NAME, NUMBER } ignore = ' \t' literals = { '=', '+', '-', '*', '/', '(', ')' } @@ -36,7 +34,7 @@ class CalcParser(Parser): precedence = ( ('left', '+', '-'), ('left', '*', '/'), - ('right', 'UMINUS'), + ('right', UMINUS), ) def __init__(self): diff --git a/sly/lex.py b/sly/lex.py index 82b5a24..dc69569 100644 --- a/sly/lex.py +++ b/sly/lex.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # sly: lex.py # -# Copyright (C) 2016 +# Copyright (C) 2016 - 2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # @@ -31,11 +31,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -__version__ = '0.2' +__version__ = '0.3' __all__ = ['Lexer', 'LexerStateChange'] import re -from collections import OrderedDict class LexError(Exception): ''' @@ -78,20 +77,41 @@ class Token(object): def __repr__(self): return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})' -class LexerMetaDict(OrderedDict): +class TokenStr(str): + @staticmethod + def __new__(cls, value): + self = super().__new__(cls, value) + self.remap = { } + return self + + def __setitem__(self, key, value): + self.remap[key] = value + +class LexerMetaDict(dict): ''' Special dictionary that prohits duplicate definitions in lexer specifications. 
''' def __setitem__(self, key, value): + if isinstance(value, str): + value = TokenStr(value) + if key in self and not isinstance(value, property): - if isinstance(self[key], str): + prior = self[key] + if isinstance(prior, str): if callable(value): - value.pattern = self[key] + value.pattern = prior + value.remap = getattr(prior, 'remap', None) else: raise AttributeError(f'Name {key} redefined') super().__setitem__(key, value) + def __getitem__(self, key): + if key not in self and key.isupper() and key[:1] != '_': + return key + else: + return super().__getitem__(key) + class LexerMeta(type): ''' Metaclass for collecting lexing rules @@ -114,7 +134,12 @@ class LexerMeta(type): def __new__(meta, clsname, bases, attributes): del attributes['_'] + remapping = { key: val.remap for key, val in attributes.items() + if getattr(val, 'remap', None) } + attributes = { key: str(val) if isinstance(val, TokenStr) else val + for key, val in attributes.items() } cls = super().__new__(meta, clsname, bases, attributes) + cls._remapping = remapping cls._build(list(attributes.items())) return cls @@ -159,6 +184,16 @@ class Lexer(metaclass=LexerMeta): cls._ignored_tokens = set(cls._ignored_tokens) cls._token_funcs = dict(cls._token_funcs) + # Build a set of all remapped tokens + remapped_tokens = set() + for toks in cls._remapping.values(): + remapped_tokens.update(toks.values()) + + undefined = remapped_tokens - cls._token_names + if undefined: + missing = ', '.join(undefined) + raise LexerBuildError(f'{missing} not included in token(s)') + parts = [] for tokname, value in cls._collect_rules(definitions): if tokname.startswith('ignore_'): @@ -169,8 +204,10 @@ class Lexer(metaclass=LexerMeta): pattern = value elif callable(value): - pattern = value.pattern cls._token_funcs[tokname] = value + pattern = getattr(value, 'pattern', None) + if not pattern: + continue # Form the regular expression component part = f'(?P<{tokname}>{pattern})' @@ -209,7 +246,7 @@ class Lexer(metaclass=LexerMeta): _ignore = self.ignore _token_funcs = self._token_funcs _literals = self._literals - + _remapping = self._remapping self.text = text try: while True: @@ -228,6 +265,9 @@ class Lexer(metaclass=LexerMeta): index = m.end() tok.value = m.group() tok.type = m.lastgroup + if tok.type in _remapping: + tok.type = _remapping[tok.type].get(tok.value, tok.type) + if tok.type in _token_funcs: self.index = index self.lineno = lineno diff --git a/sly/yacc.py b/sly/yacc.py index 6478580..85cd85c 100644 --- a/sly/yacc.py +++ b/sly/yacc.py @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # sly: yacc.py # -# Copyright (C) 2016-2017 +# Copyright (C) 2016-2018 # David M. Beazley (Dabeaz LLC) # All rights reserved. # @@ -35,7 +35,7 @@ import sys import inspect from collections import OrderedDict, defaultdict -__version__ = '0.2' +__version__ = '0.3' __all__ = [ 'Parser' ] class YaccError(Exception): @@ -55,12 +55,12 @@ ERROR_COUNT = 3 # Number of symbols that must be shifted to leave MAXINT = sys.maxsize # This object is a stand-in for a logging object created by the -# logging module. PLY will use this by default to create things +# logging module. SLY will use this by default to create things # such as the parser.out file. If a user wants more detailed # information, they can create their own logging object and pass -# it into PLY. +# it into SLY. 
-class PlyLogger(object): +class SlyLogger(object): def __init__(self, f): self.f = f @@ -1552,7 +1552,7 @@ def _collect_grammar_rules(func): return grammar -class ParserMetaDict(OrderedDict): +class ParserMetaDict(dict): ''' Dictionary that allows decorated grammar rule functions to be overloaded ''' @@ -1560,6 +1560,12 @@ class ParserMetaDict(OrderedDict): if key in self and callable(value) and hasattr(value, 'rules'): value.next_func = self[key] super().__setitem__(key, value) + + def __getitem__(self, key): + if key not in self and key.isupper() and key[:1] != '_': + return key.upper() + else: + return super().__getitem__(key) class ParserMeta(type): @classmethod @@ -1582,7 +1588,7 @@ class ParserMeta(type): class Parser(metaclass=ParserMeta): # Logging object where debugging/diagnostic messages are sent - log = PlyLogger(sys.stderr) + log = SlyLogger(sys.stderr) # Debugging filename where parsetab.out data can be written debugfile = None diff --git a/tests/test_lex.py b/tests/test_lex.py index 212ceec..a730f71 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -98,9 +98,94 @@ def test_error_return(): assert vals == [123, ':+-', '+', '-'] assert lexer.errors == [ ':+-' ] - - - + +class ModernCalcLexer(Lexer): + # Set of token names. This is always required + tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, LT, LE, IF, ELSE } + literals = { '(', ')' } + + # String containing ignored characters between tokens + ignore = ' \t' + + # Regular expression rules for tokens + ID = r'[a-zA-Z_][a-zA-Z0-9_]*' + ID['if'] = IF + ID['else'] = ELSE + + NUMBER = r'\d+' + PLUS = r'\+' + MINUS = r'-' + TIMES = r'\*' + DIVIDE = r'/' + ASSIGN = r'=' + LE = r'<=' + LT = r'<' + + def NUMBER(self, t): + t.value = int(t.value) + return t + + # Ignored text + ignore_comment = r'\#.*' + + @_(r'\n+') + def ignore_newline(self, t): + self.lineno += t.value.count('\n') + + # Attached rule + def ID(self, t): + t.value = t.value.upper() + return t + + def error(self, t): + self.errors.append(t.value) + self.index += 1 + if hasattr(self, 'return_error'): + return t + + def __init__(self): + self.errors = [] +# Test basic recognition of various tokens and literals +def test_modern_tokens(): + lexer = ModernCalcLexer() + toks = list(lexer.tokenize('abc if else 123 + - * / = < <= ( )')) + types = [t.type for t in toks] + vals = [t.value for t in toks] + assert types == ['ID','IF','ELSE', 'NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')'] + assert vals == ['ABC','if','else', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')'] + +# Test ignored comments and newlines +def test_modern_ignored(): + lexer = ModernCalcLexer() + toks = list(lexer.tokenize('\n\n# A comment\n123\nabc\n')) + types = [t.type for t in toks] + vals = [t.value for t in toks] + linenos = [t.lineno for t in toks] + assert types == ['NUMBER', 'ID'] + assert vals == [123, 'ABC'] + assert linenos == [4,5] + assert lexer.lineno == 6 + +# Test error handling +def test_modern_error(): + lexer = ModernCalcLexer() + toks = list(lexer.tokenize('123 :+-')) + types = [t.type for t in toks] + vals = [t.value for t in toks] + assert types == ['NUMBER', 'PLUS', 'MINUS'] + assert vals == [123, '+', '-'] + assert lexer.errors == [ ':+-' ] + +# Test error token return handling +def test_modern_error_return(): + lexer = ModernCalcLexer() + lexer.return_error = True + toks = list(lexer.tokenize('123 :+-')) + types = [t.type for t in toks] + vals = [t.value for t in toks] + assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS'] + assert vals 
== [123, ':+-', '+', '-'] + assert lexer.errors == [ ':+-' ] diff --git a/tests/test_parser.py b/tests/test_parser.py index 38db4fe..2661448 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -3,16 +3,7 @@ from sly import Lexer, Parser class CalcLexer(Lexer): # Set of token names. This is always required - tokens = { - 'ID', - 'NUMBER', - 'PLUS', - 'MINUS', - 'TIMES', - 'DIVIDE', - 'ASSIGN', - } - + tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN } literals = { '(', ')' } # String containing ignored characters between tokens @@ -38,8 +29,8 @@ class CalcLexer(Lexer): def newline(self, t): self.lineno += t.value.count('\n') - def error(self, value): - self.errors.append(value) + def error(self, t): + self.errors.append(t.value[0]) self.index += 1 def __init__(self): @@ -49,9 +40,9 @@ class CalcParser(Parser): tokens = CalcLexer.tokens precedence = ( - ('left', 'PLUS', 'MINUS'), - ('left', 'TIMES', 'DIVIDE'), - ('right', 'UMINUS'), + ('left', PLUS, MINUS), + ('left', TIMES, DIVIDE), + ('right', UMINUS), ) def __init__(self):
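
To see how the pieces of this commit fit together, here is a minimal sketch of a lexer
written against the new 0.3 interface. It is an illustration only, not code from the
patch; the class name and sample input are made up, but the features it exercises
(bare-identifier token names, ID keyword remapping, and an attached NUMBER method) are
exactly the ones added above, and it mirrors the ModernCalcLexer used in the new tests.
The same bare-identifier convention carries over to a Parser's precedence tuple, as
shown in the CHANGES entry.

    from sly import Lexer

    class KeywordLexer(Lexer):
        # Token names are written as bare identifiers (new in 0.3).
        # They look undefined, but the metaclass dictionary resolves them.
        tokens = { ID, IF, ELSE, WHILE, NUMBER }

        ignore = ' \t'

        # Base identifier rule with special-case remapping for keywords.
        # No separate patterns are needed for IF/ELSE/WHILE.
        ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
        ID['if'] = IF
        ID['else'] = ELSE
        ID['while'] = WHILE

        NUMBER = r'\d+'

        # Attached method: runs whenever a NUMBER token is matched.
        def NUMBER(self, t):
            t.value = int(t.value)
            return t

    if __name__ == '__main__':
        for tok in KeywordLexer().tokenize('if x else 42'):
            print(tok)
        # Produces tokens of type IF, ID, ELSE, NUMBER; the value 'if'
        # is remapped from ID to IF by the special-case rule above.
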