Changes for 0.2

2018-01-10 06:09:20 -06:00 · 2018-01-10 06:09:20 -06:00 · e05748494c
commit e05748494c
parent d8903d8301
10 changed files with 55 additions and 22 deletions
--- a/8
+++ b/8
@ -0,0 +1,8 @@
+Version 0.2
+-----------
+
+12/24/2017 The error(self, t) method of lexer objects now receives a
+           token as input.  The value attribute of this token contains
+           all remaining input text.  If the passed token is returned
+           by error(), then it shows up in the token stream where
+           it can be processed by the parser.
--- a/README.md
+++ b/README.md
@ -1,4 +1,4 @@
-SLY (Sly Lex-Yacc)                   Version 0.1
+SLY (Sly Lex-Yacc)                   Version 0.2

 Copyright (C) 2016-2017
 David M. Beazley (Dabeaz LLC)
@ -103,8 +103,8 @@ expressions and store variables:
        def newline(self, t):
            self.lineno += t.value.count('\n')

-        def error(self, value):
-            print("Illegal character '%s'" % value[0])
+        def error(self, t):
+            print("Illegal character '%s'" % t.value[0])
            self.index += 1

    class CalcParser(Parser):
--- a/docs/index.rst
+++ b/docs/index.rst
@ -78,8 +78,8 @@ expressions and store variables::
        def newline(self, t):
            self.lineno += t.value.count('\n')

-        def error(self, value):
-            print("Illegal character '%s'" % value[0])
+        def error(self, t):
+            print("Illegal character '%s'" % t.value[0])
            self.index += 1

    class CalcParser(Parser):
--- a/docs/sly.rst
+++ b/docs/sly.rst
@ -350,15 +350,15 @@ Error handling
 If a bad character is encountered while lexing, tokenizing will stop.
 However, you can add an ``error()`` method to handle lexing errors
 that occur when illegal characters are detected.  The error method
-receives a string containing all remaining untokenized text.  A
-typical handler might look at this text and skip ahead in some manner.
-For example::
+receives a ``Token`` where the ``value`` attribute contains all
+remaining untokenized text.  A typical handler might look at this text
+and skip ahead in some manner.  For example::

    class MyLexer(Lexer):
        ...
        # Error handling rule
-        def error(self, value):
-            print("Illegal character '%s'" % value[0])
+        def error(self, t):
+            print("Illegal character '%s'" % t.value[0])
            self.index += 1

 In this case, we print the offending character and skip ahead
@ -367,6 +367,13 @@ parser is often a hard problem.  An error handler might scan ahead
 to a logical synchronization point such as a semicolon, a blank line,
 or similar landmark.

+If the ``error()`` method also returns the passed token, it will
+show up as an ``ERROR`` token in the resulting token stream. This
+might be useful if the parser wants to see error tokens for some
+reason--perhaps for the purposes of improved error messages or
+some other kind of error handling.
+
+
 A More Complete Example
 ^^^^^^^^^^^^^^^^^^^^^^^

--- a/example/calc/calc.py
+++ b/example/calc/calc.py
@ -43,8 +43,8 @@ class CalcLexer(Lexer):
    def newline(self, t):
        self.lineno += t.value.count('\n')

-    def error(self, value):
-        print("Illegal character '%s'" % value[0])
+    def error(self, t):
+        print("Illegal character '%s'" % t.value[0])
        self.index += 1

 class CalcParser(Parser):
--- a/example/calc_prec/calc.py
+++ b/example/calc_prec/calc.py
@ -26,8 +26,8 @@ class CalcLexer(Lexer):
    def newline(self, t):
        self.lineno += t.value.count('\n')

-    def error(self, value):
-        print("Illegal character '%s'" % value[0])
+    def error(self, t):
+        print("Illegal character '%s'" % t.value[0])
        self.index += 1

 class CalcParser(Parser):
--- a/setup.py
+++ b/setup.py
@ -11,7 +11,7 @@ setup(name = "sly",
 SLY is an implementation of lex and yacc for Python 3.
 """,
            license="""BSD""",
-            version = "0.1",
+            version = "0.2",
            author = "David Beazley",
            author_email = "dave@dabeaz.com",
            maintainer = "David Beazley",
--- a/sly/lex.py
+++ b/sly/lex.py
@ -31,7 +31,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # -----------------------------------------------------------------------------

-__version__    = '0.1'
+__version__    = '0.2'
 __all__ = ['Lexer', 'LexerStateChange']

 import re
@ -251,7 +251,12 @@ class Lexer(metaclass=LexerMeta):
                            # A lexing error
                            self.index = index
                            self.lineno = lineno
-                            self.error(text[index:])
+                            tok.type = 'ERROR'
+                            tok.value = text[index:]
+                            tok = self.error(tok)
+                            if tok is not None:
+                                yield tok
+
                            index = self.index
                            lineno = self.lineno

@ -267,5 +272,5 @@ class Lexer(metaclass=LexerMeta):
                self.lineno = lineno

    # Default implementations of the error handler. May be changed in subclasses
-    def error(self, value):
-        raise LexError(f'Illegal character {value[0]!r} at index {self.index}', value, self.index)
+    def error(self, t):
+        raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)
--- a/sly/yacc.py
+++ b/sly/yacc.py
@ -35,7 +35,7 @@ import sys
 import inspect
 from collections import OrderedDict, defaultdict

-__version__    = '0.1'
+__version__    = '0.2'
 __all__        = [ 'Parser' ]

 class YaccError(Exception):
--- a/tests/test_lex.py
+++ b/tests/test_lex.py
@ -47,9 +47,11 @@ class CalcLexer(Lexer):
        t.value = t.value.upper()
        return t

-    def error(self, value):
-        self.errors.append(value)
+    def error(self, t):
+        self.errors.append(t.value)
        self.index += 1
+        if hasattr(self, 'return_error'):
+            return t

    def __init__(self):
        self.errors = []
@ -85,6 +87,17 @@ def test_error():
    assert vals == [123, '+', '-']
    assert lexer.errors == [ ':+-' ]

+# Test error token return handling
+def test_error_return():
+    lexer = CalcLexer()
+    lexer.return_error = True
+    toks = list(lexer.tokenize('123 :+-'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
+    assert vals == [123, ':+-', '+', '-']
+    assert lexer.errors == [ ':+-' ]
+