diff --git a/CHANGES b/CHANGES new file mode 100644 index 0000000..40af0eb --- /dev/null +++ b/CHANGES @@ -0,0 +1,8 @@ +Version 0.2 +----------- + +12/24/2017 The error(self, t) method of lexer objects now receives a + token as input. The value attribute of this token contains + all remaining input text. If the passed token is returned + by error(), then it shows up in the token stream where + it can be processed by the parser. diff --git a/README.md b/README.md index b3b5f3f..b94c0ff 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -SLY (Sly Lex-Yacc) Version 0.1 +SLY (Sly Lex-Yacc) Version 0.2 Copyright (C) 2016-2017 David M. Beazley (Dabeaz LLC) @@ -103,8 +103,8 @@ expressions and store variables: def newline(self, t): self.lineno += t.value.count('\n') - def error(self, value): - print("Illegal character '%s'" % value[0]) + def error(self, t): + print("Illegal character '%s'" % t.value[0]) self.index += 1 class CalcParser(Parser): diff --git a/docs/index.rst b/docs/index.rst index b52b9f3..29b43c3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -78,8 +78,8 @@ expressions and store variables:: def newline(self, t): self.lineno += t.value.count('\n') - def error(self, value): - print("Illegal character '%s'" % value[0]) + def error(self, t): + print("Illegal character '%s'" % t.value[0]) self.index += 1 class CalcParser(Parser): diff --git a/docs/sly.rst b/docs/sly.rst index ca22d1c..dc6dd87 100644 --- a/docs/sly.rst +++ b/docs/sly.rst @@ -350,15 +350,15 @@ Error handling If a bad character is encountered while lexing, tokenizing will stop. However, you can add an ``error()`` method to handle lexing errors that occur when illegal characters are detected. The error method -receives a string containing all remaining untokenized text. A -typical handler might look at this text and skip ahead in some manner. -For example:: +receives a ``Token`` where the ``value`` attribute contains all +remaining untokenized text. 
A typical handler might look at this text +and skip ahead in some manner. For example:: class MyLexer(Lexer): ... # Error handling rule - def error(self, value): - print("Illegal character '%s'" % value[0]) + def error(self, t): + print("Illegal character '%s'" % t.value[0]) self.index += 1 In this case, we print the offending character and skip ahead @@ -367,6 +367,13 @@ parser is often a hard problem. An error handler might scan ahead to a logical synchronization point such as a semicolon, a blank line, or similar landmark. +If the ``error()`` method also returns the passed token, it will +show up as an ``ERROR`` token in the resulting token stream. This +might be useful if the parser wants to see error tokens for some +reason--perhaps for the purposes of improved error messages or +some other kind of error handling. + + A More Complete Example ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/example/calc/calc.py b/example/calc/calc.py index 1e16ed4..e15f80a 100644 --- a/example/calc/calc.py +++ b/example/calc/calc.py @@ -43,8 +43,8 @@ class CalcLexer(Lexer): def newline(self, t): self.lineno += t.value.count('\n') - def error(self, value): - print("Illegal character '%s'" % value[0]) + def error(self, t): + print("Illegal character '%s'" % t.value[0]) self.index += 1 class CalcParser(Parser): diff --git a/example/calc_prec/calc.py b/example/calc_prec/calc.py index 24b51fc..0a6261e 100644 --- a/example/calc_prec/calc.py +++ b/example/calc_prec/calc.py @@ -26,8 +26,8 @@ class CalcLexer(Lexer): def newline(self, t): self.lineno += t.value.count('\n') - def error(self, value): - print("Illegal character '%s'" % value[0]) + def error(self, t): + print("Illegal character '%s'" % t.value[0]) self.index += 1 class CalcParser(Parser): diff --git a/setup.py b/setup.py index b510f5d..87e0f15 100755 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup(name = "sly", SLY is an implementation of lex and yacc for Python 3. 
""", license="""BSD""", - version = "0.1", + version = "0.2", author = "David Beazley", author_email = "dave@dabeaz.com", maintainer = "David Beazley", diff --git a/sly/lex.py b/sly/lex.py index 56a38d7..0d2f715 100644 --- a/sly/lex.py +++ b/sly/lex.py @@ -31,7 +31,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -__version__ = '0.1' +__version__ = '0.2' __all__ = ['Lexer', 'LexerStateChange'] import re @@ -251,7 +251,12 @@ class Lexer(metaclass=LexerMeta): # A lexing error self.index = index self.lineno = lineno - self.error(text[index:]) + tok.type = 'ERROR' + tok.value = text[index:] + tok = self.error(tok) + if tok is not None: + yield tok + index = self.index lineno = self.lineno @@ -267,5 +272,5 @@ class Lexer(metaclass=LexerMeta): self.lineno = lineno # Default implementations of the error handler. May be changed in subclasses - def error(self, value): - raise LexError(f'Illegal character {value[0]!r} at index {self.index}', value, self.index) + def error(self, t): + raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index) diff --git a/sly/yacc.py b/sly/yacc.py index dfe8911..6478580 100644 --- a/sly/yacc.py +++ b/sly/yacc.py @@ -35,7 +35,7 @@ import sys import inspect from collections import OrderedDict, defaultdict -__version__ = '0.1' +__version__ = '0.2' __all__ = [ 'Parser' ] class YaccError(Exception): diff --git a/tests/test_lex.py b/tests/test_lex.py index d1aa218..212ceec 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -47,9 +47,11 @@ class CalcLexer(Lexer): t.value = t.value.upper() return t - def error(self, value): - self.errors.append(value) + def error(self, t): + self.errors.append(t.value) self.index += 1 + if hasattr(self, 'return_error'): + return t def __init__(self): self.errors = [] @@ -85,6 +87,17 @@ def test_error(): assert vals == [123, '+', '-'] assert lexer.errors == [ ':+-' ] +# 
Test error token return handling +def test_error_return(): + lexer = CalcLexer() + lexer.return_error = True + toks = list(lexer.tokenize('123 :+-')) + types = [t.type for t in toks] + vals = [t.value for t in toks] + assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS'] + assert vals == [123, ':+-', '+', '-'] + assert lexer.errors == [ ':+-' ] +