Changes for 0.2

This commit is contained in:
David Beazley 2018-01-10 06:09:20 -06:00
parent d8903d8301
commit e05748494c
10 changed files with 55 additions and 22 deletions

8
CHANGES Normal file
View File

@ -0,0 +1,8 @@
Version 0.2
-----------
12/24/2017 The error(self, t) method of lexer objects now receives a
token as input. The value attribute of this token contains
all remaining input text. If the passed token is returned
by error(), then it shows up in the token stream where it
can be processed by the parser.

View File

@ -1,4 +1,4 @@
SLY (Sly Lex-Yacc) Version 0.1 SLY (Sly Lex-Yacc) Version 0.2
Copyright (C) 2016-2017 Copyright (C) 2016-2017
David M. Beazley (Dabeaz LLC) David M. Beazley (Dabeaz LLC)
@ -103,8 +103,8 @@ expressions and store variables:
def newline(self, t): def newline(self, t):
self.lineno += t.value.count('\n') self.lineno += t.value.count('\n')
def error(self, value): def error(self, t):
print("Illegal character '%s'" % value[0]) print("Illegal character '%s'" % t.value[0])
self.index += 1 self.index += 1
class CalcParser(Parser): class CalcParser(Parser):

View File

@ -78,8 +78,8 @@ expressions and store variables::
def newline(self, t): def newline(self, t):
self.lineno += t.value.count('\n') self.lineno += t.value.count('\n')
def error(self, value): def error(self, t):
print("Illegal character '%s'" % value[0]) print("Illegal character '%s'" % t.value[0])
self.index += 1 self.index += 1
class CalcParser(Parser): class CalcParser(Parser):

View File

@ -350,15 +350,15 @@ Error handling
If a bad character is encountered while lexing, tokenizing will stop. If a bad character is encountered while lexing, tokenizing will stop.
However, you can add an ``error()`` method to handle lexing errors However, you can add an ``error()`` method to handle lexing errors
that occur when illegal characters are detected. The error method that occur when illegal characters are detected. The error method
receives a string containing all remaining untokenized text. A receives a ``Token`` where the ``value`` attribute contains all
typical handler might look at this text and skip ahead in some manner. remaining untokenized text. A typical handler might look at this text
For example:: and skip ahead in some manner. For example::
class MyLexer(Lexer): class MyLexer(Lexer):
... ...
# Error handling rule # Error handling rule
def error(self, value): def error(self, t):
print("Illegal character '%s'" % value[0]) print("Illegal character '%s'" % t.value[0])
self.index += 1 self.index += 1
In this case, we print the offending character and skip ahead In this case, we print the offending character and skip ahead
@ -367,6 +367,13 @@ parser is often a hard problem. An error handler might scan ahead
to a logical synchronization point such as a semicolon, a blank line, to a logical synchronization point such as a semicolon, a blank line,
or similar landmark. or similar landmark.
If the ``error()`` method also returns the passed token, it will
show up as an ``ERROR`` token in the resulting token stream. This
might be useful if the parser wants to see error tokens for some
reason--perhaps for the purposes of improved error messages or
some other kind of error handling.
A More Complete Example A More Complete Example
^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -43,8 +43,8 @@ class CalcLexer(Lexer):
def newline(self, t): def newline(self, t):
self.lineno += t.value.count('\n') self.lineno += t.value.count('\n')
def error(self, value): def error(self, t):
print("Illegal character '%s'" % value[0]) print("Illegal character '%s'" % t.value[0])
self.index += 1 self.index += 1
class CalcParser(Parser): class CalcParser(Parser):

View File

@ -26,8 +26,8 @@ class CalcLexer(Lexer):
def newline(self, t): def newline(self, t):
self.lineno += t.value.count('\n') self.lineno += t.value.count('\n')
def error(self, value): def error(self, t):
print("Illegal character '%s'" % value[0]) print("Illegal character '%s'" % t.value[0])
self.index += 1 self.index += 1
class CalcParser(Parser): class CalcParser(Parser):

View File

@ -11,7 +11,7 @@ setup(name = "sly",
SLY is an implementation of lex and yacc for Python 3. SLY is an implementation of lex and yacc for Python 3.
""", """,
license="""BSD""", license="""BSD""",
version = "0.1", version = "0.2",
author = "David Beazley", author = "David Beazley",
author_email = "dave@dabeaz.com", author_email = "dave@dabeaz.com",
maintainer = "David Beazley", maintainer = "David Beazley",

View File

@ -31,7 +31,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ----------------------------------------------------------------------------- # -----------------------------------------------------------------------------
__version__ = '0.1' __version__ = '0.2'
__all__ = ['Lexer', 'LexerStateChange'] __all__ = ['Lexer', 'LexerStateChange']
import re import re
@ -251,7 +251,12 @@ class Lexer(metaclass=LexerMeta):
# A lexing error # A lexing error
self.index = index self.index = index
self.lineno = lineno self.lineno = lineno
self.error(text[index:]) tok.type = 'ERROR'
tok.value = text[index:]
tok = self.error(tok)
if tok is not None:
yield tok
index = self.index index = self.index
lineno = self.lineno lineno = self.lineno
@ -267,5 +272,5 @@ class Lexer(metaclass=LexerMeta):
self.lineno = lineno self.lineno = lineno
# Default implementations of the error handler. May be changed in subclasses # Default implementations of the error handler. May be changed in subclasses
def error(self, value): def error(self, t):
raise LexError(f'Illegal character {value[0]!r} at index {self.index}', value, self.index) raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)

View File

@ -35,7 +35,7 @@ import sys
import inspect import inspect
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
__version__ = '0.1' __version__ = '0.2'
__all__ = [ 'Parser' ] __all__ = [ 'Parser' ]
class YaccError(Exception): class YaccError(Exception):

View File

@ -47,9 +47,11 @@ class CalcLexer(Lexer):
t.value = t.value.upper() t.value = t.value.upper()
return t return t
def error(self, value): def error(self, t):
self.errors.append(value) self.errors.append(t.value)
self.index += 1 self.index += 1
if hasattr(self, 'return_error'):
return t
def __init__(self): def __init__(self):
self.errors = [] self.errors = []
@ -85,6 +87,17 @@ def test_error():
assert vals == [123, '+', '-'] assert vals == [123, '+', '-']
assert lexer.errors == [ ':+-' ] assert lexer.errors == [ ':+-' ]
# Test error token return handling
def test_error_return():
lexer = CalcLexer()
lexer.return_error = True
toks = list(lexer.tokenize('123 :+-'))
types = [t.type for t in toks]
vals = [t.value for t in toks]
assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
assert vals == [123, ':+-', '+', '-']
assert lexer.errors == [ ':+-' ]