Changes for 0.2

This commit is contained in:
David Beazley 2018-01-10 06:09:20 -06:00
parent d8903d8301
commit e05748494c
10 changed files with 55 additions and 22 deletions

8
CHANGES Normal file
View File

@ -0,0 +1,8 @@
Version 0.2
-----------
12/24/2017 The error(self, t) method of lexer objects now receives a
token as input. The value attribute of this token contains
all remaining input text. If the passed token is returned
by error(), then it shows up in the token stream where it
can be processed by the parser.

View File

@ -1,4 +1,4 @@
SLY (Sly Lex-Yacc) Version 0.1
SLY (Sly Lex-Yacc) Version 0.2
Copyright (C) 2016-2017
David M. Beazley (Dabeaz LLC)
@ -103,8 +103,8 @@ expressions and store variables:
def newline(self, t):
self.lineno += t.value.count('\n')
def error(self, value):
print("Illegal character '%s'" % value[0])
def error(self, t):
print("Illegal character '%s'" % t.value[0])
self.index += 1
class CalcParser(Parser):

View File

@ -78,8 +78,8 @@ expressions and store variables::
def newline(self, t):
self.lineno += t.value.count('\n')
def error(self, value):
print("Illegal character '%s'" % value[0])
def error(self, t):
print("Illegal character '%s'" % t.value[0])
self.index += 1
class CalcParser(Parser):

View File

@ -350,15 +350,15 @@ Error handling
If a bad character is encountered while lexing, tokenizing will stop.
However, you can add an ``error()`` method to handle lexing errors
that occur when illegal characters are detected. The error method
receives a string containing all remaining untokenized text. A
typical handler might look at this text and skip ahead in some manner.
For example::
receives a ``Token`` where the ``value`` attribute contains all
remaining untokenized text. A typical handler might look at this text
and skip ahead in some manner. For example::
class MyLexer(Lexer):
...
# Error handling rule
def error(self, value):
print("Illegal character '%s'" % value[0])
def error(self, t):
print("Illegal character '%s'" % t.value[0])
self.index += 1
In this case, we print the offending character and skip ahead
@ -367,6 +367,13 @@ parser is often a hard problem. An error handler might scan ahead
to a logical synchronization point such as a semicolon, a blank line,
or similar landmark.
If the ``error()`` method also returns the passed token, it will
show up as an ``ERROR`` token in the resulting token stream. This
might be useful if the parser wants to see error tokens for some
reason--perhaps for the purposes of improved error messages or
some other kind of error handling.
A More Complete Example
^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -43,8 +43,8 @@ class CalcLexer(Lexer):
def newline(self, t):
self.lineno += t.value.count('\n')
def error(self, value):
print("Illegal character '%s'" % value[0])
def error(self, t):
print("Illegal character '%s'" % t.value[0])
self.index += 1
class CalcParser(Parser):

View File

@ -26,8 +26,8 @@ class CalcLexer(Lexer):
def newline(self, t):
self.lineno += t.value.count('\n')
def error(self, value):
print("Illegal character '%s'" % value[0])
def error(self, t):
print("Illegal character '%s'" % t.value[0])
self.index += 1
class CalcParser(Parser):

View File

@ -11,7 +11,7 @@ setup(name = "sly",
SLY is an implementation of lex and yacc for Python 3.
""",
license="""BSD""",
version = "0.1",
version = "0.2",
author = "David Beazley",
author_email = "dave@dabeaz.com",
maintainer = "David Beazley",

View File

@ -31,7 +31,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
__version__ = '0.1'
__version__ = '0.2'
__all__ = ['Lexer', 'LexerStateChange']
import re
@ -251,7 +251,12 @@ class Lexer(metaclass=LexerMeta):
# A lexing error
self.index = index
self.lineno = lineno
self.error(text[index:])
tok.type = 'ERROR'
tok.value = text[index:]
tok = self.error(tok)
if tok is not None:
yield tok
index = self.index
lineno = self.lineno
@ -267,5 +272,5 @@ class Lexer(metaclass=LexerMeta):
self.lineno = lineno
# Default implementations of the error handler. May be changed in subclasses
def error(self, value):
raise LexError(f'Illegal character {value[0]!r} at index {self.index}', value, self.index)
def error(self, t):
    """Default lexing-error handler; subclasses may override it.

    ``t`` is the error token built by the tokenizer; its ``value``
    attribute holds all remaining untokenized input text.

    Raises:
        LexError: always, carrying the message, the remaining text and
            the index at which lexing failed.
    """
    # The parameter is the token, not the raw text, so the remaining input
    # must be read from ``t.value`` (a bare ``value`` name no longer exists).
    raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)

View File

@ -35,7 +35,7 @@ import sys
import inspect
from collections import OrderedDict, defaultdict
__version__ = '0.1'
__version__ = '0.2'
__all__ = [ 'Parser' ]
class YaccError(Exception):

View File

@ -47,9 +47,11 @@ class CalcLexer(Lexer):
t.value = t.value.upper()
return t
def error(self, value):
self.errors.append(value)
# Test-lexer error handler: records the remaining input carried by the error token.
def error(self, t):
self.errors.append(t.value)
# Step past the offending character so lexing resumes at the next one.
self.index += 1
# Hand the token back only when a `return_error` attribute has been set on
# the instance; otherwise fall through and implicitly return None.
if hasattr(self, 'return_error'):
return t
def __init__(self):
self.errors = []
@ -85,6 +87,17 @@ def test_error():
assert vals == [123, '+', '-']
assert lexer.errors == [ ':+-' ]
# Test error token return handling
def test_error_return():
lexer = CalcLexer()
# The mere presence of this attribute makes CalcLexer.error() return the token.
lexer.return_error = True
toks = list(lexer.tokenize('123 :+-'))
types = [t.type for t in toks]
vals = [t.value for t in toks]
# The returned token appears in the stream as ERROR, carrying all text from ':' onward;
# lexing then resumes one character later, so '+' and '-' are still tokenized.
assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
assert vals == [123, ':+-', '+', '-']
assert lexer.errors == [ ':+-' ]