Changes for 0.2
parent d8903d8301
commit e05748494c
CHANGES | 8 (new file)
@@ -0,0 +1,8 @@
+Version 0.2
+-----------
+
+12/24/2017 The error(self, t) method of lexer objects now receives a
+           token as input. The value attribute of this token contains
+           all remaining input text. If the passed token is returned
+           by error(), then it shows up in the token stream where it
+           can be processed by the parser.
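To make the new interface concrete, here is a minimal sketch of a lexer written against the changed error(self, t) signature. MyLexer, its token names, and its patterns are illustrative only and are not part of this commit; the handler mirrors the documentation examples updated below and returns the token so that it surfaces as an ERROR token:

    # Illustrative sketch; not part of this commit.
    from sly import Lexer

    class MyLexer(Lexer):
        tokens = { 'NUMBER', 'PLUS', 'MINUS' }
        ignore = ' \t'

        NUMBER = r'\d+'
        PLUS   = r'\+'
        MINUS  = r'-'

        def error(self, t):
            # t.value contains all remaining untokenized text
            print("Illegal character '%s'" % t.value[0])
            self.index += 1          # skip past the offending character
            return t                 # the returned token appears as an ERROR token

    # Tokenizing '123 :+-' yields NUMBER, ERROR, PLUS, MINUS
    # (the new test_error_return() test near the bottom of this commit
    # checks the same sequence).
    for tok in MyLexer().tokenize('123 :+-'):
        print(tok.type, repr(tok.value))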
@@ -1,4 +1,4 @@
-SLY (Sly Lex-Yacc) Version 0.1
+SLY (Sly Lex-Yacc) Version 0.2

 Copyright (C) 2016-2017
 David M. Beazley (Dabeaz LLC)
@@ -103,8 +103,8 @@ expressions and store variables:
     def newline(self, t):
         self.lineno += t.value.count('\n')

-    def error(self, value):
-        print("Illegal character '%s'" % value[0])
+    def error(self, t):
+        print("Illegal character '%s'" % t.value[0])
         self.index += 1

 class CalcParser(Parser):
@@ -78,8 +78,8 @@ expressions and store variables::
     def newline(self, t):
         self.lineno += t.value.count('\n')

-    def error(self, value):
-        print("Illegal character '%s'" % value[0])
+    def error(self, t):
+        print("Illegal character '%s'" % t.value[0])
         self.index += 1

 class CalcParser(Parser):
docs/sly.rst | 17
@@ -350,15 +350,15 @@ Error handling
 If a bad character is encountered while lexing, tokenizing will stop.
 However, you can add an ``error()`` method to handle lexing errors
 that occur when illegal characters are detected. The error method
-receives a string containing all remaining untokenized text. A
-typical handler might look at this text and skip ahead in some manner.
-For example::
+receives a ``Token`` where the ``value`` attribute contains all
+remaining untokenized text. A typical handler might look at this text
+and skip ahead in some manner. For example::

     class MyLexer(Lexer):
         ...
         # Error handling rule
-        def error(self, value):
-            print("Illegal character '%s'" % value[0])
+        def error(self, t):
+            print("Illegal character '%s'" % t.value[0])
             self.index += 1

 In this case, we print the offending character and skip ahead
@@ -367,6 +367,13 @@ parser is often a hard problem. An error handler might scan ahead
 to a logical synchronization point such as a semicolon, a blank line,
 or similar landmark.

+If the ``error()`` method also returns the passed token, it will
+show up as an ``ERROR`` token in the resulting token stream. This
+might be useful if the parser wants to see error tokens for some
+reason--perhaps for the purposes of improved error messages or
+some other kind of error handling.
+
+
 A More Complete Example
 ^^^^^^^^^^^^^^^^^^^^^^^

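One way the rest of a program might act on those ERROR tokens is sketched below; filter_errors is a hypothetical helper, not part of this commit, and the new test_error_return() test added in this commit exercises the underlying behaviour directly:

    # Sketch: pass ordinary tokens through, report ERROR tokens as they appear.
    def filter_errors(lexer, text):
        for tok in lexer.tokenize(text):
            if tok.type == 'ERROR':
                # tok.value holds all text that was still untokenized at the error
                print("Skipping bad input starting with %r" % tok.value[0])
            else:
                yield tok

    # Typical use (assuming a lexer whose error() method returns the passed token):
    #     parser.parse(filter_errors(lexer, source_text))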
@@ -43,8 +43,8 @@ class CalcLexer(Lexer):
     def newline(self, t):
         self.lineno += t.value.count('\n')

-    def error(self, value):
-        print("Illegal character '%s'" % value[0])
+    def error(self, t):
+        print("Illegal character '%s'" % t.value[0])
         self.index += 1

 class CalcParser(Parser):
@@ -26,8 +26,8 @@ class CalcLexer(Lexer):
     def newline(self, t):
         self.lineno += t.value.count('\n')

-    def error(self, value):
-        print("Illegal character '%s'" % value[0])
+    def error(self, t):
+        print("Illegal character '%s'" % t.value[0])
         self.index += 1

 class CalcParser(Parser):
setup.py | 2
@@ -11,7 +11,7 @@ setup(name = "sly",
 SLY is an implementation of lex and yacc for Python 3.
 """,
 license="""BSD""",
-version = "0.1",
+version = "0.2",
 author = "David Beazley",
 author_email = "dave@dabeaz.com",
 maintainer = "David Beazley",
sly/lex.py | 13
@@ -31,7 +31,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # -----------------------------------------------------------------------------

-__version__ = '0.1'
+__version__ = '0.2'
 __all__ = ['Lexer', 'LexerStateChange']

 import re
@@ -251,7 +251,12 @@ class Lexer(metaclass=LexerMeta):
                 # A lexing error
                 self.index = index
                 self.lineno = lineno
-                self.error(text[index:])
+                tok.type = 'ERROR'
+                tok.value = text[index:]
+                tok = self.error(tok)
+                if tok is not None:
+                    yield tok
+
                 index = self.index
                 lineno = self.lineno

@@ -267,5 +272,5 @@
             self.lineno = lineno

     # Default implementations of the error handler. May be changed in subclasses
-    def error(self, value):
-        raise LexError(f'Illegal character {value[0]!r} at index {self.index}', value, self.index)
+    def error(self, t):
+        raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)
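For lexers that do not override error(), the default handler above now raises LexError built from the token. A small sketch of catching it follows; StrictLexer is hypothetical and not part of this commit, while LexError is the exception class defined in sly/lex.py:

    from sly import Lexer
    from sly.lex import LexError

    class StrictLexer(Lexer):        # hypothetical lexer with no error() override
        tokens = { 'NUMBER' }
        ignore = ' '
        NUMBER = r'\d+'

    try:
        list(StrictLexer().tokenize('12 $ 34'))
    except LexError as e:
        # The message names the illegal character and index; the remaining text
        # and the index are also passed to the exception as positional arguments.
        print('Lexing stopped:', e)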
@@ -35,7 +35,7 @@ import sys
 import inspect
 from collections import OrderedDict, defaultdict

-__version__ = '0.1'
+__version__ = '0.2'
 __all__ = [ 'Parser' ]

 class YaccError(Exception):
@@ -47,9 +47,11 @@ class CalcLexer(Lexer):
         t.value = t.value.upper()
         return t

-    def error(self, value):
-        self.errors.append(value)
+    def error(self, t):
+        self.errors.append(t.value)
         self.index += 1
+        if hasattr(self, 'return_error'):
+            return t

     def __init__(self):
         self.errors = []
@@ -85,6 +87,17 @@ def test_error():
     assert vals == [123, '+', '-']
     assert lexer.errors == [ ':+-' ]

+# Test error token return handling
+def test_error_return():
+    lexer = CalcLexer()
+    lexer.return_error = True
+    toks = list(lexer.tokenize('123 :+-'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
+    assert vals == [123, ':+-', '+', '-']
+    assert lexer.errors == [ ':+-' ]
+