Changes for 0.2
This commit is contained in:
		
							
								
								
									
										8
									
								
								CHANGES
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								CHANGES
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
				
			|||||||
 | 
					Version 0.2
 | 
				
			||||||
 | 
					-----------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					12/24/2017 The error(self, t) method of lexer objects now receives a
 | 
				
			||||||
 | 
					           token as input.  The value attribute of this token contains
 | 
				
			||||||
 | 
					           all remaining input text.  If the passed token is returned
 | 
				
			||||||
 | 
					           by error(), then it shows up in the token stream where
 | 
				
			||||||
 | 
					           it can be processed by the parser.
 | 
				
			||||||
@@ -1,4 +1,4 @@
 | 
				
			|||||||
SLY (Sly Lex-Yacc)                   Version 0.1
 | 
					SLY (Sly Lex-Yacc)                   Version 0.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Copyright (C) 2016-2017
 | 
					Copyright (C) 2016-2017
 | 
				
			||||||
David M. Beazley (Dabeaz LLC)
 | 
					David M. Beazley (Dabeaz LLC)
 | 
				
			||||||
@@ -103,8 +103,8 @@ expressions and store variables:
 | 
				
			|||||||
        def newline(self, t):
 | 
					        def newline(self, t):
 | 
				
			||||||
            self.lineno += t.value.count('\n')
 | 
					            self.lineno += t.value.count('\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def error(self, value):
 | 
					        def error(self, t):
 | 
				
			||||||
            print("Illegal character '%s'" % value[0])
 | 
					            print("Illegal character '%s'" % t.value[0])
 | 
				
			||||||
            self.index += 1
 | 
					            self.index += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    class CalcParser(Parser):
 | 
					    class CalcParser(Parser):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -78,8 +78,8 @@ expressions and store variables::
 | 
				
			|||||||
        def newline(self, t):
 | 
					        def newline(self, t):
 | 
				
			||||||
            self.lineno += t.value.count('\n')
 | 
					            self.lineno += t.value.count('\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def error(self, value):
 | 
					        def error(self, t):
 | 
				
			||||||
            print("Illegal character '%s'" % value[0])
 | 
					            print("Illegal character '%s'" % t.value[0])
 | 
				
			||||||
            self.index += 1
 | 
					            self.index += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    class CalcParser(Parser):
 | 
					    class CalcParser(Parser):
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										17
									
								
								docs/sly.rst
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								docs/sly.rst
									
									
									
									
									
								
							@@ -350,15 +350,15 @@ Error handling
 | 
				
			|||||||
If a bad character is encountered while lexing, tokenizing will stop.
 | 
					If a bad character is encountered while lexing, tokenizing will stop.
 | 
				
			||||||
However, you can add an ``error()`` method to handle lexing errors
 | 
					However, you can add an ``error()`` method to handle lexing errors
 | 
				
			||||||
that occur when illegal characters are detected.  The error method
 | 
					that occur when illegal characters are detected.  The error method
 | 
				
			||||||
receives a string containing all remaining untokenized text.  A
 | 
					receives a ``Token`` where the ``value`` attribute contains all
 | 
				
			||||||
typical handler might look at this text and skip ahead in some manner.
 | 
					remaining untokenized text.  A typical handler might look at this text
 | 
				
			||||||
For example::
 | 
					and skip ahead in some manner.  For example::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    class MyLexer(Lexer):
 | 
					    class MyLexer(Lexer):
 | 
				
			||||||
        ...
 | 
					        ...
 | 
				
			||||||
        # Error handling rule
 | 
					        # Error handling rule
 | 
				
			||||||
        def error(self, value):
 | 
					        def error(self, t):
 | 
				
			||||||
            print("Illegal character '%s'" % value[0])
 | 
					            print("Illegal character '%s'" % t.value[0])
 | 
				
			||||||
            self.index += 1
 | 
					            self.index += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
In this case, we print the offending character and skip ahead
 | 
					In this case, we print the offending character and skip ahead
 | 
				
			||||||
@@ -367,6 +367,13 @@ parser is often a hard problem.  An error handler might scan ahead
 | 
				
			|||||||
to a logical synchronization point such as a semicolon, a blank line,
 | 
					to a logical synchronization point such as a semicolon, a blank line,
 | 
				
			||||||
or similar landmark.
 | 
					or similar landmark.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If the ``error()`` method also returns the passed token, it will
 | 
				
			||||||
 | 
					show up as an ``ERROR`` token in the resulting token stream. This
 | 
				
			||||||
 | 
					might be useful if the parser wants to see error tokens for some
 | 
				
			||||||
 | 
					reason--perhaps for the purposes of improved error messages or
 | 
				
			||||||
 | 
					some other kind of error handling.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
A More Complete Example
 | 
					A More Complete Example
 | 
				
			||||||
^^^^^^^^^^^^^^^^^^^^^^^
 | 
					^^^^^^^^^^^^^^^^^^^^^^^
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -43,8 +43,8 @@ class CalcLexer(Lexer):
 | 
				
			|||||||
    def newline(self, t):
 | 
					    def newline(self, t):
 | 
				
			||||||
        self.lineno += t.value.count('\n')
 | 
					        self.lineno += t.value.count('\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def error(self, value):
 | 
					    def error(self, t):
 | 
				
			||||||
        print("Illegal character '%s'" % value[0])
 | 
					        print("Illegal character '%s'" % t.value[0])
 | 
				
			||||||
        self.index += 1
 | 
					        self.index += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CalcParser(Parser):
 | 
					class CalcParser(Parser):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -26,8 +26,8 @@ class CalcLexer(Lexer):
 | 
				
			|||||||
    def newline(self, t):
 | 
					    def newline(self, t):
 | 
				
			||||||
        self.lineno += t.value.count('\n')
 | 
					        self.lineno += t.value.count('\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def error(self, value):
 | 
					    def error(self, t):
 | 
				
			||||||
        print("Illegal character '%s'" % value[0])
 | 
					        print("Illegal character '%s'" % t.value[0])
 | 
				
			||||||
        self.index += 1
 | 
					        self.index += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class CalcParser(Parser):
 | 
					class CalcParser(Parser):
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										2
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								setup.py
									
									
									
									
									
								
							@@ -11,7 +11,7 @@ setup(name = "sly",
 | 
				
			|||||||
SLY is an implementation of lex and yacc for Python 3.
 | 
					SLY is an implementation of lex and yacc for Python 3.
 | 
				
			||||||
""",
 | 
					""",
 | 
				
			||||||
            license="""BSD""",
 | 
					            license="""BSD""",
 | 
				
			||||||
            version = "0.1",
 | 
					            version = "0.2",
 | 
				
			||||||
            author = "David Beazley",
 | 
					            author = "David Beazley",
 | 
				
			||||||
            author_email = "dave@dabeaz.com",
 | 
					            author_email = "dave@dabeaz.com",
 | 
				
			||||||
            maintainer = "David Beazley",
 | 
					            maintainer = "David Beazley",
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										13
									
								
								sly/lex.py
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								sly/lex.py
									
									
									
									
									
								
							@@ -31,7 +31,7 @@
 | 
				
			|||||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
					# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
# -----------------------------------------------------------------------------
 | 
					# -----------------------------------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__version__    = '0.1'
 | 
					__version__    = '0.2'
 | 
				
			||||||
__all__ = ['Lexer', 'LexerStateChange']
 | 
					__all__ = ['Lexer', 'LexerStateChange']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
@@ -251,7 +251,12 @@ class Lexer(metaclass=LexerMeta):
 | 
				
			|||||||
                            # A lexing error
 | 
					                            # A lexing error
 | 
				
			||||||
                            self.index = index
 | 
					                            self.index = index
 | 
				
			||||||
                            self.lineno = lineno
 | 
					                            self.lineno = lineno
 | 
				
			||||||
                            self.error(text[index:])
 | 
					                            tok.type = 'ERROR'
 | 
				
			||||||
 | 
					                            tok.value = text[index:]
 | 
				
			||||||
 | 
					                            tok = self.error(tok)
 | 
				
			||||||
 | 
					                            if tok is not None:
 | 
				
			||||||
 | 
					                                yield tok
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                            index = self.index
 | 
					                            index = self.index
 | 
				
			||||||
                            lineno = self.lineno
 | 
					                            lineno = self.lineno
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -267,5 +272,5 @@ class Lexer(metaclass=LexerMeta):
 | 
				
			|||||||
                self.lineno = lineno
 | 
					                self.lineno = lineno
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Default implementations of the error handler. May be changed in subclasses
 | 
					    # Default implementations of the error handler. May be changed in subclasses
 | 
				
			||||||
    def error(self, value):
 | 
					    def error(self, t):
 | 
				
			||||||
        raise LexError(f'Illegal character {value[0]!r} at index {self.index}', value, self.index)
 | 
					        raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,7 +35,7 @@ import sys
 | 
				
			|||||||
import inspect
 | 
					import inspect
 | 
				
			||||||
from collections import OrderedDict, defaultdict
 | 
					from collections import OrderedDict, defaultdict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__version__    = '0.1'
 | 
					__version__    = '0.2'
 | 
				
			||||||
__all__        = [ 'Parser' ]
 | 
					__all__        = [ 'Parser' ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class YaccError(Exception):
 | 
					class YaccError(Exception):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -47,9 +47,11 @@ class CalcLexer(Lexer):
 | 
				
			|||||||
        t.value = t.value.upper()
 | 
					        t.value = t.value.upper()
 | 
				
			||||||
        return t
 | 
					        return t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def error(self, value):
 | 
					    def error(self, t):
 | 
				
			||||||
        self.errors.append(value)
 | 
					        self.errors.append(t.value)
 | 
				
			||||||
        self.index += 1
 | 
					        self.index += 1
 | 
				
			||||||
 | 
					        if hasattr(self, 'return_error'):
 | 
				
			||||||
 | 
					            return t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self):
 | 
					    def __init__(self):
 | 
				
			||||||
        self.errors = []
 | 
					        self.errors = []
 | 
				
			||||||
@@ -85,6 +87,17 @@ def test_error():
 | 
				
			|||||||
    assert vals == [123, '+', '-']
 | 
					    assert vals == [123, '+', '-']
 | 
				
			||||||
    assert lexer.errors == [ ':+-' ]
 | 
					    assert lexer.errors == [ ':+-' ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Test error token return handling
 | 
				
			||||||
 | 
					def test_error_return():
 | 
				
			||||||
 | 
					    lexer = CalcLexer()
 | 
				
			||||||
 | 
					    lexer.return_error = True
 | 
				
			||||||
 | 
					    toks = list(lexer.tokenize('123 :+-'))
 | 
				
			||||||
 | 
					    types = [t.type for t in toks]
 | 
				
			||||||
 | 
					    vals = [t.value for t in toks]
 | 
				
			||||||
 | 
					    assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
 | 
				
			||||||
 | 
					    assert vals == [123, ':+-', '+', '-']
 | 
				
			||||||
 | 
					    assert lexer.errors == [ ':+-' ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user