Changes to token specification. More metamagic
This commit is contained in:
parent b74e7223ce
commit b088d9b2ce
CHANGES (59 changed lines)
@@ -1,5 +1,64 @@
Version 0.3
-----------
1/27/2018 Tokens no longer have to be specified as strings. For example, you
          can now write:

              from sly import Lexer

              class TheLexer(Lexer):
                  tokens = { ID, NUMBER, PLUS, MINUS }

                  ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
                  NUMBER = r'\d+'
                  PLUS = r'\+'
                  MINUS = r'-'

          This convention also carries over to the parser for things such
          as precedence specifiers:

              from sly import Parser
              class TheParser(Parser):
                  tokens = TheLexer.tokens

                  precedence = (
                      ('left', PLUS, MINUS),
                      ('left', TIMES, DIVIDE),
                      ('right', UMINUS),
                  )
                  ...

          Nevermind the fact that ID, NUMBER, PLUS, and MINUS appear to be
          undefined identifiers. It all works.

1/27/2018 Tokens now allow special-case remapping. For example:

              from sly import Lexer

              class TheLexer(Lexer):
                  tokens = { ID, IF, ELSE, WHILE, NUMBER, PLUS, MINUS }

                  ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
                  ID['if'] = IF
                  ID['else'] = ELSE
                  ID['while'] = WHILE

                  NUMBER = r'\d+'
                  PLUS = r'\+'
                  MINUS = r'-'

          In this code, the ID rule matches any identifier. However,
          special cases have been made for IF, ELSE, and WHILE tokens.
          Previously, this had to be handled in a special action method
          such as this:

              def ID(self, t):
                  if t.value in { 'if', 'else', 'while' }:
                      t.type = t.value.upper()
                  return t

          Nevermind the fact that the syntax appears to suggest that strings
          work as a kind of mutable mapping.

1/16/2018 Usability improvement on Lexer class. Regular expression rules
          specified as strings that don't match any name in tokens are
          now reported as errors.

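How can apparently undefined names such as ID or PLUS be used inside a class body? The trick (visible in the sly/lex.py and sly/yacc.py hunks further down) is a custom class-namespace dictionary whose lookup returns any missing, all-caps name as a string. A simplified, self-contained sketch of the idea (illustrative only, not SLY's exact code):

    class MagicDict(dict):
        def __getitem__(self, key):
            # Missing ALL-CAPS names evaluate to themselves, as strings
            if key not in self and key.isupper() and key[:1] != '_':
                return key
            return super().__getitem__(key)

    class MagicMeta(type):
        @classmethod
        def __prepare__(meta, clsname, bases):
            # This mapping is used as the namespace while the class body runs
            return MagicDict()

        def __new__(meta, clsname, bases, attributes):
            return super().__new__(meta, clsname, bases, dict(attributes))

    class Example(metaclass=MagicMeta):
        tokens = { ID, NUMBER, PLUS, MINUS }    # no NameError here

    print(Example.tokens)   # {'ID', 'NUMBER', 'PLUS', 'MINUS'} (set order may vary)

Because name lookups in the class body go through the prepared namespace, a bare ID simply evaluates to the string 'ID'.
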
@@ -1,6 +1,6 @@
-SLY (Sly Lex-Yacc) Version 0.2
+SLY (Sly Lex-Yacc) Version 0.3

-Copyright (C) 2016-2017
+Copyright (C) 2016-2018
David M. Beazley (Dabeaz LLC)
All rights reserved.

@@ -85,9 +85,7 @@ expressions and store variables:
    from sly import Lexer, Parser

    class CalcLexer(Lexer):
-        tokens = {
-            'NAME', 'NUMBER',
-        }
+        tokens = { NAME, NUMBER }
        ignore = ' \t'
        literals = { '=', '+', '-', '*', '/', '(', ')' }

@@ -60,9 +60,7 @@ expressions and store variables::
    from sly import Lexer, Parser

    class CalcLexer(Lexer):
-        tokens = {
-            'NAME', 'NUMBER',
-        }
+        tokens = { NAME, NUMBER }
        ignore = ' \t'
        literals = { '=', '+', '-', '*', '/', '(', ')' }

docs/sly.rst (165 changed lines)
@@ -68,17 +68,8 @@ lexer that tokenizes the above text::

    class CalcLexer(Lexer):
        # Set of token names. This is always required
-        tokens = {
-            'ID',
-            'NUMBER',
-            'PLUS',
-            'MINUS',
-            'TIMES',
-            'DIVIDE',
-            'ASSIGN',
-            'LPAREN',
-            'RPAREN',
-        }
+        tokens = { ID, NUMBER, PLUS, MINUS, TIMES,
+                   DIVIDE, ASSIGN, LPAREN, RPAREN }

        # String containing ignored characters between tokens
        ignore = ' \t'
@@ -131,19 +122,12 @@ In the example, the following code specified the token names::
    class CalcLexer(Lexer):
        ...
        # Set of token names. This is always required
-        tokens = {
-            'ID',
-            'NUMBER',
-            'PLUS',
-            'MINUS',
-            'TIMES',
-            'DIVIDE',
-            'ASSIGN',
-            'LPAREN',
-            'RPAREN',
-        }
+        tokens = { ID, NUMBER, PLUS, MINUS, TIMES,
+                   DIVIDE, ASSIGN, LPAREN, RPAREN }
        ...

Token names should be specified using all-caps as shown.

Specification of token match patterns
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -167,7 +151,7 @@ short tokens. For example, if you wanted to have separate tokens for
example::

    class MyLexer(Lexer):
-        tokens = {'ASSIGN', 'EQ', ...}
+        tokens = { ASSIGN, EQ, ...}
        ...
        EQ = r'=='      # MUST APPEAR FIRST! (LONGER)
        ASSIGN = r'='
@@ -251,12 +235,10 @@ Instead of using the ``@_()`` decorator, you can also write a method
that matches the same name as a token previously specified as a
string. For example::

    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'
    ...
    def ID(self, t):
        reserved = { 'if', 'else', 'while', 'for' }
        if t.value in reserved:
            t.type = t.value.upper()
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

This is potentially useful trick for debugging a lexer. You can temporarily
@@ -264,6 +246,36 @@ attach a method a token and have it execute when the token is encountered.
If you later take the method away, the lexer will revert back to its original
behavior.

Token Remapping
^^^^^^^^^^^^^^^

Occasionally, you might need to remap tokens based on special cases.
Consider the case of matching identifiers such as "abc", "python", or "guido".
Certain identifiers such as "if", "else", and "while" might need to be
treated as special keywords. To handle this, include token remapping rules when
writing the lexer like this::

    # calclex.py

    from sly import Lexer

    class CalcLexer(Lexer):
        tokens = { ID, IF, ELSE, WHILE }
        # String containing ignored characters (between tokens)
        ignore = ' \t'

        # Base ID rule
        ID = r'[a-zA-Z_][a-zA-Z0-9_]*'

        # Special cases
        ID['if'] = IF
        ID['else'] = ELSE
        ID['while'] = WHILE

When parsing an identifier, the special cases will remap certain matching
values to a new token type. For example, if the value of an identifier is
"if" above, an ``IF`` token will be generated.

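As a quick illustration of the remapping rules just documented, here is a usage sketch (assuming the ``CalcLexer`` defined in the added section above, running against this version of SLY)::

    lexer = CalcLexer()
    for tok in lexer.tokenize('if abc else xyz'):
        print(tok.type, tok.value)

    # Expected output: remapped keywords keep their matched text as the value
    #   IF if
    #   ID abc
    #   ELSE else
    #   ID xyz
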
Line numbers and position tracking
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -385,26 +397,11 @@ into practice::
    from sly import Lexer

    class CalcLexer(Lexer):
-        # Set of reserved names (language keywords)
-        reserved_words = { 'WHILE', 'IF', 'ELSE', 'PRINT' }

        # Set of token names. This is always required
-        tokens = {
-            'NUMBER',
-            'ID',
-            'PLUS',
-            'MINUS',
-            'TIMES',
-            'DIVIDE',
-            'ASSIGN',
-            'EQ',
-            'LT',
-            'LE',
-            'GT',
-            'GE',
-            'NE',
-            *reserved_words,
-        }
+        tokens = { NUMBER, ID, WHILE, IF, ELSE, PRINT,
+                   PLUS, MINUS, TIMES, DIVIDE, ASSIGN,
+                   EQ, LT, LE, GT, GE, NE }

        literals = { '(', ')', '{', '}', ';' }

@@ -429,12 +426,12 @@ into practice::
            t.value = int(t.value)
            return t

-        @_(r'[a-zA-Z_][a-zA-Z0-9_]*')
-        def ID(self, t):
-            # Check if name matches a reserved word (change token type if true)
-            if t.value.upper() in self.reserved_words:
-                t.type = t.value.upper()
-            return t
+        # Identifiers and keywords
+        ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
+        ID['if'] = IF
+        ID['else'] = ELSE
+        ID['while'] = WHILE
+        ID['print'] = PRINT

        ignore_comment = r'\#.*'

@@ -443,8 +440,8 @@ into practice::
        def ignore_newline(self, t):
            self.lineno += t.value.count('\n')

-        def error(self, value):
-            print('Line %d: Bad character %r' % (self.lineno, value[0]))
+        def error(self, t):
+            print('Line %d: Bad character %r' % (self.lineno, t.value[0]))
            self.index += 1

    if __name__ == '__main__':
@@ -462,27 +459,27 @@ into practice::

If you run this code, you'll get output that looks like this::

-    Token(ID, 'x', 3, 12)
-    Token(ASSIGN, '=', 3, 14)
-    Token(NUMBER, 0, 3, 16)
-    Token(;, ';', 3, 17)
-    Token(WHILE, 'while', 4, 19)
-    Token((, '(', 4, 25)
-    Token(ID, 'x', 4, 26)
-    Token(LT, '<', 4, 28)
-    Token(NUMBER, 10, 4, 30)
-    Token(), ')', 4, 32)
-    Token({, '{', 4, 34)
-    Token(PRINT, 'print', 5, 40)
-    Token(ID, 'x', 5, 46)
+    Token(type='ID', value='x', lineno=3, index=20)
+    Token(type='ASSIGN', value='=', lineno=3, index=22)
+    Token(type='NUMBER', value=0, lineno=3, index=24)
+    Token(type=';', value=';', lineno=3, index=25)
+    Token(type='WHILE', value='while', lineno=4, index=31)
+    Token(type='(', value='(', lineno=4, index=37)
+    Token(type='ID', value='x', lineno=4, index=38)
+    Token(type='LT', value='<', lineno=4, index=40)
+    Token(type='NUMBER', value=10, lineno=4, index=42)
+    Token(type=')', value=')', lineno=4, index=44)
+    Token(type='{', value='{', lineno=4, index=46)
+    Token(type='PRINT', value='print', lineno=5, index=56)
+    Token(type='ID', value='x', lineno=5, index=62)
    Line 5: Bad character ':'
-    Token(ID, 'x', 6, 53)
-    Token(ASSIGN, '=', 6, 55)
-    Token(ID, 'x', 6, 57)
-    Token(PLUS, '+', 6, 59)
-    Token(NUMBER, 1, 6, 61)
-    Token(;, ';', 6, 62)
-    Token(}, '}', 7, 64)
+    Token(type='ID', value='x', lineno=6, index=73)
+    Token(type='ASSIGN', value='=', lineno=6, index=75)
+    Token(type='ID', value='x', lineno=6, index=77)
+    Token(type='PLUS', value='+', lineno=6, index=79)
+    Token(type='NUMBER', value=1, lineno=6, index=81)
+    Token(type=';', value=';', lineno=6, index=82)
+    Token(type='}', value='}', lineno=7, index=88)

Study this example closely. It might take a bit to digest, but all of the
essential parts of writing a lexer are there. Tokens have to be specified
@@ -914,8 +911,8 @@ like this::
    class CalcParser(Parser):
        ...
        precedence = (
-            ('left', 'PLUS', 'MINUS'),
-            ('left', 'TIMES', 'DIVIDE'),
+            ('left', PLUS, MINUS),
+            ('left', TIMES, DIVIDE),
        )

        # Rules where precedence is applied
@@ -1004,9 +1001,9 @@ like this::
    class CalcParser(Parser):
        ...
        precedence = (
-            ('left', 'PLUS', 'MINUS'),
-            ('left', 'TIMES', 'DIVIDE'),
-            ('right', 'UMINUS'),        # Unary minus operator
+            ('left', PLUS, MINUS),
+            ('left', TIMES, DIVIDE),
+            ('right', UMINUS),          # Unary minus operator
        )

Now, in the grammar file, you write the unary minus rule like this::
@@ -1034,10 +1031,10 @@ operators like ``<`` and ``>`` but you didn't want combinations like
    class MyParser(Parser):
        ...
        precedence = (
-            ('nonassoc', 'LESSTHAN', 'GREATERTHAN'),  # Nonassociative operators
-            ('left', 'PLUS', 'MINUS'),
-            ('left', 'TIMES', 'DIVIDE'),
-            ('right', 'UMINUS'),        # Unary minus operator
+            ('nonassoc', LESSTHAN, GREATERTHAN),      # Nonassociative operators
+            ('left', PLUS, MINUS),
+            ('left', TIMES, DIVIDE),
+            ('right', UMINUS),          # Unary minus operator
        )

If you do this, the occurrence of input text such as ``a < b < c``
@@ -9,28 +9,16 @@ from sly import Lexer, Parser

class CalcLexer(Lexer):
    # Set of token names. This is always required
-    tokens = {
-        'ID',
-        'NUMBER',
-        'PLUS',
-        'MINUS',
-        'TIMES',
-        'DIVIDE',
-        'ASSIGN',
-        'LPAREN',
-        'RPAREN',
-    }
+    tokens = { NUMBER, PLUS, MINUS, TIMES, DIVIDE, LPAREN, RPAREN }

    # String containing ignored characters between tokens
    ignore = ' \t'

    # Regular expression rules for tokens
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    LPAREN = r'\('
    RPAREN = r'\)'

@@ -8,9 +8,7 @@ sys.path.insert(0, "../..")
from sly import Lexer, Parser

class CalcLexer(Lexer):
-    tokens = {
-        'NAME', 'NUMBER',
-    }
+    tokens = { NAME, NUMBER }
    ignore = ' \t'
    literals = { '=', '+', '-', '*', '/', '(', ')' }

@@ -36,7 +34,7 @@ class CalcParser(Parser):
    precedence = (
        ('left', '+', '-'),
        ('left', '*', '/'),
-        ('right', 'UMINUS'),
+        ('right', UMINUS),
        )

    def __init__(self):
sly/lex.py (56 changed lines)
@@ -1,7 +1,7 @@
# -----------------------------------------------------------------------------
# sly: lex.py
#
-# Copyright (C) 2016
+# Copyright (C) 2016 - 2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
@@ -31,11 +31,10 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

-__version__ = '0.2'
+__version__ = '0.3'
__all__ = ['Lexer', 'LexerStateChange']

import re
-from collections import OrderedDict

class LexError(Exception):
    '''
@@ -78,20 +77,41 @@ class Token(object):
    def __repr__(self):
        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'

-class LexerMetaDict(OrderedDict):
+class TokenStr(str):
+    @staticmethod
+    def __new__(cls, value):
+        self = super().__new__(cls, value)
+        self.remap = { }
+        return self
+
+    def __setitem__(self, key, value):
+        self.remap[key] = value
+
+class LexerMetaDict(dict):
    '''
    Special dictionary that prohits duplicate definitions in lexer specifications.
    '''
    def __setitem__(self, key, value):
+        if isinstance(value, str):
+            value = TokenStr(value)
+
        if key in self and not isinstance(value, property):
-            if isinstance(self[key], str):
+            prior = self[key]
+            if isinstance(prior, str):
                if callable(value):
-                    value.pattern = self[key]
+                    value.pattern = prior
+                    value.remap = getattr(prior, 'remap', None)
            else:
                raise AttributeError(f'Name {key} redefined')

        super().__setitem__(key, value)

+    def __getitem__(self, key):
+        if key not in self and key.isupper() and key[:1] != '_':
+            return key
+        else:
+            return super().__getitem__(key)
+
class LexerMeta(type):
    '''
    Metaclass for collecting lexing rules
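To make the new ``TokenStr`` class concrete, here is what the item-assignment syntax records (an illustrative snippet, not part of the diff)::

    ID = TokenStr(r'[a-zA-Z_][a-zA-Z0-9_]*')
    ID['if'] = 'IF'        # stores a remap entry; the string itself is unchanged
    print(str(ID))         # [a-zA-Z_][a-zA-Z0-9_]*
    print(ID.remap)        # {'if': 'IF'}

The metaclass below then collects each rule's ``remap`` dictionary into ``cls._remapping``, which ``tokenize()`` consults whenever a match occurs.
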
@@ -114,7 +134,12 @@ class LexerMeta(type):

    def __new__(meta, clsname, bases, attributes):
        del attributes['_']
+        remapping = { key: val.remap for key, val in attributes.items()
+                      if getattr(val, 'remap', None) }
+        attributes = { key: str(val) if isinstance(val, TokenStr) else val
+                       for key, val in attributes.items() }
        cls = super().__new__(meta, clsname, bases, attributes)
+        cls._remapping = remapping
        cls._build(list(attributes.items()))
        return cls

@@ -159,6 +184,16 @@ class Lexer(metaclass=LexerMeta):
        cls._ignored_tokens = set(cls._ignored_tokens)
        cls._token_funcs = dict(cls._token_funcs)

+        # Build a set of all remapped tokens
+        remapped_tokens = set()
+        for toks in cls._remapping.values():
+            remapped_tokens.update(toks.values())
+
+        undefined = remapped_tokens - cls._token_names
+        if undefined:
+            missing = ', '.join(undefined)
+            raise LexerBuildError(f'{missing} not included in token(s)')
+
        parts = []
        for tokname, value in cls._collect_rules(definitions):
            if tokname.startswith('ignore_'):
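One consequence of the check added above: a remap target must itself appear in ``tokens``, otherwise the class fails to build. A hedged sketch of the failure mode (hypothetical lexer, assuming this version of the code)::

    from sly import Lexer

    class BadLexer(Lexer):
        tokens = { ID }                   # IF is deliberately missing
        ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
        ID['if'] = IF                     # remaps to an undeclared token

    # Raises LexerBuildError: IF not included in token(s)
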
@@ -169,8 +204,10 @@ class Lexer(metaclass=LexerMeta):
                pattern = value

            elif callable(value):
-                pattern = value.pattern
                cls._token_funcs[tokname] = value
+                pattern = getattr(value, 'pattern', None)
+                if not pattern:
+                    continue

            # Form the regular expression component
            part = f'(?P<{tokname}>{pattern})'
@@ -209,7 +246,7 @@ class Lexer(metaclass=LexerMeta):
        _ignore = self.ignore
        _token_funcs = self._token_funcs
        _literals = self._literals
-
+        _remapping = self._remapping
        self.text = text
        try:
            while True:
@@ -228,6 +265,9 @@ class Lexer(metaclass=LexerMeta):
                    index = m.end()
                    tok.value = m.group()
                    tok.type = m.lastgroup
+                    if tok.type in _remapping:
+                        tok.type = _remapping[tok.type].get(tok.value, tok.type)
+
                    if tok.type in _token_funcs:
                        self.index = index
                        self.lineno = lineno
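The runtime half of remapping is just the dictionary lookup added above; schematically (illustrative values)::

    _remapping = {'ID': {'if': 'IF', 'else': 'ELSE', 'while': 'WHILE'}}

    tok_type, tok_value = 'ID', 'if'
    if tok_type in _remapping:
        tok_type = _remapping[tok_type].get(tok_value, tok_type)
    print(tok_type)    # IF
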
sly/yacc.py (20 changed lines)
@@ -1,7 +1,7 @@
# -----------------------------------------------------------------------------
# sly: yacc.py
#
-# Copyright (C) 2016-2017
+# Copyright (C) 2016-2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
@@ -35,7 +35,7 @@ import sys
import inspect
from collections import OrderedDict, defaultdict

-__version__ = '0.2'
+__version__ = '0.3'
__all__ = [ 'Parser' ]

class YaccError(Exception):
@@ -55,12 +55,12 @@ ERROR_COUNT = 3 # Number of symbols that must be shifted to leave
MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
-# logging module. PLY will use this by default to create things
+# logging module. SLY will use this by default to create things
# such as the parser.out file. If a user wants more detailed
# information, they can create their own logging object and pass
-# it into PLY.
+# it into SLY.

-class PlyLogger(object):
+class SlyLogger(object):
    def __init__(self, f):
        self.f = f

@@ -1552,7 +1552,7 @@ def _collect_grammar_rules(func):

    return grammar

-class ParserMetaDict(OrderedDict):
+class ParserMetaDict(dict):
    '''
    Dictionary that allows decorated grammar rule functions to be overloaded
    '''
@@ -1560,6 +1560,12 @@ class ParserMetaDict(OrderedDict):
        if key in self and callable(value) and hasattr(value, 'rules'):
            value.next_func = self[key]
        super().__setitem__(key, value)

+    def __getitem__(self, key):
+        if key not in self and key.isupper() and key[:1] != '_':
+            return key.upper()
+        else:
+            return super().__getitem__(key)
+
class ParserMeta(type):
    @classmethod
@@ -1582,7 +1588,7 @@ class ParserMeta(type):

class Parser(metaclass=ParserMeta):
    # Logging object where debugging/diagnostic messages are sent
-    log = PlyLogger(sys.stderr)
+    log = SlyLogger(sys.stderr)

    # Debugging filename where parsetab.out data can be written
    debugfile = None
@@ -98,9 +98,94 @@ def test_error_return():
    assert vals == [123, ':+-', '+', '-']
    assert lexer.errors == [ ':+-' ]


class ModernCalcLexer(Lexer):
    # Set of token names. This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, LT, LE, IF, ELSE }
    literals = { '(', ')' }

    # String containing ignored characters between tokens
    ignore = ' \t'

    # Regular expression rules for tokens
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ID['if'] = IF
    ID['else'] = ELSE

    NUMBER = r'\d+'
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    LE = r'<='
    LT = r'<'

    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    # Ignored text
    ignore_comment = r'\#.*'

    @_(r'\n+')
    def ignore_newline(self, t):
        self.lineno += t.value.count('\n')

    # Attached rule
    def ID(self, t):
        t.value = t.value.upper()
        return t

    def error(self, t):
        self.errors.append(t.value)
        self.index += 1
        if hasattr(self, 'return_error'):
            return t

    def __init__(self):
        self.errors = []


# Test basic recognition of various tokens and literals
def test_modern_tokens():
    lexer = ModernCalcLexer()
    toks = list(lexer.tokenize('abc if else 123 + - * / = < <= ( )'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['ID','IF','ELSE', 'NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
    assert vals == ['ABC','if','else', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']

# Test ignored comments and newlines
def test_modern_ignored():
    lexer = ModernCalcLexer()
    toks = list(lexer.tokenize('\n\n# A comment\n123\nabc\n'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    linenos = [t.lineno for t in toks]
    assert types == ['NUMBER', 'ID']
    assert vals == [123, 'ABC']
    assert linenos == [4,5]
    assert lexer.lineno == 6

# Test error handling
def test_modern_error():
    lexer = ModernCalcLexer()
    toks = list(lexer.tokenize('123 :+-'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['NUMBER', 'PLUS', 'MINUS']
    assert vals == [123, '+', '-']
    assert lexer.errors == [ ':+-' ]

# Test error token return handling
def test_modern_error_return():
    lexer = ModernCalcLexer()
    lexer.return_error = True
    toks = list(lexer.tokenize('123 :+-'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
    assert vals == [123, ':+-', '+', '-']
    assert lexer.errors == [ ':+-' ]

@@ -3,16 +3,7 @@ from sly import Lexer, Parser

class CalcLexer(Lexer):
    # Set of token names. This is always required
-    tokens = {
-        'ID',
-        'NUMBER',
-        'PLUS',
-        'MINUS',
-        'TIMES',
-        'DIVIDE',
-        'ASSIGN',
-    }

+    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN }
    literals = { '(', ')' }

    # String containing ignored characters between tokens
@@ -38,8 +29,8 @@ class CalcLexer(Lexer):
    def newline(self, t):
        self.lineno += t.value.count('\n')

-    def error(self, value):
-        self.errors.append(value)
+    def error(self, t):
+        self.errors.append(t.value[0])
        self.index += 1

    def __init__(self):
@@ -49,9 +40,9 @@ class CalcParser(Parser):
    tokens = CalcLexer.tokens

    precedence = (
-        ('left', 'PLUS', 'MINUS'),
-        ('left', 'TIMES', 'DIVIDE'),
-        ('right', 'UMINUS'),
+        ('left', PLUS, MINUS),
+        ('left', TIMES, DIVIDE),
+        ('right', UMINUS),
        )

    def __init__(self):