Compare commits
10 Commits: f8fcbb080c ... 539a85a5d5

539a85a5d5
069f6d7766
33d4f5afc0
4000988231
b453ea1854
004df26293
dbcf6d0f7f
dd71d70882
62203d8b75
cd9014eda2
CHANGES (21 lines changed)
@@ -1,6 +1,25 @@
 In Progress
 -----------
+
+Version 0.5
+-----------
+10/25/2022 ***IMPORTANT NOTE*** This is the last release to be made
+           on PyPi. If you want the latest version go to
+           https://github.com/dabeaz/sly.
+
+09/06/2022 Modernization of the packaging infrastructure. Slight
+           project reorganization.
+
+03/25/2022 Added automatic location tracking to the parser. Use
+           Parser.line_position(value) to return the line number
+           and Parser.index_position(value) to return a (start, end)
+           index pair. value is *any* object returned by one of
+           the various methods in the parser definition. Typically,
+           it would be an AST node. The parser tracks the data using
+           the value of id(value).
+
+03/25/2022 Added an .end attribute to tokens that specifies the ending
+           index of the matched text. This is used to do more
+           precise location tracking for the purpose of issuing
+           more useful error messages.
 
 05/09/2020 Experimental support for EBNF choices. For example:
 
     @('term { PLUS|MINUS term }')
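The two 03/25/2022 entries describe the position-tracking API that most of the code changes below implement. Here is a minimal sketch of how it might be called; the CalcLexer/CalcParser classes and the tuple AST are illustrative assumptions, and only line_position(), index_position(), and the token .end attribute come from this release:

    from sly import Lexer, Parser

    class CalcLexer(Lexer):
        tokens = { 'NUMBER', 'PLUS' }
        ignore = ' '
        NUMBER = r'\d+'
        PLUS = r'\+'

    class CalcParser(Parser):
        tokens = CalcLexer.tokens

        @_('expr PLUS term')
        def expr(self, p):
            return ('add', p.expr, p.term)   # "AST nodes" here are plain tuples

        @_('term')
        def expr(self, p):
            return p.term

        @_('NUMBER')
        def term(self, p):
            return ('num', int(p.NUMBER))

    lexer, parser = CalcLexer(), CalcParser()
    node = parser.parse(lexer.tokenize('1 + 23'))
    print(parser.line_position(node))     # line on which the node started: 1
    print(parser.index_position(node))    # (start, end) index pair: (0, 6)

Because the parser keys its records by id(value), the lookups work for any distinct object a rule returns, without requiring AST classes to carry position fields themselves.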
LICENSE (2 lines changed)
@@ -1,6 +1,6 @@
 SLY (Sly Lex-Yacc)
 
-Copyright (C) 2016-2019
+Copyright (C) 2016-2022
 David M. Beazley (Dabeaz LLC)
 All rights reserved.
 
MANIFEST.in
@@ -1,2 +1,4 @@
+include Makefile CONTRIBUTING.md
 recursive-include example *
+recursive-include tests *
 recursive-include docs *
Makefile (new file, 21 lines)
@@ -0,0 +1,21 @@
+PYTHON=python3
+VENV=.venv
+
+# Setup and install all of the required tools for building, testing,
+# and deploying
+setup::
+	rm -rf $(VENV)
+	$(PYTHON) -m venv $(VENV)
+	./$(VENV)/bin/python -m pip install pytest
+	./$(VENV)/bin/python -m pip install pytest-cov
+	./$(VENV)/bin/python -m pip install build
+	./$(VENV)/bin/python -m pip install twine
+
+# Run unit tests
+test::
+	./$(VENV)/bin/python -m pip install .
+	./$(VENV)/bin/python -m pytest --cov
+
+# Build an artifact suitable for installing with pip
+build::
+	./$(VENV)/bin/python -m build
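The intended workflow with this Makefile appears to be: make setup once to create the .venv virtual environment and install the tooling, make test to install the package into it and run pytest with coverage, and make build to produce a pip-installable artifact. twine is installed by setup, but no upload target appears in this hunk.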
README.rst (10 lines changed)
@@ -33,6 +33,16 @@ SLY is a modern library for performing lexing and parsing. It
 implements the LALR(1) parsing algorithm, commonly used for
 parsing and compiling various programming languages.
 
+Important Notice : October 11, 2022
+-----------------------------------
+The SLY project is no longer making package-installable releases.
+It's fully functional, but if you choose to use it, you should
+vendor the code into your application. SLY has zero dependencies.
+Although I am semi-retiring the project, I will respond to
+bug reports and still may decide to make future changes to it
+depending on my mood. I'd like to thank everyone who
+has contributed to it over the years. --Dave
+
 Requirements
 ------------
 
@@ -138,12 +138,6 @@ names of the tokens provided in the ``tokens`` set. For example::
     PLUS = r'\+'
     MINUS = r'-'
 
-Regular expression patterns are compiled using the ``re.VERBOSE`` flag
-which can be used to help readability. However, unescaped whitespace
-is ignored and comments are allowed in this mode. If your pattern
-involves whitespace, make sure you use ``\s``. If you need to match
-the ``#`` character, use ``[#]`` or ``\#``.
-
 Tokens are matched in the same order that patterns are listed in the
 ``Lexer`` class. Longer tokens always need to be specified before
 shorter tokens. For example, if you wanted to have separate tokens for
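The hunk's trailing context breaks off mid-sentence ("...separate tokens for"). Here is a small sketch of the ordering rule that context states, using an assumed two-character/one-character operator pair rather than the README's own continuation:

    from sly import Lexer

    class OrderLexer(Lexer):
        tokens = { 'LE', 'LT' }
        ignore = ' '

        # The longer pattern must be listed first: if LT came first, the
        # input '<=' would match as LT and leave a stray '=' behind.
        LE = r'<='
        LT = r'<'

    assert [t.type for t in OrderLexer().tokenize('<= <')] == ['LE', 'LT']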
pyproject.toml (new file, 3 lines)
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
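This is the minimal PEP 518 build-system declaration: it tells pip and python -m build to fetch setuptools and wheel and to build through the setuptools.build_meta backend, while all project metadata lives in the new setup.cfg below.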
setup.cfg (new file, 18 lines)
@@ -0,0 +1,18 @@
+[metadata]
+name = sly
+version = 0.5
+url = https://github.com/dabeaz/sly
+author = David Beazley
+author_email = "David Beazley" <dave@dabeaz.com>
+description = "SLY - Sly Lex Yacc"
+long_description = "SLY is an implementation of lex and yacc. No longer maintained on PyPI. Latest version on GitHub."
+license = BSD-3-Clause
+license_files = LICENSE
+classifiers =
+    License :: OSI Approved :: BSD License
+
+[options]
+package_dir =
+    =src
+
+packages = sly
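The package_dir setting with =src indicates that the sly package now lives under a src/ directory instead of the repository root, which is presumably the "slight project reorganization" mentioned in CHANGES.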
setup.py (deleted, 28 lines)
@@ -1,28 +0,0 @@
-try:
-    from setuptools import setup
-except ImportError:
-    from distutils.core import setup
-
-tests_require = ['pytest', 'regex']
-
-setup(name = "sly",
-      description="SLY - Sly Lex Yacc",
-      long_description = """
-SLY is an implementation of lex and yacc for Python 3.
-""",
-      license="""BSD""",
-      version = "0.4",
-      author = "David Beazley",
-      author_email = "dave@dabeaz.com",
-      maintainer = "David Beazley",
-      maintainer_email = "dave@dabeaz.com",
-      url = "https://github.com/dabeaz/sly",
-      packages = ['sly'],
-      tests_require = tests_require,
-      extras_require = {
-          'test': tests_require,
-      },
-      classifiers = [
-          'Programming Language :: Python :: 3',
-      ]
-      )
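Everything the deleted setup.py declared now lives in setup.cfg and pyproject.toml, with the version bumped from 0.4 to 0.5; the tests_require/extras_require machinery is dropped rather than migrated.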
sly/__init__.py
@@ -2,5 +2,5 @@
 from .lex import *
 from .yacc import *
 
-__version__ = "0.4"
+__version__ = "0.5"
 __all__ = [ *lex.__all__, *yacc.__all__ ]
sly/lex.py
@@ -73,9 +73,9 @@ class Token(object):
     '''
     Representation of a single token.
    '''
-    __slots__ = ('type', 'value', 'lineno', 'index')
+    __slots__ = ('type', 'value', 'lineno', 'index', 'end')
     def __repr__(self):
-        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'
+        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end})'
 
 class TokenStr(str):
     @staticmethod
@@ -406,7 +406,7 @@ class Lexer(metaclass=LexerMeta):
                 tok.index = index
                 m = _master_re.match(text, index)
                 if m:
-                    index = m.end()
+                    tok.end = index = m.end()
                     tok.value = m.group()
                     tok.type = m.lastgroup
 
@@ -431,6 +431,7 @@ class Lexer(metaclass=LexerMeta):
                 # No match, see if the character is in literals
                 if text[index] in _literals:
                     tok.value = text[index]
+                    tok.end = index + 1
                     tok.type = tok.value
                     index += 1
                     yield tok
@@ -442,6 +443,7 @@ class Lexer(metaclass=LexerMeta):
                     tok.value = text[index:]
                     tok = self.error(tok)
                     if tok is not None:
+                        tok.end = self.index
                         yield tok
 
                     index = self.index
sly/yacc.py
@@ -126,8 +126,6 @@ class YaccProduction:
     @property
     def lineno(self):
         for tok in self._slice:
-            if isinstance(tok, YaccSymbol):
-                continue
             lineno = getattr(tok, 'lineno', None)
             if lineno:
                 return lineno
@@ -136,13 +134,20 @@
     @property
     def index(self):
         for tok in self._slice:
-            if isinstance(tok, YaccSymbol):
-                continue
             index = getattr(tok, 'index', None)
             if index is not None:
                 return index
         raise AttributeError('No index attribute found')
 
+    @property
+    def end(self):
+        result = None
+        for tok in self._slice:
+            r = getattr(tok, 'end', None)
+            if r:
+                result = r
+        return result
+
     def __getattr__(self, name):
         if name in self._namemap:
             return self._namemap[name](self._slice)
@@ -1806,12 +1811,6 @@ class ParserMeta(type):
     @classmethod
     def __prepare__(meta, *args, **kwargs):
         d = ParserMetaDict()
-        # def _(rule, *extra):
-        #     rules = [rule, *extra]
-        #     def decorate(func):
-        #         func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
-        #         return func
-        #     return decorate
         d['_'] = _decorator
         return d
 
@@ -1822,6 +1821,9 @@ class ParserMeta(type):
         return cls
 
 class Parser(metaclass=ParserMeta):
+    # Automatic tracking of position information
+    track_positions = True
+
     # Logging object where debugging/diagnostic messages are sent
     log = SlyLogger(sys.stderr)
 
@@ -2076,9 +2078,15 @@ class Parser(metaclass=ParserMeta):
         self.tokens = tokens
         self.statestack = statestack = []    # Stack of parsing states
         self.symstack = symstack = []        # Stack of grammar symbols
         pslice._stack = symstack             # Associate the stack with the production
         self.restart()
 
+        # Set up position tracking
+        track_positions = self.track_positions
+        if not hasattr(self, '_line_positions'):
+            self._line_positions = { }       # id: -> lineno
+            self._index_positions = { }      # id: -> (start, end)
+
         errtoken = None                      # Err token
         while True:
             # Get the next symbol on the input. If a lookahead symbol
@@ -2093,7 +2101,7 @@
                     if not lookahead:
                         lookahead = YaccSymbol()
                         lookahead.type = '$end'
 
             # Check the action table
             ltype = lookahead.type
             t = actions[self.state].get(ltype)
@@ -2129,7 +2137,23 @@
                     value = p.func(self, pslice)
                     if value is pslice:
                         value = (pname, *(s.value for s in pslice._slice))
 
                     sym.value = value
+
+                    # Record positions
+                    if track_positions:
+                        if plen:
+                            sym.lineno = symstack[-plen].lineno
+                            sym.index = symstack[-plen].index
+                            sym.end = symstack[-1].end
+                        else:
+                            # A zero-length production (what to put here?)
+                            sym.lineno = None
+                            sym.index = None
+                            sym.end = None
+                        self._line_positions[id(value)] = sym.lineno
+                        self._index_positions[id(value)] = (sym.index, sym.end)
+
                     if plen:
                         del symstack[-plen:]
                         del statestack[-plen:]
@@ -2214,6 +2238,8 @@
                 t.lineno = lookahead.lineno
                 if hasattr(lookahead, 'index'):
                     t.index = lookahead.index
+                if hasattr(lookahead, 'end'):
+                    t.end = lookahead.end
                 t.value = lookahead
                 lookaheadstack.append(lookahead)
                 lookahead = t
@@ -2225,3 +2251,11 @@
 
         # Call an error function here
         raise RuntimeError('sly: internal parser error!!!\n')
+
+    # Return position tracking information
+    def line_position(self, value):
+        return self._line_positions[id(value)]
+
+    def index_position(self, value):
+        return self._index_positions[id(value)]
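One consequence of keying _line_positions and _index_positions by id(value): a lookup is only reliable while the produced value is alive and is a distinct object. Rules that return interned or cached values (small integers, short strings) could collide on id(), so returning a fresh AST object from each production, as the CHANGES entry suggests, is the safe pattern here.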
tests/test_lex.py
@@ -1,11 +1,6 @@
 import pytest
 from sly import Lexer
 
-try:
-    import regex
-except ImportError:
-    regex = None
-
 class CalcLexer(Lexer):
     # Set of token names. This is always required
     tokens = {
@@ -61,29 +56,6 @@ class CalcLexer(Lexer):
     def __init__(self):
         self.errors = []
 
-if regex is not None:
-    class RegexModuleCalcLexer(Lexer):
-        regex_module = regex
-
-        tokens = { 'ID', 'PLUS', 'MINUS' }
-
-        literals = { '(', ')' }
-        ignore = ' \t'
-
-        ID = r'\p{Ll}+'    # Unicode lowercase letters, regex module feature
-        PLUS = r'\+'
-        MINUS = r'-'
-
-        ignore_comment = r'\#.*'
-
-        @_(r'\n+')
-        def newline(self, t):
-            self.lineno += t.value.count('\n')
-
-        def ID(self, t):
-            t.value = t.value.upper()
-            return t
-
 # Test basic recognition of various tokens and literals
 def test_tokens():
     lexer = CalcLexer()
@@ -93,17 +65,21 @@ def test_tokens():
     assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
     assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']
 
-# Test third-party regex module support
-@pytest.mark.skipif(regex is None,
-                    reason="third-party regex module not installed")
-def test_3rd_party_regex_module():
-    lexer = RegexModuleCalcLexer()
-    toks = list(lexer.tokenize('a + b - c'))
-    types = [t.type for t in toks]
-    vals = [t.value for t in toks]
-    assert types == ['ID','PLUS','ID','MINUS','ID']
-    assert vals == ['A', '+', 'B', '-', 'C']
+# Test position tracking
+def test_positions():
+    lexer = CalcLexer()
+    text = 'abc\n( )'
+    toks = list(lexer.tokenize(text))
+    lines = [t.lineno for t in toks]
+    indices = [t.index for t in toks]
+    ends = [t.end for t in toks]
+    values = [text[t.index:t.end] for t in toks]
+    assert values == ['abc', '(', ')']
+    assert lines == [1, 2, 2]
+    assert indices == [0, 4, 6]
+    assert ends == [3, 5, 7]
 
 # Test ignored comments and newlines
 def test_ignored():
     lexer = CalcLexer()
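The numbers asserted in test_positions follow directly from the input 'abc\n( )': 'abc' occupies indices 0-2 (so its end is 3), the newline at index 3 bumps lineno to 2, '(' spans (4, 5), and ')' spans (6, 7), matching the asserted lines, indices, and ends.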
@@ -228,23 +204,5 @@ def test_modern_error_return():
     assert vals == [123, ':+-', '+', '-']
     assert lexer.errors == [ ':+-' ]
 
-# Test Lexer Inheritance. This class should inherit all of the tokens
-# and features of ModernCalcLexer, but add two new tokens to it. The
-# PLUSPLUS token matches before the PLUS token.
-
-if False:
-    class SubModernCalcLexer(ModernCalcLexer):
-        tokens |= { DOLLAR, PLUSPLUS }
-        DOLLAR = r'\$'
-        PLUSPLUS = r'\+\+'
-        PLUSPLUS.before = PLUS
-
-    def test_lexer_inherit():
-        lexer = SubModernCalcLexer()
-        toks = list(lexer.tokenize('123 + - $ ++ if'))
-        types = [t.type for t in toks]
-        vals = [t.value for t in toks]
-        assert types == ['NUMBER', 'PLUS', 'MINUS', 'DOLLAR', 'PLUSPLUS', 'IF']
-        assert vals == [123, '+', '-', '$', '++', 'if']