Compare commits
No commits in common. "539a85a5d5818bf4e1cb5a9e749d6e2fab70a351" and "f8fcbb080c4bc4ff14bd30876386edd63d8362cb" have entirely different histories.
539a85a5d5 ... f8fcbb080c
CHANGES (21 changed lines)
@@ -1,25 +1,6 @@
-Version 0.5
+In Progress
 -----------
-10/25/2022 ***IMPORTANT NOTE*** This is the last release to be made
-           on PyPi. If you want the latest version go to
-           https://github.com/dabeaz/sly.
-
-09/06/2022 Modernization of the packaging infrastructure. Slight
-           project reorganization.
-
-03/25/2022 Added automatic location tracking to the parser. Use
-           Parser.line_position(value) to return the line number
-           and Parser.index_position(value) to return a (start, end)
-           index pair. value is *any* object returned by one of
-           the various methods in the parser definition. Typically,
-           it would be an AST node. The parser tracks the data using
-           the value of id(value).
-
-03/25/2022 Added .end attribute to tokens that specify the ending
-           index of the matching text. This is used to do more
-           precise location tracking for the purpose of issuing
-           more useful error messages.
-
 05/09/2020 Experimental support for EBNF choices. For example:
 
     @('term { PLUS|MINUS term }')
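The EBNF entry above is the one changelog item shared by both sides of this comparison. A minimal sketch of how such a rule might be used in a parser; the surrounding class, the token set, and especially the assumption that each iteration of the { ... } group arrives as an (operator, term) pair are illustrative guesses, not taken from the diff::

    # Hedged sketch of the EBNF-choice rule quoted in the changelog above.
    # Assumption: p[1] holds the repeated group as (op, term) pairs, and
    # repeated symbol names are numbered (term0); names are illustrative.
    from sly import Parser
    from calclex import CalcLexer    # hypothetical lexer module

    class CalcParser(Parser):
        tokens = CalcLexer.tokens

        @_('term { PLUS|MINUS term }')
        def expr(self, p):
            value = p.term0               # assumed numbering of repeated names
            for op, term in p[1]:         # assumed shape of the repeated group
                value = value + term if op == '+' else value - term
            return value
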
LICENSE (2 changed lines)
@@ -1,6 +1,6 @@
 SLY (Sly Lex-Yacc)
 
-Copyright (C) 2016-2022
+Copyright (C) 2016-2019
 David M. Beazley (Dabeaz LLC)
 All rights reserved.
 
MANIFEST.in
@@ -1,4 +1,2 @@
-include Makefile CONTRIBUTING.md
 recursive-include example *
-recursive-include tests *
 recursive-include docs *
Makefile (21 changed lines, file deleted)
@@ -1,21 +0,0 @@
-PYTHON=python3
-VENV=.venv
-
-# Setup and install all of the required tools for building, testing,
-# and deploying
-setup::
-	rm -rf $(VENV)
-	$(PYTHON) -m venv $(VENV)
-	./$(VENV)/bin/python -m pip install pytest
-	./$(VENV)/bin/python -m pip install pytest-cov
-	./$(VENV)/bin/python -m pip install build
-	./$(VENV)/bin/python -m pip install twine
-
-# Run unit tests
-test::
-	./$(VENV)/bin/python -m pip install .
-	./$(VENV)/bin/python -m pytest --cov
-
-# Build an artifact suitable for installing with pip
-build::
-	./$(VENV)/bin/python -m build
README.rst (10 changed lines)
@@ -33,16 +33,6 @@ SLY is a modern library for performing lexing and parsing. It
 implements the LALR(1) parsing algorithm, commonly used for
 parsing and compiling various programming languages.
 
-Important Notice : October 11, 2022
------------------------------------
-The SLY project is no longer making package-installable releases.
-It's fully functional, but if you choose to use it, you should
-vendor the code into your application. SLY has zero dependencies.
-Although I am semi-retiring the project, I will respond to
-bug reports and still may decide to make future changes to it
-depending on my mood. I'd like to thank everyone who
-has contributed to it over the years. --Dave
-
 Requirements
 ------------
 
@@ -138,6 +138,12 @@ names of the tokens provided in the ``tokens`` set. For example::
     PLUS = r'\+'
     MINUS = r'-'
 
+Regular expression patterns are compiled using the ``re.VERBOSE`` flag
+which can be used to help readability. However,
+unescaped whitespace is ignored and comments are allowed in this mode.
+If your pattern involves whitespace, make sure you use ``\s``. If you
+need to match the ``#`` character, use ``[#]`` or ``\#``.
+
 Tokens are matched in the same order that patterns are listed in the
 ``Lexer`` class. Longer tokens always need to be specified before
 short tokens. For example, if you wanted to have separate tokens for
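The ``re.VERBOSE`` caveat added to the README above can be made concrete with a small lexer. The token names and patterns here are examples of mine, not part of the diff::

    from sly import Lexer

    class VerboseLexer(Lexer):
        tokens = { 'WORDPAIR', 'HASH' }
        ignore = ' \t'
        # Under re.VERBOSE a bare space in the pattern is ignored, so use \s:
        WORDPAIR = r'[a-z]+\s[a-z]+'   # r'[a-z]+ [a-z]+' would not match the space
        # '#' starts a comment under re.VERBOSE, so bracket or escape it:
        HASH = r'[#]'
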
pyproject.toml (file deleted)
@@ -1,3 +0,0 @@
-[build-system]
-requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta"
setup.cfg (18 changed lines, file deleted)
@@ -1,18 +0,0 @@
-[metadata]
-name = sly
-version = 0.5
-url = https://github.com/dabeaz/sly
-author = David Beazley
-author_email = "David Beazley" <dave@dabeaz.com>
-description = "SLY - Sly Lex Yacc"
-long_description = "SLY is an implementation of lex and yacc. No longer maintained on PyPI. Latest version on GitHub."
-license = BSD-3-Clause
-license_files = LICENSE
-classifiers =
-    License :: OSI Approved :: BSD License
-
-[options]
-package_dir =
-    =src
-
-packages = sly
setup.py (new executable file, 28 lines)
@@ -0,0 +1,28 @@
+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
+tests_require = ['pytest', 'regex']
+
+setup(name = "sly",
+      description="SLY - Sly Lex Yacc",
+      long_description = """
+SLY is an implementation of lex and yacc for Python 3.
+""",
+      license="""BSD""",
+      version = "0.4",
+      author = "David Beazley",
+      author_email = "dave@dabeaz.com",
+      maintainer = "David Beazley",
+      maintainer_email = "dave@dabeaz.com",
+      url = "https://github.com/dabeaz/sly",
+      packages = ['sly'],
+      tests_require = tests_require,
+      extras_require = {
+          'test': tests_require,
+      },
+      classifiers = [
+          'Programming Language :: Python :: 3',
+      ]
+)
sly/__init__.py
@@ -2,5 +2,5 @@
 from .lex import *
 from .yacc import *
 
-__version__ = "0.5"
+__version__ = "0.4"
 __all__ = [ *lex.__all__, *yacc.__all__ ]
sly/lex.py
@@ -73,9 +73,9 @@ class Token(object):
     '''
     Representation of a single token.
    '''
-    __slots__ = ('type', 'value', 'lineno', 'index', 'end')
+    __slots__ = ('type', 'value', 'lineno', 'index')
     def __repr__(self):
-        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end})'
+        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'
 
 class TokenStr(str):
     @staticmethod
@@ -406,7 +406,7 @@ class Lexer(metaclass=LexerMeta):
             tok.index = index
             m = _master_re.match(text, index)
             if m:
-                tok.end = index = m.end()
+                index = m.end()
                 tok.value = m.group()
                 tok.type = m.lastgroup
 
@@ -431,7 +431,6 @@ class Lexer(metaclass=LexerMeta):
                 # No match, see if the character is in literals
                 if text[index] in _literals:
                     tok.value = text[index]
-                    tok.end = index + 1
                     tok.type = tok.value
                     index += 1
                     yield tok
@@ -443,7 +442,6 @@ class Lexer(metaclass=LexerMeta):
                 tok.value = text[index:]
                 tok = self.error(tok)
                 if tok is not None:
-                    tok.end = self.index
                     yield tok
 
                 index = self.index
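The ``tok.end`` attribute stripped out in these hunks (present only on the 539a85a5d5 side) marks the ending index of the matched text, so a lexeme can be recovered by slicing, exactly as the removed ``test_positions`` test further down does with ``text[t.index:t.end]``. A minimal sketch against the v0.5 behavior; the lexer itself is an illustration of mine::

    from sly import Lexer

    class WordLexer(Lexer):
        tokens = { 'NAME', 'NUMBER' }
        ignore = ' \t\n'
        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
        NUMBER = r'\d+'

    text = 'abc 123'
    for tok in WordLexer().tokenize(text):
        # tok.end exists only on the v0.5 (539a85a5d5) side of this diff
        print(tok.type, text[tok.index:tok.end])
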
sly/yacc.py
@@ -126,6 +126,8 @@ class YaccProduction:
     @property
     def lineno(self):
         for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
             lineno = getattr(tok, 'lineno', None)
             if lineno:
                 return lineno
@@ -134,20 +136,13 @@ class YaccProduction:
     @property
     def index(self):
         for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
             index = getattr(tok, 'index', None)
             if index is not None:
                 return index
         raise AttributeError('No index attribute found')
 
-    @property
-    def end(self):
-        result = None
-        for tok in self._slice:
-            r = getattr(tok, 'end', None)
-            if r:
-                result = r
-        return result
-
     def __getattr__(self, name):
         if name in self._namemap:
             return self._namemap[name](self._slice)
@@ -1811,6 +1806,12 @@ class ParserMeta(type):
     @classmethod
     def __prepare__(meta, *args, **kwargs):
         d = ParserMetaDict()
+        # def _(rule, *extra):
+        #     rules = [rule, *extra]
+        #     def decorate(func):
+        #         func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
+        #         return func
+        #     return decorate
         d['_'] = _decorator
         return d
 
@@ -1821,9 +1822,6 @@ class ParserMeta(type):
         return cls
 
 class Parser(metaclass=ParserMeta):
-    # Automatic tracking of position information
-    track_positions = True
-
     # Logging object where debugging/diagnostic messages are sent
     log = SlyLogger(sys.stderr)
 
@@ -2078,15 +2076,9 @@ class Parser(metaclass=ParserMeta):
         self.tokens = tokens
         self.statestack = statestack = []    # Stack of parsing states
         self.symstack = symstack = []        # Stack of grammar symbols
         pslice._stack = symstack             # Associate the stack with the production
         self.restart()
 
-        # Set up position tracking
-        track_positions = self.track_positions
-        if not hasattr(self, '_line_positions'):
-            self._line_positions = { }     # id: -> lineno
-            self._index_positions = { }    # id: -> (start, end)
-
         errtoken = None                      # Err token
         while True:
             # Get the next symbol on the input. If a lookahead symbol
@@ -2101,7 +2093,7 @@ class Parser(metaclass=ParserMeta):
                 if not lookahead:
                     lookahead = YaccSymbol()
                     lookahead.type = '$end'
 
             # Check the action table
             ltype = lookahead.type
             t = actions[self.state].get(ltype)
@@ -2137,23 +2129,7 @@ class Parser(metaclass=ParserMeta):
                     value = p.func(self, pslice)
                     if value is pslice:
                         value = (pname, *(s.value for s in pslice._slice))
 
                     sym.value = value
 
-                    # Record positions
-                    if track_positions:
-                        if plen:
-                            sym.lineno = symstack[-plen].lineno
-                            sym.index = symstack[-plen].index
-                            sym.end = symstack[-1].end
-                        else:
-                            # A zero-length production (what to put here?)
-                            sym.lineno = None
-                            sym.index = None
-                            sym.end = None
-                        self._line_positions[id(value)] = sym.lineno
-                        self._index_positions[id(value)] = (sym.index, sym.end)
-
                     if plen:
                         del symstack[-plen:]
                         del statestack[-plen:]
@@ -2238,8 +2214,6 @@ class Parser(metaclass=ParserMeta):
                     t.lineno = lookahead.lineno
                     if hasattr(lookahead, 'index'):
                         t.index = lookahead.index
-                    if hasattr(lookahead, 'end'):
-                        t.end = lookahead.end
                     t.value = lookahead
                     lookaheadstack.append(lookahead)
                     lookahead = t
@@ -2251,11 +2225,3 @@ class Parser(metaclass=ParserMeta):
 
         # Call an error function here
         raise RuntimeError('sly: internal parser error!!!\n')
-
-    # Return position tracking information
-    def line_position(self, value):
-        return self._line_positions[id(value)]
-
-    def index_position(self, value):
-        return self._index_positions[id(value)]
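Taken together, the hunks removed here implement the automatic location tracking described in the CHANGES entry above: each reduced value is keyed by ``id(value)`` into ``_line_positions`` and ``_index_positions``, which ``line_position()`` and ``index_position()`` then read back. A usage sketch against the v0.5 (539a85a5d5) side; the CalcLexer/CalcParser classes and the input string are illustrative assumptions::

    # Assumes CalcLexer/CalcParser in the style of the sly examples.
    lexer = CalcLexer()
    parser = CalcParser()    # track_positions defaults to True on the v0.5 side

    node = parser.parse(lexer.tokenize('a = 3 + 4'))

    print(parser.line_position(node))          # line number where node originated
    start, end = parser.index_position(node)   # (start, end) index pair
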
tests/test_lex.py
@@ -1,6 +1,11 @@
 import pytest
 from sly import Lexer
 
+try:
+    import regex
+except ImportError:
+    regex = None
+
 class CalcLexer(Lexer):
     # Set of token names. This is always required
     tokens = {
@@ -56,6 +61,29 @@ class CalcLexer(Lexer):
     def __init__(self):
         self.errors = []
 
+if regex is not None:
+    class RegexModuleCalcLexer(Lexer):
+        regex_module = regex
+
+        tokens = { 'ID', 'PLUS', 'MINUS' }
+
+        literals = { '(', ')' }
+        ignore = ' \t'
+
+        ID = r'\p{Ll}+'    # Unicode lowercase letters, regex module feature
+        PLUS = r'\+'
+        MINUS = r'-'
+
+        ignore_comment = r'\#.*'
+
+        @_(r'\n+')
+        def newline(self, t):
+            self.lineno += t.value.count('\n')
+
+        def ID(self, t):
+            t.value = t.value.upper()
+            return t
+
 # Test basic recognition of various tokens and literals
 def test_tokens():
     lexer = CalcLexer()
@@ -65,21 +93,17 @@ def test_tokens():
     assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
     assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']
 
-# Test position tracking
-def test_positions():
-    lexer = CalcLexer()
-    text = 'abc\n( )'
-    toks = list(lexer.tokenize(text))
-    lines = [t.lineno for t in toks ]
-    indices = [t.index for t in toks ]
-    ends = [t.end for t in toks]
-    values = [ text[t.index:t.end] for t in toks ]
-    assert values == ['abc', '(', ')']
-    assert lines == [1, 2, 2]
-    assert indices == [0, 4, 6]
-    assert ends == [3, 5, 7]
-
+# Test third-party regex module support
+@pytest.mark.skipif(regex is None,
+                    reason="third-party regex module not installed")
+def test_3rd_party_regex_module():
+    lexer = RegexModuleCalcLexer()
+    toks = list(lexer.tokenize('a + b - c'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    assert types == ['ID','PLUS','ID','MINUS','ID']
+    assert vals == ['A', '+', 'B', '-', 'C']
+
 # Test ignored comments and newlines
 def test_ignored():
     lexer = CalcLexer()
@@ -204,5 +228,23 @@ def test_modern_error_return():
     assert vals == [123, ':+-', '+', '-']
     assert lexer.errors == [ ':+-' ]
 
+# Test Lexer Inheritance. This class should inherit all of the tokens
+# and features of ModernCalcLexer, but add two new tokens to it. The
+# PLUSPLUS token matches before the PLUS token.
+
+if False:
+    class SubModernCalcLexer(ModernCalcLexer):
+        tokens |= { DOLLAR, PLUSPLUS }
+        DOLLAR = r'\$'
+        PLUSPLUS = r'\+\+'
+        PLUSPLUS.before = PLUS
+
+    def test_lexer_inherit():
+        lexer = SubModernCalcLexer()
+        toks = list(lexer.tokenize('123 + - $ ++ if'))
+        types = [t.type for t in toks]
+        vals = [t.value for t in toks]
+        assert types == ['NUMBER', 'PLUS', 'MINUS', 'DOLLAR', 'PLUSPLUS', 'IF']
+        assert vals == [123, '+', '-', '$', '++', 'if']
+