Compare commits
No commits in common. "539a85a5d5818bf4e1cb5a9e749d6e2fab70a351" and "f8fcbb080c4bc4ff14bd30876386edd63d8362cb" have entirely different histories.
539a85a5d5 ... f8fcbb080c
CHANGES (21 changed lines)
@@ -1,25 +1,6 @@
-Version 0.5
+In Progress
 -----------
-10/25/2022 ***IMPORTANT NOTE*** This is the last release to be made
-           on PyPi. If you want the latest version go to
-           https://github.com/dabeaz/sly.
-
-09/06/2022 Modernization of the packaging infrastructure. Slight
-           project reorganization.
-
-03/25/2022 Added automatic location tracking to the parser. Use
-           Parser.line_position(value) to return the line number
-           and Parser.index_position(value) to return a (start, end)
-           index pair. value is *any* object returned by one of
-           the various methods in the parser definition. Typically,
-           it would be an AST node. The parser tracks the data using
-           the value of id(value).
-
-03/25/2022 Added .end attribute to tokens that specify the ending
-           index of the matching text. This is used to do more
-           precise location tracking for the purpose of issuing
-           more useful error messages.
-
 05/09/2020 Experimental support for EBNF choices. For example:
 
     @('term { PLUS|MINUS term }')
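The EBNF entry above is the one changelog item shared by both sides of this comparison. A minimal sketch of how such a rule might be used in a parser; the surrounding class, the token set, and especially the assumption that each iteration of the { ... } group arrives as an (operator, term) pair are illustrative guesses, not taken from the diff::

    # Hedged sketch of the EBNF-choice rule quoted in the changelog above.
    # Assumption: p[1] holds the repeated group as (op, term) pairs, and
    # repeated symbol names are numbered (term0); names are illustrative.
    from sly import Parser
    from calclex import CalcLexer    # hypothetical lexer module

    class CalcParser(Parser):
        tokens = CalcLexer.tokens

        @_('term { PLUS|MINUS term }')
        def expr(self, p):
            value = p.term0               # assumed numbering of repeated names
            for op, term in p[1]:         # assumed shape of the repeated group
                value = value + term if op == '+' else value - term
            return value
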
LICENSE (2 changed lines)
@@ -1,6 +1,6 @@
 SLY (Sly Lex-Yacc)
 
-Copyright (C) 2016-2022
+Copyright (C) 2016-2019
 David M. Beazley (Dabeaz LLC)
 All rights reserved.
 
MANIFEST.in
@@ -1,4 +1,2 @@
-include Makefile CONTRIBUTING.md
 recursive-include example *
-recursive-include tests *
 recursive-include docs *
Makefile (21 changed lines, file deleted)
@@ -1,21 +0,0 @@
-PYTHON=python3
-VENV=.venv
-
-# Setup and install all of the required tools for building, testing,
-# and deploying
-setup::
-	rm -rf $(VENV)
-	$(PYTHON) -m venv $(VENV)
-	./$(VENV)/bin/python -m pip install pytest
-	./$(VENV)/bin/python -m pip install pytest-cov
-	./$(VENV)/bin/python -m pip install build
-	./$(VENV)/bin/python -m pip install twine
-
-# Run unit tests
-test::
-	./$(VENV)/bin/python -m pip install .
-	./$(VENV)/bin/python -m pytest --cov
-
-# Build an artifact suitable for installing with pip
-build::
-	./$(VENV)/bin/python -m build
README.rst (10 changed lines)
@@ -33,16 +33,6 @@ SLY is a modern library for performing lexing and parsing. It
 implements the LALR(1) parsing algorithm, commonly used for
 parsing and compiling various programming languages.
 
-Important Notice : October 11, 2022
------------------------------------
-The SLY project is no longer making package-installable releases.
-It's fully functional, but if you choose to use it, you should
-vendor the code into your application. SLY has zero dependencies.
-Although I am semi-retiring the project, I will respond to
-bug reports and still may decide to make future changes to it
-depending on my mood. I'd like to thank everyone who
-has contributed to it over the years. --Dave
-
 Requirements
 ------------
 
@@ -138,6 +138,12 @@ names of the tokens provided in the ``tokens`` set. For example::
     PLUS = r'\+'
     MINUS = r'-'
 
+Regular expression patterns are compiled using the ``re.VERBOSE`` flag
+which can be used to help readability. However,
+unescaped whitespace is ignored and comments are allowed in this mode.
+If your pattern involves whitespace, make sure you use ``\s``. If you
+need to match the ``#`` character, use ``[#]`` or ``\#``.
+
 Tokens are matched in the same order that patterns are listed in the
 ``Lexer`` class. Longer tokens always need to be specified before
 short tokens. For example, if you wanted to have separate tokens for
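The ``re.VERBOSE`` caveat added to the README above can be made concrete with a small lexer. The token names and patterns here are examples of mine, not part of the diff::

    from sly import Lexer

    class VerboseLexer(Lexer):
        tokens = { 'WORDPAIR', 'HASH' }
        ignore = ' \t'
        # Under re.VERBOSE a bare space in the pattern is ignored, so use \s:
        WORDPAIR = r'[a-z]+\s[a-z]+'   # r'[a-z]+ [a-z]+' would not match the space
        # '#' starts a comment under re.VERBOSE, so bracket or escape it:
        HASH = r'[#]'
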
pyproject.toml (file deleted)
@@ -1,3 +0,0 @@
-[build-system]
-requires = ["setuptools", "wheel"]
-build-backend = "setuptools.build_meta"
setup.cfg (18 changed lines, file deleted)
@@ -1,18 +0,0 @@
-[metadata]
-name = sly
-version = 0.5
-url = https://github.com/dabeaz/sly
-author = David Beazley
-author_email = "David Beazley" <dave@dabeaz.com>
-description = "SLY - Sly Lex Yacc"
-long_description = "SLY is an implementation of lex and yacc. No longer maintained on PyPI. Latest version on GitHub."
-license = BSD-3-Clause
-license_files = LICENSE
-classifiers =
-    License :: OSI Approved :: BSD License
-
-[options]
-package_dir =
-    =src
-
-packages = sly
setup.py (new executable file, 28 lines)
@@ -0,0 +1,28 @@
+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
+tests_require = ['pytest', 'regex']
+
+setup(name = "sly",
+      description="SLY - Sly Lex Yacc",
+      long_description = """
+SLY is an implementation of lex and yacc for Python 3.
+""",
+      license="""BSD""",
+      version = "0.4",
+      author = "David Beazley",
+      author_email = "dave@dabeaz.com",
+      maintainer = "David Beazley",
+      maintainer_email = "dave@dabeaz.com",
+      url = "https://github.com/dabeaz/sly",
+      packages = ['sly'],
+      tests_require = tests_require,
+      extras_require = {
+          'test': tests_require,
+      },
+      classifiers = [
+          'Programming Language :: Python :: 3',
+      ]
+)
sly/__init__.py
@@ -2,5 +2,5 @@
 from .lex import *
 from .yacc import *
 
-__version__ = "0.5"
+__version__ = "0.4"
 __all__ = [ *lex.__all__, *yacc.__all__ ]
sly/lex.py
@@ -73,9 +73,9 @@ class Token(object):
     '''
     Representation of a single token.
    '''
-    __slots__ = ('type', 'value', 'lineno', 'index', 'end')
+    __slots__ = ('type', 'value', 'lineno', 'index')
     def __repr__(self):
-        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end})'
+        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'
 
 class TokenStr(str):
     @staticmethod
@@ -406,7 +406,7 @@ class Lexer(metaclass=LexerMeta):
             tok.index = index
             m = _master_re.match(text, index)
             if m:
-                tok.end = index = m.end()
+                index = m.end()
                 tok.value = m.group()
                 tok.type = m.lastgroup
 
@@ -431,7 +431,6 @@ class Lexer(metaclass=LexerMeta):
                 # No match, see if the character is in literals
                 if text[index] in _literals:
                     tok.value = text[index]
-                    tok.end = index + 1
                     tok.type = tok.value
                     index += 1
                     yield tok
@@ -443,7 +442,6 @@ class Lexer(metaclass=LexerMeta):
                 tok.value = text[index:]
                 tok = self.error(tok)
                 if tok is not None:
-                    tok.end = self.index
                     yield tok
 
                 index = self.index
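The ``tok.end`` attribute stripped out in these hunks (present only on the 539a85a5d5 side) marks the ending index of the matched text, so a lexeme can be recovered by slicing, exactly as the removed ``test_positions`` test further down does with ``text[t.index:t.end]``. A minimal sketch against the v0.5 behavior; the lexer itself is an illustration of mine::

    from sly import Lexer

    class WordLexer(Lexer):
        tokens = { 'NAME', 'NUMBER' }
        ignore = ' \t\n'
        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
        NUMBER = r'\d+'

    text = 'abc 123'
    for tok in WordLexer().tokenize(text):
        # tok.end exists only on the v0.5 (539a85a5d5) side of this diff
        print(tok.type, text[tok.index:tok.end])
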
sly/yacc.py
@@ -126,6 +126,8 @@ class YaccProduction:
     @property
     def lineno(self):
         for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
             lineno = getattr(tok, 'lineno', None)
             if lineno:
                 return lineno
@@ -134,20 +136,13 @@ class YaccProduction:
     @property
     def index(self):
         for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
             index = getattr(tok, 'index', None)
             if index is not None:
                 return index
         raise AttributeError('No index attribute found')
 
-    @property
-    def end(self):
-        result = None
-        for tok in self._slice:
-            r = getattr(tok, 'end', None)
-            if r:
-                result = r
-        return result
-
     def __getattr__(self, name):
         if name in self._namemap:
             return self._namemap[name](self._slice)
@@ -1811,6 +1806,12 @@ class ParserMeta(type):
     @classmethod
     def __prepare__(meta, *args, **kwargs):
         d = ParserMetaDict()
+        # def _(rule, *extra):
+        #     rules = [rule, *extra]
+        #     def decorate(func):
+        #         func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
+        #         return func
+        #     return decorate
         d['_'] = _decorator
         return d
 
@@ -1821,9 +1822,6 @@ class ParserMeta(type):
         return cls
 
 class Parser(metaclass=ParserMeta):
-    # Automatic tracking of position information
-    track_positions = True
-
     # Logging object where debugging/diagnostic messages are sent
     log = SlyLogger(sys.stderr)
 
@@ -2078,15 +2076,9 @@ class Parser(metaclass=ParserMeta):
         self.tokens = tokens
         self.statestack = statestack = []    # Stack of parsing states
         self.symstack = symstack = []        # Stack of grammar symbols
         pslice._stack = symstack             # Associate the stack with the production
         self.restart()
 
-        # Set up position tracking
-        track_positions = self.track_positions
-        if not hasattr(self, '_line_positions'):
-            self._line_positions = { }     # id: -> lineno
-            self._index_positions = { }    # id: -> (start, end)
-
         errtoken = None                      # Err token
         while True:
             # Get the next symbol on the input. If a lookahead symbol
@@ -2101,7 +2093,7 @@ class Parser(metaclass=ParserMeta):
                 if not lookahead:
                     lookahead = YaccSymbol()
                     lookahead.type = '$end'
 
             # Check the action table
             ltype = lookahead.type
             t = actions[self.state].get(ltype)
@@ -2137,23 +2129,7 @@ class Parser(metaclass=ParserMeta):
                     value = p.func(self, pslice)
                     if value is pslice:
                         value = (pname, *(s.value for s in pslice._slice))
 
                     sym.value = value
 
-                    # Record positions
-                    if track_positions:
-                        if plen:
-                            sym.lineno = symstack[-plen].lineno
-                            sym.index = symstack[-plen].index
-                            sym.end = symstack[-1].end
-                        else:
-                            # A zero-length production (what to put here?)
-                            sym.lineno = None
-                            sym.index = None
-                            sym.end = None
-                        self._line_positions[id(value)] = sym.lineno
-                        self._index_positions[id(value)] = (sym.index, sym.end)
-
                     if plen:
                         del symstack[-plen:]
                         del statestack[-plen:]
@@ -2238,8 +2214,6 @@ class Parser(metaclass=ParserMeta):
                     t.lineno = lookahead.lineno
                     if hasattr(lookahead, 'index'):
                         t.index = lookahead.index
-                    if hasattr(lookahead, 'end'):
-                        t.end = lookahead.end
                     t.value = lookahead
                     lookaheadstack.append(lookahead)
                     lookahead = t
@@ -2251,11 +2225,3 @@ class Parser(metaclass=ParserMeta):
 
         # Call an error function here
         raise RuntimeError('sly: internal parser error!!!\n')
-
-    # Return position tracking information
-    def line_position(self, value):
-        return self._line_positions[id(value)]
-
-    def index_position(self, value):
-        return self._index_positions[id(value)]
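Taken together, the hunks removed here implement the automatic location tracking described in the CHANGES entry above: each reduced value is keyed by ``id(value)`` into ``_line_positions`` and ``_index_positions``, which ``line_position()`` and ``index_position()`` then read back. A usage sketch against the v0.5 (539a85a5d5) side; the CalcLexer/CalcParser classes and the input string are illustrative assumptions::

    # Assumes CalcLexer/CalcParser in the style of the sly examples.
    lexer = CalcLexer()
    parser = CalcParser()    # track_positions defaults to True on the v0.5 side

    node = parser.parse(lexer.tokenize('a = 3 + 4'))

    print(parser.line_position(node))          # line number where node originated
    start, end = parser.index_position(node)   # (start, end) index pair
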
tests/test_lex.py
@@ -1,6 +1,11 @@
 import pytest
 from sly import Lexer
 
+try:
+    import regex
+except ImportError:
+    regex = None
+
 class CalcLexer(Lexer):
     # Set of token names. This is always required
     tokens = {
@@ -56,6 +61,29 @@ class CalcLexer(Lexer):
     def __init__(self):
         self.errors = []
 
+if regex is not None:
+    class RegexModuleCalcLexer(Lexer):
+        regex_module = regex
+
+        tokens = { 'ID', 'PLUS', 'MINUS' }
+
+        literals = { '(', ')' }
+        ignore = ' \t'
+
+        ID = r'\p{Ll}+'    # Unicode lowercase letters, regex module feature
+        PLUS = r'\+'
+        MINUS = r'-'
+
+        ignore_comment = r'\#.*'
+
+        @_(r'\n+')
+        def newline(self, t):
+            self.lineno += t.value.count('\n')
+
+        def ID(self, t):
+            t.value = t.value.upper()
+            return t
+
 # Test basic recognition of various tokens and literals
 def test_tokens():
     lexer = CalcLexer()
@@ -65,21 +93,17 @@ def test_tokens():
     assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
     assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']
 
-# Test position tracking
-def test_positions():
-    lexer = CalcLexer()
-    text = 'abc\n( )'
-    toks = list(lexer.tokenize(text))
-    lines = [t.lineno for t in toks ]
-    indices = [t.index for t in toks ]
-    ends = [t.end for t in toks]
-    values = [ text[t.index:t.end] for t in toks ]
-    assert values == ['abc', '(', ')']
-    assert lines == [1, 2, 2]
-    assert indices == [0, 4, 6]
-    assert ends == [3, 5, 7]
-
+# Test third-party regex module support
+@pytest.mark.skipif(regex is None,
+                    reason="third-party regex module not installed")
+def test_3rd_party_regex_module():
+    lexer = RegexModuleCalcLexer()
+    toks = list(lexer.tokenize('a + b - c'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    assert types == ['ID','PLUS','ID','MINUS','ID']
+    assert vals == ['A', '+', 'B', '-', 'C']
+
 # Test ignored comments and newlines
 def test_ignored():
     lexer = CalcLexer()
@@ -204,5 +228,23 @@ def test_modern_error_return():
     assert vals == [123, ':+-', '+', '-']
     assert lexer.errors == [ ':+-' ]
 
+# Test Lexer Inheritance. This class should inherit all of the tokens
+# and features of ModernCalcLexer, but add two new tokens to it. The
+# PLUSPLUS token matches before the PLUS token.
+
+if False:
+    class SubModernCalcLexer(ModernCalcLexer):
+        tokens |= { DOLLAR, PLUSPLUS }
+        DOLLAR = r'\$'
+        PLUSPLUS = r'\+\+'
+        PLUSPLUS.before = PLUS
+
+    def test_lexer_inherit():
+        lexer = SubModernCalcLexer()
+        toks = list(lexer.tokenize('123 + - $ ++ if'))
+        types = [t.type for t in toks]
+        vals = [t.value for t in toks]
+        assert types == ['NUMBER', 'PLUS', 'MINUS', 'DOLLAR', 'PLUSPLUS', 'IF']
+        assert vals == [123, '+', '-', '$', '++', 'if']
+