Compare commits
10 Commits: f8fcbb080c ... 539a85a5d5

539a85a5d5
069f6d7766
33d4f5afc0
4000988231
b453ea1854
004df26293
dbcf6d0f7f
dd71d70882
62203d8b75
cd9014eda2
CHANGES (21 lines changed)
@@ -1,6 +1,25 @@
 In Progress
 -----------
+
+Version 0.5
+-----------
+10/25/2022 ***IMPORTANT NOTE*** This is the last release to be made
+           on PyPi. If you want the latest version go to
+           https://github.com/dabeaz/sly.
+
+09/06/2022 Modernization of the packaging infrastructure. Slight
+           project reorganization.
+
+03/25/2022 Added automatic location tracking to the parser. Use
+           Parser.line_position(value) to return the line number
+           and Parser.index_position(value) to return a (start, end)
+           index pair. value is *any* object returned by one of
+           the various methods in the parser definition. Typically,
+           it would be an AST node. The parser tracks the data using
+           the value of id(value).
+
+03/25/2022 Added an .end attribute to tokens that specifies the ending
+           index of the matched text. This is used to do more
+           precise location tracking for the purpose of issuing
+           more useful error messages.
 
 05/09/2020 Experimental support for EBNF choices. For example:
 
     @('term { PLUS|MINUS term }')
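The two 03/25/2022 entries describe the position-tracking API that most of the code changes below implement. Here is a minimal sketch of how it might be called; the CalcLexer/CalcParser classes and the tuple AST are illustrative assumptions, and only line_position(), index_position(), and the token .end attribute come from this release:

    from sly import Lexer, Parser

    class CalcLexer(Lexer):
        tokens = { 'NUMBER', 'PLUS' }
        ignore = ' '
        NUMBER = r'\d+'
        PLUS = r'\+'

    class CalcParser(Parser):
        tokens = CalcLexer.tokens

        @_('expr PLUS term')
        def expr(self, p):
            return ('add', p.expr, p.term)   # "AST nodes" here are plain tuples

        @_('term')
        def expr(self, p):
            return p.term

        @_('NUMBER')
        def term(self, p):
            return ('num', int(p.NUMBER))

    lexer, parser = CalcLexer(), CalcParser()
    node = parser.parse(lexer.tokenize('1 + 23'))
    print(parser.line_position(node))     # line on which the node started: 1
    print(parser.index_position(node))    # (start, end) index pair: (0, 6)

Because the parser keys its records by id(value), the lookups work for any distinct object a rule returns, without requiring AST classes to carry position fields themselves.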
LICENSE (2 lines changed)
@@ -1,6 +1,6 @@
 SLY (Sly Lex-Yacc)
 
-Copyright (C) 2016-2019
+Copyright (C) 2016-2022
 David M. Beazley (Dabeaz LLC)
 All rights reserved.
 
MANIFEST.in
@@ -1,2 +1,4 @@
+include Makefile CONTRIBUTING.md
 recursive-include example *
+recursive-include tests *
 recursive-include docs *
Makefile (new file, 21 lines)
@@ -0,0 +1,21 @@
+PYTHON=python3
+VENV=.venv
+
+# Setup and install all of the required tools for building, testing,
+# and deploying
+setup::
+	rm -rf $(VENV)
+	$(PYTHON) -m venv $(VENV)
+	./$(VENV)/bin/python -m pip install pytest
+	./$(VENV)/bin/python -m pip install pytest-cov
+	./$(VENV)/bin/python -m pip install build
+	./$(VENV)/bin/python -m pip install twine
+
+# Run unit tests
+test::
+	./$(VENV)/bin/python -m pip install .
+	./$(VENV)/bin/python -m pytest --cov
+
+# Build an artifact suitable for installing with pip
+build::
+	./$(VENV)/bin/python -m build
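The intended workflow with this Makefile appears to be: make setup once to create the .venv virtual environment and install the tooling, make test to install the package into it and run pytest with coverage, and make build to produce a pip-installable artifact. twine is installed by setup, but no upload target appears in this hunk.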
README.rst (10 lines changed)
@@ -33,6 +33,16 @@ SLY is a modern library for performing lexing and parsing. It
 implements the LALR(1) parsing algorithm, commonly used for
 parsing and compiling various programming languages.
 
+Important Notice : October 11, 2022
+-----------------------------------
+The SLY project is no longer making package-installable releases.
+It's fully functional, but if you choose to use it, you should
+vendor the code into your application. SLY has zero dependencies.
+Although I am semi-retiring the project, I will respond to
+bug reports and still may decide to make future changes to it
+depending on my mood. I'd like to thank everyone who
+has contributed to it over the years. --Dave
+
 Requirements
 ------------
 
@@ -138,12 +138,6 @@ names of the tokens provided in the ``tokens`` set. For example::
     PLUS = r'\+'
     MINUS = r'-'
 
-Regular expression patterns are compiled using the ``re.VERBOSE`` flag
-which can be used to help readability. However, unescaped whitespace
-is ignored and comments are allowed in this mode. If your pattern
-involves whitespace, make sure you use ``\s``. If you need to match
-the ``#`` character, use ``[#]`` or ``\#``.
-
 Tokens are matched in the same order that patterns are listed in the
 ``Lexer`` class. Longer tokens always need to be specified before
 shorter tokens. For example, if you wanted to have separate tokens for
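The hunk's trailing context breaks off mid-sentence ("...separate tokens for"). Here is a small sketch of the ordering rule that context states, using an assumed two-character/one-character operator pair rather than the README's own continuation:

    from sly import Lexer

    class OrderLexer(Lexer):
        tokens = { 'LE', 'LT' }
        ignore = ' '

        # The longer pattern must be listed first: if LT came first, the
        # input '<=' would match as LT and leave a stray '=' behind.
        LE = r'<='
        LT = r'<'

    assert [t.type for t in OrderLexer().tokenize('<= <')] == ['LE', 'LT']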
pyproject.toml (new file, 3 lines)
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
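This is the minimal PEP 518 build-system declaration: it tells pip and python -m build to fetch setuptools and wheel and to build through the setuptools.build_meta backend, while all project metadata lives in the new setup.cfg below.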
setup.cfg (new file, 18 lines)
@@ -0,0 +1,18 @@
+[metadata]
+name = sly
+version = 0.5
+url = https://github.com/dabeaz/sly
+author = David Beazley
+author_email = "David Beazley" <dave@dabeaz.com>
+description = "SLY - Sly Lex Yacc"
+long_description = "SLY is an implementation of lex and yacc. No longer maintained on PyPI. Latest version on GitHub."
+license = BSD-3-Clause
+license_files = LICENSE
+classifiers =
+    License :: OSI Approved :: BSD License
+
+[options]
+package_dir =
+    =src
+
+packages = sly
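The package_dir setting with =src indicates that the sly package now lives under a src/ directory instead of the repository root, which is presumably the "slight project reorganization" mentioned in CHANGES.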
setup.py (deleted, 28 lines)
@@ -1,28 +0,0 @@
-try:
-    from setuptools import setup
-except ImportError:
-    from distutils.core import setup
-
-tests_require = ['pytest', 'regex']
-
-setup(name = "sly",
-      description="SLY - Sly Lex Yacc",
-      long_description = """
-SLY is an implementation of lex and yacc for Python 3.
-""",
-      license="""BSD""",
-      version = "0.4",
-      author = "David Beazley",
-      author_email = "dave@dabeaz.com",
-      maintainer = "David Beazley",
-      maintainer_email = "dave@dabeaz.com",
-      url = "https://github.com/dabeaz/sly",
-      packages = ['sly'],
-      tests_require = tests_require,
-      extras_require = {
-          'test': tests_require,
-      },
-      classifiers = [
-          'Programming Language :: Python :: 3',
-      ]
-      )
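Everything the deleted setup.py declared now lives in setup.cfg and pyproject.toml, with the version bumped from 0.4 to 0.5; the tests_require/extras_require machinery is dropped rather than migrated.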
sly/__init__.py
@@ -2,5 +2,5 @@
 from .lex import *
 from .yacc import *
 
-__version__ = "0.4"
+__version__ = "0.5"
 __all__ = [ *lex.__all__, *yacc.__all__ ]
sly/lex.py
@@ -73,9 +73,9 @@ class Token(object):
     '''
     Representation of a single token.
    '''
-    __slots__ = ('type', 'value', 'lineno', 'index')
+    __slots__ = ('type', 'value', 'lineno', 'index', 'end')
     def __repr__(self):
-        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'
+        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end})'
 
 class TokenStr(str):
     @staticmethod
@@ -406,7 +406,7 @@ class Lexer(metaclass=LexerMeta):
                 tok.index = index
                 m = _master_re.match(text, index)
                 if m:
-                    index = m.end()
+                    tok.end = index = m.end()
                     tok.value = m.group()
                     tok.type = m.lastgroup
 
@@ -431,6 +431,7 @@ class Lexer(metaclass=LexerMeta):
                 # No match, see if the character is in literals
                 if text[index] in _literals:
                     tok.value = text[index]
+                    tok.end = index + 1
                     tok.type = tok.value
                     index += 1
                     yield tok
@@ -442,6 +443,7 @@ class Lexer(metaclass=LexerMeta):
                     tok.value = text[index:]
                     tok = self.error(tok)
                     if tok is not None:
+                        tok.end = self.index
                         yield tok
 
                     index = self.index
sly/yacc.py
@@ -126,8 +126,6 @@ class YaccProduction:
     @property
     def lineno(self):
         for tok in self._slice:
-            if isinstance(tok, YaccSymbol):
-                continue
             lineno = getattr(tok, 'lineno', None)
             if lineno:
                 return lineno
@@ -136,13 +134,20 @@
     @property
     def index(self):
         for tok in self._slice:
-            if isinstance(tok, YaccSymbol):
-                continue
             index = getattr(tok, 'index', None)
             if index is not None:
                 return index
         raise AttributeError('No index attribute found')
 
+    @property
+    def end(self):
+        result = None
+        for tok in self._slice:
+            r = getattr(tok, 'end', None)
+            if r:
+                result = r
+        return result
+
     def __getattr__(self, name):
         if name in self._namemap:
             return self._namemap[name](self._slice)
@@ -1806,12 +1811,6 @@ class ParserMeta(type):
     @classmethod
     def __prepare__(meta, *args, **kwargs):
         d = ParserMetaDict()
-        # def _(rule, *extra):
-        #     rules = [rule, *extra]
-        #     def decorate(func):
-        #         func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
-        #         return func
-        #     return decorate
         d['_'] = _decorator
         return d
 
@@ -1822,6 +1821,9 @@ class ParserMeta(type):
         return cls
 
 class Parser(metaclass=ParserMeta):
+    # Automatic tracking of position information
+    track_positions = True
+
     # Logging object where debugging/diagnostic messages are sent
     log = SlyLogger(sys.stderr)
 
@@ -2076,9 +2078,15 @@ class Parser(metaclass=ParserMeta):
         self.tokens = tokens
         self.statestack = statestack = []    # Stack of parsing states
         self.symstack = symstack = []        # Stack of grammar symbols
         pslice._stack = symstack             # Associate the stack with the production
         self.restart()
 
+        # Set up position tracking
+        track_positions = self.track_positions
+        if not hasattr(self, '_line_positions'):
+            self._line_positions = { }       # id: -> lineno
+            self._index_positions = { }      # id: -> (start, end)
+
         errtoken = None                      # Err token
         while True:
             # Get the next symbol on the input. If a lookahead symbol
@@ -2093,7 +2101,7 @@
                     if not lookahead:
                         lookahead = YaccSymbol()
                         lookahead.type = '$end'
 
             # Check the action table
             ltype = lookahead.type
             t = actions[self.state].get(ltype)
@@ -2129,7 +2137,23 @@
                     value = p.func(self, pslice)
                     if value is pslice:
                         value = (pname, *(s.value for s in pslice._slice))
 
                     sym.value = value
+
+                    # Record positions
+                    if track_positions:
+                        if plen:
+                            sym.lineno = symstack[-plen].lineno
+                            sym.index = symstack[-plen].index
+                            sym.end = symstack[-1].end
+                        else:
+                            # A zero-length production (what to put here?)
+                            sym.lineno = None
+                            sym.index = None
+                            sym.end = None
+                        self._line_positions[id(value)] = sym.lineno
+                        self._index_positions[id(value)] = (sym.index, sym.end)
+
                     if plen:
                         del symstack[-plen:]
                         del statestack[-plen:]
@@ -2214,6 +2238,8 @@
                 t.lineno = lookahead.lineno
                 if hasattr(lookahead, 'index'):
                     t.index = lookahead.index
+                if hasattr(lookahead, 'end'):
+                    t.end = lookahead.end
                 t.value = lookahead
                 lookaheadstack.append(lookahead)
                 lookahead = t
@@ -2225,3 +2251,11 @@
 
         # Call an error function here
         raise RuntimeError('sly: internal parser error!!!\n')
+
+    # Return position tracking information
+    def line_position(self, value):
+        return self._line_positions[id(value)]
+
+    def index_position(self, value):
+        return self._index_positions[id(value)]
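One consequence of keying _line_positions and _index_positions by id(value): a lookup is only reliable while the produced value is alive and is a distinct object. Rules that return interned or cached values (small integers, short strings) could collide on id(), so returning a fresh AST object from each production, as the CHANGES entry suggests, is the safe pattern here.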
tests/test_lex.py
@@ -1,11 +1,6 @@
 import pytest
 from sly import Lexer
 
-try:
-    import regex
-except ImportError:
-    regex = None
-
 class CalcLexer(Lexer):
     # Set of token names. This is always required
     tokens = {
@@ -61,29 +56,6 @@ class CalcLexer(Lexer):
     def __init__(self):
         self.errors = []
 
-if regex is not None:
-    class RegexModuleCalcLexer(Lexer):
-        regex_module = regex
-
-        tokens = { 'ID', 'PLUS', 'MINUS' }
-
-        literals = { '(', ')' }
-        ignore = ' \t'
-
-        ID = r'\p{Ll}+'    # Unicode lowercase letters, regex module feature
-        PLUS = r'\+'
-        MINUS = r'-'
-
-        ignore_comment = r'\#.*'
-
-        @_(r'\n+')
-        def newline(self, t):
-            self.lineno += t.value.count('\n')
-
-        def ID(self, t):
-            t.value = t.value.upper()
-            return t
-
 # Test basic recognition of various tokens and literals
 def test_tokens():
     lexer = CalcLexer()
@@ -93,17 +65,21 @@ def test_tokens():
     assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
     assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']
 
-# Test third-party regex module support
-@pytest.mark.skipif(regex is None,
-                    reason="third-party regex module not installed")
-def test_3rd_party_regex_module():
-    lexer = RegexModuleCalcLexer()
-    toks = list(lexer.tokenize('a + b - c'))
-    types = [t.type for t in toks]
-    vals = [t.value for t in toks]
-    assert types == ['ID','PLUS','ID','MINUS','ID']
-    assert vals == ['A', '+', 'B', '-', 'C']
+# Test position tracking
+def test_positions():
+    lexer = CalcLexer()
+    text = 'abc\n( )'
+    toks = list(lexer.tokenize(text))
+    lines = [t.lineno for t in toks]
+    indices = [t.index for t in toks]
+    ends = [t.end for t in toks]
+    values = [text[t.index:t.end] for t in toks]
+    assert values == ['abc', '(', ')']
+    assert lines == [1, 2, 2]
+    assert indices == [0, 4, 6]
+    assert ends == [3, 5, 7]
 
 # Test ignored comments and newlines
 def test_ignored():
     lexer = CalcLexer()
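The numbers asserted in test_positions follow directly from the input 'abc\n( )': 'abc' occupies indices 0-2 (so its end is 3), the newline at index 3 bumps lineno to 2, '(' spans (4, 5), and ')' spans (6, 7), matching the asserted lines, indices, and ends.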
@@ -228,23 +204,5 @@ def test_modern_error_return():
     assert vals == [123, ':+-', '+', '-']
     assert lexer.errors == [ ':+-' ]
 
-# Test Lexer Inheritance. This class should inherit all of the tokens
-# and features of ModernCalcLexer, but add two new tokens to it. The
-# PLUSPLUS token matches before the PLUS token.
-
-if False:
-    class SubModernCalcLexer(ModernCalcLexer):
-        tokens |= { DOLLAR, PLUSPLUS }
-        DOLLAR = r'\$'
-        PLUSPLUS = r'\+\+'
-        PLUSPLUS.before = PLUS
-
-    def test_lexer_inherit():
-        lexer = SubModernCalcLexer()
-        toks = list(lexer.tokenize('123 + - $ ++ if'))
-        types = [t.type for t in toks]
-        vals = [t.value for t in toks]
-        assert types == ['NUMBER', 'PLUS', 'MINUS', 'DOLLAR', 'PLUSPLUS', 'IF']
-        assert vals == [123, '+', '-', '$', '++', 'if']