Merge pull request #27 from Akuli/regex-module
Add support for third-party regex module
commit 5a7f8ab652
docs/sly.rst (+19)
@@ -385,6 +385,25 @@ might be useful if the parser wants to see error tokens for some
 reason--perhaps for the purposes of improved error messages or
 some other kind of error handling.
 
+Third-Party Regex Module
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. versionadded:: 0.4
+
+The third-party `regex <https://pypi.org/project/regex/>`_ module can be used
+with sly. Like this::
+
+    from sly import Lexer
+    import regex
+
+    class MyLexer(Lexer):
+        regex_module = regex
+        ...
+
+Now all regular expressions that ``MyLexer`` uses will be handled with the
+``regex`` module. The ``regex_module`` can be set to any module that is
+compatible with Python's standard library ``re``.
+
 
 A More Complete Example
 ^^^^^^^^^^^^^^^^^^^^^^^
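As a quick sketch of the documented usage (illustrative only, not part of the commit: GreekLexer and its WORD token are made-up names), a lexer that routes its patterns through the regex module can use Unicode property classes such as \p{Ll}:

    from sly import Lexer
    import regex   # third-party module, installable from PyPI

    class GreekLexer(Lexer):
        regex_module = regex   # patterns below are compiled by `regex`, not `re`
        tokens = { 'WORD' }
        ignore = ' '
        WORD = r'\p{Ll}+'      # Unicode "lowercase letter" property needs regex

    for tok in GreekLexer().tokenize('abc δεφ'):
        print(tok.type, tok.value)   # prints: WORD abc, then WORD δεφ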
setup.py (+1 -1)
@@ -3,7 +3,7 @@ try:
 except ImportError:
     from distutils.core import setup
 
-tests_require = ['pytest']
+tests_require = ['pytest', 'regex']
 
 setup(name = "sly",
       description="SLY - Sly Lex Yacc",
sly/lex.py
@@ -186,6 +186,7 @@ class Lexer(metaclass=LexerMeta):
     literals = set()
     ignore = ''
     reflags = 0
+    regex_module = re
 
     _token_names = set()
     _token_funcs = {}
@@ -307,7 +308,7 @@ class Lexer(metaclass=LexerMeta):
 
             # Make sure the individual regex compiles properly
            try:
-                cpat = re.compile(part, cls.reflags)
+                cpat = cls.regex_module.compile(part, cls.reflags)
            except Exception as e:
                raise PatternError(f'Invalid regex for token {tokname}') from e
 
@@ -322,8 +323,8 @@ class Lexer(metaclass=LexerMeta):
 
        # Form the master regular expression
        #previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
-        # cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
-        cls._master_re = re.compile('|'.join(parts), cls.reflags)
+        # cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags)
+        cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags)
 
        # Verify that that ignore and literals specifiers match the input type
        if not isinstance(cls.ignore, str):
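Both call sites touched here go through cls.regex_module.compile(pattern, flags), which is why any re-compatible module can be dropped in. A minimal sketch under that assumption (logging_re and _logging_compile are hypothetical names, not part of sly; compiled patterns must still behave like re's):

    import re
    import types
    from sly import Lexer

    def _logging_compile(pattern, flags=0):
        print(f'compiling: {pattern!r}')    # observe every pattern sly compiles
        return re.compile(pattern, flags)   # delegate to re; match objects stay compatible

    # Any object exposing a compatible compile() can serve as regex_module:
    logging_re = types.SimpleNamespace(compile=_logging_compile)

    class TracedLexer(Lexer):
        regex_module = logging_re
        tokens = { 'NAME' }
        ignore = ' '
        NAME = r'[a-z]+'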
tests/test_lex.py
@@ -1,6 +1,11 @@
 import pytest
 from sly import Lexer
 
+try:
+    import regex
+except ImportError:
+    regex = None
+
 class CalcLexer(Lexer):
     # Set of token names. This is always required
     tokens = {
@@ -56,6 +61,29 @@ class CalcLexer(Lexer):
     def __init__(self):
         self.errors = []
 
+if regex is not None:
+    class RegexModuleCalcLexer(Lexer):
+        regex_module = regex
+
+        tokens = { 'ID', 'PLUS', 'MINUS' }
+
+        literals = { '(', ')' }
+        ignore = ' \t'
+
+        ID = r'\p{Ll}+'    # Unicode lowercase letters, regex module feature
+        PLUS = r'\+'
+        MINUS = r'-'
+
+        ignore_comment = r'\#.*'
+
+        @_(r'\n+')
+        def newline(self, t):
+            self.lineno += t.value.count('\n')
+
+        def ID(self, t):
+            t.value = t.value.upper()
+            return t
+
 # Test basic recognition of various tokens and literals
 def test_tokens():
     lexer = CalcLexer()
@@ -65,6 +93,17 @@ def test_tokens():
     assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
     assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']
 
+# Test third-party regex module support
+@pytest.mark.skipif(regex is None,
+                    reason="third-party regex module not installed")
+def test_3rd_party_regex_module():
+    lexer = RegexModuleCalcLexer()
+    toks = list(lexer.tokenize('a + b - c'))
+    types = [t.type for t in toks]
+    vals = [t.value for t in toks]
+    assert types == ['ID','PLUS','ID','MINUS','ID']
+    assert vals == ['A', '+', 'B', '-', 'C']
+
 # Test ignored comments and newlines
 def test_ignored():
     lexer = CalcLexer()
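The new test drives RegexModuleCalcLexer through tokenize(); a quick REPL-style check of the same behavior (a hypothetical session, assuming the regex module is installed) also shows that \p{Ll} accepts non-ASCII lowercase:

    lexer = RegexModuleCalcLexer()
    print([(t.type, t.value) for t in lexer.tokenize('x + y')])
    # -> [('ID', 'X'), ('PLUS', '+'), ('ID', 'Y')]

    print([(t.type, t.value) for t in lexer.tokenize('α - β')])
    # -> [('ID', 'Α'), ('MINUS', '-'), ('ID', 'Β')]  (Greek letters uppercased by the ID handler)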