From 0083477f016a4d24b9c870c04a3349ecf88c9729 Mon Sep 17 00:00:00 2001 From: Akuli Date: Sun, 17 Feb 2019 22:55:49 +0200 Subject: [PATCH] Add support for third-party regex module Fixes #26. --- docs/sly.rst | 19 +++++++++++++++++++ setup.py | 2 +- sly/lex.py | 7 ++++--- tests/test_lex.py | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/docs/sly.rst b/docs/sly.rst index c500d5a..8ee47a0 100644 --- a/docs/sly.rst +++ b/docs/sly.rst @@ -385,6 +385,25 @@ might be useful if the parser wants to see error tokens for some reason--perhaps for the purposes of improved error messages or some other kind of error handling. +Third-Party Regex Module +^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 0.4 + +The third-party `regex <https://pypi.org/project/regex/>`_ module can be used +with sly. Like this:: + + from sly import Lexer + import regex + + class MyLexer(Lexer): + regex_module = regex + ... + +Now all regular expressions that ``MyLexer`` uses will be handled with the +``regex`` module. The ``regex_module`` can be set to any module that is +compatible with Python's standard library ``re``. 
+ A More Complete Example ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/setup.py b/setup.py index d3a823c..dc9d05b 100755 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ try: except ImportError: from distutils.core import setup -tests_require = ['pytest'] +tests_require = ['pytest', 'regex'] setup(name = "sly", description="SLY - Sly Lex Yacc", diff --git a/sly/lex.py b/sly/lex.py index e6c8ac3..246dd9e 100644 --- a/sly/lex.py +++ b/sly/lex.py @@ -186,6 +186,7 @@ class Lexer(metaclass=LexerMeta): literals = set() ignore = '' reflags = 0 + regex_module = re _token_names = set() _token_funcs = {} @@ -307,7 +308,7 @@ class Lexer(metaclass=LexerMeta): # Make sure the individual regex compiles properly try: - cpat = re.compile(part, cls.reflags) + cpat = cls.regex_module.compile(part, cls.reflags) except Exception as e: raise PatternError(f'Invalid regex for token {tokname}') from e @@ -322,8 +323,8 @@ class Lexer(metaclass=LexerMeta): # Form the master regular expression #previous = ('|' + cls._master_re.pattern) if cls._master_re else '' - # cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags) - cls._master_re = re.compile('|'.join(parts), cls.reflags) + # cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags) + cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags) # Verify that that ignore and literals specifiers match the input type if not isinstance(cls.ignore, str): diff --git a/tests/test_lex.py b/tests/test_lex.py index 7c7421b..c7bf3e9 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -1,6 +1,11 @@ import pytest from sly import Lexer +try: + import regex +except ImportError: + regex = None + class CalcLexer(Lexer): # Set of token names. 
This is always required tokens = { @@ -56,6 +61,29 @@ class CalcLexer(Lexer): def __init__(self): self.errors = [] +if regex is not None: + class RegexModuleCalcLexer(Lexer): + regex_module = regex + + tokens = { 'ID', 'PLUS', 'MINUS' } + + literals = { '(', ')' } + ignore = ' \t' + + ID = r'\p{Ll}+' # Unicode lowercase letters, regex module feature + PLUS = r'\+' + MINUS = r'-' + + ignore_comment = r'\#.*' + + @_(r'\n+') + def newline(self, t): + self.lineno += t.value.count('\n') + + def ID(self, t): + t.value = t.value.upper() + return t + # Test basic recognition of various tokens and literals def test_tokens(): lexer = CalcLexer() @@ -65,6 +93,17 @@ def test_tokens(): assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')'] assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')'] +# Test third-party regex module support +@pytest.mark.skipif(regex is None, + reason="third-party regex module not installed") +def test_3rd_party_regex_module(): + lexer = RegexModuleCalcLexer() + toks = list(lexer.tokenize('a + b - c')) + types = [t.type for t in toks] + vals = [t.value for t in toks] + assert types == ['ID','PLUS','ID','MINUS','ID'] + assert vals == ['A', '+', 'B', '-', 'C'] + # Test ignored comments and newlines def test_ignored(): lexer = CalcLexer()