Compare commits: 0.1...f8fcbb080c (53 commits)
| SHA1 |
|---|
| f8fcbb080c |
| 06b92e3e73 |
| 1f87ddaf39 |
| ab75a58b10 |
| a128245cd3 |
| cf35258d6f |
| 78dc087c45 |
| 8a67ae946b |
| 39ffd0361a |
| a2cdf52d0f |
| 9944b6239c |
| aead4b43be |
| d9c763d2f7 |
| 5bf4d9707d |
| a728a23adf |
| 5a7f8ab652 |
| 0083477f01 |
| 90a5484ea6 |
| b8e5ac3f6b |
| ac8e0ecba1 |
| 6cdb4d8ca7 |
| a84811539e |
| d87f21a138 |
| dd41efd112 |
| d1b9f64e49 |
| 52c993e00c |
| 2be28d29a5 |
| 66b6bd73f8 |
| 503fae9e18 |
| 0ac3c1a0a3 |
| 16d700b310 |
| e54c5dfcea |
| 5fdc971f36 |
| 6a27431f81 |
| a33ff221e5 |
| 3f7240b9a2 |
| 995d0ecff1 |
| 715222a0fc |
| fb43a50f8a |
| 1251da034a |
| c5659a4465 |
| 3a0ee0d9c1 |
| 08988d2798 |
| d0e34417bc |
| 51b01d8335 |
| b088d9b2ce |
| b74e7223ce |
| e9346daff0 |
| cdd7a082a4 |
| e05748494c |
| d8903d8301 |
| 636197b9fd |
| b71fbdafe3 |

CHANGES (new file, 238 lines)

@@ -0,0 +1,238 @@
In Progress
-----------

05/09/2020 Experimental support for EBNF choices.  For example:

              @('term { PLUS|MINUS term }')
              def expr(self, p):
                  lterm = p.term0
                  for op, rterm in p[1]:
                      lterm = BinOp(op, lterm, rterm)

           One issue here is just how one refers to the choice
           of values.  There is no unified name to pick. So,
           you basically have to do it using a numeric index like p[1].
           In this case, p[1] is a list of all of the repeated items
           (represented as tuples).

05/09/2020 Changed the internal names used for EBNF rules to make them
           a bit easier to debug in the parser.out file.

Version 0.4
-----------

03/06/2020 Added experimental support for EBNF repetition and optional
           syntax.  For example, here is a rule for a comma-separated
           expression list:

               @('expr { COMMA expr }')
               def exprlist(self, p):
                   return [ p.expr0 ] + p.expr1

           In this code, the { ... } means zero-or-more repetitions.
           It turns all symbols inside into lists.  So, instead of
           representing a single value, p.expr1 is now a list of
           values.

           An optional value can be enclosed in brackets like this:

              @('VAR NAME [ EQUAL expr ] SEMI')
              def variable_declaration(self, p):
                  print(f"Defining {p.NAME}. Initial value={p.expr}")

           In this case, all symbols inside [ ... ] either have a value
           if present or are assigned to None if missing.

           In both cases, you continue to use the same name indexing
           scheme used by the rest of SLY.  For example, in the first
           example above, you use "expr0" and "expr1" to refer to the
           different "expr" symbols since that name appears in more
           than one place.

04/09/2019 Fixed a very mysterious error message that resulted if you
           defined a grammar rule called "start".  start can now
           be a string or a function.

04/09/2019 Minor refinement to the reporting of reduce/reduce conflicts.
           If a top grammar rule wasn't specified, SLY could fail with
           a mysterious "unknown conflict" exception.  This should be
           fixed.

11/18/2018 Various usability fixes observed from the last compilers course.

            - Errors encountered during grammar construction are now
              reported as part of the raised GrammarError exception
              instead of via logging.  This places them in the same
              visual position as normal Python errors (at the end
              of the traceback).

            - Repeated warning messages about unused tokens have
              been consolidated into a single warning message to make
              the output less verbose.

            - Grammar attributes (e.g., p.TOKEN) used during parsing
              are now read-only.

            - The error about "infinite recursion" is only checked
              if there are no undefined grammar symbols.  Sometimes
              you'd get this message and be confused when the only
              mistake was a bad token name or similar.

9/8/2018   Fixed Issue #14.  YaccProduction index property causes
           AttributeError if index is 0.

9/5/2018   Added support for getattr() and related functions on
           productions.
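
           For illustration, a minimal sketch (an editor's example, not
           part of the original changelog; rule names are borrowed from
           the calculator example elsewhere in this diff):

               @_('expr PLUS expr')
               def expr(self, p):
                   # getattr() now works on the production object p,
                   # so attributes can be accessed generically by name:
                   left = getattr(p, 'expr0')
                   right = getattr(p, 'expr1')
                   return left + right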

Version 0.3
-----------
4/1/2018   Support for Lexer inheritance added.  For example:

            from sly import Lexer

            class BaseLexer(Lexer):
                tokens = { NAME, NUMBER }
                ignore = ' \t'

                NAME = r'[a-zA-Z]+'
                NUMBER = r'\d+'

            class ChildLexer(BaseLexer):
                tokens = { PLUS, MINUS }
                PLUS = r'\+'
                MINUS = r'-'

           In this example, the ChildLexer class gets all of the tokens
           from the parent class (BaseLexer) in addition to the new
           definitions it adds of its own.

           One quirk of Lexer inheritance is that definition order has
           an impact on the low-level regular expression matching.  By
           default, new definitions are always processed AFTER any previous
           definitions.  You can change this using the before() function
           like this:

            class GrandChildLexer(ChildLexer):
                tokens = { PLUSPLUS, MINUSMINUS }
                PLUSPLUS = before(PLUS, r'\+\+')
                MINUSMINUS = before(MINUS, r'--')

           In this example, the PLUSPLUS token is checked before the
           PLUS token in the base class.  Thus, an input text of '++'
           will be parsed as a single PLUSPLUS token, not two PLUS tokens.

4/1/2018   Better support for lexing states.  Each lexing state can be
           defined as a separate class.  Use the begin(cls) method to
           switch to a different state.  For example:

            from sly import Lexer

            class LexerA(Lexer):
                tokens = { NAME, NUMBER, LBRACE }

                ignore = ' \t'

                NAME = r'[a-zA-Z]+'
                NUMBER = r'\d+'
                LBRACE = r'\{'

                def LBRACE(self, t):
                    self.begin(LexerB)
                    return t

            class LexerB(Lexer):
                tokens = { PLUS, MINUS, RBRACE }

                ignore = ' \t'

                PLUS = r'\+'
                MINUS = r'-'
                RBRACE = r'\}'

                def RBRACE(self, t):
                    self.begin(LexerA)
                    return t

           In this example, LexerA switches to a new state LexerB when
           a left brace ({) is encountered.  The begin() method causes
           the state transition.  LexerB switches back to state LexerA
           when a right brace (}) is encountered.

           As an alternative to the begin() method, you can also use the
           push_state(cls) and pop_state() methods.  These manage the
           lexing states as a stack.  The pop_state() method returns to
           the previous lexing state.
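
           For illustration, a minimal sketch of the stack-based
           approach (an editor's example with hypothetical StateA and
           StateB lexers, not part of the original changelog):

            from sly import Lexer

            class StateA(Lexer):
                tokens = { LBRACKET }
                LBRACKET = r'\['

                def LBRACKET(self, t):
                    self.push_state(StateB)   # remember StateA, switch to StateB
                    return t

            class StateB(Lexer):
                tokens = { RBRACKET }
                RBRACKET = r'\]'

                def RBRACKET(self, t):
                    self.pop_state()          # return to whichever state pushed
                    return t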

1/27/2018  Tokens no longer have to be specified as strings.  For example, you
           can now write:

           from sly import Lexer

           class TheLexer(Lexer):
               tokens = { ID, NUMBER, PLUS, MINUS }

               ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
               NUMBER = r'\d+'
               PLUS = r'\+'
               MINUS = r'-'

           This convention also carries over to the parser for things such
           as precedence specifiers:

           from sly import Parser
           class TheParser(Parser):
                 tokens = TheLexer.tokens

                 precedence = (
                     ('left', PLUS, MINUS),
                     ('left', TIMES, DIVIDE),
                     ('right', UMINUS),
                  )
            ...

           Never mind the fact that ID, NUMBER, PLUS, and MINUS appear to be
           undefined identifiers.  It all works.

1/27/2018  Tokens now allow special-case remapping.  For example:

           from sly import Lexer

           class TheLexer(Lexer):
               tokens = { ID, IF, ELSE, WHILE, NUMBER, PLUS, MINUS }

               ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
               ID['if'] = IF
               ID['else'] = ELSE
               ID['while'] = WHILE

               NUMBER = r'\d+'
               PLUS = r'\+'
               MINUS = r'-'

           In this code, the ID rule matches any identifier.  However,
           special cases have been made for the IF, ELSE, and WHILE tokens.
           Previously, this had to be handled in a special action method
           such as this:

               def ID(self, t):
                   if t.value in { 'if', 'else', 'while' }:
                       t.type = t.value.upper()
                   return t

           Never mind the fact that the syntax appears to suggest that strings
           work as a kind of mutable mapping.

1/16/2018  Usability improvement on the Lexer class.  Regular expression rules
           specified as strings that don't match any name in tokens are
           now reported as errors.

Version 0.2
-----------

12/24/2017 The error(self, t) method of lexer objects now receives a
           token as input.  The value attribute of this token contains
           all remaining input text.  If the passed token is returned
           by error(), then it shows up in the token stream where it
           can be processed by the parser.
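
           For illustration, a minimal sketch (an editor's example, not
           part of the original changelog) of an error() handler that
           skips the bad character and also forwards the token:

               class MyLexer(Lexer):
                   ...
                   def error(self, t):
                       print("Illegal character '%s'" % t.value[0])
                       self.index += 1
                       return t    # shows up as an ERROR token in the stream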

CONTRIBUTING.md (new file, 45 lines)

@@ -0,0 +1,45 @@
Contributing to SLY
===================
SLY, like most projects related to parser generators, is a niche
project.  Although it appears to be a somewhat "new" project, it is
actually an outgrowth of the PLY project, which has been around since
2001.  Contributions of most kinds that make it better are
welcome--this includes code, documentation, examples, and feature
requests.

There aren't too many formal guidelines.  If submitting a bug report,
any information that helps to reproduce the problem will be handy.  If
submitting a pull request, try to make sure that SLY's test suite
still passes. Even if that's not the case though, that's okay--a
failed test might be something very minor that can be fixed up after a
merge.

Project Scope
-------------
It is not my goal to turn SLY into a gigantic parsing framework with
every possible feature.  What you see here is pretty much what it is--a
basic LALR(1) parser generator and tokenizer.  If you've built something
useful that uses SLY or builds upon it, it's probably better served by
its own repository. Feel free to submit a pull request to the SLY README
file that includes a link to your project.

The SLY "Community" (or lack thereof)
-------------------------------------
As noted, parser generator tools are a highly niche area.  It is
important to emphasize that SLY is very much a side project for
me. No funding is received for this work.  I also run a business and
have a family with kids.  These things have higher priority. As such,
there may be periods in which there is little activity on pull
requests, issues, and other development matters.  Sometimes you might
only see a flurry of activity around the times when I use SLY in
a compilers course that I teach.  Do not mistake "inaction" for
"disinterest."  I am definitely interested in improving SLY--it's
just not practical for me to give it my undivided attention.

Important Note
--------------
As a general rule, pull requests related to third-party tooling (IDEs,
type-checkers, linters, code formatters, etc.) will not be accepted.
If you think something should be changed/improved in this regard,
please submit an issue instead.

LICENSE (new file, 39 lines)

@@ -0,0 +1,39 @@
SLY (Sly Lex-Yacc)

Copyright (C) 2016-2019
David M. Beazley (Dabeaz LLC)
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.
* Neither the name of the David Beazley or Dabeaz LLC may be used to
  endorse or promote products derived from this software without
  specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

MANIFEST.in (new file, 2 lines)

@@ -0,0 +1,2 @@
recursive-include example *
recursive-include docs *

README.md (deleted file, 207 lines)

@@ -1,207 +0,0 @@
SLY (Sly Lex-Yacc)                   Version 0.1

Copyright (C) 2016-2017
David M. Beazley (Dabeaz LLC)
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.
* Neither the name of the David Beazley or Dabeaz LLC may be used to
  endorse or promote products derived from this software without
  specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

CAUTION
=======
THIS IS A WORK IN PROGRESS.  NO OFFICIAL RELEASE HAS BEEN MADE.
USE AT YOUR OWN RISK.

Requirements
============

SLY requires the use of Python 3.6 or greater.  Older versions
of Python are not supported.

Introduction
============

SLY is a 100% Python implementation of the lex and yacc tools
commonly used to write parsers and compilers.  Parsing is
based on the same LALR(1) algorithm used by many yacc tools.
Here are a few notable features:

 -  SLY provides *very* extensive error reporting and diagnostic
    information to assist in parser construction.  The original
    implementation was developed for instructional purposes.  As
    a result, the system tries to identify the most common types
    of errors made by novice users.

 -  SLY provides full support for empty productions, error recovery,
    precedence specifiers, and moderately ambiguous grammars.

 -  SLY uses various Python metaprogramming features to specify
    lexers and parsers.  There are no generated files or extra
    steps involved. You simply write Python code and run it.

 -  SLY can be used to build parsers for "real" programming languages.
    Although it is not ultra-fast due to its Python implementation,
    SLY can be used to parse grammars consisting of several hundred
    rules (as might be found for a language like C).

SLY originates from the PLY project (http://www.dabeaz.com/ply/index.html).
However, it's been modernized a bit.  In fact, don't expect any code
previously written for PLY to work. That said, most of the things
that were possible in PLY are also possible in SLY.

An Example
==========

SLY is probably best illustrated by an example.  Here's what it
looks like to write a parser that can evaluate simple arithmetic
expressions and store variables:

    # -----------------------------------------------------------------------------
    # calc.py
    # -----------------------------------------------------------------------------

    from sly import Lexer, Parser

    class CalcLexer(Lexer):
        tokens = {
            'NAME', 'NUMBER',
            }
        ignore = ' \t'
        literals = { '=', '+', '-', '*', '/', '(', ')' }

        # Tokens
        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'

        @_(r'\d+')
        def NUMBER(self, t):
            t.value = int(t.value)
            return t

        @_(r'\n+')
        def newline(self, t):
            self.lineno += t.value.count('\n')

        def error(self, value):
            print("Illegal character '%s'" % value[0])
            self.index += 1

    class CalcParser(Parser):
        tokens = CalcLexer.tokens

        precedence = (
            ('left', '+', '-'),
            ('left', '*', '/'),
            ('right', 'UMINUS'),
            )

        def __init__(self):
            self.names = { }

        @_('NAME "=" expr')
        def statement(self, p):
            self.names[p.NAME] = p.expr

        @_('expr')
        def statement(self, p):
            print(p.expr)

        @_('expr "+" expr')
        def expr(self, p):
            return p.expr0 + p.expr1

        @_('expr "-" expr')
        def expr(self, p):
            return p.expr0 - p.expr1

        @_('expr "*" expr')
        def expr(self, p):
            return p.expr0 * p.expr1

        @_('expr "/" expr')
        def expr(self, p):
            return p.expr0 / p.expr1

        @_('"-" expr %prec UMINUS')
        def expr(self, p):
            return -p.expr

        @_('"(" expr ")"')
        def expr(self, p):
            return p.expr

        @_('NUMBER')
        def expr(self, p):
            return p.NUMBER

        @_('NAME')
        def expr(self, p):
            try:
                return self.names[p.NAME]
            except LookupError:
                print("Undefined name '%s'" % p.NAME)
                return 0

    if __name__ == '__main__':
        lexer = CalcLexer()
        parser = CalcParser()
        while True:
            try:
                text = input('calc > ')
            except EOFError:
                break
            if text:
                parser.parse(lexer.tokenize(text))

Documentation
=============

Further documentation can be found at https://sly.readthedocs.io/en/latest

Resources
=========

For a detailed overview of parsing theory, consult the excellent
book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and
Ullman.  The topics found in "Lex & Yacc" by Levine, Mason, and Brown
may also be useful.

The GitHub page for SLY can be found at:

     https://github.com/dabeaz/sly

Please direct bug reports and pull requests to the GitHub page.
To contact me directly, send email to dave@dabeaz.com or contact
me on Twitter (@dabeaz).

-- Dave

README.rst (new file, 187 lines)

@@ -0,0 +1,187 @@
SLY (Sly Lex-Yacc)
==================

SLY is a 100% Python implementation of the lex and yacc tools
commonly used to write parsers and compilers.  Parsing is
based on the same LALR(1) algorithm used by many yacc tools.
Here are a few notable features:

-  SLY provides *very* extensive error reporting and diagnostic
   information to assist in parser construction.  The original
   implementation was developed for instructional purposes.  As
   a result, the system tries to identify the most common types
   of errors made by novice users.

-  SLY provides full support for empty productions, error recovery,
   precedence specifiers, and moderately ambiguous grammars.

-  SLY uses various Python metaprogramming features to specify
   lexers and parsers.  There are no generated files or extra
   steps involved. You simply write Python code and run it.

-  SLY can be used to build parsers for "real" programming languages.
   Although it is not ultra-fast due to its Python implementation,
   SLY can be used to parse grammars consisting of several hundred
   rules (as might be found for a language like C).

SLY originates from the `PLY project <http://www.dabeaz.com/ply/index.html>`_.
However, it's been modernized a bit.  In fact, don't expect any code
previously written for PLY to work. That said, most of the things
that were possible in PLY are also possible in SLY.

SLY is a modern library for performing lexing and parsing. It
implements the LALR(1) parsing algorithm, commonly used for
parsing and compiling various programming languages.

Requirements
------------

SLY requires the use of Python 3.6 or greater.  Older versions
of Python are not supported.

An Example
----------

SLY is probably best illustrated by an example.  Here's what it
looks like to write a parser that can evaluate simple arithmetic
expressions and store variables:

.. code:: python

    # -----------------------------------------------------------------------------
    # calc.py
    # -----------------------------------------------------------------------------

    from sly import Lexer, Parser

    class CalcLexer(Lexer):
        tokens = { NAME, NUMBER, PLUS, TIMES, MINUS, DIVIDE, ASSIGN, LPAREN, RPAREN }
        ignore = ' \t'

        # Tokens
        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
        NUMBER = r'\d+'

        # Special symbols
        PLUS = r'\+'
        MINUS = r'-'
        TIMES = r'\*'
        DIVIDE = r'/'
        ASSIGN = r'='
        LPAREN = r'\('
        RPAREN = r'\)'

        # Ignored pattern
        ignore_newline = r'\n+'

        # Extra action for newlines
        def ignore_newline(self, t):
            self.lineno += t.value.count('\n')

        def error(self, t):
            print("Illegal character '%s'" % t.value[0])
            self.index += 1

    class CalcParser(Parser):
        tokens = CalcLexer.tokens

        precedence = (
            ('left', PLUS, MINUS),
            ('left', TIMES, DIVIDE),
            ('right', UMINUS),
            )

        def __init__(self):
            self.names = { }

        @_('NAME ASSIGN expr')
        def statement(self, p):
            self.names[p.NAME] = p.expr

        @_('expr')
        def statement(self, p):
            print(p.expr)

        @_('expr PLUS expr')
        def expr(self, p):
            return p.expr0 + p.expr1

        @_('expr MINUS expr')
        def expr(self, p):
            return p.expr0 - p.expr1

        @_('expr TIMES expr')
        def expr(self, p):
            return p.expr0 * p.expr1

        @_('expr DIVIDE expr')
        def expr(self, p):
            return p.expr0 / p.expr1

        @_('MINUS expr %prec UMINUS')
        def expr(self, p):
            return -p.expr

        @_('LPAREN expr RPAREN')
        def expr(self, p):
            return p.expr

        @_('NUMBER')
        def expr(self, p):
            return int(p.NUMBER)

        @_('NAME')
        def expr(self, p):
            try:
                return self.names[p.NAME]
            except LookupError:
                print(f'Undefined name {p.NAME!r}')
                return 0

    if __name__ == '__main__':
        lexer = CalcLexer()
        parser = CalcParser()
        while True:
            try:
                text = input('calc > ')
            except EOFError:
                break
            if text:
                parser.parse(lexer.tokenize(text))

Documentation
-------------

Further documentation can be found at `https://sly.readthedocs.io/en/latest <https://sly.readthedocs.io/en/latest>`_.

Talks
-----

* `Reinventing the Parser Generator <https://www.youtube.com/watch?v=zJ9z6Ge-vXs>`_, talk by David Beazley at PyCon 2018, Cleveland.

Resources
---------

For a detailed overview of parsing theory, consult the excellent
book "Compilers : Principles, Techniques, and Tools" by Aho, Sethi, and
Ullman.  The topics found in "Lex & Yacc" by Levine, Mason, and Brown
may also be useful.

The GitHub page for SLY can be found at:

     ``https://github.com/dabeaz/sly``

Please direct bug reports and pull requests to the GitHub page.
To contact me directly, send email to dave@dabeaz.com or contact
me on Twitter (@dabeaz).

-- Dave

P.S.
----

You should come take a `course <https://www.dabeaz.com/courses.html>`_!
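
For illustration (an editor's addition, not part of the README), a short
session with the calculator above behaves like this::

    calc > x = 3 + 42
    calc > x * 2
    90
    calc > (x - 5) / 10
    4.0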

@@ -60,9 +60,7 @@ expressions and store variables::
     from sly import Lexer, Parser
 
     class CalcLexer(Lexer):
-        tokens = {
-            'NAME', 'NUMBER',
-            }
+        tokens = { NAME, NUMBER }
         ignore = ' \t'
         literals = { '=', '+', '-', '*', '/', '(', ')' }
 

@@ -78,8 +76,8 @@ expressions and store variables::
         def newline(self, t):
             self.lineno += t.value.count('\n')
 
-        def error(self, value):
-            print("Illegal character '%s'" % value[0])
+        def error(self, t):
+            print("Illegal character '%s'" % t.value[0])
             self.index += 1
 
     class CalcParser(Parser):

docs/sly.rst (296 changed lines)

@@ -2,9 +2,9 @@ SLY (Sly Lex Yacc)
							@@ -2,9 +2,9 @@ SLY (Sly Lex Yacc)
 | 
			
		||||
==================
 | 
			
		||||
 | 
			
		||||
This document provides an overview of lexing and parsing with SLY.
 | 
			
		||||
Given the intrinsic complexity of parsing, I would strongly advise 
 | 
			
		||||
Given the intrinsic complexity of parsing, I would strongly advise
 | 
			
		||||
that you read (or at least skim) this entire document before jumping
 | 
			
		||||
into a big development project with SLY.  
 | 
			
		||||
into a big development project with SLY.
 | 
			
		||||
 | 
			
		||||
SLY requires Python 3.6 or newer.  If you're using an older version,
 | 
			
		||||
you're out of luck. Sorry.
 | 
			
		||||
@@ -54,10 +54,10 @@ The first step of parsing is to break the text into tokens where
 | 
			
		||||
each token has a type and value. For example, the above text might be
 | 
			
		||||
described by the following list of token tuples::
 | 
			
		||||
 | 
			
		||||
    [ ('ID','x'), ('EQUALS','='), ('NUMBER','3'), 
 | 
			
		||||
    [ ('ID','x'), ('EQUALS','='), ('NUMBER','3'),
 | 
			
		||||
      ('PLUS','+'), ('NUMBER','42'), ('TIMES','*'),
 | 
			
		||||
      ('LPAREN','('), ('ID','s'), ('MINUS','-'),
 | 
			
		||||
      ('ID','t'), ('RPAREN',')' ]
 | 
			
		||||
      ('ID','t'), ('RPAREN',')') ]
 | 
			
		||||
 | 
			
		||||
The SLY ``Lexer`` class is used to do this.   Here is a sample of a simple
 | 
			
		||||
lexer that tokenizes the above text::
 | 
			
		||||
@@ -68,17 +68,8 @@ lexer that tokenizes the above text::
 | 
			
		||||
 | 
			
		||||
    class CalcLexer(Lexer):
 | 
			
		||||
        # Set of token names.   This is always required
 | 
			
		||||
        tokens = {
 | 
			
		||||
            'ID',       
 | 
			
		||||
            'NUMBER',
 | 
			
		||||
            'PLUS',
 | 
			
		||||
            'MINUS',
 | 
			
		||||
            'TIMES',
 | 
			
		||||
            'DIVIDE',
 | 
			
		||||
            'ASSIGN',
 | 
			
		||||
            'LPAREN',
 | 
			
		||||
            'RPAREN',
 | 
			
		||||
            }
 | 
			
		||||
        tokens = { ID, NUMBER, PLUS, MINUS, TIMES,
 | 
			
		||||
                   DIVIDE, ASSIGN, LPAREN, RPAREN }
 | 
			
		||||
 | 
			
		||||
        # String containing ignored characters between tokens
 | 
			
		||||
        ignore = ' \t'
 | 
			
		||||
@@ -117,7 +108,7 @@ When executed, the example will produce the following output::
 | 
			
		||||
A lexer only has one public method ``tokenize()``.  This is a generator
 | 
			
		||||
function that produces a stream of ``Token`` instances.
 | 
			
		||||
The ``type`` and ``value`` attributes of ``Token`` contain the
 | 
			
		||||
token type name and value respectively.  
 | 
			
		||||
token type name and value respectively.
 | 
			
		||||
 | 
			
		||||
The tokens set
 | 
			
		||||
^^^^^^^^^^^^^^^
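
For illustration (an editor's sketch, not part of the diff): per the hunk
above, ``tokenize()`` is a generator, so given the ``CalcLexer`` shown
earlier it can be driven directly and the documented ``type`` and ``value``
attributes read off each token::

    lexer = CalcLexer()
    for tok in lexer.tokenize('x = 3 + 42'):
        print(tok.type, tok.value)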

@@ -131,19 +122,12 @@ In the example, the following code specified the token names::
 
     class CalcLexer(Lexer):
         ...
         # Set of token names.   This is always required
-        tokens = {
-            'ID',
-            'NUMBER',
-            'PLUS',
-            'MINUS',
-            'TIMES',
-            'DIVIDE',
-            'ASSIGN',
-            'LPAREN',
-            'RPAREN',
-            }
+        tokens = { ID, NUMBER, PLUS, MINUS, TIMES,
+                   DIVIDE, ASSIGN, LPAREN, RPAREN }
         ...
 
 Token names should be specified using all-caps as shown.

@@ -155,7 +139,7 @@ names of the tokens provided in the ``tokens`` set.  For example::
     MINUS = r'-'
 
 Regular expression patterns are compiled using the ``re.VERBOSE`` flag
-which can be used to help readability.  However, 
+which can be used to help readability.  However,
 unescaped whitespace is ignored and comments are allowed in this mode.
 If your pattern involves whitespace, make sure you use ``\s``.  If you
 need to match the ``#`` character, use ``[#]`` or ``\#``.

@@ -167,7 +151,7 @@ short tokens.  For example, if you wanted to have separate tokens for
 example::
 
     class MyLexer(Lexer):
-        tokens = {'ASSIGN', 'EQ', ...}
+        tokens = { ASSIGN, EQ, ...}
         ...
         EQ     = r'=='       # MUST APPEAR FIRST! (LONGER)
         ASSIGN = r'='

@@ -205,8 +189,8 @@ comments and newlines::
         ...
 
     if __name__ == '__main__':
-        data = '''x = 3 + 42 
-                    * (s    # This is a comment 
+        data = '''x = 3 + 42
+                    * (s    # This is a comment
                         - t)'''
         lexer = CalcLexer()
         for tok in lexer.tokenize(data):

@@ -235,7 +219,7 @@ object should be returned as a result. If no value is returned by the
 function, the token is discarded and the next token read.
 
 The ``@_()`` decorator is defined automatically within the ``Lexer``
-class--you don't need to do any kind of special import for it. 
+class--you don't need to do any kind of special import for it.
 It can also accept multiple regular expression rules. For example::
 
     @_(r'0x[0-9a-fA-F]+',

@@ -251,12 +235,10 @@ Instead of using the ``@_()`` decorator, you can also write a method
 that matches the same name as a token previously specified as a
 string. For example::
 
-    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
+    NUMBER = r'\d+'
     ...
-    def ID(self, t):
-        reserved = { 'if', 'else', 'while', 'for' }
-        if t.value in reserved:
-             t.type = t.value.upper()
+    def NUMBER(self, t):
+        t.value = int(t.value)
         return t
 
 This is a potentially useful trick for debugging a lexer.  You can temporarily

@@ -264,6 +246,36 @@ attach a method to a token and have it execute when the token is encountered.
 If you later take the method away, the lexer will revert back to its original
 behavior.
 
+Token Remapping
+^^^^^^^^^^^^^^^
+
+Occasionally, you might need to remap tokens based on special cases.
+Consider the case of matching identifiers such as "abc", "python", or "guido".
+Certain identifiers such as "if", "else", and "while" might need to be
+treated as special keywords.  To handle this, include token remapping rules when
+writing the lexer like this::
+
+    # calclex.py
+
+    from sly import Lexer
+
+    class CalcLexer(Lexer):
+        tokens = { ID, IF, ELSE, WHILE }
+        # String containing ignored characters (between tokens)
+        ignore = ' \t'
+
+        # Base ID rule
+        ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+        # Special cases
+        ID['if'] = IF
+        ID['else'] = ELSE
+        ID['while'] = WHILE
+
+When parsing an identifier, the special cases will remap certain matching
+values to a new token type.  For example, if the value of an identifier is
+"if" above, an ``IF`` token will be generated.
+
 Line numbers and position tracking
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -288,7 +300,7 @@ it does record positional information related to each token in the token's
 column information as a separate step.  For instance, you can search
 backwards until you reach the previous newline::
 
-    # Compute column. 
+    # Compute column.
     #     input is the input text string
     #     token is a token instance
     def find_column(text, token):
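
For reference (an editor's reconstruction following the PLY/SLY convention;
the hunk above shows only the helper's first lines), the complete function
reads roughly::

    def find_column(text, token):
        # Scan backwards from the token's index to the previous newline
        last_cr = text.rfind('\n', 0, token.index)
        if last_cr < 0:
            last_cr = 0
        column = (token.index - last_cr) + 1
        return column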
 | 
			
		||||
@@ -350,15 +362,15 @@ Error handling
 | 
			
		||||
If a bad character is encountered while lexing, tokenizing will stop.
 | 
			
		||||
However, you can add an ``error()`` method to handle lexing errors
 | 
			
		||||
that occur when illegal characters are detected.  The error method
 | 
			
		||||
receives a string containing all remaining untokenized text.  A
 | 
			
		||||
typical handler might look at this text and skip ahead in some manner.
 | 
			
		||||
For example::
 | 
			
		||||
receives a ``Token`` where the ``value`` attribute contains all
 | 
			
		||||
remaining untokenized text.  A typical handler might look at this text
 | 
			
		||||
and skip ahead in some manner.  For example::
 | 
			
		||||
 | 
			
		||||
    class MyLexer(Lexer):
 | 
			
		||||
        ...
 | 
			
		||||
        # Error handling rule
 | 
			
		||||
        def error(self, value):
 | 
			
		||||
            print("Illegal character '%s'" % value[0])
 | 
			
		||||
        def error(self, t):
 | 
			
		||||
            print("Illegal character '%s'" % t.value[0])
 | 
			
		||||
            self.index += 1
 | 
			
		||||
 | 
			
		||||
In this case, we print the offending character and skip ahead
 | 
			
		||||
@@ -367,37 +379,48 @@ parser is often a hard problem.  An error handler might scan ahead
 | 
			
		||||
to a logical synchronization point such as a semicolon, a blank line,
 | 
			
		||||
or similar landmark.
 | 
			
		||||
 | 
			
		||||
If the ``error()`` method also returns the passed token, it will
 | 
			
		||||
show up as an ``ERROR`` token in the resulting token stream. This
 | 
			
		||||
might be useful if the parser wants to see error tokens for some
 | 
			
		||||
reason--perhaps for the purposes of improved error messages or
 | 
			
		||||
some other kind of error handling.
 | 
			
		||||
 | 
			
		||||
Third-Party Regex Module
 | 
			
		||||
^^^^^^^^^^^^^^^^^^^^^^^^
 | 
			
		||||
 | 
			
		||||
.. versionadded:: 0.4
 | 
			
		||||
 | 
			
		||||
The third-party `regex <https://pypi.org/project/regex/>`_ module can be used
 | 
			
		||||
with sly. Like this::
 | 
			
		||||
 | 
			
		||||
    from sly import Lexer
 | 
			
		||||
    import regex
 | 
			
		||||
 | 
			
		||||
    class MyLexer(Lexer):
 | 
			
		||||
        regex_module = regex
 | 
			
		||||
        ...
 | 
			
		||||
 | 
			
		||||
Now all regular expressions that ``MyLexer`` uses will be handled with the
 | 
			
		||||
``regex`` module. The ``regex_module`` can be set to any module that is
 | 
			
		||||
compatible with Python's standard library ``re``.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
A More Complete Example
 | 
			
		||||
^^^^^^^^^^^^^^^^^^^^^^^
 | 
			
		||||
 | 
			
		||||
Here is a more complete example that puts many of these concepts 
 | 
			
		||||
Here is a more complete example that puts many of these concepts
 | 
			
		||||
into practice::
 | 
			
		||||
 | 
			
		||||
    # calclex.py
 | 
			
		||||
 | 
			
		||||
    from sly import Lexer
 | 
			
		||||
 
 | 
			
		||||
    class CalcLexer(Lexer):
 | 
			
		||||
        # Set of reserved names (language keywords)
 | 
			
		||||
        reserved_words = { 'WHILE', 'IF', 'ELSE', 'PRINT' }
 | 
			
		||||
 | 
			
		||||
    class CalcLexer(Lexer):
 | 
			
		||||
        # Set of token names.   This is always required
 | 
			
		||||
        tokens = {
 | 
			
		||||
            'NUMBER',
 | 
			
		||||
            'ID',
 | 
			
		||||
            'PLUS',
 | 
			
		||||
            'MINUS',
 | 
			
		||||
            'TIMES',
 | 
			
		||||
            'DIVIDE',
 | 
			
		||||
            'ASSIGN',
 | 
			
		||||
            'EQ',
 | 
			
		||||
            'LT',
 | 
			
		||||
            'LE',
 | 
			
		||||
            'GT',
 | 
			
		||||
            'GE',
 | 
			
		||||
            'NE',
 | 
			
		||||
            *reserved_words,
 | 
			
		||||
            } 
 | 
			
		||||
        tokens = { NUMBER, ID, WHILE, IF, ELSE, PRINT,
 | 
			
		||||
                   PLUS, MINUS, TIMES, DIVIDE, ASSIGN,
 | 
			
		||||
                   EQ, LT, LE, GT, GE, NE }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        literals = { '(', ')', '{', '}', ';' }
 | 
			
		||||
 | 
			
		||||
@@ -416,18 +439,18 @@ into practice::
 | 
			
		||||
        GE      = r'>='
 | 
			
		||||
        GT      = r'>'
 | 
			
		||||
        NE      = r'!='
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
        @_(r'\d+')
 | 
			
		||||
        def NUMBER(self, t):
 | 
			
		||||
            t.value = int(t.value)
 | 
			
		||||
            return t
 | 
			
		||||
 | 
			
		||||
        @_(r'[a-zA-Z_][a-zA-Z0-9_]*')
 | 
			
		||||
        def ID(self, t):
 | 
			
		||||
            # Check if name matches a reserved word (change token type if true)
 | 
			
		||||
            if t.value.upper() in self.reserved_words:
 | 
			
		||||
                t.type = t.value.upper()
 | 
			
		||||
            return t
 | 
			
		||||
        # Identifiers and keywords
 | 
			
		||||
        ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
 | 
			
		||||
        ID['if'] = IF
 | 
			
		||||
        ID['else'] = ELSE
 | 
			
		||||
        ID['while'] = WHILE
 | 
			
		||||
        ID['print'] = PRINT
 | 
			
		||||
 | 
			
		||||
        ignore_comment = r'\#.*'
 | 
			
		||||
 | 
			
		||||
@@ -436,8 +459,8 @@ into practice::
 | 
			
		||||
        def ignore_newline(self, t):
 | 
			
		||||
            self.lineno += t.value.count('\n')
 | 
			
		||||
 | 
			
		||||
        def error(self, value):
 | 
			
		||||
            print('Line %d: Bad character %r' % (self.lineno, value[0]))
 | 
			
		||||
        def error(self, t):
 | 
			
		||||
            print('Line %d: Bad character %r' % (self.lineno, t.value[0]))
 | 
			
		||||
            self.index += 1
 | 
			
		||||
 | 
			
		||||
    if __name__ == '__main__':
 | 
			
		||||
@@ -455,27 +478,27 @@ into practice::

If you run this code, you'll get output that looks like this::

    Token(ID, 'x', 3, 12)
    Token(ASSIGN, '=', 3, 14)
    Token(NUMBER, 0, 3, 16)
    Token(;, ';', 3, 17)
    Token(WHILE, 'while', 4, 19)
    Token((, '(', 4, 25)
    Token(ID, 'x', 4, 26)
    Token(LT, '<', 4, 28)
    Token(NUMBER, 10, 4, 30)
    Token(), ')', 4, 32)
    Token({, '{', 4, 34)
    Token(PRINT, 'print', 5, 40)
    Token(ID, 'x', 5, 46)
    Token(type='ID', value='x', lineno=3, index=20)
    Token(type='ASSIGN', value='=', lineno=3, index=22)
    Token(type='NUMBER', value=0, lineno=3, index=24)
    Token(type=';', value=';', lineno=3, index=25)
    Token(type='WHILE', value='while', lineno=4, index=31)
    Token(type='(', value='(', lineno=4, index=37)
    Token(type='ID', value='x', lineno=4, index=38)
    Token(type='LT', value='<', lineno=4, index=40)
    Token(type='NUMBER', value=10, lineno=4, index=42)
    Token(type=')', value=')', lineno=4, index=44)
    Token(type='{', value='{', lineno=4, index=46)
    Token(type='PRINT', value='print', lineno=5, index=56)
    Token(type='ID', value='x', lineno=5, index=62)
    Line 5: Bad character ':'
    Token(ID, 'x', 6, 53)
    Token(ASSIGN, '=', 6, 55)
    Token(ID, 'x', 6, 57)
    Token(PLUS, '+', 6, 59)
    Token(NUMBER, 1, 6, 61)
    Token(;, ';', 6, 62)
    Token(}, '}', 7, 64)
    Token(type='ID', value='x', lineno=6, index=73)
    Token(type='ASSIGN', value='=', lineno=6, index=75)
    Token(type='ID', value='x', lineno=6, index=77)
    Token(type='PLUS', value='+', lineno=6, index=79)
    Token(type='NUMBER', value=1, lineno=6, index=81)
    Token(type=';', value=';', lineno=6, index=82)
    Token(type='}', value='}', lineno=7, index=88)

Study this example closely.  It might take a bit to digest, but all of the
essential parts of writing a lexer are there. Tokens have to be specified
@@ -501,7 +524,7 @@ specification like this::
    expr       : expr + term
               | expr - term
               | term

    term       : term * factor
               | term / factor
               | factor
@@ -528,7 +551,7 @@ example, given the expression grammar above, you might write the
specification for the operation of a simple calculator like this::

    Grammar                   Action
    ------------------------  --------------------------------
    expr0   : expr1 + term    expr0.val = expr1.val + term.val
            | expr1 - term    expr0.val = expr1.val - term.val
            | term            expr0.val = term.val
@@ -545,7 +568,7 @@ values then propagate according to the actions described above.  For
example, ``factor.val = int(NUMBER.val)`` propagates the value from
``NUMBER`` to ``factor``.  ``term0.val = factor.val`` propagates the
value from ``factor`` to ``term``.  Rules such as ``expr0.val =
expr1.val + term1.val`` combine and propagate values further. Just to
illustrate, here is how values propagate in the expression ``2 + 3 * 4``::

     NUMBER.val=2 + NUMBER.val=3 * NUMBER.val=4    # NUMBER -> factor
@@ -556,7 +579,7 @@ illustrate, here is how values propagate in the expression ``2 + 3 * 4``::
     expr.val=2 + term.val=3 * NUMBER.val=4        # NUMBER -> factor
     expr.val=2 + term.val=3 * factor.val=4        # term * factor -> term
     expr.val=2 + term.val=12                      # expr + term -> expr
     expr.val=14

SLY uses a parsing technique known as LR-parsing or shift-reduce
parsing.  LR parsing is a bottom up technique that tries to recognize
@@ -848,6 +871,39 @@ string. However, writing an "empty" rule and using "empty" to denote an
empty production may be easier to read and more clearly state your
intention.
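
As a minimal sketch of that idiom (the rule names here are hypothetical, not
part of this change), an explicit "empty" rule and one use of it might look
like::

    @_('')
    def empty(self, p):
        pass

    @_('item_list',
       'empty')
    def optitems(self, p):
        # optitems matches either a list of items or nothing at all
        pass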

EBNF Features (Optionals and Repeats)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Certain grammar features occur with some frequency.  For example, suppose you want to
have an optional item as shown in the previous section.  An alternate way to specify
it is to enclose one or more symbols in [ ] like this::

    @_('[ item ] grok')
    def spam(self, p):
        if p.item is not None:
            print("item was given and has value", p.item)
        else:
            print("item was not given")

    @_('whatever')
    def item(self, p):
        ...

In this case, the value of ``p.item`` is set to ``None`` if the value wasn't supplied.
Otherwise, it will have the value returned by the ``item`` rule below.

You can also encode repetitions.  For example, a common construction is a
list of comma-separated expressions.  To parse that, you could write::

    @_('expr { COMMA expr }')
    def exprlist(self, p):
        return [p.expr0] + p.expr1

In this example, the ``{ COMMA expr }`` represents zero or more repetitions
of a rule.  The value of all symbols inside is now a list.  So, ``p.expr1``
is a list of all expressions matched.  Note, when duplicate symbol names
appear in a rule, they are distinguished by appending a numeric index as shown.

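To make the list values concrete (a hypothetical illustration, not part of
this change), the same pattern gives a semicolon-separated statement list::

    @_('statement { SEMI statement }')
    def block(self, p):
        # For "a; b; c", p.statement0 is the first statement and
        # p.statement1 is the list of statements from the repetition,
        # so the result is the full list [a, b, c].
        return [p.statement0] + p.statement1
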
Dealing With Ambiguous Grammars
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

@@ -907,8 +963,8 @@ like this::
    class CalcParser(Parser):
        ...
        precedence = (
           ('left', 'PLUS', 'MINUS'),
           ('left', 'TIMES', 'DIVIDE'),
           ('left', PLUS, MINUS),
           ('left', TIMES, DIVIDE),
        )

        # Rules where precedence is applied
@@ -997,9 +1053,9 @@ like this::
    class CalcParser(Parser):
        ...
        precedence = (
            ('left', 'PLUS', 'MINUS'),
            ('left', 'TIMES', 'DIVIDE'),
            ('right', 'UMINUS'),            # Unary minus operator
            ('left', PLUS, MINUS),
            ('left', TIMES, DIVIDE),
            ('right', UMINUS),            # Unary minus operator
        )

Now, in the grammar file, you write the unary minus rule like this::
@@ -1027,10 +1083,10 @@ operators like ``<`` and ``>`` but you didn't want combinations like
    class MyParser(Parser):
         ...
         precedence = (
              ('nonassoc', 'LESSTHAN', 'GREATERTHAN'),  # Nonassociative operators
              ('left', 'PLUS', 'MINUS'),
              ('left', 'TIMES', 'DIVIDE'),
              ('right', 'UMINUS'),            # Unary minus operator
              ('nonassoc', LESSTHAN, GREATERTHAN),  # Nonassociative operators
              ('left', PLUS, MINUS),
              ('left', TIMES, DIVIDE),
              ('right', UMINUS),            # Unary minus operator
         )

If you do this, the occurrence of input text such as ``a < b < c``
@@ -1046,7 +1102,7 @@ generate the same set of symbols.  For example::

    assignment :  ID EQUALS NUMBER
               |  ID EQUALS expr

    expr       : expr PLUS expr
               | expr MINUS expr
               | expr TIMES expr
@@ -1097,7 +1153,7 @@ states to the file you specify.  Each state of the parser is shown
as output that looks something like this::

    state 2

        (7) factor -> LPAREN . expr RPAREN
        (1) expr -> . term
        (2) expr -> . expr MINUS term
@@ -1109,7 +1165,7 @@ as output that looks something like this::
        (8) factor -> . NUMBER
        LPAREN          shift and go to state 2
        NUMBER          shift and go to state 3

        factor                         shift and go to state 1
        term                           shift and go to state 4
        expr                           shift and go to state 6
@@ -1123,7 +1179,7 @@ usually track down the source of most parsing conflicts.  It should
also be stressed that not all shift-reduce conflicts are bad.
However, the only way to be sure that they are resolved correctly is
to look at the debugging file.

Syntax Error Handling
^^^^^^^^^^^^^^^^^^^^^

@@ -1208,7 +1264,7 @@ appear as the last token on the right in an error rule.  For example::
This is because the first bad token encountered will cause the rule to
be reduced--which may make it difficult to recover if more bad tokens
immediately follow.  It's better to have some kind of landmark such as
a semicolon, closing parenthesese, or other token that can be used as
a semicolon, closing parentheses, or other token that can be used as
a synchronization point.
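
A minimal sketch of such a rule (rule and token names are hypothetical), using
a semicolon as the synchronization point::

    @_('ID ASSIGN error SEMI')
    def statement(self, p):
        # The error symbol absorbs the bad tokens; the trailing SEMI
        # gives the parser a firm place to resume.
        print('Bad expression in assignment')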

Panic mode recovery
@@ -1232,7 +1288,7 @@ state::
        # Read ahead looking for a closing '}'
        while True:
            tok = next(self.tokens, None)
            if not tok or tok.type == 'RBRACE':
                break
        self.restart()

@@ -1267,12 +1323,12 @@ useful if trying to synchronize on special characters.  For example::
        # Read ahead looking for a terminating ";"
        while True:
            tok = next(self.tokens, None)           # Get the next token
            if not tok or tok.type == 'SEMI':
                break
            self.errok()

        # Return SEMI to the parser as the next lookahead token
        return tok

When Do Syntax Errors Get Reported?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1335,7 +1391,7 @@ are many possible ways to do this, but one example is something
like this::

    @_('expr PLUS expr',
       'expr MINUS expr',
       'expr TIMES expr',
       'expr DIVIDE expr')
    def expr(self, p):
@@ -1343,17 +1399,17 @@ like this::

    @_('LPAREN expr RPAREN')
    def expr(self, p):
        return ('group-expression',p.expr])
        return ('group-expression', p.expr)

    @_('NUMBER')
    def expr(self, p):
        return ('number-expression', p.NUMBER)

Another approach is to create a set of data structure for different
Another approach is to create a set of data structures for different
kinds of abstract syntax tree nodes and create different node types
in each rule::

    class Expr:
        pass

    class BinOp(Expr):
@@ -1367,7 +1423,7 @@ in each rule::
            self.value = value

    @_('expr PLUS expr',
       'expr MINUS expr',
       'expr TIMES expr',
       'expr DIVIDE expr')
    def expr(self, p):
@@ -1490,7 +1546,7 @@ C code, you might write code like this::
        # Action code
        ...
        pop_scope()        # Return to previous scope

    @_('')
    def new_scope(self, p):
        # Create a new scope for local variables

@@ -3,95 +3,101 @@
# -----------------------------------------------------------------------------

import sys
sys.path.insert(0, "../..")
sys.path.insert(0, '../..')

from sly import Lexer, Parser

class CalcLexer(Lexer):
    # Set of token names.   This is always required
    tokens = {
        'ID',
        'NUMBER',
        'PLUS',
        'MINUS',
        'TIMES',
        'DIVIDE',
        'ASSIGN',
        'LPAREN',
        'RPAREN',
        }

    # String containing ignored characters between tokens
    tokens = { NAME, NUMBER, PLUS, TIMES, MINUS, DIVIDE, ASSIGN, LPAREN, RPAREN }
    ignore = ' \t'

    # Regular expression rules for tokens
    ID      = r'[a-zA-Z_][a-zA-Z0-9_]*'
    PLUS    = r'\+'
    MINUS   = r'-'
    TIMES   = r'\*'
    DIVIDE  = r'/'
    ASSIGN  = r'='
    LPAREN  = r'\('
    RPAREN  = r'\)'
    # Tokens
    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t
    # Special symbols
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    LPAREN = r'\('
    RPAREN = r'\)'

    @_(r'\n+')
    def newline(self, t):
    # Ignored pattern
    ignore_newline = r'\n+'

    # Extra action for newlines
    def ignore_newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, value):
        print("Illegal character '%s'" % value[0])
    def error(self, t):
        print("Illegal character '%s'" % t.value[0])
        self.index += 1

class CalcParser(Parser):
    # Get the token list from the lexer (required)
    tokens = CalcLexer.tokens

    # Grammar rules and actions
    @_('expr PLUS term')
    precedence = (
        ('left', PLUS, MINUS),
        ('left', TIMES, DIVIDE),
        ('right', UMINUS)
        )

    def __init__(self):
        self.names = { }

    @_('NAME ASSIGN expr')
    def statement(self, p):
        self.names[p.NAME] = p.expr

    @_('expr')
    def statement(self, p):
        print(p.expr)

    @_('expr PLUS expr')
    def expr(self, p):
        return p.expr + p.term
        return p.expr0 + p.expr1

    @_('expr MINUS term')
    @_('expr MINUS expr')
    def expr(self, p):
        return p.expr - p.term
        return p.expr0 - p.expr1

    @_('term')
    @_('expr TIMES expr')
    def expr(self, p):
        return p.term
        return p.expr0 * p.expr1

    @_('term TIMES factor')
    def term(self, p):
        return p.term * p.factor
    @_('expr DIVIDE expr')
    def expr(self, p):
        return p.expr0 / p.expr1

    @_('term DIVIDE factor')
    def term(self, p):
        return p.term / p.factor

    @_('factor')
    def term(self, p):
        return p.factor

    @_('NUMBER')
    def factor(self, p):
        return p.NUMBER
    @_('MINUS expr %prec UMINUS')
    def expr(self, p):
        return -p.expr

    @_('LPAREN expr RPAREN')
    def factor(self, p):
    def expr(self, p):
        return p.expr

    @_('NUMBER')
    def expr(self, p):
        return int(p.NUMBER)

    @_('NAME')
    def expr(self, p):
        try:
            return self.names[p.NAME]
        except LookupError:
            print(f'Undefined name {p.NAME!r}')
            return 0

if __name__ == '__main__':
    lexer = CalcLexer()
    parser = CalcParser()

    while True:
        try:
            text = input('calc > ')
            result = parser.parse(lexer.tokenize(text))
            print(result)
        except EOFError:
            break
        if text:
            parser.parse(lexer.tokenize(text))

example/calc_ebnf/calc.py (new file, 101 lines)
@@ -0,0 +1,101 @@
# -----------------------------------------------------------------------------
# calc.py
# -----------------------------------------------------------------------------

import sys
sys.path.insert(0, '../..')

from sly import Lexer, Parser

class CalcLexer(Lexer):
    tokens = { NAME, NUMBER, PLUS, TIMES, MINUS, DIVIDE, ASSIGN, LPAREN, RPAREN }
    ignore = ' \t'

    # Tokens
    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NUMBER = r'\d+'

    # Special symbols
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    ASSIGN = r'='
    LPAREN = r'\('
    RPAREN = r'\)'

    # Ignored pattern
    ignore_newline = r'\n+'

    # Extra action for newlines
    def ignore_newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, t):
        print("Illegal character '%s'" % t.value[0])
        self.index += 1

class CalcParser(Parser):
    tokens = CalcLexer.tokens

    def __init__(self):
        self.names = { }

    @_('NAME ASSIGN expr')
    def statement(self, p):
        self.names[p.NAME] = p.expr

    @_('expr')
    def statement(self, p):
        print(p.expr)

    @_('term { PLUS|MINUS term }')
    def expr(self, p):
        lval = p.term0
        for op, rval in p[1]:
            if op == '+':
                lval = lval + rval
            elif op == '-':
                lval = lval - rval
        return lval

    @_('factor { TIMES|DIVIDE factor }')
    def term(self, p):
        lval = p.factor0
        for op, rval in p[1]:
            if op == '*':
                lval = lval * rval
            elif op == '/':
                lval = lval / rval
        return lval

    @_('MINUS factor')
    def factor(self, p):
        return -p.factor

    @_('LPAREN expr RPAREN')
    def factor(self, p):
        return p.expr

    @_('NUMBER')
    def factor(self, p):
        return int(p.NUMBER)

    @_('NAME')
    def factor(self, p):
        try:
            return self.names[p.NAME]
        except LookupError:
            print(f'Undefined name {p.NAME!r}')
            return 0

if __name__ == '__main__':
    lexer = CalcLexer()
    parser = CalcParser()
    while True:
        try:
            text = input('calc > ')
        except EOFError:
            break
        if text:
            parser.parse(lexer.tokenize(text))
@@ -8,9 +8,7 @@ sys.path.insert(0, "../..")
from sly import Lexer, Parser

class CalcLexer(Lexer):
    tokens = {
        'NAME', 'NUMBER',
        }
    tokens = { NAME, NUMBER }
    ignore = ' \t'
    literals = { '=', '+', '-', '*', '/', '(', ')' }

@@ -26,8 +24,8 @@ class CalcLexer(Lexer):
    def newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, value):
        print("Illegal character '%s'" % value[0])
    def error(self, t):
        print("Illegal character '%s'" % t.value[0])
        self.index += 1

class CalcParser(Parser):
@@ -36,7 +34,7 @@ class CalcParser(Parser):
    precedence = (
        ('left', '+', '-'),
        ('left', '*', '/'),
        ('right', 'UMINUS'),
        ('right', UMINUS),
        )

    def __init__(self):

example/schcls/schcls.py (new file, 179 lines)
@@ -0,0 +1,179 @@
# schcls.py
#
# Proof of concept--not complete

from sly.docparse import DocParseMeta
from sly import Lexer, Parser

class SchLexer(Lexer):
    tokens   = { NUMBER, NAME, DEFINE, SET }
    ignore   = ' \t'
    literals = ['=','+','-','*','/','(',')','.']

    NAME     = '[a-zA-Z_!][a-zA-Z0-9_!]*'
    NAME['define'] = DEFINE
    NAME['set!'] = SET

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    @_(r'\n+')
    def newline(self, t):
        self.lineno = t.lineno + t.value.count('\n')

    def error(self, t):
        print(f"{self.cls_module}.{self.cls_name}:{self.lineno}: * Illegal character", repr(self.text[self.index]))
        self.index += 1

class SchParser(Parser):
    tokens = SchLexer.tokens
    precedence = (
        ('left', '+','-'),
        ('left', '*','/')
        )
    def __init__(self):
        self.env = { }

    @_('declarations',
       '')
    def program(self, p):
        return self.env

    @_('declarations declaration')
    def declarations(self, p):
        pass

    @_('declaration')
    def declarations(self, p):
        pass

    @_("'(' DEFINE NAME expression ')'")
    def declaration(self, p):
        self.env[p.NAME] = p.expression

    @_("'(' DEFINE '(' NAME arglist ')' exprlist ')'")
    def declaration(self, p):
        args = ','.join(p.arglist)
        self.env[p.NAME] = eval(f"lambda {args}: ({','.join(p.exprlist)},)[-1]")

    @_("'(' SET NAME '.' NAME expression ')'")
    def expression(self, p):
        return f'setattr({p.NAME0}, {p.NAME1!r}, {p.expression})'

    @_("")
    def arglist(self, p):
        return []

    @_("arglist_nonempty")
    def arglist(self, p):
        return p.arglist_nonempty

    @_("arglist_nonempty NAME")
    def arglist_nonempty(self, p):
        p.arglist_nonempty.append(p.NAME)
        return p.arglist_nonempty

    @_("NAME")
    def arglist_nonempty(self, p):
        return [ p.NAME ]

    @_("NUMBER")
    def expression(self, p):
        return str(p.NUMBER)

    @_("name")
    def expression(self, p):
        return p.name

    @_("'(' operator exprlist ')'")
    def expression(self, p):
        return '(' + p.operator.join(p.exprlist) + ')'

    @_("'+'", "'-'", "'*'", "'/'")
    def operator(self, p):
        return p[0]

    @_("'(' name exprlist ')'")
    def expression(self, p):
        return p.name + '(' + ','.join(p.exprlist) + ')'

    @_("'(' name ')'")
    def expression(self, p):
        return p.name + '()'

    @_('exprlist expression')
    def exprlist(self, p):
        p.exprlist.append(p.expression)
        return p.exprlist

    @_('expression')
    def exprlist(self, p):
        return [ p.expression ]

    @_("NAME '.' NAME")
    def name(self, p):
        return f'{p.NAME0}.{p.NAME1}'

    @_("NAME")
    def name(self, p):
        return p.NAME

    def error(self, p):
        print(f'{self.cls_module}.{self.cls_name}:{getattr(p,"lineno","")}: '
              f'Syntax error at {getattr(p,"value","EOC")}')

class SchMeta(DocParseMeta):
    lexer = SchLexer
    parser = SchParser

class Sch(metaclass=SchMeta):
    pass

class Rat(Sch):
    '''
    (define (__init__ self numer denom)
        (set! self.numer numer)
        (set! self.denom denom)
    )
    (define (__add__ self other)
        (Rat (+ (* self.numer other.denom)
                (* self.denom other.numer))
             (* self.denom other.denom)
        )
    )
    (define (__sub__ self other)
        (Rat (- (* self.numer other.denom)
                (* self.denom other.numer))
             (* self.denom other.denom)
        )
    )
    (define (__mul__ self other)
        (Rat (* self.numer other.numer)
             (* self.denom other.denom)
        )
    )
    (define (__truediv__ self other)
        (Rat (* self.numer other.denom)
             (* self.denom other.numer)
        )
    )
    '''
    def __repr__(self):
        return f'Rat({self.numer}, {self.denom})'

if __name__ == '__main__':
    a = Rat(2, 3)
    b = Rat(1, 4)
    print(a + b)
    print(a - b)
    print(a * b)
    print(a / b)
example/wasm/expr.py (new file, 245 lines)
@@ -0,0 +1,245 @@
# -----------------------------------------------------------------------------
# expr.py
#
# Proof-of-concept encoding of functions/expressions into Wasm.
#
# This file implements a mini-language for writing Wasm functions as expressions.
# It only supports integers.
#
# Here are a few examples:
#
# # Some basic function definitions
# add(x, y) = x + y;
# mul(x, y) = x * y;
# dsquare(x, y) = mul(x, x) + mul(y, y);
#
# # A recursive function
# fact(n) = if n < 1 then 1 else n*fact(n-1);
#
# The full grammar:
#
#     functions : functions function
#               | function
#
#     function : NAME ( parms ) = expr ;
#
#     expr : expr + expr
#          | expr - expr
#          | expr * expr
#          | expr / expr
#          | expr < expr
#          | expr <= expr
#          | expr > expr
#          | expr >= expr
#          | expr == expr
#          | expr != expr
#          | ( expr )
#          | NAME (exprs)
#          | if expr then expr else expr
#          | NUMBER
#
# Note: This is implemented as a one-pass compiler with no intermediate AST.
# Some of the grammar rules have to be written in a funny way to make this
# work.  If doing this for real, I'd probably build an AST and construct
# Wasm code through AST walking.
# -----------------------------------------------------------------------------

import sys
sys.path.append('../..')

from sly import Lexer, Parser
import wasm

class ExprLexer(Lexer):
    tokens = { NAME, NUMBER, PLUS, TIMES, MINUS, DIVIDE, LPAREN, RPAREN, COMMA,
               LT, LE, GT, GE, EQ, NE, IF, THEN, ELSE, ASSIGN, SEMI }
    ignore = ' \t'

    # Tokens
    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
    NAME['if'] = IF
    NAME['then'] = THEN
    NAME['else'] = ELSE

    NUMBER = r'\d+'

    # Special symbols
    PLUS = r'\+'
    MINUS = r'-'
    TIMES = r'\*'
    DIVIDE = r'/'
    LPAREN = r'\('
    RPAREN = r'\)'
    COMMA = r','
    LE = r'<='
    LT = r'<'
    GE = r'>='
    GT = r'>'
    EQ = r'=='
    NE = r'!='
    ASSIGN = r'='
    SEMI = ';'

    # Ignored pattern
    ignore_newline = r'\n+'
    ignore_comment = r'#.*\n'

    # Extra action for newlines
    def ignore_newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, t):
        print("Illegal character '%s'" % t.value[0])
        self.index += 1

class ExprParser(Parser):
    tokens = ExprLexer.tokens

    precedence = (
        ('left', IF, ELSE),
        ('left', EQ, NE, LT, LE, GT, GE),
        ('left', PLUS, MINUS),
        ('left', TIMES, DIVIDE),
        ('right', UMINUS)
        )

    def __init__(self):
        self.functions = { }
        self.module = wasm.Module()

    @_('functions function')
    def functions(self, p):
        pass

    @_('function')
    def functions(self, p):
        pass

    @_('function_decl ASSIGN expr SEMI')
    def function(self, p):
        self.function.block_end()
        self.function = None

    @_('NAME LPAREN parms RPAREN')
    def function_decl(self, p):
        self.locals = { name:n for n, name in enumerate(p.parms) }
        self.function = self.module.add_function(p.NAME, [wasm.i32]*len(p.parms), [wasm.i32])
        self.functions[p.NAME] = self.function

    @_('NAME LPAREN RPAREN')
    def function_decl(self, p):
        self.locals = { }
        self.function = self.module.add_function(p.NAME, [], [wasm.i32])
        self.functions[p.NAME] = self.function

    @_('parms COMMA parm')
    def parms(self, p):
        return p.parms + [p.parm]

    @_('parm')
    def parms(self, p):
        return [ p.parm ]

    @_('NAME')
    def parm(self, p):
        return p.NAME

    @_('expr PLUS expr')
    def expr(self, p):
        self.function.i32.add()

    @_('expr MINUS expr')
    def expr(self, p):
        self.function.i32.sub()

    @_('expr TIMES expr')
    def expr(self, p):
        self.function.i32.mul()

    @_('expr DIVIDE expr')
    def expr(self, p):
        self.function.i32.div_s()

    @_('expr LT expr')
    def expr(self, p):
        self.function.i32.lt_s()

    @_('expr LE expr')
    def expr(self, p):
        self.function.i32.le_s()

    @_('expr GT expr')
    def expr(self, p):
        self.function.i32.gt_s()

    @_('expr GE expr')
    def expr(self, p):
        self.function.i32.ge_s()

    @_('expr EQ expr')
    def expr(self, p):
        self.function.i32.eq()

    @_('expr NE expr')
    def expr(self, p):
        self.function.i32.ne()

    @_('MINUS expr %prec UMINUS')
    def expr(self, p):
        pass

    @_('LPAREN expr RPAREN')
    def expr(self, p):
        pass

    @_('NUMBER')
    def expr(self, p):
        self.function.i32.const(int(p.NUMBER))

    @_('NAME')
    def expr(self, p):
        self.function.local.get(self.locals[p.NAME])

    @_('NAME LPAREN exprlist RPAREN')
    def expr(self, p):
        self.function.call(self.functions[p.NAME])

    @_('NAME LPAREN RPAREN')
    def expr(self, p):
        self.function.call(self.functions[p.NAME])

    @_('IF expr thenexpr ELSE expr')
    def expr(self, p):
        self.function.block_end()

    @_('exprlist COMMA expr')
    def exprlist(self, p):
        pass

    @_('expr')
    def exprlist(self, p):
        pass

    @_('startthen expr')
    def thenexpr(self, p):
        self.function.else_start()

    @_('THEN')
    def startthen(self, p):
        self.function.if_start(wasm.i32)

if __name__ == '__main__':
    import sys
    if len(sys.argv) != 2:
        raise SystemExit(f'Usage: {sys.argv[0]} module')

    lexer = ExprLexer()
    parser = ExprParser()
    parser.parse(lexer.tokenize(open(sys.argv[1]).read()))

    name = sys.argv[1].split('.')[0]
    parser.module.write_wasm(name)
    parser.module.write_html(name)
    print(f'Wrote: {name}.wasm')
    print(f'Wrote: {name}.html')
    print('Use python3 -m http.server to test')
example/wasm/test.e (new file, 25 lines)
@@ -0,0 +1,25 @@
# Experimental Wasm function examples.
# To run:
#
#    1. First run python3 expr.py test.e
#    2. Use python3 -m http.server
#
# Go to a browser and visit http://localhost:8000/test.html.
# From the browser, open the JavaScript console.  Try executing
# the functions from there.
#
# Some basic functions
add(x,y) = x+y;
sub(x,y) = x-y;
mul(x,y) = x*y;
div(x,y) = x/y;

# A function calling other functions
dsquare(x,y) = mul(x,x) + mul(y,y);

# A conditional
minval(a, b) = if a < b then a else b;

# Some recursive functions
fact(n) = if n <= 1 then 1 else n*fact(n-1);
fib(n) = if n < 2 then 1 else fib(n-1) + fib(n-2);
example/wasm/test.html (new file, 32 lines)
@@ -0,0 +1,32 @@

<html>
<body>
  <script>
    var imports = {};

    fetch("test.wasm").then(response =>
      response.arrayBuffer()
    ).then(bytes =>
           WebAssembly.instantiate(bytes, imports)
    ).then(results => {
      window.dsquared = results.instance.exports.dsquared;
      window.distance = results.instance.exports.distance;
      window.getval = results.instance.exports.getval;
      window.setval = results.instance.exports.setval;
    });
  </script>

<h3>module test</h3>

<p>
The following exports are made. Access from the JS console.
</p>

<p><tt>dsquared(f64, f64) -> f64</tt></p>
<p><tt>distance(f64, f64) -> f64</tt></p>
<p><tt>getval(i32) -> i32</tt></p>
<p><tt>setval(i32, i32) -> i32</tt></p>

</body>
</html>
example/wasm/wasm.py (new file, 942 lines)
@@ -0,0 +1,942 @@
# wasm.py
#
# Experimental builder for Wasm binary encoding. Use at your own peril.
#
# Author: David Beazley (@dabeaz)
# Copyright (C) 2019
# http://www.dabeaz.com

import struct
import enum
from collections import defaultdict
import json

def encode_unsigned(value):
    '''
    Produce an LEB128 encoded unsigned integer.
    '''
    parts = []
    while value:
        parts.append((value & 0x7f) | 0x80)
        value >>= 7
    if not parts:
        parts.append(0)
    parts[-1] &= 0x7f
    return bytes(parts)

def encode_signed(value):
    '''
    Produce a LEB128 encoded signed integer.
    '''
    parts = [ ]
    if value < 0:
        # Sign extend the value up to a multiple of 7 bits
        value = (1 << (value.bit_length() + (7 - value.bit_length() % 7))) + value
        negative = True
    else:
        negative = False
    while value:
        parts.append((value & 0x7f) | 0x80)
        value >>= 7
    if not parts or (not negative and parts[-1] & 0x40):
        parts.append(0)
    parts[-1] &= 0x7f
    return bytes(parts)

assert encode_unsigned(624485) == bytes([0xe5, 0x8e, 0x26])
assert encode_unsigned(127) == bytes([0x7f])
assert encode_signed(-624485) == bytes([0x9b, 0xf1, 0x59])
assert encode_signed(127) == bytes([0xff, 0x00])
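
# Editorial illustration, not part of the original module: decoding an
# unsigned LEB128 value back out of a byte sequence, to show that the
# format round-trips (7 payload bits per byte, high bit set on all but
# the final byte).
def decode_unsigned(data):
    value = shift = 0
    for byte in data:
        value |= (byte & 0x7f) << shift
        shift += 7
        if not byte & 0x80:      # High bit clear marks the final byte
            break
    return value

assert decode_unsigned(encode_unsigned(624485)) == 624485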

def encode_f64(value):
    '''
    Encode a 64-bit floating point as little endian
    '''
    return struct.pack('<d', value)

def encode_f32(value):
    '''
    Encode a 32-bit floating point as little endian.
    '''
    return struct.pack('<f', value)

def encode_name(value):
    '''
    Encode a name as UTF-8
    '''
    data = value.encode('utf-8')
    return encode_vector(data)

def encode_vector(items):
    '''
    Items is a list of encoded values or bytes
    '''
    if isinstance(items, bytes):
        return encode_unsigned(len(items)) + items
    else:
        return encode_unsigned(len(items)) + b''.join(items)

# ------------------------------------------------------------
# Instruction encoding enums.
#
# Wasm defines 4 core data types [i32, i64, f32, f64].  These type
# names are used in various places (specifying functions, globals,
# etc.).  However, the type names are also used as a namespace for
# type-specific instructions such as i32.add.  We're going to use
# Python enums to set up this arrangement in a clever way that
# makes it possible to do both of these tasks.

# Metaclass for instruction encoding categories. The class itself
# can be used as an integer when encoding instructions.

class HexEnumMeta(enum.EnumMeta):
    def __int__(cls):
        return int(cls._encoding)

    __index__ = __int__

    def __repr__(cls):
        return cls.__name__

    @classmethod
    def __prepare__(meta, name, bases, encoding=0):
        return super().__prepare__(name, bases)

    @staticmethod
    def __new__(meta, clsname, bases, methods, encoding=0):
        cls = super().__new__(meta, clsname, bases, methods)
        cls._encoding = encoding
        return cls

class HexEnum(enum.IntEnum):
    def __repr__(self):
        return f'<{self!s}: 0x{self:x}>'

HexEnum.__class__ = HexEnumMeta

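# A quick illustration of the dual role described above (editorial note,
# not part of the original file):
#
#     int(i32) == 0x7f    # the class itself encodes the value type
#     i32.add  == 0x6a    # a member encodes a type-specific opcode
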
class i32(HexEnum, encoding=0x7f):
    eqz             = 0x45
    eq              = 0x46
    ne              = 0x47
    lt_s            = 0x48
    lt_u            = 0x49
    gt_s            = 0x4a
    gt_u            = 0x4b
    le_s            = 0x4c
    le_u            = 0x4d
    ge_s            = 0x4e
    ge_u            = 0x4f
    clz             = 0x67
    ctz             = 0x68
    popcnt          = 0x69
    add             = 0x6a
    sub             = 0x6b
    mul             = 0x6c
    div_s           = 0x6d
    div_u           = 0x6e
    rem_s           = 0x6f
    rem_u           = 0x70
    and_            = 0x71
    or_             = 0x72
    xor             = 0x73
    shl             = 0x74
    shr_s           = 0x75
    shr_u           = 0x76
    rotl            = 0x77
    rotr            = 0x78
    wrap_i64        = 0xa7
    trunc_f32_s     = 0xa8
    trunc_f32_u     = 0xa9
    trunc_f64_s     = 0xaa
    trunc_f64_u     = 0xab
    reinterpret_f32 = 0xbc
    load            = 0x28
    load8_s         = 0x2c
    load8_u         = 0x2d
    load16_s        = 0x2e
    load16_u        = 0x2f
    store           = 0x36
    store8          = 0x3a
    store16         = 0x3b
    const           = 0x41

class i64(HexEnum, encoding=0x7e):
    eqz             = 0x50
    eq              = 0x51
    ne              = 0x52
    lt_s            = 0x53
    lt_u            = 0x54
    gt_s            = 0x55
    gt_u            = 0x56
    le_s            = 0x57
    le_u            = 0x58
    ge_s            = 0x59
    ge_u            = 0x5a
    clz             = 0x79
    ctz             = 0x7a
    popcnt          = 0x7b
    add             = 0x7c
    sub             = 0x7d
    mul             = 0x7e
    div_s           = 0x7f
    div_u           = 0x80
    rem_s           = 0x81
    rem_u           = 0x82
    and_            = 0x83
    or_             = 0x84
    xor             = 0x85
    shl             = 0x86
    shr_s           = 0x87
    shr_u           = 0x88
    rotl            = 0x89
    rotr            = 0x8a
    extend_i32_s    = 0xac
    extend_i32_u    = 0xad
    trunc_f32_s     = 0xae
    trunc_f32_u     = 0xaf
    trunc_f64_s     = 0xb0
    trunc_f64_u     = 0xb1
    reinterpret_f64 = 0xbd
    load            = 0x29
    load8_s         = 0x30
    load8_u         = 0x31
    load16_s        = 0x32
    load16_u        = 0x33
    load32_s        = 0x34
    load32_u        = 0x35
    store           = 0x37
    store8          = 0x3c
    store16         = 0x3d
    store32         = 0x3e
    const           = 0x42

class f32(HexEnum, encoding=0x7d):
    eq              = 0x5b
    ne              = 0x5c
    lt              = 0x5d
    gt              = 0x5e
    le              = 0x5f
    ge              = 0x60
    abs             = 0x8b
    neg             = 0x8c
    ceil            = 0x8d
    floor           = 0x8e
    trunc           = 0x8f
    nearest         = 0x90
    sqrt            = 0x91
    add             = 0x92
    sub             = 0x93
    mul             = 0x94
    div             = 0x95
    min             = 0x96
    max             = 0x97
    copysign        = 0x98
    convert_i32_s   = 0xb2
    convert_i32_u   = 0xb3
    convert_i64_s   = 0xb4
    convert_i64_u   = 0xb5
    demote_f64      = 0xb6
    reinterpret_i32 = 0xbe
    load            = 0x2a
    store           = 0x38
    const           = 0x43

class f64(HexEnum, encoding=0x7c):
    eq              = 0x61
    ne              = 0x62
    lt              = 0x63
    gt              = 0x64
    le              = 0x65
    ge              = 0x66
    abs             = 0x99
    neg             = 0x9a
    ceil            = 0x9b
    floor           = 0x9c
    trunc           = 0x9d
    nearest         = 0x9e
    sqrt            = 0x9f
    add             = 0xa0
    sub             = 0xa1
    mul             = 0xa2
    div             = 0xa3
    min             = 0xa4
    max             = 0xa5
    copysign        = 0xa6
    convert_i32_s   = 0xb7
    convert_i32_u   = 0xb8
    convert_i64_s   = 0xb9
    convert_i64_u   = 0xba
    promote_f32     = 0xbb
    reinterpret_i64 = 0xbf
    load            = 0x2b
    store           = 0x39
    const           = 0x44

class local(HexEnum):
    get = 0x20
    set = 0x21
    tee = 0x22

class global_(HexEnum):
    get = 0x23
    set = 0x24

global_.__name__ = 'global'

# Special void type for block returns
void = 0x40

# ------------------------------------------------------------
def encode_function_type(parameters, results):
    '''
    parameters is a vector of value types
    results is a vector of value types
    '''
    enc_parms = bytes(parameters)
    enc_results = bytes(results)
    return b'\x60' + encode_vector(enc_parms) + encode_vector(enc_results)

def encode_limits(min, max=None):
    if max is None:
        return b'\x00' + encode_unsigned(min)
    else:
        return b'\x01' + encode_unsigned(min) + encode_unsigned(max)

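# For example:
#
#     encode_limits(1)      -> b'\x00\x01'      (min only)
#     encode_limits(1, 16)  -> b'\x01\x01\x10'  (min and max)
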
def encode_table_type(elemtype, min, max=None):
    # Note: only the funcref element type (0x70) is emitted here; the
    # elemtype argument is currently ignored.
    return b'\x70' + encode_limits(min, max)

def encode_global_type(value_type, mut=True):
    return bytes([value_type, mut])

# ----------------------------------------------------------------------
# Instruction builders
#
# Wasm instructions are grouped into different namespaces.  For example:
#
#     i32.add()
#     local.get()
#     memory.size()
#     ...
#
# The classes that follow implement the namespaces for the different
# instruction categories.

# Common base class.  Each sub-builder holds a reference back to the
# owning InstructionBuilder and appends instructions to its code list.
class SubBuilder:
    def __init__(self, builder):
        self._builder = builder

    def _append(self, instr):
        self._builder._code.append(instr)

# Builder for the local.* namespace
class LocalBuilder(SubBuilder):
    def get(self, localidx):
        self._append([local.get, *encode_unsigned(localidx)])

    def set(self, localidx):
        self._append([local.set, *encode_unsigned(localidx)])

    def tee(self, localidx):
        self._append([local.tee, *encode_unsigned(localidx)])

# Builder for the global.* namespace
class GlobalBuilder(SubBuilder):
    def get(self, glob):
        if isinstance(glob, int):
            globidx = glob
        else:
            globidx = glob.idx
        self._append([global_.get, *encode_unsigned(globidx)])

    def set(self, glob):
        if isinstance(glob, int):
            globidx = glob
        else:
            globidx = glob.idx
        self._append([global_.set, *encode_unsigned(globidx)])

# Builder for the memory.* namespace
class MemoryBuilder(SubBuilder):
    def size(self):
        self._append([0x3f, 0x00])    # 0x00 = memory index

    def grow(self):
        self._append([0x40, 0x00])    # 0x00 = memory index

class OpBuilder(SubBuilder):
    _optable = None       # To be supplied by subclasses

    # Memory ops
    def load(self, align, offset):
        self._append([self._optable.load, *encode_unsigned(align), *encode_unsigned(offset)])

    def load8_s(self, align, offset):
        self._append([self._optable.load8_s, *encode_unsigned(align), *encode_unsigned(offset)])

    def load8_u(self, align, offset):
        self._append([self._optable.load8_u, *encode_unsigned(align), *encode_unsigned(offset)])

    def load16_s(self, align, offset):
        self._append([self._optable.load16_s, *encode_unsigned(align), *encode_unsigned(offset)])

    def load16_u(self, align, offset):
        self._append([self._optable.load16_u, *encode_unsigned(align), *encode_unsigned(offset)])

    def load32_s(self, align, offset):
        self._append([self._optable.load32_s, *encode_unsigned(align), *encode_unsigned(offset)])

    def load32_u(self, align, offset):
        self._append([self._optable.load32_u, *encode_unsigned(align), *encode_unsigned(offset)])

    def store(self, align, offset):
        self._append([self._optable.store, *encode_unsigned(align), *encode_unsigned(offset)])

    def store8(self, align, offset):
        self._append([self._optable.store8, *encode_unsigned(align), *encode_unsigned(offset)])

    def store16(self, align, offset):
        self._append([self._optable.store16, *encode_unsigned(align), *encode_unsigned(offset)])

    def store32(self, align, offset):
        self._append([self._optable.store32, *encode_unsigned(align), *encode_unsigned(offset)])

    # Any other attribute is treated as a zero-operand opcode looked up
    # in the _optable enum (e.g. add, sub, mul, eq, ...)
    def __getattr__(self, key):
        def call():
            self._append([getattr(self._optable, key)])
        return call

class I32OpBuilder(OpBuilder):
    _optable = i32

    def const(self, value):
        self._append([self._optable.const, *encode_signed(value)])

class I64OpBuilder(OpBuilder):
    _optable = i64

    def const(self, value):
        self._append([self._optable.const, *encode_signed(value)])

class F32OpBuilder(OpBuilder):
    _optable = f32

    def const(self, value):
        self._append([self._optable.const, *encode_f32(value)])

class F64OpBuilder(OpBuilder):
    _optable = f64

    def const(self, value):
        self._append([self._optable.const, *encode_f64(value)])

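# Usage sketch (illustrative):
#
#     b = InstructionBuilder()
#     b.i32.const(2)
#     b.i32.const(3)
#     b.i32.add()          # dispatched through OpBuilder.__getattr__
#
# b._code then holds the encoded instruction fragments.
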
# Flatten an arbitrarily nested list of instruction fragments into a
# single stream of byte values
def _flatten(instr):
    for x in instr:
        if isinstance(x, list):
            yield from _flatten(x)
        else:
            yield x

# High-level class that allows instructions to be easily encoded.
class InstructionBuilder:
    def __init__(self):
        self._code = [ ]
        self.local = LocalBuilder(self)
        self.global_ = GlobalBuilder(self)
        self.i32 = I32OpBuilder(self)
        self.i64 = I64OpBuilder(self)
        self.f32 = F32OpBuilder(self)
        self.f64 = F64OpBuilder(self)

        # Control-flow stack.
        self._control = [ None ]

    def __iter__(self):
        return iter(self._code)

    # Resolve a human-readable label into a relative control-stack index
    def _resolve_label(self, label):
        if isinstance(label, int):
            return label
        index = self._control.index(label)
        return len(self._control) - 1 - index
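    # For example, with _control == [None, 'outer', 'inner'], resolving
    # 'outer' gives 3 - 1 - 1 == 1, i.e. branch one level out from the
    # innermost enclosing block.
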
    # Control flow instructions
    def unreachable(self):
        self._code.append([0x00])    # 0x00 is unreachable (0x01 is nop)

    def nop(self):
        self._code.append([0x01])

    def block_start(self, result_type, label=None):
        self._code.append([0x02, result_type])
        self._control.append(label)
        return len(self._control)

    def block_end(self):
        self._code.append([0x0b])
        self._control.pop()

    def loop_start(self, result_type, label=None):
        self._code.append([0x03, result_type])
        self._control.append(label)
        return len(self._control)

    def if_start(self, result_type, label=None):
        self._code.append([0x04, result_type])
        self._control.append(label)

    def else_start(self):
        self._code.append([0x05])

    def br(self, label):
        labelidx = self._resolve_label(label)
        self._code.append([0x0c, *encode_unsigned(labelidx)])

    def br_if(self, label):
        labelidx = self._resolve_label(label)
        self._code.append([0x0d, *encode_unsigned(labelidx)])

    def br_table(self, labels, label):
        enc_labels = [encode_unsigned(self._resolve_label(idx)) for idx in labels]
        self._code.append([0x0e, *encode_vector(enc_labels), *encode_unsigned(self._resolve_label(label))])

    def return_(self):
        self._code.append([0x0f])

    def call(self, func):
        if isinstance(func, (ImportFunction, Function)):
            self._code.append([0x10, *encode_unsigned(func._idx)])
        else:
            self._code.append([0x10, *encode_unsigned(func)])

    def call_indirect(self, typesig):
        if isinstance(typesig, Type):
            typeidx = typesig.idx
        else:
            typeidx = typesig
        self._code.append([0x11, *encode_unsigned(typeidx), 0x00])   # 0x00 = table index

    def drop(self):
        self._code.append([0x1a])

    def select(self):
        self._code.append([0x1b])

class Type:
    def __init__(self, parms, results, idx):
        self.parms = parms
        self.results = results
        self.idx = idx

    def __repr__(self):
        return f'{self.parms!r} -> {self.results!r}'

class ImportFunction:
    def __init__(self, name, typesig, idx):
        self._name = name
        self._typesig = typesig
        self._idx = idx

    def __repr__(self):
        return f'ImportFunction({self._name}, {self._typesig}, {self._idx})'

class Function(InstructionBuilder):
    def __init__(self, name, typesig, idx, export=True):
        super().__init__()
        self._name = name
        self._typesig = typesig
        self._locals = list(typesig.parms)
        self._export = export
        self._idx = idx

    def __repr__(self):
        return f'Function({self._name}, {self._typesig}, {self._idx})'

    # Allocate a new local variable of a given type
    def alloc(self, valuetype):
        self._locals.append(valuetype)
        return len(self._locals) - 1

class ImportGlobal:
    def __init__(self, name, valtype, idx):
        self.name = name
        self.valtype = valtype
        self.idx = idx

    def __repr__(self):
        return f'ImportGlobal({self.name}, {self.valtype}, {self.idx})'

class Global:
    def __init__(self, name, valtype, initializer, idx):
        self.name = name
        self.valtype = valtype
        self.initializer = initializer
        self.idx = idx

    def __repr__(self):
        return f'Global({self.name}, {self.valtype}, {self.initializer}, {self.idx})'

class Module:
    def __init__(self):
        # Vector of function type signatures.  Signatures are reused
        # if more than one function has the same signature.
        self.type_section = []

        # Vector of imported entities.  These can be functions, globals,
        # tables, and memories
        self.import_section = []

        # There are 4 basic entities within a Wasm file: functions,
        # globals, memories, and tables.  Each kind of entity is
        # stored in a separate list and is indexed by an integer
        # index starting at 0.  Imported entities must always
        # go before entities defined in the Wasm module itself.
        self.funcidx = 0
        self.globalidx = 0
        self.memoryidx = 0
        self.tableidx = 0

        self.function_section = []         # Vector of typeidx
        self.global_section = []           # Vector of globals
        self.table_section = []            # Vector of tables
        self.memory_section = []           # Vector of memories

        # Exported entities.  A module may export functions, globals,
        # tables, and memories
        self.export_section = []           # Vector of exports

        # Optional start function.  A function that executes upon loading
        self.start_section = None

        # Initialization of table elements
        self.element_section = []

        # Code section for function bodies.
        self.code_section = []

        # Data section contains data segments
        self.data_section = []

        # List of function objects (to help with encoding)
        self.functions = []

        # Output for JS/HTML
        self.js_exports = ""
        self.html_exports = ""
        self.js_imports = defaultdict(dict)

    def add_type(self, parms, results):
        enc = encode_function_type(parms, results)
        if enc in self.type_section:
            return Type(parms, results, self.type_section.index(enc))
        else:
            self.type_section.append(enc)
            return Type(parms, results, len(self.type_section) - 1)

    def import_function(self, module, name, parms, results):
        if len(self.function_section) > 0:
            raise RuntimeError('function imports must go before first function definition')

        typesig = self.add_type(parms, results)
        code = encode_name(module) + encode_name(name) + b'\x00' + encode_unsigned(typesig.idx)   # 0x00 = function import
        self.import_section.append(code)
        self.js_imports[module][name] = f"function: {typesig}"
        self.funcidx += 1
        return ImportFunction(f'{module}.{name}', typesig, self.funcidx - 1)

    def import_table(self, module, name, elemtype, min, max=None):
        code = encode_name(module) + encode_name(name) + b'\x01' + encode_table_type(elemtype, min, max)   # 0x01 = table import
        self.import_section.append(code)
        self.js_imports[module][name] = "table:"
        self.tableidx += 1
        return self.tableidx - 1

    def import_memtype(self, module, name, min, max=None):
        code = encode_name(module) + encode_name(name) + b'\x02' + encode_limits(min, max)   # 0x02 = memory import
        self.import_section.append(code)
        self.js_imports[module][name] = "memory:"
        self.memoryidx += 1
        return self.memoryidx - 1

    def import_global(self, module, name, value_type):
        if len(self.global_section) > 0:
            raise RuntimeError('global imports must go before first global definition')

        code = encode_name(module) + encode_name(name) + b'\x03' + encode_global_type(value_type, False)   # 0x03 = global import
        self.import_section.append(code)
        self.js_imports[module][name] = f"global: {value_type}"
        self.globalidx += 1
        return ImportGlobal(f'{module}.{name}', value_type, self.globalidx - 1)

    def add_function(self, name, parms, results, export=True):
        typesig = self.add_type(parms, results)
        func = Function(name, typesig, self.funcidx, export)
        self.funcidx += 1
        self.functions.append(func)
        self.function_section.append(encode_unsigned(typesig.idx))
        self.html_exports += f'<p><tt>{name}({", ".join(str(p) for p in parms)}) -> {results[0]!s}</tt></p>\n'
        return func

    def add_table(self, elemtype, min, max=None):
        self.table_section.append(encode_table_type(elemtype, min, max))
        self.tableidx += 1
        return self.tableidx - 1

    def add_memory(self, min, max=None):
        self.memory_section.append(encode_limits(min, max))
        self.memoryidx += 1
        return self.memoryidx - 1

    def add_global(self, name, value_type, initializer, mutable=True, export=True):
        code = encode_global_type(value_type, mutable)
        # Build the constant initializer expression (e.g. i32.const <value>)
        expr = InstructionBuilder()
        getattr(expr, value_type.__name__).const(initializer)
        expr._code.append([0x0b])          # 'end' opcode terminates the initializer
        code += bytes(_flatten(expr))
        self.global_section.append(code)
        if export:
            self.export_global(name, self.globalidx)
        self.globalidx += 1
        return Global(name, value_type, initializer, self.globalidx - 1)

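    # Example (illustrative): a mutable, exported i32 counter initialized
    # to zero:
    #
    #     counter = mod.add_global('counter', i32, 0)
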
    def export_function(self, name, funcidx):
        code = encode_name(name) + b'\x00' + encode_unsigned(funcidx)     # 0x00 = function export
        self.export_section.append(code)
        self.js_exports += f'window.{name} = results.instance.exports.{name};\n'

    def export_table(self, name, tableidx):
        code = encode_name(name) + b'\x01' + encode_unsigned(tableidx)    # 0x01 = table export
        self.export_section.append(code)

    def export_memory(self, name, memidx):
        code = encode_name(name) + b'\x02' + encode_unsigned(memidx)      # 0x02 = memory export
        self.export_section.append(code)

    def export_global(self, name, globalidx):
        code = encode_name(name) + b'\x03' + encode_unsigned(globalidx)   # 0x03 = global export
        self.export_section.append(code)

    def start_function(self, funcidx):
        self.start_section = encode_unsigned(funcidx)

    def add_element(self, tableidx, expr, funcidxs):
        # expr is an InstructionBuilder giving the table offset expression
        code = encode_unsigned(tableidx) + bytes(_flatten(expr))
        code += encode_vector([encode_unsigned(i) for i in funcidxs])
        self.element_section.append(code)

    def add_function_code(self, locals, expr):
        # locals is a list of value types [i32, i32, etc...]
        # expr is an InstructionBuilder holding the actual code
        locs = [ encode_unsigned(1) + bytes([loc]) for loc in locals ]
        locs_code = encode_vector(locs)
        func_code = locs_code + bytes(_flatten(expr))
        code = encode_unsigned(len(func_code)) + func_code
        self.code_section.append(code)

    def add_data(self, memidx, expr, data):
        # data is bytes; expr is an InstructionBuilder giving the offset
        code = encode_unsigned(memidx) + bytes(_flatten(expr)) + encode_vector([data[i:i+1] for i in range(len(data))])
        self.data_section.append(code)

    def _encode_section_vector(self, sectionid, contents):
        if not contents:
            return b''
        contents_code = encode_vector(contents)
        code = bytes([sectionid]) + encode_unsigned(len(contents_code)) + contents_code
        return code

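    # Resulting layout (illustrative), e.g. for section id 10 holding two
    # function bodies:
    #
    #     [0x0a][section size][0x02][body1][body2]
    #
    # with the size and count LEB128-encoded.
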
    def encode(self):
        for func in self.functions:
            self.add_function_code(func._locals, func._code)
            if func._export:
                self.export_function(func._name, func._idx)

        # Encode the whole module: "\0asm" magic followed by version 1
        code = b'\x00\x61\x73\x6d\x01\x00\x00\x00'
        code += self._encode_section_vector(1, self.type_section)
        code += self._encode_section_vector(2, self.import_section)
        code += self._encode_section_vector(3, self.function_section)
        code += self._encode_section_vector(4, self.table_section)
        code += self._encode_section_vector(5, self.memory_section)
        code += self._encode_section_vector(6, self.global_section)
        code += self._encode_section_vector(7, self.export_section)
        if self.start_section:
            code += bytes([8]) + encode_unsigned(len(self.start_section)) + self.start_section
        code += self._encode_section_vector(9, self.element_section)
        code += self._encode_section_vector(10, self.code_section)
        code += self._encode_section_vector(11, self.data_section)
        return code

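    # Minimal end-to-end usage (illustrative):
    #
    #     mod = Module()
    #     f = mod.add_function('one', [], [i32])
    #     f.i32.const(1)
    #     f.block_end()
    #     open('one.wasm', 'wb').write(mod.encode())
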
    def write_wasm(self, modname):
        with open(f'{modname}.wasm', 'wb') as f:
            f.write(self.encode())

    def write_html(self, modname):
        with open(f'{modname}.html', 'wt') as f:
            f.write(js_template.format(
                    module=modname,
                    imports=json.dumps(self.js_imports, indent=4),
                    exports=self.js_exports,
                    exports_html=self.html_exports,
                    )
            )

js_template = '''
<html>
<body>
  <script>
    var imports = {imports};

    fetch("{module}.wasm").then(response =>
      response.arrayBuffer()
    ).then(bytes =>
           WebAssembly.instantiate(bytes, imports)
    ).then(results => {{
      {exports}
    }});
  </script>

<h3>module {module}</h3>

<p>
The following exports are made. Access from the JS console.
</p>

{exports_html}
</body>
</html>
'''

def test1():
    mod = Module()

    # External function imports.  Note: All imports MUST go first, since
    # imports occupy the low indices of the function index space for
    # functions defined in the module.

    # Import some functions from JS
    # math_sin = mod.import_function('util', 'sin', [f64], [f64])
    # math_cos = mod.import_function('util', 'cos', [f64], [f64])

    # Import a function from another module entirely
    # fact = mod.import_function('recurse', 'fact', [i32], [i32])

    # Import a global variable (from JS?)
    # FOO = mod.import_global('util', 'FOO', f64)

    # A more complicated function
    dsquared_func = mod.add_function('dsquared', [f64, f64], [f64])
    dsquared_func.local.get(0)
    dsquared_func.local.get(0)
    dsquared_func.f64.mul()
    dsquared_func.local.get(1)
    dsquared_func.local.get(1)
    dsquared_func.f64.mul()
    dsquared_func.f64.add()
    dsquared_func.block_end()

    # A function calling another function
    distance = mod.add_function('distance', [f64, f64], [f64])
    distance.local.get(0)
    distance.local.get(1)
    distance.call(dsquared_func)
    distance.f64.sqrt()
    distance.block_end()

    # A function calling out to JS
    # ext = mod.add_function('ext', [f64, f64], [f64])
    # ext.local.get(0)
    # ext.call(math_sin)
    # ext.local.get(1)
    # ext.call(math_cos)
    # ext.f64.add()
    # ext.block_end()

    # A function calling across modules
    # tenf = mod.add_function('tenfact', [i32], [i32])
    # tenf.local.get(0)
    # tenf.call(fact)
    # tenf.i32.const(10)
    # tenf.i32.mul()
    # tenf.block_end()

    # A function accessing an imported global variable
    # gf = mod.add_function('gf', [f64], [f64])
    # gf.global_.get(FOO)
    # gf.local.get(0)
    # gf.f64.mul()
    # gf.block_end()

    # Memory
    mod.add_memory(1)
    mod.export_memory('memory', 0)

    # Function that returns a byte value
    getval = mod.add_function('getval', [i32], [i32])
    getval.local.get(0)
    getval.i32.load8_u(0, 0)
    getval.block_end()

    # Function that sets a byte value
    setval = mod.add_function('setval', [i32, i32], [i32])
    setval.local.get(0)        # Memory address
    setval.local.get(1)        # Value
    setval.i32.store8(0, 0)
    setval.i32.const(1)
    setval.block_end()
    return mod

def test2():
    mod = Module()

    fact = mod.add_function('fact', [i32], [i32])
    fact.local.get(0)
    fact.i32.const(1)
    fact.i32.lt_s()
    fact.if_start(i32)
    fact.i32.const(1)
    fact.else_start()
    fact.local.get(0)
    fact.local.get(0)
    fact.i32.const(1)
    fact.i32.sub()
    fact.call(fact)
    fact.i32.mul()
    fact.block_end()
    fact.block_end()

    return mod

if __name__ == '__main__':
    mod = test1()

    mod.write_wasm('test')
    mod.write_html('test')

4	setup.py
@@ -3,7 +3,7 @@ try:
except ImportError:
    from distutils.core import setup

tests_require = ['pytest']
tests_require = ['pytest', 'regex']

setup(name = "sly",
            description="SLY - Sly Lex Yacc",
@@ -11,7 +11,7 @@ setup(name = "sly",
SLY is an implementation of lex and yacc for Python 3.
""",
            license="""BSD""",
            version = "0.1",
            version = "0.4",
            author = "David Beazley",
            author_email = "dave@dabeaz.com",
            maintainer = "David Beazley",

sly/__init__.py
@@ -2,4 +2,5 @@
from .lex import *
from .yacc import *

__version__ = "0.4"
__all__ = [ *lex.__all__, *yacc.__all__ ]

25	sly/ast.py	Normal file
@@ -0,0 +1,25 @@
# sly/ast.py
import sys

class AST(object):

    @classmethod
    def __init_subclass__(cls, **kwargs):
        mod = sys.modules[cls.__module__]
        if not hasattr(cls, '__annotations__'):
            return

        hints = list(cls.__annotations__.items())

        def __init__(self, *args, **kwargs):
            if len(hints) != len(args):
                raise TypeError(f'Expected {len(hints)} arguments')
            for arg, (name, val) in zip(args, hints):
                if isinstance(val, str):
                    val = getattr(mod, val)
                if not isinstance(arg, val):
                    raise TypeError(f'{name} argument must be {val}')
                setattr(self, name, arg)

        cls.__init__ = __init__

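# Usage sketch (illustrative, not part of the committed file):
#
#     class BinOp(AST):
#         op    : str
#         left  : 'BinOp'
#         right : 'BinOp'
#
#     node = BinOp('+', left_node, right_node)
#
# Each positional argument is checked against its annotation (string
# annotations are resolved in the defining module) and assigned to the
# corresponding attribute.
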
60	sly/docparse.py	Normal file
@@ -0,0 +1,60 @@
# docparse.py
#
# Support doc-string parsing classes

__all__ = [ 'DocParseMeta' ]

class DocParseMeta(type):
    '''
    Metaclass that processes the class docstring through a parser and
    incorporates the result into the resulting class definition. This
    allows Python classes to be defined with alternative syntax.
    To use this class, you first need to define a lexer and parser:

        from sly import Lexer, Parser
        class MyLexer(Lexer):
           ...

        class MyParser(Parser):
           ...

    You then need to define a metaclass that inherits from DocParseMeta.
    This class must specify the associated lexer and parser classes.
    For example:

        class MyDocParseMeta(DocParseMeta):
            lexer = MyLexer
            parser = MyParser

    This metaclass is then used as a base for processing user-defined
    classes:

        class Base(metaclass=MyDocParseMeta):
            pass

        class Spam(Base):
            """
            doc string is parsed
            ...
            """

    It is expected that the MyParser() class would return a dictionary.
    This dictionary is used to create the final class Spam in this example.
    '''

    @staticmethod
    def __new__(meta, clsname, bases, clsdict):
        if '__doc__' in clsdict:
            lexer = meta.lexer()
            parser = meta.parser()
            lexer.cls_name = parser.cls_name = clsname
            lexer.cls_qualname = parser.cls_qualname = clsdict['__qualname__']
            lexer.cls_module = parser.cls_module = clsdict['__module__']
            parsedict = parser.parse(lexer.tokenize(clsdict['__doc__']))
            assert isinstance(parsedict, dict), 'Parser must return a dictionary'
            clsdict.update(parsedict)
        return super().__new__(meta, clsname, bases, clsdict)

    @classmethod
    def __init_subclass__(cls):
        assert hasattr(cls, 'parser') and hasattr(cls, 'lexer')

289	sly/lex.py
@@ -1,7 +1,7 @@
# -----------------------------------------------------------------------------
# sly: lex.py
#
# Copyright (C) 2016
# Copyright (C) 2016 - 2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
@@ -31,11 +31,10 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------

__version__    = '0.1'
__all__ = ['Lexer']
__all__ = ['Lexer', 'LexerStateChange']

import re
from collections import OrderedDict
import copy

class LexError(Exception):
    '''
@@ -62,35 +61,93 @@ class LexerBuildError(Exception):
    '''
    pass

class LexerStateChange(Exception):
    '''
    Exception raised to force a lexing state change
    '''
    def __init__(self, newstate, tok=None):
        self.newstate = newstate
        self.tok = tok

class Token(object):
    '''
    Representation of a single token.
    '''
    __slots__ = ('type', 'value', 'lineno', 'index')
    def __repr__(self):
        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}'
        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index})'

class LexerMetaDict(OrderedDict):
    '''
    Special dictionary that prohits duplicate definitions in lexer specifications.
    '''
class TokenStr(str):
    @staticmethod
    def __new__(cls, value, key=None, remap=None):
        self = super().__new__(cls, value)
        self.key = key
        self.remap = remap
        return self

    # Implementation of TOKEN[value] = NEWTOKEN
    def __setitem__(self, key, value):
        if self.remap is not None:
            self.remap[self.key, key] = value

    # Implementation of del TOKEN[value]
    def __delitem__(self, key):
        if self.remap is not None:
            self.remap[self.key, key] = self.key

class _Before:
    def __init__(self, tok, pattern):
        self.tok = tok
        self.pattern = pattern

class LexerMetaDict(dict):
    '''
    Special dictionary that prohibits duplicate definitions in lexer specifications.
    '''
    def __init__(self):
        self.before = { }
        self.delete = [ ]
        self.remap = { }

    def __setitem__(self, key, value):
        if isinstance(value, str):
            value = TokenStr(value, key, self.remap)

        if isinstance(value, _Before):
            self.before[key] = value.tok
            value = TokenStr(value.pattern, key, self.remap)

        if key in self and not isinstance(value, property):
            if isinstance(self[key], str):
            prior = self[key]
            if isinstance(prior, str):
                if callable(value):
                    value.pattern = self[key]
                    value.pattern = prior
                else:
                    raise AttributeError(f'Name {key} redefined')

        super().__setitem__(key, value)

    def __delitem__(self, key):
        self.delete.append(key)
        if key not in self and key.isupper():
            pass
        else:
            return super().__delitem__(key)

    def __getitem__(self, key):
        if key not in self and key.split('ignore_')[-1].isupper() and key[:1] != '_':
            return TokenStr(key, key, self.remap)
        else:
            return super().__getitem__(key)

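# Usage sketch (illustrative) of the remapping machinery above, as it
# appears in a lexer specification:
#
#     class CalcLexer(Lexer):
#         tokens = { NAME, IF }
#         NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
#         NAME['if'] = IF          # routed through TokenStr.__setitem__
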
class LexerMeta(type):
    '''
    Metaclass for collecting lexing rules
    '''
    @classmethod
    def __prepare__(meta, *args, **kwargs):
    def __prepare__(meta, name, bases):
        d = LexerMetaDict()

        def _(pattern, *extra):
            patterns = [pattern, *extra]
            def decorate(func):
@@ -101,13 +158,26 @@ class LexerMeta(type):
                    func.pattern = pattern
                return func
            return decorate

        d['_'] = _
        d['before'] = _Before
        return d

    def __new__(meta, clsname, bases, attributes):
        del attributes['_']
        cls = super().__new__(meta, clsname, bases, attributes)
        cls._build(list(attributes.items()))
        del attributes['before']

        # Create attributes for use in the actual class body
        cls_attributes = { str(key): str(val) if isinstance(val, TokenStr) else val
                           for key, val in attributes.items() }
        cls = super().__new__(meta, clsname, bases, cls_attributes)

        # Attach various metadata to the class
        cls._attributes = dict(attributes)
        cls._remap = attributes.remap
        cls._before = attributes.before
        cls._delete = attributes.delete
        cls._build()
        return cls

class Lexer(metaclass=LexerMeta):
@@ -116,27 +186,81 @@ class Lexer(metaclass=LexerMeta):
    literals = set()
    ignore = ''
    reflags = 0
    regex_module = re

    # These attributes are constructed automatically by the associated metaclass
    _master_re = None
    _token_names = set()
    _literals = set()
    _token_funcs = { }
    _token_funcs = {}
    _ignored_tokens = set()
    _remapping = {}
    _delete = {}
    _remap = {}

    # Internal attributes
    __state_stack = None
    __set_state = None

    @classmethod
    def _collect_rules(cls, definitions):
        '''
        Collect all of the rules from class definitions that look like tokens
        '''
    def _collect_rules(cls):
        # Collect all of the rules from class definitions that look like token
        # information.   There are a few things that govern this:
        #
        # 1.  Any definition of the form NAME = str is a token if NAME is
        #     is defined in the tokens set.
        #
        # 2.  Any definition of the form ignore_NAME = str is a rule for an ignored
        #     token.
        #
        # 3.  Any function defined with a 'pattern' attribute is treated as a rule.
        #     Such functions can be created with the @_ decorator or by defining
        #     function with the same name as a previously defined string.
        #
        # This function is responsible for keeping rules in order.

        # Collect all previous rules from base classes
        rules = []
        for key, value in definitions:
            if (key in cls.tokens) or key.startswith('ignore_') or hasattr(value, 'pattern'):
                rules.append((key, value))
        return rules

        for base in cls.__bases__:
            if isinstance(base, LexerMeta):
                rules.extend(base._rules)

        # Dictionary of previous rules
        existing = dict(rules)

        for key, value in cls._attributes.items():
            if (key in cls._token_names) or key.startswith('ignore_') or hasattr(value, 'pattern'):
                if callable(value) and not hasattr(value, 'pattern'):
                    raise LexerBuildError(f"function {value} doesn't have a regex pattern")

                if key in existing:
                    # The definition matches something that already existed in the base class.
                    # We replace it, but keep the original ordering
                    n = rules.index((key, existing[key]))
                    rules[n] = (key, value)
                    existing[key] = value

                elif isinstance(value, TokenStr) and key in cls._before:
                    before = cls._before[key]
                    if before in existing:
                        # Position the token before another specified token
                        n = rules.index((before, existing[before]))
                        rules.insert(n, (key, value))
                    else:
                        # Put at the end of the rule list
                        rules.append((key, value))
                    existing[key] = value
                else:
                    rules.append((key, value))
                    existing[key] = value

            elif isinstance(value, str) and not key.startswith('_') and key not in {'ignore', 'literals'}:
                raise LexerBuildError(f'{key} does not match a name in tokens')

        # Apply deletion rules
        rules = [ (key, value) for key, value in rules if key not in cls._delete ]
        cls._rules = rules

 | 
			
		||||
    def _build(cls, definitions):
 | 
			
		||||
    def _build(cls):
 | 
			
		||||
        '''
 | 
			
		||||
        Build the lexer object from the collected tokens and regular expressions.
 | 
			
		||||
        Validate the rules to make sure they look sane.
 | 
			
		||||
@@ -144,13 +268,30 @@ class Lexer(metaclass=LexerMeta):
 | 
			
		||||
        if 'tokens' not in vars(cls):
 | 
			
		||||
            raise LexerBuildError(f'{cls.__qualname__} class does not define a tokens attribute')
 | 
			
		||||
 | 
			
		||||
        # Pull definitions created for any parent classes
 | 
			
		||||
        cls._token_names = cls._token_names | set(cls.tokens)
 | 
			
		||||
        cls._literals = cls._literals | set(cls.literals)
 | 
			
		||||
        cls._ignored_tokens = set(cls._ignored_tokens)
 | 
			
		||||
        cls._token_funcs = dict(cls._token_funcs)
 | 
			
		||||
        cls._remapping = dict(cls._remapping)
 | 
			
		||||
 | 
			
		||||
        for (key, val), newtok in cls._remap.items():
 | 
			
		||||
            if key not in cls._remapping:
 | 
			
		||||
                cls._remapping[key] = {}
 | 
			
		||||
            cls._remapping[key][val] = newtok
 | 
			
		||||
 | 
			
		||||
        remapped_toks = set()
 | 
			
		||||
        for d in cls._remapping.values():
 | 
			
		||||
            remapped_toks.update(d.values())
 | 
			
		||||
            
 | 
			
		||||
        undefined = remapped_toks - set(cls._token_names)
 | 
			
		||||
        if undefined:
 | 
			
		||||
            missing = ', '.join(undefined)
 | 
			
		||||
            raise LexerBuildError(f'{missing} not included in token(s)')
 | 
			
		||||
 | 
			
		||||
        cls._collect_rules()
 | 
			
		||||
 | 
			
		||||
        parts = []
 | 
			
		||||
        for tokname, value in cls._collect_rules(definitions):
 | 
			
		||||
        for tokname, value in cls._rules:
 | 
			
		||||
            if tokname.startswith('ignore_'):
 | 
			
		||||
                tokname = tokname[7:]
 | 
			
		||||
                cls._ignored_tokens.add(tokname)
 | 
			
		||||
@@ -159,15 +300,15 @@ class Lexer(metaclass=LexerMeta):
 | 
			
		||||
                pattern = value
 | 
			
		||||
 | 
			
		||||
            elif callable(value):
 | 
			
		||||
                pattern = value.pattern
 | 
			
		||||
                cls._token_funcs[tokname] = value
 | 
			
		||||
                pattern = getattr(value, 'pattern')
 | 
			
		||||
 | 
			
		||||
            # Form the regular expression component
 | 
			
		||||
            part = f'(?P<{tokname}>{pattern})'
 | 
			
		||||
 | 
			
		||||
            # Make sure the individual regex compiles properly
 | 
			
		||||
            try:
 | 
			
		||||
                cpat = re.compile(part, cls.reflags)
 | 
			
		||||
                cpat = cls.regex_module.compile(part, cls.reflags)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                raise PatternError(f'Invalid regex for token {tokname}') from e
 | 
			
		||||
 | 
			
		||||
@@ -181,8 +322,9 @@ class Lexer(metaclass=LexerMeta):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        # Form the master regular expression
 | 
			
		||||
        previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
 | 
			
		||||
        cls._master_re = re.compile('|'.join(parts) + previous, cls.reflags)
 | 
			
		||||
        #previous = ('|' + cls._master_re.pattern) if cls._master_re else ''
 | 
			
		||||
        # cls._master_re = cls.regex_module.compile('|'.join(parts) + previous, cls.reflags)
 | 
			
		||||
        cls._master_re = cls.regex_module.compile('|'.join(parts), cls.reflags)
 | 
			
		||||
 | 
			
		||||
        # Verify that that ignore and literals specifiers match the input type
 | 
			
		||||
        if not isinstance(cls.ignore, str):
 | 
			
		||||
@@ -191,14 +333,64 @@ class Lexer(metaclass=LexerMeta):
        if not all(isinstance(lit, str) for lit in cls.literals):
            raise LexerBuildError('literals must be specified as strings')

    def tokenize(self, text, lineno=1, index=0):
        # Local copies of frequently used values
        _ignored_tokens = self._ignored_tokens
        _master_re = self._master_re
        _ignore = self.ignore
        _token_funcs = self._token_funcs
        _literals = self._literals
    def begin(self, cls):
        '''
        Begin a new lexer state
        '''
        assert isinstance(cls, LexerMeta), "state must be a subclass of Lexer"
        if self.__set_state:
            self.__set_state(cls)
        self.__class__ = cls

    def push_state(self, cls):
        '''
        Push a new lexer state onto the stack
        '''
        if self.__state_stack is None:
            self.__state_stack = []
        self.__state_stack.append(type(self))
        self.begin(cls)

    def pop_state(self):
        '''
        Pop a lexer state from the stack
        '''
        self.begin(self.__state_stack.pop())

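    # Usage sketch (illustrative) of the state machinery above:
    #
    #     class BlockLexer(Lexer):
    #         ...
    #         def LBRACE(self, t):
    #             self.push_state(InnerLexer)   # switch to another rule set
    #             return t
    #
    #         def RBRACE(self, t):
    #             self.pop_state()
    #             return t
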
    def tokenize(self, text, lineno=1, index=0):
 | 
			
		||||
        _ignored_tokens = _master_re = _ignore = _token_funcs = _literals = _remapping = None
 | 
			
		||||
 | 
			
		||||
        # --- Support for state changes
 | 
			
		||||
        def _set_state(cls):
 | 
			
		||||
            nonlocal _ignored_tokens, _master_re, _ignore, _token_funcs, _literals, _remapping
 | 
			
		||||
            _ignored_tokens = cls._ignored_tokens
 | 
			
		||||
            _master_re = cls._master_re
 | 
			
		||||
            _ignore = cls.ignore
 | 
			
		||||
            _token_funcs = cls._token_funcs
 | 
			
		||||
            _literals = cls.literals
 | 
			
		||||
            _remapping = cls._remapping
 | 
			
		||||
 | 
			
		||||
        self.__set_state = _set_state
 | 
			
		||||
        _set_state(type(self))
 | 
			
		||||
 | 
			
		||||
        # --- Support for backtracking
 | 
			
		||||
        _mark_stack = []
 | 
			
		||||
        def _mark():
 | 
			
		||||
            _mark_stack.append((type(self), index, lineno))
 | 
			
		||||
        self.mark = _mark
 | 
			
		||||
 | 
			
		||||
        def _accept():
 | 
			
		||||
            _mark_stack.pop()
 | 
			
		||||
        self.accept = _accept
 | 
			
		||||
 | 
			
		||||
        def _reject():
 | 
			
		||||
            nonlocal index, lineno
 | 
			
		||||
            cls, index, lineno = _mark_stack[-1]
 | 
			
		||||
            _set_state(cls)
 | 
			
		||||
        self.reject = _reject
 | 
			
		||||
 | 
			
		||||

        # --- Main tokenization function
        self.text = text
        try:
            while True:
@@ -207,7 +399,7 @@ class Lexer(metaclass=LexerMeta):
                        index += 1
                        continue
                except IndexError:
                    break
                    return

                tok = Token()
                tok.lineno = lineno
@@ -217,6 +409,10 @@ class Lexer(metaclass=LexerMeta):
                    index = m.end()
                    tok.value = m.group()
                    tok.type = m.lastgroup

                    if tok.type in _remapping:
                        tok.type = _remapping[tok.type].get(tok.value, tok.type)

                    if tok.type in _token_funcs:
                        self.index = index
                        self.lineno = lineno
@@ -242,7 +438,12 @@ class Lexer(metaclass=LexerMeta):
                        # A lexing error
                        self.index = index
                        self.lineno = lineno
                        self.error(text[index:])
                        tok.type = 'ERROR'
                        tok.value = text[index:]
                        tok = self.error(tok)
                        if tok is not None:
                            yield tok

                        index = self.index
                        lineno = self.lineno

@@ -253,5 +454,5 @@ class Lexer(metaclass=LexerMeta):
            self.lineno = lineno

    # Default implementations of the error handler. May be changed in subclasses
    def error(self, value):
        raise LexError(f'Illegal character {value[0]!r} at index {self.index}', value, self.index)
    def error(self, t):
        raise LexError(f'Illegal character {t.value[0]!r} at index {self.index}', t.value, self.index)
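This hunk changes the lexer error protocol: error() now receives a Token whose
type is 'ERROR' and whose value is the remaining input text, and a token
returned from error() is yielded into the stream.  A minimal subclass sketch
using the new signature (the class itself is illustrative, not from this
commit):

    from sly import Lexer

    class RecoveringLexer(Lexer):
        tokens = { NUMBER }
        ignore = ' \t'
        NUMBER = r'\d+'

        def error(self, t):
            self.index += 1     # skip the offending character
            return t            # returning the ERROR token passes it downstream

    # list(RecoveringLexer().tokenize('12 $ 34')) yields NUMBER, ERROR, NUMBER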
 
sly/yacc.py (353 lines changed)
@@ -1,7 +1,7 @@
# -----------------------------------------------------------------------------
# sly: yacc.py
#
# Copyright (C) 2016-2017
# Copyright (C) 2016-2018
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
@@ -33,9 +33,8 @@

import sys
import inspect
from collections import OrderedDict, defaultdict
from collections import OrderedDict, defaultdict, Counter

__version__    = '0.1'
__all__        = [ 'Parser' ]

class YaccError(Exception):
@@ -55,12 +54,12 @@ ERROR_COUNT = 3                # Number of symbols that must be shifted to leave
MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the
# logging module.   PLY will use this by default to create things
# logging module.   SLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.
# it into SLY.

class PlyLogger(object):
class SlyLogger(object):
    def __init__(self, f):
        self.f = f

@@ -103,6 +102,7 @@ class YaccSymbol:
# ----------------------------------------------------------------------

class YaccProduction:
    __slots__ = ('_slice', '_namemap', '_stack')
    def __init__(self, s, stack=None):
        self._slice = s
        self._namemap = { }
@@ -139,18 +139,22 @@ class YaccProduction:
            if isinstance(tok, YaccSymbol):
                continue
            index = getattr(tok, 'index', None)
            if index:
            if index is not None:
                return index
        raise AttributeError('No index attribute found')

    def __getattr__(self, name):
        return self._slice[self._namemap[name]].value
        if name in self._namemap:
            return self._namemap[name](self._slice)
        else:
            nameset = '{' + ', '.join(self._namemap) + '}'
            raise AttributeError(f'No symbol {name}. Must be one of {nameset}.')

    def __setattr__(self, name, value):
        if name[0:1] == '_' or name not in self._namemap:
        if name[:1] == '_':
            super().__setattr__(name, value)
        else:
            self._slice[self._namemap[name]].value = value
            raise AttributeError(f"Can't reassign the value of attribute {name!r}")

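These two hunks make YaccProduction a read-only record: attribute reads go
through generated accessor functions, and writes to anything other than
underscore-prefixed slots now raise.  An illustrative rule (the symbol names
are assumed, not taken from this diff):

    @_('term PLUS term')
    def expr(self, p):
        # duplicated symbol names are disambiguated with suffixes: term0, term1
        return p.term0 + p.term1
        # p.term0 = 0  -> AttributeError: Can't reassign the value of attribute 'term0'
        # p.frob       -> AttributeError: No symbol frob. Must be one of {term0, PLUS, term1}.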
# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
@@ -205,16 +209,36 @@ class Production(object):
            if s not in self.usyms:
                self.usyms.append(s)

        # Create a dict mapping symbol names to indices
        m = {}
        for key, indices in symmap.items():
            if len(indices) == 1:
                m[key] = indices[0]
            else:
                for n, index in enumerate(indices):
                    m[key+str(n)] = index
        # Create a name mapping
        # First determine (in advance) if there are duplicate names
        namecount = defaultdict(int)
        for key in self.prod:
            namecount[key] += 1
            if key in _name_aliases:
                for key in _name_aliases[key]:
                    namecount[key] += 1

        self.namemap = m
        # Now, walk through the names and generate accessor functions
        nameuse = defaultdict(int)
        namemap = { }
        for index, key in enumerate(self.prod):
            if namecount[key] > 1:
                k = f'{key}{nameuse[key]}'
                nameuse[key] += 1
            else:
                k = key
            namemap[k] = lambda s,i=index: s[i].value
            if key in _name_aliases:
                for n, alias in enumerate(_name_aliases[key]):
                    if namecount[alias] > 1:
                        k = f'{alias}{nameuse[alias]}'
                        nameuse[alias] += 1
                    else:
                        k = alias
                    # The value is either a list (for repetition) or a tuple for optional
                    namemap[k] = lambda s,i=index,n=n: ([x[n] for x in s[i].value]) if isinstance(s[i].value, list) else s[i].value[n]

        self.namemap = namemap

        # List of all LR items for the production
        self.lr_items = []
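The net effect of this bookkeeping, sketched for the arglist rule that appears
in the tests below (the generated symbol name is abbreviated here):

    # 'arglist : expr { COMMA expr }' is rewritten to, roughly:
    #     arglist : expr _1_COMMA_expr_repeat
    # _name_aliases maps the generated symbol back to ['COMMA', 'expr'], so:
    #     p.expr0  -> value of the leading expr (a plain value)
    #     p.expr1  -> [x[1] for x in repeat_value]  (list: one expr per repetition)
    #     p.COMMA  -> [x[0] for x in repeat_value]  (list of ',' strings)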
@@ -386,7 +410,7 @@ class Grammar(object):
        if term in self.Precedence:
            raise GrammarError(f'Precedence already specified for terminal {term!r}')
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
            raise GrammarError(f"Associativity of {term!r} must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)

    # -----------------------------------------------------------------------------
@@ -482,6 +506,9 @@ class Grammar(object):
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        if callable(start):
            start = start.__name__

        if not start:
            start = self.Productions[1].name

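set_start() now also accepts the rule function itself, resolving it through
__name__.  A tiny hypothetical parser showing the new spelling:

    class StartDemo(Parser):
        tokens = { 'NUMBER' }

        @_('NUMBER')
        def expr(self, p):
            return int(p.NUMBER)

        start = expr    # new: equivalent to start = 'expr'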
@@ -1410,7 +1437,7 @@ class LRTable(object):
                                            if not rlevel:
                                                descrip.append(f'  ! shift/reduce conflict for {a} resolved as shift')
                                                self.sr_conflicts.append((st, a, 'shift'))
                                    elif r < 0:
                                    elif r <= 0:
                                        # Reduce/reduce conflict.   In this case, we favor the rule
                                        # that was defined first in the grammar file
                                        oldp = Productions[-r]
@@ -1447,7 +1474,7 @@ class LRTable(object):
                                    if r > 0:
                                        if r != j:
                                            raise LALRError(f'Shift/shift conflict in state {st}')
                                    elif r < 0:
                                    elif r <= 0:
                                        # Do a precedence check.
                                        #   -  if precedence of reduce rule is higher, we reduce.
                                        #   -  if precedence of reduce is same and left assoc, we reduce.
@@ -1544,34 +1571,248 @@ def _collect_grammar_rules(func):
        lineno = unwrapped.__code__.co_firstlineno
        for rule, lineno in zip(func.rules, range(lineno+len(func.rules)-1, 0, -1)):
            syms = rule.split()
            ebnf_prod = []
            while ('{' in syms) or ('[' in syms):
                for s in syms:
                    if s == '[':
                        syms, prod = _replace_ebnf_optional(syms)
                        ebnf_prod.extend(prod)
                        break
                    elif s == '{':
                        syms, prod = _replace_ebnf_repeat(syms)
                        ebnf_prod.extend(prod)
                        break
                    elif '|' in s:
                        syms, prod = _replace_ebnf_choice(syms)
                        ebnf_prod.extend(prod)
                        break

            if syms[1:2] == [':'] or syms[1:2] == ['::=']:
                grammar.append((func, filename, lineno, syms[0], syms[2:]))
            else:
                grammar.append((func, filename, lineno, prodname, syms))
            grammar.extend(ebnf_prod)

        func = getattr(func, 'next_func', None)

    return grammar

class ParserMetaDict(OrderedDict):
# Replace EBNF repetition
def _replace_ebnf_repeat(syms):
    syms = list(syms)
    first = syms.index('{')
    end = syms.index('}', first)

    # Look for choices inside
    repeated_syms = syms[first+1:end]
    if any('|' in sym for sym in repeated_syms):
        repeated_syms, prods = _replace_ebnf_choice(repeated_syms)
    else:
        prods = []

    symname, moreprods = _generate_repeat_rules(repeated_syms)
    syms[first:end+1] = [symname]
    return syms, prods + moreprods

def _replace_ebnf_optional(syms):
    syms = list(syms)
    first = syms.index('[')
    end = syms.index(']', first)
    symname, prods = _generate_optional_rules(syms[first+1:end])
    syms[first:end+1] = [symname]
    return syms, prods

def _replace_ebnf_choice(syms):
    syms = list(syms)
    newprods = [ ]
    n = 0
    while n < len(syms):
        if '|' in syms[n]:
            symname, prods = _generate_choice_rules(syms[n].split('|'))
            syms[n] = symname
            newprods.extend(prods)
        n += 1
    return syms, newprods

# Generate grammar rules for repeated items
_gencount = 0

# Dictionary mapping name aliases generated by EBNF rules.

_name_aliases = { }

def _sanitize_symbols(symbols):
    for sym in symbols:
        if sym.startswith("'"):
            yield str(hex(ord(sym[1])))
        elif sym.isidentifier():
            yield sym
        else:
            yield sym.encode('utf-8').hex()

def _generate_repeat_rules(symbols):
    '''
    Symbols is a list of grammar symbols [ symbols ]. This
    generates code corresponding to these grammar constructions:

       @('repeat : many')
       def repeat(self, p):
           return p.many

       @('repeat :')
       def repeat(self, p):
           return []

       @('many : many symbols')
       def many(self, p):
           p.many.append(symbols)
           return p.many

       @('many : symbols')
       def many(self, p):
           return [ p.symbols ]
    '''
    global _gencount
    _gencount += 1
    basename = f'_{_gencount}_' + '_'.join(_sanitize_symbols(symbols))
    name = f'{basename}_repeat'
    oname = f'{basename}_items'
    iname = f'{basename}_item'
    symtext = ' '.join(symbols)

    _name_aliases[name] = symbols

    productions = [ ]
    _ = _decorator

    @_(f'{name} : {oname}')
    def repeat(self, p):
        return getattr(p, oname)

    @_(f'{name} : ')
    def repeat2(self, p):
        return []
    productions.extend(_collect_grammar_rules(repeat))
    productions.extend(_collect_grammar_rules(repeat2))

    @_(f'{oname} : {oname} {iname}')
    def many(self, p):
        items = getattr(p, oname)
        items.append(getattr(p, iname))
        return items

    @_(f'{oname} : {iname}')
    def many2(self, p):
        return [ getattr(p, iname) ]

    productions.extend(_collect_grammar_rules(many))
    productions.extend(_collect_grammar_rules(many2))

    @_(f'{iname} : {symtext}')
    def item(self, p):
        return tuple(p)

    productions.extend(_collect_grammar_rules(item))
    return name, productions

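So, per the docstring, each { ... } group becomes a trio of synthesized rules,
and the group's value in the enclosing rule is a list with one tuple per
repetition.  The arglist rule from the tests below relies on exactly that:

    @_('expr { COMMA expr }')
    def arglist(self, p):
        # p[1] is the repetition value: [(',', e1), (',', e2), ...]
        return [p.expr0, *p.expr1]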
def _generate_optional_rules(symbols):
    '''
    Symbols is a list of grammar symbols [ symbols ]. This
    generates code corresponding to these grammar constructions:

       @('optional : symbols')
       def optional(self, p):
           return p.symbols

       @('optional :')
       def optional(self, p):
           return None
    '''
    global _gencount
    _gencount += 1
    basename = f'_{_gencount}_' + '_'.join(_sanitize_symbols(symbols))
    name = f'{basename}_optional'
    symtext = ' '.join(symbols)

    _name_aliases[name] = symbols

    productions = [ ]
    _ = _decorator

    no_values = (None,) * len(symbols)

    @_(f'{name} : {symtext}')
    def optional(self, p):
        return tuple(p)

    @_(f'{name} : ')
    def optional2(self, p):
        return no_values

    productions.extend(_collect_grammar_rules(optional))
    productions.extend(_collect_grammar_rules(optional2))
    return name, productions

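The optional form is the degenerate case: present input yields a tuple of the
bracketed values, absent input yields a matching tuple of Nones, so a single
bracketed symbol reads as value-or-None.  From the tests below:

    @_('ID "(" [ arglist ] ")"')
    def statement(self, p):
        return (p.ID, p.arglist)    # p.arglist is None for a call with no arguments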
def _generate_choice_rules(symbols):
    '''
    Symbols is a list of grammar symbols such as [ 'PLUS', 'MINUS' ].
    This generates code corresponding to the following construction:

    @('PLUS', 'MINUS')
    def choice(self, p):
        return p[0]
    '''
    global _gencount
    _gencount += 1
    basename = f'_{_gencount}_' + '_'.join(_sanitize_symbols(symbols))
    name = f'{basename}_choice'

    _ = _decorator
    productions = [ ]

    def choice(self, p):
        return p[0]
    choice.__name__ = name
    choice = _(*symbols)(choice)
    productions.extend(_collect_grammar_rules(choice))
    return name, productions

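A choice synthesizes one production per alternative, each returning the
matched value, so a choice inside a repetition surfaces as a plain value in
each repetition tuple.  The expression rule from the tests below unpacks it
directly:

    @_('term { PLUS|MINUS term }')
    def expr(self, p):
        lval = p.term0
        for op, rval in p[1]:        # op is the chosen token's value: '+' or '-'
            lval = lval + rval if op == '+' else lval - rval
        return lval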
class ParserMetaDict(dict):
    '''
    Dictionary that allows decorated grammar rule functions to be overloaded
    '''
    def __setitem__(self, key, value):
        if key in self and callable(value) and hasattr(value, 'rules'):
            value.next_func = self[key]
            if not hasattr(value.next_func, 'rules'):
                raise GrammarError(f'Redefinition of {key}. Perhaps an earlier {key} is missing @_')
        super().__setitem__(key, value)

    def __getitem__(self, key):
        if key not in self and key.isupper() and key[:1] != '_':
            return key.upper()
        else:
            return super().__getitem__(key)

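__setitem__ is what lets one method name carry several decorated rule
definitions (each chains to the previous via next_func), and __getitem__ is
what lets undefined all-uppercase names be written bare in a class body.  A
hypothetical grammar exercising both:

    class ChainDemo(Parser):
        tokens = { NUMBER, PLUS }    # bare names resolved by ParserMetaDict.__getitem__

        @_('expr PLUS term')
        def expr(self, p):
            return p.expr + p.term

        @_('term')                   # same method name again: chained, not a redefinition
        def expr(self, p):
            return p.term

        @_('NUMBER')
        def term(self, p):
            return int(p.NUMBER)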
def _decorator(rule, *extra):
     rules = [rule, *extra]
     def decorate(func):
         func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
         return func
     return decorate

class ParserMeta(type):
    @classmethod
    def __prepare__(meta, *args, **kwargs):
        d = ParserMetaDict()
        def _(rule, *extra):
            rules = [rule, *extra]
            def decorate(func):
                func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
                return func
            return decorate
        d['_'] = _
#        def _(rule, *extra):
#            rules = [rule, *extra]
#            def decorate(func):
#                func.rules = [ *getattr(func, 'rules', []), *rules[::-1] ]
#                return func
#            return decorate
        d['_'] = _decorator
        return d

    def __new__(meta, clsname, bases, attributes):
@@ -1582,7 +1823,7 @@ class ParserMeta(type):

class Parser(metaclass=ParserMeta):
    # Logging object where debugging/diagnostic messages are sent
    log = PlyLogger(sys.stderr)
    log = SlyLogger(sys.stderr)

    # Debugging filename where parsetab.out data can be written
    debugfile = None
@@ -1650,11 +1891,10 @@ class Parser(metaclass=ParserMeta):
        Build the grammar from the grammar rules
        '''
        grammar_rules = []
        fail = False
        errors = ''
        # Check for non-empty symbols
        if not rules:
            cls.log.error('no grammar rules are defined')
            return False
            raise YaccError('No grammar rules are defined')

        grammar = Grammar(cls.tokens)

@@ -1663,8 +1903,7 @@ class Parser(metaclass=ParserMeta):
            try:
                grammar.set_precedence(term, assoc, level)
            except GrammarError as e:
                cls.log.error(str(e))
                fail = True
                errors += f'{e}\n'

        for name, func in rules:
            try:
@@ -1673,25 +1912,22 @@ class Parser(metaclass=ParserMeta):
                    try:
                        grammar.add_production(prodname, syms, pfunc, rulefile, ruleline)
                    except GrammarError as e:
                        cls.log.error(str(e))
                        fail = True
                        errors += f'{e}\n'
            except SyntaxError as e:
                cls.log.error(str(e))
                fail = True
                errors += f'{e}\n'
        try:
            grammar.set_start(getattr(cls, 'start', None))
        except GrammarError as e:
            cls.log.error(str(e))
            fail = True
            errors += f'{e}\n'

        undefined_symbols = grammar.undefined_symbols()
        for sym, prod in undefined_symbols:
            cls.log.error(f'%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
            fail = True
            errors += '%s:%d: Symbol %r used, but not defined as a token or a rule\n' % (prod.file, prod.line, sym)

        unused_terminals = grammar.unused_terminals()
        for term in unused_terminals:
            cls.log.warning('Token %r defined, but not used', term)
        if unused_terminals:
            unused_str = '{' + ','.join(unused_terminals) + '}'
            cls.log.warning(f'Token{"(s)" if len(unused_terminals) >1 else ""} {unused_str} defined, but not used')

        unused_rules = grammar.unused_rules()
        for prod in unused_rules:
@@ -1711,18 +1947,18 @@ class Parser(metaclass=ParserMeta):
        for u in unreachable:
           cls.log.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            cls.log.error('Infinite recursion detected for symbol %r', inf)
            fail = True
        if len(undefined_symbols) == 0:
            infinite = grammar.infinite_cycles()
            for inf in infinite:
                errors += 'Infinite recursion detected for symbol %r\n' % inf

        unused_prec = grammar.unused_precedence()
        for term, assoc in unused_prec:
            cls.log.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
            fail = True
            errors += 'Precedence rule %r defined for unknown symbol %r\n' % (assoc, term)

        cls._grammar = grammar
        return not fail
        if errors:
            raise YaccError('Unable to build grammar.\n'+errors)

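Grammar construction failures are now raised as a single YaccError at
class-creation time, carrying all collected messages, instead of returning
False and writing to the log.  A sketch of how that surfaces to a caller
(deliberately broken grammar; YaccError lives in sly.yacc):

    from sly.yacc import YaccError

    try:
        class Broken(Parser):
            tokens = { NUMBER }

            @_('NUMBER PLUS NUMBER')    # PLUS is never defined as a token
            def expr(self, p):
                return p[0] + p[2]
    except YaccError as e:
        print(e)    # Unable to build grammar. ... Symbol 'PLUS' used, but not defined ...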
    @classmethod
    def __build_lrtables(cls):
@@ -1776,8 +2012,7 @@ class Parser(metaclass=ParserMeta):
            raise YaccError('Invalid parser specification')

        # Build the underlying grammar object
        if not cls.__build_grammar(rules):
            raise YaccError('Invalid grammar')
        cls.__build_grammar(rules)

        # Build the LR tables
        if not cls.__build_lrtables():
@@ -1800,11 +2035,11 @@ class Parser(metaclass=ParserMeta):
        if token:
            lineno = getattr(token, 'lineno', 0)
            if lineno:
                sys.stderr.write(f'yacc: Syntax error at line {lineno}, token={token.type}\n')
                sys.stderr.write(f'sly: Syntax error at line {lineno}, token={token.type}\n')
            else:
                sys.stderr.write(f'yacc: Syntax error, token={token.type}')
                sys.stderr.write(f'sly: Syntax error, token={token.type}')
        else:
            sys.stderr.write('yacc: Parse error in input. EOF\n')
            sys.stderr.write('sly: Parse error in input. EOF\n')

    def errok(self):
        '''
@@ -1989,4 +2224,4 @@ class Parser(metaclass=ParserMeta):
                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')
            raise RuntimeError('sly: internal parser error!!!\n')

										152
									
								
								tests/test_ebnf.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										152
									
								
								tests/test_ebnf.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,152 @@
 | 
			
		||||
import pytest
from sly import Lexer, Parser

class CalcLexer(Lexer):
    # Set of token names.   This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, COMMA }
    literals = { '(', ')' }

    # String containing ignored characters between tokens
    ignore = ' \t'

    # Regular expression rules for tokens
    ID      = r'[a-zA-Z_][a-zA-Z0-9_]*'
    PLUS    = r'\+'
    MINUS   = r'-'
    TIMES   = r'\*'
    DIVIDE  = r'/'
    ASSIGN  = r'='
    COMMA   = r','

    @_(r'\d+')
    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    # Ignored text
    ignore_comment = r'\#.*'

    @_(r'\n+')
    def newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, t):
        self.errors.append(t.value[0])
        self.index += 1

    def __init__(self):
        self.errors = []


class CalcParser(Parser):
    tokens = CalcLexer.tokens

    def __init__(self):
        self.names = { }
        self.errors = [ ]

    @_('ID ASSIGN expr')
    def statement(self, p):
        self.names[p.ID] = p.expr

    @_('ID "(" [ arglist ] ")"')
    def statement(self, p):
        return (p.ID, p.arglist)

    @_('expr { COMMA expr }')
    def arglist(self, p):
        return [p.expr0, *p.expr1]

    @_('expr')
    def statement(self, p):
        return p.expr

    @_('term { PLUS|MINUS term }')
    def expr(self, p):
        lval = p.term0
        for op, rval in p[1]:
            if op == '+':
                lval = lval + rval
            elif op == '-':
                lval = lval - rval
        return lval

    @_('factor { TIMES|DIVIDE factor }')
    def term(self, p):
        lval = p.factor0
        for op, rval in p[1]:
            if op == '*':
                lval = lval * rval
            elif op == '/':
                lval = lval / rval
        return lval

    @_('MINUS factor')
    def factor(self, p):
        return -p.factor

    @_("'(' expr ')'")
    def factor(self, p):
        return p.expr

    @_('NUMBER')
    def factor(self, p):
        return int(p.NUMBER)

    @_('ID')
    def factor(self, p):
        try:
            return self.names[p.ID]
        except LookupError:
            print(f'Undefined name {p.ID!r}')
            return 0

    def error(self, tok):
        self.errors.append(tok)


# Test basic recognition of various tokens and literals
def test_simple():
    lexer = CalcLexer()
    parser = CalcParser()

    result = parser.parse(lexer.tokenize('a = 3 + 4 * (5 + 6)'))
    assert result == None
    assert parser.names['a'] == 47

    result = parser.parse(lexer.tokenize('3 + 4 * (5 + 6)'))
    assert result == 47

def test_ebnf():
    lexer = CalcLexer()
    parser = CalcParser()
    result = parser.parse(lexer.tokenize('a()'))
    assert result == ('a', None)

    result = parser.parse(lexer.tokenize('a(2+3)'))
    assert result == ('a', [5])

    result = parser.parse(lexer.tokenize('a(2+3, 4+5)'))
    assert result == ('a', [5, 9])

def test_parse_error():
    lexer = CalcLexer()
    parser = CalcParser()

    result = parser.parse(lexer.tokenize('a 123 4 + 5'))
    assert result == 9
    assert len(parser.errors) == 1
    assert parser.errors[0].type == 'NUMBER'
    assert parser.errors[0].value == 123

# TO DO:  Add tests
# - error productions
# - embedded actions
# - lineno tracking
# - various error cases caught during parser construction
tests/test_lex.py
@@ -1,6 +1,11 @@
import pytest
from sly import Lexer

try:
    import regex
except ImportError:
    regex = None

class CalcLexer(Lexer):
    # Set of token names.   This is always required
    tokens = {
@@ -47,13 +52,38 @@ class CalcLexer(Lexer):
        t.value = t.value.upper()
        return t

    def error(self, value):
        self.errors.append(value)
    def error(self, t):
        self.errors.append(t.value)
        self.index += 1
        if hasattr(self, 'return_error'):
            return t

    def __init__(self):
        self.errors = []

if regex is not None:
    class RegexModuleCalcLexer(Lexer):
        regex_module = regex

        tokens = { 'ID', 'PLUS', 'MINUS' }

        literals = { '(', ')' }
        ignore = ' \t'

        ID      = r'\p{Ll}+'  # Unicode lowercase letters, regex module feature
        PLUS    = r'\+'
        MINUS   = r'-'

        ignore_comment = r'\#.*'

        @_(r'\n+')
        def newline(self, t):
            self.lineno += t.value.count('\n')

        def ID(self, t):
            t.value = t.value.upper()
            return t

# Test basic recognition of various tokens and literals
def test_tokens():
    lexer = CalcLexer()
@@ -63,6 +93,17 @@ def test_tokens():
    assert types == ['ID','NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
    assert vals == ['ABC', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']

# Test third-party regex module support
@pytest.mark.skipif(regex is None,
                    reason="third-party regex module not installed")
def test_3rd_party_regex_module():
    lexer = RegexModuleCalcLexer()
    toks = list(lexer.tokenize('a + b - c'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['ID','PLUS','ID','MINUS','ID']
    assert vals == ['A', '+', 'B', '-', 'C']

# Test ignored comments and newlines
def test_ignored():
    lexer = CalcLexer()
@@ -85,9 +126,125 @@ def test_error():
    assert vals == [123, '+', '-']
    assert lexer.errors == [ ':+-' ]

# Test error token return handling
def test_error_return():
    lexer = CalcLexer()
    lexer.return_error = True
    toks = list(lexer.tokenize('123 :+-'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
    assert vals == [123, ':+-', '+', '-']
    assert lexer.errors == [ ':+-' ]


class ModernCalcLexer(Lexer):
    # Set of token names.   This is always required
    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, LT, LE, IF, ELSE }
    literals = { '(', ')' }

    # String containing ignored characters between tokens
    ignore = ' \t'

    # Regular expression rules for tokens
    ID      = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ID['if'] = IF
    ID['else'] = ELSE

    NUMBER  = r'\d+'
    PLUS    = r'\+'
    MINUS   = r'-'
    TIMES   = r'\*'
    DIVIDE  = r'/'
    ASSIGN  = r'='
    LE      = r'<='
    LT      = r'<'

    def NUMBER(self, t):
        t.value = int(t.value)
        return t

    # Ignored text
    ignore_comment = r'\#.*'

    @_(r'\n+')
    def ignore_newline(self, t):
        self.lineno += t.value.count('\n')

    # Attached rule
    def ID(self, t):
        t.value = t.value.upper()
        return t

    def error(self, t):
        self.errors.append(t.value)
        self.index += 1
        if hasattr(self, 'return_error'):
            return t

    def __init__(self):
        self.errors = []


# Test basic recognition of various tokens and literals
def test_modern_tokens():
    lexer = ModernCalcLexer()
    toks = list(lexer.tokenize('abc if else 123 + - * / = < <= ( )'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['ID','IF','ELSE', 'NUMBER','PLUS','MINUS','TIMES','DIVIDE','ASSIGN','LT','LE','(',')']
    assert vals == ['ABC','if','else', 123, '+', '-', '*', '/', '=', '<', '<=', '(', ')']

# Test ignored comments and newlines
def test_modern_ignored():
    lexer = ModernCalcLexer()
    toks = list(lexer.tokenize('\n\n# A comment\n123\nabc\n'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    linenos = [t.lineno for t in toks]
    assert types == ['NUMBER', 'ID']
    assert vals == [123, 'ABC']
    assert linenos == [4,5]
    assert lexer.lineno == 6

# Test error handling
def test_modern_error():
    lexer = ModernCalcLexer()
    toks = list(lexer.tokenize('123 :+-'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['NUMBER', 'PLUS', 'MINUS']
    assert vals == [123, '+', '-']
    assert lexer.errors == [ ':+-' ]

# Test error token return handling
def test_modern_error_return():
    lexer = ModernCalcLexer()
    lexer.return_error = True
    toks = list(lexer.tokenize('123 :+-'))
    types = [t.type for t in toks]
    vals = [t.value for t in toks]
    assert types == ['NUMBER', 'ERROR', 'PLUS', 'MINUS']
    assert vals == [123, ':+-', '+', '-']
    assert lexer.errors == [ ':+-' ]

# Test Lexer Inheritance.  This class should inherit all of the tokens
# and features of ModernCalcLexer, but add two new tokens to it.  The
# PLUSPLUS token matches before the PLUS token.

if False:
    class SubModernCalcLexer(ModernCalcLexer):
        tokens |= { DOLLAR, PLUSPLUS }
        DOLLAR = r'\$'
        PLUSPLUS = r'\+\+'
        PLUSPLUS.before = PLUS

    def test_lexer_inherit():
        lexer = SubModernCalcLexer()
        toks = list(lexer.tokenize('123 + - $ ++ if'))
        types = [t.type for t in toks]
        vals = [t.value for t in toks]
        assert types == ['NUMBER', 'PLUS', 'MINUS', 'DOLLAR', 'PLUSPLUS', 'IF']
        assert vals == [123, '+', '-', '$', '++', 'if']
tests/test_parser.py
@@ -3,16 +3,7 @@ from sly import Lexer, Parser

class CalcLexer(Lexer):
    # Set of token names.   This is always required
    tokens = {
        'ID',
        'NUMBER',
        'PLUS',
        'MINUS',
        'TIMES',
        'DIVIDE',
        'ASSIGN',
        }

    tokens = { ID, NUMBER, PLUS, MINUS, TIMES, DIVIDE, ASSIGN, COMMA }
    literals = { '(', ')' }

    # String containing ignored characters between tokens
@@ -25,6 +16,7 @@ class CalcLexer(Lexer):
    TIMES   = r'\*'
    DIVIDE  = r'/'
    ASSIGN  = r'='
    COMMA   = r','

    @_(r'\d+')
    def NUMBER(self, t):
@@ -38,8 +30,8 @@ class CalcLexer(Lexer):
    def newline(self, t):
        self.lineno += t.value.count('\n')

    def error(self, value):
        self.errors.append(value)
    def error(self, t):
        self.errors.append(t.value[0])
        self.index += 1

    def __init__(self):
@@ -49,9 +41,9 @@ class CalcParser(Parser):
    tokens = CalcLexer.tokens

    precedence = (
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE'),
        ('right', 'UMINUS'),
        ('left', PLUS, MINUS),
        ('left', TIMES, DIVIDE),
        ('right', UMINUS),
        )

    def __init__(self):
@@ -62,6 +54,14 @@ class CalcParser(Parser):
    def statement(self, p):
        self.names[p.ID] = p.expr

    @_('ID "(" [ arglist ] ")"')
    def statement(self, p):
        return (p.ID, p.arglist)

    @_('expr { COMMA expr }')
    def arglist(self, p):
        return [p.expr0, *p.expr1]

    @_('expr')
    def statement(self, p):
        return p.expr
@@ -118,6 +118,18 @@ def test_simple():
    result = parser.parse(lexer.tokenize('3 + 4 * (5 + 6)'))
    assert result == 47

def test_ebnf():
    lexer = CalcLexer()
    parser = CalcParser()
    result = parser.parse(lexer.tokenize('a()'))
    assert result == ('a', None)

    result = parser.parse(lexer.tokenize('a(2+3)'))
    assert result == ('a', [5])

    result = parser.parse(lexer.tokenize('a(2+3, 4+5)'))
    assert result == ('a', [5, 9])

def test_parse_error():
    lexer = CalcLexer()
    parser = CalcParser()