More doc updates

parent 5c3083712f
commit 0a17f78d2e
README.md (192 lines changed)

@@ -1,4 +1,192 @@
-# SLY (Sly Lex Yacc)
-
-The name says it all.
+SLY (Sly Lex-Yacc) Version 0.0
+
+Copyright (C) 2016
+David M. Beazley (Dabeaz LLC)
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+* Neither the name of the David Beazley or Dabeaz LLC may be used to
+  endorse or promote products derived from this software without
+  specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Requirements
+============
+
+SLY requires the use of Python 3.5 or greater. Older versions
+of Python are not supported.
+
+Introduction
+============
+
+SLY is a 100% Python implementation of the lex and yacc tools
+commonly used to write parsers and compilers. Parsing is
+based on the same LALR(1) algorithm used by many yacc tools.
+Here are a few notable features:
+
+- SLY provides *very* extensive error reporting and diagnostic
+  information to assist in parser construction. The original
+  implementation was developed for instructional purposes. As
+  a result, the system tries to identify the most common types
+  of errors made by novice users.
+
+- SLY provides full support for empty productions, error recovery,
+  precedence specifiers, and moderately ambiguous grammars.
+
+- SLY uses various Python metaprogramming features to specify
+  lexers and parsers. There are no generated files or extra
+  steps involved. You simply write Python code and run it.
+
+- SLY can be used to build parsers for "real" programming languages.
+  Although it is not ultra-fast due to its Python implementation,
+  SLY can be used to parse grammars consisting of several hundred
+  rules (as might be found for a language like C).
+
+An Example
+==========
+
+SLY is probably best illustrated by an example. Here's what it
+looks like to write a parser that can evaluate simple arithmetic
+expressions and store variables:
+
+    # -----------------------------------------------------------------------------
+    # calc.py
+    # -----------------------------------------------------------------------------
+
+    from sly import Lexer, Parser
+
+    class CalcLexer(Lexer):
+        tokens = {
+            'NAME', 'NUMBER',
+        }
+        ignore = ' \t'
+        literals = { '=', '+', '-', '*', '/', '(', ')' }
+
+        # Tokens
+        NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+        @_(r'\d+')
+        def NUMBER(self, t):
+            t.value = int(t.value)
+            return t
+
+        @_(r'\n+')
+        def newline(self, t):
+            self.lineno += t.value.count('\n')
+
+        def error(self, value):
+            print("Illegal character '%s'" % value[0])
+            self.index += 1
+
+    class CalcParser(Parser):
+        tokens = CalcLexer.tokens
+
+        precedence = (
+            ('left', '+', '-'),
+            ('left', '*', '/'),
+            ('right', 'UMINUS'),
+        )
+
+        def __init__(self):
+            self.names = { }
+
+        @_('NAME "=" expr')
+        def statement(self, p):
+            self.names[p.NAME] = p.expr
+
+        @_('expr')
+        def statement(self, p):
+            print(p.expr)
+
+        @_('expr "+" expr')
+        def expr(self, p):
+            return p.expr0 + p.expr1
+
+        @_('expr "-" expr')
+        def expr(self, p):
+            return p.expr0 - p.expr1
+
+        @_('expr "*" expr')
+        def expr(self, p):
+            return p.expr0 * p.expr1
+
+        @_('expr "/" expr')
+        def expr(self, p):
+            return p.expr0 / p.expr1
+
+        @_('"-" expr %prec UMINUS')
+        def expr(self, p):
+            return -p.expr
+
+        @_('"(" expr ")"')
+        def expr(self, p):
+            return p.expr
+
+        @_('NUMBER')
+        def expr(self, p):
+            return p.NUMBER
+
+        @_('NAME')
+        def expr(self, p):
+            try:
+                return self.names[p.NAME]
+            except LookupError:
+                print("Undefined name '%s'" % p.NAME)
+                return 0
+
+    if __name__ == '__main__':
+        lexer = CalcLexer()
+        parser = CalcParser()
+        while True:
+            try:
+                text = input('calc > ')
+            except EOFError:
+                break
+            if text:
+                parser.parse(lexer.tokenize(text))
+
+Resources
+=========
+
+For a detailed overview of parsing theory, consult the excellent
+book "Compilers: Principles, Techniques, and Tools" by Aho, Sethi, and
+Ullman. The topics found in "Lex & Yacc" by Levine, Mason, and Brown
+may also be useful.
+
+The GitHub page for SLY can be found at:
+
+    https://github.com/dabeaz/sly
+
+Please direct bug reports and pull requests to the GitHub page.
+To contact me directly, send email to dave@dabeaz.com or contact
+me on Twitter (@dabeaz).
+
+-- Dave
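For illustration only, not part of the commit: a minimal sketch of driving the
calc example above programmatically instead of through its interactive loop.
The import assumes the example is saved as calc.py on the current path;
everything else follows from the code shown.

    from calc import CalcLexer, CalcParser   # assumes the example above is calc.py

    lexer = CalcLexer()
    parser = CalcParser()
    parser.parse(lexer.tokenize('x = 3 + 4 * 5'))   # stores 23 in parser.names['x']
    parser.parse(lexer.tokenize('x + 1'))           # prints 24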
docs/sly.rst (129 lines changed)

@@ -638,35 +638,35 @@ SLY::

     # Grammar rules and actions
     @_('expr PLUS term')
     def expr(self, p):
-        return p[0] + p[2]
+        return p.expr + p.term

     @_('expr MINUS term')
     def expr(self, p):
-        return p[0] - p[2]
+        return p.expr - p.term

     @_('term')
     def expr(self, p):
-        return p[0]
+        return p.term

     @_('term TIMES factor')
     def term(self, p):
-        return p[0] * p[2]
+        return p.term * p.factor

     @_('term DIVIDE factor')
     def term(self, p):
-        return p[0] / p[2]
+        return p.term / p.factor

     @_('factor')
     def term(self, p):
-        return p[0]
+        return p.factor

     @_('NUMBER')
     def factor(self, p):
-        return p[0]
+        return p.NUMBER

     @_('LPAREN expr RPAREN')
     def factor(self, p):
-        return p[1]
+        return p.expr

 if __name__ == '__main__':
     lexer = CalcLexer()
@@ -697,37 +697,45 @@ becomes a method like this::

 The method is triggered when that grammar rule is recognized on the
 input. As an argument, the method receives a sequence of grammar symbol
-values ``p`` that is accessed as an array of symbols. The mapping between
-elements of ``p`` and the grammar rule is as shown here::
+values in ``p``. There are two ways to access these symbols. First, you
+can use symbol names as shown::

-        # p[0] p[1] p[2]
-        # |    |    |
     @_('expr PLUS term')
     def expr(self, p):
-        ...
+        return p.expr + p.term

-For tokens, the value of the corresponding ``p[i]`` is the *same* as
-the ``p.value`` attribute assigned to tokens in the lexer module. For
-non-terminals, the value is whatever was returned by the methods
-defined for that rule.
-
-Within each rule, you return a value that becomes associated with that
-grammar symbol elsewhere. In the example shown, rules are carrying out
-the evaluation of an arithmetic expression::
+Alternatively, you can also index ``p`` like an array::

     @_('expr PLUS term')
     def expr(self, p):
         return p[0] + p[2]

+For tokens, the value of the corresponding ``p.symbol`` or ``p[i]`` is
+the *same* as the ``p.value`` attribute assigned to tokens in the
+lexer module. For non-terminals, the value is whatever was returned
+by the methods defined for that rule.
+
+If a grammar rule includes the same symbol name more than once, you
+need to append a numeric suffix to disambiguate the symbol name when
+you're accessing values. For example::
+
+    @_('expr PLUS expr')
+    def expr(self, p):
+        return p.expr0 + p.expr1
+
+Finally, within each rule, you always return a value that becomes
+associated with that grammar symbol elsewhere. This is how values
+propagate within the grammar.
+
 There are many other kinds of things that might happen in a rule
 though. For example, a rule might construct part of a parse tree
 instead::

     @_('expr PLUS term')
     def expr(self, p):
-        return ('+', p[0], p[2])
+        return ('+', p.expr, p.term)

-or perhaps create an instance related to an abstract syntax tree::
+or it might create an instance related to an abstract syntax tree::

     class BinOp(object):
         def __init__(self, op, left, right):
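For illustration only, not part of the commit: a runnable mini-sketch
confirming that the two access styles described above refer to the same
values. The grammar, class, and token names here are invented for brevity.

    from sly import Lexer, Parser

    class NumLexer(Lexer):
        tokens = { 'NUMBER', 'PLUS' }
        ignore = ' '
        PLUS = r'\+'

        @_(r'\d+')
        def NUMBER(self, t):
            t.value = int(t.value)   # this value is what p.NUMBER / p[i] will see
            return t

    class AddParser(Parser):
        tokens = NumLexer.tokens

        @_('expr PLUS term')
        def expr(self, p):
            # By name: p.expr, p.term.  By position: p[0], p[2].
            assert (p.expr, p.term) == (p[0], p[2])
            return p.expr + p.term

        @_('term')
        def expr(self, p):
            return p.term

        @_('NUMBER')
        def term(self, p):
            return p.NUMBER          # identical to p[0]

    print(AddParser().parse(NumLexer().tokenize('1 + 2 + 3')))   # -> 6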
@@ -737,7 +745,7 @@ or perhaps create an instance related to an abstract syntax tree::

     @_('expr PLUS term')
     def expr(self, p):
-        return BinOp('+', p[0], p[2])
+        return BinOp('+', p.expr, p.term)

 The key thing is that the method returns the value that's going to
 be attached to the symbol "expr" in this case. This is the propagation
@@ -751,25 +759,29 @@ For example, suppose you had two rules that were constructing a parse tree::

     @_('expr PLUS term')
     def expr(self, p):
-        return ('+', p[0], p[2])
+        return ('+', p.expr, p.term)

     @_('expr MINUS term')
     def expr(self, p):
-        return ('-', p[0], p[2])
+        return ('-', p.expr, p.term)

 Instead of writing two functions, you might write a single function like this::

     @_('expr PLUS term',
        'expr MINUS term')
     def expr(self, p):
-        return (p[1], p[0], p[2])
+        return (p[1], p.expr, p.term)

+In this example, the operator could be ``PLUS`` or ``MINUS``. Thus,
+we can't use the symbolic name to refer to its value. Instead, use the array
+index ``p[1]`` to get it as shown.
+
 In general, the ``@_()`` decorator for any given method can list
 multiple grammar rules. When combining grammar rules into a single
-function though, it is usually a good idea for all of the rules to
-have a similar structure (e.g., the same number of terms). Otherwise,
-the corresponding action code may end up being more complicated than
-necessary.
+function though, all of the rules should have a similar structure
+(e.g., the same number of terms and consistent symbol names).
+Otherwise, the corresponding action code may end up being more
+complicated than necessary.

 Character Literals
 ^^^^^^^^^^^^^^^^^^
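For illustration only, not part of the commit: one way to keep a combined
rule's body flat is to dispatch on the operator value ``p[1]``. The
``BINOPS`` table and every name below are my additions, not SLY API.

    import operator
    from sly import Lexer, Parser

    BINOPS = {'+': operator.add, '-': operator.sub}

    class OpLexer(Lexer):
        tokens = { 'NUMBER', 'PLUS', 'MINUS' }
        ignore = ' '
        PLUS = r'\+'
        MINUS = r'-'

        @_(r'\d+')
        def NUMBER(self, t):
            t.value = int(t.value)
            return t

    class OpParser(Parser):
        tokens = OpLexer.tokens

        @_('expr PLUS term',
           'expr MINUS term')
        def expr(self, p):
            # Both rules share one shape; p[1] is the operator token's value.
            return BINOPS[p[1]](p.expr, p.term)

        @_('term')
        def expr(self, p):
            return p.term

        @_('NUMBER')
        def term(self, p):
            return p.NUMBER

    print(OpParser().parse(OpLexer().tokenize('7 - 2 + 1')))   # -> 6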
@@ -779,11 +791,11 @@ literals. For example::

     @_('expr "+" term')
     def expr(self, p):
-        return p[0] + p[2]
+        return p.expr + p.term

     @_('expr "-" term')
     def expr(self, p):
-        return p[0] - p[2]
+        return p.expr - p.term

 A character literal must be enclosed in quotes such as ``"+"``. In
 addition, if literals are used, they must be declared in the
@@ -898,16 +910,33 @@ like this::

         ('left', 'PLUS', 'MINUS'),
         ('left', 'TIMES', 'DIVIDE'),
     )

+    # Rules where precedence is applied
+    @_('expr PLUS expr')
+    def expr(self, p):
+        return p.expr0 + p.expr1
+
+    @_('expr MINUS expr')
+    def expr(self, p):
+        return p.expr0 - p.expr1
+
+    @_('expr TIMES expr')
+    def expr(self, p):
+        return p.expr0 * p.expr1
+
+    @_('expr DIVIDE expr')
+    def expr(self, p):
+        return p.expr0 / p.expr1
+
     ...

-This declaration specifies that ``PLUS``/``MINUS`` have the
-same precedence level and are left-associative and that
+This ``precedence`` declaration specifies that ``PLUS``/``MINUS`` have
+the same precedence level and are left-associative and that
 ``TIMES``/``DIVIDE`` have the same precedence and are
-left-associative. Within the ``precedence`` declaration, tokens
-are ordered from lowest to highest precedence. Thus, this declaration
-specifies that ``TIMES``/``DIVIDE`` have higher precedence
-than ``PLUS``/``MINUS`` (since they appear later in the
-precedence specification).
+left-associative. Within the ``precedence`` declaration, tokens are
+ordered from lowest to highest precedence. Thus, this declaration
+specifies that ``TIMES``/``DIVIDE`` have higher precedence than
+``PLUS``/``MINUS`` (since they appear later in the precedence
+specification).

 The precedence specification works by associating a numerical
 precedence level value and associativity direction to the listed
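For illustration only, not part of the commit: a self-contained sketch of the
table resolving an otherwise ambiguous grammar; all names are invented.

    from sly import Lexer, Parser

    class MiniLexer(Lexer):
        tokens = { 'NUMBER', 'PLUS', 'TIMES' }
        ignore = ' '
        PLUS = r'\+'
        TIMES = r'\*'

        @_(r'\d+')
        def NUMBER(self, t):
            t.value = int(t.value)
            return t

    class MiniParser(Parser):
        tokens = MiniLexer.tokens

        # Lowest to highest: TIMES binds tighter than PLUS.
        precedence = (
            ('left', 'PLUS'),
            ('left', 'TIMES'),
        )

        @_('expr PLUS expr')
        def expr(self, p):
            return p.expr0 + p.expr1

        @_('expr TIMES expr')
        def expr(self, p):
            return p.expr0 * p.expr1

        @_('NUMBER')
        def expr(self, p):
            return p.NUMBER

    print(MiniParser().parse(MiniLexer().tokenize('2 + 3 * 4')))   # -> 14, not 20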
@@ -977,7 +1006,7 @@ Now, in the grammar file, you write the unary minus rule like this::

     @_('MINUS expr %prec UMINUS')
     def expr(self, p):
-        p[0] = -p[2]
+        return -p.expr

 In this case, ``%prec UMINUS`` overrides the default rule precedence--setting it to that
 of ``UMINUS`` in the precedence specifier.
@@ -1310,15 +1339,15 @@ like this::

         'expr TIMES expr',
         'expr DIVIDE expr')
     def expr(self, p):
-        return ('binary-expression', p[1], p[0], p[2])
+        return ('binary-expression', p[1], p.expr0, p.expr1)

     @_('LPAREN expr RPAREN')
     def expr(self, p):
-        return ('group-expression', p[1])
+        return ('group-expression', p.expr)

     @_('NUMBER')
     def expr(self, p):
-        return ('number-expression', p[0])
+        return ('number-expression', p.NUMBER)

 Another approach is to create a set of data structures for different
 kinds of abstract syntax tree nodes and create different node types
@@ -1342,15 +1371,15 @@ in each rule::

         'expr TIMES expr',
         'expr DIVIDE expr')
     def expr(self, p):
-        return BinOp(p[1], p[0], p[2])
+        return BinOp(p[1], p.expr0, p.expr1)

     @_('LPAREN expr RPAREN')
     def expr(self, p):
-        return p[1]
+        return p.expr

     @_('NUMBER')
     def expr(self, p):
-        return Number(p[0])
+        return Number(p.NUMBER)

 The advantage to this approach is that it may make it easier to attach
 more complicated semantics, type checking, code generation, and other
@@ -1385,7 +1414,7 @@ at the end of a rule. For example, suppose you have a rule like this::

     @_('A B C D')
     def foo(self, p):
-        print("Parsed a foo", p[0], p[1], p[2], p[3])
+        print("Parsed a foo", p.A, p.B, p.C, p.D)

 In this case, the supplied action code only executes after all of the
 symbols ``A``, ``B``, ``C``, and ``D`` have been
@@ -1396,8 +1425,8 @@ been parsed. To do this, write an empty rule like this::

     @_('A seen_A B C D')
     def foo(self, p):
-        print("Parsed a foo", p[0], p[2], p[3], p[4])
-        print("seen_A returned", p[1])
+        print("Parsed a foo", p.A, p.B, p.C, p.D)
+        print("seen_A returned", p.seen_A)

     @_('')
     def seen_A(self, p):
@@ -8,14 +8,31 @@ sys.path.insert(0, "../..")

 from sly import Lexer, Parser

 class CalcLexer(Lexer):
-    tokens = (
-        'NAME', 'NUMBER',
-    )
-    ignore = ' \t'
-    literals = ['=', '+', '-', '*', '/', '(', ')']
+    # Set of token names. This is always required
+    tokens = {
+        'ID',
+        'NUMBER',
+        'PLUS',
+        'MINUS',
+        'TIMES',
+        'DIVIDE',
+        'ASSIGN',
+        'LPAREN',
+        'RPAREN',
+    }

-    # Tokens
-    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+    # String containing ignored characters between tokens
+    ignore = ' \t'
+
+    # Regular expression rules for tokens
+    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
+    PLUS = r'\+'
+    MINUS = r'-'
+    TIMES = r'\*'
+    DIVIDE = r'/'
+    ASSIGN = r'='
+    LPAREN = r'\('
+    RPAREN = r'\)'

     @_(r'\d+')
     def NUMBER(self, t):
@@ -31,66 +48,50 @@ class CalcLexer(Lexer):
         self.index += 1

 class CalcParser(Parser):
+    # Get the token list from the lexer (required)
     tokens = CalcLexer.tokens

-    precedence = (
-        ('left', '+', '-'),
-        ('left', '*', '/'),
-        ('right', 'UMINUS'),
-    )
-
-    def __init__(self):
-        self.names = { }
-
-    @_('NAME "=" expression')
-    def statement(self, p):
-        self.names[p.NAME] = p.expression
-
-    @_('expression')
-    def statement(self, p):
-        print(p.expression)
-
-    @_('expression "+" expression',
-       'expression "-" expression',
-       'expression "*" expression',
-       'expression "/" expression')
-    def expression(self, p):
-        if p[1] == '+':
-            return p.expression0 + p.expression1
-        elif p[1] == '-':
-            return p.expression0 - p.expression1
-        elif p[1] == '*':
-            return p.expression0 * p.expression1
-        elif p[1] == '/':
-            return p.expression0 / p.expression1
-
-    @_('"-" expression %prec UMINUS')
-    def expression(self, p):
-        return -p.expression
-
-    @_('"(" expression ")"')
-    def expression(self, p):
-        return p.expression
+    # Grammar rules and actions
+    @_('expr PLUS term')
+    def expr(self, p):
+        return p.expr + p.term
+
+    @_('expr MINUS term')
+    def expr(self, p):
+        return p.expr - p.term
+
+    @_('term')
+    def expr(self, p):
+        return p.term
+
+    @_('term TIMES factor')
+    def term(self, p):
+        return p.term * p.factor
+
+    @_('term DIVIDE factor')
+    def term(self, p):
+        return p.term / p.factor
+
+    @_('factor')
+    def term(self, p):
+        return p.factor

     @_('NUMBER')
-    def expression(self, p):
+    def factor(self, p):
         return p.NUMBER

-    @_('NAME')
-    def expression(self, p):
-        try:
-            return self.names[p.NAME]
-        except LookupError:
-            print("Undefined name '%s'" % p.NAME)
-            return 0
+    @_('LPAREN expr RPAREN')
+    def factor(self, p):
+        return p.expr

 if __name__ == '__main__':
     lexer = CalcLexer()
     parser = CalcParser()

     while True:
         try:
             text = input('calc > ')
+            result = parser.parse(lexer.tokenize(text))
+            print(result)
         except EOFError:
             break
-        if text:
-            parser.parse(lexer.tokenize(text))
example/calc_prec/calc.py (new file, 98 lines)

@@ -0,0 +1,98 @@
+# -----------------------------------------------------------------------------
+# calc.py
+# -----------------------------------------------------------------------------
+
+import sys
+sys.path.insert(0, "../..")
+
+from sly import Lexer, Parser
+
+class CalcLexer(Lexer):
+    tokens = {
+        'NAME', 'NUMBER',
+    }
+    ignore = ' \t'
+    literals = { '=', '+', '-', '*', '/', '(', ')' }
+
+    # Tokens
+    NAME = r'[a-zA-Z_][a-zA-Z0-9_]*'
+
+    @_(r'\d+')
+    def NUMBER(self, t):
+        t.value = int(t.value)
+        return t
+
+    @_(r'\n+')
+    def newline(self, t):
+        self.lineno += t.value.count('\n')
+
+    def error(self, value):
+        print("Illegal character '%s'" % value[0])
+        self.index += 1
+
+class CalcParser(Parser):
+    tokens = CalcLexer.tokens
+
+    precedence = (
+        ('left', '+', '-'),
+        ('left', '*', '/'),
+        ('right', 'UMINUS'),
+    )
+
+    def __init__(self):
+        self.names = { }
+
+    @_('NAME "=" expr')
+    def statement(self, p):
+        self.names[p.NAME] = p.expr
+
+    @_('expr')
+    def statement(self, p):
+        print(p.expr)
+
+    @_('expr "+" expr')
+    def expr(self, p):
+        return p.expr0 + p.expr1
+
+    @_('expr "-" expr')
+    def expr(self, p):
+        return p.expr0 - p.expr1
+
+    @_('expr "*" expr')
+    def expr(self, p):
+        return p.expr0 * p.expr1
+
+    @_('expr "/" expr')
+    def expr(self, p):
+        return p.expr0 / p.expr1
+
+    @_('"-" expr %prec UMINUS')
+    def expr(self, p):
+        return -p.expr
+
+    @_('"(" expr ")"')
+    def expr(self, p):
+        return p.expr
+
+    @_('NUMBER')
+    def expr(self, p):
+        return p.NUMBER
+
+    @_('NAME')
+    def expr(self, p):
+        try:
+            return self.names[p.NAME]
+        except LookupError:
+            print("Undefined name '%s'" % p.NAME)
+            return 0
+
+if __name__ == '__main__':
+    lexer = CalcLexer()
+    parser = CalcParser()
+    while True:
+        try:
+            text = input('calc > ')
+        except EOFError:
+            break
+        if text:
+            parser.parse(lexer.tokenize(text))
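For illustration only, not part of the commit: a hypothetical session with
the new example above; the outputs follow from its grammar and precedence
table.

    calc > 2 + 3 * 4
    14
    calc > -2 * 3
    -6
    calc > x = (2 + 3) * 4
    calc > x / 2
    10.0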
@@ -126,6 +126,8 @@ class YaccProduction:
     @property
     def lineno(self):
         for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
             lineno = getattr(tok, 'lineno', None)
             if lineno:
                 return lineno
@@ -134,6 +136,8 @@ class YaccProduction:
     @property
     def index(self):
         for tok in self._slice:
+            if isinstance(tok, YaccSymbol):
+                continue
             index = getattr(tok, 'index', None)
             if index:
                 return index
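For illustration only, not part of the commit: the patched ``lineno``
property with the new guard in context, reconstructed from the hunk above;
whatever follows the final ``return`` in the real file is not shown in this
diff.

    @property
    def lineno(self):
        # Scan the matched symbols left to right. Values produced by earlier
        # reductions are YaccSymbol instances and carry no line information,
        # so skip them; only genuine tokens can supply a lineno.
        for tok in self._slice:
            if isinstance(tok, YaccSymbol):
                continue
            lineno = getattr(tok, 'lineno', None)
            if lineno:
                return lineno
        # (remainder of the method is unchanged and not shown in this diff)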
@@ -1680,7 +1684,7 @@ class Parser(metaclass=ParserMeta):
         undefined_symbols = grammar.undefined_symbols()
         for sym, prod in undefined_symbols:
             cls.log.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
-            fai = True
+            fail = True

         unused_terminals = grammar.unused_terminals()
         for term in unused_terminals: