Commit 616fbf55 authored by SVN-Git Migration's avatar SVN-Git Migration

Imported Upstream version 0.7.3

parent 1f43e089
Metadata-Version: 1.0
Name: rply
Version: 0.7.2
Version: 0.7.3
Summary: A pure Python Lex/Yacc that works with RPython
Home-page: UNKNOWN
Author: Alex Gaynor
......@@ -16,6 +16,8 @@ Description: RPLY
It is a more-or-less direct port of David Beazley's awesome PLY, with a new
public API, and RPython support.
You can find the documentation `online`_.
Basic API:
.. code:: python
......@@ -126,4 +128,6 @@ Description: RPLY
* `PyPI releases <https://pypi.python.org/pypi/rply>`_
* `Talk at PyCon US 2013: So you want to write an interpreter? <http://pyvideo.org/video/1694/so-you-want-to-write-an-interpreter>`_
.. _`online`: https://rply.readthedocs.org/
Platform: UNKNOWN
......@@ -8,6 +8,8 @@ Welcome to RPLY! A pure python parser generator, that also works with RPython.
It is a more-or-less direct port of David Beazley's awesome PLY, with a new
public API, and RPython support.
You can find the documentation `online`_.
Basic API:
.. code:: python
......@@ -117,3 +119,5 @@ Links
* `Source code and issue tracker <https://github.com/alex/rply/>`_
* `PyPI releases <https://pypi.python.org/pypi/rply>`_
* `Talk at PyCon US 2013: So you want to write an interpreter? <http://pyvideo.org/video/1694/so-you-want-to-write-an-interpreter>`_
.. _`online`: https://rply.readthedocs.org/
Metadata-Version: 1.0
Name: rply
Version: 0.7.2
Version: 0.7.3
Summary: A pure Python Lex/Yacc that works with RPython
Home-page: UNKNOWN
Author: Alex Gaynor
......@@ -16,6 +16,8 @@ Description: RPLY
It is a more-or-less direct port of David Beazley's awesome PLY, with a new
public API, and RPython support.
You can find the documentation `online`_.
Basic API:
.. code:: python
......@@ -126,4 +128,6 @@ Description: RPLY
* `PyPI releases <https://pypi.python.org/pypi/rply>`_
* `Talk at PyCon US 2013: So you want to write an interpreter? <http://pyvideo.org/video/1694/so-you-want-to-write-an-interpreter>`_
.. _`online`: https://rply.readthedocs.org/
Platform: UNKNOWN
......@@ -3,20 +3,32 @@ class ParserGeneratorError(Exception):
class LexingError(Exception):
    """
    Raised by a Lexer, if no rule matches.

    :param message: Stored unchanged as the ``message`` attribute.
    :param source_pos: The position in the source at which the error
                       occurred; stored as ``source_pos`` and returned by
                       :meth:`getsourcepos`.
    """

    def __init__(self, message, source_pos):
        # Both values are kept as plain attributes on the exception.
        self.source_pos = source_pos
        self.message = message

    def getsourcepos(self):
        """
        Returns the position in the source, at which this error occurred.
        """
        return self.source_pos
class ParsingError(Exception):
    """
    Raised by a Parser, if no production rule can be applied.

    :param message: Stored unchanged as the ``message`` attribute.
    :param source_pos: The position in the source at which the error
                       occurred; stored as ``source_pos`` and returned by
                       :meth:`getsourcepos`.
    """

    def __init__(self, message, source_pos):
        # Both values are kept as plain attributes on the exception.
        self.source_pos = source_pos
        self.message = message

    def getsourcepos(self):
        """
        Returns the position in the source, at which this error occurred.
        """
        return self.source_pos
......
......@@ -42,8 +42,9 @@ class LexerStream(object):
for rule in self.lexer.rules:
match = rule.matches(self.s, self.idx)
if match:
lineno = self._lineno
colno = self._update_pos(match)
source_pos = SourcePosition(match.start, self._lineno, colno)
source_pos = SourcePosition(match.start, lineno, colno)
token = Token(
rule.name, self.s[match.start:match.end], source_pos
)
......
......@@ -21,9 +21,9 @@ from rply.lexer import Lexer
class Rule(object):
def __init__(self, name, pattern):
def __init__(self, name, pattern, flags=0):
self.name = name
self.re = re.compile(pattern)
self.re = re.compile(pattern, flags=flags)
def _freeze_(self):
return True
......@@ -42,17 +42,66 @@ class Match(object):
class LexerGenerator(object):
"""
A LexerGenerator represents a set of rules that match pieces of text that
should either be turned into tokens or ignored by the lexer.
Rules are added using the :meth:`add` and :meth:`ignore` methods:
>>> from rply import LexerGenerator
>>> lg = LexerGenerator()
>>> lg.add('NUMBER', r'\d+')
>>> lg.add('ADD', r'\+')
>>> lg.ignore(r'\s+')
The rules are passed to :func:`re.compile`. If you need additional flags,
e.g. :const:`re.DOTALL`, you can pass them to :meth:`add` and
:meth:`ignore` as an additional optional parameter:
>>> import re
>>> lg.add('ALL', r'.*', flags=re.DOTALL)
You can then build a lexer with which you can lex a string to produce an
iterator yielding tokens:
>>> lexer = lg.build()
>>> iterator = lexer.lex('1 + 1')
>>> iterator.next()
Token('NUMBER', '1')
>>> iterator.next()
Token('ADD', '+')
>>> iterator.next()
Token('NUMBER', '1')
>>> iterator.next()
Traceback (most recent call last):
...
StopIteration
"""
def __init__(self):
self.rules = []
self.ignore_rules = []
def add(self, name, pattern):
self.rules.append(Rule(name, pattern))
def add(self, name, pattern, flags=0):
"""
Adds a rule with the given `name` and `pattern`. In case of ambiguity,
the first rule added wins.
"""
self.rules.append(Rule(name, pattern, flags=flags))
def ignore(self, pattern):
self.ignore_rules.append(Rule("", pattern))
def ignore(self, pattern, flags=0):
"""
Adds a rule whose matched value will be ignored. Ignored rules will be
matched before regular ones.
"""
self.ignore_rules.append(Rule("", pattern, flags=flags))
def build(self):
"""
Returns a lexer instance, which provides a `lex` method that must be
called with a string and returns an iterator yielding
:class:`~rply.Token` instances.
"""
return Lexer(self.rules, self.ignore_rules)
if rpython:
......@@ -82,7 +131,9 @@ if rpython:
])
init_pbc = bk.immutablevalue(rsre_core.StrMatchContext.__init__)
bk.emulate_pbc_call((self, "str_match_context_init"), init_pbc, [
model.SomeInstance(bk.getuniqueclassdef(rsre_core.StrMatchContext)),
model.SomeInstance(
bk.getuniqueclassdef(rsre_core.StrMatchContext)
),
bk.newlist(model.SomeInteger(nonneg=True)),
model.SomeString(),
model.SomeInteger(nonneg=True),
......@@ -91,10 +142,14 @@ if rpython:
])
match_context_pbc = bk.immutablevalue(rsre_core.match_context)
bk.emulate_pbc_call((self, "match_context"), match_context_pbc, [
model.SomeInstance(bk.getuniqueclassdef(rsre_core.StrMatchContext)),
model.SomeInstance(
bk.getuniqueclassdef(rsre_core.StrMatchContext)
),
])
return model.SomeInstance(getbookkeeper().getuniqueclassdef(Match), can_be_None=True)
return model.SomeInstance(
getbookkeeper().getuniqueclassdef(Match), can_be_None=True
)
def getattr(self, s_attr):
if s_attr.is_constant() and s_attr.const == "name":
......@@ -115,18 +170,25 @@ if rpython:
rtyper.annotator.bookkeeper.immutablevalue(Match.__init__)
)
self.match_context_init_repr = rtyper.getrepr(
rtyper.annotator.bookkeeper.immutablevalue(rsre_core.StrMatchContext.__init__)
rtyper.annotator.bookkeeper.immutablevalue(
rsre_core.StrMatchContext.__init__
)
)
self.match_context_repr = rtyper.getrepr(
rtyper.annotator.bookkeeper.immutablevalue(rsre_core.match_context)
rtyper.annotator.bookkeeper.immutablevalue(
rsre_core.match_context
)
)
list_repr = FixedSizeListRepr(rtyper, rtyper.getrepr(model.SomeInteger(nonneg=True)))
list_repr = FixedSizeListRepr(
rtyper, rtyper.getrepr(model.SomeInteger(nonneg=True))
)
list_repr._setup_repr()
self.lowleveltype = lltype.Ptr(lltype.GcStruct(
"RULE",
("name", lltype.Ptr(STR)),
("code", list_repr.lowleveltype),
("flags", lltype.Signed),
))
def convert_const(self, rule):
......@@ -134,9 +196,12 @@ if rpython:
ll_rule = lltype.malloc(self.lowleveltype.TO)
ll_rule.name = llstr(rule.name)
code = get_code(rule.re.pattern)
ll_rule.code = lltype.malloc(self.lowleveltype.TO.code.TO, len(code))
ll_rule.code = lltype.malloc(
self.lowleveltype.TO.code.TO, len(code)
)
for i, c in enumerate(code):
ll_rule.code[i] = c
ll_rule.flags = rule.re.flags
self.ll_rule_cache[rule] = ll_rule
return self.ll_rule_cache[rule]
......@@ -148,12 +213,22 @@ if rpython:
return super(RuleRepr, self).rtype_getattr(hop)
def rtype_method_matches(self, hop):
[v_rule, v_s, v_pos] = hop.inputargs(self, string_repr, lltype.Signed)
[v_rule, v_s, v_pos] = hop.inputargs(
self, string_repr, lltype.Signed
)
c_MATCHTYPE = hop.inputconst(lltype.Void, Match)
c_MATCH_INIT = hop.inputconst(lltype.Void, self.match_init_repr)
c_MATCH_CONTEXTTYPE = hop.inputconst(lltype.Void, rsre_core.StrMatchContext)
c_MATCH_CONTEXT_INIT = hop.inputconst(lltype.Void, self.match_context_init_repr)
c_MATCH_CONTEXT = hop.inputconst(lltype.Void, self.match_context_repr)
c_MATCH_INIT = hop.inputconst(
lltype.Void, self.match_init_repr
)
c_MATCH_CONTEXTTYPE = hop.inputconst(
lltype.Void, rsre_core.StrMatchContext
)
c_MATCH_CONTEXT_INIT = hop.inputconst(
lltype.Void, self.match_context_init_repr
)
c_MATCH_CONTEXT = hop.inputconst(
lltype.Void, self.match_context_repr
)
return hop.gendirectcall(
LLRule.ll_matches,
......@@ -174,7 +249,7 @@ if rpython:
ctx = instantiate(MATCH_CONTEXTTYPE)
hlinvoke(
MATCH_CONTEXT_INIT, rsre_core.StrMatchContext.__init__,
ctx, ll_rule.code, hlstr(s), pos, len(s), 0
ctx, ll_rule.code, hlstr(s), pos, len(s), ll_rule.flags
)
matched = hlinvoke(MATCH_CONTEXT, rsre_core.match_context, ctx)
if matched:
......
......@@ -19,7 +19,9 @@ class LRParser(object):
while True:
if self.lr_table.default_reductions[current_state]:
t = self.lr_table.default_reductions[current_state]
current_state = self._reduce_production(t, symstack, statestack, state)
current_state = self._reduce_production(
t, symstack, statestack, state
)
continue
if lookahead is None:
......@@ -44,7 +46,9 @@ class LRParser(object):
lookahead = None
continue
elif t < 0:
current_state = self._reduce_production(t, symstack, statestack, state)
current_state = self._reduce_production(
t, symstack, statestack, state
)
continue
else:
n = symstack[-1]
......
import os
import hashlib
import json
import os
import random
import stat
import string
......@@ -11,13 +11,26 @@ import warnings
from rply.errors import ParserGeneratorError, ParserGeneratorWarning
from rply.grammar import Grammar
from rply.parser import LRParser
from rply.utils import IdentityDict, Counter, iteritems, itervalues
from rply.utils import Counter, IdentityDict, iteritems, itervalues
LARGE_VALUE = sys.maxsize
class ParserGenerator(object):
"""
A ParserGenerator represents a set of production rules, that define a
sequence of terminals and non-terminals to be replaced with a non-terminal,
which can be turned into a parser.
:param tokens: A list of token (terminal) names.
:param precedence: A list of tuples defining the order of operation for
avoiding ambiguity, consisting of a string defining
associativity (left, right or nonassoc) and a list of
token names with the same associativity and level of
precedence.
:param cache_id: A string specifying an ID for caching.
"""
VERSION = 1
def __init__(self, tokens, precedence=[], cache_id=None):
......@@ -31,6 +44,36 @@ class ParserGenerator(object):
self.error_handler = None
def production(self, rule, precedence=None):
"""
A decorator that defines a production rule and registers the decorated
function to be called with the terminals and non-terminals matched by
that rule.
A `rule` should consist of a name defining the non-terminal returned
by the decorated function and a sequence of non-terminals and terminals
that are supposed to be replaced::
replacing_non_terminal : ATERMINAL non_terminal
The name of the non-terminal replacing the sequence is on the left,
separated from the sequence by a colon. The whitespace around the colon
is required.
Knowing this we can define productions::
pg = ParserGenerator(['NUMBER', 'ADD'])
@pg.production('number : NUMBER')
def expr_number(p):
return BoxInt(int(p[0].getstr()))
@pg.production('expr : number ADD number')
def expr_add(p):
return BoxInt(p[0].getint() + p[2].getint())
If a state was passed to the parser, the decorated function is
additionally called with that state as first argument.
"""
parts = rule.split()
production_name = parts[0]
if parts[1] != ":":
......@@ -43,6 +86,13 @@ class ParserGenerator(object):
return inner
def error(self, func):
"""
Sets the error handler that is called with the state (if passed to the
parser) and the token the parser errored on.
Currently error handlers must raise an exception. If an error handler
is not defined, a :exc:`rply.ParsingError` will be raised.
"""
self.error_handler = func
return func
......@@ -70,7 +120,9 @@ class ParserGenerator(object):
"start": table.grammar.start,
"terminals": sorted(table.grammar.terminals),
"precedence": table.grammar.precedence,
"productions": [(p.name, p.prod, p.prec) for p in table.grammar.productions],
"productions": [
(p.name, p.prod, p.prec) for p in table.grammar.productions
],
}
def data_is_valid(self, g, data):
......@@ -127,12 +179,19 @@ class ParserGenerator(object):
if os.name == "nt":
cache_file = os.path.join(
tempfile.gettempdir(),
"rply-%s-%s-%s.json" % (self.VERSION, self.cache_id, self.compute_grammar_hash(g))
"rply-%s-%s-%s.json" % (
self.VERSION, self.cache_id, self.compute_grammar_hash(g)
)
)
else:
cache_file = os.path.join(
tempfile.gettempdir(),
"rply-%s-%s-%s-%s.json" % (self.VERSION, os.getuid(), self.cache_id, self.compute_grammar_hash(g))
"rply-%s-%s-%s-%s.json" % (
self.VERSION,
os.getuid(),
self.cache_id,
self.compute_grammar_hash(g)
)
)
table = None
if os.path.exists(cache_file):
......@@ -149,18 +208,26 @@ class ParserGenerator(object):
table = LRTable.from_cache(g, data)
if table is None:
table = LRTable.from_grammar(g)
fd = os.open(cache_file, os.O_RDWR | os.O_CREAT | os.O_EXCL, 0o0600)
fd = os.open(
cache_file, os.O_RDWR | os.O_CREAT | os.O_EXCL, 0o0600
)
with os.fdopen(fd, "w") as f:
json.dump(self.serialize_table(table), f)
if table.sr_conflicts:
warnings.warn(
"%d shift/reduce conflict%s" % (len(table.sr_conflicts), "s" if len(table.sr_conflicts) > 1 else ""),
"%d shift/reduce conflict%s" % (
len(table.sr_conflicts),
"s" if len(table.sr_conflicts) > 1 else ""
),
ParserGeneratorWarning,
stacklevel=2,
)
if table.rr_conflicts:
warnings.warn(
"%d reduce/reduce conflict%s" % (len(table.rr_conflicts), "s" if len(table.rr_conflicts) > 1 else ""),
"%d reduce/reduce conflict%s" % (
len(table.rr_conflicts),
"s" if len(table.rr_conflicts) > 1 else ""
),
ParserGeneratorWarning,
stacklevel=2,
)
......@@ -202,7 +269,8 @@ def traverse(x, N, stack, F, X, R, FP):
class LRTable(object):
def __init__(self, grammar, lr_action, lr_goto, default_reductions, sr_conflicts, rr_conflicts):
def __init__(self, grammar, lr_action, lr_goto, default_reductions,
sr_conflicts, rr_conflicts):
self.grammar = grammar
self.lr_action = lr_action
self.lr_goto = lr_goto
......@@ -282,7 +350,7 @@ class LRTable(object):
chosenp, rejectp = oldp, pp
rr_conflicts.append((st, repr(chosenp), repr(rejectp)))
else:
raise LALRError("Unknown conflict in state %d" % st)
raise ParserGeneratorError("Unknown conflict in state %d" % st)
else:
st_action[a] = -p.number
st_actionp[a] = p
......@@ -298,7 +366,7 @@ class LRTable(object):
r = st_action[a]
if r > 0:
if r != j:
raise LALRError("Shift/shift conflict in state %d" % st)
raise ParserGeneratorError("Shift/shift conflict in state %d" % st)
elif r < 0:
rprec, rlevel = grammar.productions[st_actionp[a].number].prec
sprec, slevel = grammar.precedence.get(a, ("right", 0))
......@@ -312,7 +380,7 @@ class LRTable(object):
if not slevel and not rlevel:
sr_conflicts.append((st, repr(a), "reduce"))
else:
raise LALRError("Unknown conflict in state %d" % st)
raise ParserGeneratorError("Unknown conflict in state %d" % st)
else:
st_action[a] = j
st_actionp[a] = p
......
class BaseBox(object):
    """
    Base class for the polymorphic boxes that wrap parser results.

    Derive anything you return from a production function of a parser from
    this class. This is necessary because RPython, unlike Python, expects
    functions to always return objects of the same type.
    """
    # NOTE(review): presumably the RPython ``_attrs_`` hint; it is empty, so
    # the base class itself declares no instance attributes -- confirm.
    _attrs_ = []
class Token(BaseBox):
"""
Represents a syntactically relevant piece of text.
:param name: A string describing the kind of text represented.
:param value: The actual text represented.
:param source_pos: A :class:`SourcePosition` object representing the
position of the first character in the source from which
this token was generated.
"""
def __init__(self, name, value, source_pos=None):
self.name = name
self.value = value
......@@ -17,17 +32,42 @@ class Token(BaseBox):
return self.name == other.name and self.value == other.value
def gettokentype(self):
"""
Returns the type or name of the token.
"""
return self.name
def getsourcepos(self):
"""
Returns a :class:`SourcePosition` instance, describing the position of
this token's first character in the source.
"""
return self.source_pos
def getstr(self):
"""
Returns the string represented by this token.
"""
return self.value
class SourcePosition(object):
    """
    Describes where a single character sits within some source string.

    :param idx: The index of the character in the source.
    :param lineno: The number of the line in which the character occurs.
    :param colno: The number of the column in which the character occurs.

    Each value is stored under the attribute of the same name.
    """

    def __init__(self, idx, lineno, colno):
        self.colno = colno
        self.lineno = lineno
        self.idx = idx

    def __repr__(self):
        # Fields are rendered in constructor order: idx, lineno, colno.
        template = "SourcePosition(idx={0}, lineno={1}, colno={2})"
        return template.format(self.idx, self.lineno, self.colno)
......@@ -8,7 +8,8 @@ setup(
name="rply",
description="A pure Python Lex/Yacc that works with RPython",
long_description=readme,
version="0.7.2",
# duplicated in docs/conf.py
version="0.7.3",
author="Alex Gaynor",
author_email="alex.gaynor@gmail.com",
packages=["rply"],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment