Browse Source

Merge pull request 'master' (#3) from Jason/pypeg2:master into master

Reviewed-on: #3
master
Jason Morgan 3 weeks ago
parent
commit
4dd9d69781
5 changed files with 98 additions and 45 deletions
  1. +52
    -12
      pypeg2/__init__.py
  2. +1
    -0
      pypeg2/xmlast.py
  3. +20
    -16
      samples/sample1.py
  4. +23
    -15
      samples/sample2.py
  5. +2
    -2
      setup.py

+ 52
- 12
pypeg2/__init__.py View File

@ -16,6 +16,9 @@ try:
except NameError:
pass
import logging
logger = logging.getLogger("pyPEG2")
__version__ = 2.15
__author__ = "Volker Birk"
@ -167,6 +170,7 @@ class RegEx(object):
"""
def __init__(self, value, **kwargs):
logger.debug(f"New Regex({value})")
self.regex = re.compile(value, re.U)
self.search = self.regex.search
self.match = self.regex.match
@ -199,6 +203,7 @@ class Literal(object):
_basic_types = (bool, int, float, complex, str, bytes, bytearray, list,
tuple, slice, set, frozenset, dict)
def __init__(self, value, **kwargs):
logger.debug(f"New Literal({value})")
if isinstance(self, Literal._basic_types):
pass
else:
@ -231,6 +236,23 @@ class Literal(object):
else:
return False
class Str:
    """A mutable string like object.

    The text is kept in ``data``. An optional ``name`` is stored as a Symbol,
    and any further keyword arguments become attributes of the instance.
    """

    def __init__(self, value, name=None, **kwargs):
        """Construct a Str with an optional name and optional other
        attributes.

        Arguments:
            value       stored as str(value) in self.data
            name        if not None, stored as Symbol(name) in self.name
            **kwargs    each key/value pair is set as an instance attribute
        """
        logger.debug(f"New Str({value})")
        self.data = str(value)
        if name is not None:
            self.name = Symbol(name)
        # Iterate over items(): looping over the dict itself yields only the
        # keys, so unpacking each key into (k, v) raises or mis-assigns.
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        try:
            return self.__class__.__name__ + f"(name={self.name}, data={self.data})"
        except AttributeError:
            # No name was given at construction time.
            return self.__class__.__name__ + f"(data={self.data})"
class Plain(object):
"""A plain object"""
@ -239,6 +261,7 @@ class Plain(object):
"""Construct a plain object with an optional name and optional other
attributes
"""
logger.debug(f"New Plain({name})")
if name is not None:
self.name = Symbol(name)
for k, v in kwargs:
@ -259,6 +282,7 @@ class List(list):
"""Construct a List, and construct its attributes from keyword
arguments.
"""
logger.debug(f"New List({args}, {kwargs})")
_args = []
if len(args) == 1:
if isinstance(args[0], str):
@ -333,6 +357,7 @@ class Namespace(_UserDict):
Arguments are being put into the Namespace, keyword arguments give the
attributes of the Namespace.
"""
logger.debug(f"New Namespace({args}, {kwargs})")
if args:
self.data = OrderedDict(args)
else:
@ -387,6 +412,7 @@ class Enum(Namespace):
def __init__(self, *things, **kwargs):
"""Construct an Enum using a tuple of things."""
logger.debug(f"New Enum({things})")
self.data = OrderedDict()
for thing in things:
if type(thing) == str:
@ -435,6 +461,7 @@ class Symbol(str):
TypeError if namespace is given and not a Namespace
"""
logger.debug(f"New Symbol({name})")
if Symbol.check_keywords and name in Keyword.table:
raise ValueError(repr(name)
+ " is a Keyword, but is used as a Symbol")
@ -657,6 +684,7 @@ def parse(text, thing, filename=None, whitespace=whitespace, comment=None,
if grammar contains an illegal cardinality value
"""
logger.debug(f"parse({repr(text)}, {thing})")
parser = Parser()
parser.whitespace = whitespace
parser.comment = comment
@ -693,6 +721,7 @@ def compose(thing, grammar=None, indent=" ", autoblank=True):
if grammar contains an illegal cardinality value
"""
logger.debug(f"compose({thing}, {grammar})")
parser = Parser()
parser.indent = indent
parser.autoblank = autoblank
@ -729,7 +758,7 @@ class Parser(object):
attribute instead of dumping them
"""
def __init__(self):
def __init__(self, name=None):
"""Initialize instance variables to their defaults."""
self.whitespace = whitespace
self.comment = None
@ -744,6 +773,12 @@ class Parser(object):
self._got_endl = True
self._contiguous = False
self._got_regex = False
self._name = hex(id(self)) if name is None else name
logger.debug(f"New Parser(name={name})")
@property
def name(self):
    """Read-only access to the value stored in self._name."""
    return self._name
def clear_memory(self, thing=None):
"""Clear cache memory for packrat parsing.
@ -784,6 +819,7 @@ class Parser(object):
if grammar contains an illegal cardinality value
"""
logger.debug(f"Parser({self.name}).parse({repr(text)}, {thing})")
self.text = text
if filename:
self.filename = filename
@ -807,6 +843,7 @@ class Parser(object):
def _skip(self, text, pos=None):
# Skip whitespace and comments from input text
logger.debug(f"Parser({self.name})._skip({repr(text)}, {pos})")
t2 = None
t = text
result = []
@ -854,15 +891,17 @@ class Parser(object):
def _parse(self, text, thing, pos=[1, 0]):
# Parser implementation
logger.debug(f"Parser({self.name})._parse([{type(thing)}]: {repr(text)}, {thing}, {pos})")
def update_pos(text, t, pos):
    """Calculate where we are in the text and advance pos in place.

    Arguments:
        text    the input before the current parse step
        t       the input remaining afterwards (assumed to be a suffix
                of text)
        pos     [line, offset] list that is updated in place; a false
                value disables position tracking
    """
    # Snapshot a copy: pos is mutated in place below, so keeping only a
    # reference would make the before/after comparison always equal.
    old_pos = list(pos) if pos else pos
    if pos and text != t:
        d_text = text[:len(text) - len(t)]
        pos[0] += d_text.count("\n")
        pos[1] += len(d_text)
    # Build the description first: a conditional expression binds looser
    # than "+", which would otherwise drop the prefix in the else branch.
    change = f"{pos}" if old_pos == pos else f"{old_pos}->{pos}"
    logger.debug(f"Parser({self.name})._parse.update_pos({change})")
try:
return self._memory[id(thing)][text]
@ -913,7 +952,7 @@ class Parser(object):
result = t, r
update_pos(text, t, pos)
else:
result = text, syntax_error("expecting " + repr(thing))
result = text, syntax_error("expecting " + repr(thing) + f" in '{text}'")
elif isinstance(thing, (RegEx, _RegEx)):
m = thing.match(text)
@ -924,7 +963,7 @@ class Parser(object):
update_pos(text, t, pos)
else:
result = text, syntax_error("expecting match on "
+ thing.pattern)
+ thing.pattern + f" in '{text}'")
elif isinstance(thing, (str, Literal)):
if text.startswith(str(thing)):
@ -933,7 +972,7 @@ class Parser(object):
result = t, r
update_pos(text, t, pos)
else:
result = text, syntax_error("expecting " + repr(thing))
result = text, syntax_error("expecting " + repr(thing) + f" in '{text}'")
elif _issubclass(thing, Symbol):
m = thing.regex.match(text)
@ -961,7 +1000,7 @@ class Parser(object):
result = t, r
update_pos(text, t, pos)
else:
result = text, syntax_error("expecting " + thing.__name__)
result = text, syntax_error("expecting " + thing.__name__ + f" in '{text}'")
# non-terminal constructs
@ -1273,6 +1312,7 @@ class Parser(object):
GrammarValueError
if grammar contains an illegal cardinality value
"""
logger.debug(f"Parser({self.name}).compose({thing}, {grammar})")
if __debug__:
# make sure that we're not having this typing error
compose = None


+ 1
- 0
pypeg2/xmlast.py View File

@ -126,6 +126,7 @@ def thing2xml(thing, pretty=False, object_names=False):
if pretty:
warnings.warn("lxml is needed for pretty printing",
ImportWarning)
etree.indent(tree)
return etree.tostring(tree)


+ 20
- 16
samples/sample1.py View File

@ -13,31 +13,31 @@ Because function has a name() in its grammar, we can access this now as an
attribute. With Python 2.7 this gives Symbol(u'f'), with Python 3.2 it gives Symbol('f'):
>>> f.name
Symbol(...'f')
Symbol('f')
A Function has an Attribute "parms" in its grammar, which directs to class
Parameters.
>>> f.parms
Parameters([(Symbol(...'a'), <__main__.Parameter object at 0x...>), (Symbol(...'b'), <__main__.Parameter object at 0x...>), ])
>>> f.parms # doctest: +ELLIPSIS
Parameters([(Symbol('a'), Symbol(a[int]) at 0x...), (Symbol('b'), Symbol(b[long]) at 0x...), ])
Because Parameters is a Namespace, we can access its content by name.
>>> f.parms["a"]
<__main__.Parameter object at 0x...>
>>> f.parms["a"] # doctest: +ELLIPSIS
Symbol(a[int]) at 0x...
Its contents are Parameter instances. Parameter has an Attribute "typing".
>>> f.parms["b"].typing
Type(...'long')
Type('long')
The Instructions of our small sample are just words. Because Function is a
List, we can access them one by one.
>>> f
Function([...'do_this', ...'do_that'], name=Symbol(...'f'))
Function(['do_this', 'do_that'], name=Symbol('f'))
>>> print("f is " + repr(f[0]))
f is ...'do_this'
f is 'do_this'
The result can be composed to a text again.
@ -52,7 +52,7 @@ int f(int a, long b)
/* on level 1 */
do_something_else;
}
...
<BLANKLINE>
pyPEG contains an XML backend, too:
@ -62,13 +62,13 @@ pyPEG contains an XML backend, too:
>>> print(xml.decode())
<Function typing="int" name="f">
<Parameters>
<Parameter typing="int" name="a"/>
<Parameter typing="long" name="b"/>
<Parameter typing="int" name="a" />
<Parameter typing="long" name="b" />
</Parameters>
<Instruction>do_this</Instruction>
<Instruction>do_that</Instruction>
</Function>
...
The XML backend can read XML text and create things:
@ -76,13 +76,13 @@ The XML backend can read XML text and create things:
>>> xml = b'<Function typing="long" name="g"><Parameters><Parameter name="x" typing="int"/></Parameters><Instruction>return</Instruction></Function>'
>>> g = xml2thing(xml, globals())
>>> g.name
Symbol(...'g')
Symbol('g')
>>> g.typing
Type(...'long')
Type('long')
>>> g.parms["x"].typing
Type(...'int')
Type('int')
>>> print("g[0] is " + repr(g[0]))
g[0] is ...'return'
g[0] is 'return'
"""
from __future__ import unicode_literals, print_function
@ -102,6 +102,10 @@ class Type(Keyword):
class Parameter(object):
    """A parameter: a Type stored as attribute "typing", a blank, a name."""
    grammar = attr("typing", Type), blank, name()

    # We pretty print parameters to remove the class instance name as that
    # can be inconsistent
    def __repr__(self):
        # hex() already yields the "0x" prefix; prepending another one
        # produced "0x0x..." in the output.
        return f"Symbol({self.name}[{self.typing}]) at {hex(id(self))}"
# A Namespace is a container for named things.
# csl() creates the grammar for a comma separated list.


+ 23
- 15
samples/sample2.py View File

@ -5,13 +5,28 @@ Ini file sample (see end of file for the content of the ini file)
To parse an ini file we use the grammar below. Comments in ini files are
starting with a semicolon ";".
Multi-line strings are not possible in a doctest, so we build one from a list:
>>> ini_file_text = "\\n".join([
... "[Number 1]",
... "this=something",
... "that=something else",
... "",
... "; now for something even more useless",
... "[Number 2]",
... "once=anything",
... "twice=goes",
... ])
>>> ini_file = parse(ini_file_text, IniFile, comment=(";", restline))
Because IniFile and Section are Namespaces, we can access their content by
name.
>>> print("found: " + repr(ini_file["Number 1"]["that"]))
found: ...'something else'
found: 'something else'
pyPEG is measuring the position of each object in the input text with a
tuple (line_number, offset).
@ -33,7 +48,7 @@ that=new one
once=anything
twice=goes
[Number 3]
...
<BLANKLINE>
pyPEG contains an XML backend, too:
@ -48,9 +63,8 @@ pyPEG contains an XML backend, too:
<Key name="once">anything</Key>
<Key name="twice">goes</Key>
</Section>
<Section name="Number 3"/>
<Section name="Number 3" />
</IniFile>
...
In this sample the tree contains named objects only. Then we can output object
names as tag names. Spaces in names will be translated into underscores.
@ -65,9 +79,8 @@ names as tag names. Spaces in names will be translated into underscores.
<once>anything</once>
<twice>goes</twice>
</Number_2>
<Number_3/>
<Number_3 />
</IniFile>
...
"""
from __future__ import unicode_literals, print_function
@ -79,24 +92,19 @@ import re
# symbols in ini files can include spaces
Symbol.regex = re.compile(r"[\w\s]+")
# A key is "name = some string"
class Key(str):
    """One key line: a name, "=", then whatever restline matches."""
    grammar = (name(), "=", restline, endl)
# Sections start with a name like "[NAME]", followed by zero or more keys
class Section(Namespace):
    """A "[NAME]" header line followed by its Key entries."""
    grammar = ("[", name(), "]", endl, maybe_some(Key))
# Ini files have one or more sections
class IniFile(Namespace):
    """Top-level grammar of the sample: some(Section)."""
    grammar = some(Section)
if __name__ == "__main__":
ini_file_text = """[Number 1]
this=something
that=something else
; now for something even more useless
[Number 2]
once=anything
twice=goes
"""
if __name__ == "__main__":
import doctest
doctest.testmod(optionflags=(doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE))

+ 2
- 2
setup.py View File

@ -1,4 +1,4 @@
from distutils.core import setup
from setuptools import setup
_version = '2.15.2'
@ -12,7 +12,7 @@ setup(
download_url='http://fdik.org/pyPEG2/pyPEG2-' + _version + '.tar.gz',
license='LICENSE.txt',
description='An intrinsic PEG Parser-Interpreter for Python',
long_description=open('README.txt').read(),
long_description=open('README.md').read(),
requires=['lxml',],
provides=['pyPEG2 (' + _version + ')',],
classifiers=[


Loading…
Cancel
Save