From 2d0642fac53cd0ae2645c8ed24cac59f40666869 Mon Sep 17 00:00:00 2001 From: J Morgan Date: Thu, 18 Nov 2021 14:25:05 +0000 Subject: [PATCH 1/5] Add logging and improve runtime error print --- pypeg2/__init__.py | 47 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/pypeg2/__init__.py b/pypeg2/__init__.py index eb87f03..2ea9ae5 100644 --- a/pypeg2/__init__.py +++ b/pypeg2/__init__.py @@ -16,6 +16,9 @@ try: except NameError: pass +import logging + +logger = logging.getLogger("pyPEG2") __version__ = 2.15 __author__ = "Volker Birk" @@ -167,6 +170,7 @@ class RegEx(object): """ def __init__(self, value, **kwargs): + logger.debug(f"New Regex({value})") self.regex = re.compile(value, re.U) self.search = self.regex.search self.match = self.regex.match @@ -199,6 +203,7 @@ class Literal(object): _basic_types = (bool, int, float, complex, str, bytes, bytearray, list, tuple, slice, set, frozenset, dict) def __init__(self, value, **kwargs): + logger.debug(f"New Literal({value})") if isinstance(self, Literal._basic_types): pass else: @@ -239,6 +244,7 @@ class Plain(object): """Construct a plain object with an optional name and optional other attributes """ + logger.debug(f"New Plain({name})") if name is not None: self.name = Symbol(name) for k, v in kwargs: @@ -259,6 +265,7 @@ class List(list): """Construct a List, and construct its attributes from keyword arguments. """ + logger.debug(f"New List({args}, {kwargs})") _args = [] if len(args) == 1: if isinstance(args[0], str): @@ -333,6 +340,7 @@ class Namespace(_UserDict): Arguments are being put into the Namespace, keyword arguments give the attributes of the Namespace. 
""" + logger.debug(f"New Namespace({args}, {kwargs})") if args: self.data = OrderedDict(args) else: @@ -387,6 +395,7 @@ class Enum(Namespace): def __init__(self, *things, **kwargs): """Construct an Enum using a tuple of things.""" + logger.debug(f"New Enum({things})") self.data = OrderedDict() for thing in things: if type(thing) == str: @@ -435,6 +444,7 @@ class Symbol(str): TypeError if namespace is given and not a Namespace """ + logger.debug(f"New Symbol({name})") if Symbol.check_keywords and name in Keyword.table: raise ValueError(repr(name) + " is a Keyword, but is used as a Symbol") @@ -657,6 +667,7 @@ def parse(text, thing, filename=None, whitespace=whitespace, comment=None, if grammar contains an illegal cardinality value """ + logger.debug(f"parse({repr(text)}, {thing})") parser = Parser() parser.whitespace = whitespace parser.comment = comment @@ -693,6 +704,7 @@ def compose(thing, grammar=None, indent=" ", autoblank=True): if grammar contains an illegal cardinality value """ + logger.debug(f"compose({thing}, {grammar})") parser = Parser() parser.indent = indent parser.autoblank = autoblank @@ -729,7 +741,7 @@ class Parser(object): attribute instead of dumping them """ - def __init__(self): + def __init__(self, name=None): """Initialize instance variables to their defaults.""" self.whitespace = whitespace self.comment = None @@ -744,6 +756,12 @@ class Parser(object): self._got_endl = True self._contiguous = False self._got_regex = False + self._name = hex(id(self)) if name is None else name + logger.debug(f"New Parser(name={name})") + + @property + def name(self): + return self._name def clear_memory(self, thing=None): """Clear cache memory for packrat parsing. 
@@ -784,6 +802,7 @@ class Parser(object): if grammar contains an illegal cardinality value """ + logger.debug(f"Parser({self.name}).parse({repr(text)}, {thing})") self.text = text if filename: self.filename = filename @@ -807,6 +826,7 @@ class Parser(object): def _skip(self, text, pos=None): # Skip whitespace and comments from input text + logger.debug(f"Parser({self.name})._skip({repr(text)}, {pos})") t2 = None t = text result = [] @@ -854,15 +874,17 @@ class Parser(object): def _parse(self, text, thing, pos=[1, 0]): # Parser implementation + logger.debug(f"Parser({self.name})._parse([{type(thing)}]: {repr(text)}, {thing}, {pos})") def update_pos(text, t, pos): # Calculate where we are in the text - if not pos: - return - if text == t: - return - d_text = text[:len(text) - len(t)] - pos[0] += d_text.count("\n") - pos[1] += len(d_text) + old_pos = pos + if pos and text != t: + d_text = text[:len(text) - len(t)] + pos[0] += d_text.count("\n") + pos[1] += len(d_text) + + logger.debug(f"Parser({self.name})._parse.update_pos(" + f"{pos})" if old_pos == pos else f"{old_pos}->{pos})") + try: return self._memory[id(thing)][text] @@ -913,7 +935,7 @@ class Parser(object): result = t, r update_pos(text, t, pos) else: - result = text, syntax_error("expecting " + repr(thing)) + result = text, syntax_error("expecting " + repr(thing) + f" in '{text}'") elif isinstance(thing, (RegEx, _RegEx)): m = thing.match(text) @@ -924,7 +946,7 @@ class Parser(object): update_pos(text, t, pos) else: result = text, syntax_error("expecting match on " - + thing.pattern) + + thing.pattern + f" in '{text}'") elif isinstance(thing, (str, Literal)): if text.startswith(str(thing)): @@ -933,7 +955,7 @@ class Parser(object): result = t, r update_pos(text, t, pos) else: - result = text, syntax_error("expecting " + repr(thing)) + result = text, syntax_error("expecting " + repr(thing) + f" in '{text}'") elif _issubclass(thing, Symbol): m = thing.regex.match(text) @@ -961,7 +983,7 @@ class Parser(object): 
result = t, r update_pos(text, t, pos) else: - result = text, syntax_error("expecting " + thing.__name__) + result = text, syntax_error("expecting " + thing.__name__ + f" in '{text}'") # non-terminal constructs @@ -1273,6 +1295,7 @@ class Parser(object): GrammarValueError if grammar contains an illegal cardinality value """ + logger.debug(f"Parser({self.name}).compose({thing}, {grammar})") if __debug__: # make sure that we're not having this typing error compose = None -- 2.30.2 From 760fed337b16850746127f45e9de6b4e3b00b85d Mon Sep 17 00:00:00 2001 From: J Morgan Date: Thu, 18 Nov 2021 14:26:46 +0000 Subject: [PATCH 2/5] Add pypeg2.Str, a mutable string-like object --- pypeg2/__init__.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pypeg2/__init__.py b/pypeg2/__init__.py index 2ea9ae5..78a6e14 100644 --- a/pypeg2/__init__.py +++ b/pypeg2/__init__.py @@ -236,6 +236,23 @@ class Literal(object): else: return False +class Str: + """A mutable string like object""" + + def __init__(self, value, name=None, **kwargs): + logger.debug(f"New Str({value})") + self.data = str(value) + if name is not None: + self.name = Symbol(name) + for k, v in kwargs: + setattr(self, k, v) + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + try: + return self.__class__.__name__ + f"(name={self.name}, data={self.data})" + except AttributeError: + return self.__class__.__name__ + f"(data={self.data})" class Plain(object): """A plain object""" -- 2.30.2 From 508c362e15071b3e6bc0a90b5d7e7d78c8c3a55b Mon Sep 17 00:00:00 2001 From: J Morgan Date: Thu, 18 Nov 2021 14:27:45 +0000 Subject: [PATCH 3/5] Fix sample code so that it works without error --- samples/sample1.py | 36 ++++++++++++++++++++---------------- samples/sample2.py | 32 +++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/samples/sample1.py b/samples/sample1.py index 4e812a8..da8cd5e 100644 --- a/samples/sample1.py +++ b/samples/sample1.py @@ -13,31 +13,31 @@ 
Because function has a name() in its grammar, we can access this now as an attribute. With Python 2.7 this gives Symbol(u'f'), with Python 3.2 it gives Symbol('f'): >>> f.name -Symbol(...'f') +Symbol('f') A Function has an Attribute "parms" in its grammar, which directs to class Parameters. ->>> f.parms -Parameters([(Symbol(...'a'), <__main__.Parameter object at 0x...>), (Symbol(...'b'), <__main__.Parameter object at 0x...>), ]) +>>> f.parms # doctest: +ELLIPSIS +Parameters([(Symbol('a'), Symbol(a[int]) at 0x...), (Symbol('b'), Symbol(b[long]) at 0x...), ]) Because Parameters is a Namespace, we can access its content by name. ->>> f.parms["a"] -<__main__.Parameter object at 0x...> +>>> f.parms["a"] # doctest: +ELLIPSIS +Symbol(a[int]) at 0x... Its content are Parameter instances. Parameter has an Attribute "typing". >>> f.parms["b"].typing -Type(...'long') +Type('long') The Instructions of our small sample are just words. Because Function is a List, we can access them one by one. >>> f -Function([...'do_this', ...'do_that'], name=Symbol(...'f')) +Function(['do_this', 'do_that'], name=Symbol('f')) >>> print("f is " + repr(f[0])) -f is ...'do_this' +f is 'do_this' The result can be composed to a text again. @@ -52,7 +52,7 @@ int f(int a, long b) /* on level 1 */ do_something_else; } -... + pyPEG contains an XML backend, too: @@ -62,13 +62,13 @@ pyPEG contains an XML backend, too: >>> print(xml.decode()) - - + + do_this do_that -... 
+ The XML backend can read XML text and create things: @@ -76,13 +76,13 @@ The XML backend can read XML text and create things: >>> xml = b'return' >>> g = xml2thing(xml, globals()) >>> g.name -Symbol(...'g') +Symbol('g') >>> g.typing -Type(...'long') +Type('long') >>> g.parms["x"].typing -Type(...'int') +Type('int') >>> print("g[0] is " + repr(g[0])) -g[0] is ...'return' +g[0] is 'return' """ from __future__ import unicode_literals, print_function @@ -102,6 +102,10 @@ class Type(Keyword): class Parameter(object): grammar = attr("typing", Type), blank, name() + # We pretty print parameters to remove the class instance name as that can be inconsistent + def __repr__(self): + return f"Symbol({self.name}[{self.typing}]) at 0x{hex(id(self))}" + # A Namespace is a container for named things. # csl() creates the grammar for a comma separated list. diff --git a/samples/sample2.py b/samples/sample2.py index 33519d9..edd80ca 100644 --- a/samples/sample2.py +++ b/samples/sample2.py @@ -5,13 +5,28 @@ Ini file sample (see end of file for the content of the ini file) To parse an ini file we use the grammar below. Comments in ini files are starting with a semicolon ";". +Multi line strings are not possible in doctest so we build one from a list + +>>> ini_file_text = "\\n".join([ +... "[Number 1]", +... "this=something", +... "that=something else", +... "", +... "; now for something even more useless", +... "[Number 2]", +... "once=anything", +... "twice=goes", +... ]) + + + >>> ini_file = parse(ini_file_text, IniFile, comment=(";", restline)) Because IniFile and Section are Namespaces, we can access their content by name. >>> print("found: " + repr(ini_file["Number 1"]["that"])) -found: ...'something else' +found: 'something else' pyPEG is measuring the position of each object in the input text with a tuple (line_number, offset). @@ -55,7 +70,7 @@ pyPEG contains an XML backend, too: In this sample the tree contains named objects only. 
Then we can output object names as tag names. Spaces in names will be translated into underscores. ->>> print(thing2xml(ini_file, pretty=True, object_names=True).decode()) +>>> print(thing2xml(ini_file, pretty=True, object_names=True).decode()) # doctest: +SKIP something @@ -79,24 +94,19 @@ import re # symbols in ini files can include spaces Symbol.regex = re.compile(r"[\w\s]+") +# A key is "name = some string" class Key(str): grammar = name(), "=", restline, endl +# Sections start with a name like "[NAME]" and may contain at least one key class Section(Namespace): grammar = "[", name(), "]", endl, maybe_some(Key) +# Ini files have one or more sections class IniFile(Namespace): grammar = some(Section) -if __name__ == "__main__": - ini_file_text = """[Number 1] -this=something -that=something else -; now for something even more useless -[Number 2] -once=anything -twice=goes -""" +if __name__ == "__main__": import doctest doctest.testmod(optionflags=(doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE)) -- 2.30.2 From 9b71ffc25f07ab1d2769e0ea5f8e317adfa60802 Mon Sep 17 00:00:00 2001 From: J Morgan Date: Thu, 18 Nov 2021 15:26:41 +0000 Subject: [PATCH 4/5] Remove distutils and replace with setuptools so that "python setup.py develop" works Replace reference to README.txt with README.md --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d592be3..b271629 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -from distutils.core import setup +from setuptools import setup _version = '2.15.2' @@ -12,7 +12,7 @@ setup( download_url='http://fdik.org/pyPEG2/pyPEG2-' + _version + '.tar.gz', license='LICENSE.txt', description='An intrinsic PEG Parser-Interpreter for Python', - long_description=open('README.txt').read(), + long_description=open('README.md').read(), requires=['lxml',], provides=['pyPEG2 (' + _version + ')',], classifiers=[ -- 2.30.2 From de2e7f0791ea1963e022c36da8c1e3ea8a9a5648 Mon Sep 17 00:00:00 2001 From: 
jasomorg Date: Thu, 18 Nov 2021 15:59:53 +0000 Subject: [PATCH 5/5] Include fix for XML output formatting being minified in the last two tests Removed needless ellipses Replace ellipses with the doctest BLANKLINE marker Remove doctest: +SKIP (oops) --- pypeg2/xmlast.py | 1 + samples/sample2.py | 10 ++++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pypeg2/xmlast.py b/pypeg2/xmlast.py index b02882f..0145a34 100644 --- a/pypeg2/xmlast.py +++ b/pypeg2/xmlast.py @@ -126,6 +126,7 @@ def thing2xml(thing, pretty=False, object_names=False): if pretty: warnings.warn("lxml is needed for pretty printing", ImportWarning) + etree.indent(tree) return etree.tostring(tree) diff --git a/samples/sample2.py b/samples/sample2.py index edd80ca..5fb1711 100644 --- a/samples/sample2.py +++ b/samples/sample2.py @@ -48,7 +48,7 @@ that=new one once=anything twice=goes [Number 3] -... + pyPEG contains an XML backend, too: @@ -63,14 +63,13 @@ pyPEG contains an XML backend, too: anything goes -
+
-... In this sample the tree contains named objects only. Then we can output object names as tag names. Spaces in names will be translated into underscores. ->>> print(thing2xml(ini_file, pretty=True, object_names=True).decode()) # doctest: +SKIP +>>> print(thing2xml(ini_file, pretty=True, object_names=True).decode()) something @@ -80,9 +79,8 @@ names as tag names. Spaces in names will be translated into underscores. anything goes - + -... """ from __future__ import unicode_literals, print_function -- 2.30.2