|
|
- """
- pyPEG parsing framework
-
- pyPEG offers a packrat parser as well as a framework to parse and output
- languages for Python 3.5+, see http://fdik.org/pyPEG2
-
- Copyleft 2012, Volker Birk.
- This program is under GNU General Public License 2.0.
- """
-
-
- import logging
-
- logger = logging.getLogger("pyPEG2")
-
- __version__ = 2.50
- __author__ = "Volker Birk"
- __license__ = "This program is under GNU General Public License 2.0."
- __url__ = "http://fdik.org/pyPEG"
-
-
- import re
- import sys
- import weakref
- if __debug__:
- import warnings
- from types import FunctionType
- from collections import namedtuple
- from collections import OrderedDict
- from collections import UserString
-
-
- word = re.compile(r"\w+")
- """Regular expression for scanning a word."""
-
- _RegEx = type(word)
-
- restline = re.compile(r".*")
- """Regular expression for rest of line."""
-
- whitespace = re.compile(r"(?m)\s+")
- """Regular expression for scanning whitespace."""
-
- comment_sh = re.compile(r"\#.*")
- """Shell script style comment."""
-
- comment_cpp = re.compile(r"//.*")
- """C++ style comment."""
-
- comment_c = re.compile(r"(?ms)/\*.*?\*/")
- """C style comment without nesting comments."""
-
- comment_pas = re.compile(r"(?ms)\(\*.*?\*\)")
- """Pascal style comment without nesting comments."""
-
-
- def _card(n, thing):
- # Reduce unnecessary recursions
- if len(thing) == 1:
- return n, thing[0]
- else:
- return n, thing
-
-
- def some(*thing):
- """At least one occurrence of thing, + operator.
- Inserts -2 as cardinality before thing.
- """
- return _card(-2, thing)
-
-
- def maybe_some(*thing):
- """No thing or some of them, * operator.
- Inserts -1 as cardinality before thing.
- """
- return _card(-1, thing)
-
-
- def optional(*thing):
- """Thing or no thing, ? operator.
- Inserts 0 as cardinality before thing.
- """
- return _card(0, thing)
-
-
- def _csl(separator, *thing):
- # reduce unnecessary recursions
- if len(thing) == 1:
- L = [thing[0]]
- L.extend(maybe_some(separator, blank, thing[0]))
- return tuple(L)
- else:
- L = list(thing)
- L.append(-1)
- L2 = [separator, blank]
- L2.extend(tuple(thing))
- L.append(tuple(L2))
- return tuple(L)
-
-
- try:
- # Python 3.x
- _exec = eval("exec")
- _exec('''
- def csl(*thing, separator=","):
- """Generate a grammar for a simple comma separated list."""
- return _csl(separator, *thing)
- ''')
- except SyntaxError:
- # Python 2.7
- def csl(*thing):
- """Generate a grammar for a simple comma separated list."""
- return _csl(",", *thing)
-
-
- def attr(name, thing=word, subtype=None):
- """Generate an Attribute with that name, referencing the thing.
-
- Instance variables:
- Class Attribute class generated by namedtuple()
- """
- # if __debug__:
- # if isinstance(thing, (tuple, list)):
- # warnings.warn(type(thing).__name__
- # + " not recommended as grammar of attribute "
- # + repr(name), SyntaxWarning)
- return attr.Class(name, thing, subtype)
-
-
- attr.Class = namedtuple("Attribute", ("name", "thing", "subtype"))
-
-
- def flag(name, thing=None):
- """Generate an Attribute with that name which is valued True or False."""
- if thing is None:
- thing = Keyword(name)
- return attr(name, thing, "Flag")
-
-
- def attributes(grammar, invisible=False):
- """Iterates all attributes of a grammar."""
- if type(grammar) == attr.Class and (invisible or grammar.name[0] != "_"):
- yield grammar
- elif type(grammar) == tuple:
- for e in grammar:
- for a in attributes(e, invisible):
- yield a
-
-
- class Whitespace(str):
- grammar = whitespace
-
-
- class RegEx(object):
- """Regular Expression.
-
- Instance Variables:
- regex pre-compiled object from re.compile()
- """
-
- def __init__(self, value, **kwargs):
- logger.debug(f"New Regex({value})")
- self.regex = re.compile(value, re.U)
- self.search = self.regex.search
- self.match = self.regex.match
- self.split = self.regex.split
- self.findall = self.regex.findall
- self.finditer = self.regex.finditer
- self.sub = self.regex.sub
- self.subn = self.regex.subn
- self.flags = self.regex.flags
- self.groups = self.regex.groups
- self.groupindex = self.regex.groupindex
- self.pattern = value
- for k, v in kwargs.items():
- setattr(self, k, v)
-
- def __str__(self):
- return self.pattern
-
- def __repr__(self):
- result = type(self).__name__ + "(" + repr(self.pattern)
- try:
- result += ", name=" + repr(self.name)
- except:
- pass
- return result + ")"
-
-
- class Literal(object):
- """Literal value."""
- _basic_types = (bool, int, float, complex, str, bytes, bytearray, list,
- tuple, slice, set, frozenset, dict)
-
- def __init__(self, value, **kwargs):
- logger.debug(f"New Literal({value})")
- if isinstance(self, Literal._basic_types):
- pass
- else:
- self.value = value
- for k, v in kwargs.items():
- setattr(self, k, v)
-
- def __str__(self):
- if isinstance(self, Literal._basic_types):
- return super(Literal, self).__str__()
- else:
- return str(self.value)
-
- def __repr__(self):
- if isinstance(self, Literal._basic_types):
- return type(self).__name__ + "(" + \
- super(Literal, self).__repr__() + ")"
- else:
- return type(self).__name__ + "(" + repr(self.value) + ")"
-
- def __eq__(self, other):
- if isinstance(self, Literal._basic_types):
- if type(self) == type(other) and super().__eq__(other):
- return True
- else:
- return False
- else:
- if type(self) == type(other) and str(self) == str(other):
- return True
- else:
- return False
-
-
- class Str(UserString):
- """A mutable string like object"""
-
- def __new__(cls, x):
- return super().__new__(cls, x)
-
- def __init__(self, value, name=None, **kwargs):
- super().__init__(self, )
- logger.debug(f"New Str({value})")
- if name is not None:
- self.name = Symbol(name)
- for k, v in kwargs:
- setattr(self, k, v)
-
- def __repr__(self):
- """x.__repr__() <==> repr(x)"""
- try:
- return self.__class__.__name__ + f"(name={self.name}, data={self.data})"
- except AttributeError:
- return self.__class__.__name__ + f"(data={self.data})"
-
-
- class Plain(object):
- """A plain object"""
-
- def __init__(self, name=None, **kwargs):
- """Construct a plain object with an optional name and optional other
- attributes
- """
- logger.debug(f"New Plain({repr(name)})")
- if name is not None:
- self.name = Symbol(name)
- for k, v in kwargs:
- setattr(self, k, v)
-
- def __repr__(self):
- """x.__repr__() <==> repr(x)"""
- try:
- return self.__class__.__name__ + "(name=" + repr(self.name) + ")"
- except AttributeError:
- return self.__class__.__name__ + "()"
-
-
- class List(list):
- """A List of things."""
-
- def __init__(self, *args, **kwargs):
- """Construct a List, and construct its attributes from keyword
- arguments.
- """
- logger.debug(f"New List({args}, {kwargs})")
- _args = []
- if len(args) == 1:
- if isinstance(args[0], str):
- self.append(args[0])
- elif isinstance(args[0], (tuple, list)):
- for e in args[0]:
- if isinstance(e, attr.Class):
- setattr(self, e.name, e.value)
- else:
- _args.append(e)
- super(List, self).__init__(_args)
- else:
- raise ValueError("initializer of List should be collection or string")
- else:
- for e in args:
- if isinstance(e, attr.Class):
- setattr(self, e.name, e.value)
- else:
- _args.append(e)
- super(List, self).__init__(_args)
-
- for k, v in kwargs.items():
- setattr(self, k, v)
-
- def __repr__(self):
- """x.__repr__() <==> repr(x)"""
- result = type(self).__name__ + "(" + super(List, self).__repr__()
- try:
- result += ", name=" + repr(self.name)
- except:
- pass
- return result + ")"
-
- def __eq__(self, other):
- return super(List, self).__eq__(list(other))
-
-
- class _UserDict(object):
- # UserDict cannot be used because of metaclass conflicts
- def __init__(self, *args, **kwargs):
- self.data = dict(*args, **kwargs)
-
- def __len__(self):
- return len(self.data)
-
- def __getitem__(self, key):
- return self.data[key]
-
- def __setitem__(self, key, value):
- self.data[key] = value
-
- def __delitem__(self, key):
- del self.data[key]
-
- def __iter__(self):
- return self.data.keys()
-
- def __contains__(self, item):
- return item in self.data
-
- def items(self):
- return self.data.items()
-
- def keys(self):
- return self.data.keys()
-
- def values(self):
- return self.data.values()
-
- def clear(self):
- self.data.clear()
-
- def copy(self):
- return self.data.copy()
-
-
- class Namespace(_UserDict):
- """A dictionary of things, indexed by their name."""
-
- @staticmethod
- def name_by(value):
- return "#" + str(id(value))
-
- def __init__(self, *args, **kwargs):
- """Initialize an OrderedDict containing the data of the Namespace.
- Arguments are being put into the Namespace, keyword arguments give the
- attributes of the Namespace.
- """
- super().__init__(*args, **kwargs)
- logger.debug(f"New Namespace({args}, {kwargs})")
- if args:
- self.data = OrderedDict(args)
- else:
- self.data = OrderedDict()
- for k, v in kwargs.items():
- setattr(self, k, v)
-
- def __setitem__(self, key, value):
- """x.__setitem__(i, y) <==> x[i]=y"""
- if key is None:
- name = Symbol(Namespace.name_by(value))
- else:
- name = Symbol(key)
- try:
- value.name = name
- except AttributeError:
- pass
- try:
- value.namespace
- except AttributeError:
- try:
- value.namespace = weakref.ref(self)
- except AttributeError:
- pass
- else:
- if not value.namespace:
- value.namespace = weakref.ref(self)
- super(Namespace, self).__setitem__(name, value)
-
- def __delitem__(self, key):
- """x.__delitem__(y) <==> del x[y]"""
- self[key].namespace = None
- super(Namespace, self).__delitem__(key)
-
- def __repr__(self):
- """x.__repr__() <==> repr(x)"""
- result = type(self).__name__ + "(["
- for key, value in self.data.items():
- result += "(" + repr(key) + ", " + repr(value) + ")"
- result += ", "
- result += "]"
- try:
- result += ", name=" + repr(self.name)
- # BUG(JM): self.name is not an attribute of Namespace
- except NameError:
- pass
- return result + ")"
-
-
- class Enum(Namespace):
- """A Namespace which is being treated as an Enum.
- Enums can only contain Keywords or Symbols."""
-
- def __init__(self, *things, **kwargs):
- """Construct an Enum using a tuple of things."""
- super().__init__(*things, **kwargs)
- logger.debug(f"New Enum({things})")
- self.data = OrderedDict()
- for thing in things:
- if type(thing) == str:
- thing = Symbol(thing)
- if not isinstance(thing, Symbol):
- raise TypeError(repr(thing) + " is not a Symbol")
- super(Enum, self).__setitem__(thing, thing)
- for k, v in kwargs.items():
- setattr(self, k, v)
-
- def __repr__(self):
- """x.__repr__() <==> repr(x)"""
- v = [e for e in self.values()]
- result = type(self).__name__ + "(" + repr(v)
- try:
- result += ", name=" + repr(self.name)
- # BUG(JM): self.name is not an attribute of Namespace
- except NameError:
- pass
- return result + ")"
-
- def __setitem__(self, key, value):
- """x.__setitem__(i, y) <==> x[i]=y"""
- if not isinstance(value, Keyword) and not isinstance(value, Symbol):
- raise TypeError("Enums can only contain Keywords or Symbols")
- raise ValueError("Enums cannot be modified after creation.")
-
-
- class Symbol(str):
- r"""Use to scan Symbols.
-
- Class variables:
- regex regular expression to scan, default r"\w+"
- check_keywords flag if a Symbol is checked for not being a Keyword
- default: False
- """
-
- regex = word
- check_keywords = False
-
- def __init__(self, name, namespace=None):
- """Construct a Symbol with that name in Namespace namespace.
-
- Raises:
- ValueError if check_keywords is True and value is identical to
- a Keyword
- TypeError if namespace is given and not a Namespace
- """
-
- logger.debug(f"New Symbol({repr(name)})")
- if Symbol.check_keywords and name in Keyword.table:
- raise ValueError(repr(name)
- + " is a Keyword, but is used as a Symbol")
- if namespace:
- if isinstance(namespace, Namespace):
- namespace[name] = self
- else:
- raise TypeError(repr(namespace) + " is not a Namespace")
- else:
- self.name = name
- self.namespace = None
-
- def __repr__(self):
- """x.__repr__() <==> repr(x)"""
- return type(self).__name__ + "(" + str(self).__repr__() + ")"
-
-
- class Keyword(Symbol):
- r"""Use to access the keyword table.
-
- Class variables:
- regex regular expression to scan, default r"\w+"
- table Namespace with keyword table
- """
-
- regex = word
- table = Namespace()
-
- def __init__(self, keyword):
- """Adds keyword to the keyword table."""
- super().__init__(keyword)
- if keyword not in Keyword.table:
- Keyword.table[keyword] = self
- self.name = keyword
-
- K = Keyword
- """Shortcut for Keyword."""
-
-
- class IKeyword(Keyword):
- """Use for case-insensitive keyword."""
-
- def parse(self, parser, text, pos):
- m = type(self).regex.match(text)
- if m:
- if m.group(0).upper() == str(self).upper():
- return text[len(str(self)):], None
- else:
- return text, SyntaxError("expecting " + repr(self))
- else:
- return text, SyntaxError("expecting " + repr(self))
-
- IK = IKeyword
- """Shortcut for case-insensitive Keyword."""
-
-
- class Concat(List):
- """Concatenation of things.
-
- This class exists as a mutable alternative to using a tuple.
- """
- pass
-
-
- def name():
- """Generate a grammar for a symbol with name."""
- return attr("name", Symbol)
-
-
- def ignore(grammar):
- """Ignore what matches to the grammar."""
- try:
- ignore.serial += 1
- except AttributeError:
- ignore.serial = 1
- return attr("_ignore" + str(ignore.serial), grammar)
-
-
- def indent(*thing):
- """Indent thing by one level.
- Inserts -3 as cardinality before thing.
- """
- return _card(-3, thing)
-
-
- def contiguous(*thing):
- """Disable automated whitespace matching.
- Inserts -4 as cardinality before thing.
- """
- return _card(-4, thing)
-
-
- def separated(*thing):
- """Enable automated whitespace matching.
- Inserts -5 as cardinality before thing.
- """
- return _card(-5, thing)
-
-
- def omit(*thing):
- """Omit what matches to the grammar."""
- return _card(-6, thing)
-
-
- def endl(thing, parser):
- """End of line marker for composing text."""
- return "\n"
-
-
- def blank(thing, parser):
- """Space marker for composing text."""
- return " "
-
-
-
- class GrammarError(Exception):
- """Base class for errors in grammars."""
-
-
- class GrammarTypeError(TypeError, GrammarError):
- """Raised if grammar contains an object of unkown type."""
-
-
- class GrammarValueError(ValueError, GrammarError):
- """Raised if grammar contains an illegal value."""
-
-
- def how_many(grammar):
- """Determines the possibly parsed objects of grammar.
-
- Returns:
- 0 if there will be no objects
- 1 if there will be a maximum of one object
- 2 if there can be more than one object
-
- Raises:
- GrammarTypeError
- if grammar contains an object of unkown type
- GrammarValueError
- if grammar contains an illegal cardinality value
- """
-
- if grammar is None:
- return 0
-
- elif type(grammar) == int:
- return grammar
-
- elif _issubclass(grammar, Symbol) or isinstance(grammar, (RegEx, _RegEx)):
- return 1
-
- elif isinstance(grammar, (str, Literal)):
- return 0
-
- elif isinstance(grammar, attr.Class):
- return 0
-
- elif type(grammar) == FunctionType:
- return 0
-
- elif isinstance(grammar, (tuple, Concat)):
- length, card = 0, 1
- for e in grammar:
- if type(e) == int:
- if e < -6:
- raise GrammarValueError("illegal cardinality value in grammar: " + str(e))
- if e in (-5, -4, -3):
- pass
- elif e in (-1, -2):
- card = 2
- elif e == 0:
- card = 1
- elif e == -6:
- return 0
- else:
- card = min(e, 2)
- else:
- length += card * how_many(e)
- if length >= 2:
- return 2
- return length
-
- elif isinstance(grammar, list):
- m = 0
- for e in grammar:
- m = max(m, how_many(e))
- if m == 2:
- return m
- return m
-
- elif _issubclass(grammar, object):
- return 1
-
- else:
- err = f"grammar contains an illegal type: {type(grammar).__name__}: {repr(grammar)}"
- raise GrammarTypeError(err)
-
-
- def parse(text, thing, filename=None, whitespace=whitespace, comment=None,
- keep_feeble_things=False, name=None):
- r"""Parse text following thing as grammar and return the resulting things or
- raise an error.
-
- Arguments:
- text text to parse
- thing grammar for things to parse
- filename filename where text is origin from
- whitespace regular expression to skip whitespace
- default: regex "(?m)\s+"
- comment grammar to parse comments
- default: None
- keep_feeble_things
- put whitespace and comments into the .feeble_things
- attribute instead of dumping them
-
- Returns generated things.
-
- Raises:
- SyntaxError if text does not match the grammar in thing
- ValueError if input does not match types
- TypeError if output classes have wrong syntax for __init__()
- GrammarTypeError
- if grammar contains an object of unkown type
- GrammarValueError
- if grammar contains an illegal cardinality value
- """
-
- logger.debug(f"parse({repr(text)}, {thing})")
- parser = Parser(name=name)
- parser.whitespace = whitespace
- parser.comment = comment
- parser.text = text
- parser.filename = filename
- parser.keep_feeble_things = keep_feeble_things
-
- t, r = parser.parse(text, thing)
- if t:
- raise parser.last_error
- return r
-
-
- def compose(thing, grammar=None, indent=" ", autoblank=True):
- """Compose text using thing with grammar.
-
- Arguments:
- thing thing containing other things with grammar
- grammar grammar to use to compose thing
- default: thing.grammar
- indent string to use to indent while composing
- default: four spaces
- autoblank add blanks if grammar would possibly be
- violated otherwise
- default: True
-
- Returns text
-
- Raises:
- ValueError if input does not match grammar
- GrammarTypeError
- if grammar contains an object of unkown type
- GrammarValueError
- if grammar contains an illegal cardinality value
- """
-
- logger.debug(f"compose({thing}, {grammar})")
- parser = Parser()
- parser.indent = indent
- parser.autoblank = autoblank
- return parser.compose(thing, grammar)
-
-
- def _issubclass(obj, cls):
- # If obj is not a class, just return False
- try:
- return issubclass(obj, cls)
- except TypeError:
- return False
-
-
- class Parser(object):
- r"""Offers parsing and composing capabilities. Implements a Packrat parser.
-
- Instance variables:
- whitespace regular expression to scan whitespace
- default: "(?m)\s+"
- comment grammar to parse comments
- last_error syntax error which ended parsing
- indent string to use to indent while composing
- default: four spaces
- indention_level level to indent to
- default: 0
- text original text to parse; set for decorated syntax
- errors
- filename filename where text is origin from
- autoblank add blanks while composing if grammar would possibly
- be violated otherwise
- default: True
- keep_feeble_things put whitespace and comments into the .feeble_things
- attribute instead of dumping them
- """
-
- def __init__(self, name=None):
- """Initialize instance variables to their defaults."""
- self.whitespace = whitespace
- self.comment = None
- self.last_error = None
- self.indent = " "
- self.indention_level = 0
- self.text = None
- self.filename = None
- self.autoblank = True
- self.keep_feeble_things = False
- self._memory = {}
- self._got_endl = True
- self._contiguous = False
- self._got_regex = False
- self._name = hex(id(self)) if name is None else name
- logger.debug(f"New Parser(name={name})")
-
- @property
- def name(self):
- return self._name
-
- def clear_memory(self, thing=None):
- """Clear cache memory for packrat parsing.
-
- Arguments:
- thing thing for which cache memory is cleared,
- None if cache memory should be cleared for all
- things
- """
-
- if thing is None:
- self._memory = {}
- else:
- try:
- del self._memory[id(thing)]
- except KeyError:
- pass
-
- def parse(self, text, thing, filename=None):
- """(Partially) parse text following thing as grammar and return the
- resulting things.
-
- Arguments:
- text text to parse
- thing grammar for things to parse
- filename filename where text is origin from
-
- Returns (text, result) with:
- text unparsed text
- result generated objects or SyntaxError object
-
- Raises:
- ValueError if input does not match types
- TypeError if output classes have wrong syntax for __init__()
- GrammarTypeError
- if grammar contains an object of unkown type
- GrammarValueError
- if grammar contains an illegal cardinality value
- """
-
- logger.debug(f"Parser({self.name}).parse({repr(text)}, {thing})")
- self.text = text
- if filename:
- self.filename = filename
- pos = [1, 0]
- t, skip_result = self._skip(text, pos)
- t, r = self._parse(t, thing, pos)
- if type(r) == SyntaxError:
- raise r
- else:
- if self.keep_feeble_things and skip_result:
- try:
- r.feeble_things
- except AttributeError:
- try:
- r.feeble_things = skip_result
- except AttributeError:
- pass
- else:
- r.feeble_things = skip_result + r.feeble_things
- return t, r
-
- def _skip(self, text, pos=None):
- # Skip whitespace and comments from input text
- logger.debug(f"Parser({self.name})._skip({repr(text)}, {pos})")
- t2 = None
- t = text
- result = []
- while t2 != t:
- if self.whitespace and not self._contiguous:
- t, r = self._parse(t, self.whitespace, pos)
- if self.keep_feeble_things and r and not isinstance(r,
- SyntaxError):
- result.append(r)
- t2 = t
- if self.comment:
- t, r = self._parse(t, self.comment, pos)
- if self.keep_feeble_things and r and not isinstance(r,
- SyntaxError):
- result.append(r)
- return t, result
-
- def generate_syntax_error(self, msg, pos):
- """Generate a syntax error construct with
-
- msg string with error message
- pos (lineNo, charInText) with positioning information
- """
-
- result = SyntaxError(msg)
- if pos:
- result.lineno = pos[0]
- start = max(pos[1] - 19, 0)
- end = min(pos[1] + 20, len(self.text))
- result.text = self.text[start:end]
- result.offset = pos[1] - start + 1
- while "\n" in result.text:
- lf = result.text.find("\n")
- if lf >= result.offset:
- result.text = result.text[:result.offset-1]
- break;
- else:
- L = len(result.text)
- result.text = result.text[lf+1:]
- result.offset -= L - len(result.text)
- if self.filename:
- result.filename = self.filename
- return result
-
- def _parse(self, text, thing, pos=[1, 0]):
- # Parser implementation
-
- logger.debug(f"Parser({self.name})._parse([{type(thing)}]: {repr(text)}, {thing}, {pos})")
-
- def update_pos(text, t, pos):
- # Calculate where we are in the text
- old_pos = pos
- if pos and text != t:
- d_text = text[:len(text) - len(t)]
- pos[0] += d_text.count("\n")
- pos[1] += len(d_text)
-
- logger.debug(f"Parser({self.name})._parse.update_pos(" + f"{pos})" if old_pos == pos else f"{old_pos}->{pos})")
-
- try:
- ret = self._memory[id(thing)][text]
- logger.debug(f"Parser({self.name})._parse() -> cached ret: {repr(ret)}")
- return ret
- except (IndexError, KeyError):
- pass
-
- if pos:
- current_pos = tuple(pos)
- else:
- current_pos = None
-
- def syntax_error(msg, thing=None):
- # Not all error propagate, we log all of them
- if thing is not None:
- type_of_thing = ""
- else:
- type_of_thing = type(thing)
- logger.error(f"Syntax Error: Parser({self.name})._parse({type_of_thing}): {repr(msg)})")
- return self.generate_syntax_error(msg, pos)
-
- try:
- thing.parse
- except AttributeError:
- pass
- else:
- t, r = thing.parse(self, text, pos)
- if not isinstance(r, SyntaxError):
- t, skip_result = self._skip(t)
- update_pos(text, t, pos)
- if self.keep_feeble_things:
- try:
- r.feeble_things
- except AttributeError:
- try:
- r.feeble_things = skip_result
- except AttributeError:
- pass
- else:
- r.feeble_things += skip_result
- return t, r
-
- skip_result = None
-
- # terminal symbols
-
- if thing is None or type(thing) == FunctionType:
- result = text, None
- #
- elif isinstance(thing, Symbol):
- m = type(thing).regex.match(text)
- if m and m.group(0) == str(thing):
- t, r = text[len(thing):], None
- t, skip_result = self._skip(t)
- result = t, r
- update_pos(text, t, pos)
- else:
- err = "expecting " + repr(thing) + f" in '{text}'"
- result = text, syntax_error(err, thing)
- #
- elif isinstance(thing, (RegEx, _RegEx)):
- m = thing.match(text)
- if m:
- t, r = text[len(m.group(0)):], m.group(0)
- t, skip_result = self._skip(t)
- result = t, r
- update_pos(text, t, pos)
- else:
- err = "expecting match on " + thing.pattern + f" in '{text}'"
- result = text, syntax_error(err, thing)
- #
- elif isinstance(thing, (str, Literal)):
- if text.startswith(str(thing)):
- t, r = text[len(str(thing)):], None
- t, skip_result = self._skip(t)
- result = t, r
- update_pos(text, t, pos)
- else:
- err = "expecting " + repr(thing) + f" in '{text}'"
- result = text, syntax_error(err, thing)
- #
- elif _issubclass(thing, (Symbol, Str)):
- m = thing.regex.match(text)
- if m:
- result = None
- try:
- thing.grammar
- except AttributeError:
- pass
- else:
- if thing.grammar is None:
- pass
- elif isinstance(thing.grammar, Enum):
- if not m.group(0) in thing.grammar:
- result = text, syntax_error(repr(m.group(0))
- + " is not a member of " + repr(thing.grammar))
- else:
- raise GrammarValueError(
- "Symbol " + type(thing).__name__
- + " has a grammar which is not an Enum: "
- + repr(thing.grammar))
- if not result:
- t, r = text[len(m.group(0)):], thing(m.group(0))
- t, skip_result = self._skip(t)
- result = t, r
- update_pos(text, t, pos)
- else:
- err = "expecting " + thing.__name__ + f" in '{text}'"
- result = text, syntax_error(err, thing)
- #
- # non-terminal constructs
- #
- elif isinstance(thing, attr.Class):
- t, r = self._parse(text, thing.thing, pos)
- if type(r) == SyntaxError:
- if thing.subtype == "Flag":
- result = t, attr(thing.name, False)
- else:
- result = text, r
- else:
- if thing.subtype == "Flag":
- result = t, attr(thing.name, True)
- else:
- result = t, attr(thing.name, r)
- #
- elif isinstance(thing, (tuple, Concat)):
- if self.keep_feeble_things:
- L = List()
- else:
- L = []
- t = text
- flag = True
- _min, _max = 1, 1
- contiguous = self._contiguous
- omit = False
- for e in thing:
- if type(e) == int:
- if e < -6:
- raise GrammarValueError(
- "illegal cardinality value in grammar: " + str(e))
- if e == -6:
- omit = True
- elif e == -5:
- self._contiguous = False
- t, skip_result = self._skip(t)
- if self.keep_feeble_things and skip_result:
- try:
- L.feeble_things
- except AttributeError:
- try:
- L.feeble_things = skip_result
- except AttributeError:
- pass
- else:
- L.feeble_things += skip_result
- elif e == -4:
- self._contiguous = True
- elif e == -3:
- pass
- elif e == -2:
- _min, _max = 1, sys.maxsize
- elif e == -1:
- _min, _max = 0, sys.maxsize
- elif e == 0:
- _min, _max = 0, 1
- else:
- _min, _max = e, e
- continue
- for i in range(_max):
- t2, r = self._parse(t, e, pos)
- if type(r) == SyntaxError:
- i -= 1
- break
- elif omit:
- t = t2
- r = None
- else:
- t = t2
- if r is not None:
- if type(r) is list:
- L.extend(r)
- else:
- L.append(r)
- if i+1 < _min:
- if type(r) != SyntaxError:
- r = syntax_error("expecting " + str(_min)
- + " occurrence(s) of " + repr(e)
- + " (" + str(i+1) + " found)")
- flag = False
- break
- _min, _max = 1, 1
- omit = False
- if flag:
- if self._contiguous and not contiguous:
- self._contiguous = False
- t, skip_result = self._skip(t)
- if self.keep_feeble_things and skip_result:
- try:
- L.feeble_things
- except AttributeError:
- try:
- L.feeble_things = skip_result
- except AttributeError:
- pass
- else:
- L.feeble_things += skip_result
- if len(L) > 1 or how_many(thing) > 1:
- result = t, L
- elif not L:
- if not self.keep_feeble_things:
- return t, None
- try:
- L.feeble_things
- except AttributeError:
- return t, None
- if len(L.feeble_things):
- return t, L
- else:
- return t, None
- else:
- if self.keep_feeble_things:
- try:
- L.feeble_things
- except AttributeError:
- pass
- else:
- if L.feeble_things:
- try:
- L[0].feeble_things
- except AttributeError:
- try:
- L[0].feeble_things = L.feeble_things
- except AttributeError:
- pass
- else:
- L[0].feeble_things = L.feeble_things + \
- L[0].feeble_things
- result = t, L[0]
- else:
- result = text, r
- self._contiguous = contiguous
- #
- elif isinstance(thing, list):
- found = False
- for e in thing:
- try:
- t, r = self._parse(text, e, pos)
- if type(r) != SyntaxError:
- found = True
- break
- except GrammarValueError:
- raise
- except ValueError:
- pass
- if found:
- result = t, r
- else:
- result = text, syntax_error("expecting one of " + repr(thing))
- #
- elif _issubclass(thing, Namespace):
- t, r = self._parse(text, thing.grammar, pos)
- if type(r) != SyntaxError:
- if isinstance(r, thing):
- result = t, r
- else:
- obj = thing()
- for e in r:
- if type(e) == attr.Class:
- setattr(obj, e.name, e.thing)
- else:
- try:
- obj[e.name] = e
- except AttributeError:
- obj[None] = e
-
- try:
- obj.polish()
- except AttributeError:
- pass
- result = t, obj
- else:
- result = text, r
- #
- elif _issubclass(thing, list):
- try:
- g = thing.grammar
- except AttributeError:
- g = csl(Symbol)
- t, r = self._parse(text, g, pos)
- if type(r) != SyntaxError:
- if isinstance(r, thing):
- result = t, r
- else:
- obj = thing()
- if type(r) == list:
- for e in r:
- if type(e) == attr.Class:
- setattr(obj, e.name, e.thing)
- else:
- obj.append(e)
- else:
- if type(r) == attr.Class:
- setattr(obj, r.name, r.thing)
- else:
- obj.append(r)
- try:
- obj.polish()
- except AttributeError:
- pass
- result = t, obj
- else:
- result = text, r
- #
- elif _issubclass(thing, object):
- try:
- g = thing.grammar
- except AttributeError:
- g = word
- t, r = self._parse(text, g, pos)
- if type(r) != SyntaxError:
- if isinstance(r, thing):
- result = t, r
- else:
- try:
- if type(r) == list:
- L, a = [], []
- for e in r:
- if type(e) == attr.Class:
- a.append(e)
- else:
- L.append(e)
- if L:
- lg = how_many(thing.grammar)
- if lg == 0:
- obj = None
- elif lg == 1:
- obj = thing(L[0])
- else:
- obj = thing(L)
- else:
- obj = thing()
- for e in a:
- setattr(obj, e.name, e.thing)
- else:
- if type(r) == attr.Class:
- obj = thing()
- setattr(obj, r.name, r.thing)
- else:
- if r is None:
- obj = thing()
- else:
- obj = thing(r)
- except TypeError as t:
- L = list(t.args)
- L[0] = thing.__name__ + ": " + L[0]
- t.args = tuple(L)
- raise t
- try:
- obj.polish()
- except AttributeError:
- pass
- result = t, obj
- else:
- result = text, r
- #
- else:
- raise GrammarTypeError("in grammar: " + repr(thing))
-
- logger.debug(f"Parser({self.name}).parse() result: {repr(result)}")
-
- if pos:
- if type(result[1]) == SyntaxError:
- pos[0] = current_pos[0]
- pos[1] = current_pos[1]
- self.last_error = result[1]
- else:
- try:
- result[1].position_in_text = current_pos
- except AttributeError:
- pass
-
- if self.keep_feeble_things and skip_result:
- try:
- result[1].feeble_things
- except AttributeError:
- try:
- result[1].feeble_things = skip_result
- except AttributeError:
- pass
- else:
- result[1].feeble_things += skip_result
-
- try:
- self._memory[id(thing)]
- except KeyError:
- self._memory[id(thing)] = { text: result }
- else:
- self._memory[id(thing)][text] = result
-
- return result
-
- def compose(self, thing, grammar=None, attr_of=None):
- """Compose text using thing with grammar.
-
- Arguments:
- thing thing containing other things with grammar
- grammar grammar to use for composing thing
- default: type(thing).grammar
- attr_of if composing the value of an attribute, this
- is a reference to the thing where this value
- is an attribute of; None if this is not an
- attribute value
-
- Returns text
-
- Raises:
- ValueError if thing does not match grammar
- GrammarTypeError
- if grammar contains an object of unkown type
- GrammarValueError
- if grammar contains an illegal cardinality value
- """
- logger.debug(f"Parser({self.name}).compose({thing}, {grammar})")
- if __debug__:
- # make sure that we're not having this typing error
- compose = None
-
- def terminal_indent(do_blank=False):
- self._got_regex = False
- if self._got_endl:
- result = self.indent * self.indention_level
- self._got_endl = False
- return result
- elif do_blank and self.whitespace:
- if self._contiguous or not self.autoblank:
- return ""
- else:
- return blank(thing, self)
- else:
- return ""
-
- try:
- thing.compose
- except AttributeError:
- pass
- else:
- return terminal_indent() + thing.compose(self, attr_of=attr_of)
-
- if not grammar:
- try:
- grammar = type(thing).grammar
- except AttributeError:
- if isinstance(thing, Symbol):
- grammar = type(thing).regex
- elif isinstance(thing, list):
- grammar = csl(Symbol)
- else:
- grammar = word
- else:
- if isinstance(thing, Symbol):
- grammar = type(thing).regex
-
- if grammar is None:
- result = ""
- #
- elif type(grammar) == FunctionType:
- if grammar == endl:
- result = endl(thing, self)
- self._got_endl = True
- elif grammar == blank:
- result = terminal_indent() + blank(thing, self)
- else:
- result = self.compose(thing, grammar(thing, self))
- #
- elif isinstance(grammar, (RegEx, _RegEx)):
- m = grammar.match(str(thing))
- if m:
- result = terminal_indent(do_blank=self._got_regex) + str(thing)
- else:
- raise ValueError(repr(thing) + " does not match "
- + grammar.pattern)
- self._got_regex = True
- #
- elif isinstance(grammar, Keyword):
- result = terminal_indent(do_blank=self._got_regex) + str(grammar)
- self._got_regex = True
- #
- elif isinstance(grammar, (str, int, Literal)):
- result = terminal_indent() + str(grammar)
- #
- elif isinstance(grammar, Enum):
- if thing in grammar:
- if isinstance(thing, Keyword):
- result = terminal_indent(do_blank=self._got_regex) + str(thing)
- self._got_regex = True
- else:
- result = terminal_indent() + str(thing)
- else:
- raise ValueError(repr(thing) + " is not in " + repr(grammar))
- #
- elif isinstance(grammar, attr.Class):
- if grammar.subtype == "Flag":
- if getattr(thing, grammar.name):
- result = self.compose(thing, grammar.thing, attr_of=thing)
- else:
- result = terminal_indent()
- else:
- result = self.compose(getattr(thing, grammar.name),
- grammar.thing, attr_of=thing)
- #
- elif isinstance(grammar, (tuple, list)):
- def compose_tuple(thing, things, grammar):
- text = []
- multiple, card = 1, 1
- indenting = 0
- if isinstance(grammar, (tuple, Concat)):
- # concatenation
- for g in grammar:
- if g is None:
- multiple = 1
- if self.indenting:
- self.indention_level -= indenting
- self.indenting = 0
- elif type(g) == int:
- if g < -6:
- raise GrammarValueError(
- "illegal cardinality value in grammar: "
- + str(g))
- card = g
- if g in (-2, -1):
- multiple = sys.maxsize
- elif g in (-5, -4, -3, 0):
- multiple = 1
- if g == -3:
- self.indention_level += 1
- indenting += 1
- elif g == -6:
- multiple = 0
- else:
- multiple = g
- else:
- passes = 0
- try:
- for r in range(multiple):
- if isinstance(g, (str, Symbol, Literal)):
- text.append(self.compose(thing, g))
- if card < 1:
- break
- elif isinstance(g, FunctionType):
- text.append(self.compose(thing, g))
- if card < 1:
- break
- elif isinstance(g, attr.Class):
- text.append(self.compose(getattr(thing,
- g.name), g.thing, attr_of=thing))
- if card < 1:
- break
- elif isinstance(g, (tuple, list)):
- text.append(compose_tuple(thing, things, g))
- if not things:
- break
- else:
- text.append(self.compose(things.pop(), g))
- passes += 1
- except (IndexError, ValueError):
- if card == -2:
- if passes < 1:
- raise ValueError(repr(g)
- + " has to be there at least once")
- elif card > 0:
- if passes < multiple:
- raise ValueError(repr(g)
- + " has to be there exactly "
- + str(multiple) + " times")
- multiple = 1
- if indenting:
- self.indention_level -= indenting
- indenting = 0
- return ''.join(text)
- else:
- # options
- for g in grammar:
- try:
- if isinstance(g, (str, Symbol, Literal)):
- return self.compose(thing, g)
- elif isinstance(g, FunctionType):
- return self.compose(thing, g)
- elif isinstance(g, attr.Class):
- return self.compose(getattr(thing, g.name), g.thing)
- elif isinstance(g, (tuple, list)):
- return compose_tuple(thing, things, g)
- else:
- try:
- text = self.compose(things[-1], g)
- except Exception as e:
- raise e
- things.pop()
- return text
- except GrammarTypeError:
- raise
- except AttributeError:
- pass
- except KeyError:
- pass
- except TypeError:
- pass
- except ValueError:
- pass
- raise ValueError("none of the options in " + repr(grammar)
- + " found")
-
- if isinstance(thing, Namespace):
- L = [e for e in thing.values()]
- L.reverse()
- elif isinstance(thing, list):
- L = thing[:]
- L.reverse()
- else:
- L = [thing]
- result = compose_tuple(thing, L, grammar)
- #
- elif _issubclass(grammar, object):
- if isinstance(thing, grammar):
- try:
- grammar.grammar
- except AttributeError:
- if _issubclass(grammar, Symbol):
- result = self.compose(thing, grammar.regex)
- else:
- result = self.compose(thing)
- else:
- result = self.compose(thing, grammar.grammar)
- else:
- if grammar == Symbol and isinstance(thing, str):
- result = self.compose(str(thing), Symbol.regex)
- else:
- raise ValueError(repr(thing) + " is not a " + repr(grammar))
- #
- else:
- raise GrammarTypeError("in grammar: " + repr(grammar))
-
- return result
|