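# NOTE: the next lines are residue of the repository web page this file was copied from:
# "You cannot select more than 25 topics. Topics must start with a letter or number,
# can include dashes ('-') and can be up to 35 characters long."
# File statistics shown by that page: 1535 lines, 51 KiB, Python.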

"""
pyPEG parsing framework
pyPEG offers a packrat parser as well as a framework to parse and output
languages for Python 2.7 and 3.x, see http://fdik.org/pyPEG2
Copyleft 2012, Volker Birk.
This program is under GNU General Public License 2.0.
"""
from __future__ import unicode_literals

# Python 2 compatibility: alias the lazy range and the unicode string type.
# Where these names do not exist the NameError is ignored and the builtins
# stay as they are.
try:
    range = xrange
    str = unicode
except NameError:
    pass
import logging
# Module-wide logger; constructors and parser methods emit debug records here.
logger = logging.getLogger("pyPEG2")
__version__ = 2.15
__author__ = "Volker Birk"
__license__ = "This program is under GNU General Public License 2.0."
__url__ = "http://fdik.org/pyPEG"
import re
import sys
# Largest usable integer; used as the "unbounded" repetition count.
try:
    maxsize = sys.maxint
except AttributeError:
    maxsize = sys.maxsize
import weakref
if __debug__:
    import warnings
from types import FunctionType
from collections import namedtuple
# Fall back to the external ordereddict package where collections has no
# OrderedDict.
try:
    from collections import OrderedDict
except ImportError:
    from ordereddict import OrderedDict
# Pre-compiled regular expressions used as ready-made grammar elements.
word = re.compile(r"\w+")
"""Regular expression for scanning a word."""
# Type of compiled pattern objects; used in isinstance() checks next to RegEx.
_RegEx = type(word)
restline = re.compile(r".*")
"""Regular expression for rest of line."""
whitespace = re.compile(r"(?m)\s+")
"""Regular expression for scanning whitespace."""
comment_sh = re.compile(r"\#.*")
"""Shell script style comment."""
comment_cpp = re.compile(r"//.*")
"""C++ style comment."""
# (?ms) with a non-greedy body: the match stops at the first closing marker.
comment_c = re.compile(r"(?ms)/\*.*?\*/")
"""C style comment without nesting comments."""
comment_pas = re.compile(r"(?ms)\(\*.*?\*\)")
"""Pascal style comment without nesting comments."""
def _card(n, thing):
# Reduce unnecessary recursions
if len(thing) == 1:
return n, thing[0]
else:
return n, thing
def some(*thing):
    """One or more occurrences of thing (the + operator).

    The grammar fragment is prefixed with the cardinality marker -2.
    """
    at_least_once = -2
    return _card(at_least_once, thing)
def maybe_some(*thing):
    """Zero or more occurrences of thing (the * operator).

    The grammar fragment is prefixed with the cardinality marker -1.
    """
    any_number = -1
    return _card(any_number, thing)
def optional(*thing):
    """Zero or one occurrence of thing (the ? operator).

    The grammar fragment is prefixed with the cardinality marker 0.
    """
    at_most_once = 0
    return _card(at_most_once, thing)
def _csl(separator, *thing):
    """Build the grammar tuple of a separated list of thing.

    The result is the things themselves, followed by the cardinality
    marker -1 and a tuple (separator, blank, *thing) describing every
    further list entry.
    """
    items = tuple(thing)
    further_entry = (separator, blank) + items
    return items + (-1, further_entry)
def csl(*thing, separator=","):
    """Generate a grammar for a simple comma separated list.

    Arguments:
        thing       grammar element(s) making up one list entry
        separator   keyword-only; text between two entries
                    default: ","

    Returns the grammar tuple built by _csl().
    """
    # This module needs f-strings and therefore Python 3, so the keyword-only
    # parameter can be written directly instead of going through exec().
    return _csl(separator, *thing)
def attr(name, thing=word, subtype=None):
    """Create an Attribute called name whose grammar is thing.

    Arguments:
        name        name of the attribute
        thing       grammar of the attribute value
                    default: word
        subtype     optional marker; flag() passes "Flag"

    Function attributes:
        Class       Attribute class generated by namedtuple()
    """
    return attr.Class(name=name, thing=thing, subtype=subtype)


attr.Class = namedtuple("Attribute", ["name", "thing", "subtype"])
def flag(name, thing=None):
    """Create an Attribute called name that carries the subtype "Flag".

    Without an explicit thing, a Keyword built from name is the grammar.
    """
    grammar = Keyword(name) if thing is None else thing
    return attr(name, grammar, "Flag")
def attributes(grammar, invisible=False):
    """Yield every Attribute found in grammar, descending into tuples.

    Attributes whose name starts with "_" are only yielded when invisible
    is true.
    """
    kind = type(grammar)
    if kind == attr.Class:
        if invisible or grammar.name[0] != "_":
            yield grammar
    elif kind == tuple:
        for element in grammar:
            yield from attributes(element, invisible)
class Whitespace(str):
    # A str subclass whose grammar is the module-level `whitespace`
    # regular expression.
    grammar = whitespace
class RegEx(object):
    """Regular Expression.

    Wraps a pattern compiled with re.U and re-exports the usual methods and
    properties of the compiled object, so an instance can stand in for it.

    Instance Variables:
        regex       pre-compiled object from re.compile()
        pattern     the pattern string as given
    """

    def __init__(self, value, **kwargs):
        """Compile value; keyword arguments become instance attributes."""
        logger.debug("New Regex(%s)", value)
        self.regex = re.compile(value, re.U)
        self.search = self.regex.search
        self.match = self.regex.match
        self.split = self.regex.split
        self.findall = self.regex.findall
        self.finditer = self.regex.finditer
        self.sub = self.regex.sub
        self.subn = self.regex.subn
        self.flags = self.regex.flags
        self.groups = self.regex.groups
        self.groupindex = self.regex.groupindex
        self.pattern = value
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __str__(self):
        """x.__str__() <==> str(x); the pattern string."""
        return self.pattern

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        result = type(self).__name__ + "(" + repr(self.pattern)
        # name is optional; only a missing attribute is expected here, so
        # nothing broader than AttributeError is swallowed.
        try:
            result += ", name=" + repr(self.name)
        except AttributeError:
            pass
        return result + ")"
class Literal(object):
    """Literal value.

    Either used on its own, keeping the value in .value, or mixed into one
    of the builtin types listed in _basic_types, in which case the builtin
    base holds the value.
    """
    _basic_types = (bool, int, float, complex, str, bytes, bytearray, list,
                    tuple, slice, set, frozenset, dict)

    def __init__(self, value, **kwargs):
        """Store value unless a builtin base holds it; set extra attributes."""
        logger.debug(f"New Literal({value})")
        if not isinstance(self, Literal._basic_types):
            self.value = value
        for key, val in kwargs.items():
            setattr(self, key, val)

    def __str__(self):
        """x.__str__() <==> str(x)"""
        if isinstance(self, Literal._basic_types):
            return super(Literal, self).__str__()
        return str(self.value)

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        if isinstance(self, Literal._basic_types):
            inner = super(Literal, self).__repr__()
        else:
            inner = repr(self.value)
        return type(self).__name__ + "(" + inner + ")"

    def __eq__(self, other):
        """Equal only to objects of exactly the same type with equal value."""
        same_type = type(self) == type(other)
        if isinstance(self, Literal._basic_types):
            return bool(same_type and super().__eq__(other))
        return bool(same_type and str(self) == str(other))
class Str:
    """A mutable string like object

    Instance variables:
        data        str(value) of the value given on construction
        name        Symbol built from the name argument; only set if a name
                    was given
    """

    def __init__(self, value, name=None, **kwargs):
        """Store str(value), an optional name and any further keyword
        arguments as attributes.
        """
        logger.debug(f"New Str({value})")
        self.data = str(value)
        if name is not None:
            self.name = Symbol(name)
        # .items() is required: iterating the dict itself yields only keys.
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        try:
            return self.__class__.__name__ + f"(name={self.name}, data={self.data})"
        except AttributeError:
            return self.__class__.__name__ + f"(data={self.data})"
class Plain(object):
    """A plain object"""

    def __init__(self, name=None, **kwargs):
        """Construct a plain object with an optional name and optional other
        attributes

        Arguments:
            name        if given, stored as Symbol(name) in .name
            kwargs      every keyword argument becomes an attribute
        """
        logger.debug(f"New Plain({name})")
        if name is not None:
            self.name = Symbol(name)
        # .items() is required: iterating the dict itself yields only keys.
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        try:
            return self.__class__.__name__ + "(name=" + repr(self.name) + ")"
        except AttributeError:
            return self.__class__.__name__ + "()"
class List(list):
    """A List of things."""

    def __init__(self, *args, **kwargs):
        """Construct a List, and construct its attributes from keyword
        arguments.

        Arguments:
            args        either one string (becomes the only element), one
                        tuple or list (its elements are used), or several
                        elements; Attribute elements are not stored but set
                        as attributes of the List
            kwargs      every keyword argument becomes an attribute

        Raises:
            ValueError  if the single argument is neither a collection nor
                        a string
        """
        logger.debug(f"New List({args}, {kwargs})")
        if len(args) == 1:
            only = args[0]
            if isinstance(only, str):
                elements = [only]
            elif isinstance(only, (tuple, list)):
                elements = only
            else:
                raise ValueError("initializer of List should be collection or string")
        else:
            elements = args
        plain = []
        for e in elements:
            if isinstance(e, attr.Class):
                # The Attribute namedtuple keeps its payload in .thing; it
                # has no .value field.
                setattr(self, e.name, e.thing)
            else:
                plain.append(e)
        super(List, self).__init__(plain)
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        result = type(self).__name__ + "(" + super(List, self).__repr__()
        # name is optional; only its absence is tolerated.
        try:
            result += ", name=" + repr(self.name)
        except AttributeError:
            pass
        return result + ")"

    def __eq__(self, other):
        """Compare element-wise with any iterable; objects that cannot be
        iterated are left to the default comparison.
        """
        try:
            other_elements = list(other)
        except TypeError:
            return NotImplemented
        return super(List, self).__eq__(other_elements)

    def __ne__(self, other):
        """Inverse of __eq__; list defines its own __ne__, which would
        otherwise disagree with __eq__ for non-list iterables.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
class _UserDict(object):
# UserDict cannot be used because of metaclass conflicts
def __init__(self, *args, **kwargs):
self.data = dict(*args, **kwargs)
def __len__(self):
return len(self.data)
def __getitem__(self, key):
return self.data[key]
def __setitem__(self, key, value):
self.data[key] = value
def __delitem__(self, key):
del self.data[key]
def __iter__(self):
return self.data.keys()
def __contains__(self, item):
return item in self.data
def items(self):
return self.data.items()
def keys(self):
return self.data.keys()
def values(self):
return self.data.values()
def clear(self):
self.data.clear()
def copy(self):
return self.data.copy()
class Namespace(_UserDict):
    """A dictionary of things, indexed by their name."""

    @staticmethod
    def name_by(value):
        """Generate the name used for a value stored under the key None."""
        return "#" + str(id(value))

    def __init__(self, *args, **kwargs):
        """Initialize an OrderedDict containing the data of the Namespace.

        Arguments are handed to OrderedDict() as its sequence of items,
        keyword arguments give the attributes of the Namespace.
        """
        logger.debug(f"New Namespace({args}, {kwargs})")
        if args:
            self.data = OrderedDict(args)
        else:
            self.data = OrderedDict()
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __setitem__(self, key, value):
        """x.__setitem__(i, y) <==> x[i]=y

        The key is converted to a Symbol (a generated one for key None) and
        written to value.name. value.namespace becomes a weak reference to
        this Namespace unless it already holds a truthy value. Values that
        do not accept attributes are stored as they are.
        """
        if key is None:
            name = Symbol(Namespace.name_by(value))
        else:
            name = Symbol(key)
        try:
            value.name = name
        except AttributeError:
            pass
        try:
            value.namespace
        except AttributeError:
            try:
                value.namespace = weakref.ref(self)
            except AttributeError:
                pass
        else:
            if not value.namespace:
                value.namespace = weakref.ref(self)
        super(Namespace, self).__setitem__(name, value)

    def __delitem__(self, key):
        """x.__delitem__(y) <==> del x[y]"""
        # __setitem__ accepts values that cannot carry attributes, so
        # removing them must not fail on the back-reference either.
        try:
            self[key].namespace = None
        except AttributeError:
            pass
        super(Namespace, self).__delitem__(key)

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        pairs = "".join(
            "(" + repr(key) + ", " + repr(value) + "), "
            for key, value in self.data.items())
        result = type(self).__name__ + "([" + pairs + "]"
        # name is optional; only its absence is tolerated.
        try:
            result += ", name=" + repr(self.name)
        except AttributeError:
            pass
        return result + ")"
class Enum(Namespace):
    """A Namespace which is being treated as an Enum.
    Enums can only contain Keywords or Symbols."""

    def __init__(self, *things, **kwargs):
        """Construct an Enum using a tuple of things.

        Plain strings are converted to Symbols; keyword arguments become
        attributes of the Enum.

        Raises:
            TypeError   if a thing is neither a string nor a Symbol
        """
        logger.debug(f"New Enum({things})")
        self.data = OrderedDict()
        for thing in things:
            if type(thing) == str:
                thing = Symbol(thing)
            if not isinstance(thing, Symbol):
                raise TypeError(repr(thing) + " is not a Symbol")
            # Use the Namespace implementation: Enum.__setitem__ always
            # raises.
            super(Enum, self).__setitem__(thing, thing)
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        result = type(self).__name__ + "(" + repr(list(self.values()))
        # name is optional; only its absence is tolerated.
        try:
            result += ", name=" + repr(self.name)
        except AttributeError:
            pass
        return result + ")"

    def __setitem__(self, key, value):
        """x.__setitem__(i, y) <==> x[i]=y

        Always raises: TypeError for values that are neither Keyword nor
        Symbol, ValueError otherwise.
        """
        if not isinstance(value, Keyword) and not isinstance(value, Symbol):
            raise TypeError("Enums can only contain Keywords or Symbols")
        raise ValueError("Enums cannot be modified after creation.")
class Symbol(str):
    r"""Use to scan Symbols.

    Class variables:
        regex           regular expression to scan, default r"\w+"
        check_keywords  flag if a Symbol is checked for not being a Keyword
                        default: False
    """
    regex = word
    check_keywords = False

    def __new__(cls, name, *args, **kwargs):
        """Create the string from name only.

        str.__new__ would read any further argument as encoding/errors, so
        Symbol(name, namespace) could not be constructed without this.
        """
        return super(Symbol, cls).__new__(cls, name)

    def __init__(self, name, namespace=None):
        """Construct a Symbol with that name in Namespace namespace.

        Raises:
            ValueError  if check_keywords is True and value is identical to
                        a Keyword
            TypeError   if namespace is given and not a Namespace
        """
        logger.debug(f"New Symbol({name})")
        if Symbol.check_keywords and name in Keyword.table:
            raise ValueError(repr(name)
                             + " is a Keyword, but is used as a Symbol")
        if isinstance(namespace, Namespace):
            # Checked by type, not truthiness: an empty Namespace is falsy
            # but still has to receive the Symbol.
            namespace[name] = self
        elif namespace:
            raise TypeError(repr(namespace) + " is not a Namespace")
        else:
            self.name = name
            self.namespace = None

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        return type(self).__name__ + "(" + str(self).__repr__() + ")"
class Keyword(Symbol):
    r"""Symbol that is registered in the shared keyword table.

    Class variables:
        regex       regular expression to scan, default r"\w+"
        table       Namespace with keyword table
    """
    regex = word
    table = Namespace()

    def __init__(self, keyword):
        """Register keyword in the keyword table unless it is already there."""
        registry = Keyword.table
        if keyword not in registry:
            registry[keyword] = self
        self.name = keyword


K = Keyword
"""Shortcut for Keyword."""
class IKeyword(Keyword):
    """Keyword that is matched without regard to case."""

    def parse(self, parser, text, pos):
        """Match this keyword at the start of text, ignoring case.

        Returns (remaining text, None) on success, otherwise the unchanged
        text together with a SyntaxError object.
        """
        expected = str(self)
        found = type(self).regex.match(text)
        if found and found.group(0).upper() == expected.upper():
            return text[len(expected):], None
        return text, SyntaxError("expecting " + repr(self))


IK = IKeyword
"""Shortcut for case-insensitive Keyword."""
class Concat(List):
    """Concatenation of things.

    This class exists as a mutable alternative to using a tuple.
    """
    # No members of its own: how_many() and the Parser test for it with
    # isinstance(..., (tuple, Concat)) and then handle it like a tuple.
def name():
    """Return the grammar of an attribute called "name" holding a Symbol."""
    attribute_name = "name"
    return attr(attribute_name, Symbol)
def ignore(grammar):
    """Match grammar but hide the result.

    The grammar is wrapped in an attribute named "_ignore<N>", where N is a
    counter kept on the function itself and increased with every call.
    """
    ignore.serial = getattr(ignore, "serial", 0) + 1
    return attr("_ignore" + str(ignore.serial), grammar)
def indent(*thing):
    """Compose thing one indention level deeper.

    The grammar fragment is prefixed with the marker -3.
    """
    indent_marker = -3
    return _card(indent_marker, thing)
def contiguous(*thing):
    """Switch automated whitespace matching off for thing.

    The grammar fragment is prefixed with the marker -4.
    """
    contiguous_marker = -4
    return _card(contiguous_marker, thing)
def separated(*thing):
    """Switch automated whitespace matching on for thing.

    The grammar fragment is prefixed with the marker -5.
    """
    separated_marker = -5
    return _card(separated_marker, thing)
def omit(*thing):
    """Match thing but leave what it matches out of the result.

    The grammar fragment is prefixed with the marker -6.
    """
    omit_marker = -6
    return _card(omit_marker, thing)
# Marker functions for composing text. Parser.compose() recognizes them by
# comparing the grammar element with these very objects, so they must keep
# their identity.
endl = lambda thing, parser: "\n"
"""End of line marker for composing text."""
blank = lambda thing, parser: " "
"""Space marker for composing text."""
class GrammarError(Exception):
    """Base class for errors in grammars."""
    # GrammarTypeError and GrammarValueError derive from this class.
class GrammarTypeError(TypeError, GrammarError):
    """Raised if grammar contains an object of unknown type."""
class GrammarValueError(ValueError, GrammarError):
    """Raised if grammar contains an illegal value."""
def how_many(grammar):
    """Determines the possibly parsed objects of grammar.

    Returns:
        0 if there will be no objects
        1 if there will be a maximum of one object
        2 if there can be more than one object

    Raises:
        GrammarTypeError
            if grammar contains an object of unknown type
        GrammarValueError
            if grammar contains an illegal cardinality value
    """
    if grammar is None:
        return 0
    elif type(grammar) == int:
        # A bare int is returned unchanged.
        return grammar
    elif _issubclass(grammar, Symbol) or isinstance(grammar, (RegEx, _RegEx)):
        return 1
    elif isinstance(grammar, (str, Literal)):
        return 0
    elif isinstance(grammar, attr.Class):
        return 0
    elif type(grammar) == FunctionType:
        return 0
    elif isinstance(grammar, (tuple, Concat)):
        # Concatenation: add up the element counts, each weighted with the
        # cardinality announced by the int in front of it.
        length, card = 0, 1
        for e in grammar:
            if type(e) == int:
                if e < -6:
                    raise GrammarValueError(
                        "illegal cardinality value in grammar: " + str(e))
                if e in (-5, -4, -3):
                    # separated / contiguous / indent do not change the count
                    pass
                elif e in (-1, -2):
                    card = 2
                elif e == 0:
                    card = 1
                elif e == -6:
                    # omit: the whole tuple is reported as producing nothing
                    return 0
                else:
                    card = min(e, 2)
            else:
                # NOTE(review): card is not reset to 1 after it was applied,
                # so it also weights the elements that follow -- confirm
                # that this is intended.
                length += card * how_many(e)
                if length >= 2:
                    return 2
        return length
    elif isinstance(grammar, list):
        # Options: the maximum over all alternatives.
        m = 0
        for e in grammar:
            m = max(m, how_many(e))
            if m == 2:
                return m
        return m
    elif _issubclass(grammar, object):
        return 1
    else:
        raise GrammarTypeError("grammar contains an illegal type: "
                               + type(grammar).__name__ + ": " + repr(grammar))
def parse(text, thing, filename=None, whitespace=whitespace, comment=None,
          keep_feeble_things=False):
    r"""Parse text following thing as grammar and return the resulting things or
    raise an error.

    Arguments:
        text        text to parse
        thing       grammar for things to parse
        filename    filename where text is origin from
        whitespace  regular expression to skip whitespace
                    default: regex "(?m)\s+"
        comment     grammar to parse comments
                    default: None
        keep_feeble_things
                    put whitespace and comments into the .feeble_things
                    attribute instead of dumping them

    Returns generated things.

    Raises:
        SyntaxError if text does not match the grammar in thing, or if text
                    is left over after the grammar matched
        ValueError  if input does not match types
        TypeError   if output classes have wrong syntax for __init__()
        GrammarTypeError
                    if grammar contains an object of unknown type
        GrammarValueError
                    if grammar contains an illegal cardinality value
    """
    logger.debug(f"parse({repr(text)}, {thing})")
    parser = Parser()
    parser.whitespace = whitespace
    parser.comment = comment
    parser.text = text
    parser.filename = filename
    parser.keep_feeble_things = keep_feeble_things
    t, r = parser.parse(text, thing)
    if t:
        # Unparsed text is left over. The parser only records an error when
        # a match failed while positions were tracked, so last_error can
        # still be None here; "raise None" would be a TypeError.
        if parser.last_error is not None:
            raise parser.last_error
        raise parser.generate_syntax_error(
            "unparsed text remaining: " + repr(t), None)
    return r
def compose(thing, grammar=None, indent=" ", autoblank=True):
    """Create text from thing by following a grammar.

    Arguments:
        thing       thing containing other things with grammar
        grammar     grammar to use to compose thing
                    default: thing.grammar
        indent      string used for one level of indention
        autoblank   add blanks where the grammar would possibly be
                    violated otherwise
                    default: True

    Returns text

    Raises:
        ValueError  if input does not match grammar
        GrammarTypeError
                    if grammar contains an object of unknown type
        GrammarValueError
                    if grammar contains an illegal cardinality value
    """
    logger.debug(f"compose({thing}, {grammar})")
    composer = Parser()
    composer.indent, composer.autoblank = indent, autoblank
    return composer.compose(thing, grammar)
def _issubclass(obj, cls):
# If obj is not a class, just return False
try:
return issubclass(obj, cls)
except TypeError:
return False
class Parser(object):
    r"""Offers parsing and composing capabilities. Implements a Packrat parser.

    Instance variables:
        whitespace          regular expression to scan whitespace
                            default: "(?m)\s+"
        comment             grammar to parse comments
        last_error          syntax error which ended parsing
        indent              string to use to indent while composing
        indention_level     level to indent to
                            default: 0
        text                original text to parse; set for decorated syntax
                            errors
        filename            filename where text is origin from
        autoblank           add blanks while composing if grammar would possibly
                            be violated otherwise
                            default: True
        keep_feeble_things  put whitespace and comments into the .feeble_things
                            attribute instead of dumping them
    """

    def __init__(self, name=None):
        """Initialize instance variables to their defaults.

        Arguments:
            name    label used in debug log records
                    default: hex(id(self))
        """
        self.whitespace = whitespace
        self.comment = None
        self.last_error = None
        self.indent = " "
        self.indention_level = 0
        self.text = None
        self.filename = None
        self.autoblank = True
        self.keep_feeble_things = False
        self._memory = {}
        self._got_endl = True
        self._contiguous = False
        self._got_regex = False
        self._name = hex(id(self)) if name is None else name
        logger.debug(f"New Parser(name={name})")

    @property
    def name(self):
        """Label of this parser in debug log records."""
        return self._name

    @staticmethod
    def _attach_feeble_things(target, feeble, prepend=False):
        # Merge the skipped whitespace/comments in feeble into
        # target.feeble_things. The attribute is created if it is missing;
        # targets that do not accept attributes are silently left alone.
        try:
            target.feeble_things
        except AttributeError:
            try:
                target.feeble_things = feeble
            except AttributeError:
                pass
        else:
            if prepend:
                target.feeble_things = feeble + target.feeble_things
            else:
                target.feeble_things += feeble

    def clear_memory(self, thing=None):
        """Clear cache memory for packrat parsing.

        Arguments:
            thing   thing for which cache memory is cleared,
                    None if cache memory should be cleared for all
                    things
        """
        if thing is None:
            self._memory = {}
        else:
            self._memory.pop(id(thing), None)

    def parse(self, text, thing, filename=None):
        """(Partially) parse text following thing as grammar and return the
        resulting things.

        Arguments:
            text        text to parse
            thing       grammar for things to parse
            filename    filename where text is origin from

        Returns (text, result) with:
            text        unparsed text
            result      generated objects

        Raises:
            SyntaxError if text does not match the grammar in thing
            ValueError  if input does not match types
            TypeError   if output classes have wrong syntax for __init__()
            GrammarTypeError
                        if grammar contains an object of unknown type
            GrammarValueError
                        if grammar contains an illegal cardinality value
        """
        logger.debug("Parser(%s).parse(%r, %s)", self.name, text, thing)
        self.text = text
        if filename:
            self.filename = filename
        pos = [1, 0]
        t, skip_result = self._skip(text, pos)
        t, r = self._parse(t, thing, pos)
        if type(r) == SyntaxError:
            raise r
        if self.keep_feeble_things and skip_result:
            # Leading whitespace/comments belong in front of whatever the
            # result collected itself.
            self._attach_feeble_things(r, skip_result, prepend=True)
        return t, r

    def _skip(self, text, pos=None):
        # Skip whitespace and comments from input text until neither of them
        # consumes anything any more. Returns (remaining text, skipped things);
        # skipped things are only collected if keep_feeble_things is set.
        logger.debug("Parser(%s)._skip(%r, %s)", self.name, text, pos)
        t2 = None
        t = text
        result = []
        while t2 != t:
            if self.whitespace and not self._contiguous:
                t, r = self._parse(t, self.whitespace, pos)
                if self.keep_feeble_things and r and not isinstance(
                        r, SyntaxError):
                    result.append(r)
            t2 = t
            if self.comment:
                t, r = self._parse(t, self.comment, pos)
                if self.keep_feeble_things and r and not isinstance(
                        r, SyntaxError):
                    result.append(r)
        return t, result

    def generate_syntax_error(self, msg, pos):
        """Generate a syntax error construct with

        msg     string with error message
        pos     (lineNo, charInText) with positioning information
        """
        result = SyntaxError(msg)
        if pos:
            result.lineno = pos[0]
            # Excerpt of up to 19 characters before and 20 after the position.
            start = max(pos[1] - 19, 0)
            end = min(pos[1] + 20, len(self.text))
            result.text = self.text[start:end]
            result.offset = pos[1] - start + 1
            # Reduce the excerpt to the line that contains the position.
            while "\n" in result.text:
                lf = result.text.find("\n")
                if lf >= result.offset:
                    result.text = result.text[:result.offset-1]
                    break
                else:
                    L = len(result.text)
                    result.text = result.text[lf+1:]
                    result.offset -= L - len(result.text)
        if self.filename:
            result.filename = self.filename
        return result

    def _parse(self, text, thing, pos=(1, 0)):
        # Parser implementation.
        #   text    text to parse
        #   thing   grammar element to match at the start of text
        #   pos     [line, characters consumed], updated in place; None
        #           switches position tracking off
        # Returns (remaining text, result); result is a SyntaxError object
        # if thing does not match.
        logger.debug("Parser(%s)._parse([%s]: %r, %s, %s)",
                     self.name, type(thing), text, thing, pos)

        # The default is an immutable tuple and copied per call: a list
        # default would be shared by all calls and keep the position of
        # earlier ones.
        if isinstance(pos, tuple):
            pos = list(pos)

        def update_pos(text, t, pos):
            # Calculate where we are in the text
            if pos and text != t:
                old_pos = list(pos)
                d_text = text[:len(text) - len(t)]
                pos[0] += d_text.count("\n")
                pos[1] += len(d_text)
                logger.debug("Parser(%s)._parse.update_pos(%s->%s)",
                             self.name, old_pos, pos)
            else:
                logger.debug("Parser(%s)._parse.update_pos(%s)",
                             self.name, pos)

        # Packrat cache: results are remembered per grammar object and text.
        try:
            return self._memory[id(thing)][text]
        except (KeyError, TypeError):
            pass

        if pos:
            current_pos = tuple(pos)
        else:
            current_pos = None

        def syntax_error(msg):
            return self.generate_syntax_error(msg, pos)

        # Things bringing their own parse() method are not cached.
        try:
            thing.parse
        except AttributeError:
            pass
        else:
            t, r = thing.parse(self, text, pos)
            if not isinstance(r, SyntaxError):
                t, skip_result = self._skip(t)
                update_pos(text, t, pos)
                if self.keep_feeble_things:
                    self._attach_feeble_things(r, skip_result)
            return t, r

        skip_result = None

        # terminal symbols
        if thing is None or type(thing) == FunctionType:
            result = text, None

        elif isinstance(thing, Symbol):
            m = type(thing).regex.match(text)
            if m and m.group(0) == str(thing):
                t, r = text[len(thing):], None
                t, skip_result = self._skip(t)
                result = t, r
                update_pos(text, t, pos)
            else:
                result = text, syntax_error("expecting " + repr(thing) + f" in '{text}'")

        elif isinstance(thing, (RegEx, _RegEx)):
            m = thing.match(text)
            if m:
                t, r = text[len(m.group(0)):], m.group(0)
                t, skip_result = self._skip(t)
                result = t, r
                update_pos(text, t, pos)
            else:
                result = text, syntax_error("expecting match on "
                                            + thing.pattern + f" in '{text}'")

        elif isinstance(thing, (str, Literal)):
            if text.startswith(str(thing)):
                t, r = text[len(str(thing)):], None
                t, skip_result = self._skip(t)
                result = t, r
                update_pos(text, t, pos)
            else:
                result = text, syntax_error("expecting " + repr(thing) + f" in '{text}'")

        elif _issubclass(thing, Symbol):
            m = thing.regex.match(text)
            if m:
                result = None
                try:
                    thing.grammar
                except AttributeError:
                    pass
                else:
                    if thing.grammar is None:
                        pass
                    elif isinstance(thing.grammar, Enum):
                        if not m.group(0) in thing.grammar:
                            result = text, syntax_error(
                                repr(m.group(0))
                                + " is not a member of " + repr(thing.grammar))
                    else:
                        raise GrammarValueError(
                            "Symbol " + type(thing).__name__
                            + " has a grammar which is not an Enum: "
                            + repr(thing.grammar))
                if not result:
                    t, r = text[len(m.group(0)):], thing(m.group(0))
                    t, skip_result = self._skip(t)
                    result = t, r
                    update_pos(text, t, pos)
            else:
                result = text, syntax_error("expecting " + thing.__name__ + f" in '{text}'")

        # non-terminal constructs
        elif isinstance(thing, attr.Class):
            t, r = self._parse(text, thing.thing, pos)
            if type(r) == SyntaxError:
                if thing.subtype == "Flag":
                    # A flag that does not match is parsed as False.
                    result = t, attr(thing.name, False)
                else:
                    result = text, r
            else:
                if thing.subtype == "Flag":
                    result = t, attr(thing.name, True)
                else:
                    result = t, attr(thing.name, r)

        elif isinstance(thing, (tuple, Concat)):
            # Concatenation; ints in the tuple set the cardinality or mode
            # for the element that follows them.
            if self.keep_feeble_things:
                L = List()
            else:
                L = []
            t = text
            flag = True
            _min, _max = 1, 1
            contiguous = self._contiguous
            omit = False
            for e in thing:
                if type(e) == int:
                    if e < -6:
                        raise GrammarValueError(
                            "illegal cardinality value in grammar: " + str(e))
                    if e == -6:
                        omit = True
                    elif e == -5:
                        self._contiguous = False
                        t, skip_result = self._skip(t)
                        if self.keep_feeble_things and skip_result:
                            self._attach_feeble_things(L, skip_result)
                    elif e == -4:
                        self._contiguous = True
                    elif e == -3:
                        pass
                    elif e == -2:
                        _min, _max = 1, maxsize
                    elif e == -1:
                        _min, _max = 0, maxsize
                    elif e == 0:
                        _min, _max = 0, 1
                    else:
                        _min, _max = e, e
                    continue
                for i in range(_max):
                    t2, r = self._parse(t, e, pos)
                    if type(r) == SyntaxError:
                        # i + 1 is the number of successful matches below
                        i -= 1
                        break
                    elif omit:
                        t = t2
                        r = None
                    else:
                        t = t2
                    if r is not None:
                        if type(r) is list:
                            L.extend(r)
                        else:
                            L.append(r)
                if i+1 < _min:
                    if type(r) != SyntaxError:
                        r = syntax_error("expecting " + str(_min)
                                         + " occurrence(s) of " + repr(e)
                                         + " (" + str(i+1) + " found)")
                    flag = False
                    break
                _min, _max = 1, 1
                omit = False
            if flag:
                if self._contiguous and not contiguous:
                    self._contiguous = False
                    t, skip_result = self._skip(t)
                    if self.keep_feeble_things and skip_result:
                        self._attach_feeble_things(L, skip_result)
                if len(L) > 1 or how_many(thing) > 1:
                    result = t, L
                elif not L:
                    # These returns leave the method directly, i.e. without
                    # the position bookkeeping and caching at its end.
                    if (self.keep_feeble_things
                            and len(getattr(L, "feeble_things", ()))):
                        return t, L
                    return t, None
                else:
                    if self.keep_feeble_things:
                        feeble = getattr(L, "feeble_things", None)
                        if feeble:
                            # Hand what the list collected to its only element.
                            self._attach_feeble_things(L[0], feeble,
                                                       prepend=True)
                    result = t, L[0]
            else:
                result = text, r
            self._contiguous = contiguous

        elif isinstance(thing, list):
            # Options: the first alternative that matches wins.
            found = False
            for e in thing:
                try:
                    t, r = self._parse(text, e, pos)
                    if type(r) != SyntaxError:
                        found = True
                        break
                except GrammarValueError:
                    raise
                except ValueError:
                    pass
            if found:
                result = t, r
            else:
                result = text, syntax_error("expecting one of " + repr(thing))

        elif _issubclass(thing, Namespace):
            t, r = self._parse(text, thing.grammar, pos)
            if type(r) != SyntaxError:
                if isinstance(r, thing):
                    result = t, r
                else:
                    obj = thing()
                    for e in r:
                        if type(e) == attr.Class:
                            setattr(obj, e.name, e.thing)
                        else:
                            try:
                                obj[e.name] = e
                            except AttributeError:
                                obj[None] = e
                    try:
                        obj.polish()
                    except AttributeError:
                        pass
                    result = t, obj
            else:
                result = text, r

        elif _issubclass(thing, list):
            try:
                g = thing.grammar
            except AttributeError:
                g = csl(Symbol)
            t, r = self._parse(text, g, pos)
            if type(r) != SyntaxError:
                if isinstance(r, thing):
                    result = t, r
                else:
                    obj = thing()
                    if type(r) == list:
                        for e in r:
                            if type(e) == attr.Class:
                                setattr(obj, e.name, e.thing)
                            else:
                                obj.append(e)
                    else:
                        if type(r) == attr.Class:
                            setattr(obj, r.name, r.thing)
                        else:
                            obj.append(r)
                    try:
                        obj.polish()
                    except AttributeError:
                        pass
                    result = t, obj
            else:
                result = text, r

        elif _issubclass(thing, object):
            try:
                g = thing.grammar
            except AttributeError:
                g = word
            t, r = self._parse(text, g, pos)
            if type(r) != SyntaxError:
                if isinstance(r, thing):
                    result = t, r
                else:
                    try:
                        if type(r) == list:
                            L, a = [], []
                            for e in r:
                                if type(e) == attr.Class:
                                    a.append(e)
                                else:
                                    L.append(e)
                            if L:
                                lg = how_many(thing.grammar)
                                if lg == 0:
                                    obj = None
                                elif lg == 1:
                                    obj = thing(L[0])
                                else:
                                    obj = thing(L)
                            else:
                                obj = thing()
                            for e in a:
                                setattr(obj, e.name, e.thing)
                        else:
                            if type(r) == attr.Class:
                                obj = thing()
                                setattr(obj, r.name, r.thing)
                            else:
                                if r is None:
                                    obj = thing()
                                else:
                                    obj = thing(r)
                    except TypeError as err:
                        # Name the class whose constructor rejected the
                        # parsed values; messages that are not text are
                        # passed on untouched.
                        details = list(err.args)
                        if details and isinstance(details[0], str):
                            details[0] = thing.__name__ + ": " + details[0]
                            err.args = tuple(details)
                        raise
                    try:
                        obj.polish()
                    except AttributeError:
                        pass
                    result = t, obj
            else:
                result = text, r

        else:
            raise GrammarTypeError("in grammar: " + repr(thing))

        if pos:
            if type(result[1]) == SyntaxError:
                # Rewind the position and remember why parsing failed.
                pos[0] = current_pos[0]
                pos[1] = current_pos[1]
                self.last_error = result[1]
            else:
                try:
                    result[1].position_in_text = current_pos
                except AttributeError:
                    pass

        if self.keep_feeble_things and skip_result:
            self._attach_feeble_things(result[1], skip_result)

        self._memory.setdefault(id(thing), {})[text] = result
        return result

    def compose(self, thing, grammar=None, attr_of=None):
        """Compose text using thing with grammar.

        Arguments:
            thing       thing containing other things with grammar
            grammar     grammar to use for composing thing
                        default: type(thing).grammar
            attr_of     if composing the value of an attribute, this
                        is a reference to the thing where this value
                        is an attribute of; None if this is not an
                        attribute value

        Returns text

        Raises:
            ValueError  if thing does not match grammar
            GrammarTypeError
                        if grammar contains an object of unknown type
            GrammarValueError
                        if grammar contains an illegal cardinality value
        """
        logger.debug("Parser(%s).compose(%s, %s)", self.name, thing, grammar)
        if __debug__:
            # make sure that we're not having this typing error
            compose = None

        def terminal_indent(do_blank=False):
            # Text to put in front of a terminal: the indention after a line
            # break, a blank between two adjacent regex/keyword terminals if
            # asked for and allowed, otherwise nothing.
            self._got_regex = False
            if self._got_endl:
                result = self.indent * self.indention_level
                self._got_endl = False
                return result
            elif do_blank and self.whitespace:
                if self._contiguous or not self.autoblank:
                    return ""
                else:
                    return blank(thing, self)
            else:
                return ""

        # Things bringing their own compose() method take over.
        try:
            thing.compose
        except AttributeError:
            pass
        else:
            return terminal_indent() + thing.compose(self, attr_of=attr_of)

        if not grammar:
            try:
                grammar = type(thing).grammar
            except AttributeError:
                if isinstance(thing, Symbol):
                    grammar = type(thing).regex
                elif isinstance(thing, list):
                    grammar = csl(Symbol)
                else:
                    grammar = word
        else:
            if isinstance(thing, Symbol):
                grammar = type(thing).regex

        if grammar is None:
            result = ""

        elif type(grammar) == FunctionType:
            if grammar == endl:
                result = endl(thing, self)
                self._got_endl = True
            elif grammar == blank:
                result = terminal_indent() + blank(thing, self)
            else:
                # Any other function yields the grammar to use.
                result = self.compose(thing, grammar(thing, self))

        elif isinstance(grammar, (RegEx, _RegEx)):
            m = grammar.match(str(thing))
            if m:
                result = terminal_indent(do_blank=self._got_regex) + str(thing)
            else:
                raise ValueError(repr(thing) + " does not match "
                                 + grammar.pattern)
            self._got_regex = True

        elif isinstance(grammar, Keyword):
            result = terminal_indent(do_blank=self._got_regex) + str(grammar)
            self._got_regex = True

        elif isinstance(grammar, (str, int, Literal)):
            result = terminal_indent() + str(grammar)

        elif isinstance(grammar, Enum):
            if thing in grammar:
                if isinstance(thing, Keyword):
                    result = terminal_indent(do_blank=self._got_regex) + str(thing)
                    self._got_regex = True
                else:
                    result = terminal_indent() + str(thing)
            else:
                raise ValueError(repr(thing) + " is not in " + repr(grammar))

        elif isinstance(grammar, attr.Class):
            if grammar.subtype == "Flag":
                if getattr(thing, grammar.name):
                    result = self.compose(thing, grammar.thing, attr_of=thing)
                else:
                    result = terminal_indent()
            else:
                result = self.compose(getattr(thing, grammar.name),
                                      grammar.thing, attr_of=thing)

        elif isinstance(grammar, (tuple, list)):
            def compose_tuple(thing, things, grammar):
                # things is the reversed list of the parts of thing; parts
                # are taken from its end as the grammar consumes them.
                text = []
                multiple, card = 1, 1
                indenting = 0
                if isinstance(grammar, (tuple, Concat)):
                    # concatenation
                    for g in grammar:
                        if g is None:
                            multiple = 1
                            # The indention counter is the local variable;
                            # the parser has no attribute of that name.
                            if indenting:
                                self.indention_level -= indenting
                                indenting = 0
                        elif type(g) == int:
                            if g < -6:
                                raise GrammarValueError(
                                    "illegal cardinality value in grammar: "
                                    + str(g))
                            card = g
                            if g in (-2, -1):
                                multiple = maxsize
                            elif g in (-5, -4, -3, 0):
                                multiple = 1
                                if g == -3:
                                    self.indention_level += 1
                                    indenting += 1
                            elif g == -6:
                                multiple = 0
                            else:
                                multiple = g
                        else:
                            passes = 0
                            try:
                                for r in range(multiple):
                                    if isinstance(g, (str, Symbol, Literal)):
                                        text.append(self.compose(thing, g))
                                        if card < 1:
                                            break
                                    elif isinstance(g, FunctionType):
                                        text.append(self.compose(thing, g))
                                        if card < 1:
                                            break
                                    elif isinstance(g, attr.Class):
                                        text.append(self.compose(
                                            getattr(thing, g.name), g.thing,
                                            attr_of=thing))
                                        if card < 1:
                                            break
                                    elif isinstance(g, (tuple, list)):
                                        text.append(compose_tuple(thing, things, g))
                                        if not things:
                                            break
                                    else:
                                        text.append(self.compose(things.pop(), g))
                                    passes += 1
                            except (IndexError, ValueError):
                                # Running out of parts ends a repetition; it
                                # is only an error if too few were composed.
                                if card == -2:
                                    if passes < 1:
                                        raise ValueError(
                                            repr(g)
                                            + " has to be there at least once")
                                elif card > 0:
                                    if passes < multiple:
                                        raise ValueError(
                                            repr(g)
                                            + " has to be there exactly "
                                            + str(multiple) + " times")
                            multiple = 1
                            if indenting:
                                self.indention_level -= indenting
                                indenting = 0
                    return ''.join(text)
                else:
                    # options
                    for g in grammar:
                        try:
                            if isinstance(g, (str, Symbol, Literal)):
                                return self.compose(thing, g)
                            elif isinstance(g, FunctionType):
                                return self.compose(thing, g)
                            elif isinstance(g, attr.Class):
                                return self.compose(getattr(thing, g.name), g.thing)
                            elif isinstance(g, (tuple, list)):
                                return compose_tuple(thing, things, g)
                            else:
                                # Only consume the part once it was composed.
                                text = self.compose(things[-1], g)
                                things.pop()
                                return text
                        except GrammarTypeError:
                            raise
                        except AttributeError:
                            pass
                        except KeyError:
                            pass
                        except TypeError:
                            pass
                        except ValueError:
                            pass
                    raise ValueError("none of the options in " + repr(grammar)
                                     + " found")

            if isinstance(thing, Namespace):
                L = [e for e in thing.values()]
                L.reverse()
            elif isinstance(thing, list):
                L = thing[:]
                L.reverse()
            else:
                L = [thing]
            result = compose_tuple(thing, L, grammar)

        elif _issubclass(grammar, object):
            if isinstance(thing, grammar):
                try:
                    grammar.grammar
                except AttributeError:
                    if _issubclass(grammar, Symbol):
                        result = self.compose(thing, grammar.regex)
                    else:
                        result = self.compose(thing)
                else:
                    result = self.compose(thing, grammar.grammar)
            else:
                if grammar == Symbol and isinstance(thing, str):
                    result = self.compose(str(thing), Symbol.regex)
                else:
                    raise ValueError(repr(thing) + " is not a " + repr(grammar))

        else:
            raise GrammarTypeError("in grammar: " + repr(grammar))

        return result