Parser/Composer library for Python
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1565 lines
52 KiB

  1. """
  2. pyPEG parsing framework
  3. pyPEG offers a packrat parser as well as a framework to parse and output
  4. languages for Python 3.5+, see http://fdik.org/pyPEG2
  5. Copyleft 2012, Volker Birk.
  6. This program is under GNU General Public License 2.0.
  7. """
  8. import logging
  9. logger = logging.getLogger("pyPEG2")
  10. __version__ = 2.50
  11. __author__ = "Volker Birk"
  12. __license__ = "This program is under GNU General Public License 2.0."
  13. __url__ = "http://fdik.org/pyPEG"
  14. import re
  15. import sys
  16. import weakref
  17. if __debug__:
  18. import warnings
  19. from types import FunctionType
  20. from collections import namedtuple
  21. from collections import OrderedDict
  22. from collections import UserString
  23. word = re.compile(r"\w+")
  24. """Regular expression for scanning a word."""
  25. _RegEx = type(word)
  26. restline = re.compile(r".*")
  27. """Regular expression for rest of line."""
  28. whitespace = re.compile(r"(?m)\s+")
  29. """Regular expression for scanning whitespace."""
  30. comment_sh = re.compile(r"\#.*")
  31. """Shell script style comment."""
  32. comment_cpp = re.compile(r"//.*")
  33. """C++ style comment."""
  34. comment_c = re.compile(r"(?ms)/\*.*?\*/")
  35. """C style comment without nesting comments."""
  36. comment_pas = re.compile(r"(?ms)\(\*.*?\*\)")
  37. """Pascal style comment without nesting comments."""
  38. def _card(n, thing):
  39. # Reduce unnecessary recursions
  40. if len(thing) == 1:
  41. return n, thing[0]
  42. else:
  43. return n, thing
  44. def some(*thing):
  45. """At least one occurrence of thing, + operator.
  46. Inserts -2 as cardinality before thing.
  47. """
  48. return _card(-2, thing)
  49. def maybe_some(*thing):
  50. """No thing or some of them, * operator.
  51. Inserts -1 as cardinality before thing.
  52. """
  53. return _card(-1, thing)
  54. def optional(*thing):
  55. """Thing or no thing, ? operator.
  56. Inserts 0 as cardinality before thing.
  57. """
  58. return _card(0, thing)
  59. def _csl(separator, *thing):
  60. # reduce unnecessary recursions
  61. if len(thing) == 1:
  62. L = [thing[0]]
  63. L.extend(maybe_some(separator, blank, thing[0]))
  64. return tuple(L)
  65. else:
  66. L = list(thing)
  67. L.append(-1)
  68. L2 = [separator, blank]
  69. L2.extend(tuple(thing))
  70. L.append(tuple(L2))
  71. return tuple(L)
  72. try:
  73. # Python 3.x
  74. _exec = eval("exec")
  75. _exec('''
  76. def csl(*thing, separator=","):
  77. """Generate a grammar for a simple comma separated list."""
  78. return _csl(separator, *thing)
  79. ''')
  80. except SyntaxError:
  81. # Python 2.7
  82. def csl(*thing):
  83. """Generate a grammar for a simple comma separated list."""
  84. return _csl(",", *thing)
  85. def attr(name, thing=word, subtype=None):
  86. """Generate an Attribute with that name, referencing the thing.
  87. Instance variables:
  88. Class Attribute class generated by namedtuple()
  89. """
  90. # if __debug__:
  91. # if isinstance(thing, (tuple, list)):
  92. # warnings.warn(type(thing).__name__
  93. # + " not recommended as grammar of attribute "
  94. # + repr(name), SyntaxWarning)
  95. return attr.Class(name, thing, subtype)
  96. attr.Class = namedtuple("Attribute", ("name", "thing", "subtype"))
  97. def flag(name, thing=None):
  98. """Generate an Attribute with that name which is valued True or False."""
  99. if thing is None:
  100. thing = Keyword(name)
  101. return attr(name, thing, "Flag")
  102. def attributes(grammar, invisible=False):
  103. """Iterates all attributes of a grammar."""
  104. if type(grammar) == attr.Class and (invisible or grammar.name[0] != "_"):
  105. yield grammar
  106. elif type(grammar) == tuple:
  107. for e in grammar:
  108. for a in attributes(e, invisible):
  109. yield a
  110. class Whitespace(str):
  111. grammar = whitespace
  112. class RegEx(object):
  113. """Regular Expression.
  114. Instance Variables:
  115. regex pre-compiled object from re.compile()
  116. """
  117. def __init__(self, value, **kwargs):
  118. logger.debug(f"New Regex({value})")
  119. self.regex = re.compile(value, re.U)
  120. self.search = self.regex.search
  121. self.match = self.regex.match
  122. self.split = self.regex.split
  123. self.findall = self.regex.findall
  124. self.finditer = self.regex.finditer
  125. self.sub = self.regex.sub
  126. self.subn = self.regex.subn
  127. self.flags = self.regex.flags
  128. self.groups = self.regex.groups
  129. self.groupindex = self.regex.groupindex
  130. self.pattern = value
  131. for k, v in kwargs.items():
  132. setattr(self, k, v)
  133. def __str__(self):
  134. return self.pattern
  135. def __repr__(self):
  136. result = type(self).__name__ + "(" + repr(self.pattern)
  137. try:
  138. result += ", name=" + repr(self.name)
  139. except:
  140. pass
  141. return result + ")"
  142. class Literal(object):
  143. """Literal value."""
  144. _basic_types = (bool, int, float, complex, str, bytes, bytearray, list,
  145. tuple, slice, set, frozenset, dict)
  146. def __init__(self, value, **kwargs):
  147. logger.debug(f"New Literal({value})")
  148. if isinstance(self, Literal._basic_types):
  149. pass
  150. else:
  151. self.value = value
  152. for k, v in kwargs.items():
  153. setattr(self, k, v)
  154. def __str__(self):
  155. if isinstance(self, Literal._basic_types):
  156. return super(Literal, self).__str__()
  157. else:
  158. return str(self.value)
  159. def __repr__(self):
  160. if isinstance(self, Literal._basic_types):
  161. return type(self).__name__ + "(" + \
  162. super(Literal, self).__repr__() + ")"
  163. else:
  164. return type(self).__name__ + "(" + repr(self.value) + ")"
  165. def __eq__(self, other):
  166. if isinstance(self, Literal._basic_types):
  167. if type(self) == type(other) and super().__eq__(other):
  168. return True
  169. else:
  170. return False
  171. else:
  172. if type(self) == type(other) and str(self) == str(other):
  173. return True
  174. else:
  175. return False
  176. class Str(UserString):
  177. """A mutable string like object"""
  178. def __new__(cls, x):
  179. return super().__new__(cls, x)
  180. def __init__(self, value, name=None, **kwargs):
  181. super().__init__(self, )
  182. logger.debug(f"New Str({value})")
  183. if name is not None:
  184. self.name = Symbol(name)
  185. for k, v in kwargs:
  186. setattr(self, k, v)
  187. def __repr__(self):
  188. """x.__repr__() <==> repr(x)"""
  189. try:
  190. return self.__class__.__name__ + f"(name={self.name}, data={self.data})"
  191. except AttributeError:
  192. return self.__class__.__name__ + f"(data={self.data})"
  193. class Plain(object):
  194. """A plain object"""
  195. def __init__(self, name=None, **kwargs):
  196. """Construct a plain object with an optional name and optional other
  197. attributes
  198. """
  199. logger.debug(f"New Plain({repr(name)})")
  200. if name is not None:
  201. self.name = Symbol(name)
  202. for k, v in kwargs:
  203. setattr(self, k, v)
  204. def __repr__(self):
  205. """x.__repr__() <==> repr(x)"""
  206. try:
  207. return self.__class__.__name__ + "(name=" + repr(self.name) + ")"
  208. except AttributeError:
  209. return self.__class__.__name__ + "()"
  210. class List(list):
  211. """A List of things."""
  212. def __init__(self, *args, **kwargs):
  213. """Construct a List, and construct its attributes from keyword
  214. arguments.
  215. """
  216. logger.debug(f"New List({args}, {kwargs})")
  217. _args = []
  218. if len(args) == 1:
  219. if isinstance(args[0], str):
  220. self.append(args[0])
  221. elif isinstance(args[0], (tuple, list)):
  222. for e in args[0]:
  223. if isinstance(e, attr.Class):
  224. setattr(self, e.name, e.value)
  225. else:
  226. _args.append(e)
  227. super(List, self).__init__(_args)
  228. else:
  229. raise ValueError("initializer of List should be collection or string")
  230. else:
  231. for e in args:
  232. if isinstance(e, attr.Class):
  233. setattr(self, e.name, e.value)
  234. else:
  235. _args.append(e)
  236. super(List, self).__init__(_args)
  237. for k, v in kwargs.items():
  238. setattr(self, k, v)
  239. def __repr__(self):
  240. """x.__repr__() <==> repr(x)"""
  241. result = type(self).__name__ + "(" + super(List, self).__repr__()
  242. try:
  243. result += ", name=" + repr(self.name)
  244. except:
  245. pass
  246. return result + ")"
  247. def __eq__(self, other):
  248. return super(List, self).__eq__(list(other))
  249. class _UserDict(object):
  250. # UserDict cannot be used because of metaclass conflicts
  251. def __init__(self, *args, **kwargs):
  252. self.data = dict(*args, **kwargs)
  253. def __len__(self):
  254. return len(self.data)
  255. def __getitem__(self, key):
  256. return self.data[key]
  257. def __setitem__(self, key, value):
  258. self.data[key] = value
  259. def __delitem__(self, key):
  260. del self.data[key]
  261. def __iter__(self):
  262. return self.data.keys()
  263. def __contains__(self, item):
  264. return item in self.data
  265. def items(self):
  266. return self.data.items()
  267. def keys(self):
  268. return self.data.keys()
  269. def values(self):
  270. return self.data.values()
  271. def clear(self):
  272. self.data.clear()
  273. def copy(self):
  274. return self.data.copy()
  275. class Namespace(_UserDict):
  276. """A dictionary of things, indexed by their name."""
  277. @staticmethod
  278. def name_by(value):
  279. return "#" + str(id(value))
  280. def __init__(self, *args, **kwargs):
  281. """Initialize an OrderedDict containing the data of the Namespace.
  282. Arguments are being put into the Namespace, keyword arguments give the
  283. attributes of the Namespace.
  284. """
  285. super().__init__(*args, **kwargs)
  286. logger.debug(f"New Namespace({args}, {kwargs})")
  287. if args:
  288. self.data = OrderedDict(args)
  289. else:
  290. self.data = OrderedDict()
  291. for k, v in kwargs.items():
  292. setattr(self, k, v)
  293. def __setitem__(self, key, value):
  294. """x.__setitem__(i, y) <==> x[i]=y"""
  295. if key is None:
  296. name = Symbol(Namespace.name_by(value))
  297. else:
  298. name = Symbol(key)
  299. try:
  300. value.name = name
  301. except AttributeError:
  302. pass
  303. try:
  304. value.namespace
  305. except AttributeError:
  306. try:
  307. value.namespace = weakref.ref(self)
  308. except AttributeError:
  309. pass
  310. else:
  311. if not value.namespace:
  312. value.namespace = weakref.ref(self)
  313. super(Namespace, self).__setitem__(name, value)
  314. def __delitem__(self, key):
  315. """x.__delitem__(y) <==> del x[y]"""
  316. self[key].namespace = None
  317. super(Namespace, self).__delitem__(key)
  318. def __repr__(self):
  319. """x.__repr__() <==> repr(x)"""
  320. result = type(self).__name__ + "(["
  321. for key, value in self.data.items():
  322. result += "(" + repr(key) + ", " + repr(value) + ")"
  323. result += ", "
  324. result += "]"
  325. try:
  326. result += ", name=" + repr(self.name)
  327. # BUG(JM): self.name is not an attribute of Namespace
  328. except NameError:
  329. pass
  330. return result + ")"
  331. class Enum(Namespace):
  332. """A Namespace which is being treated as an Enum.
  333. Enums can only contain Keywords or Symbols."""
  334. def __init__(self, *things, **kwargs):
  335. """Construct an Enum using a tuple of things."""
  336. super().__init__(*things, **kwargs)
  337. logger.debug(f"New Enum({things})")
  338. self.data = OrderedDict()
  339. for thing in things:
  340. if type(thing) == str:
  341. thing = Symbol(thing)
  342. if not isinstance(thing, Symbol):
  343. raise TypeError(repr(thing) + " is not a Symbol")
  344. super(Enum, self).__setitem__(thing, thing)
  345. for k, v in kwargs.items():
  346. setattr(self, k, v)
  347. def __repr__(self):
  348. """x.__repr__() <==> repr(x)"""
  349. v = [e for e in self.values()]
  350. result = type(self).__name__ + "(" + repr(v)
  351. try:
  352. result += ", name=" + repr(self.name)
  353. # BUG(JM): self.name is not an attribute of Namespace
  354. except NameError:
  355. pass
  356. return result + ")"
  357. def __setitem__(self, key, value):
  358. """x.__setitem__(i, y) <==> x[i]=y"""
  359. if not isinstance(value, Keyword) and not isinstance(value, Symbol):
  360. raise TypeError("Enums can only contain Keywords or Symbols")
  361. raise ValueError("Enums cannot be modified after creation.")
  362. class Symbol(str):
  363. r"""Use to scan Symbols.
  364. Class variables:
  365. regex regular expression to scan, default r"\w+"
  366. check_keywords flag if a Symbol is checked for not being a Keyword
  367. default: False
  368. """
  369. regex = word
  370. check_keywords = False
  371. def __init__(self, name, namespace=None):
  372. """Construct a Symbol with that name in Namespace namespace.
  373. Raises:
  374. ValueError if check_keywords is True and value is identical to
  375. a Keyword
  376. TypeError if namespace is given and not a Namespace
  377. """
  378. logger.debug(f"New Symbol({repr(name)})")
  379. if Symbol.check_keywords and name in Keyword.table:
  380. raise ValueError(repr(name)
  381. + " is a Keyword, but is used as a Symbol")
  382. if namespace:
  383. if isinstance(namespace, Namespace):
  384. namespace[name] = self
  385. else:
  386. raise TypeError(repr(namespace) + " is not a Namespace")
  387. else:
  388. self.name = name
  389. self.namespace = None
  390. def __repr__(self):
  391. """x.__repr__() <==> repr(x)"""
  392. return type(self).__name__ + "(" + str(self).__repr__() + ")"
  393. class Keyword(Symbol):
  394. r"""Use to access the keyword table.
  395. Class variables:
  396. regex regular expression to scan, default r"\w+"
  397. table Namespace with keyword table
  398. """
  399. regex = word
  400. table = Namespace()
  401. def __init__(self, keyword):
  402. """Adds keyword to the keyword table."""
  403. super().__init__(keyword)
  404. if keyword not in Keyword.table:
  405. Keyword.table[keyword] = self
  406. self.name = keyword
  407. K = Keyword
  408. """Shortcut for Keyword."""
  409. class IKeyword(Keyword):
  410. """Use for case-insensitive keyword."""
  411. def parse(self, parser, text, pos):
  412. m = type(self).regex.match(text)
  413. if m:
  414. if m.group(0).upper() == str(self).upper():
  415. return text[len(str(self)):], None
  416. else:
  417. return text, SyntaxError("expecting " + repr(self))
  418. else:
  419. return text, SyntaxError("expecting " + repr(self))
  420. IK = IKeyword
  421. """Shortcut for case-insensitive Keyword."""
  422. class Concat(List):
  423. """Concatenation of things.
  424. This class exists as a mutable alternative to using a tuple.
  425. """
  426. pass
  427. def name():
  428. """Generate a grammar for a symbol with name."""
  429. return attr("name", Symbol)
  430. def ignore(grammar):
  431. """Ignore what matches to the grammar."""
  432. try:
  433. ignore.serial += 1
  434. except AttributeError:
  435. ignore.serial = 1
  436. return attr("_ignore" + str(ignore.serial), grammar)
  437. def indent(*thing):
  438. """Indent thing by one level.
  439. Inserts -3 as cardinality before thing.
  440. """
  441. return _card(-3, thing)
  442. def contiguous(*thing):
  443. """Disable automated whitespace matching.
  444. Inserts -4 as cardinality before thing.
  445. """
  446. return _card(-4, thing)
  447. def separated(*thing):
  448. """Enable automated whitespace matching.
  449. Inserts -5 as cardinality before thing.
  450. """
  451. return _card(-5, thing)
  452. def omit(*thing):
  453. """Omit what matches to the grammar."""
  454. return _card(-6, thing)
  455. def endl(thing, parser):
  456. """End of line marker for composing text."""
  457. return "\n"
  458. def blank(thing, parser):
  459. """Space marker for composing text."""
  460. return " "
  461. class GrammarError(Exception):
  462. """Base class for errors in grammars."""
  463. class GrammarTypeError(TypeError, GrammarError):
  464. """Raised if grammar contains an object of unkown type."""
  465. class GrammarValueError(ValueError, GrammarError):
  466. """Raised if grammar contains an illegal value."""
  467. def how_many(grammar):
  468. """Determines the possibly parsed objects of grammar.
  469. Returns:
  470. 0 if there will be no objects
  471. 1 if there will be a maximum of one object
  472. 2 if there can be more than one object
  473. Raises:
  474. GrammarTypeError
  475. if grammar contains an object of unkown type
  476. GrammarValueError
  477. if grammar contains an illegal cardinality value
  478. """
  479. if grammar is None:
  480. return 0
  481. elif type(grammar) == int:
  482. return grammar
  483. elif _issubclass(grammar, Symbol) or isinstance(grammar, (RegEx, _RegEx)):
  484. return 1
  485. elif isinstance(grammar, (str, Literal)):
  486. return 0
  487. elif isinstance(grammar, attr.Class):
  488. return 0
  489. elif type(grammar) == FunctionType:
  490. return 0
  491. elif isinstance(grammar, (tuple, Concat)):
  492. length, card = 0, 1
  493. for e in grammar:
  494. if type(e) == int:
  495. if e < -6:
  496. raise GrammarValueError("illegal cardinality value in grammar: " + str(e))
  497. if e in (-5, -4, -3):
  498. pass
  499. elif e in (-1, -2):
  500. card = 2
  501. elif e == 0:
  502. card = 1
  503. elif e == -6:
  504. return 0
  505. else:
  506. card = min(e, 2)
  507. else:
  508. length += card * how_many(e)
  509. if length >= 2:
  510. return 2
  511. return length
  512. elif isinstance(grammar, list):
  513. m = 0
  514. for e in grammar:
  515. m = max(m, how_many(e))
  516. if m == 2:
  517. return m
  518. return m
  519. elif _issubclass(grammar, object):
  520. return 1
  521. else:
  522. err = f"grammar contains an illegal type: {type(grammar).__name__}: {repr(grammar)}"
  523. raise GrammarTypeError(err)
  524. def parse(text, thing, filename=None, whitespace=whitespace, comment=None,
  525. keep_feeble_things=False, name=None):
  526. r"""Parse text following thing as grammar and return the resulting things or
  527. raise an error.
  528. Arguments:
  529. text text to parse
  530. thing grammar for things to parse
  531. filename filename where text is origin from
  532. whitespace regular expression to skip whitespace
  533. default: regex "(?m)\s+"
  534. comment grammar to parse comments
  535. default: None
  536. keep_feeble_things
  537. put whitespace and comments into the .feeble_things
  538. attribute instead of dumping them
  539. Returns generated things.
  540. Raises:
  541. SyntaxError if text does not match the grammar in thing
  542. ValueError if input does not match types
  543. TypeError if output classes have wrong syntax for __init__()
  544. GrammarTypeError
  545. if grammar contains an object of unkown type
  546. GrammarValueError
  547. if grammar contains an illegal cardinality value
  548. """
  549. logger.debug(f"parse({repr(text)}, {thing})")
  550. parser = Parser(name=name)
  551. parser.whitespace = whitespace
  552. parser.comment = comment
  553. parser.text = text
  554. parser.filename = filename
  555. parser.keep_feeble_things = keep_feeble_things
  556. t, r = parser.parse(text, thing)
  557. if t:
  558. raise parser.last_error
  559. return r
  560. def compose(thing, grammar=None, indent=" ", autoblank=True):
  561. """Compose text using thing with grammar.
  562. Arguments:
  563. thing thing containing other things with grammar
  564. grammar grammar to use to compose thing
  565. default: thing.grammar
  566. indent string to use to indent while composing
  567. default: four spaces
  568. autoblank add blanks if grammar would possibly be
  569. violated otherwise
  570. default: True
  571. Returns text
  572. Raises:
  573. ValueError if input does not match grammar
  574. GrammarTypeError
  575. if grammar contains an object of unkown type
  576. GrammarValueError
  577. if grammar contains an illegal cardinality value
  578. """
  579. logger.debug(f"compose({thing}, {grammar})")
  580. parser = Parser()
  581. parser.indent = indent
  582. parser.autoblank = autoblank
  583. return parser.compose(thing, grammar)
  584. def _issubclass(obj, cls):
  585. # If obj is not a class, just return False
  586. try:
  587. return issubclass(obj, cls)
  588. except TypeError:
  589. return False
  590. class Parser(object):
  591. r"""Offers parsing and composing capabilities. Implements a Packrat parser.
  592. Instance variables:
  593. whitespace regular expression to scan whitespace
  594. default: "(?m)\s+"
  595. comment grammar to parse comments
  596. last_error syntax error which ended parsing
  597. indent string to use to indent while composing
  598. default: four spaces
  599. indention_level level to indent to
  600. default: 0
  601. text original text to parse; set for decorated syntax
  602. errors
  603. filename filename where text is origin from
  604. autoblank add blanks while composing if grammar would possibly
  605. be violated otherwise
  606. default: True
  607. keep_feeble_things put whitespace and comments into the .feeble_things
  608. attribute instead of dumping them
  609. """
  610. def __init__(self, name=None):
  611. """Initialize instance variables to their defaults."""
  612. self.whitespace = whitespace
  613. self.comment = None
  614. self.last_error = None
  615. self.indent = " "
  616. self.indention_level = 0
  617. self.text = None
  618. self.filename = None
  619. self.autoblank = True
  620. self.keep_feeble_things = False
  621. self._memory = {}
  622. self._got_endl = True
  623. self._contiguous = False
  624. self._got_regex = False
  625. self._name = hex(id(self)) if name is None else name
  626. logger.debug(f"New Parser(name={name})")
  627. @property
  628. def name(self):
  629. return self._name
  630. def clear_memory(self, thing=None):
  631. """Clear cache memory for packrat parsing.
  632. Arguments:
  633. thing thing for which cache memory is cleared,
  634. None if cache memory should be cleared for all
  635. things
  636. """
  637. if thing is None:
  638. self._memory = {}
  639. else:
  640. try:
  641. del self._memory[id(thing)]
  642. except KeyError:
  643. pass
  644. def parse(self, text, thing, filename=None):
  645. """(Partially) parse text following thing as grammar and return the
  646. resulting things.
  647. Arguments:
  648. text text to parse
  649. thing grammar for things to parse
  650. filename filename where text is origin from
  651. Returns (text, result) with:
  652. text unparsed text
  653. result generated objects or SyntaxError object
  654. Raises:
  655. ValueError if input does not match types
  656. TypeError if output classes have wrong syntax for __init__()
  657. GrammarTypeError
  658. if grammar contains an object of unkown type
  659. GrammarValueError
  660. if grammar contains an illegal cardinality value
  661. """
  662. logger.debug(f"Parser({self.name}).parse({repr(text)}, {thing})")
  663. self.text = text
  664. if filename:
  665. self.filename = filename
  666. pos = [1, 0]
  667. t, skip_result = self._skip(text, pos)
  668. t, r = self._parse(t, thing, pos)
  669. if type(r) == SyntaxError:
  670. raise r
  671. else:
  672. if self.keep_feeble_things and skip_result:
  673. try:
  674. r.feeble_things
  675. except AttributeError:
  676. try:
  677. r.feeble_things = skip_result
  678. except AttributeError:
  679. pass
  680. else:
  681. r.feeble_things = skip_result + r.feeble_things
  682. return t, r
  683. def _skip(self, text, pos=None):
  684. # Skip whitespace and comments from input text
  685. logger.debug(f"Parser({self.name})._skip({repr(text)}, {pos})")
  686. t2 = None
  687. t = text
  688. result = []
  689. while t2 != t:
  690. if self.whitespace and not self._contiguous:
  691. t, r = self._parse(t, self.whitespace, pos)
  692. if self.keep_feeble_things and r and not isinstance(r,
  693. SyntaxError):
  694. result.append(r)
  695. t2 = t
  696. if self.comment:
  697. t, r = self._parse(t, self.comment, pos)
  698. if self.keep_feeble_things and r and not isinstance(r,
  699. SyntaxError):
  700. result.append(r)
  701. return t, result
  702. def generate_syntax_error(self, msg, pos):
  703. """Generate a syntax error construct with
  704. msg string with error message
  705. pos (lineNo, charInText) with positioning information
  706. """
  707. result = SyntaxError(msg)
  708. if pos:
  709. result.lineno = pos[0]
  710. start = max(pos[1] - 19, 0)
  711. end = min(pos[1] + 20, len(self.text))
  712. result.text = self.text[start:end]
  713. result.offset = pos[1] - start + 1
  714. while "\n" in result.text:
  715. lf = result.text.find("\n")
  716. if lf >= result.offset:
  717. result.text = result.text[:result.offset-1]
  718. break;
  719. else:
  720. L = len(result.text)
  721. result.text = result.text[lf+1:]
  722. result.offset -= L - len(result.text)
  723. if self.filename:
  724. result.filename = self.filename
  725. return result
  726. def _parse(self, text, thing, pos=[1, 0]):
  727. # Parser implementation
  728. logger.debug(f"Parser({self.name})._parse([{type(thing)}]: {repr(text)}, {thing}, {pos})")
  729. def update_pos(text, t, pos):
  730. # Calculate where we are in the text
  731. old_pos = pos
  732. if pos and text != t:
  733. d_text = text[:len(text) - len(t)]
  734. pos[0] += d_text.count("\n")
  735. pos[1] += len(d_text)
  736. logger.debug(f"Parser({self.name})._parse.update_pos(" + f"{pos})" if old_pos == pos else f"{old_pos}->{pos})")
  737. try:
  738. ret = self._memory[id(thing)][text]
  739. logger.debug(f"Parser({self.name})._parse() -> cached ret: {repr(ret)}")
  740. return ret
  741. except (IndexError, KeyError):
  742. pass
  743. if pos:
  744. current_pos = tuple(pos)
  745. else:
  746. current_pos = None
  747. def syntax_error(msg, thing=None):
  748. # Not all error propagate, we log all of them
  749. if thing is not None:
  750. type_of_thing = ""
  751. else:
  752. type_of_thing = type(thing)
  753. logger.error(f"Syntax Error: Parser({self.name})._parse({type_of_thing}): {repr(msg)})")
  754. return self.generate_syntax_error(msg, pos)
  755. try:
  756. thing.parse
  757. except AttributeError:
  758. pass
  759. else:
  760. t, r = thing.parse(self, text, pos)
  761. if not isinstance(r, SyntaxError):
  762. t, skip_result = self._skip(t)
  763. update_pos(text, t, pos)
  764. if self.keep_feeble_things:
  765. try:
  766. r.feeble_things
  767. except AttributeError:
  768. try:
  769. r.feeble_things = skip_result
  770. except AttributeError:
  771. pass
  772. else:
  773. r.feeble_things += skip_result
  774. return t, r
  775. skip_result = None
  776. # terminal symbols
  777. if thing is None or type(thing) == FunctionType:
  778. result = text, None
  779. #
  780. elif isinstance(thing, Symbol):
  781. m = type(thing).regex.match(text)
  782. if m and m.group(0) == str(thing):
  783. t, r = text[len(thing):], None
  784. t, skip_result = self._skip(t)
  785. result = t, r
  786. update_pos(text, t, pos)
  787. else:
  788. err = "expecting " + repr(thing) + f" in '{text}'"
  789. result = text, syntax_error(err, thing)
  790. #
  791. elif isinstance(thing, (RegEx, _RegEx)):
  792. m = thing.match(text)
  793. if m:
  794. t, r = text[len(m.group(0)):], m.group(0)
  795. t, skip_result = self._skip(t)
  796. result = t, r
  797. update_pos(text, t, pos)
  798. else:
  799. err = "expecting match on " + thing.pattern + f" in '{text}'"
  800. result = text, syntax_error(err, thing)
  801. #
  802. elif isinstance(thing, (str, Literal)):
  803. if text.startswith(str(thing)):
  804. t, r = text[len(str(thing)):], None
  805. t, skip_result = self._skip(t)
  806. result = t, r
  807. update_pos(text, t, pos)
  808. else:
  809. err = "expecting " + repr(thing) + f" in '{text}'"
  810. result = text, syntax_error(err, thing)
  811. #
  812. elif _issubclass(thing, (Symbol, Str)):
  813. m = thing.regex.match(text)
  814. if m:
  815. result = None
  816. try:
  817. thing.grammar
  818. except AttributeError:
  819. pass
  820. else:
  821. if thing.grammar is None:
  822. pass
  823. elif isinstance(thing.grammar, Enum):
  824. if not m.group(0) in thing.grammar:
  825. result = text, syntax_error(repr(m.group(0))
  826. + " is not a member of " + repr(thing.grammar))
  827. else:
  828. raise GrammarValueError(
  829. "Symbol " + type(thing).__name__
  830. + " has a grammar which is not an Enum: "
  831. + repr(thing.grammar))
  832. if not result:
  833. t, r = text[len(m.group(0)):], thing(m.group(0))
  834. t, skip_result = self._skip(t)
  835. result = t, r
  836. update_pos(text, t, pos)
  837. else:
  838. err = "expecting " + thing.__name__ + f" in '{text}'"
  839. result = text, syntax_error(err, thing)
  840. #
  841. # non-terminal constructs
  842. #
  843. elif isinstance(thing, attr.Class):
  844. t, r = self._parse(text, thing.thing, pos)
  845. if type(r) == SyntaxError:
  846. if thing.subtype == "Flag":
  847. result = t, attr(thing.name, False)
  848. else:
  849. result = text, r
  850. else:
  851. if thing.subtype == "Flag":
  852. result = t, attr(thing.name, True)
  853. else:
  854. result = t, attr(thing.name, r)
  855. #
  856. elif isinstance(thing, (tuple, Concat)):
  857. if self.keep_feeble_things:
  858. L = List()
  859. else:
  860. L = []
  861. t = text
  862. flag = True
  863. _min, _max = 1, 1
  864. contiguous = self._contiguous
  865. omit = False
  866. for e in thing:
  867. if type(e) == int:
  868. if e < -6:
  869. raise GrammarValueError(
  870. "illegal cardinality value in grammar: " + str(e))
  871. if e == -6:
  872. omit = True
  873. elif e == -5:
  874. self._contiguous = False
  875. t, skip_result = self._skip(t)
  876. if self.keep_feeble_things and skip_result:
  877. try:
  878. L.feeble_things
  879. except AttributeError:
  880. try:
  881. L.feeble_things = skip_result
  882. except AttributeError:
  883. pass
  884. else:
  885. L.feeble_things += skip_result
  886. elif e == -4:
  887. self._contiguous = True
  888. elif e == -3:
  889. pass
  890. elif e == -2:
  891. _min, _max = 1, sys.maxsize
  892. elif e == -1:
  893. _min, _max = 0, sys.maxsize
  894. elif e == 0:
  895. _min, _max = 0, 1
  896. else:
  897. _min, _max = e, e
  898. continue
  899. for i in range(_max):
  900. t2, r = self._parse(t, e, pos)
  901. if type(r) == SyntaxError:
  902. i -= 1
  903. break
  904. elif omit:
  905. t = t2
  906. r = None
  907. else:
  908. t = t2
  909. if r is not None:
  910. if type(r) is list:
  911. L.extend(r)
  912. else:
  913. L.append(r)
  914. if i+1 < _min:
  915. if type(r) != SyntaxError:
  916. r = syntax_error("expecting " + str(_min)
  917. + " occurrence(s) of " + repr(e)
  918. + " (" + str(i+1) + " found)")
  919. flag = False
  920. break
  921. _min, _max = 1, 1
  922. omit = False
  923. if flag:
  924. if self._contiguous and not contiguous:
  925. self._contiguous = False
  926. t, skip_result = self._skip(t)
  927. if self.keep_feeble_things and skip_result:
  928. try:
  929. L.feeble_things
  930. except AttributeError:
  931. try:
  932. L.feeble_things = skip_result
  933. except AttributeError:
  934. pass
  935. else:
  936. L.feeble_things += skip_result
  937. if len(L) > 1 or how_many(thing) > 1:
  938. result = t, L
  939. elif not L:
  940. if not self.keep_feeble_things:
  941. return t, None
  942. try:
  943. L.feeble_things
  944. except AttributeError:
  945. return t, None
  946. if len(L.feeble_things):
  947. return t, L
  948. else:
  949. return t, None
  950. else:
  951. if self.keep_feeble_things:
  952. try:
  953. L.feeble_things
  954. except AttributeError:
  955. pass
  956. else:
  957. if L.feeble_things:
  958. try:
  959. L[0].feeble_things
  960. except AttributeError:
  961. try:
  962. L[0].feeble_things = L.feeble_things
  963. except AttributeError:
  964. pass
  965. else:
  966. L[0].feeble_things = L.feeble_things + \
  967. L[0].feeble_things
  968. result = t, L[0]
  969. else:
  970. result = text, r
  971. self._contiguous = contiguous
  972. #
  973. elif isinstance(thing, list):
  974. found = False
  975. for e in thing:
  976. try:
  977. t, r = self._parse(text, e, pos)
  978. if type(r) != SyntaxError:
  979. found = True
  980. break
  981. except GrammarValueError:
  982. raise
  983. except ValueError:
  984. pass
  985. if found:
  986. result = t, r
  987. else:
  988. result = text, syntax_error("expecting one of " + repr(thing))
  989. #
  990. elif _issubclass(thing, Namespace):
  991. t, r = self._parse(text, thing.grammar, pos)
  992. if type(r) != SyntaxError:
  993. if isinstance(r, thing):
  994. result = t, r
  995. else:
  996. obj = thing()
  997. for e in r:
  998. if type(e) == attr.Class:
  999. setattr(obj, e.name, e.thing)
  1000. else:
  1001. try:
  1002. obj[e.name] = e
  1003. except AttributeError:
  1004. obj[None] = e
  1005. try:
  1006. obj.polish()
  1007. except AttributeError:
  1008. pass
  1009. result = t, obj
  1010. else:
  1011. result = text, r
  1012. #
  1013. elif _issubclass(thing, list):
  1014. try:
  1015. g = thing.grammar
  1016. except AttributeError:
  1017. g = csl(Symbol)
  1018. t, r = self._parse(text, g, pos)
  1019. if type(r) != SyntaxError:
  1020. if isinstance(r, thing):
  1021. result = t, r
  1022. else:
  1023. obj = thing()
  1024. if type(r) == list:
  1025. for e in r:
  1026. if type(e) == attr.Class:
  1027. setattr(obj, e.name, e.thing)
  1028. else:
  1029. obj.append(e)
  1030. else:
  1031. if type(r) == attr.Class:
  1032. setattr(obj, r.name, r.thing)
  1033. else:
  1034. obj.append(r)
  1035. try:
  1036. obj.polish()
  1037. except AttributeError:
  1038. pass
  1039. result = t, obj
  1040. else:
  1041. result = text, r
  1042. #
  1043. elif _issubclass(thing, object):
  1044. try:
  1045. g = thing.grammar
  1046. except AttributeError:
  1047. g = word
  1048. t, r = self._parse(text, g, pos)
  1049. if type(r) != SyntaxError:
  1050. if isinstance(r, thing):
  1051. result = t, r
  1052. else:
  1053. try:
  1054. if type(r) == list:
  1055. L, a = [], []
  1056. for e in r:
  1057. if type(e) == attr.Class:
  1058. a.append(e)
  1059. else:
  1060. L.append(e)
  1061. if L:
  1062. lg = how_many(thing.grammar)
  1063. if lg == 0:
  1064. obj = None
  1065. elif lg == 1:
  1066. obj = thing(L[0])
  1067. else:
  1068. obj = thing(L)
  1069. else:
  1070. obj = thing()
  1071. for e in a:
  1072. setattr(obj, e.name, e.thing)
  1073. else:
  1074. if type(r) == attr.Class:
  1075. obj = thing()
  1076. setattr(obj, r.name, r.thing)
  1077. else:
  1078. if r is None:
  1079. obj = thing()
  1080. else:
  1081. obj = thing(r)
  1082. except TypeError as t:
  1083. L = list(t.args)
  1084. L[0] = thing.__name__ + ": " + L[0]
  1085. t.args = tuple(L)
  1086. raise t
  1087. try:
  1088. obj.polish()
  1089. except AttributeError:
  1090. pass
  1091. result = t, obj
  1092. else:
  1093. result = text, r
  1094. #
  1095. else:
  1096. raise GrammarTypeError("in grammar: " + repr(thing))
  1097. logger.debug(f"Parser({self.name}).parse() result: {repr(result)}")
  1098. if pos:
  1099. if type(result[1]) == SyntaxError:
  1100. pos[0] = current_pos[0]
  1101. pos[1] = current_pos[1]
  1102. self.last_error = result[1]
  1103. else:
  1104. try:
  1105. result[1].position_in_text = current_pos
  1106. except AttributeError:
  1107. pass
  1108. if self.keep_feeble_things and skip_result:
  1109. try:
  1110. result[1].feeble_things
  1111. except AttributeError:
  1112. try:
  1113. result[1].feeble_things = skip_result
  1114. except AttributeError:
  1115. pass
  1116. else:
  1117. result[1].feeble_things += skip_result
  1118. try:
  1119. self._memory[id(thing)]
  1120. except KeyError:
  1121. self._memory[id(thing)] = { text: result }
  1122. else:
  1123. self._memory[id(thing)][text] = result
  1124. return result
  1125. def compose(self, thing, grammar=None, attr_of=None):
  1126. """Compose text using thing with grammar.
  1127. Arguments:
  1128. thing thing containing other things with grammar
  1129. grammar grammar to use for composing thing
  1130. default: type(thing).grammar
  1131. attr_of if composing the value of an attribute, this
  1132. is a reference to the thing where this value
  1133. is an attribute of; None if this is not an
  1134. attribute value
  1135. Returns text
  1136. Raises:
  1137. ValueError if thing does not match grammar
  1138. GrammarTypeError
  1139. if grammar contains an object of unkown type
  1140. GrammarValueError
  1141. if grammar contains an illegal cardinality value
  1142. """
  1143. logger.debug(f"Parser({self.name}).compose({thing}, {grammar})")
  1144. if __debug__:
  1145. # make sure that we're not having this typing error
  1146. compose = None
  1147. def terminal_indent(do_blank=False):
  1148. self._got_regex = False
  1149. if self._got_endl:
  1150. result = self.indent * self.indention_level
  1151. self._got_endl = False
  1152. return result
  1153. elif do_blank and self.whitespace:
  1154. if self._contiguous or not self.autoblank:
  1155. return ""
  1156. else:
  1157. return blank(thing, self)
  1158. else:
  1159. return ""
  1160. try:
  1161. thing.compose
  1162. except AttributeError:
  1163. pass
  1164. else:
  1165. return terminal_indent() + thing.compose(self, attr_of=attr_of)
  1166. if not grammar:
  1167. try:
  1168. grammar = type(thing).grammar
  1169. except AttributeError:
  1170. if isinstance(thing, Symbol):
  1171. grammar = type(thing).regex
  1172. elif isinstance(thing, list):
  1173. grammar = csl(Symbol)
  1174. else:
  1175. grammar = word
  1176. else:
  1177. if isinstance(thing, Symbol):
  1178. grammar = type(thing).regex
  1179. if grammar is None:
  1180. result = ""
  1181. #
  1182. elif type(grammar) == FunctionType:
  1183. if grammar == endl:
  1184. result = endl(thing, self)
  1185. self._got_endl = True
  1186. elif grammar == blank:
  1187. result = terminal_indent() + blank(thing, self)
  1188. else:
  1189. result = self.compose(thing, grammar(thing, self))
  1190. #
  1191. elif isinstance(grammar, (RegEx, _RegEx)):
  1192. m = grammar.match(str(thing))
  1193. if m:
  1194. result = terminal_indent(do_blank=self._got_regex) + str(thing)
  1195. else:
  1196. raise ValueError(repr(thing) + " does not match "
  1197. + grammar.pattern)
  1198. self._got_regex = True
  1199. #
  1200. elif isinstance(grammar, Keyword):
  1201. result = terminal_indent(do_blank=self._got_regex) + str(grammar)
  1202. self._got_regex = True
  1203. #
  1204. elif isinstance(grammar, (str, int, Literal)):
  1205. result = terminal_indent() + str(grammar)
  1206. #
  1207. elif isinstance(grammar, Enum):
  1208. if thing in grammar:
  1209. if isinstance(thing, Keyword):
  1210. result = terminal_indent(do_blank=self._got_regex) + str(thing)
  1211. self._got_regex = True
  1212. else:
  1213. result = terminal_indent() + str(thing)
  1214. else:
  1215. raise ValueError(repr(thing) + " is not in " + repr(grammar))
  1216. #
  1217. elif isinstance(grammar, attr.Class):
  1218. if grammar.subtype == "Flag":
  1219. if getattr(thing, grammar.name):
  1220. result = self.compose(thing, grammar.thing, attr_of=thing)
  1221. else:
  1222. result = terminal_indent()
  1223. else:
  1224. result = self.compose(getattr(thing, grammar.name),
  1225. grammar.thing, attr_of=thing)
  1226. #
  1227. elif isinstance(grammar, (tuple, list)):
  1228. def compose_tuple(thing, things, grammar):
  1229. text = []
  1230. multiple, card = 1, 1
  1231. indenting = 0
  1232. if isinstance(grammar, (tuple, Concat)):
  1233. # concatenation
  1234. for g in grammar:
  1235. if g is None:
  1236. multiple = 1
  1237. if self.indenting:
  1238. self.indention_level -= indenting
  1239. self.indenting = 0
  1240. elif type(g) == int:
  1241. if g < -6:
  1242. raise GrammarValueError(
  1243. "illegal cardinality value in grammar: "
  1244. + str(g))
  1245. card = g
  1246. if g in (-2, -1):
  1247. multiple = sys.maxsize
  1248. elif g in (-5, -4, -3, 0):
  1249. multiple = 1
  1250. if g == -3:
  1251. self.indention_level += 1
  1252. indenting += 1
  1253. elif g == -6:
  1254. multiple = 0
  1255. else:
  1256. multiple = g
  1257. else:
  1258. passes = 0
  1259. try:
  1260. for r in range(multiple):
  1261. if isinstance(g, (str, Symbol, Literal)):
  1262. text.append(self.compose(thing, g))
  1263. if card < 1:
  1264. break
  1265. elif isinstance(g, FunctionType):
  1266. text.append(self.compose(thing, g))
  1267. if card < 1:
  1268. break
  1269. elif isinstance(g, attr.Class):
  1270. text.append(self.compose(getattr(thing,
  1271. g.name), g.thing, attr_of=thing))
  1272. if card < 1:
  1273. break
  1274. elif isinstance(g, (tuple, list)):
  1275. text.append(compose_tuple(thing, things, g))
  1276. if not things:
  1277. break
  1278. else:
  1279. text.append(self.compose(things.pop(), g))
  1280. passes += 1
  1281. except (IndexError, ValueError):
  1282. if card == -2:
  1283. if passes < 1:
  1284. raise ValueError(repr(g)
  1285. + " has to be there at least once")
  1286. elif card > 0:
  1287. if passes < multiple:
  1288. raise ValueError(repr(g)
  1289. + " has to be there exactly "
  1290. + str(multiple) + " times")
  1291. multiple = 1
  1292. if indenting:
  1293. self.indention_level -= indenting
  1294. indenting = 0
  1295. return ''.join(text)
  1296. else:
  1297. # options
  1298. for g in grammar:
  1299. try:
  1300. if isinstance(g, (str, Symbol, Literal)):
  1301. return self.compose(thing, g)
  1302. elif isinstance(g, FunctionType):
  1303. return self.compose(thing, g)
  1304. elif isinstance(g, attr.Class):
  1305. return self.compose(getattr(thing, g.name), g.thing)
  1306. elif isinstance(g, (tuple, list)):
  1307. return compose_tuple(thing, things, g)
  1308. else:
  1309. try:
  1310. text = self.compose(things[-1], g)
  1311. except Exception as e:
  1312. raise e
  1313. things.pop()
  1314. return text
  1315. except GrammarTypeError:
  1316. raise
  1317. except AttributeError:
  1318. pass
  1319. except KeyError:
  1320. pass
  1321. except TypeError:
  1322. pass
  1323. except ValueError:
  1324. pass
  1325. raise ValueError("none of the options in " + repr(grammar)
  1326. + " found")
  1327. if isinstance(thing, Namespace):
  1328. L = [e for e in thing.values()]
  1329. L.reverse()
  1330. elif isinstance(thing, list):
  1331. L = thing[:]
  1332. L.reverse()
  1333. else:
  1334. L = [thing]
  1335. result = compose_tuple(thing, L, grammar)
  1336. #
  1337. elif _issubclass(grammar, object):
  1338. if isinstance(thing, grammar):
  1339. try:
  1340. grammar.grammar
  1341. except AttributeError:
  1342. if _issubclass(grammar, Symbol):
  1343. result = self.compose(thing, grammar.regex)
  1344. else:
  1345. result = self.compose(thing)
  1346. else:
  1347. result = self.compose(thing, grammar.grammar)
  1348. else:
  1349. if grammar == Symbol and isinstance(thing, str):
  1350. result = self.compose(str(thing), Symbol.regex)
  1351. else:
  1352. raise ValueError(repr(thing) + " is not a " + repr(grammar))
  1353. #
  1354. else:
  1355. raise GrammarTypeError("in grammar: " + repr(grammar))
  1356. return result