Fork of yml2 for pypi maintenance
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

351 lines
11 KiB

6 years ago
  1. # YPL parser 1.5
  2. # written by VB.
  3. import re
  4. import sys, codecs
  5. import exceptions
  6. class keyword(unicode): pass
  7. class code(unicode): pass
  8. class ignore(object):
  9. def __init__(self, regex_text, *args):
  10. self.regex = re.compile(regex_text, *args)
  11. class _and(object):
  12. def __init__(self, something):
  13. self.obj = something
  14. class _not(_and): pass
  15. class Name(unicode):
  16. def __init__(self, *args):
  17. self.line = 0
  18. self.file = u""
  19. class Symbol(list):
  20. def __init__(self, name, what):
  21. self.__name__ = name
  22. self.append(name)
  23. self.what = what
  24. self.append(what)
  25. def __call__(self):
  26. return self.what
  27. def __unicode__(self):
  28. return u'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + u')'
  29. def __repr__(self):
  30. return unicode(self)
# Matches a run of word characters; parser.parseLine() uses it to read the
# word at the current position when testing a keyword pattern.
# NOTE(review): compiled without re.UNICODE -- confirm that this is intended.
word_regex = re.compile(ur"\w+")
# Matches everything up to (but not including) the next newline.
rest_regex = re.compile(ur".*")
# When true (and __debug__ is on), parser.parseLine() writes a trace of the
# patterns it tries and matches to sys.stderr.
print_trace = False
  34. def u(text):
  35. if isinstance(text, exceptions.BaseException):
  36. text = text.args[0]
  37. if type(text) is unicode:
  38. return text
  39. if isinstance(text, str):
  40. if sys.stdin.encoding:
  41. return codecs.decode(text, sys.stdin.encoding)
  42. else:
  43. return codecs.decode(text, "utf-8")
  44. return unicode(text)
  45. def skip(skipper, text, skipWS, skipComments):
  46. if skipWS:
  47. t = text.lstrip()
  48. else:
  49. t = text
  50. if skipComments:
  51. try:
  52. while True:
  53. skip, t = skipper.parseLine(t, skipComments, [], skipWS, None)
  54. if skipWS:
  55. t = t.lstrip()
  56. except: pass
  57. return t
  58. class parser(object):
  59. def __init__(self, another = False, p = False):
  60. self.restlen = -1
  61. if not(another):
  62. self.skipper = parser(True, p)
  63. self.skipper.packrat = p
  64. else:
  65. self.skipper = self
  66. self.lines = None
  67. self.textlen = 0
  68. self.memory = {}
  69. self.packrat = p
  70. # parseLine():
  71. # textline: text to parse
  72. # pattern: pyPEG language description
  73. # resultSoFar: parsing result so far (default: blank list [])
  74. # skipWS: Flag if whitespace should be skipped (default: True)
  75. # skipComments: Python functions returning pyPEG for matching comments
  76. #
  77. # returns: pyAST, textrest
  78. #
  79. # raises: SyntaxError(reason) if textline is detected not being in language
  80. # described by pattern
  81. #
  82. # SyntaxError(reason) if pattern is an illegal language description
  83. def parseLine(self, textline, pattern, resultSoFar = [], skipWS = True, skipComments = None):
  84. name = None
  85. _textline = textline
  86. _pattern = pattern
  87. def R(result, text):
  88. if __debug__:
  89. if print_trace:
  90. try:
  91. if _pattern.__name__ != "comment":
  92. sys.stderr.write(u"match: " + _pattern.__name__ + u"\n")
  93. except: pass
  94. if self.restlen == -1:
  95. self.restlen = len(text)
  96. else:
  97. self.restlen = min(self.restlen, len(text))
  98. res = resultSoFar
  99. if name and result:
  100. name.line = self.lineNo()
  101. res.append(Symbol(name, result))
  102. elif name:
  103. name.line = self.lineNo()
  104. res.append(Symbol(name, []))
  105. elif result:
  106. if type(result) is type([]):
  107. res.extend(result)
  108. else:
  109. res.extend([result])
  110. if self.packrat:
  111. self.memory[(len(_textline), id(_pattern))] = (res, text)
  112. return res, text
  113. def syntaxError():
  114. if self.packrat:
  115. self.memory[(len(_textline), id(_pattern))] = False
  116. raise SyntaxError()
  117. if self.packrat:
  118. try:
  119. result = self.memory[(len(textline), id(pattern))]
  120. if result:
  121. return result
  122. else:
  123. raise SyntaxError()
  124. except: pass
  125. if callable(pattern):
  126. if __debug__:
  127. if print_trace:
  128. try:
  129. if pattern.__name__ != "comment":
  130. sys.stderr.write(u"testing with " + pattern.__name__ + u": " + textline[:40] + u"\n")
  131. except: pass
  132. if pattern.__name__[0] != "_":
  133. name = Name(pattern.__name__)
  134. pattern = pattern()
  135. if callable(pattern):
  136. pattern = (pattern,)
  137. text = skip(self.skipper, textline, skipWS, skipComments)
  138. pattern_type = type(pattern)
  139. if pattern_type is str or pattern_type is unicode:
  140. if text[:len(pattern)] == pattern:
  141. text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
  142. return R(None, text)
  143. else:
  144. syntaxError()
  145. elif pattern_type is keyword:
  146. m = word_regex.match(text)
  147. if m:
  148. if m.group(0) == pattern:
  149. text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
  150. return R(None, text)
  151. else:
  152. syntaxError()
  153. else:
  154. syntaxError()
  155. elif pattern_type is _not:
  156. try:
  157. r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
  158. except:
  159. return resultSoFar, textline
  160. syntaxError()
  161. elif pattern_type is _and:
  162. r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
  163. return resultSoFar, textline
  164. elif pattern_type is type(word_regex) or pattern_type is ignore:
  165. if pattern_type is ignore:
  166. pattern = pattern.regex
  167. m = pattern.match(text)
  168. if m:
  169. text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
  170. if pattern_type is ignore:
  171. return R(None, text)
  172. else:
  173. return R(m.group(0), text)
  174. else:
  175. syntaxError()
  176. elif pattern_type is tuple:
  177. result = []
  178. n = 1
  179. for p in pattern:
  180. if type(p) is type(0):
  181. n = p
  182. else:
  183. if n>0:
  184. for i in range(n):
  185. result, text = self.parseLine(text, p, result, skipWS, skipComments)
  186. elif n==0:
  187. if text == "":
  188. pass
  189. else:
  190. try:
  191. newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
  192. result, text = newResult, newText
  193. except SyntaxError:
  194. pass
  195. elif n<0:
  196. found = False
  197. while True:
  198. try:
  199. newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
  200. result, text, found = newResult, newText, True
  201. except SyntaxError:
  202. break
  203. if n == -2 and not(found):
  204. syntaxError()
  205. n = 1
  206. return R(result, text)
  207. elif pattern_type is list:
  208. result = []
  209. found = False
  210. for p in pattern:
  211. try:
  212. result, text = self.parseLine(text, p, result, skipWS, skipComments)
  213. found = True
  214. except SyntaxError:
  215. pass
  216. if found:
  217. break
  218. if found:
  219. return R(result, text)
  220. else:
  221. syntaxError()
  222. else:
  223. raise SyntaxError(u"illegal type in grammar: " + u(pattern_type))
  224. def lineNo(self):
  225. if not(self.lines): return u""
  226. if self.restlen == -1: return u""
  227. parsed = self.textlen - self.restlen
  228. left, right = 0, len(self.lines)
  229. while True:
  230. mid = int((right + left) / 2)
  231. if self.lines[mid][0] <= parsed:
  232. try:
  233. if self.lines[mid + 1][0] >= parsed:
  234. try:
  235. return u(self.lines[mid + 1][1]) + u":" + u(self.lines[mid + 1][2])
  236. except:
  237. return u""
  238. else:
  239. left = mid + 1
  240. except:
  241. try:
  242. return u(self.lines[mid + 1][1]) + u":" + u(self.lines[mid + 1][2])
  243. except:
  244. return u""
  245. else:
  246. right = mid - 1
  247. if left > right:
  248. return u""
  249. # plain module API
  250. def parseLine(textline, pattern, resultSoFar = [], skipWS = True, skipComments = None, packrat = False):
  251. p = parser(p=packrat)
  252. text = skip(p.skipper, textline, skipWS, skipComments)
  253. ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
  254. return ast, text
  255. # parse():
  256. # language: pyPEG language description
  257. # lineSource: a fileinput.FileInput object
  258. # skipWS: Flag if whitespace should be skipped (default: True)
  259. # skipComments: Python function which returns pyPEG for matching comments
  260. # packrat: use memoization
  261. # lineCount: add line number information to AST
  262. #
  263. # returns: pyAST
  264. #
  265. # raises: SyntaxError(reason), if a parsed line is not in language
  266. # SyntaxError(reason), if the language description is illegal
  267. def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
  268. lines, lineNo = [], 0
  269. while callable(language):
  270. language = language()
  271. orig, ld = u"", 0
  272. for line in lineSource:
  273. if lineSource.isfirstline():
  274. ld = 1
  275. else:
  276. ld += 1
  277. lines.append((len(orig), lineSource.filename(), lineSource.lineno() - 1))
  278. orig += u(line)
  279. textlen = len(orig)
  280. try:
  281. p = parser(p=packrat)
  282. p.textlen = len(orig)
  283. if lineCount:
  284. p.lines = lines
  285. else:
  286. p.line = None
  287. text = skip(p.skipper, orig, skipWS, skipComments)
  288. result, text = p.parseLine(text, language, [], skipWS, skipComments)
  289. if text:
  290. raise SyntaxError()
  291. except SyntaxError, msg:
  292. parsed = textlen - p.restlen
  293. textlen = 0
  294. nn, lineNo, file = 0, 0, u""
  295. for n, ld, l in lines:
  296. if n >= parsed:
  297. break
  298. else:
  299. lineNo = l
  300. nn += 1
  301. file = ld
  302. lineNo += 1
  303. nn -= 1
  304. lineCont = orig.splitlines()[nn]
  305. raise SyntaxError(u"syntax error in " + u(file) + u":" + u(lineNo) + u": " + lineCont)
  306. return result