# YPL parser 1.5

# written by VB.

import re
import sys
import codecs

try:
    # Python 2 only; kept so the module-level name stays available there.
    import exceptions
except ImportError:
    exceptions = None

try:
    unicode
except NameError:
    # Python 3: str is already the unicode text type.
    unicode = str


class keyword(unicode):
    """Terminal that must equal the whole word found by ``word_regex``."""
    pass


class code(unicode):
    """Marker text type; the parser in this module does not reference it."""
    pass


class ignore(object):
    """Regex terminal: matched text is consumed but not added to the result."""

    def __init__(self, regex_text, *args):
        self.regex = re.compile(regex_text, *args)


class _and(object):
    """Lookahead wrapper: ``obj`` must match; the parser consumes nothing."""

    def __init__(self, something):
        self.obj = something


class _not(_and):
    """Lookahead wrapper: ``obj`` must fail to match; nothing is consumed."""
    pass


class Name(unicode):
    """Rule name carrying ``line`` and ``file`` attributes."""

    def __init__(self, *args):
        self.line = 0
        self.file = u""


class Symbol(list):
    """AST node: a two-element list ``[name, what]`` with attribute access."""

    def __init__(self, name, what):
        self.__name__ = name
        self.append(name)
        self.what = what
        self.append(what)

    def __call__(self):
        return self.what

    def __unicode__(self):
        return u'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + u')'

    def __repr__(self):
        # Call __unicode__ directly: where ``unicode`` is ``str``,
        # unicode(self) would fall back to __repr__ and recurse forever.
        return self.__unicode__()


word_regex = re.compile(u"\\w+")
rest_regex = re.compile(u".*")

print_trace = False


def u(text):
    """Return ``text`` as unicode text.

    Exceptions are replaced by their first argument (or u"" when they have
    none). Byte strings are decoded with ``sys.stdin.encoding`` when that is
    set, otherwise as UTF-8. Anything else goes through ``unicode()``.
    """
    if isinstance(text, BaseException):
        # An exception raised without arguments has an empty ``args`` tuple.
        text = text.args[0] if text.args else u""
    if type(text) is unicode:
        return text
    if isinstance(text, bytes):
        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return codecs.decode(text, encoding)
    return unicode(text)


def skip(skipper, text, skipWS, skipComments):
    """Strip leading whitespace and/or comments from ``text``.

    skipper:      parser object used to match comments
    skipWS:       if true, leading whitespace is removed
    skipComments: comment grammar, or a false value to skip no comments

    Returns the remaining text.
    """
    if skipWS:
        t = text.lstrip()
    else:
        t = text
    if skipComments:
        try:
            while True:
                _ignored, rest = skipper.parseLine(t, skipComments, [], skipWS, None)
                if skipWS:
                    rest = rest.lstrip()
                if len(rest) >= len(t):
                    # The comment rule matched without consuming anything;
                    # repeating it would never terminate.
                    break
                t = rest
        except SyntaxError:
            # No further comment at this position: that ends the loop.
            pass
    return t


class parser(object):
    """PEG parser state: furthest position reached, line table, packrat cache."""

    def __init__(self, another = False, p = False):
        # restlen: smallest length of unparsed rest seen so far, -1 = none yet
        self.restlen = -1
        if not(another):
            # comments are parsed by a separate parser with its own state
            self.skipper = parser(True, p)
            self.skipper.packrat = p
        else:
            self.skipper = self
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    # parseLine():
    #   textline:       text to parse
    #   pattern:        pyPEG language description
    #   resultSoFar:    parsing result so far (default: a new blank list)
    #   skipWS:         Flag if whitespace should be skipped (default: True)
    #   skipComments:   Python function returning pyPEG for matching comments
    #
    #   returns:        pyAST, textrest
    #
    #   raises:         SyntaxError(reason) if textline is detected not being
    #                   in the language described by pattern
    #
    #                   SyntaxError(reason) if pattern is an illegal language
    #                   description

    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
        # A literal [] default would be shared between calls and R() appends
        # to it, so create a fresh list for every call instead.
        if resultSoFar is None:
            resultSoFar = []
        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Record a successful match and merge ``result`` into resultSoFar.
            if __debug__:
                if print_trace:
                    try:
                        if _pattern.__name__ != "comment":
                            sys.stderr.write(u"match: " + _pattern.__name__ + u"\n")
                    except Exception:
                        # tracing is best effort (e.g. pattern has no __name__)
                        pass

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))
            res = resultSoFar
            if name and result:
                name.line = self.lineNo()
                res.append(Symbol(name, result))
            elif name:
                name.line = self.lineNo()
                res.append(Symbol(name, []))
            elif result:
                if type(result) is type([]):
                    res.extend(result)
                else:
                    res.extend([result])
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            # Record the failure in the packrat cache and signal it.
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached
            # NOTE(review): a cached failure (False) falls through and is
            # parsed again, exactly as before. Short-circuiting it would rely
            # on id(pattern) staying unique for temporary grammar objects;
            # confirm that before changing it.

        if callable(pattern):
            if __debug__:
                if print_trace:
                    try:
                        if pattern.__name__ != "comment":
                            sys.stderr.write(u"testing with " + pattern.__name__ + u": " + textline[:40] + u"\n")
                    except Exception:
                        # tracing is best effort
                        pass

            # rules whose name starts with "_" do not create a Symbol
            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        pattern_type = type(pattern)

        if pattern_type is str or pattern_type is unicode:
            if text[:len(pattern)] == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            else:
                syntaxError()

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if m:
                if m.group(0) == pattern:
                    text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                    return R(None, text)
                else:
                    syntaxError()
            else:
                syntaxError()

        elif pattern_type is _not:
            try:
                r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # the wrapped pattern failed, so the negative lookahead holds
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # raises SyntaxError itself if the wrapped pattern fails
            r, t = self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            if pattern_type is ignore:
                pattern = pattern.regex
            m = pattern.match(text)
            if m:
                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
                if pattern_type is ignore:
                    return R(None, text)
                else:
                    return R(m.group(0), text)
            else:
                syntaxError()

        elif pattern_type is tuple:
            # sequence; an int entry sets the cardinality of the next pattern:
            # n > 0 exactly n times, 0 optional, -1 any number, -2 at least one
            result = []
            n = 1
            for p in pattern:
                if type(p) is type(0):
                    n = p
                else:
                    if n>0:
                        for i in range(n):
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    elif n==0:
                        if text == "":
                            pass
                        else:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text = newResult, newText
                            except SyntaxError:
                                pass
                    elif n<0:
                        found = False
                        while True:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text, found = newResult, newText, True
                            except SyntaxError:
                                break
                        if n == -2 and not(found):
                            syntaxError()
                    n = 1
            return R(result, text)

        elif pattern_type is list:
            # ordered choice: the first alternative that matches wins
            result = []
            found = False
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    found = True
                except SyntaxError:
                    pass
                if found:
                    break
            if found:
                return R(result, text)
            else:
                syntaxError()

        else:
            raise SyntaxError(u"illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        # Binary search in self.lines (entries start with a text offset) for
        # the entry at or before the furthest parsed position. Returns
        # u"<field 1>:<field 2>" of the entry FOLLOWING the one found, or u""
        # when there is no line table, nothing was parsed yet, or there is no
        # following entry.
        if not(self.lines): return u""
        if self.restlen == -1: return u""
        parsed = self.textlen - self.restlen

        left, right = 0, len(self.lines)

        while True:
            mid = (right + left) // 2
            if self.lines[mid][0] <= parsed:
                try:
                    following = self.lines[mid + 1]
                except IndexError:
                    # mid is the last entry: no following entry to report
                    return u""
                if following[0] >= parsed:
                    try:
                        return u(following[1]) + u":" + u(following[2])
                    except Exception:
                        # malformed entry: report no position rather than fail
                        return u""
                else:
                    left = mid + 1
            else:
                right = mid - 1
            if left > right:
                return u""


# plain module API

def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
    """Parse ``textline`` with a fresh parser; see parser.parseLine().

    Returns (pyAST, textrest). Raises SyntaxError if the text does not match.
    """
    # A fresh list per call keeps results of one call out of the next.
    if resultSoFar is None:
        resultSoFar = []
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
    return ast, text


# parse():
#   language:       pyPEG language description
#   lineSource:     a fileinput.FileInput object (iterated line by line;
#                   filename() and lineno() are called for every line)
#   skipWS:         Flag if whitespace should be skipped (default: True)
#   skipComments:   Python function which returns pyPEG for matching comments
#   packrat:        use memoization
#   lineCount:      add line number information to AST
#
#   returns:        pyAST
#
#   raises:         SyntaxError(reason), if a parsed line is not in language
#                   SyntaxError(reason), if the language description is illegal

def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
    while callable(language):
        language = language()

    # lines: one (start offset, file name, zero-based line number) per line
    lines = []
    chunks = []
    offset = 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        decoded = u(line)
        chunks.append(decoded)
        offset += len(decoded)
    # join once instead of growing the text with += for every line
    orig = u"".join(chunks)
    textlen = len(orig)

    p = parser(p=packrat)
    p.textlen = textlen
    if lineCount:
        p.lines = lines
    else:
        p.lines = None

    # rest length used for error reporting if no pattern ever matches
    unparsed = textlen
    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        unparsed = len(text)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        if p.restlen == -1:
            # nothing matched at all: the error is at the first token
            parsed = textlen - unparsed
        else:
            parsed = textlen - p.restlen

        # Pick the last line that starts at or before the error offset. A
        # line starting exactly at the offset contains the failing text.
        index, lineNo, fname = -1, 0, u""
        for i, (start, name, number) in enumerate(lines):
            if start > parsed:
                break
            index, lineNo, fname = i, number, name
        lineNo += 1

        content = orig.splitlines()
        if 0 <= index < len(content):
            lineCont = content[index]
        else:
            # empty input: there is no line to quote
            lineCont = u""

        raise SyntaxError(u"syntax error in " + u(fname) + u":" + u(lineNo) + u": " + lineCont)

    return result