Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.3" 
  61  __versionTime__ = "11 May 2016 15:17 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75  import traceback 
  76   
  77  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  78   
  79  __all__ = [ 
  80  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  81  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  82  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  83  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  84  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  85  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  86  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  87  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  88  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  89  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  90  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
  91  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  92  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  93  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  94  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  95  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  96  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  97  ] 
  98   
  99  PY_3 = sys.version.startswith('3') 
 100  if PY_3: 
 101      _MAX_INT = sys.maxsize 
 102      basestring = str 
 103      unichr = chr 
 104      _ustr = str 
 105   
 106      # build list of single arg builtins, that can be used as parse actions 
 107      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 108   
 109  else: 
 110      _MAX_INT = sys.maxint 
 111      range = xrange 
112 113 - def _ustr(obj):
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 116 then < returns the unicode object | encodes it with the default encoding | ... >. 117 """ 118 if isinstance(obj,unicode): 119 return obj 120 121 try: 122 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 123 # it won't break any existing code. 124 return str(obj) 125 126 except UnicodeEncodeError: 127 # Else encode it 128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 129 xmlcharref = Regex('&#\d+;') 130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 131 return xmlcharref.transformString(ret)
132 133 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 134 singleArgBuiltins = [] 135 import __builtin__ 136 for fname in "sum len sorted reversed list tuple set any all min max".split(): 137 try: 138 singleArgBuiltins.append(getattr(__builtin__,fname)) 139 except AttributeError: 140 continue 141 142 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with the characters &, >, <, " and ' replaced by XML entities."""

    # '&' heads the table on purpose: replacing it later would re-escape the
    # ampersands introduced by the other entities
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw, entity in replacements:
        data = data.replace(raw, entity)
    return data
153
class _Constants(object):
    """Empty class that defines no members of its own.

    NOTE(review): presumably instantiated elsewhere as a plain attribute
    namespace - no use is visible in this part of the file; confirm.
    """
    pass
# character-set constants exported for building grammars
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
# the backslash character, built from its code point
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            # called with a single string: it is the message, and there is no input text
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem

    def __getattr__( self, aname ):
        """Compute the position attributes on demand from C{loc} and C{pstr}:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other missing attribute raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        fields = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % fields

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        text = self.line
        split_at = self.column - 1
        if not markerString:
            return text.strip()
        marked = text[:split_at] + markerString + text[split_at:]
        return marked.strip()

    def __dir__(self):
        # advertise the computed attributes alongside those of the class
        return ["lineno", "col", "line"] + dir(type(self))
210
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       The following attributes are available by name:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
219
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
224
class ParseSyntaxException(ParseFatalException):
    """just like C{L{ParseFatalException}}, but thrown internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # carry over the input string, location, message and element of C{pe}
        pstr, loc, msg, elem = pe.pstr, pe.loc, pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(pstr, loc, msg, elem)
232
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the sequence of parse elements passed in by the raiser, kept as-is
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # wrap the trace in a 1-tuple so a tuple-valued trace is formatted as a
        # single value instead of being unpacked as multiple '%' arguments
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
253
class _ParseResultsWithOffset(object):
    """Pair of two values held in the 2-tuple C{tup}; indexable like a tuple,
       with the second member replaceable through C{setOffset}."""
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        # keep the first member, swap in the new second member
        first = self.tup[0]
        self.tup = (first,i)
263
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})

       Tokens are kept in an internal list; named results are kept in an
       internal dict mapping each name to a list of
       C{_ParseResultsWithOffset} (value, position) pairs.
       """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object;
        # __doinit tells __init__ whether the storage still has to be set up
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # non-modal names report every match, not just the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # only register the name when there is something non-empty to store
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Return a token (int or slice index) or a named result (any other key).
           For an accumulating name all stored values are returned as a new
           C{ParseResults}; otherwise only the most recent value is returned."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        """Store a token (int or slice key) or append a named result (any other
           key, or any key when C{v} is a C{_ParseResultsWithOffset})."""
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        """Delete tokens (int or slice) or a results name (any other key)."""
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        # membership is tested against results names, not tokens
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def iterkeys( self ):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues( self ):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems( self ):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys( self ):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop()."""
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        """Attribute-style access to named results; an unknown name yields C{""}
           rather than raising C{AttributeError}."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Append the tokens of C{other}, merging its named results with their
           positions shifted by the current token count."""
        if other.__tokdict:
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # Re-evaluating "other + self" here would re-enter __radd__ without
            # end; returning NotImplemented lets Python raise the TypeError.
            return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        """Flatten the tokens into a list of strings, placing C{sep} between
           top-level items when it is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens; nested
           C{ParseResults} become nested lists, other tokens are returned unchanged."""
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """Returns the named parse results as a nested dictionary."""
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            # nested results with names become dicts, unnamed ones become lists
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object C{sub} is stored, if any
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
                   len(self.__tokdict) == 1 and
                   # dict views cannot be indexed on Python 3; take the single entry via an iterator
                   next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if self.haskeys():
            items = sorted(self.items())
            for k,v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                if isinstance(v,ParseResults):
                    if v:
                        out.append( v.dump(indent,depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv,ParseResults) for vv in self):
            v = self
            for i,vv in enumerate(v):
                if isinstance(vv,ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # the weakly referenced parent is resolved to the real object (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        return (dir(type(self)) + list(self.keys()))

# The abstract base classes live in collections.abc on Python 3 (the aliases in
# the collections package itself were removed in 3.10); fall back for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.  When the character at C{loc} is itself a
   newline, 1 is returned.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the previous newline (rfind gives -1 on the first line)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
725
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line, plus one more for every newline found before loc
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
737
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # rfind gives -1 when no newline precedes loc, so the line starts at 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
747
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, with its location as (line,column)."""
    prefix = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print (prefix + position)
750
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched, followed by its tokens as a list."""
    matched = "Matched " + _ustr(expr) + " -> "
    print (matched + str(toks.asList()))
753
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception raised while attempting a match."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
756
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and ignores them all.
    """
    return None
760 761 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 762 #~ 'decorator to trim function calls to match the arity of the target' 763 #~ def _trim_arity(func, maxargs=3): 764 #~ if func in singleArgBuiltins: 765 #~ return lambda s,l,t: func(t) 766 #~ limit = 0 767 #~ foundArity = False 768 #~ def wrapper(*args): 769 #~ nonlocal limit,foundArity 770 #~ while 1: 771 #~ try: 772 #~ ret = func(*args[limit:]) 773 #~ foundArity = True 774 #~ return ret 775 #~ except TypeError: 776 #~ if limit == maxargs or foundArity: 777 #~ raise 778 #~ limit += 1 779 #~ continue 780 #~ return wrapper 781 782 # this version is Python 2.x-3.x cross-compatible 783 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Decorator to trim function calls to match the arity of the target.

    Returns a wrapper that calls C{func} with progressively fewer leading
    arguments (C{args[limit:]}) each time the call raises a C{TypeError}
    attributed to the call line itself, until a call succeeds.  Once a call
    has succeeded, later C{TypeError}s are re-raised unchanged.  Functions
    listed in C{singleArgBuiltins} are instead wrapped to receive only the
    third argument.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists, so that wrapper() can update them in place
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    ver = tuple(sys.version_info)[:3]
    if ver[:2] >= (3,5):
        def extract_stack():
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if ver == (3,5,0) else -2
            frame_summary = traceback.extract_stack()[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb):
            frames = traceback.extract_tb(tb)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack()[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # innermost traceback frame must be the func(...) call line above;
                        # otherwise the TypeError came from inside func and is re-raised
                        if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                # drop one more leading argument and retry, until limit exceeds maxargs
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise
    return wrapper
839 -class ParserElement(object):
840 """Abstract base level parser element class.""" 841 DEFAULT_WHITE_CHARS = " \n\t\r" 842 verbose_stacktrace = False 843 844 @staticmethod
845 - def setDefaultWhitespaceChars( chars ):
846 """Overrides the default whitespace chars 847 """ 848 ParserElement.DEFAULT_WHITE_CHARS = chars
849 850 @staticmethod
851 - def inlineLiteralsUsing(cls):
852 """ 853 Set class to be used for inclusion of string literals into a parser. 854 """ 855 ParserElement.literalStringClass = cls
856
857 - def __init__( self, savelist=False ):
858 self.parseAction = list() 859 self.failAction = None 860 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 861 self.strRepr = None 862 self.resultsName = None 863 self.saveAsList = savelist 864 self.skipWhitespace = True 865 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 866 self.copyDefaultWhiteChars = True 867 self.mayReturnEmpty = False # used when checking for left-recursion 868 self.keepTabs = False 869 self.ignoreExprs = list() 870 self.debug = False 871 self.streamlined = False 872 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 873 self.errmsg = "" 874 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 875 self.debugActions = ( None, None, None ) #custom debug actions 876 self.re = None 877 self.callPreparse = True # used to avoid redundant calls to preParse 878 self.callDuringTry = False
879
880 - def copy( self ):
881 """Make a copy of this C{ParserElement}. Useful for defining different parse actions 882 for the same parsing pattern, using copies of the original parse element.""" 883 cpy = copy.copy( self ) 884 cpy.parseAction = self.parseAction[:] 885 cpy.ignoreExprs = self.ignoreExprs[:] 886 if self.copyDefaultWhiteChars: 887 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 888 return cpy
889
890 - def setName( self, name ):
891 """Define name for this expression, for use in debugging.""" 892 self.name = name 893 self.errmsg = "Expected " + self.name 894 if hasattr(self,"exception"): 895 self.exception.msg = self.errmsg 896 return self
897
def setResultsName( self, name, listAllMatches=False ):
    """Return a copy of this element whose matched tokens are reported
       under C{name} in the returned parse results.

       NOTE: a *copy* is returned, not C{self}, so that a basic element
       (such as an integer) can be referenced in several places under
       different names.

       A trailing C{'*'} on C{name} is stripped and has the same effect as
       passing C{listAllMatches=True}.

       The abbreviated form C{expr("name")} is equivalent to
       C{expr.setResultsName("name")} - see L{I{__call__}<__call__>}.
    """
    named = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name = name[:-1]
        accumulate = True
    named.resultsName = name
    named.modalResults = not accumulate
    return named
916
def setBreak(self,breakFlag = True):
    """Enable or disable a C{pdb} breakpoint that fires just before this
       element is parsed.

       With C{breakFlag} true, C{_parse} is replaced by a wrapper that calls
       C{pdb.set_trace()} and then the previous parse method.  With it
       false, the previous method is restored if a wrapper is installed.
       Returns C{self}.
    """
    if not breakFlag:
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that setBreak(False) can undo this
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
934
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with C{fns}.

       A parse action is a callable taking 0-3 arguments, called as
       C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
       If an action modifies the tokens, it can return them and the returned
       value replaces the original tokens.  Otherwise it need not return
       anything.

       The optional keyword argument C{callDuringTry} (default C{False}) is
       stored on the element.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
    """
    self.parseAction = [_trim_arity(action) for action in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
955
def addParseAction( self, *fns, **kwargs ):
    """Append C{fns} to this element's existing parse actions.
       See L{I{setParseAction}<setParseAction>}.

       C{callDuringTry} stays set once it is set; otherwise it takes the
       value of the C{callDuringTry} keyword argument (default C{False}).
    """
    self.parseAction += [_trim_arity(action) for action in fns]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
961
def addCondition(self, *fns, **kwargs):
    """Add boolean predicate functions to this expression's list of parse
       actions.  See L{I{setParseAction}<setParseAction>} for the accepted
       function signatures.

       Each predicate is evaluated when the expression matches; if it
       returns a false value, a C{ParseException} is raised at the match
       location.

       Optional keyword arguments:
        - message - custom message for the raised exception
          (default "failed user-defined condition")
        - callDuringTry - stored on the element in the same way as for
          C{addParseAction}

       Returns C{self}.
    """
    msg = kwargs.get("message") or "failed user-defined condition"

    def makeConditionAction(predicate):
        # Bind the predicate through a function argument.  A closure over the
        # loop variable would be late-binding, so every generated action would
        # end up testing only the last function in fns.
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise ParseException(s,l,msg)
            return t
        return pa

    for fn in fns:
        # wrap once here rather than on every invocation of the action
        self.parseAction.append(makeConditionAction(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Register the callable to invoke when parsing fails at this expression.

       The fail action is called as C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       Its return value is not used.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}.
    """
    self.failAction = fn
    return self
988
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every match of the expressions in C{ignoreExprs}.

    Passes over the ignore expressions are repeated until a complete pass
    matches nothing; the resulting location is returned.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # keep consuming until this expression stops matching
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                pass
    return loc
1001
def preParse( self, instring, loc ):
    """Return C{loc} advanced past ignorable expressions and, when
    C{skipWhitespace} is set, past any characters in C{whiteChars}."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    skippable = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in skippable:
        loc += 1
    return loc
1013
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: consume nothing and return C{loc} together with
    a new empty token list."""
    no_tokens = []
    return loc, no_tokens
1016
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: return C{tokenlist} unchanged."""
    return tokenlist
1019 1020 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core parse routine: pre-parse, match, post-process, run parse actions.

    Steps:
     - optionally call C{preParse} (only when both C{callPreParse} and
       C{self.callPreparse} are true)
     - call C{parseImpl}, converting C{IndexError} into a C{ParseException}
       located at the end of the string
     - pass the tokens through C{postParse} and wrap them in C{ParseResults}
     - run the parse actions when C{doActions} or C{self.callDuringTry} is set;
       a non-C{None} return value from an action replaces the tokens

    When debugging or a fail action is configured, the debug/fail callbacks
    are invoked around these steps.

    Returns a C{(loc, ParseResults)} tuple.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slower path: exceptions have to be intercepted for the callbacks
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # the IndexError guard is skipped only when the element declares
        # mayIndexError false and loc is inside the string
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        # the action returned replacement tokens
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    # the action returned replacement tokens
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1092
def tryParse( self, instring, loc ):
    """Attempt a parse at C{loc} with C{doActions=False} and return the
    location after the match.

    A C{ParseFatalException} is converted into an ordinary
    C{ParseException} raised at C{loc}.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return outcome[0]
1098
def canParseNext(self, instring, loc):
    """Return C{True} if C{tryParse} succeeds at C{loc}, or C{False} if it
    raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around C{_parseNoCache}.

    Results are kept in the class-level C{ParserElement._exprArgCache} dict,
    keyed by C{(self, instring, loc, callPreParse, doActions)}.  A cached
    success is a C{(loc, tokens)} pair; a cached failure is the exception
    instance itself, which is raised again on a later hit.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value, Exception):
            raise value
        # return a copy of the cached tokens, presumably so that the cached
        # entry is not affected by what the caller does with the result
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            # the cache keeps a copy; the original tokens go to this caller
            ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException as pe:
            # clear the traceback before storing the exception in the cache
            pe.__traceback__ = None
            ParserElement._exprArgCache[ lookup ] = pe
            raise
# parsing entry point used by all elements; enablePackrat() rebinds it
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}

@staticmethod
def resetCache():
    """Empty the shared C{_exprArgCache} dict in place."""
    ParserElement._exprArgCache.clear()
_packratEnabled = False

@staticmethod
def enablePackrat():
    """Turn on "packrat" parsing, which memoizes parse attempts.

       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Both successful
       results and parsing exceptions are memoized.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the static method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1155
def parseString( self, instring, parseAll=False ):
    """Parse C{instring} with this expression, starting at location 0, and
       return the resulting tokens.

       This is the main interface to the client code, once the complete
       expression has been built.

       Set C{parseAll} to True to require that the entire input string be
       successfully parsed (equivalent to ending the grammar with
       C{L{StringEnd()}}).

       Note: unless C{keepTabs} is set, C{parseString} calls C{expandtabs()}
       on the input string, in order to report proper column numbers in
       parse actions.  If the input string contains tabs and the grammar uses
       parse actions that use the C{loc} argument to index into the string
       being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling C{parseWithTabs} on your grammar before calling C{parseString}
          (see L{I{parseWithTabs}<parseWithTabs>})
        - defining your parse action using the full C{(s,loc,toks)} signature, and
          referencing the input string using the parse action's C{s} argument
        - explicitly expanding the tabs in your input string before calling
          C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            loc = self.preParse( instring, loc )
            endCheck = Empty() + StringEnd()
            endCheck._parse( instring, loc )
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return tokens
1200
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Scan the input string for expression matches.  This is a generator:
       each match is yielded as a C{(tokens, start, end)} tuple.  May be
       called with optional C{maxMatches} argument, to clip scanning after
       'n' matches are found.  If C{overlap} is specified, then overlapping
       matches will be reported.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here; retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case l) is a different
                        # variable from 'nextLoc' above; confirm that advancing
                        # to nextLoc in this branch is the intended behavior
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward to avoid looping forever
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1250
def transformString( self, instring ):
    """Extension to C{L{scanString}} that rewrites matched text using the
       tokens returned for each match.

       To use C{transformString}, define a grammar and attach a parse action
       to it that modifies the returned token list.  Invoking
       C{transformString()} on a target string then scans for matches and
       replaces each matched span with its tokens; unmatched text is copied
       through.  Returns the resulting transformed string.

       Note that this sets C{keepTabs} to True on the element.
    """
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # text between the previous match and this one is kept as-is
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces.extend( toks.asList() )
                elif isinstance(toks,list):
                    pieces.extend( toks )
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        nonEmpty = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(nonEmpty)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1283
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}: return a C{ParseResults}
       holding the tokens of every match found, without the start and end
       locations.  May be called with optional C{maxMatches} argument, to
       clip searching after 'n' matches are found.
    """
    try:
        found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1297
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1307
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1317
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1327
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1337
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
       C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
       tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
       may also include C{None} as in:
        - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
        - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
        - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
        - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

       Note that C{expr*(None,n)} does not raise an exception if
       more than n exprs exist in the input stream; that is,
       C{expr*(None,n)} does not enforce a maximum number of expr
       occurrences.  If this behavior is desired, then write
       C{expr*(None,n) + ~expr}

       Raises C{TypeError} if C{other} is neither an int nor a tuple of
       ints/C{None}, and C{ValueError} for a negative count, a maximum
       smaller than the minimum, or a multiplier of 0 or (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None and ignore anything past two items
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # apply the format here; passing the values as extra exception
            # arguments would leave the placeholders unfilled
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # builds Optional(self + Optional(self + ...)) nested n deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the multiplier is the left operand;
    delegates to C{__mul__}."""
    product = self.__mul__(other)
    return product
1408
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1418
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1428
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1438
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1448
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1458
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A string operand is first wrapped with C{literalStringClass}.  For any
    other non-C{ParserElement} operand a C{SyntaxWarning} is issued and
    C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1468
def __invert__( self ):
    """Implementation of ~ operator - wraps this element in a C{L{NotAny}}."""
    negated = NotAny( self )
    return negated
1472
def __call__(self, name=None):
    """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

       If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
       passed as C{True}.

       If C{name} is omitted, same as calling C{L{copy}}.
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
1488
def suppress( self ):
    """Return this C{ParserElement} wrapped in C{Suppress}, which suppresses
       its output; useful to keep punctuation from cluttering up returned
       output.
    """
    quiet = Suppress( self )
    return quiet
1494
def leaveWhitespace( self ):
    """Turn off whitespace skipping before this C{ParserElement}'s pattern
       is matched.  This is normally only used internally by the pyparsing
       module, but may be needed in some whitespace-sensitive grammars.
       Returns C{self}.
    """
    self.skipWhitespace = False
    return self
1502
def setWhitespaceChars( self, chars ):
    """Set the whitespace characters skipped by this element, overriding
       the default.

       Also enables whitespace skipping and clears C{copyDefaultWhiteChars},
       so that C{copy} keeps these characters.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1510
def parseWithTabs( self ):
    """Keep C{<TAB>}s in the input string instead of expanding them to
       spaces before parsing (the default behavior).
       Must be called before C{parseString} when the input grammar contains elements that
       match C{<TAB>} characters.  Returns C{self}."""
    self.keepTabs = True
    return self
1517
def ignore( self, other ):
    """Register an expression to be skipped (e.g., comments) while doing
       pattern matching; may be called repeatedly, to define multiple
       comment or other ignorable patterns.

       A string is wrapped in C{Suppress}.  A C{Suppress} instance is added
       only if it is not already in C{ignoreExprs}; any other expression is
       copied and wrapped in C{Suppress}.  Returns C{self}.
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
1532
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable debugging for this element and install the three debug
       callbacks.  A false value for any callback selects the module's
       default action for that slot.  Returns C{self}."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1540
def setDebug( self, flag=True ):
    """Enable or disable display of debugging messages while doing pattern
       matching.  A true C{flag} installs the default debug actions; a false
       one clears C{debug}.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1549
def __str__( self ):
    """Return the element's C{name} attribute."""
    label = self.name
    return label
1552
def __repr__( self ):
    """Return the result of applying C{_ustr} to this element."""
    text = _ustr(self)
    return text
1555
def streamline( self ):
    """Mark this element as streamlined and clear C{strRepr}.
    Returns C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
1560
def checkRecursion( self, parseElementList ):
    """Base implementation: does nothing and returns C{None}."""
    return None
1563
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    This base implementation calls C{checkRecursion} with a fresh empty
    list; C{validateTrace} is accepted but not used here.
    """
    visited = []
    self.checkRecursion( visited )
1567
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.

       If C{file_or_filename} has a C{read()} method, its contents are read
       and the object is left open.  Otherwise it is treated as a filename:
       the file is opened in text mode, read entirely, and closed before
       parsing.

       C{parseAll} is passed on to C{L{parseString}}, whose result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object; the with-block closes the file even if
        # read() raises
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1587
def __eq__(self,other):
    """Equality test.

    - against another C{ParserElement}: true if it is the same object or
      both have equal instance attributes (C{vars})
    - against a string: true if this element parses the whole string
      (C{parseAll=True}) without raising a C{ParseBaseException}
    - otherwise: the result of comparing C{super(ParserElement,self)}
      with C{other}
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1599
def __ne__(self,other):
    """Inequality: the logical negation of C{self == other}."""
    same = (self == other)
    return not same
1602
def __hash__(self):
    """Hash by object identity (C{id(self)}), independent of C{__eq__}."""
    identity = id(self)
    return hash(identity)
1605
def __req__(self,other):
    """Return the result of C{self == other}."""
    outcome = (self == other)
    return outcome
1608
def __rne__(self,other):
    """Return the logical negation of C{self == other}."""
    same = (self == other)
    return not same
1611
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, printing each
       test followed by the dumped parse results or, on failure, a marker
       showing where the parse failed and the exception text.  Quick and
       easy way to run a parse expression against a list of sample strings.

       Parameters:
        - tests - a list of separate test strings, or a multiline string of test strings
        - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself; the unbound str.strip rejects
        # unicode lines under Python 2
        tests = [testLine.strip() for testLine in tests.splitlines()]
    for t in tests:
        out = [t]
        try:
            out.append(self.parseString(t, parseAll=parseAll).dump())
        except ParseException as pe:
            if '\n' in t:
                # multi-line test: show the failing line with a caret under the column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^')
            else:
                out.append(' '*pe.loc + '^')
            out.append(str(pe))
        out.append('')
        print('\n'.join(out))
1636
class Token(ParserElement):
    """Abstract base class for C{ParserElement} subclasses that define
    atomic matching patterns."""

    def __init__( self ):
        # tokens never save their results as a list
        super(Token,self).__init__( savelist=False )
1642
class Empty(Token):
    """A token that always matches, without consuming any input."""

    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1651
class NoMatch(Token):
    """A token that never matches."""

    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Always raise C{ParseException} at C{loc}."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
1664
class Literal(Token):
    """Token that matches exactly the given string."""

    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Return the location after the literal and the literal itself if
        C{instring} contains it at C{loc}; otherwise raise C{ParseException}."""
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        if self.matchLen != 1 and not instring.startswith(self.match,loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.match
1692 _L = Literal 1693 ParserElement.literalStringClass = Literal
1694 1695 -class Keyword(Token):
1696 """Token to exactly match a specified string as a keyword, that is, it must be 1697 immediately followed by a non-keyword character. Compare with C{L{Literal}}:: 1698 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}. 1699 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'} 1700 Accepts two optional constructor arguments in addition to the keyword string: 1701 C{identChars} is a string of characters that would be valid identifier characters, 1702 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive 1703 matching, default is C{False}. 1704 """ 1705 DEFAULT_KEYWORD_CHARS = alphanums+"_$" 1706
1707 - def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
1708 super(Keyword,self).__init__() 1709 self.match = matchString 1710 self.matchLen = len(matchString) 1711 try: 1712 self.firstMatchChar = matchString[0] 1713 except IndexError: 1714 warnings.warn("null string passed to Keyword; use Empty() instead", 1715 SyntaxWarning, stacklevel=2) 1716 self.name = '"%s"' % self.match 1717 self.errmsg = "Expected " + self.name 1718 self.mayReturnEmpty = False 1719 self.mayIndexError = False 1720 self.caseless = caseless 1721 if caseless: 1722 self.caselessmatch = matchString.upper() 1723 identChars = identChars.upper() 1724 self.identChars = set(identChars)
1725
1726 - def parseImpl( self, instring, loc, doActions=True ):
1727 if self.caseless: 1728 if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and 1729 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and 1730 (loc == 0 or instring[loc-1].upper() not in self.identChars) ): 1731 return loc+self.matchLen, self.match 1732 else: 1733 if (instring[loc] == self.firstMatchChar and 1734 (self.matchLen==1 or instring.startswith(self.match,loc)) and 1735 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and 1736 (loc == 0 or instring[loc-1] not in self.identChars) ): 1737 return loc+self.matchLen, self.match 1738 raise ParseException(instring, loc, self.errmsg, self)
1739
def copy(self):
    """Return a copy of this keyword that keeps its identifier characters.

    The copy receives its own C{set} built from this instance's
    C{identChars}.  Previously the copy was reset to the class-wide
    C{DEFAULT_KEYWORD_CHARS} string, which silently discarded any custom
    C{identChars} (and the upper-casing applied for caseless keywords).
    """
    c = super(Keyword,self).copy()
    # independent set so later changes to one instance do not leak to the other
    c.identChars = set(self.identChars)
    return c
@staticmethod
def setDefaultKeywordChars( chars ):
    """Replace the class-wide default keyword characters.

    Stores C{chars} in C{Keyword.DEFAULT_KEYWORD_CHARS}.
    """
    Keyword.DEFAULT_KEYWORD_CHARS = chars
1750
class CaselessLiteral(Literal):
    """Token that matches a given string without regard to letter case.
       Note: a successful match always returns the string as it was given to
       the constructor, NOT the text as it appeared in the input.
    """
    def __init__( self, matchString ):
        # the base class stores the upper-cased form as self.match
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice against the stored match string."""
        stop = loc + self.matchLen
        if instring[loc:stop].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return stop, self.returnString
1767
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}.

    Constructed like C{Keyword}, with C{caseless=True} always passed to the
    base class.  Matching is inherited from C{Keyword.parseImpl}, whose
    caseless branch checks the characters both before and after the keyword;
    the override that used to live here skipped the preceding-character test,
    so a keyword could match in the middle of a longer identifier.
    """
    def __init__( self, matchString, identChars=None ):
        if identChars is None:
            # resolve at call time so setDefaultKeywordChars() is honoured
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
1777
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{excludeChars} parameter can list characters that might be found in
       the input C{bodyChars} string; useful to define a word of all printables
       except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        """Build the character sets, length limits and (when possible) a regex.

        Raises C{ValueError} if C{min} is less than 1.
        """
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            # no separate body set: body characters are the initial characters
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in either character string,
        # the word can be matched by a single compiled regular expression.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character scan in parseImpl;
                # KeyboardInterrupt/SystemExit are no longer swallowed here
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc}; returns C{(newloc, text)} or raises C{ParseException}."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # an explicit max means the word must end here, not merely be truncated
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class string if available, else a cached C{W:(...)} summary."""
        try:
            return super(Word,self).__str__()
        except Exception:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1900
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, which is used
        directly.  Any other type raises C{ValueError}; an invalid pattern
        string warns and re-raises the C{re} error.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Use the pattern's source text; str() of a compiled pattern is the
            # object's repr, not the regular expression itself.
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regex at C{loc}; named groups become keys of the returned C{ParseResults}."""
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the base class string if available, else a cached C{Re:(...)} summary."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1964
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        r"""Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
            - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

            Raises C{SyntaxError} (after a C{SyntaxWarning}) if C{quoteChar} or
            C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # Body alternative 1: any character that is not the first end-quote
        # character or the escape character (nor CR/LF unless multiline).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        # For a multi-character end quote, also accept proper prefixes of it
        # that are followed by a character which breaks the end-quote sequence.
        if len(self.endQuoteChar) > 1:
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}; optionally strip quotes and resolve escapes."""
        # cheap first-character test before running the regex
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base class string if available, else a cached description of the quotes."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2088
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the disallowed characters and length limits.

        Raises C{ValueError} if C{min} is less than 1.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters not in C{notChars}; returns C{(newloc, text)} or raises C{ParseException}."""
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class string if available, else a cached C{!W:(...)} summary."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            # abbreviate long character sets to their first four characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2151
class White(Token):
    """Matching class for significant whitespace.  Pyparsing grammars normally
       skip whitespace; use this class where particular whitespace must be
       matched.  Construct with a string of the whitespace characters to
       match (default is C{" \\t\\r\\n"}), plus optional C{min}, C{max}, and
       C{exact} lengths as defined for the C{L{Word}} class."""
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        #~ self.leaveWhitespace()
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of C{matchWhite} characters within the length limits."""
        white = self.matchWhite
        if instring[loc] not in white:
            raise ParseException(instring, loc, self.errmsg, self)
        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in white:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
2199
class _PositionToken(Token):
    """Base class for the position-testing tokens; named after the concrete class."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
2207
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; handy when scraping tabular reports."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorables and whitespace until the target column (or non-space text) is reached."""
        if col(loc,instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column; fail if already past it."""
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        return newloc, instring[ loc: newloc ]
2230
class LineStart(_PositionToken):
    """Succeeds when the current position is at the start of a line of the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline is significant here, so it must not be skipped as whitespace
        whitechars = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( whitechars )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return C{loc + 1} if the base class pre-parse stops on a newline, else C{loc}."""
        preloc = super(LineStart,self).preParse(instring,loc)
        if instring[preloc] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" ) #col(loc, instring) != 1:
        if atLineStart:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2250
class LineEnd(_PositionToken):
    """Succeeds when the current position is at the end of a line of the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline is significant here, so it must not be skipped as whitespace
        whitechars = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( whitechars )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a newline at C{loc}, or succeed with no tokens exactly at end of input."""
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []
        raise ParseException(instring, loc, self.errmsg, self)
2268
class StringStart(_PositionToken):
    """Succeeds when the current position is at the start of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        # a non-zero loc still counts as the start if everything before it
        # is just whitespace and ignoreables
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2281
class StringEnd(_PositionToken):
    """Succeeds when the current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed at or beyond the end of C{instring}; fail anywhere inside it."""
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        if loc > instrlen:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2297
class WordStart(_PositionToken):
    r"""Succeeds when the current position is at the beginning of a Word, i.e.
       the preceding character is not in the given set of C{wordChars}
       (default=C{printables}) and the current character is.  Use
       C{WordStart(alphanums)} to emulate the C{\b} behavior of regular
       expressions.  C{WordStart} also matches at the very beginning of the
       string being parsed.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        if loc == 0:
            return loc, []
        wordchars = self.wordChars
        if instring[loc-1] in wordchars or instring[loc] not in wordchars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2316
class WordEnd(_PositionToken):
    r"""Succeeds when the current position is at the end of a Word, i.e. the
       current character is not in the given set of C{wordChars}
       (default=C{printables}) and the preceding character is.  Use
       C{WordEnd(alphanums)} to emulate the C{\b} behavior of regular
       expressions.  C{WordEnd} also matches at the end of the string being
       parsed.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        instrlen = len(instring)
        # positions at or past the end of the input always succeed
        if not (instrlen > 0 and loc < instrlen):
            return loc, []
        wordchars = self.wordChars
        if instring[loc] in wordchars or instring[loc-1] not in wordchars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2337
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Normalize C{exprs} into the list C{self.exprs}.

        Accepts a generator, a single string, a sequence (strings are wrapped
        in C{Literal} when every item is a string), any other iterable, or a
        single non-iterable expression.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        # NOTE(review): collections.Sequence was removed in Python 3.10;
        # collections.abc.Sequence is the replacement if the file's imports
        # are updated.
        elif isinstance( exprs, collections.Sequence ):
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(Literal, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions and reset the cached string form."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the originals are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and pass the newest ignore expression to all contained expressions."""
        # a Suppress that is already registered is not registered twice
        if not isinstance( other, Suppress ) or other not in self.ignoreExprs:
            super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base class string if available, else a cached C{ClassName:(exprs)} summary."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline contained expressions and flatten directly nested expressions of the same class."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """Validate every contained expression, then check this expression for recursion."""
        # None instead of a shared mutable default list
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy whose contained expressions are themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2447
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker expression (named C{'-'}): failures in the expressions after it are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first contained expression
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and accumulate their tokens."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # past the error stop, convert failures to ParseSyntaxException
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} in place; a plain string is wrapped first."""
        if isinstance( other, basestring ):
            # use the configurable literal class, as Or.__ixor__ and
            # MatchFirst.__ior__ do, instead of hard-coding Literal
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # stop after the first expression that cannot return empty
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2512
class Or(ParseExpression):
    """Requires at least one of the given C{ParseExpression}s to match.
       When several match, the one matching the longest string is used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse with the candidates from longest to shortest match."""
        furthestLoc = -1
        furthestExc = None
        candidates = []
        instrlen = len(instring)
        for expr in self.exprs:
            try:
                endloc = expr.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if instrlen > furthestLoc:
                    furthestExc = ParseException(instring,instrlen,expr.errmsg,self)
                    furthestLoc = instrlen
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endloc, expr))

        # stable sort: equal lengths keep their definition order
        for _,expr in sorted(candidates, key=lambda m: m[0], reverse=True):
            try:
                return expr._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc


    def __ixor__(self, other ):
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2582
class MatchFirst(ParseExpression):
    """Requires at least one of the given C{ParseExpression}s to match.
       When several match, the first one listed is the one that is used.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses successfully."""
        furthestLoc = -1
        furthestExc = None
        instrlen = len(instring)
        for expr in self.exprs:
            try:
                return expr._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if instrlen > furthestLoc:
                    furthestExc = ParseException(instring,instrlen,expr.errmsg,self)
                    furthestLoc = instrlen

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2638
2639 2640 -class Each(ParseExpression):
2641 """Requires all given C{ParseExpression}s to be found, but in any order. 2642 Expressions may be separated by whitespace. 2643 May be constructed using the C{'&'} operator. 2644 """
2645 - def __init__( self, exprs, savelist = True ):
2646 super(Each,self).__init__(exprs, savelist) 2647 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 2648 self.skipWhitespace = True 2649 self.initExprGroups = True
2650
2651 - def parseImpl( self, instring, loc, doActions=True ):
2652 if self.initExprGroups: 2653 self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) 2654 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 2655 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 2656 self.optionals = opt1 + opt2 2657 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 2658 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 2659 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 2660 self.required += self.multirequired 2661 self.initExprGroups = False 2662 tmpLoc = loc 2663 tmpReqd = self.required[:] 2664 tmpOpt = self.optionals[:] 2665 matchOrder = [] 2666 2667 keepMatching = True 2668 while keepMatching: 2669 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 2670 failed = [] 2671 for e in tmpExprs: 2672 try: 2673 tmpLoc = e.tryParse( instring, tmpLoc ) 2674 except ParseException: 2675 failed.append(e) 2676 else: 2677 matchOrder.append(self.opt1map.get(id(e),e)) 2678 if e in tmpReqd: 2679 tmpReqd.remove(e) 2680 elif e in tmpOpt: 2681 tmpOpt.remove(e) 2682 if len(failed) == len(tmpExprs): 2683 keepMatching = False 2684 2685 if tmpReqd: 2686 missing = ", ".join(_ustr(e) for e in tmpReqd) 2687 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 2688 2689 # add any unmatched Optionals, in case they have default values defined 2690 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 2691 2692 resultlist = [] 2693 for e in matchOrder: 2694 loc,results = e._parse(instring,loc,doActions) 2695 resultlist.append(results) 2696 2697 finalResults = ParseResults() 2698 for r in resultlist: 2699 dups = {} 2700 for k in r.keys(): 2701 if k in finalResults: 2702 tmp = ParseResults(finalResults[k]) 2703 tmp += ParseResults(r[k]) 2704 dups[k] = tmp 2705 finalResults += ParseResults(r) 2706 for k,v in 
dups.items(): 2707 finalResults[k] = v 2708 return loc, finalResults
2709
def __str__( self ):
    """Return the assigned name when one exists, otherwise the cached
    C{"{a & b & ...}"} rendering of the contained expressions."""
    if not hasattr(self,"name"):
        if self.strRepr is None:
            # build the representation once and remember it on the instance
            rendered = " & ".join(_ustr(sub) for sub in self.exprs)
            self.strRepr = "{" + rendered + "}"
        return self.strRepr
    return self.name
2718
def checkRecursion( self, parseElementList ):
    """Run the recursion check on every contained expression.

    Each sub-expression receives the same list: a copy of
    C{parseElementList} with this element appended.
    """
    trail = parseElementList[:]
    trail += [ self ]
    for sub in self.exprs:
        sub.checkRecursion( trail )
2723
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}, which may be
    C{None}.
    """
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}; a plain string is converted to a C{Literal}.

        When C{expr} is not C{None}, its index-error/empty-match flags and its
        whitespace, list-saving, pre-parse and ignore settings are copied onto
        this wrapper.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the contained expression.

        Raises C{ParseException} when no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a private copy of the
        contained expression.  Returns C{self}."""
        self.skipWhitespace = False
        # Guard BEFORE copying: the previous code copied first and therefore
        # raised AttributeError when no expression had been set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable and pass it on to the contained
        expression.  A C{Suppress} that is already registered is not added
        again.  Returns C{self}."""
        if not isinstance( other, Suppress ) or other not in self.ignoreExprs:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                # forward the entry the base class just appended
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the contained expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears
        in C{parseElementList}; otherwise continue the check in the contained
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=None ):
        """Validate the contained expression, then check this element for
        recursion.  C{validateTrace} is the list of elements visited so far
        (it is copied, never modified)."""
        tmp = (validateTrace or [])[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string if it can be produced, else a cached
        C{"ClassName:(expr)"} rendering."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # base class could not produce a string; fall back to the
            # structural representation below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2795
class FollowedBy(ParseElementEnhance):
    """Positive lookahead.  C{FollowedBy} checks that the given parse
    expression matches at the current position but does *not* advance the
    parsing position within the input string.  C{FollowedBy} always returns
    a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead consumes nothing, so it can always "match empty"
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the contained expression at C{loc}; any exception it raises
        propagates.  On success return the unchanged location and no tokens."""
        lookahead = self.expr
        lookahead.tryParse( instring, loc )
        return loc, []
2809
class NotAny(ParseElementEnhance):
    """Negative lookahead.  C{NotAny} verifies that the given parse expression
    does *not* match at the current position, and does *not* advance the
    parsing position within the input string.  C{NotAny} also does *not* skip
    over leading whitespace, and always returns a null token list.  May be
    constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # Turn off whitespace skipping on this element only; calling
        # self.leaveWhitespace() would propagate the setting to the wrapped
        # expression, which is not wanted.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc, [])} when the contained expression cannot parse at
        C{loc}; raise C{ParseException} when it can."""
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"~{expr}"} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
2837
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__( self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = Literal(sentinel)
        # store the negated sentinel: it parses only where the sentinel does NOT match
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expression once (mandatory), then repeatedly
        until it fails or the stop sentinel is seen.  Returns C{(loc, tokens)}."""
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # The first match is required.  If we are sitting on the stopOn
        # sentinel this raises, and the whole repetition fails.
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_expr( instring, loc, doActions, callPreParse=False )
        try:
            has_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                if has_ignorables:
                    start = skip_ignorables( instring, loc )
                else:
                    start = loc
                loc, more = parse_expr( instring, start, doActions )
                if more or more.haskeys():
                    tokens += more
        except (ParseException,IndexError):
            # a failed repetition (or a sentinel hit) simply ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"{expr}..."} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited method, but the returned element always has
        C{saveAsList} switched on."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2897
class ZeroOrMore(OneOrMore):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse like C{OneOrMore}; if even the first match fails, succeed
        with no tokens at the original location."""
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            outcome = (loc, [])
        return outcome

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"[expr]..."} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name
2925
class _NullToken(object):
    """Placeholder object that is always falsy and whose string form is empty."""
    def __str__(self):
        return ""
    def __bool__(self):
        return False
    # Python 2 looks up __nonzero__ for truth testing
    __nonzero__ = __bool__

# sentinel used as the "no default supplied" marker
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one time
     - default (optional) - value to be returned if the optional expression
          is not found.
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the contained expression; when it fails, succeed at the same
        location with the default value (if one was given) or with no tokens."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default configured: an unmatched Optional yields nothing
                return loc, []
            name = self.expr.resultsName
            if not name:
                return loc, [ fallback ]
            # the wrapped expression is named, so expose the default under that name too
            tokens = ParseResults([ fallback ])
            tokens[name] = fallback
        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"[expr]"} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
2969
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped test; if found before the target expression is found,
          the SkipTo is not a match
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain-string failOn is promoted to a Literal
        self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc} one character at a time until the target
        expression parses; return the skipped text (plus the target's tokens
        when C{include} was requested).  Raises C{ParseException} when the end
        of the input is reached without a match."""
        origin = loc
        end = len(instring)
        target_parse = self.expr._parse
        fail_matches = None
        if self.failOn is not None:
            fail_matches = self.failOn.canParseNext
        skip_ignored = None
        if self.ignoreExpr is not None:
            skip_ignored = self.ignoreExpr.tryParse

        scan = loc
        while scan <= end:
            # NOTE(review): this break bypasses the while/else below, so a
            # failOn hit falls through to the result-building code and is
            # returned as a successful skip, although the class docstring says
            # "the SkipTo is not a match" - confirm which is intended.
            if fail_matches is not None and fail_matches(instring, scan):
                break

            if skip_ignored is not None:
                # step over as many consecutive ignorable matches as possible
                while True:
                    try:
                        scan = skip_ignored(instring, scan)
                    except ParseBaseException:
                        break

            try:
                target_parse(instring, scan, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # target not here; move one character further
                scan += 1
            else:
                # target found at `scan`
                break

        else:
            # loop ran past the end of the input without finding the target
            raise ParseException(instring, loc, self.errmsg, self)

        # assemble the result from the text that was skipped
        skipped = ParseResults(instring[origin:scan])

        if self.includeMatch:
            scan, matched = target_parse(instring,scan,doActions,callPreParse=False)
            skipped += matched

        return scan, skipped
3042
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install C{other} as the contained expression and copy its
        index-error/empty-match flags and its whitespace, list-saving and
        ignore settings.  A plain string is converted with
        C{ParserElement.literalStringClass}.  Returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        # drop any cached string representation
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """C{fwd <<= expr} behaves exactly like C{fwd << expr}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only (the contained
        expression is left untouched).  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; the flag is set before
        descending so that a recursive grammar cannot re-enter.  Returns C{self}."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """Validate the contained expression unless this element is already in
        C{validateTrace}, then check for recursion.  C{validateTrace} is
        copied, never modified."""
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if any, else C{"<ClassName>: ..."}.

        The contained expression is deliberately not rendered.  The former
        implementation that did so sat unreachable behind the return below
        (it was stubbed out for memory and performance reasons) and has been
        removed.
        """
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """Return a copy.  When no expression has been assigned yet, the copy
        is a new C{Forward} that refers to this one, so a later assignment to
        the original is seen through the copy."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
3119
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always C{"..."}."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
3123
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # C{savelist} is accepted for signature compatibility but is not
        # forwarded to the base class; saveAsList is always reset to False.
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
3129
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # When adjacency is required, switch off whitespace skipping in the
        # contained expressions ...
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but turn it back on for the Combine itself.
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        """Register an ignorable expression.  With C{adjacent} set, only this
        element is affected (C{ParserElement.ignore}); otherwise the inherited
        behaviour applies.  Returns C{self}."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens by one string built from them."""
        retToks = tokenlist.copy()
        del retToks[:]
        merged = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ merged ], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [ retToks ]
        return retToks
3161
class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list - useful for keeping the tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions together."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the whole token list in a one-element list."""
        grouped = [ tokenlist ]
        return grouped
3170
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add one keyed entry to C{tokenlist} for every non-empty token group,
        using the group's first token as the key."""
        for position, group in enumerate(tokenlist):
            size = len(group)
            if size == 0:
                continue
            key = group[0]
            if isinstance(key,int):
                # integer keys are stored under their stripped string form
                key = _ustr(group[0]).strip()
            if size == 1:
                # key only: the value is the empty string
                value = ""
            elif size == 2 and not isinstance(group[1],ParseResults):
                # simple key/value pair
                value = group[1]
            else:
                # everything after the key becomes the value
                rest = group.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest,ParseResults) and rest.haskeys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value,position)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3203
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Drop every token."""
        nothing = []
        return nothing

    def suppress( self ):
        """Already suppressed - return this element itself."""
        return self
3212
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self,s,l,t):
        """Run the wrapped action on first use; raise C{ParseException} on any
        later call until C{reset()} is invoked."""
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only mark as used once the action has returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
3227
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the parse action and a "leaving" line afterwards, showing either
    the returned value or the exception raised (which is re-raised).
    """
    f = _trim_arity(f)
    def z(*paArgs):
        # __name__ exists on both Python 2 and 3; the former func_name
        # attribute is Python 2 only and raised AttributeError on Python 3.
        thisFunc = getattr(f, "__name__", f.__class__.__name__)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # extra leading argument: report its class name as a prefix
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, whitespace and comments may appear between the list elements
       and the delimiters; pass C{combine=True} to override this.
       With C{combine=True} the matching tokens are returned as a single token
       string with the delimiters included; otherwise they are returned as a
       list of tokens with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
3266
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       The pattern has the form::
           integer expr expr expr...
       where the leading integer gives the number of expr expressions that follow.
       The matched tokens are returned as a list of the expr tokens - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # re-target the Forward once the count is known
        count = t[0]
        if count:
            body = Group(And([expr]*count))
        else:
            body = Group(empty)
        arrayExpr << body
        return []
    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')

def _flatten(L):
    """Return a new flat list with the items of C{L}; nested C{list}
    instances (and only those) are expanded recursively."""
    flat = []
    for item in L:
        if not isinstance(item,list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
3295
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        if not t:
            # nothing was matched: the repeat matches nothing either
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several tokens: flatten them and require each one literally, in order
        flat = _flatten(t.asList())
        rep << And(Literal(tok) for tok in flat)
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")
        # replace any earlier check with one bound to the latest match
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

def _escapeRegexRangeChars(s):
    """Backslash-escape the characters that are special inside a regex
    C{[...]} set (backslash, ^, -, ]) and spell newline and tab as C{\\n}
    and C{\\t}."""
    # the backslash must be handled first so later escapes are not doubled
    for special in r"\^-]":
        s = s.replace(special,_bslash+special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)
3357
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Returns C{NoMatch()} when no symbols are given.
    """
    # collections.Sequence was removed in Python 3.10; prefer collections.abc
    # and fall back to the old location on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for position i; advance
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character set suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort only: fall through to the MatchFirst form below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3421
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Builds the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3431
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text.  By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    def reportLocation(s,loc,t):
        return loc
    locMarker = Empty().setParseAction(reportLocation)
    endlocMarker = locMarker.copy()
    # the end marker must not run pre-parse before reporting its location
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens in place; the two marker names are removed
            begin = t.pop('_original_start')
            finish = t.pop('_original_end')
            t[:] = [s[begin:finish]]
    matchExpr.setParseAction(extractText)
    return matchExpr

def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty.  The result is the first token only."""
    converter = TokenConverter(expr)
    return converter.setParseAction(lambda t:t[0])

def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    locator = Empty().setParseAction(lambda s,l,t: l)
    startMarker = locator("locn_start")
    valuePart = expr("value")
    # the end marker must not skip whitespace after the value
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + valuePart + endMarker)


# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# building blocks for parsing regex-style "[...]" range strings
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Take the hex digits as everything after the x/X marker.  The previous
# t[0].lstrip(r'\0x') left an uppercase 'X' in place and stripped all-zero
# values such as \x00 down to an empty string, so int() raised on both.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][t[0].lower().index('x')+1:],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string when C{s} cannot be parsed or expanded.
    """
    def _expanded(part):
        # a parsed range arrives as a two-item group; single characters pass through
        if not isinstance(part,ParseResults):
            return part
        return ''.join(unichr(c) for c in range(ord(part[0]),ord(part[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # deliberate best effort: any parsing/expansion error yields "", but
        # KeyboardInterrupt and SystemExit are no longer swallowed
        return ""
3511
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises
       C{ParseException} when the match is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        actual = col(locn,strg)
        if actual != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol

def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.
    """
    def _replace(s,l,t):
        # ignore whatever was matched and emit the fixed replacement
        return [replStr]
    return _replace
3526
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and the last character of the first token
    return quoted[1:-1]
3533
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3537
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3541
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a string (turned into a C{Keyword}, caseless unless
    C{xml} is true) or an expression whose C{name} is used as the tag name.
    Returns the pair C{(openTag, closeTag)}.
    """
    if not isinstance(tagStr,basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    tagAttrName = Word(alphas,alphanums+"_-:")
    # trailing "/" flag; parses to True only when the slash is present
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    if xml:
        # XML: attribute values are always double-quoted and always present
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attributes = Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue )))
    else:
        # HTML: values may be quoted or bare, may be missing altogether, and
        # attribute names are lower-cased
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attributes = Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) +
                Optional( Suppress("=") + tagAttrValue ) )))
    openTag = Suppress("<") + tagStr("tag") + attributes + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names look like startTagName / endTagName, with ':' removed
    camelName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+camelName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+camelName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3570
def makeHTMLTags(tagStr):
    """Build the opening and closing tag expressions for an HTML tag name.

       Delegates to C{_makeTags} with XML mode switched off and returns its
       C{(openTag, closeTag)} pair.
    """
    openTag, closeTag = _makeTags( tagStr, False )
    return openTag, closeTag
3574
def makeXMLTags(tagStr):
    """Build the opening and closing tag expressions for an XML tag name.

       Delegates to C{_makeTags} with XML mode switched on and returns its
       C{(openTag, closeTag)} pair.
    """
    openTag, closeTag = _makeTags( tagStr, True )
    return openTag, closeTag
3578
def withAttribute(*args,**attrDict):
    """Create a validating parse action for start tags built with
       C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Attach it to a start tag to
       require particular attribute values, avoiding false matches on common
       tags such as C{<TD>} or C{<DIV>}.

       The required attributes may be given as:
        - keyword arguments, as in C{(align="right")}, or
        - an explicit dict with the C{**} operator, when an attribute name is
          also a Python reserved word, as in
          C{**{"class":"Customer", "align":"right"}}, or
        - positional name-value tuples, as in
          C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
       Names with a namespace prefix cannot be written as plain keyword
       arguments, so use the dict or tuple form for those.  If any positional
       tuples are given, the keyword arguments are ignored.

       If just testing for C{class} (with or without a namespace), use
       C{L{withClass}}.

       To require only that an attribute is present, whatever its value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required
       attribute is missing or has a different value.
    """
    source = args if args else attrDict.items()
    # snapshot as a list of pairs so the parse action can iterate it repeatedly
    required = [(name, value) for name, value in source]

    def pa(s,l,tokens):
        for name, expected in required:
            if name not in tokens:
                raise ParseException(s,l,"no matching attribute " + name)
            if expected != withAttribute.ANY_VALUE:
                actual = tokens[name]
                if actual != expected:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (name, actual, expected))
    return pa
# sentinel meaning "the attribute must exist, any value is acceptable"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Shortcut for C{L{withAttribute}} when filtering on the C{class}
       attribute, which is awkward to spell directly because C{class} is a
       reserved word in Python.

       Parameters:
        - classname - required value of the class attribute
        - namespace - optional namespace prefix; when given, the attribute
          checked is C{<namespace>:class} (default='')
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
# Associativity markers for operator definitions.  LEFT and RIGHT are unique
# sentinel objects; infixNotation compares each operator's rightLeftAssoc
# entry against them.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element (operand)
          of the nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple (or list) of two expressions,
              for the two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} if numTerms is not 1, 2 or 3, if a ternary
       operator is not given as a pair of expressions, or if rightLeftAssoc is
       neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the optional parseAction member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate BEFORE building the name: formatting a None or a list
            # with "%s%s" would raise TypeError and hide the real problem
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no explicit operator: adjacent terms (e.g. implied multiplication)
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, else falls through to the
        # previously built level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# operatorPrecedence is a second name bound to the same function as infixNotation
operatorPrecedence = infixNotation

# Quoted-string expressions.  Each regex matches the opening quote followed by
# any run of: a character that is not the quote, a newline, a carriage return
# or a backslash; a doubled quote; or a backslash escape (backslash plus any
# non-"x" character, or "\x" plus hex digits).  The closing quote is matched
# as a separate literal and Combine joins the pieces into one token.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# quotedString accepts either form, trying the double-quoted alternative first
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quotedString immediately preceded by the letter u
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       When no C{content} expression is supplied, the nested expression
       captures all whitespace-delimited content between the delimiters as a
       list of separate values; in that case both delimiters must be strings.

       Use C{ignoreExpr} for expressions that may themselves contain the
       delimiter characters and must not affect nesting, such as quotedString
       or a comment expression.  Combine several with an C{L{Or}} or
       C{L{MatchFirst}}.  The default is L{quotedString}; pass C{None} if
       nothing is to be ignored.

       Raises C{ValueError} if opener and closer are equal, or if C{content}
       is omitted and either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded from the
            # character set, together with whitespace
            excluded = opener+closer+ParserElement.DEFAULT_WHITE_CHARS
            if ignoreExpr is None:
                content = empty.copy() + CharsNotIn(excluded).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                            CharsNotIn(excluded,exact=1))
                                  ).setParseAction(stripToken)
        else:
            # multi-character delimiters need negative lookahead before each
            # non-whitespace character
            if ignoreExpr is None:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                            CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                  ).setParseAction(stripToken)
            else:
                content = Combine(OneOrMore(~ignoreExpr +
                                            ~Literal(opener) + ~Literal(closer) +
                                            CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                  ).setParseAction(stripToken)

    nested = Forward()
    if ignoreExpr is None:
        item = nested | content
    else:
        item = ignoreExpr | nested | content
    nested <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    nested.setName('nested %s%s expression' % (opener,closer))
    return nested
3758
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that this function also registers C{_bslash + LineEnd()} as an
       ignorable expression on C{blockStatementExpr}, modifying the
       expression passed in.
    """
    def checkPeerIndent(s,l,t):
        # statement must start in the column on top of the stack; at end of
        # input there is nothing to check
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                # deeper than the current block: fatal exception type
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current level,
        # and becomes the new top of the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: the column must be left of the current level and at
        # or left of the enclosing one; the current level is then popped
        if l >= len(s): return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends; only tab and space are skipped as whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers whose parse actions perform the column checks above
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: no INDENT/UNDENT bracketing, statements only need to be peers
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # ignore a backslash followed by a line end inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# character sets built with srange from the given range specifications
# NOTE(review): the names suggest 8-bit (Latin-1) letters and punctuation - confirm
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic HTML open/close tag expressions that accept any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character: gt, lt, amp, nbsp, quot, apos
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches "&<name>;" for the names above, exposing the name as the "entity" group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Parse action that swaps a common HTML entity for its character.

       Looks up the C{entity} named result of the match in C{_htmlEntityMap};
       returns C{None} when the name is not in the map.
    """
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# "/*", then any run of characters that does not contain "*/", then the closing "*/"
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")

# "<!--" up to the first "-->" (non-greedy, may span lines)
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" to end of line; a backslash immediately before the newline continues the comment
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
# either a C style block comment or a "//" comment
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")

# javaStyleComment is a second name for the same expression object as cppStyleComment
javaStyleComment = cppStyleComment
# "#" to end of line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one item of a comma-separated list: words of printables other than ",", which
# may be separated by spaces/tabs provided the whitespace is not followed by a
# comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list of quoted strings or comma items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


# Self-test, run only when the module is executed as a script: builds a small
# "select ... from ..." grammar and feeds it sample statements via runTests.
if __name__ == "__main__":

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # NOTE(review): the "Xelect", "frox", bare "Select" and "^^^" lines are
    # presumably deliberate failure cases - confirm
    simpleSQL.runTests("""\
        SELECT * from XYZZY, ABC
        select * from SYS.XYZZY
        Select A from Sys.dual
        Select AA,BB,CC from Sys.dual
        Select A, B, C from Sys.dual
        Select A, B, C from Sys.dual
        Xelect A, B, C from Sys.dual
        Select A, B, C frox Sys.dual
        Select
        Select ^^^ frox Sys.dual
        Select A, B, C from Sys.dual, Table2""")