Convert gadfly from the regex module to re; gftest.py shows compatible test results

after the changes are applied, and the tutorial is functional with gadfly.

Convert gadfly from the regex module to re; gftest.py shows compatible test results
after the changes are applied, and the tutorial is functional with gadfly.
bdab8faf · matt@zope.com · da45006f · bdab8faf · bdab8faf · bdab8faf
Commit bdab8faf authored May 16, 2001 by matt@zope.com
4 changed files
--- a/lib/python/Products/ZGadflyDA/gadfly/kjParseBuild.py
+++ b/lib/python/Products/ZGadflyDA/gadfly/kjParseBuild.py
@@ -14,7 +14,7 @@
 import string
 import kjSet
 import kjParser
-import regex
+import re

 # import some constants
 from kjParser import \

--- a/lib/python/Products/ZGadflyDA/gadfly/kjParser.py
+++ b/lib/python/Products/ZGadflyDA/gadfly/kjParser.py
@@ -13,8 +13,7 @@

 import kjSet
 import string
-import regex
-import regsub
+import re
 import string

 # set this flag for regression testing at each load
@@ -40,7 +39,7 @@ THISMODULE = "kjParser"

 # regular expression for matching whitespace
 WHITERE = "["+string.whitespace+"]+"
-WHITEREGEX = regex.compile(WHITERE)
+WHITEREGEX = re.compile(WHITERE)

 # local errors
 LexTokenError = "LexTokenError" # may happen on bad string
@@ -62,6 +61,17 @@ ENDOFFILETOKEN = (TERMFLAG, EOFFLAG)
 # in FSM use the following terminal to indicate eof
 ENDOFFILETERM = (ENDOFFILETOKEN, EOFFLAG)

+# Utility function for match conversion from regex to re
+def RMATCH(re, key, start=0):
+    """Emulate the old ``regex`` match API on a compiled ``re`` pattern.
+
+    Returns the length of the match of pattern ``re`` against ``key``
+    anchored at ``start``, or -1 when there is no match.
+    """
+    found = re.match(key, start)
+    if found is None: return -1
+    return found.end() - found.start()
+
 # utility function for error diagnostics
 def DumpStringWindow(Str, Pos, Offset=15):
    L = []
@@ -169,7 +179,7 @@ class LexDictionary:
       length = len(key)
       for triple in self.regexprlist:
          (regexpr, Flag, Function) = triple
-          index = regexpr.match(key)
+          index = RMATCH(regexpr,key)
          if index == length:
             found = 1
             # use the function to interpret the string, if given
@@ -205,7 +215,7 @@ class LexDictionary:
  def terminal(self, string, RegExpr=None, Function=None):
    if RegExpr != None and Function != None:
       if type(RegExpr) == type(""):
-          RegExpr = regex.compile(RegExpr)
+          RegExpr = re.compile(RegExpr)
       self[ RegExpr ] = ( string, Function)
    for triple in self.regexprlist:
       (regexpr,token,Function) = triple
@@ -235,7 +245,7 @@ class LexDictionary:
  # register a regular expression as a comment
  def comment(self, string):
    # regexpr better be a uncompiled string regular expression! (not verified)
-    regexpr = regex.compile(string)
+    regexpr = re.compile(string)
    self.commentpatterns = self.commentpatterns + [ regexpr ]
    self.commentstrings = self.commentstrings + [ string ]

@@ -272,7 +282,7 @@ class LexDictionary:
           return (ENDOFFILETERM, 0)
        # skip whitespace
        whitespacefound = 0
-        skip = WHITEREGEX.match(String, StartPosition)
+        skip = RMATCH(WHITEREGEX,String, StartPosition)
        if skip > 0:
           StartPosition = StartPosition + skip
           totalOffset = totalOffset + skip
@@ -281,7 +291,7 @@ class LexDictionary:
        # looking for comment
        commentfound = 0
        for commentexpr in self.commentpatterns:
-           offset = commentexpr.match(String,StartPosition)
+           offset = RMATCH(commentexpr,String,StartPosition)
           if offset != -1:
              if offset<1:
                 info = DumpStringWindow(String,StartPosition)
@@ -296,7 +306,7 @@ class LexDictionary:
           return ( keypair[0], keypair[1] + totalOffset)
        # looking for terminal
        for (regexpr, Flag, Function) in self.regexprlist:
-           offset = regexpr.match(String,StartPosition)
+           offset = RMATCH(regexpr,String,StartPosition)
           if offset != -1:
              matchstring = String[StartPosition : offset+StartPosition]
              if Function != None:
@@ -386,18 +396,17 @@ class lexdictionary:
       punctlist = self.punctuationlist
       termregex = self.termregex
       while not finished:
-          #print String[StartPosition:]
          if len(String) <= StartPosition:
             result = self.lastresult = (ENDOFFILETERM, 0)
             return result
          # skip ws and comments 
-          skip = skipprog.match(String, StartPosition)
+          #skip = skipprog.match(String, StartPosition)
+          skip = RMATCH(skipprog, String, StartPosition)
          if skip>0:
             if skip==0:
                info = DumpStringWindow(String, StartPosition)
                raise LexTokenError, \
                  "zero length whitespace or comment "+info
-             #print "skipping", `String[StartPosition: StartPosition+skip]`
             StartPosition = StartPosition + skip
             totalOffset = totalOffset + skip
             continue
@@ -408,9 +417,10 @@ class lexdictionary:
             result = self.lastresult = (keypair[0], keypair[1]+totalOffset)
             return result
          # look for terminal
+          #print "Termregex: %s --> %s <-- start=%s" % (termregex.pattern, String, StartPosition)
          offset = termregex.match(String, StartPosition)
-          if (offset>0):
-             g = termregex.group
+          if offset is not None:
+             g = offset.group
             for (term, regex, flag, fn) in self.termlist:
                 test = g(term)
                 if test:
@@ -420,7 +430,7 @@ class lexdictionary:
                    else:
                       value = test
                    result = self.lastresult = (
-                       (flag, value), offset + totalOffset)
+                       (flag, value), offset.end() - offset.start() + totalOffset)
                    return result
          # error if we get here
          info = DumpStringWindow(String, StartPosition)
@@ -431,19 +441,19 @@ class lexdictionary:

   def compile(self):
       from string import joinfields, whitespace
-       import regex
+       import re
       skipregexen = self.commentstrings + [WHITERE]
-       skipregex = "\(" + joinfields(skipregexen, "\)\|\(") + "\)"
+       skipregex = "(" + joinfields(skipregexen, ")|(") + ")" # re syntax: unescaped ( | ) group/alternate
       #print skipregex; import sys; sys.exit(1)
-       self.skipprog = regex.compile(skipregex)
+       self.skipprog = re.compile(skipregex)
       termregexen = []
       termnames = []
       for (term, rgex, flag, fn) in self.termlist:
-           fragment = "\(<%s>%s\)" % (term, rgex)
+           fragment = "(?P<%s>%s)" % (term, rgex) # named group replaces the symcomp \(<name>...\) form
           termregexen.append(fragment)
           termnames.append(term)
-       termregex = joinfields(termregexen, "\|")
-       self.termregex = regex.symcomp(termregex)
+       termregex = joinfields(termregexen, "|")
+       self.termregex = re.compile(termregex)
       self.termnames = termnames

 LexDictionary = lexdictionary ##### test!

--- a/lib/python/Products/ZGadflyDA/gadfly/pygram.py
+++ b/lib/python/Products/ZGadflyDA/gadfly/pygram.py
@@ -375,7 +375,7 @@ print raise return try while == >= <= <> != >x> << NEWLINE
 **
 """

-import kjParser, string, regex
+import kjParser, string, re
 from kjParser import KEYFLAG, ENDOFFILETERM

 alphanumunder = string.letters+string.digits+"_"
@@ -386,33 +386,33 @@ id_letters = map(None, alphanumunder)

 # terminator re for names
 nametermre = "[^" + alphanumunder + "]"
-nameterm = regex.compile(nametermre)
+nameterm = re.compile(nametermre)

 # terminator re for numbers (same as above but allow "." in num).
 numtermre =  "[^" + alphanumunder + "\.]"
-numterm = regex.compile(numtermre)
+numterm = re.compile(numtermre)

 parseerror = "parseerror"

-pycommentre = "\(#.*\)"
+pycommentre = r"(#.*)"

 # whitespace regex outside of brackets
 #  white followed by (comment\n maybe repeated)
 #  DON'T EAT NEWLINE!!
-pywhiteoutre = "\([ \t\r\014]\|\\\\\n\)*%s?" % pycommentre
-pywhiteout = regex.compile(pywhiteoutre)
+pywhiteoutre = r"([ \t\r\014]|\\\n)*%s?" % pycommentre # \\\n matches a backslash-newline continuation
+pywhiteout = re.compile(pywhiteoutre)

 # whitespace regex inside brackets
 #  white or newline possibly followed by comment, all maybe repeated
-pywhiteinre = pywhiteoutre #"[ \t\r]*\(\\\\\n\)*%s?" % pycommentre
-pywhitein = regex.compile(pywhiteinre)
+pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" % pycommentre
+pywhitein = re.compile(pywhiteinre)

 # totally blank lines (only recognize if next char is newline)
 #allblankre = "\n" + pywhiteinre
-#allblank = regex.compile(allblankre)
+#allblank = re.compile(allblankre)

 # re for indentation (might accept empty string)
-indentp = regex.compile("[\t ]*")
+indentp = re.compile(r"[\t ]*")

 # two char kws and puncts
 char2kw = ["if", "or", "in", "is"]
@@ -450,6 +450,11 @@ newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)

 ### MUST HANDLE WHOLELY BLANK LINES CORRECTLY!

+# Reuse the regex-compatible matcher from kjParser instead of keeping a
+# second copy here: it returns the length matched at ``start``, or -1
+# when the pattern does not match.
+RMATCH = kjParser.RMATCH
+
 class pylexdict(kjParser.LexDictionary):
   def __init__(self):
       kjParser.LexDictionary.__init__(self)
@@ -504,7 +509,7 @@ class pylexdict(kjParser.LexDictionary):
          cursor = 0
          self.lineno = 1
          while 1:
-             test = pywhitein.match(String, cursor)
+             test = RMATCH(pywhitein,String, cursor)
             if test<0: break
             next = cursor + test
             #print "lead skip:", next, String[cursor:next]
@@ -565,7 +570,7 @@ class pylexdict(kjParser.LexDictionary):
                   start = start+1
                   #self.lineno = self.lineno+1
             #print "matching", `String[start:start+10]`
-             skip = pywhitein.match(String, start)
+             skip = RMATCH(pywhitein,String, start)
             #print "skip=", skip
             if skip<0: break
             rs = skip + realindex + (start-realindex)
@@ -599,7 +604,7 @@ class pylexdict(kjParser.LexDictionary):
                skipto = skipto + 1
                self.realindex = realindex = skipto
                continue
-             skip = pywhiteout.match(String, skipto)
+             skip = RMATCH(pywhiteout,String, skipto)
             nextskipto = skipto+skip
             #skipped = String[skipto:nextskipto]
             #if "\n" in skipped:
@@ -610,7 +615,7 @@ class pylexdict(kjParser.LexDictionary):
             else: break
          skip = skipto - realindex
       elif not atlineend:
-          skip = pywhitein.match(String, realindex)
+          skip = RMATCH(pywhitein,String, realindex)
       if skip<=0: 
          skip = 0
       else:
@@ -631,7 +636,7 @@ class pylexdict(kjParser.LexDictionary):
       if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
           and first != "\n"):
          #print "looking for dent", realindex, `String[realindex:realindex+20]`
-          match = indentp.match(String, realindex)
+          match = RMATCH(indentp,String, realindex)
          if match>=0:
             dent = String[realindex: realindex+match]
             #print "dent match", match, `dent`
@@ -923,7 +928,7 @@ teststring = """#
 #
 from string import join, split
 '''
-import regex
+import re

 for a in l:
    a.attr, a[x], b = c
@@ -935,7 +940,7 @@ class zzz:
   #doc string 
   '''
   '''
-   global regex, join
+   global re, join
   
   d = {} 
   for i in range(10): d[i] = i

--- a/lib/python/Products/ZGadflyDA/gadfly/sqlgen.py
+++ b/lib/python/Products/ZGadflyDA/gadfly/sqlgen.py
@@ -27,7 +27,7 @@ def charstfn(str):
 digits = string.digits
 # rely in python to filter out the good/bad/ugly
 intre = "[%s][%s.jJ]*" % (digits,digits)
-numlitre = "%s\([Ee][+-]?%s\)?" % (intre, intre)
+numlitre = "%s([Ee][+-]?%s)?" % (intre, intre)

 def numlitfn(str):
    """Note: this is "safe" because regex