Commit bdab8faf authored by matt@zope.com

Convert gadfly from the regex module to the re module; gftest.py shows compatible test results after the changes are applied, and the tutorial is functional with gadfly.
parent da45006f
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import string import string
import kjSet import kjSet
import kjParser import kjParser
import regex import re
# import some constants # import some constants
from kjParser import \ from kjParser import \
......
...@@ -13,8 +13,7 @@ ...@@ -13,8 +13,7 @@
import kjSet import kjSet
import string import string
import regex import re
import regsub
import string import string
# set this flag for regression testing at each load # set this flag for regression testing at each load
...@@ -40,7 +39,7 @@ THISMODULE = "kjParser" ...@@ -40,7 +39,7 @@ THISMODULE = "kjParser"
# regular expression for matching whitespace # regular expression for matching whitespace
WHITERE = "["+string.whitespace+"]+" WHITERE = "["+string.whitespace+"]+"
WHITEREGEX = regex.compile(WHITERE) WHITEREGEX = re.compile(WHITERE)
# local errors # local errors
LexTokenError = "LexTokenError" # may happen on bad string LexTokenError = "LexTokenError" # may happen on bad string
...@@ -62,6 +61,17 @@ ENDOFFILETOKEN = (TERMFLAG, EOFFLAG) ...@@ -62,6 +61,17 @@ ENDOFFILETOKEN = (TERMFLAG, EOFFLAG)
# in FSM use the following terminal to indicate eof # in FSM use the following terminal to indicate eof
ENDOFFILETERM = (ENDOFFILETOKEN, EOFFLAG) ENDOFFILETERM = (ENDOFFILETOKEN, EOFFLAG)
# Utility function for match conversion from regex to re
def RMATCH(re, key, start=0):
    """Match helper used for the regex-to-re conversion.

    Arguments:
      re    -- compiled pattern object; inside this function the name
               hides the ``re`` module, only its ``match`` method is used
      key   -- string to match against
      start -- index in ``key`` at which the match is attempted

    Returns the number of characters matched beginning at ``start``
    (possibly 0), or -1 when the pattern does not match there.
    """
    found = re.match(key, start)
    if found is not None:
        # callers expect a length, not a match object
        return found.end() - found.start()
    return -1
# utility function for error diagnostics # utility function for error diagnostics
def DumpStringWindow(Str, Pos, Offset=15): def DumpStringWindow(Str, Pos, Offset=15):
L = [] L = []
...@@ -169,7 +179,7 @@ class LexDictionary: ...@@ -169,7 +179,7 @@ class LexDictionary:
length = len(key) length = len(key)
for triple in self.regexprlist: for triple in self.regexprlist:
(regexpr, Flag, Function) = triple (regexpr, Flag, Function) = triple
index = regexpr.match(key) index = RMATCH(regexpr,key)
if index == length: if index == length:
found = 1 found = 1
# use the function to interpret the string, if given # use the function to interpret the string, if given
...@@ -205,7 +215,7 @@ class LexDictionary: ...@@ -205,7 +215,7 @@ class LexDictionary:
def terminal(self, string, RegExpr=None, Function=None): def terminal(self, string, RegExpr=None, Function=None):
if RegExpr != None and Function != None: if RegExpr != None and Function != None:
if type(RegExpr) == type(""): if type(RegExpr) == type(""):
RegExpr = regex.compile(RegExpr) RegExpr = re.compile(RegExpr)
self[ RegExpr ] = ( string, Function) self[ RegExpr ] = ( string, Function)
for triple in self.regexprlist: for triple in self.regexprlist:
(regexpr,token,Function) = triple (regexpr,token,Function) = triple
...@@ -235,7 +245,7 @@ class LexDictionary: ...@@ -235,7 +245,7 @@ class LexDictionary:
# register a regular expression as a comment # register a regular expression as a comment
def comment(self, string): def comment(self, string):
# regexpr better be a uncompiled string regular expression! (not verified) # regexpr better be a uncompiled string regular expression! (not verified)
regexpr = regex.compile(string) regexpr = re.compile(string)
self.commentpatterns = self.commentpatterns + [ regexpr ] self.commentpatterns = self.commentpatterns + [ regexpr ]
self.commentstrings = self.commentstrings + [ string ] self.commentstrings = self.commentstrings + [ string ]
...@@ -272,7 +282,7 @@ class LexDictionary: ...@@ -272,7 +282,7 @@ class LexDictionary:
return (ENDOFFILETERM, 0) return (ENDOFFILETERM, 0)
# skip whitespace # skip whitespace
whitespacefound = 0 whitespacefound = 0
skip = WHITEREGEX.match(String, StartPosition) skip = RMATCH(WHITEREGEX,String, StartPosition)
if skip > 0: if skip > 0:
StartPosition = StartPosition + skip StartPosition = StartPosition + skip
totalOffset = totalOffset + skip totalOffset = totalOffset + skip
...@@ -281,7 +291,7 @@ class LexDictionary: ...@@ -281,7 +291,7 @@ class LexDictionary:
# looking for comment # looking for comment
commentfound = 0 commentfound = 0
for commentexpr in self.commentpatterns: for commentexpr in self.commentpatterns:
offset = commentexpr.match(String,StartPosition) offset = RMATCH(commentexpr,String,StartPosition)
if offset != -1: if offset != -1:
if offset<1: if offset<1:
info = DumpStringWindow(String,StartPosition) info = DumpStringWindow(String,StartPosition)
...@@ -296,7 +306,7 @@ class LexDictionary: ...@@ -296,7 +306,7 @@ class LexDictionary:
return ( keypair[0], keypair[1] + totalOffset) return ( keypair[0], keypair[1] + totalOffset)
# looking for terminal # looking for terminal
for (regexpr, Flag, Function) in self.regexprlist: for (regexpr, Flag, Function) in self.regexprlist:
offset = regexpr.match(String,StartPosition) offset = RMATCH(regexpr,String,StartPosition)
if offset != -1: if offset != -1:
matchstring = String[StartPosition : offset+StartPosition] matchstring = String[StartPosition : offset+StartPosition]
if Function != None: if Function != None:
...@@ -386,18 +396,17 @@ class lexdictionary: ...@@ -386,18 +396,17 @@ class lexdictionary:
punctlist = self.punctuationlist punctlist = self.punctuationlist
termregex = self.termregex termregex = self.termregex
while not finished: while not finished:
#print String[StartPosition:]
if len(String) <= StartPosition: if len(String) <= StartPosition:
result = self.lastresult = (ENDOFFILETERM, 0) result = self.lastresult = (ENDOFFILETERM, 0)
return result return result
# skip ws and comments # skip ws and comments
skip = skipprog.match(String, StartPosition) #skip = skipprog.match(String, StartPosition)
skip = RMATCH(skipprog, String, StartPosition)
if skip>0: if skip>0:
if skip==0: if skip==0:
info = DumpStringWindow(String, StartPosition) info = DumpStringWindow(String, StartPosition)
raise LexTokenError, \ raise LexTokenError, \
"zero length whitespace or comment "+info "zero length whitespace or comment "+info
#print "skipping", `String[StartPosition: StartPosition+skip]`
StartPosition = StartPosition + skip StartPosition = StartPosition + skip
totalOffset = totalOffset + skip totalOffset = totalOffset + skip
continue continue
...@@ -408,9 +417,10 @@ class lexdictionary: ...@@ -408,9 +417,10 @@ class lexdictionary:
result = self.lastresult = (keypair[0], keypair[1]+totalOffset) result = self.lastresult = (keypair[0], keypair[1]+totalOffset)
return result return result
# look for terminal # look for terminal
#print "Termregex: %s --> %s <-- start=%s" % (termregex.pattern, String, StartPosition)
offset = termregex.match(String, StartPosition) offset = termregex.match(String, StartPosition)
if (offset>0): if offset is not None:
g = termregex.group g = offset.group
for (term, regex, flag, fn) in self.termlist: for (term, regex, flag, fn) in self.termlist:
test = g(term) test = g(term)
if test: if test:
...@@ -420,7 +430,7 @@ class lexdictionary: ...@@ -420,7 +430,7 @@ class lexdictionary:
else: else:
value = test value = test
result = self.lastresult = ( result = self.lastresult = (
(flag, value), offset + totalOffset) (flag, value), offset.end() - offset.start() + totalOffset)
return result return result
# error if we get here # error if we get here
info = DumpStringWindow(String, StartPosition) info = DumpStringWindow(String, StartPosition)
...@@ -431,19 +441,19 @@ class lexdictionary: ...@@ -431,19 +441,19 @@ class lexdictionary:
def compile(self): def compile(self):
from string import joinfields, whitespace from string import joinfields, whitespace
import regex import re
skipregexen = self.commentstrings + [WHITERE] skipregexen = self.commentstrings + [WHITERE]
skipregex = "\(" + joinfields(skipregexen, "\)\|\(") + "\)" skipregex = "(" + joinfields(skipregexen, ")|(") + ")"
#print skipregex; import sys; sys.exit(1) #print skipregex; import sys; sys.exit(1)
self.skipprog = regex.compile(skipregex) self.skipprog = re.compile(skipregex)
termregexen = [] termregexen = []
termnames = [] termnames = []
for (term, rgex, flag, fn) in self.termlist: for (term, rgex, flag, fn) in self.termlist:
fragment = "\(<%s>%s\)" % (term, rgex) fragment = "(?P<%s>%s)" % (term, rgex)
termregexen.append(fragment) termregexen.append(fragment)
termnames.append(term) termnames.append(term)
termregex = joinfields(termregexen, "\|") termregex = joinfields(termregexen, "|")
self.termregex = regex.symcomp(termregex) self.termregex = re.compile(termregex)
self.termnames = termnames self.termnames = termnames
LexDictionary = lexdictionary ##### test! LexDictionary = lexdictionary ##### test!
......
...@@ -375,7 +375,7 @@ print raise return try while == >= <= <> != >x> << NEWLINE ...@@ -375,7 +375,7 @@ print raise return try while == >= <= <> != >x> << NEWLINE
** **
""" """
import kjParser, string, regex import kjParser, string, re
from kjParser import KEYFLAG, ENDOFFILETERM from kjParser import KEYFLAG, ENDOFFILETERM
alphanumunder = string.letters+string.digits+"_" alphanumunder = string.letters+string.digits+"_"
...@@ -386,33 +386,33 @@ id_letters = map(None, alphanumunder) ...@@ -386,33 +386,33 @@ id_letters = map(None, alphanumunder)
# terminator re for names # terminator re for names
nametermre = "[^" + alphanumunder + "]" nametermre = "[^" + alphanumunder + "]"
nameterm = regex.compile(nametermre) nameterm = re.compile(nametermre)
# terminator re for numbers (same as above but allow "." in num). # terminator re for numbers (same as above but allow "." in num).
numtermre = "[^" + alphanumunder + "\.]" numtermre = "[^" + alphanumunder + "\.]"
numterm = regex.compile(numtermre) numterm = re.compile(numtermre)
parseerror = "parseerror" parseerror = "parseerror"
pycommentre = "\(#.*\)" pycommentre = r"(#.*)"
# whitespace regex outside of brackets # whitespace regex outside of brackets
# white followed by (comment\n maybe repeated) # white followed by (comment\n maybe repeated)
# DON'T EAT NEWLINE!! # DON'T EAT NEWLINE!!
pywhiteoutre = "\([ \t\r\014]\|\\\\\n\)*%s?" % pycommentre pywhiteoutre = r"([ \t\r\014]|[\]\n)*%s?" % pycommentre
pywhiteout = regex.compile(pywhiteoutre) pywhiteout = re.compile(pywhiteoutre)
# whitespace regex inside brackets # whitespace regex inside brackets
# white or newline possibly followed by comment, all maybe repeated # white or newline possibly followed by comment, all maybe repeated
pywhiteinre = pywhiteoutre #"[ \t\r]*\(\\\\\n\)*%s?" % pycommentre pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" % pycommentre
pywhitein = regex.compile(pywhiteinre) pywhitein = re.compile(pywhiteinre)
# totally blank lines (only recognize if next char is newline) # totally blank lines (only recognize if next char is newline)
#allblankre = "\n" + pywhiteinre #allblankre = "\n" + pywhiteinre
#allblank = regex.compile(allblankre) #allblank = re.compile(allblankre)
# re for indentation (might accept empty string) # re for indentation (might accept empty string)
indentp = regex.compile("[\t ]*") indentp = re.compile(r"[\t ]*")
# two char kws and puncts # two char kws and puncts
char2kw = ["if", "or", "in", "is"] char2kw = ["if", "or", "in", "is"]
...@@ -450,6 +450,11 @@ newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1) ...@@ -450,6 +450,11 @@ newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)
### MUST HANDLE WHOLELY BLANK LINES CORRECTLY! ### MUST HANDLE WHOLELY BLANK LINES CORRECTLY!
def RMATCH(re, key, start=0):
    """Return the length matched by pattern ``re`` in ``key`` at ``start``.

    ``re`` is a compiled pattern object (the parameter name hides the
    ``re`` module within this function).  The result is the number of
    characters matched, which may be 0, or -1 if there is no match.
    """
    hit = re.match(key, start)
    if hit is None:
        return -1
    first, last = hit.span()
    return last - first
class pylexdict(kjParser.LexDictionary): class pylexdict(kjParser.LexDictionary):
def __init__(self): def __init__(self):
kjParser.LexDictionary.__init__(self) kjParser.LexDictionary.__init__(self)
...@@ -504,7 +509,7 @@ class pylexdict(kjParser.LexDictionary): ...@@ -504,7 +509,7 @@ class pylexdict(kjParser.LexDictionary):
cursor = 0 cursor = 0
self.lineno = 1 self.lineno = 1
while 1: while 1:
test = pywhitein.match(String, cursor) test = RMATCH(pywhitein,String, cursor)
if test<0: break if test<0: break
next = cursor + test next = cursor + test
#print "lead skip:", next, String[cursor:next] #print "lead skip:", next, String[cursor:next]
...@@ -565,7 +570,7 @@ class pylexdict(kjParser.LexDictionary): ...@@ -565,7 +570,7 @@ class pylexdict(kjParser.LexDictionary):
start = start+1 start = start+1
#self.lineno = self.lineno+1 #self.lineno = self.lineno+1
#print "matching", `String[start:start+10]` #print "matching", `String[start:start+10]`
skip = pywhitein.match(String, start) skip = RMATCH(pywhitein,String, start)
#print "skip=", skip #print "skip=", skip
if skip<0: break if skip<0: break
rs = skip + realindex + (start-realindex) rs = skip + realindex + (start-realindex)
...@@ -599,7 +604,7 @@ class pylexdict(kjParser.LexDictionary): ...@@ -599,7 +604,7 @@ class pylexdict(kjParser.LexDictionary):
skipto = skipto + 1 skipto = skipto + 1
self.realindex = realindex = skipto self.realindex = realindex = skipto
continue continue
skip = pywhiteout.match(String, skipto) skip = RMATCH(pywhiteout,String, skipto)
nextskipto = skipto+skip nextskipto = skipto+skip
#skipped = String[skipto:nextskipto] #skipped = String[skipto:nextskipto]
#if "\n" in skipped: #if "\n" in skipped:
...@@ -610,7 +615,7 @@ class pylexdict(kjParser.LexDictionary): ...@@ -610,7 +615,7 @@ class pylexdict(kjParser.LexDictionary):
else: break else: break
skip = skipto - realindex skip = skipto - realindex
elif not atlineend: elif not atlineend:
skip = pywhitein.match(String, realindex) skip = RMATCH(pywhitein,String, realindex)
if skip<=0: if skip<=0:
skip = 0 skip = 0
else: else:
...@@ -631,7 +636,7 @@ class pylexdict(kjParser.LexDictionary): ...@@ -631,7 +636,7 @@ class pylexdict(kjParser.LexDictionary):
if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent) if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
and first != "\n"): and first != "\n"):
#print "looking for dent", realindex, `String[realindex:realindex+20]` #print "looking for dent", realindex, `String[realindex:realindex+20]`
match = indentp.match(String, realindex) match = RMATCH(indentp,String, realindex)
if match>=0: if match>=0:
dent = String[realindex: realindex+match] dent = String[realindex: realindex+match]
#print "dent match", match, `dent` #print "dent match", match, `dent`
...@@ -923,7 +928,7 @@ teststring = """# ...@@ -923,7 +928,7 @@ teststring = """#
# #
from string import join, split from string import join, split
''' '''
import regex import re
for a in l: for a in l:
a.attr, a[x], b = c a.attr, a[x], b = c
...@@ -935,7 +940,7 @@ class zzz: ...@@ -935,7 +940,7 @@ class zzz:
#doc string #doc string
''' '''
''' '''
global regex, join global re, join
d = {} d = {}
for i in range(10): d[i] = i for i in range(10): d[i] = i
......
...@@ -27,7 +27,7 @@ def charstfn(str): ...@@ -27,7 +27,7 @@ def charstfn(str):
digits = string.digits digits = string.digits
# rely in python to filter out the good/bad/ugly # rely in python to filter out the good/bad/ugly
intre = "[%s][%s.jJ]*" % (digits,digits) intre = "[%s][%s.jJ]*" % (digits,digits)
numlitre = "%s\([Ee][+-]?%s\)?" % (intre, intre) numlitre = "%s([Ee][+-]?%s)?" % (intre, intre)
def numlitfn(str): def numlitfn(str):
"""Note: this is "safe" because regex """Note: this is "safe" because regex
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment