Commit bdab8faf authored by matt@zope.com's avatar matt@zope.com

Convert gadfly from the regex module to the re module. gftest.py shows compatible test results after the changes are applied, and the tutorial is functional with gadfly.
parent da45006f
......@@ -14,7 +14,7 @@
import string
import kjSet
import kjParser
import regex
import re
# import some constants
from kjParser import \
......
......@@ -13,8 +13,7 @@
import kjSet
import string
import regex
import regsub
import re
import string
# set this flag for regression testing at each load
......@@ -40,7 +39,7 @@ THISMODULE = "kjParser"
# regular expression for matching whitespace
WHITERE = "["+string.whitespace+"]+"
WHITEREGEX = regex.compile(WHITERE)
WHITEREGEX = re.compile(WHITERE)
# local errors
LexTokenError = "LexTokenError" # may happen on bad string
......@@ -62,6 +61,17 @@ ENDOFFILETOKEN = (TERMFLAG, EOFFLAG)
# in FSM use the following terminal to indicate eof
ENDOFFILETERM = (ENDOFFILETOKEN, EOFFLAG)
# Utility function for match conversion from regex to re
def RMATCH(re, key, start=0):
    """Return the length of the match of pattern ``re`` in ``key`` at ``start``.

    This stands in for the former ``regexpr.match(key, start)`` calls, whose
    callers compare the result against -1 and against lengths.

    Parameters:
        re    -- a compiled ``re`` pattern object.  The name shadows the
                 ``re`` module inside this function only; it is kept so the
                 signature stays unchanged.
        key   -- the string to match against.
        start -- index in ``key`` at which matching begins (default 0).

    Returns the number of characters matched, or -1 when the pattern does
    not match at ``start``.  A pattern that matches the empty string yields
    0, which is distinct from the -1 failure value.
    """
    found = re.match(key, start)
    if found is None:
        return -1
    # The result is not stored in a local called ``len`` so the builtin
    # is not shadowed.
    return found.end() - found.start()
# utility function for error diagnostics
def DumpStringWindow(Str, Pos, Offset=15):
L = []
......@@ -169,7 +179,7 @@ class LexDictionary:
length = len(key)
for triple in self.regexprlist:
(regexpr, Flag, Function) = triple
index = regexpr.match(key)
index = RMATCH(regexpr,key)
if index == length:
found = 1
# use the function to interpret the string, if given
......@@ -205,7 +215,7 @@ class LexDictionary:
def terminal(self, string, RegExpr=None, Function=None):
if RegExpr != None and Function != None:
if type(RegExpr) == type(""):
RegExpr = regex.compile(RegExpr)
RegExpr = re.compile(RegExpr)
self[ RegExpr ] = ( string, Function)
for triple in self.regexprlist:
(regexpr,token,Function) = triple
......@@ -235,7 +245,7 @@ class LexDictionary:
# register a regular expression as a comment
def comment(self, string):
# regexpr better be a uncompiled string regular expression! (not verified)
regexpr = regex.compile(string)
regexpr = re.compile(string)
self.commentpatterns = self.commentpatterns + [ regexpr ]
self.commentstrings = self.commentstrings + [ string ]
......@@ -272,7 +282,7 @@ class LexDictionary:
return (ENDOFFILETERM, 0)
# skip whitespace
whitespacefound = 0
skip = WHITEREGEX.match(String, StartPosition)
skip = RMATCH(WHITEREGEX,String, StartPosition)
if skip > 0:
StartPosition = StartPosition + skip
totalOffset = totalOffset + skip
......@@ -281,7 +291,7 @@ class LexDictionary:
# looking for comment
commentfound = 0
for commentexpr in self.commentpatterns:
offset = commentexpr.match(String,StartPosition)
offset = RMATCH(commentexpr,String,StartPosition)
if offset != -1:
if offset<1:
info = DumpStringWindow(String,StartPosition)
......@@ -296,7 +306,7 @@ class LexDictionary:
return ( keypair[0], keypair[1] + totalOffset)
# looking for terminal
for (regexpr, Flag, Function) in self.regexprlist:
offset = regexpr.match(String,StartPosition)
offset = RMATCH(regexpr,String,StartPosition)
if offset != -1:
matchstring = String[StartPosition : offset+StartPosition]
if Function != None:
......@@ -386,18 +396,17 @@ class lexdictionary:
punctlist = self.punctuationlist
termregex = self.termregex
while not finished:
#print String[StartPosition:]
if len(String) <= StartPosition:
result = self.lastresult = (ENDOFFILETERM, 0)
return result
# skip ws and comments
skip = skipprog.match(String, StartPosition)
#skip = skipprog.match(String, StartPosition)
skip = RMATCH(skipprog, String, StartPosition)
if skip>0:
if skip==0:
info = DumpStringWindow(String, StartPosition)
raise LexTokenError, \
"zero length whitespace or comment "+info
#print "skipping", `String[StartPosition: StartPosition+skip]`
StartPosition = StartPosition + skip
totalOffset = totalOffset + skip
continue
......@@ -408,9 +417,10 @@ class lexdictionary:
result = self.lastresult = (keypair[0], keypair[1]+totalOffset)
return result
# look for terminal
#print "Termregex: %s --> %s <-- start=%s" % (termregex.pattern, String, StartPosition)
offset = termregex.match(String, StartPosition)
if (offset>0):
g = termregex.group
if offset is not None:
g = offset.group
for (term, regex, flag, fn) in self.termlist:
test = g(term)
if test:
......@@ -420,7 +430,7 @@ class lexdictionary:
else:
value = test
result = self.lastresult = (
(flag, value), offset + totalOffset)
(flag, value), offset.end() - offset.start() + totalOffset)
return result
# error if we get here
info = DumpStringWindow(String, StartPosition)
......@@ -431,19 +441,19 @@ class lexdictionary:
def compile(self):
from string import joinfields, whitespace
import regex
import re
skipregexen = self.commentstrings + [WHITERE]
skipregex = "\(" + joinfields(skipregexen, "\)\|\(") + "\)"
skipregex = "(" + joinfields(skipregexen, ")|(") + ")"
#print skipregex; import sys; sys.exit(1)
self.skipprog = regex.compile(skipregex)
self.skipprog = re.compile(skipregex)
termregexen = []
termnames = []
for (term, rgex, flag, fn) in self.termlist:
fragment = "\(<%s>%s\)" % (term, rgex)
fragment = "(?P<%s>%s)" % (term, rgex)
termregexen.append(fragment)
termnames.append(term)
termregex = joinfields(termregexen, "\|")
self.termregex = regex.symcomp(termregex)
termregex = joinfields(termregexen, "|")
self.termregex = re.compile(termregex)
self.termnames = termnames
LexDictionary = lexdictionary ##### test!
......
......@@ -375,7 +375,7 @@ print raise return try while == >= <= <> != >x> << NEWLINE
**
"""
import kjParser, string, regex
import kjParser, string, re
from kjParser import KEYFLAG, ENDOFFILETERM
alphanumunder = string.letters+string.digits+"_"
......@@ -386,33 +386,33 @@ id_letters = map(None, alphanumunder)
# terminator re for names
nametermre = "[^" + alphanumunder + "]"
nameterm = regex.compile(nametermre)
nameterm = re.compile(nametermre)
# terminator re for numbers (same as above but allow "." in num).
numtermre = "[^" + alphanumunder + "\.]"
numterm = regex.compile(numtermre)
numterm = re.compile(numtermre)
parseerror = "parseerror"
pycommentre = "\(#.*\)"
pycommentre = r"(#.*)"
# whitespace regex outside of brackets
# white followed by (comment\n maybe repeated)
# DON'T EAT NEWLINE!!
pywhiteoutre = "\([ \t\r\014]\|\\\\\n\)*%s?" % pycommentre
pywhiteout = regex.compile(pywhiteoutre)
# A backslash immediately followed by a newline (a line continuation) is
# treated as whitespace.  The backslash is written as ``\\`` because the
# earlier ``[\]`` form escaped the closing bracket and left the character
# set unterminated.
pywhiteoutre = r"([ \t\r\014]|\\\n)*%s?" % pycommentre
pywhiteout = re.compile(pywhiteoutre)
# whitespace regex inside brackets
# white or newline possibly followed by comment, all maybe repeated
pywhiteinre = pywhiteoutre #"[ \t\r]*\(\\\\\n\)*%s?" % pycommentre
pywhitein = regex.compile(pywhiteinre)
pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" % pycommentre
pywhitein = re.compile(pywhiteinre)
# totally blank lines (only recognize if next char is newline)
#allblankre = "\n" + pywhiteinre
#allblank = regex.compile(allblankre)
#allblank = re.compile(allblankre)
# re for indentation (might accept empty string)
indentp = regex.compile("[\t ]*")
indentp = re.compile(r"[\t ]*")
# two char kws and puncts
char2kw = ["if", "or", "in", "is"]
......@@ -450,6 +450,11 @@ newlineresult = kwmap["\n"] = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)
### MUST HANDLE WHOLELY BLANK LINES CORRECTLY!
def RMATCH(re, key, start=0):
    """Match compiled pattern ``re`` against ``key`` beginning at ``start``.

    Returns the length of the matched text, or -1 if there is no match
    at that position.
    """
    hit = re.match(key, start)
    if hit is not None:
        begin, finish = hit.span()
        return finish - begin
    return -1
class pylexdict(kjParser.LexDictionary):
def __init__(self):
kjParser.LexDictionary.__init__(self)
......@@ -504,7 +509,7 @@ class pylexdict(kjParser.LexDictionary):
cursor = 0
self.lineno = 1
while 1:
test = pywhitein.match(String, cursor)
test = RMATCH(pywhitein,String, cursor)
if test<0: break
next = cursor + test
#print "lead skip:", next, String[cursor:next]
......@@ -565,7 +570,7 @@ class pylexdict(kjParser.LexDictionary):
start = start+1
#self.lineno = self.lineno+1
#print "matching", `String[start:start+10]`
skip = pywhitein.match(String, start)
skip = RMATCH(pywhitein,String, start)
#print "skip=", skip
if skip<0: break
rs = skip + realindex + (start-realindex)
......@@ -599,7 +604,7 @@ class pylexdict(kjParser.LexDictionary):
skipto = skipto + 1
self.realindex = realindex = skipto
continue
skip = pywhiteout.match(String, skipto)
skip = RMATCH(pywhiteout,String, skipto)
nextskipto = skipto+skip
#skipped = String[skipto:nextskipto]
#if "\n" in skipped:
......@@ -610,7 +615,7 @@ class pylexdict(kjParser.LexDictionary):
else: break
skip = skipto - realindex
elif not atlineend:
skip = pywhitein.match(String, realindex)
skip = RMATCH(pywhitein,String, realindex)
if skip<=0:
skip = 0
else:
......@@ -631,7 +636,7 @@ class pylexdict(kjParser.LexDictionary):
if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
and first != "\n"):
#print "looking for dent", realindex, `String[realindex:realindex+20]`
match = indentp.match(String, realindex)
match = RMATCH(indentp,String, realindex)
if match>=0:
dent = String[realindex: realindex+match]
#print "dent match", match, `dent`
......@@ -923,7 +928,7 @@ teststring = """#
#
from string import join, split
'''
import regex
import re
for a in l:
a.attr, a[x], b = c
......@@ -935,7 +940,7 @@ class zzz:
#doc string
'''
'''
global regex, join
global re, join
d = {}
for i in range(10): d[i] = i
......
......@@ -27,7 +27,7 @@ def charstfn(str):
digits = string.digits
# rely in python to filter out the good/bad/ugly
intre = "[%s][%s.jJ]*" % (digits,digits)
numlitre = "%s\([Ee][+-]?%s\)?" % (intre, intre)
numlitre = "%s([Ee][+-]?%s)?" % (intre, intre)
def numlitfn(str):
"""Note: this is "safe" because regex
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment