Commit d04e5363 authored by Guido van Rossum

Add full globbing. This implements * and ? like in the shell,
but the pattern may not begin with a glob character (otherwise
someone specifying "*" as the pattern could tie up the CPU for
a long time).
parent fcb27991
...@@ -16,9 +16,12 @@ import re ...@@ -16,9 +16,12 @@ import re
from BTrees.IOBTree import IOBTree from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree from BTrees.OIBTree import OIBTree
from Products.ZCTextIndex.ILexicon import ILexicon from Products.ZCTextIndex.ILexicon import ILexicon
from Products.ZCTextIndex.StopDict import get_stopdict from Products.ZCTextIndex.StopDict import get_stopdict
from PipelineFactory import splitter_factory, element_factory from Products.ZCTextIndex.ParseTree import QueryError
from Products.ZCTextIndex.PipelineFactory import \
splitter_factory, element_factory
class Lexicon: class Lexicon:
...@@ -78,7 +81,7 @@ class Lexicon: ...@@ -78,7 +81,7 @@ class Lexicon:
return last return last
def isGlob(self, word): def isGlob(self, word):
return "*" in word return "*" in word or "?" in word
def get_word(self, wid): def get_word(self, wid):
return self._words[wid] return self._words[wid]
...@@ -87,17 +90,41 @@ class Lexicon: ...@@ -87,17 +90,41 @@ class Lexicon:
return self._wids.get(word, 0) return self._wids.get(word, 0)
def globToWordIds(self, pattern): def globToWordIds(self, pattern):
# This currently only knows about trailing *; # Implement * and ? just as in the shell, except the pattern
# whatever splitter you use should match this # must not start with either of these
assert pattern.endswith("*") prefix = ""
prefix = pattern[:-1] while pattern and pattern[0] not in "*?":
assert prefix and not prefix.endswith("*") prefix += pattern[0]
pattern = pattern[1:]
if not pattern:
# There were no globbing characters in the pattern
wid = self._wids.get(prefix, 0)
if wid:
return [wid]
else:
return []
if not prefix:
# The pattern starts with a globbing character.
# This is too inefficient, so we raise an exception.
raise QueryError(
"pattern %r shouldn't start with glob character" % pattern)
pat = prefix
for c in pattern:
if c == "*":
pat += ".*"
elif c == "?":
pat += "."
else:
pat += re.escape(c)
pat += "$"
prog = re.compile(pat)
keys = self._wids.keys(prefix) # Keys starting at prefix keys = self._wids.keys(prefix) # Keys starting at prefix
wids = [] wids = []
for key in keys: for key in keys:
if not key.startswith(prefix): if not key.startswith(prefix):
break break
wids.append(self._wids[key]) if prog.match(key):
wids.append(self._wids[key])
return wids return wids
def _getWordIdCreate(self, word): def _getWordIdCreate(self, word):
...@@ -128,7 +155,7 @@ class Splitter: ...@@ -128,7 +155,7 @@ class Splitter:
import re import re
rx = re.compile(r"\w+") rx = re.compile(r"\w+")
rxGlob = re.compile(r"\w+\*?") # See globToWordIds() above rxGlob = re.compile(r"\w+[\w*?]*") # See globToWordIds() above
def process(self, lst): def process(self, lst):
result = [] result = []
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment