Commit 0a0f97a7 authored by Guido van Rossum

globToWordIds() shouldn't make assumptions about the pipeline. It still
supports only a trailing *, so whatever splitter the pipeline uses should
honor that; added a comment to the Splitter class referring to globToWordIds().
parent 315bcde9
......@@ -86,9 +86,8 @@ class Lexicon:
return self._wids.get(word, 0)
def globToWordIds(self, pattern):
if not re.match("^\w+\*$", pattern):
return []
pattern = pattern.lower()
# This currently only knows about trailing *;
# whatever splitter you use should match this
assert pattern.endswith("*")
prefix = pattern[:-1]
assert prefix and not prefix.endswith("*")
......@@ -128,7 +127,7 @@ class Splitter:
import re
rx = re.compile(r"\w+")
rxGlob = re.compile(r"\w+\*?")
rxGlob = re.compile(r"\w+\*?") # See globToWordIds() above
def process(self, lst):
result = []
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment