Keep some statistics about indexing: total number of bytes and words
indexed (where the bytes are counted before entry into the pipeline, and the words are counted after the pipeline is done). To get the numbers, use the _nbytes and _nwords instance variables directly.

Keep some statistics about indexing: total number of bytes and words
indexed (where the bytes are counted before entry into the pipeline, and the words are counted after the pipeline is done). To get the numbers, use the _nbytes and _nwords instance variables directly.
c470e7d5 · Guido van Rossum · 79b99fbb · c470e7d5
Commit c470e7d5 authored May 15, 2002 by Guido van Rossum
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 0 deletions

lib/python/Products/ZCTextIndex/Lexicon.py lib/python/Products/ZCTextIndex/Lexicon.py +7 -0

No files found.
--- a/lib/python/Products/ZCTextIndex/Lexicon.py
+++ b/lib/python/Products/ZCTextIndex/Lexicon.py
@@ -30,6 +30,10 @@ class Lexicon:
        self._nextwid = 1
        self._pipeline = pipeline

+        # Keep some statistics about indexing
+        self._nbytes = 0 # Number of bytes indexed (at start of pipeline)
+        self._nwords = 0 # Number of words indexed (after pipeline)
+
    def length(self):
        """Return the number of unique terms in the lexicon."""
        return self._nextwid - 1
@@ -45,8 +49,11 @@ class Lexicon:

    def sourceToWordIds(self, text):
        last = _text2list(text)
+        for t in last:
+            self._nbytes += len(t)
        for element in self._pipeline:
            last = element.process(last)
+        self._nwords += len(last)
        return map(self._getWordIdCreate, last)

    def termToWordIds(self, text):