Commit b96667eb authored by Casey Duncan's avatar Casey Duncan

Added management interface to query words in the lexicon and in the process...

Added management interface to query words in the lexicon and in the process uncovered a BTreeItems bug, whee!
parent 560ace9d
...@@ -200,8 +200,9 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem): ...@@ -200,8 +200,9 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem):
meta_type = 'ZCTextIndex Lexicon' meta_type = 'ZCTextIndex Lexicon'
manage_options = ({'label':'Overview', 'action':'manage_main'},) + \ manage_options = ({'label':'Overview', 'action':'manage_main'},
SimpleItem.manage_options {'label':'Query', 'action':'queryLexicon'},
) + SimpleItem.manage_options
def __init__(self, id, title='', *pipeline): def __init__(self, id, title='', *pipeline):
self.id = str(id) self.id = str(id)
...@@ -213,7 +214,50 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem): ...@@ -213,7 +214,50 @@ class PLexicon(Lexicon, Acquisition.Implicit, SimpleItem):
def getPipelineNames(self): def getPipelineNames(self):
"""Return list of names of pipeline element classes""" """Return list of names of pipeline element classes"""
return [element.__class__.__name__ for element in self._pipeline] return [element.__class__.__name__ for element in self._pipeline]
_queryLexicon = DTMLFile('dtml/queryLexicon', globals())
def queryLexicon(self, REQUEST, words=None, page=0, rows=20, cols=4):
    """Lexicon browser/query user interface

    Renders one page of lexicon words laid out in columns.

    REQUEST -- passed straight through to the page template.
    words   -- optional sequence of glob patterns; each is expanded with
               globToWordIds() and the matching words are listed.  When
               empty or None, every word from self.words() is listed.
    page    -- zero-based page number; clamped to the valid range.
    rows    -- words per column; clamped to 1..500.
    cols    -- columns per page; clamped to 1..12.

    Returns the result of rendering the 'queryLexicon' template with the
    paging values (page, rows, cols, start_word, end_word, word_count,
    page_count) and page_columns, a list of word lists (one per column).
    """
    if words:
        wids = []
        for pattern in words:
            wids.extend(self.globToWordIds(pattern))
        words = [self.get_word(wid) for wid in wids]
    else:
        words = self.words()
    word_count = len(words)

    # Keep the layout within sane bounds regardless of what was requested.
    rows = max(min(rows, 500), 1)
    cols = max(min(cols, 12), 1)
    page_size = rows * cols

    # Number of pages, rounding up.  divmod keeps this an integer on every
    # Python version; plain "/" produces a fractional page count under
    # true division, which then leaks into page, start and the template.
    page_count, leftover = divmod(word_count, page_size)
    if leftover:
        page_count += 1

    # Clamp the requested page; with no words page_count is 0 and the
    # page falls back to 0.
    page = max(min(page, page_count - 1), 0)
    start = page_size * page
    end = min(start + page_size, word_count)

    # NOTE(review): the empty case is handled without slicing; presumably
    # this sidesteps the BTreeItems problem mentioned in the commit
    # message -- confirm before simplifying.
    if word_count:
        words = list(words[start:end])
    else:
        words = []

    # Split the page's words into consecutive columns of `rows` entries.
    columns = [words[i:i + rows] for i in range(0, len(words), rows)]

    return self._queryLexicon(self, REQUEST,
                              page=page,
                              rows=rows,
                              cols=cols,
                              start_word=start + 1,
                              end_word=end,
                              word_count=word_count,
                              page_count=page_count,
                              page_columns=columns)
manage_main = DTMLFile('dtml/manageLexicon', globals()) manage_main = DTMLFile('dtml/manageLexicon', globals())
InitializeClass(PLexicon) InitializeClass(PLexicon)
...@@ -10,12 +10,12 @@ ...@@ -10,12 +10,12 @@
<span class="form-label">Input Pipeline Stages</span> <span class="form-label">Input Pipeline Stages</span>
</p> </p>
<p> <p class="form-help">
Text indexed through this lexicon is processed by the following pipeline Text indexed through this lexicon is processed by the following pipeline
stages stages
</p> </p>
<ol> <ol class="form-help">
<dtml-in name="getPipelineNames"> <dtml-in name="getPipelineNames">
<li>&dtml-sequence-item;</li> <li>&dtml-sequence-item;</li>
</dtml-in> </dtml-in>
......
...@@ -2,8 +2,12 @@ ...@@ -2,8 +2,12 @@
<dtml-var manage_tabs> <dtml-var manage_tabs>
<p class="form-help"> <p class="form-help">
The ZCTextIndex Lexicon in use by this index is:
There is nothing to manage here. Move along. <em><dtml-var expr="lexicon.getId()"></em>
</p>
<p class="form-help">
<em>Note:</em> You cannot change the lexicon assigned to a ZCTextIndex.
To use another lexicon, delete this index and create a new one that
uses the desired lexicon.
</p> </p>
<dtml-var manage_page_footer> <dtml-var manage_page_footer>
<dtml-var manage_page_header>
<dtml-var manage_tabs>
<p class="form-help">
Browse the words in the lexicon or enter the word(s) you are interested in
below. Globbing characters (*, ?) are supported.
</p>
<dtml-let words_str="' '.join(REQUEST.get('words',[]))">
<form action="&dtml-URL;">
<p class="form-element">
<span class="form-label">Word(s)</span>
<input name="words:tokens" size="20" value="&dtml-words_str;" />
<input type="submit" value="Query" />
<span class="form-label">&nbsp;Output Columns:</span>
<input name="cols:int" size="2" value="&dtml-cols;" />
<span class="form-label">&nbsp;Rows:</span>
<input name="rows:int" size="2" value="&dtml-rows;" />
</p>
</form>
<hr />
<form action="&dtml-URL;">
<table width="100%" cellpadding="2" cellspacing="0" border="0">
<tr class="section-bar">
<td><span class="form-label">
&dtml-word_count; Words Found<dtml-if word_count>,
Displaying &dtml-start_word;-&dtml-end_word;
</dtml-if>
<dtml-if expr="page_count > 0">
</span></td>
<td align="right"><span class="form-label">
Page:
<select name="page:int" onchange="this.form.submit()">
<dtml-in expr="_.range(page_count)" prefix="page">
<option value="&dtml-page_item;"
<dtml-if expr="page == page_item">
selected
</dtml-if>
>
<dtml-var expr="page_item+1">
</option>
</dtml-in>
</select>
of &dtml-page_count;
<input type="submit" value="Go" />
<input type="hidden" name="cols:int" value="&dtml-cols;" />
<input type="hidden" name="rows:int" value="&dtml-rows;" />
<input type="hidden" name="words:tokens" value="&dtml-words_str;" />
</dtml-if>
</span></td>
</tr>
</table>
</form>
</dtml-let>
<dtml-if name="page_columns">
<table width="100%" cellpadding="0" cellspacing="10" border="0">
<tr>
<dtml-in name="page_columns" prefix="column">
<td align="left" valign="top">
<dtml-var expr="'<br />'.join(column_item)">
</td>
</dtml-in>
</tr>
</table>
</dtml-if>
<dtml-var manage_page_footer>
...@@ -25,14 +25,13 @@ ZCTextIndex Lexicon - Add: Create a new ZCTextIndex Lexicon ...@@ -25,14 +25,13 @@ ZCTextIndex Lexicon - Add: Create a new ZCTextIndex Lexicon
tags. The HTML aware splitter gives best results when all of tags. The HTML aware splitter gives best results when all of
the incoming content to index is HTML. the incoming content to index is HTML.
- **Stop Words** To conserve space in the vocabulary, and possibly increase - **Stop Words** To conserve space in the vocabulary, and possibly
performance, you can select a stop word remover which subtracts increase performance, you can select a stop word remover which
very common or single letter words from the Lexicon. Bear in subtracts very common or single letter words from the Lexicon.
mind that you will not be able to search on removed stop words, Bear in mind that you will not be able to search on removed stop
and they will also be removed from queries passed to search words, and they will also be removed from queries passed to
ZCTextIndexes using the Lexicon. search ZCTextIndexes using the Lexicon.
- **Case Normalizer** The case normalizer removes case information from the words in - **Case Normalizer** The case normalizer removes case information
the Lexicon. If case-sensitive searching is desired, then omit from the words in the Lexicon. If case-sensitive searching is
this element from the pipeline. desired, then omit this element from the pipeline.
S
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment