Commit 8644a31a authored by Bartek Górny's avatar Bartek Górny

Support for document-type attributes, incl. SearchableText; automatic...

Support for document-type attributes, incl. SearchableText; automatic extraction of plain-text content (used for searching) for documents of the Text type

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@9006 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 9841bf3e
...@@ -100,6 +100,7 @@ class OOoDocument(XMLObject,File): ...@@ -100,6 +100,7 @@ class OOoDocument(XMLObject,File):
, PropertySheet.DublinCore , PropertySheet.DublinCore
, PropertySheet.Version , PropertySheet.Version
, PropertySheet.Reference , PropertySheet.Reference
, PropertySheet.Document
, PropertySheet.OOoDocument , PropertySheet.OOoDocument
) )
...@@ -117,6 +118,21 @@ class OOoDocument(XMLObject,File): ...@@ -117,6 +118,21 @@ class OOoDocument(XMLObject,File):
#File.__init__(self,*args,**kwargs) #File.__init__(self,*args,**kwargs)
#self.__dav_collection__=0 #self.__dav_collection__=0
### Content indexing methods
security.declareProtected(Permissions.View, 'getSearchableText')
def getSearchableText(self, md=None):
  """
  Return the text used by the catalog for basic full text indexing.

  The values of a fixed list of properties are read through
  getProperty and joined with single spaces. A property whose value
  is false (for example None or an empty string) contributes a
  single space instead. The md argument is accepted but not used.

  Note: this makes the class a hybrid of File and Document.
  """
  property_ids = (
    'title', 'description', 'id', 'text_content', 'reference', 'version',
    'short_title', 'keywords', 'subject', 'original_filename',
    'source_project_title',
  )
  chunks = []
  for property_id in property_ids:
    # Substitute a single space for any false value so the join still works.
    chunks.append(self.getProperty(property_id) or ' ')
  return ' '.join(chunks)
# Expose the same method under the name SearchableText as well.
SearchableText = getSearchableText
security.declareProtected(Permissions.ModifyPortalContent,'clearCache') security.declareProtected(Permissions.ModifyPortalContent,'clearCache')
def clearCache(self): def clearCache(self):
""" """
...@@ -235,8 +251,12 @@ class OOoDocument(XMLObject,File): ...@@ -235,8 +251,12 @@ class OOoDocument(XMLObject,File):
self.log('_convert',enc(self._unpackData(self.data))[:500]) self.log('_convert',enc(self._unpackData(self.data))[:500])
meta,oo_data=sp.run_convert(self.getOriginalFilename(),enc(self._unpackData(self.data))) meta,oo_data=sp.run_convert(self.getOriginalFilename(),enc(self._unpackData(self.data)))
self.oo_data=Pdata(dec(oo_data)) self.oo_data=Pdata(dec(oo_data))
# now we get text content (for now, only for Text type)
# converting spreadsheet and presentations into plain text is less trivial
if self.getPortalType()=='Text':
nic,text_data=sp.run_generate(self.getOriginalFilename(),enc(self._unpackData(self.oo_data)),'txt')
self.setTextContent(dec(text_data))
self._setMetaData(meta) self._setMetaData(meta)
#self.refreshAllowedTargets()
security.declarePrivate('_setMetaData') security.declarePrivate('_setMetaData')
def _setMetaData(self,meta): def _setMetaData(self,meta):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment