Commit d17e1a73 authored by Rafael Monnerat

Use Constants to improve code readability.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk/utils@41664 20353a03-c40f-0410-a6d1-a30d3c3de9de
parent 1c31bc9e
...@@ -37,6 +37,23 @@ from cloudooo.interfaces.granulate import ITableGranulator, \ ...@@ -37,6 +37,23 @@ from cloudooo.interfaces.granulate import ITableGranulator, \
IImageGranulator, \ IImageGranulator, \
ITextGranulator ITextGranulator
# ODF attribute keys written in Clark notation ('{namespace-uri}local-name').
# Despite the *_NAMESPACE suffix, each value is a fully qualified attribute
# name, suitable for indexing an element's attrib mapping.
TABLE_NAME_NAMESPACE = '{urn:oasis:names:tc:opendocument:xmlns:table:1.0}name'
TEXT_STYLENAME_NAMESPACE = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
# XPath queries for ODF format
# text:p elements that have no draw:frame ancestor.
RELEVANT_PARAGRAPH_XPATH_QUERY = '//text:p[not(ancestor::draw:frame)]'
# draw:image elements located below the context node.
DRAW_XPATH_QUERY = './/draw:image'
# table:table elements located below the context node.
TABLE_XPATH_QUERY = './/table:table'
# Text nodes two levels above the context node (or above any of its
# descendants), together with the text of the child elements at that level.
IMAGE_TITLE_XPATH_QUERY = './/../../text() | .//../../*/text()'
def getTemplatePath(format):
    """Return the path of the template file for the given format.

    The file is expected to sit in the same directory as this module and to
    be named 'template.<format>'.

    TODO: this helper should eventually move to a utils library.
    """
    module_directory = path.dirname(__file__)
    template_name = 'template.%s' % format
    return path.join(module_directory, template_name)
class OOGranulate(object): class OOGranulate(object):
"""Granulate an OpenOffice document into tables, images, chapters and """Granulate an OpenOffice document into tables, images, chapters and
...@@ -51,7 +68,7 @@ class OOGranulate(object): ...@@ -51,7 +68,7 @@ class OOGranulate(object):
"""Returns an odf document without content.xml """Returns an odf document without content.xml
It is a way to escape from this issue: http://bugs.python.org/issue6818""" It is a way to escape from this issue: http://bugs.python.org/issue6818"""
new_odf_document = ZipFile(StringIO(), 'a') new_odf_document = ZipFile(StringIO(), 'a')
template_path = path.join(path.dirname(__file__), 'template.%s' % format) template_path = getTemplatePath(format)
template_file = ZipFile(template_path) template_file = ZipFile(template_path)
for item in template_file.filelist: for item in template_file.filelist:
buffer = template_file.read(item.filename) buffer = template_file.read(item.filename)
...@@ -62,29 +79,28 @@ class OOGranulate(object): ...@@ -62,29 +79,28 @@ class OOGranulate(object):
def getTableItemList(self): def getTableItemList(self):
"""Returns the list of table IDs in the form of (id, title).""" """Returns the list of table IDs in the form of (id, title)."""
xml_table_list = self.document.parsed_content.xpath('.//table:table', xml_table_list = self.document.parsed_content.xpath(TABLE_XPATH_QUERY,
namespaces=self.document.parsed_content.nsmap) namespaces=self.document.parsed_content.nsmap)
name_key = '{urn:oasis:names:tc:opendocument:xmlns:table:1.0}name'
table_list = [] table_list = []
for table in xml_table_list: for table in xml_table_list:
title = ''.join(table.xpath('following-sibling::text:p[position()=1] \ title = ''.join(table.xpath('following-sibling::text:p[position()=1] \
[starts-with(@text:style-name, "Table")]//text()', [starts-with(@text:style-name, "Table")]//text()',
namespaces=table.nsmap)) namespaces=table.nsmap))
id = table.attrib[name_key] id = table.attrib[TABLE_NAME_NAMESPACE]
table_list.append((id, title)) table_list.append((id, title))
return table_list return table_list
def getTableItem(self, id, format='odt'): def getTableItem(self, id, format='odt'):
"""Returns the table into a new 'format' file.""" """Returns the table into a new 'format' file."""
try: try:
template_path = path.join(path.dirname(__file__), 'template.%s' % format) template_path = getTemplatePath(format)
template = ZipFile(template_path) template = ZipFile(template_path)
content_xml = etree.fromstring(template.read('content.xml')) content_xml = etree.fromstring(template.read('content.xml'))
template.close() template.close()
table_list = self.document.parsed_content.xpath( table_list = self.document.parsed_content.xpath(
'//table:table[@table:name="%s"]' % id, '//table:table[@table:name="%s"]' % id,
namespaces=self.document.parsed_content.nsmap) namespaces=self.document.parsed_content.nsmap)
if not table_list: if len(table_list) == 0:
return None return None
table = table_list[0] table = table_list[0]
# Next line do this <office:content><office:body><office:text><table:table> # Next line do this <office:content><office:body><office:text><table:table>
...@@ -115,14 +131,13 @@ class OOGranulate(object): ...@@ -115,14 +131,13 @@ class OOGranulate(object):
def getImageItemList(self): def getImageItemList(self):
"""Return a list of tuples with the id and title of image files""" """Return a list of tuples with the id and title of image files"""
xml_image_list = self.document.parsed_content.xpath('.//draw:image', xml_image_list = self.document.parsed_content.xpath(DRAW_XPATH_QUERY,
namespaces=self.document.parsed_content.nsmap) namespaces=self.document.parsed_content.nsmap)
image_list = [] image_list = []
for xml_image in xml_image_list: for xml_image in xml_image_list:
title_list = xml_image.xpath('.//../../text() | .//../../*/text()', title = ''.join(xml_image.xpath(IMAGE_TITLE_XPATH_QUERY,
namespaces=xml_image.nsmap) namespaces=xml_image.nsmap))
title = ''.join(title_list)
id = xml_image.values()[0].split('/')[-1] id = xml_image.values()[0].split('/')[-1]
image_list.append((id, title)) image_list.append((id, title))
return image_list return image_list
...@@ -135,14 +150,13 @@ class OOGranulate(object): ...@@ -135,14 +150,13 @@ class OOGranulate(object):
def getParagraphItemList(self): def getParagraphItemList(self):
"""Returns the list of paragraphs in the form of (id, class) where class """Returns the list of paragraphs in the form of (id, class) where class
may have special meaning to define TOC/TOI.""" may have special meaning to define TOC/TOI."""
key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name'
relevant_paragraph_list = self.document.parsed_content.xpath( relevant_paragraph_list = self.document.parsed_content.xpath(
'//text:p[not(ancestor::draw:frame)]', RELEVANT_PARAGRAPH_XPATH_QUERY,
namespaces=self.document.parsed_content.nsmap) namespaces=self.document.parsed_content.nsmap)
id = 0 id = 0
paragraph_list = [] paragraph_list = []
for p in relevant_paragraph_list: for p in relevant_paragraph_list:
paragraph_list.append((id, p.attrib[key])) paragraph_list.append((id, p.attrib[TEXT_STYLENAME_NAMESPACE]))
id += 1 id += 1
return paragraph_list return paragraph_list
...@@ -150,12 +164,11 @@ class OOGranulate(object): ...@@ -150,12 +164,11 @@ class OOGranulate(object):
"""Returns the paragraph in the form of (text, class).""" """Returns the paragraph in the form of (text, class)."""
try: try:
relevant_paragraph_list = self.document.parsed_content.xpath( relevant_paragraph_list = self.document.parsed_content.xpath(
'//text:p[not(ancestor::draw:frame)]', RELEVANT_PARAGRAPH_XPATH_QUERY,
namespaces=self.document.parsed_content.nsmap) namespaces=self.document.parsed_content.nsmap)
paragraph = relevant_paragraph_list[paragraph_id] paragraph = relevant_paragraph_list[paragraph_id]
text = ''.join(paragraph.xpath('.//text()', namespaces=paragraph.nsmap)) text = ''.join(paragraph.xpath('.//text()', namespaces=paragraph.nsmap))
key = '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}style-name' p_class = paragraph.attrib[TEXT_STYLENAME_NAMESPACE]
p_class = paragraph.attrib[key]
return (text, p_class) return (text, p_class)
except IndexError: except IndexError:
return None return None
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment