Commit 8306b381 authored by Julien Muchembled

Revert "handler.pdf: use pyPdf in setMetada"

This reverts commit 0ff799eb.

This is for old setups without pyPdf that want to get the latest changes
by simply updating their local working copies.
parent a09d87af
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
# See https://www.nexedi.com/licensing for rationale and options. # See https://www.nexedi.com/licensing for rationale and options.
# #
############################################################################## ##############################################################################
import io
from zope.interface import implements from zope.interface import implements
from cloudooo.interfaces.handler import IHandler from cloudooo.interfaces.handler import IHandler
...@@ -36,8 +35,6 @@ from cloudooo.util import logger, parseContentType ...@@ -36,8 +35,6 @@ from cloudooo.util import logger, parseContentType
from subprocess import Popen, PIPE from subprocess import Popen, PIPE
from tempfile import mktemp from tempfile import mktemp
from pyPdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import NameObject, createStringObject
class Handler(object): class Handler(object):
"""PDF Handler is used to handler inputed pdf document.""" """PDF Handler is used to handler inputed pdf document."""
...@@ -52,7 +49,6 @@ class Handler(object): ...@@ -52,7 +49,6 @@ class Handler(object):
def convert(self, destination_format=None, **kw): def convert(self, destination_format=None, **kw):
""" Convert a pdf document """ """ Convert a pdf document """
# TODO: use pyPdf
logger.debug("PDFConvert: %s > %s" % (self.document.source_format, destination_format)) logger.debug("PDFConvert: %s > %s" % (self.document.source_format, destination_format))
output_url = mktemp(suffix=".%s" % destination_format, output_url = mktemp(suffix=".%s" % destination_format,
dir=self.document.directory_name) dir=self.document.directory_name)
...@@ -72,7 +68,6 @@ class Handler(object): ...@@ -72,7 +68,6 @@ class Handler(object):
"""Returns a dictionary with all metadata of document. """Returns a dictionary with all metadata of document.
along with the metadata. along with the metadata.
""" """
# TODO: use pyPdf and not use lower()
command = ["pdfinfo", self.document.getUrl()] command = ["pdfinfo", self.document.getUrl()]
stdout, stderr = Popen(command, stdout, stderr = Popen(command,
stdout=PIPE, stdout=PIPE,
...@@ -82,10 +77,13 @@ class Handler(object): ...@@ -82,10 +77,13 @@ class Handler(object):
info_list = filter(None, stdout.split("\n")) info_list = filter(None, stdout.split("\n"))
metadata = {} metadata = {}
for info in iter(info_list): for info in iter(info_list):
info = info.split(":") if info.count(":") == 1:
info_name = info[0].lower() info_name, info_value = info.split(":")
info_value = ":".join(info[1:]).strip() else:
metadata[info_name] = info_value info_name, info_value = info.split(" ")
info_name = info_name.replace(":", "")
info_value = info_value.strip()
metadata[info_name.lower()] = info_value
self.document.trash() self.document.trash()
return metadata return metadata
...@@ -94,27 +92,31 @@ class Handler(object): ...@@ -94,27 +92,31 @@ class Handler(object):
Keyword arguments: Keyword arguments:
metadata -- expected an dictionary with metadata. metadata -- expected an dictionary with metadata.
""" """
# TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate text_template = "InfoKey: %s\nInfoValue: %s\n"
input_pdf = PdfFileReader(open(self.document.getUrl(), "rb")) text_list = [text_template % (key.capitalize(), value) \
output_pdf = PdfFileWriter() for key, value in metadata.iteritems()]
metadata_file = File(self.document.directory_name,
modification_date = metadata.pop("ModificationDate", None) "".join(text_list),
if modification_date: "txt")
metadata['ModDate'] = modification_date output_url = mktemp(suffix=".pdf",
if type(metadata.get('Keywords', None)) is list: dir=self.document.directory_name)
metadata['Keywords'] = metadata['Keywords'].join(' ') command = ["pdftk",
args = {} self.document.getUrl(),
for key, value in list(metadata.items()): "update_info",
args[NameObject('/' + key.capitalize())] = createStringObject(value) metadata_file.getUrl(),
"output",
output_pdf._info.getObject().update(args) output_url
]
for page_num in range(input_pdf.getNumPages()): stdout, stderr = Popen(command,
output_pdf.addPage(input_pdf.getPage(page_num)) stdout=PIPE,
stderr=PIPE,
output_stream = io.BytesIO() close_fds=True,
output_pdf.write(output_stream) env=self.environment).communicate()
return output_stream.getvalue() self.document.reload(output_url)
try:
return self.document.getContent()
finally:
self.document.trash()
@staticmethod @staticmethod
def getAllowedConversionFormatList(source_mimetype): def getAllowedConversionFormatList(source_mimetype):
......
...@@ -12,7 +12,6 @@ install_requires = [ ...@@ -12,7 +12,6 @@ install_requires = [
'zope.interface', 'zope.interface',
'PasteDeploy', 'PasteDeploy',
'PasteScript[WSGIUtils]', 'PasteScript[WSGIUtils]',
'pyPdf',
'psutil>=3.0.0', 'psutil>=3.0.0',
'lxml', 'lxml',
'python-magic', 'python-magic',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment