Commit 00353b7c authored by Boris Kocherov's avatar Boris Kocherov

handler.pdf: use pyPdf in setMetada

parent ee6a09ee
......@@ -25,6 +25,7 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import io
from zope.interface import implements
from cloudooo.interfaces.handler import IHandler
......@@ -33,6 +34,8 @@ from cloudooo.util import logger, parseContentType
from subprocess import Popen, PIPE
from tempfile import mktemp
from pyPdf import PdfFileWriter, PdfFileReader
from pyPdf.generic import NameObject, createStringObject
class Handler(object):
"""PDF Handler is used to handler inputed pdf document."""
......@@ -47,6 +50,7 @@ class Handler(object):
def convert(self, destination_format=None, **kw):
""" Convert a pdf document """
# TODO: use pyPdf
logger.debug("PDFConvert: %s > %s" % (self.document.source_format, destination_format))
output_url = mktemp(suffix=".%s" % destination_format,
dir=self.document.directory_name)
......@@ -66,6 +70,7 @@ class Handler(object):
"""Returns a dictionary with all metadata of document.
along with the metadata.
"""
# TODO: use pyPdf and not use lower()
command = ["pdfinfo", self.document.getUrl()]
stdout, stderr = Popen(command,
stdout=PIPE,
......@@ -75,13 +80,10 @@ class Handler(object):
info_list = filter(None, stdout.split("\n"))
metadata = {}
for info in iter(info_list):
if info.count(":") == 1:
info_name, info_value = info.split(":")
else:
info_name, info_value = info.split(" ")
info_name = info_name.replace(":", "")
info_value = info_value.strip()
metadata[info_name.lower()] = info_value
info = info.split(":")
info_name = info[0].lower()
info_value = ":".join(info[1:]).strip()
metadata[info_name] = info_value
self.document.trash()
return metadata
......@@ -90,31 +92,27 @@ class Handler(object):
Keyword arguments:
metadata -- expected an dictionary with metadata.
"""
text_template = "InfoKey: %s\nInfoValue: %s\n"
text_list = [text_template % (key.capitalize(), value) \
for key, value in metadata.iteritems()]
metadata_file = File(self.document.directory_name,
"".join(text_list),
"txt")
output_url = mktemp(suffix=".pdf",
dir=self.document.directory_name)
command = ["pdftk",
self.document.getUrl(),
"update_info",
metadata_file.getUrl(),
"output",
output_url
]
stdout, stderr = Popen(command,
stdout=PIPE,
stderr=PIPE,
close_fds=True,
env=self.environment).communicate()
self.document.reload(output_url)
try:
return self.document.getContent()
finally:
self.document.trash()
# TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
input_pdf = PdfFileReader(open(self.document.getUrl(), "rb"))
output_pdf = PdfFileWriter()
modification_date = metadata.pop("ModificationDate", None)
if modification_date:
metadata['ModDate'] = modification_date
if type(metadata.get('Keywords', None)) is list:
metadata['Keywords'] = metadata['Keywords'].join(' ')
args = {}
for key, value in list(metadata.items()):
args[NameObject('/' + key.capitalize())] = createStringObject(value)
output_pdf._info.getObject().update(args)
for page_num in range(input_pdf.getNumPages()):
output_pdf.addPage(input_pdf.getPage(page_num))
output_stream = io.BytesIO()
output_pdf.write(output_stream)
return output_stream.getvalue()
@staticmethod
def getAllowedConversionFormatList(source_mimetype):
......
from setuptools import setup, find_packages
import sys
version = '1.2.5-dev'
version = '1.2.6-dev'
def read(name):
return open(name).read()
......@@ -13,6 +13,7 @@ install_requires = [
'zope.interface',
'PasteDeploy',
'PasteScript',
'pyPdf',
'WSGIUtils',
'psutil>=3.0.0',
'lxml',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment