From cee0b53606fc9a86c7a82adc475f57eb3fef361b Mon Sep 17 00:00:00 2001 From: Nicolas Delaby <nicolas@nexedi.com> Date: Tue, 30 Nov 2010 13:47:51 +0000 Subject: [PATCH] Use subprocesstransform for better transformation handling. Changes in arguments: '-enc' removed, UTF-8 is used by default. '-layout' Maintain (as best as possible) the original physical layout of the text. '-nopgbrk' Don't insert page breaks. git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@40925 20353a03-c40f-0410-a6d1-a30d3c3de9de --- product/PortalTransforms/transforms/pdf_to_text.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/product/PortalTransforms/transforms/pdf_to_text.py b/product/PortalTransforms/transforms/pdf_to_text.py index da5ce4d45a..fd38c596cc 100644 --- a/product/PortalTransforms/transforms/pdf_to_text.py +++ b/product/PortalTransforms/transforms/pdf_to_text.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Uses the xpdf (www.foolabs.com/xpdf) """ @@ -6,10 +7,11 @@ from Products.PortalTransforms.interfaces import itransform from Products.PortalTransforms.libtransforms.utils import bin_search, sansext from Products.PortalTransforms.libtransforms.commandtransform import commandtransform from Products.PortalTransforms.libtransforms.commandtransform import popentransform +from Products.PortalTransforms.libtransforms.commandtransform import subprocesstransform import os from zope.interface import implements -class pdf_to_text(popentransform): +class pdf_to_text(subprocesstransform): implements(itransform) __name__ = "pdf_to_text" @@ -20,8 +22,8 @@ class pdf_to_text(popentransform): __version__ = '2004-07-02.01' binaryName = "pdftotext" - binaryArgs = "%(infile)s -enc UTF-8 -" - useStdin = False + binaryArgs = "-layout -nopgbrk %(infile)s -" + useStdin = True class old_pdf_to_text(commandtransform): implements(itransform) -- GitLab