From cee0b53606fc9a86c7a82adc475f57eb3fef361b Mon Sep 17 00:00:00 2001
From: Nicolas Delaby <nicolas@nexedi.com>
Date: Tue, 30 Nov 2010 13:47:51 +0000
Subject: [PATCH] Use subprocesstransform for better transformation handling.
 Changes in arguments:  '-enc' removed, UTF-8 is used by default.  '-layout'
 added: maintain (as best as possible) the original physical layout of the
 text.  '-nopgbrk' added: don't insert page breaks.

git-svn-id: https://svn.erp5.org/repos/public/erp5/trunk@40925 20353a03-c40f-0410-a6d1-a30d3c3de9de
---
 product/PortalTransforms/transforms/pdf_to_text.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/product/PortalTransforms/transforms/pdf_to_text.py b/product/PortalTransforms/transforms/pdf_to_text.py
index da5ce4d45a..fd38c596cc 100644
--- a/product/PortalTransforms/transforms/pdf_to_text.py
+++ b/product/PortalTransforms/transforms/pdf_to_text.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """
 Uses the xpdf (www.foolabs.com/xpdf)
 """
@@ -6,10 +7,11 @@ from Products.PortalTransforms.interfaces import itransform
 from Products.PortalTransforms.libtransforms.utils import bin_search, sansext
 from Products.PortalTransforms.libtransforms.commandtransform import commandtransform
 from Products.PortalTransforms.libtransforms.commandtransform import popentransform
+from Products.PortalTransforms.libtransforms.commandtransform import subprocesstransform
 import os
 from zope.interface import implements
 
-class pdf_to_text(popentransform):
+class pdf_to_text(subprocesstransform):
     implements(itransform)
 
     __name__ = "pdf_to_text"
@@ -20,8 +22,8 @@ class pdf_to_text(popentransform):
     __version__ = '2004-07-02.01'
 
     binaryName = "pdftotext"
-    binaryArgs = "%(infile)s -enc UTF-8 -"
-    useStdin = False
+    binaryArgs = "-layout -nopgbrk %(infile)s -"
+    useStdin = True
 
 class old_pdf_to_text(commandtransform):
     implements(itransform)
-- 
GitLab