Commit 51408f04 authored by Leonardo Rochael Almeida's avatar Leonardo Rochael Almeida Committed by Sebastien Robin

Compatibility with pdftohtml/poppler 0.18+

In poppler 0.18+, pdftohtml generates "&#160;" instead of "&nbsp;", so
make sure both are accepted.
parent 3cefffd9
......@@ -71,6 +71,7 @@ from threading import Thread
import httplib
import urllib
import difflib
import re
from AccessControl import Unauthorized
from Products.ERP5Type import Permissions
from Products.ERP5Type.tests.backportUnittest import expectedFailure
......@@ -1238,14 +1239,18 @@ class TestDocument(TestDocumentMixin):
# XXX: search limited to a certain date range
# XXX: search mode
# "&nbsp;" and "&#160;" are two spellings of the same HTML non-breaking
# space, and "pdftohtml" can generate either one depending on the version
# of the "poppler" package in use. This pattern matches both spellings.
re_html_nbsp = re.compile(
    r'&(nbsp|#160);'
)
def test_PDFTextContent(self):
    """Check the text extracted from an uploaded PDF.

    Uploads 'REF-en-001.pdf' through portal_contributions and verifies
    that the expected sentence is found in the plain-text conversion,
    in the HTML conversion and in SearchableText().
    """
    expected_text = 'I use reference to look up TEST'
    upload_file = makeFileUpload('REF-en-001.pdf')
    document = self.portal.portal_contributions.newContent(file=upload_file)
    self.assertEquals('PDF', document.getPortalType())
    self.assertEquals(expected_text + '\n', document._convertToText())
    # The HTML conversion may contain non-breaking-space entities where
    # plain spaces are expected; normalise every spelling matched by
    # re_html_nbsp to a plain space before comparing. A single
    # str.replace() on one entity spelling is not enough.
    html_data = self.re_html_nbsp.sub(' ', document._convertToHTML())
    self.assert_(expected_text in html_data)
    self.assert_(expected_text in document.SearchableText())
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment