Commit 2e279971, authored by Emmy Vouriot and committed by Jérome Perrin

Convert bytes to str when reading HTML or converting to text (WIP)

parent dc6298a8
......@@ -194,7 +194,7 @@ class PDFDocument(Image):
context=self, filename=filename,
mimetype=self.getContentType())
if result:
return result
return bytes2str(result)
else:
# Try to use OCR from ghostscript, but tolerate that the command might
# not be available.
......@@ -282,7 +282,7 @@ class PDFDocument(Image):
command = ['pdftohtml', '-enc', 'UTF-8', '-stdout',
'-noframes', '-i', tmp.name]
try:
command_result = Popen(command, stdout=PIPE).communicate()[0]
command_result = bytes2str(Popen(command, stdout=PIPE).communicate()[0])
except OSError as e:
if e.errno == errno.ENOENT:
raise ConversionError('pdftohtml was not found')
......@@ -291,10 +291,10 @@ class PDFDocument(Image):
finally:
tmp.close()
# Quick hack to remove bg color - XXX
h = command_result.replace(b'<BODY bgcolor="#A0A0A0"', b'<BODY ')
h = command_result.replace('<BODY bgcolor="#A0A0A0"', '<BODY ')
# Make links relative
h = h.replace(str2bytes('href="%s.html' % tmp.name.split(os.sep)[-1]),
b'href="asEntireHTML')
h = h.replace('href="%s.html' % tmp.name.split(os.sep)[-1],
'href="asEntireHTML')
return h
security.declarePrivate('_convertToDJVU')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment