Commit 78d15e09 authored by Jérome Perrin's avatar Jérome Perrin

Fix mimetypes usage in handlers

cloudooo includes its own mime.types, so that it does not depend on
system configuration and behaves the same regardless of the underlying
system, but the embedded mime.types has to be loaded explicitly by
calling utils.loadMimetypeList() in each python process before using
the mimetypes module.

This was not done for the ooo and x2t handlers, so in practice they were
depending on the system mime.types and the tests were written to expect
the mimetypes from debian 10, but with debian 11 some mimetypes became
different (for example, the .bmp extension was guessed as image/x-ms-bmp [1]
on debian 10 and image/bmp on debian 11 [2]).

These changes:
 - include mime.types from debian 10 [3], but keeping the extra mimetypes
   for only-office documents (docy, ppty and xlsy) that were added in
   0bb5fbdc (x2t: add handler, 2016-09-22)
 - change the handlers to call utils.loadMimetypeList(), it was only
   strictly necessary for ooo handler, but do it as well in x2t for
   consistency.
 - adjust some tests in testX2tHandler, because now that we have
   loaded mimetypes database the mimetype of xlsy is returned in
   metadata, so we have "application/x-asc-spreadsheet" and not just "xlsy"
 - add a few more tests for xlsy and docy, because the magic based test
   only verifies that they are zip files; these new tests also make a few
   assertions on the content of the zip files

[1]: https://salsa.debian.org/debian/mime-support/-/blob/debian/3.62/mime.types#L677
[2]: https://salsa.debian.org/debian/media-types/-/blob/4.0.0/mime.types#L1804
[3]: https://salsa.debian.org/debian/mime-support/-/blob/debian/3.62/mime.types
parent f0c555b5
Pipeline #21447 failed with stage
in 0 seconds
......@@ -41,7 +41,7 @@ from cloudooo.handler.ooo.mimemapper import mimemapper
from cloudooo.handler.ooo.document import FileSystemDocument
from cloudooo.handler.ooo.monitor.timeout import MonitorTimeout
from cloudooo.handler.ooo.monitor import monitor_sleeping_time
from cloudooo.util import logger, parseContentType
from cloudooo.util import logger, parseContentType, loadMimetypeList
from psutil import pid_exists
......@@ -66,6 +66,7 @@ class Handler(object):
if not self.office_binary_path:
self.office_binary_path = environ.get("office_binary_path")
self._createDocument(base_folder_url, data, source_format)
loadMimetypeList()
def _createDocument(self, base_folder_url, data, source_format):
if source_format == 'csv':
......
......@@ -38,7 +38,7 @@ from zope.interface import implements
from cloudooo.interfaces.handler import IHandler
from cloudooo.file import File
from cloudooo.util import logger, unzip, parseContentType
from cloudooo.util import logger, unzip, parseContentType, loadMimetypeList
from cloudooo.handler.ooo.handler import Handler as OOoHandler
from zipfile import ZipFile
......@@ -179,6 +179,7 @@ class Handler(object):
self._init_kw = kw
self.file = File(base_folder_url, data, source_format)
self.environment = kw.get("env", {})
loadMimetypeList()
def convert(self, destination_format=None, **kw):
""" Convert the inputed file to output as format that were informed """
......
......@@ -101,7 +101,7 @@ class TestHandler(HandlerTestCase):
self.assertEquals(handler.getMetadata(), {
u'CreationDate': u'31/01/2018 21:09:10',
u'Keywords': [u'\u0442\u0435\u0441\u0442', u'\u0441\u0430\u0431\u0436\u0435\u043a\u0442'],
'MIMEType': 'xlsy',
'MIMEType': 'application/x-asc-spreadsheet',
u'ModificationDate': u'31/01/2018 21:22:36',
u'PrintDate': u'00/00/0000 00:00:00',
u'Subject': u'\u0432\u044b\u043a\u043b\u044e\u0447\u0438 \u0442\u0435\u043b\u0435\u0432\u0438\u0437\u043e\u0440',
......@@ -117,7 +117,7 @@ class TestHandler(HandlerTestCase):
"Keywords": "test keywords",
})
handler = Handler(self.tmp_url, new_mime_data, "xlsy", **self.kw)
self.assertEquals(handler.getMetadata(), {u'Keywords': u'test keywords', 'MIMEType': 'xlsy', u'Title': u'test title', u'Subject': u'test subject'})
self.assertEquals(handler.getMetadata(), {u'Keywords': u'test keywords', 'MIMEType': 'application/x-asc-spreadsheet', u'Title': u'test title', u'Subject': u'test subject'})
def testGetAllowedConversionFormatList(self):
"""Test all combination of mimetype
......
......@@ -26,7 +26,11 @@
# See https://www.nexedi.com/licensing for rationale and options.
#
##############################################################################
import io
import zipfile
from base64 import decodestring, encodestring
from os.path import join
from cloudooo.tests.cloudoooTestCase import TestCase
......@@ -35,6 +39,8 @@ class TestServer(TestCase):
def ConversionScenarioList(self):
return [
# magic recognizes xlsy and docy files as zip files, so the
# expected mime is application/zip
(join('data', 'test.xlsx'), "xlsx", "xlsy", "application/zip"),
(join('data', 'test.xlsy'), "xlsy", "xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
(join('data', 'test_with_image.docx'), "docx", "docy", "application/zip"),
......@@ -53,3 +59,31 @@ class TestServer(TestCase):
(open(join('data', 'test.xlsx')).read(), 'xlsx', 'xyz'),
]
def test_xlsx_to_xlsy(self):
    """Convert test.xlsx to xlsy and check the members of the resulting zip.

    The magic based scenario only sees a zip file, so this test also
    asserts on the names of the archive members.
    """
    # xlsx is a binary format: read it in binary mode so the bytes are
    # not altered by newline translation or text decoding.
    with open(join('data', 'test.xlsx'), 'rb') as f:
        xlsx_data = f.read()
    xlsy_data = self.proxy.convertFile(
        encodestring(xlsx_data),
        'xlsx',
        'xlsy',
        False
    )
    # Close the archive once its member names have been read.
    with zipfile.ZipFile(io.BytesIO(decodestring(xlsy_data))) as xlsy_zip:
        member_list = xlsy_zip.namelist()
    self.assertEqual(
        sorted(member_list),
        sorted(['Editor.xlsx', 'body.txt', 'metadata.json'])
    )
def test_docx_to_docy(self):
    """Convert test_with_image.docx to docy and check the zip members.

    The magic based scenario only sees a zip file, so this test also
    asserts on the names of the archive members.
    """
    # docx is a binary format: read it in binary mode so the bytes are
    # not altered by newline translation or text decoding.
    with open(join('data', 'test_with_image.docx'), 'rb') as f:
        docx_data = f.read()
    docy_data = self.proxy.convertFile(
        encodestring(docx_data),
        'docx',
        'docy',
        False
    )
    # Close the archive once its member names have been read.
    with zipfile.ZipFile(io.BytesIO(decodestring(docy_data))) as docy_zip:
        member_list = docy_zip.namelist()
    self.assertEqual(
        sorted(member_list),
        sorted(['body.txt', 'media/image1.png', 'metadata.json'])
    )
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment