def getRequirementListByMimetype(self, origin_mimetype, target_mimetype):
    """Return the policy requirements applicable to this conversion.

    The policy registered for ``target_mimetype`` lists the ids of
    transforms that must be part of the conversion path.  Applying that
    list blindly forces unwanted detours: for example pdf => text would
    be routed through an intermediate HTML step just because
    ``html_to_text`` is a requirement for ``text/plain``, although a
    direct ``pdf_to_text`` transform is what we want.

    A requirement is therefore kept only when the required transform
    accepts ``origin_mimetype`` as one of its inputs, i.e.
    ``html_to_text`` is returned only when ``origin_mimetype`` is
    ``'text/html'`` and the policy looked up is the one of
    ``'text/plain'``.

    origin_mimetype -- mimetype (string) of the data to convert
    target_mimetype -- mimetype (string) requested as output; used as
                       key into ``self._policies``

    Returns a new list of transform ids, in policy order.  It is empty
    when no policy exists for ``target_mimetype`` or when none of its
    requirements applies.  The result is meant to be handed to
    ``_findPath`` as the list of required transforms.
    """
    applicable_requirement_list = []
    for requirement in self._policies.get(target_mimetype, []):
        # A policy can name a transform that is not (or no longer)
        # available on the tool.  Such a requirement cannot apply to any
        # origin mimetype, so skip it instead of letting AttributeError
        # abort the whole conversion.
        transform = getattr(self, requirement, None)
        if transform is None:
            continue
        # Tolerate objects that do not declare any input mimetype.
        accepted_input_list = getattr(transform, 'inputs', None) or ()
        if origin_mimetype in accepted_input_list:
            applicable_requirement_list.append(requirement)
    return applicable_requirement_list