Commit de321992 authored by Jérome Perrin

unoconverter: support UTF-8 encoded CSV

ISO-8859-15 (latin9) is still supported as a fallback when the input CSV does
not decode as UTF-8.
parent 846f6c5c
...@@ -54,9 +54,6 @@ class Handler(object): ...@@ -54,9 +54,6 @@ class Handler(object):
def __init__(self, base_folder_url, data, source_format, **kw): def __init__(self, base_folder_url, data, source_format, **kw):
"""Creates document in file system and loads it in OOo.""" """Creates document in file system and loads it in OOo."""
self.document = FileSystemDocument(base_folder_url,
data,
source_format)
self.zip = kw.get('zip', False) self.zip = kw.get('zip', False)
self.uno_path = kw.get("uno_path", None) self.uno_path = kw.get("uno_path", None)
self.office_binary_path = kw.get("office_binary_path", None) self.office_binary_path = kw.get("office_binary_path", None)
...@@ -67,6 +64,22 @@ class Handler(object): ...@@ -67,6 +64,22 @@ class Handler(object):
self.uno_path = environ.get("uno_path") self.uno_path = environ.get("uno_path")
if not self.office_binary_path: if not self.office_binary_path:
self.office_binary_path = environ.get("office_binary_path") self.office_binary_path = environ.get("office_binary_path")
self._createDocument(base_folder_url, data, source_format)
def _createDocument(self, base_folder_url, data, source_format):
    """Store the input data as a FileSystemDocument in self.document.

    When source_format is 'csv', the data is first checked for UTF-8
    validity. Data that does not decode as UTF-8 is treated as iso-8859-15,
    re-encoded to UTF-8 and a warning is logged. Data in any other format
    is passed through unchanged.
    """
    needs_encoding_check = source_format == 'csv'
    if needs_encoding_check:
        # UTF-8 is the expected csv encoding; iso-8859-15 (latin9) is only
        # tolerated for backward compatibility. The heuristic is simply:
        # anything that fails to decode as UTF-8 is assumed to be latin9.
        try:
            # The decoded value is discarded, this is only a validity probe.
            unicode(data, 'utf-8')
        except UnicodeDecodeError:
            latin9_text = unicode(data, 'iso-8859-15')
            data = latin9_text.encode('utf-8')
            logger.warn("csv data is not utf-8, assuming iso-8859-15")
    self.document = FileSystemDocument(base_folder_url, data, source_format)
def _getCommand(self, *args, **kw): def _getCommand(self, *args, **kw):
"""Transforms all parameters passed in a command""" """Transforms all parameters passed in a command"""
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
import sys import sys
import helper_util import helper_util
from os.path import dirname from os.path import dirname, splitext
from tempfile import mktemp from tempfile import mktemp
from base64 import decodestring, encodestring from base64 import decodestring, encodestring
from getopt import getopt, GetoptError from getopt import getopt, GetoptError
...@@ -144,6 +144,17 @@ class UnoConverter(object): ...@@ -144,6 +144,17 @@ class UnoConverter(object):
else: else:
return () return ()
def _getPropertyToImport(self, source_url):
"""Create the property for import filter, according to the extension of the file."""
_, extension = splitext(source_url)
if extension == '.csv':
# https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
return (
self._createProperty("FilterName", "Text - txt - csv (StarCalc)"),
self._createProperty("FilterOptions", "44,34,UTF-8"), )
return ()
def _load(self): def _load(self):
"""Create one document with basic properties """Create one document with basic properties
refresh argument tells to uno environment to refresh argument tells to uno environment to
...@@ -154,7 +165,11 @@ class UnoConverter(object): ...@@ -154,7 +165,11 @@ class UnoConverter(object):
self.office_binary_path) self.office_binary_path)
desktop = service_manager.createInstance("com.sun.star.frame.Desktop") desktop = service_manager.createInstance("com.sun.star.frame.Desktop")
uno_url = self.systemPathToFileUrl(self.document_url) uno_url = self.systemPathToFileUrl(self.document_url)
uno_document = desktop.loadComponentFromURL(uno_url, "_blank", 0, ()) uno_document = desktop.loadComponentFromURL(
uno_url,
"_blank",
0,
self._getPropertyToImport(self.document_url))
if not uno_document: if not uno_document:
raise AttributeError("This document can not be loaded or is empty") raise AttributeError("This document can not be loaded or is empty")
if self.refresh: if self.refresh:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment