Commit de321992 authored by Jérome Perrin's avatar Jérome Perrin

unoconverter: support UTF-8 encoded CSV

latin1 is still supported as a fallback when input csv does not decode
as utf8
parent 846f6c5c
......@@ -54,9 +54,6 @@ class Handler(object):
def __init__(self, base_folder_url, data, source_format, **kw):
"""Creates document in file system and loads it in OOo."""
self.document = FileSystemDocument(base_folder_url,
data,
source_format)
self.zip = kw.get('zip', False)
self.uno_path = kw.get("uno_path", None)
self.office_binary_path = kw.get("office_binary_path", None)
......@@ -67,6 +64,22 @@ class Handler(object):
self.uno_path = environ.get("uno_path")
if not self.office_binary_path:
self.office_binary_path = environ.get("office_binary_path")
self._createDocument(base_folder_url, data, source_format)
def _createDocument(self, base_folder_url, data, source_format):
if source_format == 'csv':
# Cloudooo expect utf-8 encoded csv, but also tolerate latin9 for
# backward compatibility.
# The heuristic is "if it's not utf-8", let's assume it's iso-8859-15.
try:
unicode(data, 'utf-8')
except UnicodeDecodeError:
data = unicode(data, 'iso-8859-15').encode('utf-8')
logger.warn("csv data is not utf-8, assuming iso-8859-15")
self.document = FileSystemDocument(
base_folder_url,
data,
source_format)
def _getCommand(self, *args, **kw):
"""Transforms all parameters passed in a command"""
......
......@@ -29,7 +29,7 @@
import sys
import helper_util
from os.path import dirname
from os.path import dirname, splitext
from tempfile import mktemp
from base64 import decodestring, encodestring
from getopt import getopt, GetoptError
......@@ -144,6 +144,17 @@ class UnoConverter(object):
else:
return ()
def _getPropertyToImport(self, source_url):
"""Create the property for import filter, according to the extension of the file."""
_, extension = splitext(source_url)
if extension == '.csv':
# https://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options
return (
self._createProperty("FilterName", "Text - txt - csv (StarCalc)"),
self._createProperty("FilterOptions", "44,34,UTF-8"), )
return ()
def _load(self):
"""Create one document with basic properties
refresh argument tells to uno environment to
......@@ -154,7 +165,11 @@ class UnoConverter(object):
self.office_binary_path)
desktop = service_manager.createInstance("com.sun.star.frame.Desktop")
uno_url = self.systemPathToFileUrl(self.document_url)
uno_document = desktop.loadComponentFromURL(uno_url, "_blank", 0, ())
uno_document = desktop.loadComponentFromURL(
uno_url,
"_blank",
0,
self._getPropertyToImport(self.document_url))
if not uno_document:
raise AttributeError("This document can not be loaded or is empty")
if self.refresh:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment