Commit a5349f5d authored by Kirill Smelkov

*.py: Open files in binary mode and decode to UTF-8 explicitly if needed

On Windows, files opened in text mode use encoding=locale.getdefaultlocale(),
which is CP125X instead of UTF-8, even if $PYTHONIOENCODING=UTF-8. As a
result, e.g. test_strings_print fails as:

    E           Failed: not equal:
    E           Expected:
    E               print(qq(b)): "привет αβγ b"
    E               print(qq(u)): "привет αβγ u"
    E           Got:
    E               print(qq(b)): "привет αβγ b"
    E               print(qq(u)): "привет αβγ u"

where "Expected" was read from golang/testprog/golang_test_str.txt and
decoded wrongly.

-> Fix it by always opening files for reading in binary mode everywhere,
and decoding their content as UTF-8 manually where needed.
parent 8d723b34
...@@ -1889,8 +1889,10 @@ def test_fmtargspec(): ...@@ -1889,8 +1889,10 @@ def test_fmtargspec():
# readfile returns content of file @path. # readfile returns content of file @path.
def readfile(path): def readfile(path): # -> bytes
with open(path, "r") as f: # on windows in text mode files are opened with encoding=locale.getdefaultlocale()
# which is CP125X instead of UTF-8. -> manually decode as 'UTF-8'
with open(path, "rb") as f:
return f.read() return f.read()
# abbrev_home returns path with user home prefix abbreviated with ~. # abbrev_home returns path with user home prefix abbreviated with ~.
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2018-2021 Nexedi SA and Contributors. # Copyright (C) 2018-2023 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# #
# This program is free software: you can Use, Study, Modify and Redistribute # This program is free software: you can Use, Study, Modify and Redistribute
...@@ -439,13 +439,13 @@ def main(): ...@@ -439,13 +439,13 @@ def main():
# _is_buildout_script returns whether file @path is generated as python buildout script. # _is_buildout_script returns whether file @path is generated as python buildout script.
def _is_buildout_script(path): def _is_buildout_script(path):
with open(path, 'r') as f: with open(path, 'rb') as f:
src = f.read() src = f.read()
# buildout injects the following prologues into python scripts: # buildout injects the following prologues into python scripts:
# sys.path[0:0] = [ # sys.path[0:0] = [
# ... # ...
# ] # ]
return ('\nsys.path[0:0] = [\n' in src) return (b'\nsys.path[0:0] = [\n' in src)
# _IGetOpt provides getopt-style incremental options parsing. # _IGetOpt provides getopt-style incremental options parsing.
......
...@@ -26,9 +26,12 @@ from os.path import dirname, join ...@@ -26,9 +26,12 @@ from os.path import dirname, join
import sys, os, re import sys, os, re
# read file content # read file content
def readfile(path): def readfile(path): # -> str
with open(path, 'r') as f: with open(path, 'rb') as f:
return f.read() data = f.read()
if not isinstance(data, str): # py3
data = data.decode('utf-8')
return data
# reuse golang.pyx.build to build pygolang dso and extensions. # reuse golang.pyx.build to build pygolang dso and extensions.
# we have to be careful and inject synthetic golang package in order to be # we have to be careful and inject synthetic golang package in order to be
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment