Commit 1a019507 authored by Stefan Behnel

robustness against unicode errors on encoding detection

parent 8c32ed8f
......
@@ -139,25 +139,25 @@ class Context:
def parse(self, source_filename, type_names, pxd, full_module_name):
# Parse the given source file and return a parse tree.
f = Utils.open_source_file(source_filename, "rU")
if isinstance(source_filename, unicode):
name = source_filename
else:
filename_encoding = sys.getfilesystemencoding()
if filename_encoding is None:
filename_encoding = sys.getdefaultencoding()
name = source_filename.decode(filename_encoding)
try:
f = Utils.open_source_file(source_filename, "rU")
try:
s = PyrexScanner(f, name, source_encoding = f.encoding,
type_names = type_names, context = self)
tree = Parsing.p_module(s, pxd, full_module_name)
except UnicodeDecodeError, msg:
error((name, 0, 0), "Decoding error, missing or incorrect coding=<encoding-name> at top of source (%s)" % msg)
finally:
f.close()
if isinstance(source_filename, unicode):
name = source_filename
else:
filename_encoding = sys.getfilesystemencoding()
if filename_encoding is None:
filename_encoding = sys.getdefaultencoding()
name = source_filename.decode(filename_encoding)
s = PyrexScanner(f, name, source_encoding = f.encoding,
type_names = type_names, context = self)
tree = Parsing.p_module(s, pxd, full_module_name)
finally:
f.close()
except UnicodeDecodeError, msg:
error((source_filename, 0, 0), "Decoding error, missing or incorrect coding=<encoding-name> at top of source (%s)" % msg)
if Errors.num_errors > 0:
raise CompileError
return tree
......
......
@@ -41,12 +41,15 @@ def detect_file_encoding(source_filename):
# PEPs 263 and 3120
f = codecs.open(source_filename, "rU", encoding="UTF-8")
try:
for line_no, line in enumerate(f):
encoding = _match_file_encoding(line)
chars = []
for i in range(2):
c = f.read(1)
while c and c != '\n':
chars.append(c)
c = f.read(1)
encoding = _match_file_encoding(u''.join(chars))
if encoding:
return encoding.group(1)
if line_no == 1:
break
finally:
f.close()
return "UTF-8"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment