Commit 595502fc authored by Stefan Behnel

adapt 'unicode' usage to Py2/Py3

parent e2922b0c
...@@ -56,7 +56,7 @@ if sys.version_info[0] < 3: ...@@ -56,7 +56,7 @@ if sys.version_info[0] < 3:
if _fs_encoding is None: if _fs_encoding is None:
_fs_encoding = sys.getdefaultencoding() _fs_encoding = sys.getdefaultencoding()
def encode_filename_in_py2(filename): def encode_filename_in_py2(filename):
if isinstance(filename, unicode): if not isinstance(filename, bytes):
return filename.encode(_fs_encoding) return filename.encode(_fs_encoding)
return filename return filename
else: else:
......
...@@ -27,7 +27,7 @@ IS_PY3 = sys.version_info >= (3, 0) ...@@ -27,7 +27,7 @@ IS_PY3 = sys.version_info >= (3, 0)
# A utility function to convert user-supplied ASCII strings to unicode. # A utility function to convert user-supplied ASCII strings to unicode.
if sys.version_info[0] < 3: if sys.version_info[0] < 3:
def to_unicode(s): def to_unicode(s):
if not isinstance(s, unicode): if isinstance(s, bytes):
return s.decode('ascii') return s.decode('ascii')
else: else:
return s return s
......
...@@ -6,8 +6,11 @@ The output is in a strict format, no whitespace or comments from the input ...@@ -6,8 +6,11 @@ The output is in a strict format, no whitespace or comments from the input
is preserved (and it could not be as it is not present in the code tree). is preserved (and it could not be as it is not present in the code tree).
""" """
from Cython.Compiler.Visitor import TreeVisitor from __future__ import absolute_import, print_function
from Cython.Compiler.ExprNodes import *
from .Compiler.Visitor import TreeVisitor
from .Compiler.ExprNodes import *
class LinesResult(object): class LinesResult(object):
def __init__(self): def __init__(self):
...@@ -497,7 +500,7 @@ class CodeWriter(DeclarationWriter): ...@@ -497,7 +500,7 @@ class CodeWriter(DeclarationWriter):
class PxdWriter(DeclarationWriter): class PxdWriter(DeclarationWriter):
def __call__(self, node): def __call__(self, node):
print u'\n'.join(self.write(node).lines) print(u'\n'.join(self.write(node).lines))
return node return node
def visit_CFuncDefNode(self, node): def visit_CFuncDefNode(self, node):
...@@ -516,5 +519,3 @@ class PxdWriter(DeclarationWriter): ...@@ -516,5 +519,3 @@ class PxdWriter(DeclarationWriter):
def visit_StatNode(self, node): def visit_StatNode(self, node):
pass pass
...@@ -218,7 +218,7 @@ class AnnotationCCodeWriter(CCodeWriter): ...@@ -218,7 +218,7 @@ class AnnotationCCodeWriter(CCodeWriter):
def annotate(match): def annotate(match):
group_name = match.lastgroup group_name = match.lastgroup
calls[group_name] += 1 calls[group_name] += 1
return ur"<span class='%s'>%s</span>" % ( return u"<span class='%s'>%s</span>" % (
group_name, match.group(group_name)) group_name, match.group(group_name))
lines = self._htmlify_code(cython_code).splitlines() lines = self._htmlify_code(cython_code).splitlines()
...@@ -275,22 +275,22 @@ class AnnotationCCodeWriter(CCodeWriter): ...@@ -275,22 +275,22 @@ class AnnotationCCodeWriter(CCodeWriter):
return outlist return outlist
_parse_code = re.compile( _parse_code = re.compile((
ur'(?P<refnanny>__Pyx_X?(?:GOT|GIVE)REF|__Pyx_RefNanny[A-Za-z]+)|' br'(?P<refnanny>__Pyx_X?(?:GOT|GIVE)REF|__Pyx_RefNanny[A-Za-z]+)|'
ur'(?P<trace>__Pyx_Trace[A-Za-z]+)|' br'(?P<trace>__Pyx_Trace[A-Za-z]+)|'
ur'(?:' br'(?:'
ur'(?P<pyx_macro_api>__Pyx_[A-Z][A-Z_]+)|' br'(?P<pyx_macro_api>__Pyx_[A-Z][A-Z_]+)|'
ur'(?P<pyx_c_api>__Pyx_[A-Z][a-z_][A-Za-z_]*)|' br'(?P<pyx_c_api>__Pyx_[A-Z][a-z_][A-Za-z_]*)|'
ur'(?P<py_macro_api>Py[A-Z][a-z]+_[A-Z][A-Z_]+)|' br'(?P<py_macro_api>Py[A-Z][a-z]+_[A-Z][A-Z_]+)|'
ur'(?P<py_c_api>Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]*)' br'(?P<py_c_api>Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]*)'
ur')(?=\()|' # look-ahead to exclude subsequent '(' from replacement br')(?=\()|' # look-ahead to exclude subsequent '(' from replacement
ur'(?P<error_goto>(?:(?<=;) *if .* +)?\{__pyx_filename = .*goto __pyx_L\w+;\})' br'(?P<error_goto>(?:(?<=;) *if .* +)?\{__pyx_filename = .*goto __pyx_L\w+;\})'
).sub ).decode('ascii')).sub
_replace_pos_comment = re.compile( _replace_pos_comment = re.compile(
# this matches what Cython generates as code line marker comment # this matches what Cython generates as code line marker comment
ur'^\s*/\*(?:(?:[^*]|\*[^/])*\n)+\s*\*/\s*\n', br'^\s*/\*(?:(?:[^*]|\*[^/])*\n)+\s*\*/\s*\n'.decode('ascii'),
re.M re.M
).sub ).sub
......
...@@ -163,7 +163,7 @@ class UtilityCodeBase(object): ...@@ -163,7 +163,7 @@ class UtilityCodeBase(object):
if ext in ('.pyx', '.py', '.pxd', '.pxi'): if ext in ('.pyx', '.py', '.pxd', '.pxi'):
comment = '#' comment = '#'
strip_comments = partial(re.compile(r'^\s*#.*').sub, '') strip_comments = partial(re.compile(r'^\s*#.*').sub, '')
rstrip = unicode.rstrip rstrip = str.rstrip
else: else:
comment = '/' comment = '/'
strip_comments = partial(re.compile(r'^\s*//.*|/\*[^*]*\*/').sub, '') strip_comments = partial(re.compile(r'^\s*//.*|/\*[^*]*\*/').sub, '')
...@@ -819,7 +819,7 @@ class PyObjectConst(object): ...@@ -819,7 +819,7 @@ class PyObjectConst(object):
cython.declare(possible_unicode_identifier=object, possible_bytes_identifier=object, cython.declare(possible_unicode_identifier=object, possible_bytes_identifier=object,
replace_identifier=object, find_alphanums=object) replace_identifier=object, find_alphanums=object)
possible_unicode_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match possible_unicode_identifier = re.compile(br"(?![0-9])\w+$".decode('ascii'), re.U).match
possible_bytes_identifier = re.compile(r"(?![0-9])\w+$".encode('ASCII')).match possible_bytes_identifier = re.compile(r"(?![0-9])\w+$".encode('ASCII')).match
replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').sub replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').sub
find_alphanums = re.compile('([a-zA-Z0-9]+)').findall find_alphanums = re.compile('([a-zA-Z0-9]+)').findall
...@@ -876,10 +876,10 @@ class StringConst(object): ...@@ -876,10 +876,10 @@ class StringConst(object):
if identifier: if identifier:
intern = True intern = True
elif identifier is None: elif identifier is None:
if isinstance(text, unicode): if isinstance(text, bytes):
intern = bool(possible_unicode_identifier(text))
else:
intern = bool(possible_bytes_identifier(text)) intern = bool(possible_bytes_identifier(text))
else:
intern = bool(possible_unicode_identifier(text))
else: else:
intern = False intern = False
if intern: if intern:
...@@ -2298,9 +2298,8 @@ class PyxCodeWriter(object): ...@@ -2298,9 +2298,8 @@ class PyxCodeWriter(object):
def getvalue(self): def getvalue(self):
result = self.buffer.getvalue() result = self.buffer.getvalue()
if not isinstance(result, unicode): if isinstance(result, bytes):
result = result.decode(self.encoding) result = result.decode(self.encoding)
return result return result
def putln(self, line, context=None): def putln(self, line, context=None):
......
...@@ -4,6 +4,11 @@ ...@@ -4,6 +4,11 @@
from __future__ import absolute_import from __future__ import absolute_import
try:
from __builtin__ import basestring as any_string_type
except ImportError:
any_string_type = (bytes, str)
import sys import sys
from ..Utils import open_new_file from ..Utils import open_new_file
...@@ -21,7 +26,7 @@ class PyrexWarning(Exception): ...@@ -21,7 +26,7 @@ class PyrexWarning(Exception):
def context(position): def context(position):
source = position[0] source = position[0]
assert not (isinstance(source, unicode) or isinstance(source, str)), ( assert not (isinstance(source, any_string_type)), (
"Please replace filename strings with Scanning.FileSourceDescriptor instances %r" % source) "Please replace filename strings with Scanning.FileSourceDescriptor instances %r" % source)
try: try:
F = source.get_lines() F = source.get_lines()
...@@ -167,7 +172,7 @@ def report_error(err): ...@@ -167,7 +172,7 @@ def report_error(err):
def error(position, message): def error(position, message):
#print "Errors.error:", repr(position), repr(message) ### #print("Errors.error:", repr(position), repr(message)) ###
if position is None: if position is None:
raise InternalError(message) raise InternalError(message)
err = CompileError(position, message) err = CompileError(position, message)
......
...@@ -45,12 +45,12 @@ from .DebugFlags import debug_disposal_code, debug_temp_alloc, \ ...@@ -45,12 +45,12 @@ from .DebugFlags import debug_disposal_code, debug_temp_alloc, \
try: try:
from __builtin__ import basestring from __builtin__ import basestring
except ImportError: except ImportError:
basestring = str # Python 3 # Python 3
basestring = str
try: any_string_type = (bytes, str)
from builtins import bytes else:
except ImportError: # Python 2
bytes = str # Python 2 any_string_type = (bytes, unicode)
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
...@@ -1216,7 +1216,7 @@ class FloatNode(ConstNode): ...@@ -1216,7 +1216,7 @@ class FloatNode(ConstNode):
def get_constant_c_result_code(self): def get_constant_c_result_code(self):
strval = self.value strval = self.value
assert isinstance(strval, (str, unicode)) assert isinstance(strval, basestring)
cmpval = repr(float(strval)) cmpval = repr(float(strval))
if cmpval == 'nan': if cmpval == 'nan':
return "(Py_HUGE_VAL * 0)" return "(Py_HUGE_VAL * 0)"
...@@ -10907,8 +10907,8 @@ class CmpNode(object): ...@@ -10907,8 +10907,8 @@ class CmpNode(object):
def calculate_cascaded_constant_result(self, operand1_result): def calculate_cascaded_constant_result(self, operand1_result):
func = compile_time_binary_operators[self.operator] func = compile_time_binary_operators[self.operator]
operand2_result = self.operand2.constant_result operand2_result = self.operand2.constant_result
if (isinstance(operand1_result, (bytes, unicode)) and if (isinstance(operand1_result, any_string_type) and
isinstance(operand2_result, (bytes, unicode)) and isinstance(operand2_result, any_string_type) and
type(operand1_result) != type(operand2_result)): type(operand1_result) != type(operand2_result)):
# string comparison of different types isn't portable # string comparison of different types isn't portable
return return
......
...@@ -6,7 +6,7 @@ import cython ...@@ -6,7 +6,7 @@ import cython
cython.declare(PyrexTypes=object, Naming=object, ExprNodes=object, Nodes=object, cython.declare(PyrexTypes=object, Naming=object, ExprNodes=object, Nodes=object,
Options=object, UtilNodes=object, LetNode=object, Options=object, UtilNodes=object, LetNode=object,
LetRefNode=object, TreeFragment=object, EncodedString=object, LetRefNode=object, TreeFragment=object, EncodedString=object,
error=object, warning=object, copy=object) error=object, warning=object, copy=object, _unicode=object)
from . import PyrexTypes from . import PyrexTypes
from . import Naming from . import Naming
...@@ -19,7 +19,7 @@ from .Visitor import VisitorTransform, TreeVisitor ...@@ -19,7 +19,7 @@ from .Visitor import VisitorTransform, TreeVisitor
from .Visitor import CythonTransform, EnvTransform, ScopeTrackingTransform from .Visitor import CythonTransform, EnvTransform, ScopeTrackingTransform
from .UtilNodes import LetNode, LetRefNode, ResultRefNode from .UtilNodes import LetNode, LetRefNode, ResultRefNode
from .TreeFragment import TreeFragment from .TreeFragment import TreeFragment
from .StringEncoding import EncodedString from .StringEncoding import EncodedString, _unicode
from .Errors import error, warning, CompileError, InternalError from .Errors import error, warning, CompileError, InternalError
from .Code import UtilityCode from .Code import UtilityCode
...@@ -663,7 +663,7 @@ class InterpretCompilerDirectives(CythonTransform, SkipDeclarations): ...@@ -663,7 +663,7 @@ class InterpretCompilerDirectives(CythonTransform, SkipDeclarations):
self.parallel_directives = {} self.parallel_directives = {}
directives = copy.deepcopy(Options.directive_defaults) directives = copy.deepcopy(Options.directive_defaults)
for key, value in compilation_directive_defaults.items(): for key, value in compilation_directive_defaults.items():
directives[unicode(key)] = copy.deepcopy(value) directives[_unicode(key)] = copy.deepcopy(value)
self.directives = directives self.directives = directives
def check_directive_scope(self, pos, directive, scope): def check_directive_scope(self, pos, directive, scope):
......
...@@ -7,7 +7,7 @@ from __future__ import absolute_import ...@@ -7,7 +7,7 @@ from __future__ import absolute_import
import cython import cython
cython.declare(make_lexicon=object, lexicon=object, cython.declare(make_lexicon=object, lexicon=object,
any_string_prefix=unicode, IDENT=unicode, any_string_prefix=cython.unicode, IDENT=cython.unicode,
print_function=object, error=object, warning=object, print_function=object, error=object, warning=object,
os=object, platform=object) os=object, platform=object)
......
...@@ -8,10 +8,10 @@ import re ...@@ -8,10 +8,10 @@ import re
import sys import sys
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
_unicode, _str, _bytes = str, str, bytes _unicode, _str, _bytes, _unichr = str, str, bytes, chr
IS_PYTHON3 = True IS_PYTHON3 = True
else: else:
_unicode, _str, _bytes = unicode, str, str _unicode, _str, _bytes, _unichr = unicode, str, str, unichr
IS_PYTHON3 = False IS_PYTHON3 = False
empty_bytes = _bytes() empty_bytes = _bytes()
...@@ -39,13 +39,13 @@ class UnicodeLiteralBuilder(object): ...@@ -39,13 +39,13 @@ class UnicodeLiteralBuilder(object):
# wide Unicode character on narrow platform => replace # wide Unicode character on narrow platform => replace
# by surrogate pair # by surrogate pair
char_number -= 0x10000 char_number -= 0x10000
self.chars.append( unichr((char_number // 1024) + 0xD800) ) self.chars.append( _unichr((char_number // 1024) + 0xD800) )
self.chars.append( unichr((char_number % 1024) + 0xDC00) ) self.chars.append( _unichr((char_number % 1024) + 0xDC00) )
else: else:
self.chars.append( unichr(char_number) ) self.chars.append( _unichr(char_number) )
else: else:
def append_charval(self, char_number): def append_charval(self, char_number):
self.chars.append( unichr(char_number) ) self.chars.append( _unichr(char_number) )
def append_uescape(self, char_number, escape_string): def append_uescape(self, char_number, escape_string):
self.append_charval(char_number) self.append_charval(char_number)
...@@ -71,7 +71,7 @@ class BytesLiteralBuilder(object): ...@@ -71,7 +71,7 @@ class BytesLiteralBuilder(object):
self.chars.append(characters) self.chars.append(characters)
def append_charval(self, char_number): def append_charval(self, char_number):
self.chars.append( unichr(char_number).encode('ISO-8859-1') ) self.chars.append( _unichr(char_number).encode('ISO-8859-1') )
def append_uescape(self, char_number, escape_string): def append_uescape(self, char_number, escape_string):
self.append(escape_string) self.append(escape_string)
...@@ -311,4 +311,4 @@ def encode_pyunicode_string(s): ...@@ -311,4 +311,4 @@ def encode_pyunicode_string(s):
if utf16 == utf32: if utf16 == utf32:
utf16 = [] utf16 = []
return ",".join(map(unicode, utf16)), ",".join(map(unicode, utf32)) return ",".join(map(_unicode, utf16)), ",".join(map(_unicode, utf32))
...@@ -17,6 +17,7 @@ from . import PyrexTypes ...@@ -17,6 +17,7 @@ from . import PyrexTypes
from .Visitor import VisitorTransform from .Visitor import VisitorTransform
from .Nodes import Node, StatListNode from .Nodes import Node, StatListNode
from .ExprNodes import NameNode from .ExprNodes import NameNode
from .StringEncoding import _unicode
from . import Parsing from . import Parsing
from . import Main from . import Main
from . import UtilNodes from . import UtilNodes
...@@ -59,7 +60,7 @@ def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None, ...@@ -59,7 +60,7 @@ def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None,
# to use a unicode string so that code fragments don't have to bother # to use a unicode string so that code fragments don't have to bother
# with encoding. This means that test code passed in should not have an # with encoding. This means that test code passed in should not have an
# encoding header. # encoding header.
assert isinstance(code, unicode), "unicode code snippets only please" assert isinstance(code, _unicode), "unicode code snippets only please"
encoding = "UTF-8" encoding = "UTF-8"
module_name = name module_name = name
...@@ -198,7 +199,7 @@ def copy_code_tree(node): ...@@ -198,7 +199,7 @@ def copy_code_tree(node):
return TreeCopier()(node) return TreeCopier()(node)
_match_indent = re.compile(ur"^ *").match _match_indent = re.compile(u"^ *").match
def strip_common_indent(lines): def strip_common_indent(lines):
...@@ -214,7 +215,7 @@ class TreeFragment(object): ...@@ -214,7 +215,7 @@ class TreeFragment(object):
def __init__(self, code, name=None, pxds={}, temps=[], pipeline=[], level=None, initial_pos=None): def __init__(self, code, name=None, pxds={}, temps=[], pipeline=[], level=None, initial_pos=None):
if not name: if not name:
name = "(tree fragment)" name = "(tree fragment)"
if isinstance(code, unicode): if isinstance(code, _unicode):
def fmt(x): return u"\n".join(strip_common_indent(x.split(u"\n"))) def fmt(x): return u"\n".join(strip_common_indent(x.split(u"\n")))
fmt_code = fmt(code) fmt_code = fmt(code)
......
...@@ -17,6 +17,11 @@ try: ...@@ -17,6 +17,11 @@ try:
except ImportError: except ImportError:
from sys import maxint from sys import maxint
try:
unichr
except NameError:
unichr = chr
LOWEST_PRIORITY = -maxint LOWEST_PRIORITY = -maxint
......
# cython: language_level=3
from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF, Py_XDECREF, Py_XINCREF from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF, Py_XDECREF, Py_XINCREF
from cpython.exc cimport PyErr_Fetch, PyErr_Restore from cpython.exc cimport PyErr_Fetch, PyErr_Restore
from cpython.pystate cimport PyThreadState_Get from cpython.pystate cimport PyThreadState_Get
...@@ -72,7 +74,7 @@ cdef void report_unraisable(object e=None): ...@@ -72,7 +74,7 @@ cdef void report_unraisable(object e=None):
if e is None: if e is None:
import sys import sys
e = sys.exc_info()[1] e = sys.exc_info()[1]
print u"refnanny raised an exception: %s" % e print(u"refnanny raised an exception: %s" % e)
except: except:
pass # We absolutely cannot exit with an exception pass # We absolutely cannot exit with an exception
...@@ -159,9 +161,10 @@ cdef void FinishContext(PyObject** ctx): ...@@ -159,9 +161,10 @@ cdef void FinishContext(PyObject** ctx):
context = <Context>ctx[0] context = <Context>ctx[0]
errors = context.end() errors = context.end()
if errors: if errors:
print u"%s: %s()" % (context.filename.decode('latin1'), print(u"%s: %s()" % (
context.name.decode('latin1')) context.filename.decode('latin1'),
print errors context.name.decode('latin1')))
print(errors)
context = None context = None
except: except:
report_unraisable() report_unraisable()
......
...@@ -43,7 +43,7 @@ import tokenize ...@@ -43,7 +43,7 @@ import tokenize
from io import StringIO from io import StringIO
from ._looper import looper from ._looper import looper
from .compat3 import bytes, basestring_, next, is_unicode, coerce_text from .compat3 import bytes, unicode, basestring_, next, is_unicode, coerce_text
__all__ = ['TemplateError', 'Template', 'sub', 'HTMLTemplate', __all__ = ['TemplateError', 'Template', 'sub', 'HTMLTemplate',
'sub_html', 'html', 'bunch'] 'sub_html', 'html', 'bunch']
......
import sys import sys
__all__ = ['b', 'basestring_', 'bytes', 'next', 'is_unicode'] __all__ = ['b', 'basestring_', 'bytes', 'unicode', 'next', 'is_unicode']
if sys.version < "3": if sys.version < "3":
b = bytes = str b = bytes = str
basestring_ = basestring basestring_ = basestring
unicode = unicode
else: else:
def b(s): def b(s):
...@@ -13,6 +14,7 @@ else: ...@@ -13,6 +14,7 @@ else:
return bytes(s) return bytes(s)
basestring_ = (bytes, str) basestring_ = (bytes, str)
bytes = bytes bytes = bytes
unicode = str
text = str text = str
if sys.version < "3": if sys.version < "3":
......
...@@ -20,7 +20,7 @@ TOOLS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', ...@@ -20,7 +20,7 @@ TOOLS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..',
@contextmanager @contextmanager
def _tempfile(code): def _tempfile(code):
code = dedent(code) code = dedent(code)
if isinstance(code, unicode): if not isinstance(code, bytes):
code = code.encode('utf8') code = code.encode('utf8')
with NamedTemporaryFile(suffix='.py') as f: with NamedTemporaryFile(suffix='.py') as f:
......
...@@ -131,8 +131,8 @@ cdef class array: ...@@ -131,8 +131,8 @@ cdef class array:
if itemsize <= 0: if itemsize <= 0:
raise ValueError("itemsize <= 0 for cython.array") raise ValueError("itemsize <= 0 for cython.array")
if isinstance(format, unicode): if not isinstance(format, bytes):
format = (<unicode>format).encode('ASCII') format = format.encode('ASCII')
self._format = format # keep a reference to the byte string self._format = format # keep a reference to the byte string
self.format = self._format self.format = self._format
......
...@@ -187,15 +187,14 @@ def path_exists(path): ...@@ -187,15 +187,14 @@ def path_exists(path):
# file name encodings # file name encodings
def decode_filename(filename): def decode_filename(filename):
if isinstance(filename, unicode): if isinstance(filename, bytes):
return filename try:
try: filename_encoding = sys.getfilesystemencoding()
filename_encoding = sys.getfilesystemencoding() if filename_encoding is None:
if filename_encoding is None: filename_encoding = sys.getdefaultencoding()
filename_encoding = sys.getdefaultencoding() filename = filename.decode(filename_encoding)
filename = filename.decode(filename_encoding) except UnicodeDecodeError:
except UnicodeDecodeError: pass
pass
return filename return filename
# support for source file encoding detection # support for source file encoding detection
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment