Commit c4ba39e1 authored by Stefan Behnel

merged intern table with general string table to support unicode string interning in Py3

parent 84469ac4
...@@ -221,12 +221,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -221,12 +221,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("/* Implementation of %s */" % env.qualified_name) code.putln("/* Implementation of %s */" % env.qualified_name)
self.generate_const_definitions(env, code) self.generate_const_definitions(env, code)
self.generate_interned_num_decls(env, code) self.generate_interned_num_decls(env, code)
self.generate_interned_name_decls(env, code)
self.generate_py_string_decls(env, code) self.generate_py_string_decls(env, code)
self.generate_cached_builtins_decls(env, code) self.generate_cached_builtins_decls(env, code)
self.body.generate_function_definitions(env, code, options.transforms) self.body.generate_function_definitions(env, code, options.transforms)
code.mark_pos(None) code.mark_pos(None)
self.generate_interned_name_table(env, code)
self.generate_py_string_table(env, code) self.generate_py_string_table(env, code)
self.generate_typeobj_definitions(env, code) self.generate_typeobj_definitions(env, code)
self.generate_method_table(env, code) self.generate_method_table(env, code)
...@@ -1362,47 +1360,33 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -1362,47 +1360,33 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
"{0, 0, 0, 0, 0}") "{0, 0, 0, 0, 0}")
code.putln( code.putln(
"};") "};")
def generate_interned_name_table(self, env, code):
code.mark_pos(None)
items = env.intern_map.items()
if items:
items.sort()
code.putln("")
code.putln(
"static __Pyx_InternTabEntry %s[] = {" %
Naming.intern_tab_cname)
for (name, cname) in items:
code.putln(
'{&%s, "%s"},' % (
cname,
name))
code.putln(
"{0, 0}")
code.putln(
"};")
def generate_py_string_table(self, env, code): def generate_py_string_table(self, env, code):
entries = env.all_pystring_entries entries = env.all_pystring_entries
if entries: if entries:
code.putln("") code.putln("")
for entry in entries:
if entry.is_interned:
code.putln('static char %s[] = "%s";' % (
entry.cname, entry.init))
code.putln("")
code.putln( code.putln(
"static __Pyx_StringTabEntry %s[] = {" % "static __Pyx_StringTabEntry %s[] = {" %
Naming.stringtab_cname) Naming.stringtab_cname)
for entry in entries: for entry in entries:
code.putln( code.putln(
"{&%s, %s, sizeof(%s), %d}," % ( "{&%s, %s, sizeof(%s), %d, %d}," % (
entry.pystring_cname, entry.pystring_cname,
entry.cname, entry.cname,
entry.cname, entry.cname,
entry.type.is_unicode entry.type.is_unicode,
entry.is_interned
)) ))
code.putln( code.putln(
"{0, 0, 0, 0}") "{0, 0, 0, 0, 0}")
code.putln( code.putln(
"};") "};")
def generate_filename_init_prototype(self, code): def generate_filename_init_prototype(self, code):
code.putln(""); code.putln("");
code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname) code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname)
...@@ -1546,12 +1530,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -1546,12 +1530,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
entry.cname, entry.cname,
entry.init, entry.init,
code.error_goto_if_null(entry.cname, self.pos))) code.error_goto_if_null(entry.cname, self.pos)))
if env.intern_map:
env.use_utility_code(Nodes.init_intern_tab_utility_code);
code.putln(
"if (__Pyx_InternStrings(%s) < 0) %s;" % (
Naming.intern_tab_cname,
code.error_goto(self.pos)))
def generate_string_init_code(self, env, code): def generate_string_init_code(self, env, code):
if env.all_pystring_entries: if env.all_pystring_entries:
......
...@@ -231,19 +231,6 @@ class BlockNode: ...@@ -231,19 +231,6 @@ class BlockNode:
if not entry.is_interned: if not entry.is_interned:
code.put_var_declaration(entry, static = 1) code.put_var_declaration(entry, static = 1)
def generate_interned_name_decls(self, env, code):
# Flush accumulated interned names from the global scope
# and generate declarations for them.
genv = env.global_scope()
intern_map = genv.intern_map
names = genv.interned_names
if names:
code.putln("")
for name in names:
code.putln(
"static PyObject *%s;" % intern_map[name])
del names[:]
def generate_py_string_decls(self, env, code): def generate_py_string_decls(self, env, code):
entries = env.pystring_entries entries = env.pystring_entries
if entries: if entries:
...@@ -878,7 +865,6 @@ class FuncDefNode(StatNode, BlockNode): ...@@ -878,7 +865,6 @@ class FuncDefNode(StatNode, BlockNode):
# if we supported them, which we probably won't. # if we supported them, which we probably won't.
# ----- Top-level constants used by this function # ----- Top-level constants used by this function
self.generate_interned_num_decls(lenv, code) self.generate_interned_num_decls(lenv, code)
self.generate_interned_name_decls(lenv, code)
self.generate_py_string_decls(lenv, code) self.generate_py_string_decls(lenv, code)
self.generate_cached_builtins_decls(lenv, code) self.generate_cached_builtins_decls(lenv, code)
#code.putln("") #code.putln("")
...@@ -3729,8 +3715,7 @@ utility_function_predeclarations = \ ...@@ -3729,8 +3715,7 @@ utility_function_predeclarations = \
#define INLINE #define INLINE
#endif #endif
typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ typedef struct {PyObject **p; char *s; long n; char is_unicode; char intern;} __Pyx_StringTabEntry; /*proto*/
typedef struct {PyObject **p; char *s; long n; int is_unicode;} __Pyx_StringTabEntry; /*proto*/
""" + """ """ + """
...@@ -4306,27 +4291,6 @@ done: ...@@ -4306,27 +4291,6 @@ done:
#------------------------------------------------------------------------------------ #------------------------------------------------------------------------------------
init_intern_tab_utility_code = [
"""
static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/
""","""
static int __Pyx_InternStrings(__Pyx_InternTabEntry *t) {
while (t->p) {
#if PY_MAJOR_VERSION < 3
*t->p = PyString_InternFromString(t->s);
#else
*t->p = PyString_FromString(t->s);
#endif
if (!*t->p)
return -1;
++t;
}
return 0;
}
"""]
#------------------------------------------------------------------------------------
init_string_tab_utility_code = [ init_string_tab_utility_code = [
""" """
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
...@@ -4337,10 +4301,19 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { ...@@ -4337,10 +4301,19 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
#if PY_MAJOR_VERSION < 3 #if PY_MAJOR_VERSION < 3
*t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
#else #else
*t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); if (t->intern) {
*t->p = PyUnicode_InternFromString(t->s);
} else {
*t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
}
#endif #endif
} else { } else {
*t->p = PyString_FromStringAndSize(t->s, t->n - 1); #if PY_MAJOR_VERSION < 3
if (t->intern)
*t->p = PyString_InternFromString(t->s);
else
#endif
*t->p = PyString_FromStringAndSize(t->s, t->n - 1);
} }
if (!*t->p) if (!*t->p)
return -1; return -1;
......
...@@ -15,7 +15,7 @@ from TypeSlots import \ ...@@ -15,7 +15,7 @@ from TypeSlots import \
import ControlFlow import ControlFlow
import __builtin__ import __builtin__
identifier_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$") possible_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match
class Entry: class Entry:
# A symbol table entry in a Scope or ModuleNamespace. # A symbol table entry in a Scope or ModuleNamespace.
...@@ -64,7 +64,6 @@ class Entry: ...@@ -64,7 +64,6 @@ class Entry:
# type is an extension type # type is an extension type
# as_module None Module scope, if a cimported module # as_module None Module scope, if a cimported module
# is_inherited boolean Is an inherited attribute of an extension type # is_inherited boolean Is an inherited attribute of an extension type
# #interned_cname string C name of interned name string
# pystring_cname string C name of Python version of string literal # pystring_cname string C name of Python version of string literal
# is_interned boolean For string const entries, value is interned # is_interned boolean For string const entries, value is interned
# used boolean # used boolean
...@@ -104,7 +103,6 @@ class Entry: ...@@ -104,7 +103,6 @@ class Entry:
in_cinclude = 0 in_cinclude = 0
as_module = None as_module = None
is_inherited = 0 is_inherited = 0
#interned_cname = None
pystring_cname = None pystring_cname = None
is_interned = 0 is_interned = 0
used = 0 used = 0
...@@ -204,10 +202,10 @@ class Scope: ...@@ -204,10 +202,10 @@ class Scope:
def __str__(self): def __str__(self):
return "<%s %s>" % (self.__class__.__name__, self.qualified_name) return "<%s %s>" % (self.__class__.__name__, self.qualified_name)
def intern(self, name): def intern(self, name):
return self.global_scope().intern(name) return self.global_scope().intern(name)
def qualifying_scope(self): def qualifying_scope(self):
return self.parent_scope return self.parent_scope
...@@ -465,14 +463,12 @@ class Scope: ...@@ -465,14 +463,12 @@ class Scope:
# Python identifier, it will be interned. # Python identifier, it will be interned.
if not entry.pystring_cname: if not entry.pystring_cname:
value = entry.init value = entry.init
if not entry.type.is_unicode and identifier_pattern.match(value): if possible_identifier(value):
entry.pystring_cname = self.intern(value)
entry.is_interned = 1 entry.is_interned = 1
else: entry.pystring_cname = entry.cname + "p"
entry.pystring_cname = entry.cname + "p" self.pystring_entries.append(entry)
self.pystring_entries.append(entry) self.global_scope().all_pystring_entries.append(entry)
self.global_scope().all_pystring_entries.append(entry)
def add_py_num(self, value): def add_py_num(self, value):
# Add an entry for an int constant. # Add an entry for an int constant.
cname = "%s%s" % (Naming.interned_num_prefix, value) cname = "%s%s" % (Naming.interned_num_prefix, value)
...@@ -678,7 +674,6 @@ class ModuleScope(Scope): ...@@ -678,7 +674,6 @@ class ModuleScope(Scope):
# pxd_file_loaded boolean Corresponding .pxd file has been processed # pxd_file_loaded boolean Corresponding .pxd file has been processed
# cimported_modules [ModuleScope] Modules imported with cimport # cimported_modules [ModuleScope] Modules imported with cimport
# intern_map {string : string} Mapping from Python names to interned strs # intern_map {string : string} Mapping from Python names to interned strs
# interned_names [string] Interned names pending generation of declarations
# interned_nums [int/long] Interned numeric constants # interned_nums [int/long] Interned numeric constants
# all_pystring_entries [Entry] Python string consts from all scopes # all_pystring_entries [Entry] Python string consts from all scopes
# types_imported {PyrexType : 1} Set of types for which import code generated # types_imported {PyrexType : 1} Set of types for which import code generated
...@@ -706,7 +701,6 @@ class ModuleScope(Scope): ...@@ -706,7 +701,6 @@ class ModuleScope(Scope):
self.pxd_file_loaded = 0 self.pxd_file_loaded = 0
self.cimported_modules = [] self.cimported_modules = []
self.intern_map = {} self.intern_map = {}
self.interned_names = []
self.interned_nums = [] self.interned_nums = []
self.interned_objs = [] self.interned_objs = []
self.all_pystring_entries = [] self.all_pystring_entries = []
...@@ -743,15 +737,11 @@ class ModuleScope(Scope): ...@@ -743,15 +737,11 @@ class ModuleScope(Scope):
else: else:
entry.is_builtin = 1 entry.is_builtin = 1
return entry return entry
def intern(self, name): def intern(self, name):
intern_map = self.intern_map string_entry = self.add_string_const(name)
cname = intern_map.get(name) self.add_py_string(string_entry)
if not cname: return string_entry.pystring_cname
cname = Naming.interned_prefix + name
intern_map[name] = cname
self.interned_names.append(name)
return cname
def find_module(self, module_name, pos): def find_module(self, module_name, pos):
# Find a module in the import namespace, interpreting # Find a module in the import namespace, interpreting
...@@ -832,8 +822,6 @@ class ModuleScope(Scope): ...@@ -832,8 +822,6 @@ class ModuleScope(Scope):
"Non-cdef global variable is not a generic Python object") "Non-cdef global variable is not a generic Python object")
entry.is_pyglobal = 1 entry.is_pyglobal = 1
entry.namespace_cname = self.module_cname entry.namespace_cname = self.module_cname
#if Options.intern_names:
# entry.interned_cname = self.intern(name)
else: else:
entry.is_cglobal = 1 entry.is_cglobal = 1
self.var_entries.append(entry) self.var_entries.append(entry)
...@@ -1151,8 +1139,6 @@ class PyClassScope(ClassScope): ...@@ -1151,8 +1139,6 @@ class PyClassScope(ClassScope):
cname, visibility, is_cdef) cname, visibility, is_cdef)
entry.is_pyglobal = 1 entry.is_pyglobal = 1
entry.namespace_cname = self.class_obj_cname entry.namespace_cname = self.class_obj_cname
#if Options.intern_names:
# entry.interned_cname = self.intern(name)
return entry return entry
def allocate_temp(self, type): def allocate_temp(self, type):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment