Commit c4ba39e1 authored by Stefan Behnel

merged intern table with general string table to support unicode string interning in Py3

parent 84469ac4
......@@ -221,12 +221,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("/* Implementation of %s */" % env.qualified_name)
self.generate_const_definitions(env, code)
self.generate_interned_num_decls(env, code)
self.generate_interned_name_decls(env, code)
self.generate_py_string_decls(env, code)
self.generate_cached_builtins_decls(env, code)
self.body.generate_function_definitions(env, code, options.transforms)
code.mark_pos(None)
self.generate_interned_name_table(env, code)
self.generate_py_string_table(env, code)
self.generate_typeobj_definitions(env, code)
self.generate_method_table(env, code)
......@@ -1362,47 +1360,33 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
"{0, 0, 0, 0, 0}")
code.putln(
"};")
def generate_interned_name_table(self, env, code):
    """Emit the C table of interned names.

    Writes a ``__Pyx_InternTabEntry`` array named
    ``Naming.intern_tab_cname`` with one ``{&cname, "name"}`` row per
    item of ``env.intern_map``, ordered by name, followed by a
    ``{0, 0}`` terminator row.  Nothing but the position reset is done
    when the map is empty.
    """
    code.mark_pos(None)
    # sorted() yields the same ordering as items() followed by an
    # in-place sort(), and does not depend on items() returning a list.
    items = sorted(env.intern_map.items())
    if not items:
        return
    code.putln("")
    code.putln(
        "static __Pyx_InternTabEntry %s[] = {" %
            Naming.intern_tab_cname)
    for (name, cname) in items:
        code.putln(
            '{&%s, "%s"},' % (
                cname,
                name))
    # Terminator row with a null object pointer.
    code.putln(
        "{0, 0}")
    code.putln(
        "};")
def generate_py_string_table(self, env, code):
    """Emit the C string table for all Python string constants.

    For every entry in ``env.all_pystring_entries`` that is flagged
    ``is_interned`` a ``static char`` array holding ``entry.init`` is
    written first.  Then a ``__Pyx_StringTabEntry`` array named
    ``Naming.stringtab_cname`` is written with one row per entry:
    address of the Python object, C string, its ``sizeof``, the
    unicode flag and the intern flag.  The table ends with an all-zero
    row.  Nothing is emitted when there are no entries.
    """
    entries = env.all_pystring_entries
    if not entries:
        return
    code.putln("")
    for entry in entries:
        if entry.is_interned:
            code.putln('static char %s[] = "%s";' % (
                entry.cname, entry.init))
    code.putln("")
    code.putln(
        "static __Pyx_StringTabEntry %s[] = {" %
            Naming.stringtab_cname)
    for entry in entries:
        # Five fields per row: p, s, n, is_unicode, intern.
        code.putln(
            "{&%s, %s, sizeof(%s), %d, %d}," % (
                entry.pystring_cname,
                entry.cname,
                entry.cname,
                entry.type.is_unicode,
                entry.is_interned
                ))
    # Terminator row; it must have as many fields as a data row.
    code.putln(
        "{0, 0, 0, 0, 0}")
    code.putln(
        "};")
def generate_filename_init_prototype(self, code):
code.putln("");
code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname)
......@@ -1546,12 +1530,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
entry.cname,
entry.init,
code.error_goto_if_null(entry.cname, self.pos)))
if env.intern_map:
env.use_utility_code(Nodes.init_intern_tab_utility_code);
code.putln(
"if (__Pyx_InternStrings(%s) < 0) %s;" % (
Naming.intern_tab_cname,
code.error_goto(self.pos)))
def generate_string_init_code(self, env, code):
if env.all_pystring_entries:
......
......@@ -231,19 +231,6 @@ class BlockNode:
if not entry.is_interned:
code.put_var_declaration(entry, static = 1)
def generate_interned_name_decls(self, env, code):
    """Declare the pending interned names of the global scope.

    Looks up the global scope of ``env`` and, for every name in its
    ``interned_names`` list, writes a ``static PyObject *<cname>;``
    declaration using the C name stored in ``intern_map``.  The list is
    then emptied in place so the same names are not declared again.
    Nothing is emitted when no names are pending.
    """
    genv = env.global_scope()
    intern_map = genv.intern_map
    names = genv.interned_names
    if not names:
        return
    code.putln("")
    for name in names:
        code.putln(
            "static PyObject *%s;" % intern_map[name])
    # Clear the list object itself (not a copy): the global scope keeps
    # a reference to it.
    del names[:]
def generate_py_string_decls(self, env, code):
entries = env.pystring_entries
if entries:
......@@ -878,7 +865,6 @@ class FuncDefNode(StatNode, BlockNode):
# if we supported them, which we probably won't.
# ----- Top-level constants used by this function
self.generate_interned_num_decls(lenv, code)
self.generate_interned_name_decls(lenv, code)
self.generate_py_string_decls(lenv, code)
self.generate_cached_builtins_decls(lenv, code)
#code.putln("")
......@@ -3729,8 +3715,7 @@ utility_function_predeclarations = \
#define INLINE
#endif
typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/
typedef struct {PyObject **p; char *s; long n; int is_unicode;} __Pyx_StringTabEntry; /*proto*/
typedef struct {PyObject **p; char *s; long n; char is_unicode; char intern;} __Pyx_StringTabEntry; /*proto*/
""" + """
......@@ -4306,27 +4291,6 @@ done:
#------------------------------------------------------------------------------------
# Utility code as a two-element list: the C prototype followed by the C
# implementation of __Pyx_InternStrings().
#
# The C function walks a __Pyx_InternTabEntry array until it reaches an
# entry whose object pointer `p` is null.  For each entry it creates a
# string object from the C string `s` (interned when PY_MAJOR_VERSION < 3,
# a plain PyString_FromString() result otherwise) and stores it through
# `p`.  It returns -1 as soon as a creation fails and 0 once the whole
# table has been processed.
init_intern_tab_utility_code = [
"""
static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/
""","""
static int __Pyx_InternStrings(__Pyx_InternTabEntry *t) {
while (t->p) {
#if PY_MAJOR_VERSION < 3
*t->p = PyString_InternFromString(t->s);
#else
*t->p = PyString_FromString(t->s);
#endif
if (!*t->p)
return -1;
++t;
}
return 0;
}
"""]
#------------------------------------------------------------------------------------
init_string_tab_utility_code = [
"""
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
......@@ -4337,10 +4301,19 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
#if PY_MAJOR_VERSION < 3
*t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
#else
*t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
if (t->intern) {
*t->p = PyUnicode_InternFromString(t->s);
} else {
*t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
}
#endif
} else {
*t->p = PyString_FromStringAndSize(t->s, t->n - 1);
#if PY_MAJOR_VERSION < 3
if (t->intern)
*t->p = PyString_InternFromString(t->s);
else
#endif
*t->p = PyString_FromStringAndSize(t->s, t->n - 1);
}
if (!*t->p)
return -1;
......
......@@ -15,7 +15,7 @@ from TypeSlots import \
import ControlFlow
import __builtin__
identifier_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$")
possible_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match
class Entry:
# A symbol table entry in a Scope or ModuleNamespace.
......@@ -64,7 +64,6 @@ class Entry:
# type is an extension type
# as_module None Module scope, if a cimported module
# is_inherited boolean Is an inherited attribute of an extension type
# #interned_cname string C name of interned name string
# pystring_cname string C name of Python version of string literal
# is_interned boolean For string const entries, value is interned
# used boolean
......@@ -104,7 +103,6 @@ class Entry:
in_cinclude = 0
as_module = None
is_inherited = 0
#interned_cname = None
pystring_cname = None
is_interned = 0
used = 0
......@@ -204,10 +202,10 @@ class Scope:
def __str__(self):
    """Return ``<ClassName qualified_name>`` for debugging output."""
    return "<%s %s>" % (self.__class__.__name__, self.qualified_name)
def intern(self, name):
    """Delegate interning of *name* to the global scope.

    Returns whatever the global scope's ``intern()`` returns.
    """
    return self.global_scope().intern(name)
def qualifying_scope(self):
    """Return the scope used for qualification: the parent scope."""
    return self.parent_scope
......@@ -465,14 +463,12 @@ class Scope:
# Python identifier, it will be interned.
if not entry.pystring_cname:
value = entry.init
if not entry.type.is_unicode and identifier_pattern.match(value):
entry.pystring_cname = self.intern(value)
if possible_identifier(value):
entry.is_interned = 1
else:
entry.pystring_cname = entry.cname + "p"
self.pystring_entries.append(entry)
self.global_scope().all_pystring_entries.append(entry)
entry.pystring_cname = entry.cname + "p"
self.pystring_entries.append(entry)
self.global_scope().all_pystring_entries.append(entry)
def add_py_num(self, value):
# Add an entry for an int constant.
cname = "%s%s" % (Naming.interned_num_prefix, value)
......@@ -678,7 +674,6 @@ class ModuleScope(Scope):
# pxd_file_loaded boolean Corresponding .pxd file has been processed
# cimported_modules [ModuleScope] Modules imported with cimport
# intern_map {string : string} Mapping from Python names to interned strs
# interned_names [string] Interned names pending generation of declarations
# interned_nums [int/long] Interned numeric constants
# all_pystring_entries [Entry] Python string consts from all scopes
# types_imported {PyrexType : 1} Set of types for which import code generated
......@@ -706,7 +701,6 @@ class ModuleScope(Scope):
self.pxd_file_loaded = 0
self.cimported_modules = []
self.intern_map = {}
self.interned_names = []
self.interned_nums = []
self.interned_objs = []
self.all_pystring_entries = []
......@@ -743,15 +737,11 @@ class ModuleScope(Scope):
else:
entry.is_builtin = 1
return entry
def intern(self, name):
    """Return the C name of the Python string object for *name*.

    The name is registered as a string constant with
    ``add_string_const()`` and the resulting entry is passed to
    ``add_py_string()``; the entry's ``pystring_cname`` is returned.
    The separate intern map and the pending-names list are no longer
    consulted here.
    """
    string_entry = self.add_string_const(name)
    self.add_py_string(string_entry)
    return string_entry.pystring_cname
def find_module(self, module_name, pos):
# Find a module in the import namespace, interpreting
......@@ -832,8 +822,6 @@ class ModuleScope(Scope):
"Non-cdef global variable is not a generic Python object")
entry.is_pyglobal = 1
entry.namespace_cname = self.module_cname
#if Options.intern_names:
# entry.interned_cname = self.intern(name)
else:
entry.is_cglobal = 1
self.var_entries.append(entry)
......@@ -1151,8 +1139,6 @@ class PyClassScope(ClassScope):
cname, visibility, is_cdef)
entry.is_pyglobal = 1
entry.namespace_cname = self.class_obj_cname
#if Options.intern_names:
# entry.interned_cname = self.intern(name)
return entry
def allocate_temp(self, type):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment