Commit 0f9a60d9 authored by Stefan Behnel

intern identifier names in parser to save memory

make sure all identifiers are EncodedString objects
parent cf9fc1a9
...@@ -19,6 +19,7 @@ from . import Errors ...@@ -19,6 +19,7 @@ from . import Errors
# conditional metaclass. These options are processed by CmdLine called from # conditional metaclass. These options are processed by CmdLine called from
# main() in this file. # main() in this file.
# import Parsing # import Parsing
from .StringEncoding import EncodedString
from .Scanning import PyrexScanner, FileSourceDescriptor from .Scanning import PyrexScanner, FileSourceDescriptor
from .Errors import PyrexError, CompileError, error, warning from .Errors import PyrexError, CompileError, error, warning
from .Symtab import ModuleScope from .Symtab import ModuleScope
...@@ -76,7 +77,8 @@ class Context(object): ...@@ -76,7 +77,8 @@ class Context(object):
self.cpp = cpp self.cpp = cpp
self.options = options self.options = options
self.pxds = {} # full name -> node tree self.pxds = {} # full name -> node tree
self._interned = {} # (type(value), value, *key_args) -> interned_value
standard_include_path = os.path.abspath(os.path.normpath( standard_include_path = os.path.abspath(os.path.normpath(
os.path.join(os.path.dirname(__file__), os.path.pardir, 'Includes'))) os.path.join(os.path.dirname(__file__), os.path.pardir, 'Includes')))
...@@ -93,6 +95,27 @@ class Context(object): ...@@ -93,6 +95,27 @@ class Context(object):
self.future_directives.update([print_function, unicode_literals, absolute_import, division]) self.future_directives.update([print_function, unicode_literals, absolute_import, division])
self.modules['builtins'] = self.modules['__builtin__'] self.modules['builtins'] = self.modules['__builtin__']
def intern_ustring(self, value, encoding=None):
    """Return the shared EncodedString instance for ``value``.

    The cache in ``self._interned`` is keyed on
    ``(EncodedString, value, encoding)``.  On a miss, ``value`` is wrapped
    in a new ``EncodedString``; when ``encoding`` is truthy it is assigned
    to the new object's ``encoding`` attribute.  The new object is stored
    in the cache and returned.
    """
    cache = self._interned
    cache_key = (EncodedString, value, encoding)
    if cache_key in cache:
        return cache[cache_key]

    interned = EncodedString(value)
    if encoding:
        interned.encoding = encoding
    cache[cache_key] = interned
    return interned
def intern_value(self, value, *key):
    """Return the cached object equal to ``value``, caching it if new.

    The cache key is ``(type(value), value)`` followed by any extra
    positional ``key`` arguments, so equal values of different types
    (or with different extra key parts) are kept apart.  The first object
    seen for a given key is the one returned for every later lookup.
    """
    lookup_key = (type(value), value) + key
    # setdefault returns the existing entry, or stores and returns `value`.
    return self._interned.setdefault(lookup_key, value)
# pipeline creation functions can now be found in Pipeline.py # pipeline creation functions can now be found in Pipeline.py
def process_pxd(self, source_desc, scope, module_name): def process_pxd(self, source_desc, scope, module_name):
......
...@@ -53,7 +53,7 @@ class Ctx(object): ...@@ -53,7 +53,7 @@ class Ctx(object):
d.update(kwds) d.update(kwds)
return ctx return ctx
def p_ident(s, message = "Expected an identifier"): def p_ident(s, message="Expected an identifier"):
if s.sy == 'IDENT': if s.sy == 'IDENT':
name = s.systring name = s.systring
s.next() s.next()
...@@ -405,9 +405,9 @@ def p_trailer(s, node1): ...@@ -405,9 +405,9 @@ def p_trailer(s, node1):
return p_index(s, node1) return p_index(s, node1)
else: # s.sy == '.' else: # s.sy == '.'
s.next() s.next()
name = EncodedString( p_ident(s) ) name = p_ident(s)
return ExprNodes.AttributeNode(pos, return ExprNodes.AttributeNode(pos,
obj = node1, attribute = name) obj=node1, attribute=name)
# arglist: argument (',' argument)* [','] # arglist: argument (',' argument)* [',']
# argument: [test '='] test # Really [keyword '='] test # argument: [test '='] test # Really [keyword '='] test
...@@ -434,7 +434,7 @@ def p_call_parse_args(s, allow_genexp = True): ...@@ -434,7 +434,7 @@ def p_call_parse_args(s, allow_genexp = True):
if not arg.is_name: if not arg.is_name:
s.error("Expected an identifier before '='", s.error("Expected an identifier before '='",
pos=arg.pos) pos=arg.pos)
encoded_name = EncodedString(arg.name) encoded_name = s.context.intern_ustring(arg.name)
keyword = ExprNodes.IdentifierStringNode( keyword = ExprNodes.IdentifierStringNode(
arg.pos, value=encoded_name) arg.pos, value=encoded_name)
arg = p_test(s) arg = p_test(s)
...@@ -643,7 +643,7 @@ def p_atom(s): ...@@ -643,7 +643,7 @@ def p_atom(s):
else: else:
return ExprNodes.StringNode(pos, value = bytes_value, unicode_value = unicode_value) return ExprNodes.StringNode(pos, value = bytes_value, unicode_value = unicode_value)
elif sy == 'IDENT': elif sy == 'IDENT':
name = EncodedString( s.systring ) name = s.systring
s.next() s.next()
if name == "None": if name == "None":
return ExprNodes.NoneNode(pos) return ExprNodes.NoneNode(pos)
...@@ -1295,7 +1295,6 @@ def p_import_statement(s): ...@@ -1295,7 +1295,6 @@ def p_import_statement(s):
stats = [] stats = []
is_absolute = Future.absolute_import in s.context.future_directives is_absolute = Future.absolute_import in s.context.future_directives
for pos, target_name, dotted_name, as_name in items: for pos, target_name, dotted_name, as_name in items:
dotted_name = EncodedString(dotted_name)
if kind == 'cimport': if kind == 'cimport':
stat = Nodes.CImportStatNode( stat = Nodes.CImportStatNode(
pos, pos,
...@@ -1305,7 +1304,7 @@ def p_import_statement(s): ...@@ -1305,7 +1304,7 @@ def p_import_statement(s):
else: else:
if as_name and "." in dotted_name: if as_name and "." in dotted_name:
name_list = ExprNodes.ListNode(pos, args=[ name_list = ExprNodes.ListNode(pos, args=[
ExprNodes.IdentifierStringNode(pos, value=EncodedString("*"))]) ExprNodes.IdentifierStringNode(pos, value=s.context.intern_ustring("*"))])
else: else:
name_list = None name_list = None
stat = Nodes.SingleAssignmentNode( stat = Nodes.SingleAssignmentNode(
...@@ -1347,7 +1346,7 @@ def p_from_import_statement(s, first_statement = 0): ...@@ -1347,7 +1346,7 @@ def p_from_import_statement(s, first_statement = 0):
is_cimport = kind == 'cimport' is_cimport = kind == 'cimport'
is_parenthesized = False is_parenthesized = False
if s.sy == '*': if s.sy == '*':
imported_names = [(s.position(), "*", None, None)] imported_names = [(s.position(), s.context.intern_ustring("*"), None, None)]
s.next() s.next()
else: else:
if s.sy == '(': if s.sy == '(':
...@@ -1361,7 +1360,6 @@ def p_from_import_statement(s, first_statement = 0): ...@@ -1361,7 +1360,6 @@ def p_from_import_statement(s, first_statement = 0):
imported_names.append(p_imported_name(s, is_cimport)) imported_names.append(p_imported_name(s, is_cimport))
if is_parenthesized: if is_parenthesized:
s.expect(')') s.expect(')')
dotted_name = EncodedString(dotted_name)
if dotted_name == '__future__': if dotted_name == '__future__':
if not first_statement: if not first_statement:
s.error("from __future__ imports must occur at the beginning of the file") s.error("from __future__ imports must occur at the beginning of the file")
...@@ -1388,16 +1386,12 @@ def p_from_import_statement(s, first_statement = 0): ...@@ -1388,16 +1386,12 @@ def p_from_import_statement(s, first_statement = 0):
imported_name_strings = [] imported_name_strings = []
items = [] items = []
for (name_pos, name, as_name, kind) in imported_names: for (name_pos, name, as_name, kind) in imported_names:
encoded_name = EncodedString(name)
imported_name_strings.append( imported_name_strings.append(
ExprNodes.IdentifierStringNode(name_pos, value = encoded_name)) ExprNodes.IdentifierStringNode(name_pos, value=name))
items.append( items.append(
(name, (name, ExprNodes.NameNode(name_pos, name=as_name or name)))
ExprNodes.NameNode(name_pos,
name = as_name or name)))
import_list = ExprNodes.ListNode( import_list = ExprNodes.ListNode(
imported_names[0][0], args = imported_name_strings) imported_names[0][0], args=imported_name_strings)
dotted_name = EncodedString(dotted_name)
return Nodes.FromImportStatNode(pos, return Nodes.FromImportStatNode(pos,
module = ExprNodes.ImportNode(dotted_name_pos, module = ExprNodes.ImportNode(dotted_name_pos,
module_name = ExprNodes.IdentifierStringNode(pos, value = dotted_name), module_name = ExprNodes.IdentifierStringNode(pos, value = dotted_name),
...@@ -1405,8 +1399,8 @@ def p_from_import_statement(s, first_statement = 0): ...@@ -1405,8 +1399,8 @@ def p_from_import_statement(s, first_statement = 0):
name_list = import_list), name_list = import_list),
items = items) items = items)
imported_name_kinds = cython.declare(
set, set(['class', 'struct', 'union'])) imported_name_kinds = cython.declare(set, set(['class', 'struct', 'union']))
def p_imported_name(s, is_cimport): def p_imported_name(s, is_cimport):
pos = s.position() pos = s.position()
...@@ -1418,6 +1412,7 @@ def p_imported_name(s, is_cimport): ...@@ -1418,6 +1412,7 @@ def p_imported_name(s, is_cimport):
as_name = p_as_name(s) as_name = p_as_name(s)
return (pos, name, as_name, kind) return (pos, name, as_name, kind)
def p_dotted_name(s, as_allowed): def p_dotted_name(s, as_allowed):
pos = s.position() pos = s.position()
target_name = p_ident(s) target_name = p_ident(s)
...@@ -1428,7 +1423,8 @@ def p_dotted_name(s, as_allowed): ...@@ -1428,7 +1423,8 @@ def p_dotted_name(s, as_allowed):
names.append(p_ident(s)) names.append(p_ident(s))
if as_allowed: if as_allowed:
as_name = p_as_name(s) as_name = p_as_name(s)
return (pos, target_name, u'.'.join(names), as_name) return (pos, target_name, s.context.intern_ustring(u'.'.join(names)), as_name)
def p_as_name(s): def p_as_name(s):
if s.sy == 'IDENT' and s.systring == 'as': if s.sy == 'IDENT' and s.systring == 'as':
...@@ -1437,6 +1433,7 @@ def p_as_name(s): ...@@ -1437,6 +1433,7 @@ def p_as_name(s):
else: else:
return None return None
def p_assert_statement(s): def p_assert_statement(s):
# s.sy == 'assert' # s.sy == 'assert'
pos = s.position() pos = s.position()
...@@ -1449,6 +1446,7 @@ def p_assert_statement(s): ...@@ -1449,6 +1446,7 @@ def p_assert_statement(s):
value = None value = None
return Nodes.AssertStatNode(pos, cond = cond, value = value) return Nodes.AssertStatNode(pos, cond = cond, value = value)
statement_terminators = cython.declare(set, set([';', 'NEWLINE', 'EOF'])) statement_terminators = cython.declare(set, set([';', 'NEWLINE', 'EOF']))
def p_if_statement(s): def p_if_statement(s):
...@@ -1993,8 +1991,7 @@ def p_positional_and_keyword_args(s, end_sy_set, templates = None): ...@@ -1993,8 +1991,7 @@ def p_positional_and_keyword_args(s, end_sy_set, templates = None):
arg = Nodes.CComplexBaseTypeNode(base_type.pos, arg = Nodes.CComplexBaseTypeNode(base_type.pos,
base_type = base_type, declarator = declarator) base_type = base_type, declarator = declarator)
parsed_type = True parsed_type = True
keyword_node = ExprNodes.IdentifierStringNode( keyword_node = ExprNodes.IdentifierStringNode(arg.pos, value=ident)
arg.pos, value = EncodedString(ident))
keyword_args.append((keyword_node, arg)) keyword_args.append((keyword_node, arg))
was_keyword = True was_keyword = True
...@@ -2105,7 +2102,7 @@ def p_c_simple_base_type(s, self_flag, nonempty, templates = None): ...@@ -2105,7 +2102,7 @@ def p_c_simple_base_type(s, self_flag, nonempty, templates = None):
s.next() s.next()
elif looking_at_dotted_name(s): elif looking_at_dotted_name(s):
#print "p_c_simple_base_type: looking_at_type_name at", s.position() #print "p_c_simple_base_type: looking_at_type_name at", s.position()
name = s.systring name = s.context.intern_ustring(s.systring)
s.next() s.next()
while s.sy == '.': while s.sy == '.':
module_path.append(name) module_path.append(name)
...@@ -2361,7 +2358,7 @@ def p_c_declarator(s, ctx = Ctx(), empty = 0, is_type = 0, cmethod_flag = 0, ...@@ -2361,7 +2358,7 @@ def p_c_declarator(s, ctx = Ctx(), empty = 0, is_type = 0, cmethod_flag = 0,
if s.sy == '(': if s.sy == '(':
s.next() s.next()
if s.sy == ')' or looking_at_name(s): if s.sy == ')' or looking_at_name(s):
base = Nodes.CNameDeclaratorNode(pos, name = EncodedString(u""), cname = None) base = Nodes.CNameDeclaratorNode(pos, name=s.context.intern_ustring(u""), cname=None)
result = p_c_func_declarator(s, pos, ctx, base, cmethod_flag) result = p_c_func_declarator(s, pos, ctx, base, cmethod_flag)
else: else:
result = p_c_declarator(s, ctx, empty = empty, is_type = is_type, result = p_c_declarator(s, ctx, empty = empty, is_type = is_type,
...@@ -2454,7 +2451,7 @@ def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag, ...@@ -2454,7 +2451,7 @@ def p_c_simple_declarator(s, ctx, empty, is_type, cmethod_flag,
else: else:
rhs = None rhs = None
if s.sy == 'IDENT': if s.sy == 'IDENT':
name = EncodedString(s.systring) name = s.systring
if empty: if empty:
error(s.position(), "Declarator should be empty") error(s.position(), "Declarator should be empty")
s.next() s.next()
...@@ -2913,11 +2910,10 @@ def p_decorators(s): ...@@ -2913,11 +2910,10 @@ def p_decorators(s):
s.next() s.next()
decstring = p_dotted_name(s, as_allowed=0)[2] decstring = p_dotted_name(s, as_allowed=0)[2]
names = decstring.split('.') names = decstring.split('.')
decorator = ExprNodes.NameNode(pos, name=EncodedString(names[0])) decorator = ExprNodes.NameNode(pos, name=s.context.intern_ustring(names[0]))
for name in names[1:]: for name in names[1:]:
decorator = ExprNodes.AttributeNode(pos, decorator = ExprNodes.AttributeNode(
attribute=EncodedString(name), pos, attribute=s.context.intern_ustring(name), obj=decorator)
obj=decorator)
if s.sy == '(': if s.sy == '(':
decorator = p_call(s, decorator) decorator = p_call(s, decorator)
decorators.append(Nodes.DecoratorNode(pos, decorator=decorator)) decorators.append(Nodes.DecoratorNode(pos, decorator=decorator))
...@@ -2928,7 +2924,7 @@ def p_def_statement(s, decorators=None): ...@@ -2928,7 +2924,7 @@ def p_def_statement(s, decorators=None):
# s.sy == 'def' # s.sy == 'def'
pos = s.position() pos = s.position()
s.next() s.next()
name = EncodedString( p_ident(s) ) name = p_ident(s)
s.expect('(') s.expect('(')
args, star_arg, starstar_arg = p_varargslist(s, terminator=')') args, star_arg, starstar_arg = p_varargslist(s, terminator=')')
s.expect(')') s.expect(')')
...@@ -2977,8 +2973,8 @@ def p_class_statement(s, decorators): ...@@ -2977,8 +2973,8 @@ def p_class_statement(s, decorators):
# s.sy == 'class' # s.sy == 'class'
pos = s.position() pos = s.position()
s.next() s.next()
class_name = EncodedString( p_ident(s) ) class_name = EncodedString(p_ident(s))
class_name.encoding = s.source_encoding class_name.encoding = s.source_encoding # FIXME: why is this needed?
arg_tuple = None arg_tuple = None
keyword_dict = None keyword_dict = None
starstar_arg = None starstar_arg = None
......
...@@ -421,14 +421,11 @@ class PyrexScanner(Scanner): ...@@ -421,14 +421,11 @@ class PyrexScanner(Scanner):
if systring in self.keywords: if systring in self.keywords:
if systring == u'print' and print_function in self.context.future_directives: if systring == u'print' and print_function in self.context.future_directives:
self.keywords.discard('print') self.keywords.discard('print')
systring = EncodedString(systring)
elif systring == u'exec' and self.context.language_level >= 3: elif systring == u'exec' and self.context.language_level >= 3:
self.keywords.discard('exec') self.keywords.discard('exec')
systring = EncodedString(systring)
else: else:
sy = systring sy = systring
else: systring = self.context.intern_ustring(systring)
systring = EncodedString(systring)
self.sy = sy self.sy = sy
self.systring = systring self.systring = systring
if False: # debug_scanner: if False: # debug_scanner:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment