From a15add64cbb2c0636dd39776da7c4e779e9bc07b Mon Sep 17 00:00:00 2001 From: Stefan Behnel <stefan_ml@behnel.de> Date: Sat, 9 Aug 2014 16:40:36 +0200 Subject: [PATCH] speed up method calls a little by inlining CPython's method_call() and avoiding duplicate args tuple packing --HG-- extra : amend_source : 52480a79feadc93a60bdeee1837a31bf3539b327 --- CHANGES.rst | 2 + Cython/Compiler/ExprNodes.py | 78 ++++++++++++++++++++++++++++++++++++ Cython/Compiler/Optimize.py | 13 +++++- 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9f2f8ce41..bbb7b33fb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -71,6 +71,8 @@ Optimizations * Calls to ``slice()`` are translated to a straight C-API call. +* Simple Python method calls are about 10% faster. + Bugs fixed ---------- diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index 0d08ef39f..416e7c4fc 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -4785,6 +4785,84 @@ class SimpleCallNode(CallNode): code.funcstate.release_temp(self.opt_arg_struct) +class PyMethodCallNode(SimpleCallNode): + # Specialised call to a (potential) PyMethodObject with non-constant argument tuple. + # Allows the self argument to be injected directly instead of repacking a tuple for it. + # + # function ExprNode the function/method object to call + # arg_tuple TupleNode the arguments for the args tuple + + subexprs = ['function', 'arg_tuple'] + is_temp = True + + def generate_evaluation_code(self, code): + code.mark_pos(self.pos) + self.allocate_temp_result(code) + + self.function.generate_evaluation_code(code) + args = self.arg_tuple.args + for arg in args: + arg.generate_evaluation_code(code) + + self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True) + function = code.funcstate.allocate_temp(py_object_type, manage_ref=False) + arg_offset = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False) + args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True) + + code.putln("%s = 0;" % arg_offset) + code.putln("%s = %s;" % (function, self.function.py_result())) + code.putln("%s = NULL;" % self_arg) + + code.putln("if (CYTHON_COMPILING_IN_CPYTHON && PyMethod_Check(%s)) {" % function) + code.putln("%s = PyMethod_GET_SELF(%s);" % (self_arg, function)) + # the following is always true in Py3 (kept only for safety), + # but is false for unbound methods in Py2 + code.putln("if (likely(%s)) {" % self_arg) + code.put_incref(self_arg, py_object_type) + code.putln("%s = PyMethod_GET_FUNCTION(%s);" % (function, function)) + code.putln("%s = 1;" % arg_offset) + code.putln("}") + code.putln("}") + + code.putln("%s = PyTuple_New(%d+%s); %s" % ( + args_tuple, len(args), arg_offset, + code.error_goto_if_null(args_tuple, self.pos))) + code.put_gotref(args_tuple) + + code.putln("if (%s == 1) {" % arg_offset) + code.putln("PyTuple_SET_ITEM(%s, 0, %s); __Pyx_GIVEREF(%s); %s = NULL;" % ( + args_tuple, self_arg, self_arg, self_arg)) + code.funcstate.release_temp(self_arg) + code.putln("}") + + for i, arg in enumerate(args): + arg.make_owned_reference(code) + code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % ( + args_tuple, i, arg_offset, arg.py_result())) + code.put_giveref(arg.py_result()) + code.funcstate.release_temp(arg_offset) + + for arg in args: + arg.generate_post_assignment_code(code) + arg.free_temps(code) + + code.globalstate.use_utility_code( + UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c")) + code.putln( + "%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % ( + self.result(), + function, args_tuple, + code.error_goto_if_null(self.result(), self.pos))) + code.put_gotref(self.py_result()) + + code.put_decref_clear(args_tuple, py_object_type) + code.funcstate.release_temp(args_tuple) + code.funcstate.release_temp(function) + + self.function.generate_disposal_code(code) + self.function.free_temps(code) + + class InlinedDefNodeCallNode(CallNode): # Inline call to defnode # diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py index b6b0b943e..68bd8d9a4 100644 --- a/Cython/Compiler/Optimize.py +++ b/Cython/Compiler/Optimize.py @@ -3736,6 +3736,7 @@ class FinalOptimizePhase(Visitor.CythonTransform): - eliminate None assignment and refcounting for first assignment. - isinstance -> typecheck for cdef types - eliminate checks for None and/or types that became redundant after tree changes + - replace Python function calls that look like method calls by a faster PyMethodCallNode """ def visit_SingleAssignmentNode(self, node): """Avoid redundant initialisation of local variables before their @@ -3748,8 +3749,9 @@ class FinalOptimizePhase(Visitor.CythonTransform): return node def visit_SimpleCallNode(self, node): - """Replace generic calls to isinstance(x, type) by a more efficient - type check. + """ + Replace generic calls to isinstance(x, type) by a more efficient type check. + Replace likely Python method calls by a specialised PyMethodCallNode. """ self.visitchildren(node) if node.function.type.is_cfunction and isinstance(node.function, ExprNodes.NameNode): @@ -3761,6 +3763,13 @@ class FinalOptimizePhase(Visitor.CythonTransform): node.function.type = node.function.entry.type PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type) node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr) + elif node.function.type.is_pyobject: + # we could do it for all calls, but attributes are most likely to result in a method call + if node.function.is_attribute: + if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not ( + node.arg_tuple.is_literal or node.arg_tuple.mult_factor): + node = ExprNodes.PyMethodCallNode.from_node( + node, function=node.function, arg_tuple=node.arg_tuple, type=node.type) return node def visit_PyTypeTestNode(self, node): -- 2.30.9