From a15add64cbb2c0636dd39776da7c4e779e9bc07b Mon Sep 17 00:00:00 2001
From: Stefan Behnel <stefan_ml@behnel.de>
Date: Sat, 9 Aug 2014 16:40:36 +0200
Subject: [PATCH] speed up method calls a little by inlining CPython's
 method_call() and avoiding duplicate args tuple packing

--HG--
extra : amend_source : 52480a79feadc93a60bdeee1837a31bf3539b327
---
 CHANGES.rst                  |  2 +
 Cython/Compiler/ExprNodes.py | 78 ++++++++++++++++++++++++++++++++++++
 Cython/Compiler/Optimize.py  | 13 +++++-
 3 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 9f2f8ce41..bbb7b33fb 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -71,6 +71,8 @@ Optimizations
 
 * Calls to ``slice()`` are translated to a straight C-API call.
 
+* Simple Python method calls are about 10% faster.
+
 Bugs fixed
 ----------
 
diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py
index 0d08ef39f..416e7c4fc 100644
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -4785,6 +4785,84 @@ class SimpleCallNode(CallNode):
                 code.funcstate.release_temp(self.opt_arg_struct)
 
 
+class PyMethodCallNode(SimpleCallNode):
+    # Specialised call to a (potential) PyMethodObject with a non-constant argument tuple.
+    # A bound method is unpacked at runtime: its function is called with 'self' in tuple slot 0.
+    #
+    # function    ExprNode      the function/method object to call
+    # arg_tuple   TupleNode     positional arguments; only its .args are evaluated, not the tuple
+
+    subexprs = ['function', 'arg_tuple']
+    is_temp = True
+
+    def generate_evaluation_code(self, code):
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+
+        self.function.generate_evaluation_code(code)
+        args = self.arg_tuple.args
+        for arg in args:
+            arg.generate_evaluation_code(code)
+
+        self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+        function = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
+        arg_offset = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
+        args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+
+        code.putln("%s = 0;" % arg_offset)
+        code.putln("%s = %s;" % (function, self.function.py_result()))
+        code.putln("%s = NULL;" % self_arg)
+
+        code.putln("if (CYTHON_COMPILING_IN_CPYTHON && PyMethod_Check(%s)) {" % function)
+        code.putln("%s = PyMethod_GET_SELF(%s);" % (self_arg, function))
+        # 'self' is always set in Py3 (check kept only for safety), but is NULL for unbound
+        # methods in Py2; those keep arg_offset == 0 and are called as the method object itself
+        code.putln("if (likely(%s)) {" % self_arg)
+        code.put_incref(self_arg, py_object_type)
+        code.putln("%s = PyMethod_GET_FUNCTION(%s);" % (function, function))
+        code.putln("%s = 1;" % arg_offset)
+        code.putln("}")
+        code.putln("}")
+
+        code.putln("%s = PyTuple_New(%d+%s); %s" % (
+            args_tuple, len(args), arg_offset,
+            code.error_goto_if_null(args_tuple, self.pos)))
+        code.put_gotref(args_tuple)
+
+        code.putln("if (%s == 1) {" % arg_offset)
+        code.putln("PyTuple_SET_ITEM(%s, 0, %s); __Pyx_GIVEREF(%s); %s = NULL;" % (
+            args_tuple, self_arg, self_arg, self_arg))
+        code.funcstate.release_temp(self_arg)  # generator-side bookkeeping; presumably emits no C into this "if"
+        code.putln("}")
+
+        for i, arg in enumerate(args):
+            arg.make_owned_reference(code)
+            code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % (
+                args_tuple, i, arg_offset, arg.py_result()))
+            code.put_giveref(arg.py_result())
+        code.funcstate.release_temp(arg_offset)
+
+        for arg in args:
+            arg.generate_post_assignment_code(code)
+            arg.free_temps(code)
+
+        code.globalstate.use_utility_code(
+            UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c"))
+        code.putln(
+            "%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
+                self.result(),
+                function, args_tuple,
+                code.error_goto_if_null(self.result(), self.pos)))
+        code.put_gotref(self.py_result())
+
+        code.put_decref_clear(args_tuple, py_object_type)
+        code.funcstate.release_temp(args_tuple)
+        code.funcstate.release_temp(function)
+
+        self.function.generate_disposal_code(code)
+        self.function.free_temps(code)
+
+
 class InlinedDefNodeCallNode(CallNode):
     #  Inline call to defnode
     #
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py
index b6b0b943e..68bd8d9a4 100644
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -3736,6 +3736,7 @@ class FinalOptimizePhase(Visitor.CythonTransform):
         - eliminate None assignment and refcounting for first assignment.
         - isinstance -> typecheck for cdef types
         - eliminate checks for None and/or types that became redundant after tree changes
+        - replace Python function calls that look like method calls by a faster PyMethodCallNode
     """
     def visit_SingleAssignmentNode(self, node):
         """Avoid redundant initialisation of local variables before their
@@ -3748,8 +3749,9 @@ class FinalOptimizePhase(Visitor.CythonTransform):
         return node
 
     def visit_SimpleCallNode(self, node):
-        """Replace generic calls to isinstance(x, type) by a more efficient
-        type check.
+        """
+        Replace generic calls to isinstance(x, type) by a more efficient type check.
+        Replace likely Python method calls by a specialised PyMethodCallNode.
         """
         self.visitchildren(node)
         if node.function.type.is_cfunction and isinstance(node.function, ExprNodes.NameNode):
@@ -3761,6 +3763,13 @@ class FinalOptimizePhase(Visitor.CythonTransform):
                     node.function.type = node.function.entry.type
                     PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type)
                     node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr)
+        elif node.function.type.is_pyobject:
+            # we could do it for all calls, but attributes are most likely to result in a method call
+            if node.function.is_attribute:
+                if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not (
+                        node.arg_tuple.is_literal or node.arg_tuple.mult_factor):
+                    node = ExprNodes.PyMethodCallNode.from_node(
+                        node, function=node.function, arg_tuple=node.arg_tuple, type=node.type)
         return node
 
     def visit_PyTypeTestNode(self, node):
-- 
2.30.9