Merge pull request #1181 from kmod/perf8

Some misc performance optimizations

Merge pull request #1181 from kmod/perf8
Some misc performance optimizations
7ac0fc69 · Kevin Modzelewski · 147549c9 · 3b3e5863 · 7ac0fc69 · 7ac0fc69
Commit 7ac0fc69 authored May 18, 2016 by Kevin Modzelewski
Showing with 93 additions and 63 deletions

from_cpython/Objects/obmalloc.c from_cpython/Objects/obmalloc.c +19 -11

from_cpython/Objects/unicodeobject.c from_cpython/Objects/unicodeobject.c +74 -3

src/runtime/types.cpp src/runtime/types.cpp +0 -49

No files found.
--- a/from_cpython/Objects/obmalloc.c
+++ b/from_cpython/Objects/obmalloc.c
@@ -9,9 +9,13 @@
 #endif
 #endif
+/* Branch-prediction hints.  __builtin_expect(exp, c) tells the compiler
+ * that exp == c is the expected outcome, so the condition is first
+ * collapsed to 0 or 1 with !!; that way LIKELY() states the right
+ * expectation for any truthy value, not only for a literal 1. */
+#define UNLIKELY(value) __builtin_expect(!!(value), 0)
+#define LIKELY(value) __builtin_expect(!!(value), 1)
 #ifdef WITH_VALGRIND
 #include <valgrind/valgrind.h>
+#if 0
 /* If we're using GCC, use __builtin_expect() to reduce overhead of
   the valgrind checks */
 #if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
@@ -19,6 +23,7 @@
 #else
 #  define UNLIKELY(value) (value)
 #endif
+#endif
 /* -1 indicates that we haven't checked that we're running on valgrind yet. */
 static int running_on_valgrind = -1;
@@ -778,26 +783,17 @@ PyObject_Malloc(size_t nbytes)
        goto redirect;
 #endif
-    /*
-     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
-     * Most python internals blindly use a signed Py_ssize_t to track
-     * things without checking for overflows or negatives.
-     * As size_t is unsigned, checking for nbytes < 0 is not required.
-     */
-    if (nbytes > PY_SSIZE_T_MAX)
-        return NULL;
    /*
     * This implicitly redirects malloc(0).
     */
-    if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
+    if (LIKELY((nbytes - 1) < SMALL_REQUEST_THRESHOLD)) {
        LOCK();
        /*
         * Most frequent paths first
         */
        size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
        pool = usedpools[size + size];
-        if (pool != pool->nextpool) {
+        if (LIKELY(pool != pool->nextpool)) {
            /*
             * There is a used pool for this size class.
             * Pick up the head block of its free list.
@@ -950,6 +946,18 @@ PyObject_Malloc(size_t nbytes)
        goto init_pool;
    }
+    // Pyston change: move this unlikely case below the likely one.
+    // This is ok because the two cases don't overlap.
+    /*
+     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
+     * Most python internals blindly use a signed Py_ssize_t to track
+     * things without checking for overflows or negatives.
+     * As size_t is unsigned, checking for nbytes < 0 is not required.
+     */
+    if (UNLIKELY(nbytes > PY_SSIZE_T_MAX))
+        return NULL;
    /* The small block allocator ends here. */
 redirect:

--- a/from_cpython/Objects/unicodeobject.c
+++ b/from_cpython/Objects/unicodeobject.c
@@ -51,6 +51,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include <windows.h>
 #endif
+/* Branch-prediction hints.  __builtin_expect(exp, c) tells the compiler
+ * that exp == c is the expected outcome, so the condition is first
+ * collapsed to 0 or 1 with !!; that way LIKELY() states the right
+ * expectation for any truthy value (such as a pointer cast to an
+ * integer), not only for a literal 1. */
+#define UNLIKELY(value) __builtin_expect(!!(value), 0)
+#define LIKELY(value) __builtin_expect(!!(value), 1)
 /* Limit for the Unicode object free list */
 #define PyUnicode_MAXFREELIST       1024
@@ -108,9 +111,8 @@ PyUnicodeObject *unicode_empty = NULL;
            Py_INCREF(unicode_empty);                   \
        else {                                          \
            unicode_empty = _PyUnicode_New(0);          \
-            if (unicode_empty != NULL) {                \
+            if (unicode_empty != NULL)                  \
                Py_INCREF(unicode_empty);               \
-            }                                           \
        }                                               \
        return (PyObject *)unicode_empty;               \
    } while (0)
@@ -316,7 +318,76 @@ int unicode_resize(register PyUnicodeObject *unicode,
 */
-extern PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
+static /* _PyUnicode_New: hand back a unicode object whose str buffer has
+        * room for `length` characters plus one terminating element.
+        * On success str[0] and str[length] are 0, length is stored,
+        * hash is -1 and defenc is NULL; the remaining characters are
+        * left for the caller to fill in.  Returns NULL when the object
+        * or its buffer cannot be allocated; an oversized length returns
+        * the result of PyErr_NoMemory() (NULL in CPython's C API --
+        * confirm against the headers in use). */
+PyUnicodeObject *_PyUnicode_New(Py_ssize_t length)
+{
+    register PyUnicodeObject *unicode;
+    /* Optimization for empty strings: once unicode_empty has been
+     * created, a zero-length request returns it with an extra
+     * reference instead of allocating anything. */
+    if (UNLIKELY(length == 0 && unicode_empty != NULL)) {
+        Py_INCREF(unicode_empty);
+        return unicode_empty;
+    }
+    /* Ensure we won't overflow the size: the buffer requested below is
+     * sizeof(Py_UNICODE) * (length + 1) bytes. */
+    if (UNLIKELY(length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1))) {
+        return (PyUnicodeObject *)PyErr_NoMemory();
+    }
+    /* Unicode freelist & memory allocation: when free_list is non-empty
+     * its head is popped (the next entry is read from the first
+     * pointer-sized slot of the popped object) and numfree is
+     * decremented; otherwise a fresh object is allocated in the else
+     * branch.  The (intptr_t) casts are presumably there because the
+     * hint macro wants an integer expression -- confirm. */
+    if (LIKELY((intptr_t)free_list)) {
+        unicode = free_list;
+        free_list = *(PyUnicodeObject **)unicode;
+        numfree--;
+        if ((intptr_t)unicode->str) {
+            /* Keep-Alive optimization: we only upsize the buffer,
+               never downsize it.  If the upsize fails, the old buffer
+               is released and str is cleared so that the shared
+               !unicode->str check further down takes the error path. */
+            if ((unicode->length < length) &&
+                unicode_resize(unicode, length) < 0) {
+                PyObject_DEL(unicode->str);
+                unicode->str = NULL;
+            }
+        }
+        else {
+            size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+            unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
+        }
+        PyObject_INIT(unicode, &PyUnicode_Type);
+    }
+    else {
+        size_t new_size;
+        unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
+        if (unicode == NULL)
+            return NULL;
+        new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+        unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
+    }
+    if (UNLIKELY(!unicode->str)) { /* covers both branches above */
+        PyErr_NoMemory();
+        goto onError;
+    }
+    /* Initialize the first element to guard against cases where
+     * the caller fails before initializing str -- unicode_resize()
+     * reads str[0], and the Keep-Alive optimization can keep memory
+     * allocated for str alive across a call to unicode_dealloc(unicode).
+     * We don't want unicode_resize to read uninitialized memory in
+     * that case.
+     */
+    unicode->str[0] = 0;
+    unicode->str[length] = 0;
+    unicode->length = length;
+    unicode->hash = -1;
+    unicode->defenc = NULL;
+    return unicode;
+  onError:
+    /* XXX UNREF/NEWREF interface should be more symmetrical.
+     * Only reached when no character buffer could be obtained; the
+     * object itself is released here and NULL is returned. */
+    _Py_DEC_REFTOTAL;
+    _Py_ForgetReference((PyObject *)unicode);
+    PyObject_Del(unicode);
+    return NULL;
+}
 static
 void unicode_dealloc(register PyUnicodeObject *unicode)

--- a/src/runtime/types.cpp
+++ b/src/runtime/types.cpp
@@ -3632,55 +3632,6 @@ out:
    return result;
 }
-extern "C" PyUnicodeObject* unicode_empty;
-extern "C" PyUnicodeObject* _PyUnicode_New(Py_ssize_t length) noexcept {
-    PyUnicodeObject* unicode;
-    /* Optimization for empty strings */
-    if (length == 0 && unicode_empty != NULL) {
-        Py_INCREF(unicode_empty);
-        return unicode_empty;
-    }
-    /* Ensure we won't overflow the size. */
-    if (length > ((PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) {
-        return (PyUnicodeObject*)PyErr_NoMemory();
-    }
-    // Pyston change: allocate ->str first, so that if this allocation
-    // causes a collection, we don't see a half-created unicode object:
-    size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
-    unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
-    if (unicode == NULL)
-        return NULL;
-    unicode->str = (Py_UNICODE*)PyObject_MALLOC(new_size);
-    if (!unicode->str) {
-        Py_DECREF(unicode);
-        return (PyUnicodeObject*)PyErr_NoMemory();
-    }
-#if STAT_ALLOCATIONS
-    {
-        size_t size = sizeof(PyUnicodeObject);
-        ALLOC_STATS(unicode_cls);
-    }
-#endif
-    /* Initialize the first element to guard against cases where
-     * the caller fails before initializing str -- unicode_resize()
-     * reads str[0], and the Keep-Alive optimization can keep memory
-     * allocated for str alive across a call to unicode_dealloc(unicode).
-     * We don't want unicode_resize to read uninitialized memory in
-     * that case.
-     */
-    unicode->str[0] = 0;
-    unicode->str[length] = 0;
-    unicode->length = length;
-    unicode->hash = -1;
-    unicode->defenc = NULL;
-    return unicode;
-}
 void dealloc_null(Box* box) { // Releases nothing; presumably installed as a dealloc hook for objects needing no cleanup -- confirm at the call sites.
    assert(box->cls->tp_del == NULL); // Sanity check only: the object's class must not define tp_del.
 }