Fix several issues in overflow checks (GH-3591)

* Fix #3588: Make existing overflow code safe Signed overflow is undefined behaviour in C and compilers can and do optimized on that basis. * Speed up our overflow impls Note this is primarily based on performance of compilers which do not support __builtin_add_overflow (i.e., not Clang >= 3.4 & gcc >= 5), mostly looking at several gcc 4 releases (used by older, supported, RHEL releases and Debian 8 "Jessie") and MSVC. * Use __builtin_XXX_overflow if available This is much quicker in general, as these all just then read the overflow flag from the status register.

Fix several issues in overflow checks (GH-3591)
* Fix #3588: Make existing overflow code safe Signed overflow is undefined behaviour in C and compilers can and do optimized on that basis. * Speed up our overflow impls Note this is primarily based on performance of compilers which do not support __builtin_add_overflow (i.e., not Clang >= 3.4 & gcc >= 5), mostly looking at several gcc 4 releases (used by older, supported, RHEL releases and Debian 8 "Jessie") and MSVC. * Use __builtin_XXX_overflow if available This is much quicker in general, as these all just then read the overflow flag from the status register.
92a0b75b · Sam Sneddon · GitHub · 78b799d0 · 92a0b75b · 92a0b75b
Commit 92a0b75b authored May 18, 2020 by Sam Sneddon Committed by GitHub May 18, 2020
Show whitespace changes
Inline Side-by-side

Showing with 148 additions and 52 deletions

Cython/Utility/Overflow.c Cython/Utility/Overflow.c +134 -50

tests/run/overflow_check.pxi tests/run/overflow_check.pxi +14 -2

No files found.
--- a/Cython/Utility/Overflow.c
+++ b/Cython/Utility/Overflow.c
@@ -46,6 +46,24 @@ static int __Pyx_check_twos_complement(void) {
 #define __Pyx_div_no_overflow(a, b, overflow) ((a) / (b))
 #define __Pyx_div_const_no_overflow(a, b, overflow) ((a) / (b))
+#if defined(__has_builtin)
+#  if __has_builtin(__builtin_add_overflow) && !defined(__ibmxl__)
+#    define __PYX_HAVE_BUILTIN_OVERFLOW
+#  endif
+#elif defined(__GNUC__) && (__GNUC__ >= 5) && (!defined(__INTEL_COMPILER) || (__INTEL_COMPILER >= 1800))
+#  define __PYX_HAVE_BUILTIN_OVERFLOW
+#endif
+#if defined(__GNUC__)
+#  define __Pyx_is_constant(x) (__builtin_constant_p(x))
+#elif (defined(__has_builtin))
+#  if __has_builtin(__builtin_constant_p))
+#    define __Pyx_is_constant(x) (__builtin_constant_p(x))
+#  endif
+#else
+#  define __Pyx_is_constant(x) (0)
+#endif
 /////////////// Common.init ///////////////
 //@substitute: naming
@@ -64,11 +82,37 @@ static CYTHON_INLINE {{UINT}} __Pyx_div_{{NAME}}_checking_overflow({{UINT}} a, {
 // Use these when b is known at compile time.
 #define __Pyx_add_const_{{NAME}}_checking_overflow __Pyx_add_{{NAME}}_checking_overflow
 #define __Pyx_sub_const_{{NAME}}_checking_overflow __Pyx_sub_{{NAME}}_checking_overflow
+#if defined(__PYX_HAVE_BUILTIN_OVERFLOW)
+#define __Pyx_mul_const_{{NAME}}_checking_overflow __Pyx_mul_{{NAME}}_checking_overflow
+#else
 static CYTHON_INLINE {{UINT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} constant, int *overflow);
+#endif
 #define __Pyx_div_const_{{NAME}}_checking_overflow __Pyx_div_{{NAME}}_checking_overflow
 /////////////// BaseCaseUnsigned ///////////////
+#if defined(__PYX_HAVE_BUILTIN_OVERFLOW)
+static CYTHON_INLINE {{UINT}} __Pyx_add_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+    {{UINT}} result;
+    *overflow |= __builtin_add_overflow(a, b, &result);
+    return result;
+}
+static CYTHON_INLINE {{UINT}} __Pyx_sub_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+    {{UINT}} result;
+    *overflow |= __builtin_sub_overflow(a, b, &result);
+    return result;
+}
+static CYTHON_INLINE {{UINT}} __Pyx_mul_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
+    {{UINT}} result;
+    *overflow |= __builtin_mul_overflow(a, b, &result);
+    return result;
+}
+#else
 static CYTHON_INLINE {{UINT}} __Pyx_add_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
    {{UINT}} r = a + b;
    *overflow |= r < a;
@@ -82,7 +126,12 @@ static CYTHON_INLINE {{UINT}} __Pyx_sub_{{NAME}}_checking_overflow({{UINT}} a, {
 }
 static CYTHON_INLINE {{UINT}} __Pyx_mul_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
-    if ((sizeof({{UINT}}) < sizeof(unsigned long))) {
+    // if we have a constant, use the constant version
+    if (__Pyx_is_constant(b)) {
+        return __Pyx_mul_const_{{NAME}}_checking_overflow(a, b, overflow);
+    } else if (__Pyx_is_constant(a)) {
+        return __Pyx_mul_const_{{NAME}}_checking_overflow(b, a, overflow);
+    } else if ((sizeof({{UINT}}) < sizeof(unsigned long))) {
        unsigned long big_r = ((unsigned long) a) * ((unsigned long) b);
        {{UINT}} r = ({{UINT}}) big_r;
        *overflow |= big_r != r;
@@ -95,21 +144,26 @@ static CYTHON_INLINE {{UINT}} __Pyx_mul_{{NAME}}_checking_overflow({{UINT}} a, {
        return r;
 #endif
    } else {
-        {{UINT}} prod = a * b;
+        return __Pyx_mul_const_{{NAME}}_checking_overflow(a, b, overflow);
-        double dprod = ((double) a) * ((double) b);
-        // Overflow results in an error of at least 2^sizeof(UINT),
-        // whereas rounding represents an error on the order of 2^(sizeof(UINT)-53).
-        *overflow |= fabs(dprod - prod) > (__PYX_MAX({{UINT}}) / 2);
-        return prod;
    }
 }
 static CYTHON_INLINE {{UINT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
-    if (b > 1) {
+    // note that deliberately the overflow check is written such that it divides by b; this
-        *overflow |= a > __PYX_MAX({{UINT}}) / b;
+    // function is used when b is a constant thus the compiler should be able to eliminate the
+    // (very slow on most CPUs!) division operation
+    if (__Pyx_is_constant(a) && !__Pyx_is_constant(b)) {
+        // if a is a compile-time constant and b isn't, swap them
+        {{UINT}} temp = b;
+        b = a;
+        a = temp;
    }
-    return a * b;
+    {{UINT}} prod = a * b;
+    if (b != 0)
+        *overflow |= a > (__PYX_MAX({{UINT}}) / b);
+    return prod;
 }
+#endif // __PYX_HAVE_BUILTIN_OVERFLOW
 static CYTHON_INLINE {{UINT}} __Pyx_div_{{NAME}}_checking_overflow({{UINT}} a, {{UINT}} b, int *overflow) {
@@ -130,13 +184,39 @@ static CYTHON_INLINE {{INT}} __Pyx_div_{{NAME}}_checking_overflow({{INT}} a, {{I
 // Use when b is known at compile time.
-static CYTHON_INLINE {{INT}} __Pyx_add_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+#define __Pyx_add_const_{{NAME}}_checking_overflow __Pyx_add_{{NAME}}_checking_overflow
-static CYTHON_INLINE {{INT}} __Pyx_sub_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow);
+#define __Pyx_sub_const_{{NAME}}_checking_overflow __Pyx_sub_{{NAME}}_checking_overflow
+#if defined(__PYX_HAVE_BUILTIN_OVERFLOW)
+#define __Pyx_mul_const_{{NAME}}_checking_overflow __Pyx_mul_{{NAME}}_checking_overflow
+#else
 static CYTHON_INLINE {{INT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} constant, int *overflow);
+#endif
 #define __Pyx_div_const_{{NAME}}_checking_overflow __Pyx_div_{{NAME}}_checking_overflow
 /////////////// BaseCaseSigned ///////////////
+#if defined(__PYX_HAVE_BUILTIN_OVERFLOW)
+static CYTHON_INLINE {{INT}} __Pyx_add_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+    {{INT}} result;
+    *overflow |= __builtin_add_overflow(a, b, &result);
+    return result;
+}
+static CYTHON_INLINE {{INT}} __Pyx_sub_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+    {{INT}} result;
+    *overflow |= __builtin_sub_overflow(a, b, &result);
+    return result;
+}
+static CYTHON_INLINE {{INT}} __Pyx_mul_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+    {{INT}} result;
+    *overflow |= __builtin_mul_overflow(a, b, &result);
+    return result;
+}
+#else
 static CYTHON_INLINE {{INT}} __Pyx_add_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
    if ((sizeof({{INT}}) < sizeof(long))) {
        long big_r = ((long) a) + ((long) b);
@@ -151,40 +231,33 @@ static CYTHON_INLINE {{INT}} __Pyx_add_{{NAME}}_checking_overflow({{INT}} a, {{I
        return r;
 #endif
    } else {
-        // Signed overflow undefined, but unsigned overflow is well defined.
+        // Signed overflow undefined, but unsigned overflow is well defined. Casting is
-        {{INT}} r = ({{INT}}) ((unsigned {{INT}}) a + (unsigned {{INT}}) b);
+        // implementation-defined, but we assume two's complement (see __Pyx_check_twos_complement
+        // above), and arithmetic in two's-complement is the same as unsigned arithmetic.
+        unsigned {{INT}} r = (unsigned {{INT}}) a + (unsigned {{INT}}) b;
        // Overflow happened if the operands have the same sign, but the result
        // has opposite sign.
-        // sign(a) == sign(b) != sign(r)
+        *overflow |= (((unsigned {{INT}})a ^ r) & ((unsigned {{INT}})b ^ r)) >> (8 * sizeof({{INT}}) - 1);
-        {{INT}} sign_a = __PYX_SIGN_BIT({{INT}}) & a;
+        return ({{INT}}) r;
-        {{INT}} sign_b = __PYX_SIGN_BIT({{INT}}) & b;
-        {{INT}} sign_r = __PYX_SIGN_BIT({{INT}}) & r;
-        *overflow |= (sign_a == sign_b) & (sign_a != sign_r);
-        return r;
-    }
-}
-static CYTHON_INLINE {{INT}} __Pyx_add_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
-    if (b > 0) {
-        *overflow |= a > __PYX_MAX({{INT}}) - b;
-    } else if (b < 0) {
-        *overflow |= a < __PYX_MIN({{INT}}) - b;
    }
-    return a + b;
 }
 static CYTHON_INLINE {{INT}} __Pyx_sub_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
-    *overflow |= b == __PYX_MIN({{INT}});
+    // Compilers don't handle widening as well in the subtraction case, so don't bother
-    return __Pyx_add_{{NAME}}_checking_overflow(a, -b, overflow);
+    unsigned {{INT}} r = (unsigned {{INT}}) a - (unsigned {{INT}}) b;
-}
+    // Overflow happened if the operands differing signs, and the result
+    // has opposite sign to a.
-static CYTHON_INLINE {{INT}} __Pyx_sub_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+    *overflow |= (((unsigned {{INT}})a ^ (unsigned {{INT}})b) & ((unsigned {{INT}})a ^ r)) >> (8 * sizeof({{INT}}) - 1);
-    *overflow |= b == __PYX_MIN({{INT}});
+    return ({{INT}}) r;
-    return __Pyx_add_const_{{NAME}}_checking_overflow(a, -b, overflow);
 }
 static CYTHON_INLINE {{INT}} __Pyx_mul_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
-    if ((sizeof({{INT}}) < sizeof(long))) {
+    // if we have a constant, use the constant version
+    if (__Pyx_is_constant(b)) {
+        return __Pyx_mul_const_{{NAME}}_checking_overflow(a, b, overflow);
+    } else if (__Pyx_is_constant(a)) {
+        return __Pyx_mul_const_{{NAME}}_checking_overflow(b, a, overflow);
+    } else if ((sizeof({{INT}}) < sizeof(long))) {
        long big_r = ((long) a) * ((long) b);
        {{INT}} r = ({{INT}}) big_r;
        *overflow |= big_r != r;
@@ -197,16 +270,20 @@ static CYTHON_INLINE {{INT}} __Pyx_mul_{{NAME}}_checking_overflow({{INT}} a, {{I
        return ({{INT}}) r;
 #endif
    } else {
-        {{INT}} prod = a * b;
+        return __Pyx_mul_const_{{NAME}}_checking_overflow(a, b, overflow);
-        double dprod = ((double) a) * ((double) b);
-        // Overflow results in an error of at least 2^sizeof(INT),
-        // whereas rounding represents an error on the order of 2^(sizeof(INT)-53).
-        *overflow |= fabs(dprod - prod) > (__PYX_MAX({{INT}}) / 2);
-        return prod;
    }
 }
 static CYTHON_INLINE {{INT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
+    // note that deliberately all these comparisons are written such that they divide by b; this
+    // function is used when b is a constant thus the compiler should be able to eliminate the
+    // (very slow on most CPUs!) division operations
+    if (__Pyx_is_constant(a) && !__Pyx_is_constant(b)) {
+        // if a is a compile-time constant and b isn't, swap them
+        {{INT}} temp = b;
+        b = a;
+        a = temp;
+    }
    if (b > 1) {
        *overflow |= a > __PYX_MAX({{INT}}) / b;
        *overflow |= a < __PYX_MIN({{INT}}) / b;
@@ -216,16 +293,17 @@ static CYTHON_INLINE {{INT}} __Pyx_mul_const_{{NAME}}_checking_overflow({{INT}}
        *overflow |= a > __PYX_MIN({{INT}}) / b;
        *overflow |= a < __PYX_MAX({{INT}}) / b;
    }
-    return a * b;
+    return ({{INT}}) (((unsigned {{INT}})a) * ((unsigned {{INT}}) b));
 }
+#endif  // defined(__PYX_HAVE_BUILTIN_OVERFLOW)
 static CYTHON_INLINE {{INT}} __Pyx_div_{{NAME}}_checking_overflow({{INT}} a, {{INT}} b, int *overflow) {
    if (b == 0) {
        *overflow |= 1;
        return 0;
    }
-    *overflow |= (a == __PYX_MIN({{INT}})) & (b == -1);
+    *overflow |= a == __PYX_MIN({{INT}}) && b == -1;
-    return a / b;
+    return ({{INT}}) ((unsigned {{INT}}) a / (unsigned {{INT}}) b);
 }
@@ -293,12 +371,18 @@ static CYTHON_INLINE {{TYPE}} __Pyx_{{BINOP}}_{{NAME}}_checking_overflow({{TYPE}
 /////////////// LeftShift.proto ///////////////
 static CYTHON_INLINE {{TYPE}} __Pyx_lshift_{{NAME}}_checking_overflow({{TYPE}} a, {{TYPE}} b, int *overflow) {
-    *overflow |=
+    int overflow_check =
 #if {{SIGNED}}
-        (b < 0) |
+        (a < 0) || (b < 0) ||
 #endif
-        (b > ({{TYPE}}) (8 * sizeof({{TYPE}}))) | (a > (__PYX_MAX({{TYPE}}) >> b));
+        // the following must be a _logical_ OR as the RHS is undefined if the LHS is true
+        (b >= ({{TYPE}}) (8 * sizeof({{TYPE}}))) || (a > (__PYX_MAX({{TYPE}}) >> b));
+    if (overflow_check) {
+        *overflow |= 1;
+        return 0;
+    } else {
        return a << b;
+    }
 }
 #define __Pyx_lshift_const_{{NAME}}_checking_overflow __Pyx_lshift_{{NAME}}_checking_overflow

--- a/tests/run/overflow_check.pxi
+++ b/tests/run/overflow_check.pxi
 cimport cython
 cdef object two = 2
-cdef int size_in_bits = sizeof(INT) * 8
+cdef int size_in_bits_ = sizeof(INT) * 8
 cdef bint is_signed_ = not ((<INT>-1) > 0)
-cdef INT max_value_ = <INT>(two ** (size_in_bits - is_signed_) - 1)
+cdef INT max_value_ = <INT>(two ** (size_in_bits_ - is_signed_) - 1)
 cdef INT min_value_ = ~max_value_
 cdef INT half_ = max_value_ // <INT>2
 # Python visible.
+size_in_bits = size_in_bits_
 is_signed = is_signed_
 max_value = max_value_
 min_value = min_value_
@@ -230,6 +231,17 @@ def test_lshift(INT a, int b):
    """
    >>> test_lshift(1, 10)
    1024
+    >>> test_lshift(1, size_in_bits - 2) == 1 << (size_in_bits - 2)
+    True
+    >>> test_lshift(0, size_in_bits - 1)
+    0
+    >>> test_lshift(1, size_in_bits - 1) == 1 << (size_in_bits - 1) if not is_signed else True
+    True
+    >>> if is_signed: expect_overflow(test_lshift, 1, size_in_bits - 1)
+    >>> expect_overflow(test_lshift, 0, size_in_bits)
+    >>> expect_overflow(test_lshift, 1, size_in_bits)
+    >>> expect_overflow(test_lshift, 0, size_in_bits + 1)
+    >>> expect_overflow(test_lshift, 1, size_in_bits + 1)
    >>> expect_overflow(test_lshift, 1, 100)
    >>> expect_overflow(test_lshift, max_value, 1)
    >>> test_lshift(max_value, 0) == max_value