Commit b3346c2f authored by Sergey Vojtovich, committed by Monty

Restore LF_BACKOFF

Moved InnoDB UT_RELAX_CPU() to the server. Restored the cross-platform LF_BACKOFF
implementation, now based on UT_RELAX_CPU().
parent 07e9ff1f
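
For orientation, the pattern this commit restores is a lock-free retry loop that spins on a compare-and-swap and calls LF_BACKOFF() between failed attempts. The sketch below is not part of the commit: struct node and push_node() are illustrative names only, while my_atomic_casptr(), LF_BACKOFF() and my_cpu.h are the pieces touched by the hunks that follow.

/*
  Minimal sketch (not from the commit): LF_BACKOFF() in a CAS retry loop.
  struct node and push_node() are hypothetical; my_atomic_casptr() and
  LF_BACKOFF() come from my_atomic.h / my_cpu.h.
*/
#include <my_global.h>
#include <my_atomic.h>
#include <my_cpu.h>

struct node { struct node *next; };

static void push_node(struct node * volatile *top, struct node *n)
{
  struct node *old_top;
  do
  {
    old_top= *top;        /* snapshot the current head */
    n->next= old_top;     /* link the new node in front of it */
    /*
      LF_BACKOFF() always returns 1, so "&& LF_BACKOFF()" keeps the loop
      condition intact after a failed CAS while pausing the CPU
      (PAUSE / YieldProcessor) before the next attempt.
    */
  } while (!my_atomic_casptr((void **)(char *)top, (void **)&old_top, n) &&
           LF_BACKOFF());
}

The same idiom appears unchanged in the lf_alloc-pin.c, lf_hash.c and waiting_threads.c hunks below; only the spelling changes from the old LF_BACKOFF macro to the new LF_BACKOFF() inline function.
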
@@ -90,37 +90,7 @@ C_MODE_END
     ret= 0; /* avoid compiler warning */ \
     ret= IL_COMP_EXCHG ## S (a, ret, ret);
 #endif
-/*
-  my_yield_processor (equivalent of x86 PAUSE instruction) should be used
-  to improve performance on hyperthreaded CPUs. Intel recommends to use it in
-  spin loops also on non-HT machines to reduce power consumption (see e.g
-  http://softwarecommunity.intel.com/articles/eng/2004.htm)
-  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
-  and YieldProcessor shows that much better performance is achieved by calling
-  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
-  loop count in the range 200-300 brought best results.
-*/
-#ifndef YIELD_LOOPS
-#define YIELD_LOOPS 200
-#endif
-static __inline int my_yield_processor()
-{
-  int i;
-  for(i=0; i<YIELD_LOOPS; i++)
-  {
-#if (_MSC_VER <= 1310)
-    /* On older compilers YieldProcessor is not available, use inline assembly*/
-    __asm { rep nop }
-#else
-    YieldProcessor();
-#endif
-  }
-  return 1;
-}
-#define LF_BACKOFF my_yield_processor()
 #else /* cleanup */
 #undef IL_EXCHG_ADD32
...
@@ -17,6 +17,7 @@
 #define INCLUDE_LF_INCLUDED
 #include <my_atomic.h>
+#include <my_cpu.h>
 C_MODE_START
...
@@ -346,15 +346,6 @@ make_atomic_store(ptr)
 #undef make_atomic_fas_body
 #undef intptr
-/*
-  the macro below defines (as an expression) the code that
-  will be run in spin-loops. Intel manuals recummend to have PAUSE there.
-  It is expected to be defined in include/atomic/ *.h files
-*/
-#ifndef LF_BACKOFF
-#define LF_BACKOFF (1)
-#endif
 #define MY_ATOMIC_OK 0
 #define MY_ATOMIC_NOT_1CPU 1
 extern int my_atomic_initialize();
...
+#ifndef MY_CPU_INCLUDED
+#define MY_CPU_INCLUDED
 /* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
    This program is free software; you can redistribute it and/or modify
@@ -42,3 +44,58 @@
 #define HMT_medium_high()
 #define HMT_high()
 #endif
+static inline void MY_RELAX_CPU(void)
+{
+#ifdef HAVE_PAUSE_INSTRUCTION
+  /*
+    According to the gcc info page, asm volatile means that the
+    instruction has important side-effects and must not be removed.
+    Also asm volatile may trigger a memory barrier (spilling all registers
+    to memory).
+  */
+#ifdef __SUNPRO_CC
+  asm ("pause" );
+#else
+  __asm__ __volatile__ ("pause");
+#endif
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+  __asm__ __volatile__ ("rep; nop");
+#elif defined _WIN32
+  /*
+    In the Win32 API, the x86 PAUSE instruction is executed by calling
+    the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+    independent way by using YieldProcessor.
+  */
+  YieldProcessor();
+#elif defined(_ARCH_PWR8)
+  __ppc_get_timebase();
+#else
+  int32 var, oldval= 0;
+  my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
+                                  MY_MEMORY_ORDER_RELAXED);
+#endif
+}
+/*
+  LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
+  recommends to use it in spin loops also on non-HT machines to reduce power
+  consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
+  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
+  and YieldProcessor shows that much better performance is achieved by calling
+  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
+  loop count in the range 200-300 brought best results.
+*/
+static inline int LF_BACKOFF(void)
+{
+  int i;
+  for (i= 0; i < 200; i++)
+    MY_RELAX_CPU();
+  return 1;
+}
+#endif
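
As a usage note (not part of the commit), the new MY_RELAX_CPU() can also be called directly in a plain spin-wait, which is what the InnoDB os_once hunk further down does. A hedged sketch, assuming the my_atomic_load32_explicit() helper from my_atomic.h; wait_for_ready() and the ready flag are illustrative names only:

#include <my_global.h>
#include <my_atomic.h>
#include <my_cpu.h>

/* Illustrative helper, not from the commit: spin until *ready becomes nonzero. */
static void wait_for_ready(int32 *ready)
{
  while (!my_atomic_load32_explicit(ready, MY_MEMORY_ORDER_ACQUIRE))
    MY_RELAX_CPU();   /* PAUSE/YieldProcessor between probes to ease the core */
}
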
@@ -430,7 +430,7 @@ static void alloc_free(uchar *first,
   {
     anext_node(last)= tmp.node;
   } while (!my_atomic_casptr((void **)(char *)&allocator->top,
-                             (void **)&tmp.ptr, first) && LF_BACKOFF);
+                             (void **)&tmp.ptr, first) && LF_BACKOFF());
 }
 /*
@@ -501,7 +501,7 @@ void *lf_alloc_new(LF_PINS *pins)
   {
     node= allocator->top;
     lf_pin(pins, 0, node);
-  } while (node != allocator->top && LF_BACKOFF);
+  } while (node != allocator->top && LF_BACKOFF());
   if (!node)
   {
     node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
...
@@ -102,7 +102,7 @@ static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
   do { /* PTR() isn't necessary below, head is a dummy node */
     cursor->curr= (LF_SLIST *)(*cursor->prev);
     lf_pin(pins, 1, cursor->curr);
-  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF());
   for (;;)
   {
@@ -117,7 +117,7 @@ static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
       link= cursor->curr->link;
       cursor->next= PTR(link);
       lf_pin(pins, 0, cursor->next);
-    } while (link != cursor->curr->link && LF_BACKOFF);
+    } while (link != cursor->curr->link && LF_BACKOFF());
     if (!DELETED(link))
     {
@@ -145,7 +145,7 @@ static int l_find(LF_SLIST * volatile *head, CHARSET_INFO *cs, uint32 hashnr,
         and remove this deleted node
       */
       if (my_atomic_casptr((void **) cursor->prev,
-                           (void **) &cursor->curr, cursor->next) && LF_BACKOFF)
+                           (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
         lf_alloc_free(pins, cursor->curr);
       else
         goto retry;
...
@@ -617,7 +617,7 @@ static int deadlock_search(struct deadlock_arg *arg, WT_THD *blocker,
   {
     rc= *shared_ptr;
     lf_pin(arg->thd->pins, 0, rc);
-  } while (rc != *shared_ptr && LF_BACKOFF);
+  } while (rc != *shared_ptr && LF_BACKOFF());
   if (rc == 0)
   {
...
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
 #include "univ.i"
 #include "ut0ut.h"
+#include "my_cpu.h"
 /** Execute a given function exactly once in a multi-threaded environment
 or wait for the function to be executed by another thread.
@@ -110,7 +111,7 @@ class os_once {
           ut_error;
         }
-        UT_RELAX_CPU();
+        MY_RELAX_CPU();
       }
     }
   }
...
@@ -52,35 +52,6 @@ Created 1/20/1994 Heikki Tuuri
 /** Time stamp */
 typedef time_t ib_time_t;
-#ifdef HAVE_PAUSE_INSTRUCTION
-/* According to the gcc info page, asm volatile means that the
-instruction has important side-effects and must not be removed.
-Also asm volatile may trigger a memory barrier (spilling all registers
-to memory). */
-# ifdef __SUNPRO_CC
-#  define UT_RELAX_CPU() asm ("pause" )
-# else
-#  define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
-/* In the Win32 API, the x86 PAUSE instruction is executed by calling
-the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
-independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
-    volatile int32 volatile_var; \
-    int32 oldval= 0; \
-    my_atomic_cas32(&volatile_var, &oldval, 1); \
-  } while (0)
-#endif
 #if defined (__GNUC__)
 # define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
 #elif defined (_MSC_VER)
...
@@ -293,7 +293,7 @@ ut_delay(
   UT_LOW_PRIORITY_CPU();
   for (i = 0; i < delay * 50; i++) {
-    UT_RELAX_CPU();
+    MY_RELAX_CPU();
     UT_COMPILER_BARRIER();
   }
...