Commit 9390ef0c authored by Oleg Nesterov, committed by Linus Torvalds

percpu_rw_semaphore: kill ->writer_mutex, add ->write_ctr

percpu_rw_semaphore->writer_mutex was only added to simplify the initial
rewrite; the only thing it protects is clear_fast_ctr(), which otherwise
could be called by multiple writers.  ->rw_sem is enough to serialize the
writers.

Kill this mutex and add "atomic_t write_ctr" instead.  The writers
increment/decrement this counter; the readers check that it is zero
instead of calling mutex_is_locked().
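
Concretely, the patched reader fast path reduces to the check below
(condensed from update_fast_ctr() in the diff that follows; val is +1
for a down_read and -1 for an up_read, and the preempt_enable()/return
not visible in the hunk is assumed from context):

	bool success = false;

	preempt_disable();
	/* no pending writer: safe to touch the per-cpu counter */
	if (likely(!atomic_read(&brw->write_ctr))) {
		__this_cpu_add(*brw->fast_read_ctr, val);
		success = true;
	}
	preempt_enable();
	/* on failure the caller falls back to ->rw_sem and ->slow_read_ctr */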

Move atomic_add(clear_fast_ctr(), slow_read_ctr) under down_write() to
avoid the race with other writers.  This is a bit sub-optimal: only the
first writer needs this, and we do not need to exclude the readers at this
stage.  But it is simple; we do not want another internal lock until we
add more features.
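
The resulting writer-side ordering, condensed from the patched
percpu_down_write() shown in full in the diff below:

	void percpu_down_write(struct percpu_rw_semaphore *brw)
	{
		/* 1) force new readers off the fast path */
		atomic_inc(&brw->write_ctr);
		/* 2) wait for the in-flight preempt-disabled fast paths */
		synchronize_sched_expedited();
		/* 3) serialize writers, block the new readers completely */
		down_write(&brw->rw_sem);
		/* 4) ->fast_read_ctr is stable, fold it into the slow counter */
		atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
		/* 5) wait until every active reader does percpu_up_read() */
		wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
	}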

And this speeds up the write-contended case.  Before this patch the racing
writers sleep in synchronize_sched_expedited() sequentially; with this
patch multiple synchronize_sched_expedited() calls can "overlap" with each
other.  Note: we can do more optimizations; this is only the first step.
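
For context, a caller pairs the helpers like this (a minimal sketch:
my_lock and the two functions are illustrative names; only the
percpu_rw_semaphore API itself comes from this file):

	static struct percpu_rw_semaphore my_lock; /* percpu_init_rwsem(&my_lock) at setup */

	void my_reader(void)
	{
		percpu_down_read(&my_lock);	/* per-cpu inc unless a writer is pending */
		/* ... read-side critical section ... */
		percpu_up_read(&my_lock);
	}

	void my_writer(void)
	{
		percpu_down_write(&my_lock);	/* heavy: involves synchronize_sched_expedited() */
		/* ... write-side critical section, all readers drained ... */
		percpu_up_write(&my_lock);
	}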
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Anton Arapov <anton@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a1fd3e24
include/linux/percpu-rwsem.h:

 #ifndef _LINUX_PERCPU_RWSEM_H
 #define _LINUX_PERCPU_RWSEM_H

-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>

 struct percpu_rw_semaphore {
 	unsigned int __percpu *fast_read_ctr;
-	struct mutex writer_mutex;
+	atomic_t write_ctr;
 	struct rw_semaphore rw_sem;
 	atomic_t slow_read_ctr;
 	wait_queue_head_t write_waitq;
 ...

lib/percpu-rwsem.c:

-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>

@@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
 	if (unlikely(!brw->fast_read_ctr))
 		return -ENOMEM;

-	mutex_init(&brw->writer_mutex);
 	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->write_ctr, 0);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
 	return 0;

@@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)

 /*
  * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
  * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
  * serialize with the preempt-disabled section below.
  *

@@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
  *
- * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
  * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
  * reader inside the critical section. See the comments in down_write and
  * up_write below.

@@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 	bool success = false;

 	preempt_disable();
-	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+	if (likely(!atomic_read(&brw->write_ctr))) {
 		__this_cpu_add(*brw->fast_read_ctr, val);
 		success = true;
 	}

@@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 }

 /*
- * A writer takes ->writer_mutex to exclude other writers and to force the
- * readers to switch to the slow mode, note the mutex_is_locked() check in
- * update_fast_ctr().
+ * A writer increments ->write_ctr to force the readers to switch to the
+ * slow mode, note the atomic_read() check in update_fast_ctr().
  *
  * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
  * ->fast_read_ctr is stable. Once the writer moves its sum into the slow

@@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
-	mutex_lock(&brw->writer_mutex);
+	/* tell update_fast_ctr() there is a pending writer */
+	atomic_inc(&brw->write_ctr);

 	/*
-	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
 	 *    so that update_fast_ctr() can't succeed.
 	 *
 	 * 2. Ensures we see the result of every previous this_cpu_add() in

@@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 	 */
 	synchronize_sched_expedited();

+	/* exclude other writers, and block the new readers completely */
+	down_write(&brw->rw_sem);
+
 	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
 	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);

-	/* block the new readers completely */
-	down_write(&brw->rw_sem);
-
 	/* wait for all readers to complete their percpu_up_read() */
 	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }

 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
-	/* allow the new readers, but only the slow-path */
+	/* release the lock, but the readers can't use the fast-path */
 	up_write(&brw->rw_sem);

 	/*
 	 * Insert the barrier before the next fast-path in down_read,
 	 * see W_R case in the comment above update_fast_ctr().
 	 */
 	synchronize_sched_expedited();

-	mutex_unlock(&brw->writer_mutex);
+	/* the last writer unblocks update_fast_ctr() */
+	atomic_dec(&brw->write_ctr);
 }