Commit 37a098e9 authored by Mike Snitzer

dm round robin: revert "use percpu 'repeat_count' and 'current_path'"

The sloppy nature of lockless access to percpu pointers
(s->current_path) in rr_select_path(), from multiple threads, is
causing some paths to be used more than others, which results in
lower observed IO performance.
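
To see why the per-CPU caching skews selection, here is a minimal
userspace sketch (illustrative only, not the driver code; all names
are invented for the demo). Each thread plays the role of a CPU with
its own cached path, while one shared atomic counter stands in for the
percpu repeat_count; every thread keeps returning its privately cached
path while all of them drain the same counter:

    /*
     * Userspace analogy of the reverted lockless fast path.
     * Build with: cc -pthread demo.c
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define NPATHS    4
    #define NTHREADS  4
    #define NSELECTS  100000

    static atomic_int repeat_count;            /* shared, like the percpu_counter */
    static atomic_int rr_head;                 /* shared round-robin position */
    static _Thread_local int cached_path = -1; /* per-"CPU" current_path */
    static atomic_long hits[NPATHS];

    static int select_path(void)
    {
        /* lockless fast path: reuse this thread's cached path */
        if (cached_path >= 0 && atomic_fetch_sub(&repeat_count, 1) > 1)
            return cached_path;

        /* slow path: advance the shared round-robin head */
        cached_path = atomic_fetch_add(&rr_head, 1) % NPATHS;
        atomic_store(&repeat_count, 1000);     /* old RR_MIN_IO-style value */
        return cached_path;
    }

    static void *worker(void *unused)
    {
        for (int i = 0; i < NSELECTS; i++)
            atomic_fetch_add(&hits[select_path()], 1);
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NTHREADS];

        for (int i = 0; i < NTHREADS; i++)
            pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < NTHREADS; i++)
            pthread_join(t[i], NULL);

        for (int p = 0; p < NPATHS; p++)
            printf("path %d: %ld selections\n", p, atomic_load(&hits[p]));
        return 0;
    }

Run it and the printed per-path counts are typically far from equal,
mirroring the uneven path usage described above.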

Revert these upstream commits to restore truly symmetric round-robin
IO submission in DM multipath:

b0b477c7 dm round robin: use percpu 'repeat_count' and 'current_path'
802934b2 dm round robin: do not use this_cpu_ptr() without having preemption disabled

There is no benefit to all this complexity if repeat_count = 1 (which is
the recommended default).
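
For context on where repeat_count comes from (a sketch, with made-up
device numbers and size), each path's last argument in a multipath
table is its repeat_count, so a two-path round-robin group using the
recommended value of 1 could be created like this:

    # 2 paths in 1 path group, round-robin selector, repeat_count 1 per path
    echo "0 2097152 multipath 0 0 1 1 round-robin 0 2 1 8:16 1 8:32 1" \
        | dmsetup create mpath0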

Cc: stable@vger.kernel.org # 4.6+
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 60858318
drivers/md/dm-round-robin.c

@@ -17,8 +17,8 @@
 #include <linux/module.h>
 
 #define DM_MSG_PREFIX "multipath round-robin"
-#define RR_MIN_IO       1000
-#define RR_VERSION      "1.1.0"
+#define RR_MIN_IO       1
+#define RR_VERSION      "1.2.0"
 
 /*-----------------------------------------------------------------
  * Path-handling code, paths are held in lists
@@ -47,44 +47,19 @@ struct selector {
        struct list_head valid_paths;
        struct list_head invalid_paths;
        spinlock_t lock;
-       struct dm_path * __percpu *current_path;
-       struct percpu_counter repeat_count;
 };
 
-static void set_percpu_current_path(struct selector *s, struct dm_path *path)
-{
-       int cpu;
-
-       for_each_possible_cpu(cpu)
-               *per_cpu_ptr(s->current_path, cpu) = path;
-}
-
 static struct selector *alloc_selector(void)
 {
        struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
 
-       if (!s)
-               return NULL;
-
-       INIT_LIST_HEAD(&s->valid_paths);
-       INIT_LIST_HEAD(&s->invalid_paths);
-       spin_lock_init(&s->lock);
-
-       s->current_path = alloc_percpu(struct dm_path *);
-       if (!s->current_path)
-               goto out_current_path;
-       set_percpu_current_path(s, NULL);
-
-       if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL))
-               goto out_repeat_count;
+       if (s) {
+               INIT_LIST_HEAD(&s->valid_paths);
+               INIT_LIST_HEAD(&s->invalid_paths);
+               spin_lock_init(&s->lock);
+       }
 
        return s;
-
-out_repeat_count:
-       free_percpu(s->current_path);
-out_current_path:
-       kfree(s);
-       return NULL;;
 }
 
 static int rr_create(struct path_selector *ps, unsigned argc, char **argv)
@@ -105,8 +80,6 @@ static void rr_destroy(struct path_selector *ps)
 
        free_paths(&s->valid_paths);
        free_paths(&s->invalid_paths);
-       free_percpu(s->current_path);
-       percpu_counter_destroy(&s->repeat_count);
        kfree(s);
        ps->context = NULL;
 }
@@ -157,6 +130,11 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path,
                return -EINVAL;
        }
 
+       if (repeat_count > 1) {
+               DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
+               repeat_count = 1;
+       }
+
        /* allocate the path */
        pi = kmalloc(sizeof(*pi), GFP_KERNEL);
        if (!pi) {
@@ -183,9 +161,6 @@ static void rr_fail_path(struct path_selector *ps, struct dm_path *p)
        struct path_info *pi = p->pscontext;
 
        spin_lock_irqsave(&s->lock, flags);
-       if (p == *this_cpu_ptr(s->current_path))
-               set_percpu_current_path(s, NULL);
-
        list_move(&pi->list, &s->invalid_paths);
        spin_unlock_irqrestore(&s->lock, flags);
 }
@@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes)
        unsigned long flags;
        struct selector *s = ps->context;
        struct path_info *pi = NULL;
-       struct dm_path *current_path = NULL;
-
-       local_irq_save(flags);
-       current_path = *this_cpu_ptr(s->current_path);
-       if (current_path) {
-               percpu_counter_dec(&s->repeat_count);
-               if (percpu_counter_read_positive(&s->repeat_count) > 0) {
-                       local_irq_restore(flags);
-                       return current_path;
-               }
-       }
 
-       spin_lock(&s->lock);
+       spin_lock_irqsave(&s->lock, flags);
        if (!list_empty(&s->valid_paths)) {
                pi = list_entry(s->valid_paths.next, struct path_info, list);
                list_move_tail(&pi->list, &s->valid_paths);
-               percpu_counter_set(&s->repeat_count, pi->repeat_count);
-               set_percpu_current_path(s, pi->path);
-               current_path = pi->path;
        }
        spin_unlock_irqrestore(&s->lock, flags);
 
-       return current_path;
+       return pi ? pi->path : NULL;
 }
 
 static struct path_selector_type rr_ps = {
...
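
For contrast with the sketch above (again a userspace analogy, not the
driver code): with selection done entirely under one lock, every call
advances the shared list head (the role list_move_tail() plays in
rr_select_path() above), so the distribution is exactly even:

    /* Userspace analogy of the restored symmetric round-robin. */
    #include <pthread.h>
    #include <stdio.h>

    #define NPATHS    4
    #define NTHREADS  4
    #define NSELECTS  100000

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int rr_head;          /* next path, guarded by lock */
    static long hits[NPATHS];

    static int select_path(void)
    {
        int path;

        pthread_mutex_lock(&lock);         /* like spin_lock_irqsave() */
        path = rr_head;                    /* head of valid_paths */
        rr_head = (rr_head + 1) % NPATHS;  /* like list_move_tail() */
        hits[path]++;
        pthread_mutex_unlock(&lock);
        return path;
    }

    static void *worker(void *unused)
    {
        for (int i = 0; i < NSELECTS; i++)
            select_path();
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NTHREADS];

        for (int i = 0; i < NTHREADS; i++)
            pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < NTHREADS; i++)
            pthread_join(t[i], NULL);

        for (int p = 0; p < NPATHS; p++)   /* exactly equal counts */
            printf("path %d: %ld selections\n", p, hits[p]);
        return 0;
    }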