Commit e4d77700, authored by Roman Gushchin, committed by Dennis Zhou

percpu: optimize locking in pcpu_balance_workfn()

pcpu_balance_workfn() unconditionally calls pcpu_balance_free(),
pcpu_reclaim_populated(), pcpu_balance_populated() and
pcpu_balance_free() again.

Each call to pcpu_balance_free() and pcpu_reclaim_populated() will
cause at least one acquisition of the pcpu_lock. So even if the
balancing was scheduled because of a failed atomic allocation,
pcpu_lock will be acquired at least 4 times. This obviously
increases the contention on the pcpu_lock.

To optimize the scheme, let's grab the pcpu_lock at the upper level
(in pcpu_balance_workfn()) and keep it held for the whole duration of
the scheduled work, dropping it only temporarily around slow
operations such as chunk (de)population and the creation of new
chunks.
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
parent 4829c791
...@@ -1980,6 +1980,9 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) ...@@ -1980,6 +1980,9 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
* If empty_only is %false, reclaim all fully free chunks regardless of the * If empty_only is %false, reclaim all fully free chunks regardless of the
* number of populated pages. Otherwise, only reclaim chunks that have no * number of populated pages. Otherwise, only reclaim chunks that have no
* populated pages. * populated pages.
*
* CONTEXT:
* pcpu_lock (can be dropped temporarily)
*/ */
static void pcpu_balance_free(bool empty_only) static void pcpu_balance_free(bool empty_only)
{ {
...@@ -1987,12 +1990,12 @@ static void pcpu_balance_free(bool empty_only) ...@@ -1987,12 +1990,12 @@ static void pcpu_balance_free(bool empty_only)
struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot]; struct list_head *free_head = &pcpu_chunk_lists[pcpu_free_slot];
struct pcpu_chunk *chunk, *next; struct pcpu_chunk *chunk, *next;
lockdep_assert_held(&pcpu_lock);
/* /*
* There's no reason to keep around multiple unused chunks and VM * There's no reason to keep around multiple unused chunks and VM
* areas can be scarce. Destroy all free chunks except for one. * areas can be scarce. Destroy all free chunks except for one.
*/ */
spin_lock_irq(&pcpu_lock);
list_for_each_entry_safe(chunk, next, free_head, list) { list_for_each_entry_safe(chunk, next, free_head, list) {
WARN_ON(chunk->immutable); WARN_ON(chunk->immutable);
...@@ -2004,8 +2007,10 @@ static void pcpu_balance_free(bool empty_only) ...@@ -2004,8 +2007,10 @@ static void pcpu_balance_free(bool empty_only)
list_move(&chunk->list, &to_free); list_move(&chunk->list, &to_free);
} }
spin_unlock_irq(&pcpu_lock); if (list_empty(&to_free))
return;
spin_unlock_irq(&pcpu_lock);
list_for_each_entry_safe(chunk, next, &to_free, list) { list_for_each_entry_safe(chunk, next, &to_free, list) {
unsigned int rs, re; unsigned int rs, re;
...@@ -2019,6 +2024,7 @@ static void pcpu_balance_free(bool empty_only) ...@@ -2019,6 +2024,7 @@ static void pcpu_balance_free(bool empty_only)
pcpu_destroy_chunk(chunk); pcpu_destroy_chunk(chunk);
cond_resched(); cond_resched();
} }
spin_lock_irq(&pcpu_lock);
} }
/** /**
...@@ -2029,6 +2035,9 @@ static void pcpu_balance_free(bool empty_only) ...@@ -2029,6 +2035,9 @@ static void pcpu_balance_free(bool empty_only)
* OOM killer to be triggered. We should avoid doing so until an actual * OOM killer to be triggered. We should avoid doing so until an actual
* allocation causes the failure as it is possible that requests can be * allocation causes the failure as it is possible that requests can be
* serviced from already backed regions. * serviced from already backed regions.
*
* CONTEXT:
* pcpu_lock (can be dropped temporarily)
*/ */
static void pcpu_balance_populated(void) static void pcpu_balance_populated(void)
{ {
...@@ -2037,6 +2046,8 @@ static void pcpu_balance_populated(void) ...@@ -2037,6 +2046,8 @@ static void pcpu_balance_populated(void)
struct pcpu_chunk *chunk; struct pcpu_chunk *chunk;
int slot, nr_to_pop, ret; int slot, nr_to_pop, ret;
lockdep_assert_held(&pcpu_lock);
/* /*
* Ensure there are certain number of free populated pages for * Ensure there are certain number of free populated pages for
* atomic allocs. Fill up from the most packed so that atomic * atomic allocs. Fill up from the most packed so that atomic
...@@ -2064,13 +2075,11 @@ static void pcpu_balance_populated(void) ...@@ -2064,13 +2075,11 @@ static void pcpu_balance_populated(void)
if (!nr_to_pop) if (!nr_to_pop)
break; break;
spin_lock_irq(&pcpu_lock);
list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) { list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
nr_unpop = chunk->nr_pages - chunk->nr_populated; nr_unpop = chunk->nr_pages - chunk->nr_populated;
if (nr_unpop) if (nr_unpop)
break; break;
} }
spin_unlock_irq(&pcpu_lock);
if (!nr_unpop) if (!nr_unpop)
continue; continue;
...@@ -2080,12 +2089,13 @@ static void pcpu_balance_populated(void) ...@@ -2080,12 +2089,13 @@ static void pcpu_balance_populated(void)
chunk->nr_pages) { chunk->nr_pages) {
int nr = min_t(int, re - rs, nr_to_pop); int nr = min_t(int, re - rs, nr_to_pop);
spin_unlock_irq(&pcpu_lock);
ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp); ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
cond_resched();
spin_lock_irq(&pcpu_lock);
if (!ret) { if (!ret) {
nr_to_pop -= nr; nr_to_pop -= nr;
spin_lock_irq(&pcpu_lock);
pcpu_chunk_populated(chunk, rs, rs + nr); pcpu_chunk_populated(chunk, rs, rs + nr);
spin_unlock_irq(&pcpu_lock);
} else { } else {
nr_to_pop = 0; nr_to_pop = 0;
} }
...@@ -2097,11 +2107,12 @@ static void pcpu_balance_populated(void) ...@@ -2097,11 +2107,12 @@ static void pcpu_balance_populated(void)
if (nr_to_pop) { if (nr_to_pop) {
/* ran out of chunks to populate, create a new one and retry */ /* ran out of chunks to populate, create a new one and retry */
spin_unlock_irq(&pcpu_lock);
chunk = pcpu_create_chunk(gfp); chunk = pcpu_create_chunk(gfp);
cond_resched();
spin_lock_irq(&pcpu_lock);
if (chunk) { if (chunk) {
spin_lock_irq(&pcpu_lock);
pcpu_chunk_relocate(chunk, -1); pcpu_chunk_relocate(chunk, -1);
spin_unlock_irq(&pcpu_lock);
goto retry_pop; goto retry_pop;
} }
} }
...@@ -2117,6 +2128,10 @@ static void pcpu_balance_populated(void) ...@@ -2117,6 +2128,10 @@ static void pcpu_balance_populated(void)
* populated pages threshold, reintegrate the chunk if it has empty free pages. * populated pages threshold, reintegrate the chunk if it has empty free pages.
* Each chunk is scanned in the reverse order to keep populated pages close to * Each chunk is scanned in the reverse order to keep populated pages close to
* the beginning of the chunk. * the beginning of the chunk.
*
* CONTEXT:
* pcpu_lock (can be dropped temporarily)
*
*/ */
static void pcpu_reclaim_populated(void) static void pcpu_reclaim_populated(void)
{ {
...@@ -2124,7 +2139,7 @@ static void pcpu_reclaim_populated(void) ...@@ -2124,7 +2139,7 @@ static void pcpu_reclaim_populated(void)
struct pcpu_block_md *block; struct pcpu_block_md *block;
int i, end; int i, end;
spin_lock_irq(&pcpu_lock); lockdep_assert_held(&pcpu_lock);
restart: restart:
/* /*
...@@ -2190,8 +2205,6 @@ static void pcpu_reclaim_populated(void) ...@@ -2190,8 +2205,6 @@ static void pcpu_reclaim_populated(void)
list_move(&chunk->list, list_move(&chunk->list,
&pcpu_chunk_lists[pcpu_sidelined_slot]); &pcpu_chunk_lists[pcpu_sidelined_slot]);
} }
spin_unlock_irq(&pcpu_lock);
} }
/** /**
...@@ -2212,10 +2225,14 @@ static void pcpu_balance_workfn(struct work_struct *work) ...@@ -2212,10 +2225,14 @@ static void pcpu_balance_workfn(struct work_struct *work)
* appropriate. * appropriate.
*/ */
mutex_lock(&pcpu_alloc_mutex); mutex_lock(&pcpu_alloc_mutex);
spin_lock_irq(&pcpu_lock);
pcpu_balance_free(false); pcpu_balance_free(false);
pcpu_reclaim_populated(); pcpu_reclaim_populated();
pcpu_balance_populated(); pcpu_balance_populated();
pcpu_balance_free(true); pcpu_balance_free(true);
spin_unlock_irq(&pcpu_lock);
mutex_unlock(&pcpu_alloc_mutex); mutex_unlock(&pcpu_alloc_mutex);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment