Commit bd673145 authored by Vladimir Davydov, committed by Linus Torvalds

memcg, slab: simplify synchronization scheme

At present, we have the following mutexes protecting data related to per
memcg kmem caches:

 - slab_mutex.  This one is held during the whole kmem cache creation
   and destruction paths.  We also take it when updating per root cache
   memcg_caches arrays (see memcg_update_all_caches).  As a result, taking
   it guarantees there will be no changes to any kmem cache (including per
   memcg ones).  Why do we need anything else then?  The point is that
   slab_mutex is private to the slab implementation and has internal
   dependencies on other locks (get_online_cpus), so we just don't want to
   rely upon it in memcontrol.c and prefer to introduce additional mutexes
   instead.

 - activate_kmem_mutex.  Initially it was added to synchronize kmem limit
   initialization (memcg_activate_kmem).  However, since per root cache
   memcg_caches arrays can only grow on kmem limit initialization (see
   memcg_update_all_caches), we also employ it to protect against
   memcg_caches arrays relocation (e.g. see
   __kmem_cache_destroy_memcg_children).

 - We have a convention not to take slab_mutex in memcontrol.c, but we
   want to walk over the per memcg memcg_slab_caches lists there (e.g. for
   destroying all memcg caches on offline).  So each memcg has its own
   slab_caches_mutex protecting its list.

The mutexes are taken in the following order:

   activate_kmem_mutex -> slab_mutex -> memcg::slab_caches_mutex

Such a synchronization scheme has a number of flaws, for instance:

 - We can't call kmem_cache_{destroy,shrink} while walking over a
   memcg::memcg_slab_caches list due to the locking order.  As a result,
   in mem_cgroup_destroy_all_caches we have to schedule the
   memcg_cache_params::destroy work, which shrinks and destroys the cache.

 - We don't have a mutex to synchronize per memcg caches destruction
   between memcg offline (mem_cgroup_destroy_all_caches) and root cache
   destruction (__kmem_cache_destroy_memcg_children).  Currently we just
   don't bother about it.

This patch simplifies things by substituting the per memcg
slab_caches_mutexes with a single global memcg_slab_mutex.  It is held
whenever a per memcg cache is created or destroyed, so it protects the per
root cache memcg_caches arrays and the per memcg memcg_slab_caches lists.
The locking order is now as follows:

   activate_kmem_mutex -> memcg_slab_mutex -> slab_mutex

This allows us to call kmem_cache_{create,shrink,destroy} under the
memcg_slab_mutex.  As a result, we don't need the
memcg_cache_params::destroy work any more - we can simply destroy caches
while iterating over a per memcg slab caches list, as the sketch below
illustrates.
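
For example, offlining a memcg now boils down to the following pattern (a
simplified sketch of the reworked mem_cgroup_destroy_all_caches() from the
diff below):

        mutex_lock(&memcg_slab_mutex);
        list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
                cachep = memcg_params_to_cache(params);
                /* shrink first; destroy only if the cache has no pages left */
                kmem_cache_shrink(cachep);
                if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
                        memcg_kmem_destroy_cache(cachep);
        }
        mutex_unlock(&memcg_slab_mutex);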

Using the global mutex also simplifies synchronization between concurrent
per memcg cache creation and destruction, e.g. mem_cgroup_destroy_all_caches
vs __kmem_cache_destroy_memcg_children, since both paths now serialize on
memcg_slab_mutex (see the sketch below).
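
Root cache destruction takes the same mutex before dropping the per memcg
children, so the two paths cannot race (again, a simplified sketch of the
reworked __kmem_cache_destroy_memcg_children() from the diff below):

        mutex_lock(&memcg_slab_mutex);
        for_each_memcg_cache_index(i) {
                c = cache_from_memcg_idx(s, i);
                if (!c)
                        continue;
                /* destroys the cache and clears its memcg_caches slot */
                memcg_kmem_destroy_cache(c);
                if (cache_from_memcg_idx(s, i))
                        failed++;
        }
        mutex_unlock(&memcg_slab_mutex);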

The downside is that we replace the per-memcg slab_caches_mutexes with a
hammer-like global mutex, but since we already take either the slab_mutex
or the cgroup_mutex along with a memcg::slab_caches_mutex, it shouldn't
hurt concurrency a lot.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent c67a8a68
include/linux/memcontrol.h

@@ -497,8 +497,6 @@ char *memcg_create_cache_name(struct mem_cgroup *memcg,
 int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
                             struct kmem_cache *root_cache);
 void memcg_free_cache_params(struct kmem_cache *s);
-void memcg_register_cache(struct kmem_cache *s);
-void memcg_unregister_cache(struct kmem_cache *s);
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);

@@ -640,14 +638,6 @@ static inline void memcg_free_cache_params(struct kmem_cache *s)
 {
 }

-static inline void memcg_register_cache(struct kmem_cache *s)
-{
-}
-
-static inline void memcg_unregister_cache(struct kmem_cache *s)
-{
-}
-
 static inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
include/linux/slab.h

@@ -116,7 +116,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
                        unsigned long,
                        void (*)(void *));
 #ifdef CONFIG_MEMCG_KMEM
-void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *);
+struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *,
+                                           struct kmem_cache *);
 #endif
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);

@@ -525,8 +526,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
  * @nr_pages: number of pages that belongs to this cache.
- * @destroy: worker to be called whenever we are ready, or believe we may be
- *           ready, to destroy this cache.
  */
 struct memcg_cache_params {
        bool is_root_cache;

@@ -540,7 +539,6 @@ struct memcg_cache_params {
                        struct list_head list;
                        struct kmem_cache *root_cache;
                        atomic_t nr_pages;
-                       struct work_struct destroy;
                };
        };
 };
mm/memcontrol.c

@@ -357,10 +357,9 @@ struct mem_cgroup {
        struct cg_proto tcp_mem;
 #endif
 #if defined(CONFIG_MEMCG_KMEM)
-       /* analogous to slab_common's slab_caches list. per-memcg */
+       /* analogous to slab_common's slab_caches list, but per-memcg;
+        * protected by memcg_slab_mutex */
        struct list_head memcg_slab_caches;
-       /* Not a spinlock, we can take a lot of time walking the list */
-       struct mutex slab_caches_mutex;
        /* Index in the kmem_cache->memcg_params->memcg_caches array */
        int kmemcg_id;
 #endif
@@ -2913,6 +2912,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);

 #ifdef CONFIG_MEMCG_KMEM
+/*
+ * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
+ * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
+ */
+static DEFINE_MUTEX(memcg_slab_mutex);
+
 static DEFINE_MUTEX(activate_kmem_mutex);

 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
@@ -2945,10 +2950,10 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
        print_slabinfo_header(m);

-       mutex_lock(&memcg->slab_caches_mutex);
+       mutex_lock(&memcg_slab_mutex);
        list_for_each_entry(params, &memcg->memcg_slab_caches, list)
                cache_show(memcg_params_to_cache(params), m);
-       mutex_unlock(&memcg->slab_caches_mutex);
+       mutex_unlock(&memcg_slab_mutex);

        return 0;
 }
@@ -3050,8 +3055,6 @@ void memcg_update_array_size(int num)
                memcg_limited_groups_array_size = memcg_caches_array_size(num);
 }

-static void kmem_cache_destroy_work_func(struct work_struct *w);
-
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 {
        struct memcg_cache_params *cur_params = s->memcg_params;
@@ -3148,8 +3151,6 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
        if (memcg) {
                s->memcg_params->memcg = memcg;
                s->memcg_params->root_cache = root_cache;
-               INIT_WORK(&s->memcg_params->destroy,
-                               kmem_cache_destroy_work_func);
                css_get(&memcg->css);
        } else
                s->memcg_params->is_root_cache = true;
@@ -3166,24 +3167,34 @@ void memcg_free_cache_params(struct kmem_cache *s)
        kfree(s->memcg_params);
 }

-void memcg_register_cache(struct kmem_cache *s)
+static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
+                                    struct kmem_cache *root_cache)
 {
-       struct kmem_cache *root;
-       struct mem_cgroup *memcg;
+       struct kmem_cache *cachep;
        int id;

-       if (is_root_cache(s))
+       lockdep_assert_held(&memcg_slab_mutex);
+
+       id = memcg_cache_id(memcg);
+
+       /*
+        * Since per-memcg caches are created asynchronously on first
+        * allocation (see memcg_kmem_get_cache()), several threads can try to
+        * create the same cache, but only one of them may succeed.
+        */
+       if (cache_from_memcg_idx(root_cache, id))
                return;

+       cachep = kmem_cache_create_memcg(memcg, root_cache);
        /*
-        * Holding the slab_mutex assures nobody will touch the memcg_caches
-        * array while we are modifying it.
+        * If we could not create a memcg cache, do not complain, because
+        * that's not critical at all as we can always proceed with the root
+        * cache.
         */
-       lockdep_assert_held(&slab_mutex);
+       if (!cachep)
+               return;

-       root = s->memcg_params->root_cache;
-       memcg = s->memcg_params->memcg;
-       id = memcg_cache_id(memcg);
+       list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);

        /*
         * Since readers won't lock (see cache_from_memcg_idx()), we need a
@@ -3192,49 +3203,30 @@ void memcg_register_cache(struct kmem_cache *s)
         */
        smp_wmb();

-       /*
-        * Initialize the pointer to this cache in its parent's memcg_params
-        * before adding it to the memcg_slab_caches list, otherwise we can
-        * fail to convert memcg_params_to_cache() while traversing the list.
-        */
-       VM_BUG_ON(root->memcg_params->memcg_caches[id]);
-       root->memcg_params->memcg_caches[id] = s;
-
-       mutex_lock(&memcg->slab_caches_mutex);
-       list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
-       mutex_unlock(&memcg->slab_caches_mutex);
+       BUG_ON(root_cache->memcg_params->memcg_caches[id]);
+       root_cache->memcg_params->memcg_caches[id] = cachep;
 }

-void memcg_unregister_cache(struct kmem_cache *s)
+static void memcg_kmem_destroy_cache(struct kmem_cache *cachep)
 {
-       struct kmem_cache *root;
+       struct kmem_cache *root_cache;
        struct mem_cgroup *memcg;
        int id;

-       if (is_root_cache(s))
-               return;
+       lockdep_assert_held(&memcg_slab_mutex);

-       /*
-        * Holding the slab_mutex assures nobody will touch the memcg_caches
-        * array while we are modifying it.
-        */
-       lockdep_assert_held(&slab_mutex);
+       BUG_ON(is_root_cache(cachep));

-       root = s->memcg_params->root_cache;
-       memcg = s->memcg_params->memcg;
+       root_cache = cachep->memcg_params->root_cache;
+       memcg = cachep->memcg_params->memcg;
        id = memcg_cache_id(memcg);

-       mutex_lock(&memcg->slab_caches_mutex);
-       list_del(&s->memcg_params->list);
-       mutex_unlock(&memcg->slab_caches_mutex);
-
-       /*
-        * Clear the pointer to this cache in its parent's memcg_params only
-        * after removing it from the memcg_slab_caches list, otherwise we can
-        * fail to convert memcg_params_to_cache() while traversing the list.
-        */
-       VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
-       root->memcg_params->memcg_caches[id] = NULL;
+       BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
+       root_cache->memcg_params->memcg_caches[id] = NULL;
+
+       list_del(&cachep->memcg_params->list);
+
+       kmem_cache_destroy(cachep);
 }

 /*
@@ -3268,70 +3260,42 @@ static inline void memcg_resume_kmem_account(void)
        current->memcg_kmem_skip_account--;
 }

-static void kmem_cache_destroy_work_func(struct work_struct *w)
-{
-       struct kmem_cache *cachep;
-       struct memcg_cache_params *p;
-
-       p = container_of(w, struct memcg_cache_params, destroy);
-
-       cachep = memcg_params_to_cache(p);
-
-       kmem_cache_shrink(cachep);
-       if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
-               kmem_cache_destroy(cachep);
-}
-
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
        struct kmem_cache *c;
        int i, failed = 0;

-       /*
-        * If the cache is being destroyed, we trust that there is no one else
-        * requesting objects from it. Even if there are, the sanity checks in
-        * kmem_cache_destroy should caught this ill-case.
-        *
-        * Still, we don't want anyone else freeing memcg_caches under our
-        * noses, which can happen if a new memcg comes to life. As usual,
-        * we'll take the activate_kmem_mutex to protect ourselves against
-        * this.
-        */
-       mutex_lock(&activate_kmem_mutex);
+       mutex_lock(&memcg_slab_mutex);
        for_each_memcg_cache_index(i) {
                c = cache_from_memcg_idx(s, i);
                if (!c)
                        continue;

-               /*
-                * We will now manually delete the caches, so to avoid races
-                * we need to cancel all pending destruction workers and
-                * proceed with destruction ourselves.
-                */
-               cancel_work_sync(&c->memcg_params->destroy);
-               kmem_cache_destroy(c);
+               memcg_kmem_destroy_cache(c);

                if (cache_from_memcg_idx(s, i))
                        failed++;
        }
-       mutex_unlock(&activate_kmem_mutex);
+       mutex_unlock(&memcg_slab_mutex);
        return failed;
 }

 static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 {
        struct kmem_cache *cachep;
-       struct memcg_cache_params *params;
+       struct memcg_cache_params *params, *tmp;

        if (!memcg_kmem_is_active(memcg))
                return;

-       mutex_lock(&memcg->slab_caches_mutex);
-       list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+       mutex_lock(&memcg_slab_mutex);
+       list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
                cachep = memcg_params_to_cache(params);
-               schedule_work(&cachep->memcg_params->destroy);
+               kmem_cache_shrink(cachep);
+               if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+                       memcg_kmem_destroy_cache(cachep);
        }
-       mutex_unlock(&memcg->slab_caches_mutex);
+       mutex_unlock(&memcg_slab_mutex);
 }

 struct create_work {
@@ -3346,7 +3310,10 @@ static void memcg_create_cache_work_func(struct work_struct *w)
        struct mem_cgroup *memcg = cw->memcg;
        struct kmem_cache *cachep = cw->cachep;

-       kmem_cache_create_memcg(memcg, cachep);
+       mutex_lock(&memcg_slab_mutex);
+       memcg_kmem_create_cache(memcg, cachep);
+       mutex_unlock(&memcg_slab_mutex);

        css_put(&memcg->css);
        kfree(cw);
 }
@@ -5022,13 +4989,14 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
         * Make sure we have enough space for this cgroup in each root cache's
         * memcg_params.
         */
+       mutex_lock(&memcg_slab_mutex);
        err = memcg_update_all_caches(memcg_id + 1);
+       mutex_unlock(&memcg_slab_mutex);
        if (err)
                goto out_rmid;

        memcg->kmemcg_id = memcg_id;
        INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-       mutex_init(&memcg->slab_caches_mutex);

        /*
         * We couldn't have accounted to this cgroup, because it hasn't got the
mm/slab_common.c

@@ -160,7 +160,6 @@ do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
        s->refcount = 1;
        list_add(&s->list, &slab_caches);
-       memcg_register_cache(s);
 out:
        if (err)
                return ERR_PTR(err);
@@ -270,9 +269,10 @@ EXPORT_SYMBOL(kmem_cache_create);
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *memcg,
+                                           struct kmem_cache *root_cache)
 {
-       struct kmem_cache *s;
+       struct kmem_cache *s = NULL;
        char *cache_name;

        get_online_cpus();
@@ -280,14 +280,6 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
        mutex_lock(&slab_mutex);

-       /*
-        * Since per-memcg caches are created asynchronously on first
-        * allocation (see memcg_kmem_get_cache()), several threads can try to
-        * create the same cache, but only one of them may succeed.
-        */
-       if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
-               goto out_unlock;
-
        cache_name = memcg_create_cache_name(memcg, root_cache);
        if (!cache_name)
                goto out_unlock;
@@ -296,14 +288,18 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
                         root_cache->size, root_cache->align,
                         root_cache->flags, root_cache->ctor,
                         memcg, root_cache);
-       if (IS_ERR(s))
+       if (IS_ERR(s)) {
                kfree(cache_name);
+               s = NULL;
+       }

 out_unlock:
        mutex_unlock(&slab_mutex);

        put_online_mems();
        put_online_cpus();
+
+       return s;
 }

 static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
@@ -348,11 +344,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
                goto out_unlock;

        list_del(&s->list);
-       memcg_unregister_cache(s);

        if (__kmem_cache_shutdown(s) != 0) {
                list_add(&s->list, &slab_caches);
-               memcg_register_cache(s);
                printk(KERN_ERR "kmem_cache_destroy %s: "
                       "Slab cache still has objects\n", s->name);
                dump_stack();