Commit 9fb6fde9 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] infrastructure for handling radix_tree_node allocation

radix_tree_node_alloc() uses GFP_ATOMIC, under spinlocking.  If the
allocation fails then userspace sees ENOMEM and application failure occurs.

A single add_to_page_cache() can require up to six radix_tree_nodes on
32-bit machines and up to twice as many on 64-bit machines; because each
node is also twice as large there, the worst-case storage on 64-bit is
roughly quadrupled.
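
(To see where these numbers come from, assuming the tree's usual 64-way
fanout of six index bits per level: a 32-bit index needs ceil(32/6) = 6
levels, a 64-bit index needs ceil(64/6) = 11, and 8-byte pointers double
the size of each node.)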

My approach to solving this problem is to create a per-cpu pool of
preallocated radix_tree_nodes, private to the radix-tree code.

The radix-tree user will call the new radix-tree API function
radix_tree_preload() to ensure that this pool has sufficient nodes to cover
the worst case.  radix_tree_preload() should be called outside locks, with
GFP_KERNEL, so that it can run page reclaim.

If it succeeds, radix_tree_preload() will return with preemption disabled so
that the per-cpu radix_tree_node pool is protected.  The user must call
radix_tree_preload_end() to terminate the transaction.
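
As a minimal sketch of the intended calling sequence (the function, lock
and item names here are illustrative, not part of this patch; the tree is
assumed to have been initialised with an atomic gfp_mask such as
GFP_ATOMIC):

	int insert_item(struct radix_tree_root *root, spinlock_t *lock,
			unsigned long index, void *item)
	{
		int error;

		/* Outside all locks: may sleep and run page reclaim */
		error = radix_tree_preload(GFP_KERNEL);
		if (error)
			return error;	/* preemption was left enabled */

		spin_lock(lock);
		/*
		 * With an atomic root->gfp_mask, a kmem_cache_alloc()
		 * failure inside the insert falls back to this CPU's
		 * preloaded nodes, so the insertion cannot fail for
		 * want of memory.
		 */
		error = radix_tree_insert(root, index, item);
		spin_unlock(lock);

		radix_tree_preload_end();	/* re-enables preemption */
		return error;
	}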

In the common case the per-cpu pools will never be touched:
radix_tree_insert() dips into the pool only if kmem_cache_alloc() fails, as
radix_tree_node_alloc() below shows, so the pools remain full at all times.
This keeps the fastpath cheap - just a few instructions.

This patch also removes the now-unneeded radix-tree mempool, which saves
130 kbytes of permanently allocated kernel memory on 32-bit platforms and
260 kbytes on 64-bit.
parent aaf2ef19
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -19,6 +19,8 @@
 #ifndef _LINUX_RADIX_TREE_H
 #define _LINUX_RADIX_TREE_H
 
+#include <linux/preempt.h>
+
 struct radix_tree_node;
 
 struct radix_tree_root {
@@ -45,5 +47,11 @@ extern int radix_tree_delete(struct radix_tree_root *, unsigned long);
 extern unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			unsigned long first_index, unsigned int max_items);
+int radix_tree_preload(int gfp_mask);
+
+static inline void radix_tree_preload_end(void)
+{
+	preempt_enable();
+}
 
 #endif /* _LINUX_RADIX_TREE_H */
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -20,10 +20,11 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/mempool.h>
 #include <linux/module.h>
 #include <linux/radix-tree.h>
+#include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/gfp.h>
 #include <linux/string.h>
 
 /*
@@ -49,18 +50,74 @@ struct radix_tree_path {
  * Radix tree node cache.
  */
 static kmem_cache_t *radix_tree_node_cachep;
-static mempool_t *radix_tree_node_pool;
 
-static inline struct radix_tree_node *
+/*
+ * Per-cpu pool of preloaded nodes
+ */
+struct radix_tree_preload {
+	int nr;
+	struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
+};
+DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
+
+/*
+ * This assumes that the caller has performed appropriate preallocation, and
+ * that the caller has pinned this thread of control to the current CPU.
+ */
+static struct radix_tree_node *
 radix_tree_node_alloc(struct radix_tree_root *root)
 {
-	return mempool_alloc(radix_tree_node_pool, root->gfp_mask);
+	struct radix_tree_node *ret;
+
+	ret = kmem_cache_alloc(radix_tree_node_cachep, root->gfp_mask);
+	if (ret == NULL && !(root->gfp_mask & __GFP_WAIT)) {
+		struct radix_tree_preload *rtp;
+
+		rtp = &__get_cpu_var(radix_tree_preloads);
+		if (rtp->nr) {
+			ret = rtp->nodes[rtp->nr - 1];
+			rtp->nodes[rtp->nr - 1] = NULL;
+			rtp->nr--;
+		}
+	}
+	return ret;
 }
 
 static inline void
 radix_tree_node_free(struct radix_tree_node *node)
 {
-	mempool_free(node, radix_tree_node_pool);
+	kmem_cache_free(radix_tree_node_cachep, node);
+}
+
+/*
+ * Load up this CPU's radix_tree_node buffer with sufficient objects to
+ * ensure that the addition of a single element in the tree cannot fail. On
+ * success, return zero, with preemption disabled. On error, return -ENOMEM
+ * with preemption not disabled.
+ */
+int radix_tree_preload(int gfp_mask)
+{
+	struct radix_tree_preload *rtp;
+	struct radix_tree_node *node;
+	int ret = -ENOMEM;
+
+	preempt_disable();
+	rtp = &__get_cpu_var(radix_tree_preloads);
+	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
+		preempt_enable();
+		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+		if (node == NULL)
+			goto out;
+		preempt_disable();
+		rtp = &__get_cpu_var(radix_tree_preloads);
+		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
+			rtp->nodes[rtp->nr++] = node;
+		else
+			kmem_cache_free(radix_tree_node_cachep, node);
+	}
+	ret = 0;
+out:
+	return ret;
 }
 
 /*
@@ -339,26 +396,12 @@ int radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
-static void radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
+static void
+radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
 {
 	memset(node, 0, sizeof(struct radix_tree_node));
 }
 
-static void *radix_tree_node_pool_alloc(int gfp_mask, void *data)
-{
-	return kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
-}
-
-static void radix_tree_node_pool_free(void *node, void *data)
-{
-	kmem_cache_free(radix_tree_node_cachep, node);
-}
-
-/*
- * FIXME! 512 nodes is 200-300k of memory. This needs to be
- * scaled by the amount of available memory, and hopefully
- * reduced also.
- */
 void __init radix_tree_init(void)
 {
 	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
@@ -366,8 +409,4 @@ void __init radix_tree_init(void)
 			0, radix_tree_node_ctor, NULL);
 	if (!radix_tree_node_cachep)
 		panic ("Failed to create radix_tree_node cache\n");
-	radix_tree_node_pool = mempool_create(512, radix_tree_node_pool_alloc,
-			radix_tree_node_pool_free, NULL);
-	if (!radix_tree_node_pool)
-		panic ("Failed to create radix_tree_node pool\n");
 }
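
The refill loop in radix_tree_preload() is the subtle part; an annotated
sketch of the same loop, with explanatory comments added:

	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
		/* The GFP_KERNEL allocation may sleep, so preemption
		 * must be enabled around it. */
		preempt_enable();
		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
		if (node == NULL)
			goto out;
		preempt_disable();
		/* The task may have migrated to another CPU while it was
		 * preemptible, so this CPU's pool must be looked up
		 * afresh. */
		rtp = &__get_cpu_var(radix_tree_preloads);
		if (rtp->nr < ARRAY_SIZE(rtp->nodes))
			rtp->nodes[rtp->nr++] = node;
		else
			/* another task topped this pool up meanwhile */
			kmem_cache_free(radix_tree_node_cachep, node);
	}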