Commit 2fe9c14c, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 17: real prio_tree

From: Hugh Dickins <hugh@veritas.com>

Rajesh Venkatasubramanian's implementation of a radix priority search tree of
vmas, to handle object-based reverse mapping corner cases well.

Amongst the objections to object-based rmap were test cases by akpm and by
mingo, in which large numbers of vmas mapping disjoint or overlapping parts of
a file showed strikingly poor performance of the i_mmap lists.  Perhaps those
tests are irrelevant in the real world?  We cannot be too sure: the prio_tree
is well-suited to solving precisely that problem, so unless it turns out to
bring too much overhead, let's include it.

Why is this prio_tree.c placed in mm rather than lib?  See GET_INDEX: this
implementation is geared throughout to use with vmas, though the first half of
the file appears more general than the second half.

Each node of the prio_tree is itself (contained within) a vma: we might save
memory by allocating distinct nodes from which to hang vmas, but that wouldn't
save much, and would complicate the usage with preallocations.  Off each node
of the prio_tree itself hangs a list of like vmas, if any.

The connection from node to list is a little awkward, but probably the best
compromise: it would be more straightforward to list likes directly from the
tree node, but that would use more memory per vma, for the list_head and to
identify that head.  Instead, node's shared.vm_set.head points to next vma
(whose shared.vm_set.head points back to node vma), and that next contains the
list_head from which the rest hang - reusing fields already used in the
prio_tree node itself.

Currently lacks prefetch: Rajesh hopes to add some soon.
parent fc96c90f
...@@ -73,7 +73,15 @@ struct vm_area_struct { ...@@ -73,7 +73,15 @@ struct vm_area_struct {
* For areas with an address space and backing store, * For areas with an address space and backing store,
* one of the address_space->i_mmap{,shared} trees. * one of the address_space->i_mmap{,shared} trees.
*/ */
struct list_head shared; union {
/*
 * List view of the "shared" union, used to hang "like" vmas off the
 * vma that serves as the prio_tree node.  Per the patch description:
 * the node's vm_set.head points to the next vma, that vma's
 * vm_set.head points back to the node vma, and that vma's vm_set.list
 * is the list_head from which the remaining like vmas hang.
 */
struct {
/* list linkage; its two pointers overlay prio_tree_node left/right */
struct list_head list;
void *parent; /* aligns with prio_tree_node parent */
/* next vma when this is the tree node; back-pointer to the node otherwise */
struct vm_area_struct *head;
} vm_set;
struct prio_tree_node prio_tree_node;
} shared;
/* Function pointers to deal with this struct. */ /* Function pointers to deal with this struct. */
struct vm_operations_struct * vm_ops; struct vm_operations_struct * vm_ops;
...@@ -589,27 +597,16 @@ extern void si_meminfo_node(struct sysinfo *val, int nid); ...@@ -589,27 +597,16 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
static inline void vma_prio_tree_init(struct vm_area_struct *vma) static inline void vma_prio_tree_init(struct vm_area_struct *vma)
{ {
INIT_LIST_HEAD(&vma->shared); vma->shared.vm_set.list.next = NULL;
} vma->shared.vm_set.list.prev = NULL;
vma->shared.vm_set.parent = NULL;
static inline void vma_prio_tree_add(struct vm_area_struct *vma, vma->shared.vm_set.head = NULL;
struct vm_area_struct *old)
{
list_add(&vma->shared, &old->shared);
}
static inline void vma_prio_tree_insert(struct vm_area_struct *vma,
struct prio_tree_root *root)
{
list_add_tail(&vma->shared, &root->list);
}
static inline void vma_prio_tree_remove(struct vm_area_struct *vma,
struct prio_tree_root *root)
{
list_del_init(&vma->shared);
} }
/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
struct vm_area_struct *vma_prio_tree_next( struct vm_area_struct *vma_prio_tree_next(
struct vm_area_struct *, struct prio_tree_root *, struct vm_area_struct *, struct prio_tree_root *,
struct prio_tree_iter *, pgoff_t begin, pgoff_t end); struct prio_tree_iter *, pgoff_t begin, pgoff_t end);
......
#ifndef _LINUX_PRIO_TREE_H #ifndef _LINUX_PRIO_TREE_H
#define _LINUX_PRIO_TREE_H #define _LINUX_PRIO_TREE_H
/*
* Dummy version of include/linux/prio_tree.h, just for this patch: struct prio_tree_node {
* no radix priority search tree whatsoever, just implement interfaces struct prio_tree_node *left;
* using the old lists. struct prio_tree_node *right;
*/ struct prio_tree_node *parent;
};
struct prio_tree_root { struct prio_tree_root {
struct list_head list; struct prio_tree_node *prio_tree_node;
unsigned int index_bits;
}; };
struct prio_tree_iter { struct prio_tree_iter {
int not_used_yet; struct prio_tree_node *cur;
unsigned long mask;
unsigned long value;
int size_level;
}; };
#define INIT_PRIO_TREE_ROOT(ptr) \ #define INIT_PRIO_TREE_ROOT(ptr) \
do { \ do { \
INIT_LIST_HEAD(&(ptr)->list); \ (ptr)->prio_tree_node = NULL; \
} while (0) \ (ptr)->index_bits = 1; \
} while (0)
/*
 * Reset a prio_tree node so that its left, right and parent links all
 * point back at the node itself.  A self-pointing link is the condition
 * tested by prio_tree_root(), prio_tree_left_empty() and
 * prio_tree_right_empty().
 *
 * Note: @ptr is expanded several times, so it must not have side effects.
 */
#define INIT_PRIO_TREE_NODE(ptr)	\
do {					\
	(ptr)->parent = (ptr);		\
	(ptr)->right = (ptr);		\
	(ptr)->left = (ptr);		\
} while (0)
/*
 * Put a prio_tree iterator into its starting state: no current node,
 * and mask, value and size_level all zero.
 *
 * Note: @ptr is expanded several times, so it must not have side effects.
 */
#define INIT_PRIO_TREE_ITER(ptr)	\
do {					\
	(ptr)->size_level = 0;		\
	(ptr)->value = 0UL;		\
	(ptr)->mask = 0UL;		\
	(ptr)->cur = NULL;		\
} while (0)
/*
 * Given @ptr, a pointer to the field @member embedded in an object of
 * @type, yield a pointer to that enclosing object.  The member's byte
 * offset is obtained from the address of @member in a @type placed at
 * address 0, and is subtracted from @ptr after casting it to char *.
 */
#define prio_tree_entry(ptr, type, member) \
((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
static inline int prio_tree_empty(const struct prio_tree_root *root) static inline int prio_tree_empty(const struct prio_tree_root *root)
{ {
return list_empty(&root->list); return root->prio_tree_node == NULL;
}
/*
 * Return non-zero when @node's parent link points at @node itself,
 * which is the state INIT_PRIO_TREE_NODE() leaves a node in;
 * return zero otherwise.
 */
static inline int prio_tree_root(const struct prio_tree_node *node)
{
	const struct prio_tree_node *up = node->parent;

	return up == node;
}
/*
 * Return non-zero when @node's left link points at @node itself,
 * which is the state INIT_PRIO_TREE_NODE() leaves a node in;
 * return zero otherwise.
 */
static inline int prio_tree_left_empty(const struct prio_tree_node *node)
{
	const struct prio_tree_node *lhs = node->left;

	return lhs == node;
}
static inline int prio_tree_right_empty(const struct prio_tree_node *node)
{
return node->right == node;
} }
#endif /* _LINUX_PRIO_TREE_H */ #endif /* _LINUX_PRIO_TREE_H */
...@@ -84,6 +84,7 @@ extern void signals_init(void); ...@@ -84,6 +84,7 @@ extern void signals_init(void);
extern void buffer_init(void); extern void buffer_init(void);
extern void pidhash_init(void); extern void pidhash_init(void);
extern void pidmap_init(void); extern void pidmap_init(void);
extern void prio_tree_init(void);
extern void radix_tree_init(void); extern void radix_tree_init(void);
extern void free_initmem(void); extern void free_initmem(void);
extern void populate_rootfs(void); extern void populate_rootfs(void);
...@@ -459,6 +460,7 @@ asmlinkage void __init start_kernel(void) ...@@ -459,6 +460,7 @@ asmlinkage void __init start_kernel(void)
calibrate_delay(); calibrate_delay();
pidmap_init(); pidmap_init();
pgtable_cache_init(); pgtable_cache_init();
prio_tree_init();
#ifdef CONFIG_X86 #ifdef CONFIG_X86
if (efi_enabled) if (efi_enabled)
efi_enter_virtual_mode(); efi_enter_virtual_mode();
......
...@@ -8,8 +8,9 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ ...@@ -8,8 +8,9 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
shmem.o vmalloc.o shmem.o vmalloc.o
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
page_alloc.o page-writeback.o pdflush.o readahead.o \ page_alloc.o page-writeback.o pdflush.o prio_tree.o \
slab.o swap.o truncate.o vmscan.o $(mmu-y) readahead.o slab.o swap.o truncate.o vmscan.o \
$(mmu-y)
obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o
......
...@@ -322,31 +322,6 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) ...@@ -322,31 +322,6 @@ __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
validate_mm(mm); validate_mm(mm);
} }
/*
 * Placeholder vma_prio_tree_next, just for this patch: there is no radix
 * priority search tree behind it, the interface is simply layered over
 * the old lists.  Continues after @vma (or starts at the front of
 * @root's list when @vma is NULL) and returns the next vma whose page
 * offset range overlaps [begin,end], or NULL once the list is exhausted.
 * @iter is not used here.
 */
struct vm_area_struct *vma_prio_tree_next(
	struct vm_area_struct *vma, struct prio_tree_root *root,
	struct prio_tree_iter *iter, pgoff_t begin, pgoff_t end)
{
	struct list_head *pos;

	if (vma)
		pos = vma->shared.next;
	else
		pos = root->list.next;

	for (; pos != &root->list; pos = pos->next) {
		pgoff_t first, last;

		vma = list_entry(pos, struct vm_area_struct, shared);
		first = vma->vm_pgoff;
		last = first + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;

		/* Entirely outside [begin,end]: keep walking */
		if (first > end || last < begin)
			continue;
		return vma;
	}
	return NULL;
}
/* /*
* We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that is * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that is
* already present in an i_mmap{_shared} tree without adjusting the tree. * already present in an i_mmap{_shared} tree without adjusting the tree.
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment