Commit a89cd0f0 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 38 remove anonmm rmap

From: Hugh Dickins <hugh@veritas.com>

Before moving on to anon_vma rmap, remove now what's peculiar to anonmm rmap:
the anonmm handling and the mremap move cows.  Temporarily reduce
page_referenced_anon and try_to_unmap_anon to stubs, so a kernel built with
this patch will not swap anonymous pages at all.
parent e1fd9cc9
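For orientation, the temporary stubs this patch leaves behind behave roughly as sketched here (a simplified paraphrase of the rmap hunks further down, not the literal diff text): page_referenced_anon simply reports the page as referenced, and try_to_unmap_anon refuses to unmap, so vmscan cannot swap out anonymous pages until the follow-up anon_vma patch restores real implementations.

	static inline int page_referenced_anon(struct page *page)
	{
		/* until next patch: claim "referenced" so vmscan keeps anon pages */
		return 1;
	}

	static inline int try_to_unmap_anon(struct page *page)
	{
		/* until next patch: refuse to unmap, so anon pages are never swapped */
		return SWAP_FAIL;
	}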
@@ -76,7 +76,7 @@
#define PG_reclaim 18 /* To be reclaimed asap */
#define PG_compound 19 /* Part of a compound page */
#define PG_anon 20 /* Anonymous page: anonmm in mapping */
#define PG_anon 20 /* Anonymous: anon_vma in mapping */
/*
@@ -35,54 +35,6 @@ static inline void page_dup_rmap(struct page *page)
page_map_unlock(page);
}
int mremap_move_anon_rmap(struct page *page, unsigned long addr);
/**
* mremap_moved_anon_rmap - does new address clash with that noted?
* @page: the page just brought back in from swap
* @addr: the user virtual address at which it is mapped
*
* Returns boolean, true if addr clashes with address already in page.
*
* For do_swap_page and unuse_pte: anonmm rmap cannot find the page if
* it's at different addresses in different mms, so caller must take a
* copy of the page to avoid that: not very clever, but too rare a case
* to merit cleverness.
*/
static inline int mremap_moved_anon_rmap(struct page *page, unsigned long addr)
{
return page->index != (addr & PAGE_MASK);
}
/**
* make_page_exclusive - try to make page exclusive to one mm
* @vma the vm_area_struct covering this address
* @addr the user virtual address of the page in question
*
* Assumes that the page at this address is anonymous (COWable),
* and that the caller holds mmap_sem for reading or for writing.
*
* For mremap's move_page_tables and for swapoff's unuse_process:
* not a general purpose routine, and in general may not succeed.
* But move_page_tables loops until it succeeds, and unuse_process
* holds the original page locked, which protects against races.
*/
static inline int make_page_exclusive(struct vm_area_struct *vma,
unsigned long addr)
{
if (handle_mm_fault(vma->vm_mm, vma, addr, 1) != VM_FAULT_OOM)
return 0;
return -ENOMEM;
}
/*
* Called from kernel/fork.c to manage anonymous memory
*/
void init_rmap(void);
int exec_rmap(struct mm_struct *);
int dup_rmap(struct mm_struct *, struct mm_struct *oldmm);
void exit_rmap(struct mm_struct *);
/*
* Called from mm/vmscan.c to handle paging out
*/
@@ -91,11 +43,6 @@ int try_to_unmap(struct page *);
#else /* !CONFIG_MMU */
#define init_rmap() do {} while (0)
#define exec_rmap(mm) (0)
#define dup_rmap(mm, oldmm) (0)
#define exit_rmap(mm) do {} while (0)
#define page_referenced(page) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
@@ -207,7 +207,6 @@ struct mm_struct {
* together off init_mm.mmlist, and are protected
* by mmlist_lock
*/
struct anonmm *anonmm; /* For rmap to track anon mem */
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
@@ -432,11 +432,6 @@ struct mm_struct * mm_alloc(void)
if (mm) {
memset(mm, 0, sizeof(*mm));
mm = mm_init(mm);
if (mm && exec_rmap(mm)) {
mm_free_pgd(mm);
free_mm(mm);
mm = NULL;
}
}
return mm;
}
@@ -465,7 +460,6 @@ void mmput(struct mm_struct *mm)
spin_unlock(&mmlist_lock);
exit_aio(mm);
exit_mmap(mm);
exit_rmap(mm);
mmdrop(mm);
}
}
@@ -569,12 +563,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (!mm_init(mm))
goto fail_nomem;
if (dup_rmap(mm, oldmm)) {
mm_free_pgd(mm);
free_mm(mm);
goto fail_nomem;
}
if (init_new_context(tsk,mm))
goto fail_nocontext;
@@ -1298,5 +1286,4 @@ void __init proc_caches_init(void)
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
init_rmap();
}
@@ -1368,7 +1368,7 @@ static int do_swap_page(struct mm_struct * mm,
set_pte(page_table, pte);
page_add_anon_rmap(page, vma, address);
if (write_access || mremap_moved_anon_rmap(page, address)) {
if (write_access) {
if (do_wp_page(mm, vma, address,
page_table, pmd, pte) == VM_FAULT_OOM)
ret = VM_FAULT_OOM;
@@ -15,7 +15,6 @@
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/rmap.h>
#include <linux/security.h>
#include <asm/uaccess.h>
@@ -81,21 +80,6 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
return pte;
}
static inline int
can_move_one_pte(pte_t *src, unsigned long new_addr)
{
int move = 1;
if (pte_present(*src)) {
unsigned long pfn = pte_pfn(*src);
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
if (PageAnon(page))
move = mremap_move_anon_rmap(page, new_addr);
}
}
return move;
}
static int
move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr)
@@ -142,15 +126,12 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
* page_table_lock, we should re-check the src entry...
*/
if (src) {
if (!dst)
error = -ENOMEM;
else if (!can_move_one_pte(src, new_addr))
error = -EAGAIN;
else {
if (dst) {
pte_t pte;
pte = ptep_clear_flush(vma, old_addr, src);
set_pte(dst, pte);
}
} else
error = -ENOMEM;
pte_unmap_nested(src);
}
if (dst)
@@ -164,7 +145,7 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
static unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long new_addr, unsigned long old_addr,
unsigned long len, int *cows)
unsigned long len)
{
unsigned long offset;
@@ -176,21 +157,7 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
* only a few pages.. This also makes error recovery easier.
*/
for (offset = 0; offset < len; offset += PAGE_SIZE) {
int ret = move_one_page(vma, old_addr+offset, new_addr+offset);
/*
* The anonmm objrmap can only track anon page movements
* if the page is exclusive to one mm. In the rare case
* when mremap move is applied to a shared page, break
* COW (take a copy of the page) to make it exclusive.
* If shared while on swap, page will be copied when
* brought back in (if it's still shared by then).
*/
if (ret == -EAGAIN) {
ret = make_page_exclusive(vma, old_addr+offset);
offset -= PAGE_SIZE;
(*cows)++;
}
if (ret)
if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
break;
cond_resched();
}
@@ -208,7 +175,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long moved_len;
unsigned long excess = 0;
int split = 0;
int cows = 0;
/*
* We'd prefer to avoid failure later on in do_munmap:
@@ -222,22 +188,19 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (!new_vma)
return -ENOMEM;
moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows);
moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
if (moved_len < old_len) {
/*
* On error, move entries back from new area to old,
* which will succeed since page tables still there,
* and then proceed to unmap new area instead of old.
*/
move_page_tables(new_vma, old_addr, new_addr, moved_len, &cows);
move_page_tables(new_vma, old_addr, new_addr, moved_len);
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
new_addr = -ENOMEM;
}
if (cows) /* Downgrade or remove this message later */
printk(KERN_WARNING "%s: mremap moved %d cows\n",
current->comm, cows);
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
@@ -27,125 +27,11 @@
#include <asm/tlbflush.h>
/*
* struct anonmm: to track a bundle of anonymous memory mappings.
*
* Could be embedded in mm_struct, but mm_struct is rather heavyweight,
* and we may need the anonmm to stay around long after the mm_struct
* and its pgd have been freed: because pages originally faulted into
* that mm have been duped into forked mms, and still need tracking.
*/
struct anonmm {
atomic_t count; /* ref count, including 1 per page */
spinlock_t lock; /* head's locks list; others unused */
struct mm_struct *mm; /* assoc mm_struct, NULL when gone */
struct anonmm *head; /* exec starts new chain from head */
struct list_head list; /* chain of associated anonmms */
};
static kmem_cache_t *anonmm_cachep;
/**
** Functions for creating and destroying struct anonmm.
**/
void __init init_rmap(void)
{
anonmm_cachep = kmem_cache_create("anonmm",
sizeof(struct anonmm), 0, SLAB_PANIC, NULL, NULL);
}
int exec_rmap(struct mm_struct *mm)
{
struct anonmm *anonmm;
anonmm = kmem_cache_alloc(anonmm_cachep, SLAB_KERNEL);
if (unlikely(!anonmm))
return -ENOMEM;
atomic_set(&anonmm->count, 2); /* ref by mm and head */
anonmm->lock = SPIN_LOCK_UNLOCKED; /* this lock is used */
anonmm->mm = mm;
anonmm->head = anonmm;
INIT_LIST_HEAD(&anonmm->list);
mm->anonmm = anonmm;
return 0;
}
int dup_rmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
struct anonmm *anonmm;
struct anonmm *anonhd = oldmm->anonmm->head;
anonmm = kmem_cache_alloc(anonmm_cachep, SLAB_KERNEL);
if (unlikely(!anonmm))
return -ENOMEM;
/*
* copy_mm calls us before dup_mmap has reset the mm fields,
* so reset rss ourselves before adding to anonhd's list,
* to keep away from this mm until it's worth examining.
*/
mm->rss = 0;
atomic_set(&anonmm->count, 1); /* ref by mm */
anonmm->lock = SPIN_LOCK_UNLOCKED; /* this lock is not used */
anonmm->mm = mm;
anonmm->head = anonhd;
spin_lock(&anonhd->lock);
atomic_inc(&anonhd->count); /* ref by anonmm's head */
list_add_tail(&anonmm->list, &anonhd->list);
spin_unlock(&anonhd->lock);
mm->anonmm = anonmm;
return 0;
}
void exit_rmap(struct mm_struct *mm)
{
struct anonmm *anonmm = mm->anonmm;
struct anonmm *anonhd = anonmm->head;
int anonhd_count;
mm->anonmm = NULL;
spin_lock(&anonhd->lock);
anonmm->mm = NULL;
if (atomic_dec_and_test(&anonmm->count)) {
BUG_ON(anonmm == anonhd);
list_del(&anonmm->list);
kmem_cache_free(anonmm_cachep, anonmm);
if (atomic_dec_and_test(&anonhd->count))
BUG();
}
anonhd_count = atomic_read(&anonhd->count);
spin_unlock(&anonhd->lock);
if (anonhd_count == 1) {
BUG_ON(anonhd->mm);
BUG_ON(!list_empty(&anonhd->list));
kmem_cache_free(anonmm_cachep, anonhd);
}
}
static void free_anonmm(struct anonmm *anonmm)
{
struct anonmm *anonhd = anonmm->head;
BUG_ON(anonmm->mm);
BUG_ON(anonmm == anonhd);
spin_lock(&anonhd->lock);
list_del(&anonmm->list);
if (atomic_dec_and_test(&anonhd->count))
BUG();
spin_unlock(&anonhd->lock);
kmem_cache_free(anonmm_cachep, anonmm);
}
static inline void clear_page_anon(struct page *page)
{
struct anonmm *anonmm = (struct anonmm *) page->mapping;
BUG_ON(!page->mapping);
page->mapping = NULL;
ClearPageAnon(page);
if (atomic_dec_and_test(&anonmm->count))
free_anonmm(anonmm);
}
/*
@@ -213,75 +99,7 @@ static int page_referenced_one(struct page *page,
static inline int page_referenced_anon(struct page *page)
{
unsigned int mapcount = page->mapcount;
struct anonmm *anonmm = (struct anonmm *) page->mapping;
struct anonmm *anonhd = anonmm->head;
struct anonmm *new_anonmm = anonmm;
struct list_head *seek_head;
int referenced = 0;
int failed = 0;
spin_lock(&anonhd->lock);
/*
* First try the indicated mm, it's the most likely.
* Make a note to migrate the page if this mm is extinct.
*/
if (!anonmm->mm)
new_anonmm = NULL;
else if (anonmm->mm->rss) {
referenced += page_referenced_one(page,
anonmm->mm, page->index, &mapcount, &failed);
if (!mapcount)
goto out;
}
/*
* Then down the rest of the list, from that as the head. Stop
* when we reach anonhd? No: although a page cannot get dup'ed
* into an older mm, once swapped, its indicated mm may not be
* the oldest, just the first into which it was faulted back.
* If original mm now extinct, note first to contain the page.
*/
seek_head = &anonmm->list;
list_for_each_entry(anonmm, seek_head, list) {
if (!anonmm->mm || !anonmm->mm->rss)
continue;
referenced += page_referenced_one(page,
anonmm->mm, page->index, &mapcount, &failed);
if (!new_anonmm && mapcount < page->mapcount)
new_anonmm = anonmm;
if (!mapcount) {
anonmm = (struct anonmm *) page->mapping;
if (new_anonmm == anonmm)
goto out;
goto migrate;
}
}
/*
* The warning below may appear if page_referenced_anon catches
* the page in between page_add_anon_rmap and its replacement
* demanded by mremap_moved_anon_page: so remove the warning once
* we're convinced that anonmm rmap really is finding its pages.
*/
WARN_ON(!failed);
out:
spin_unlock(&anonhd->lock);
return referenced;
migrate:
/*
* Migrate pages away from an extinct mm, so that its anonmm
* can be freed in due course: we could leave this to happen
* through the natural attrition of try_to_unmap, but that
* would miss locked pages and frequently referenced pages.
*/
spin_unlock(&anonhd->lock);
page->mapping = (void *) new_anonmm;
atomic_inc(&new_anonmm->count);
if (atomic_dec_and_test(&anonmm->count))
free_anonmm(anonmm);
return referenced;
return 1; /* until next patch */
}
/**
@@ -373,8 +191,6 @@ int page_referenced(struct page *page)
void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
struct anonmm *anonmm = vma->vm_mm->anonmm;
BUG_ON(PageReserved(page));
page_map_lock(page);
@@ -382,8 +198,7 @@ void page_add_anon_rmap(struct page *page,
BUG_ON(page->mapping);
SetPageAnon(page);
page->index = address & PAGE_MASK;
page->mapping = (void *) anonmm;
atomic_inc(&anonmm->count);
page->mapping = (void *) vma; /* until next patch */
inc_page_state(nr_mapped);
}
page->mapcount++;
@@ -432,32 +247,6 @@ void page_remove_rmap(struct page *page)
page_map_unlock(page);
}
/**
* mremap_move_anon_rmap - try to note new address of anonymous page
* @page: page about to be moved
* @address: user virtual address at which it is going to be mapped
*
* Returns boolean, true if page is not shared, so address updated.
*
* For mremap's can_move_one_page: to update address when vma is moved,
* provided that anon page is not shared with a parent or child mm.
* If it is shared, then caller must take a copy of the page instead:
* not very clever, but too rare a case to merit cleverness.
*/
int mremap_move_anon_rmap(struct page *page, unsigned long address)
{
int move = 0;
if (page->mapcount == 1) {
page_map_lock(page);
if (page->mapcount == 1) {
page->index = address & PAGE_MASK;
move = 1;
}
page_map_unlock(page);
}
return move;
}
/*
* Subfunctions of try_to_unmap: try_to_unmap_one called
* repeatedly from either try_to_unmap_anon or try_to_unmap_file.
@@ -651,38 +440,7 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor,
static inline int try_to_unmap_anon(struct page *page)
{
struct anonmm *anonmm = (struct anonmm *) page->mapping;
struct anonmm *anonhd = anonmm->head;
struct list_head *seek_head;
int ret = SWAP_AGAIN;
spin_lock(&anonhd->lock);
/*
* First try the indicated mm, it's the most likely.
*/
if (anonmm->mm && anonmm->mm->rss) {
ret = try_to_unmap_one(page, anonmm->mm, page->index, NULL);
if (ret == SWAP_FAIL || !page->mapcount)
goto out;
}
/*
* Then down the rest of the list, from that as the head. Stop
* when we reach anonhd? No: although a page cannot get dup'ed
* into an older mm, once swapped, its indicated mm may not be
* the oldest, just the first into which it was faulted back.
*/
seek_head = &anonmm->list;
list_for_each_entry(anonmm, seek_head, list) {
if (!anonmm->mm || !anonmm->mm->rss)
continue;
ret = try_to_unmap_one(page, anonmm->mm, page->index, NULL);
if (ret == SWAP_FAIL || !page->mapcount)
goto out;
}
out:
spin_unlock(&anonhd->lock);
return ret;
return SWAP_FAIL; /* until next patch */
}
/**
@@ -537,7 +537,6 @@ static int unuse_process(struct mm_struct * mm,
{
struct vm_area_struct* vma;
unsigned long foundaddr = 0;
int ret = 0;
/*
* Go through process' page directory.
@@ -553,10 +552,12 @@ static int unuse_process(struct mm_struct * mm,
}
}
spin_unlock(&mm->page_table_lock);
if (foundaddr && mremap_moved_anon_rmap(page, foundaddr))
ret = make_page_exclusive(vma, foundaddr);
up_read(&mm->mmap_sem);
return ret;
/*
* Currently unuse_process cannot fail, but leave error handling
* at call sites for now, since we change it from time to time.
*/
return 0;
}
/*