Commit a89cd0f0 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] rmap 38 remove anonmm rmap

From: Hugh Dickins <hugh@veritas.com>

Before moving on to anon_vma rmap, remove now what's peculiar to anonmm rmap:
the anonmm handling and the mremap move cows.  Temporarily reduce
page_referenced_anon and try_to_unmap_anon to stubs, so a kernel built with
this patch will not swap anonymous pages at all.
parent e1fd9cc9
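For orientation, the temporary stubs this patch leaves behind behave roughly as sketched here (a simplified paraphrase of the rmap hunks further down, not the literal diff text): page_referenced_anon simply reports the page as referenced, and try_to_unmap_anon refuses to unmap, so vmscan cannot swap out anonymous pages until the follow-up anon_vma patch restores real implementations.

	static inline int page_referenced_anon(struct page *page)
	{
		/* until next patch: claim "referenced" so vmscan keeps anon pages */
		return 1;
	}

	static inline int try_to_unmap_anon(struct page *page)
	{
		/* until next patch: refuse to unmap, so anon pages are never swapped */
		return SWAP_FAIL;
	}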
@@ -76,7 +76,7 @@
#define PG_reclaim 18 /* To be reclaimed asap */
#define PG_compound 19 /* Part of a compound page */
#define PG_anon 20 /* Anonymous page: anonmm in mapping */
#define PG_anon 20 /* Anonymous: anon_vma in mapping */
/*
@@ -35,54 +35,6 @@ static inline void page_dup_rmap(struct page *page)
page_map_unlock(page);
}
int mremap_move_anon_rmap(struct page *page, unsigned long addr);
/**
* mremap_moved_anon_rmap - does new address clash with that noted?
* @page: the page just brought back in from swap
* @addr: the user virtual address at which it is mapped
*
* Returns boolean, true if addr clashes with address already in page.
*
* For do_swap_page and unuse_pte: anonmm rmap cannot find the page if
* it's at different addresses in different mms, so caller must take a
* copy of the page to avoid that: not very clever, but too rare a case
* to merit cleverness.
*/
static inline int mremap_moved_anon_rmap(struct page *page, unsigned long addr)
{
return page->index != (addr & PAGE_MASK);
}
/**
* make_page_exclusive - try to make page exclusive to one mm
* @vma the vm_area_struct covering this address
* @addr the user virtual address of the page in question
*
* Assumes that the page at this address is anonymous (COWable),
* and that the caller holds mmap_sem for reading or for writing.
*
* For mremap's move_page_tables and for swapoff's unuse_process:
* not a general purpose routine, and in general may not succeed.
* But move_page_tables loops until it succeeds, and unuse_process
* holds the original page locked, which protects against races.
*/
static inline int make_page_exclusive(struct vm_area_struct *vma,
unsigned long addr)
{
if (handle_mm_fault(vma->vm_mm, vma, addr, 1) != VM_FAULT_OOM)
return 0;
return -ENOMEM;
}
/*
* Called from kernel/fork.c to manage anonymous memory
*/
void init_rmap(void);
int exec_rmap(struct mm_struct *);
int dup_rmap(struct mm_struct *, struct mm_struct *oldmm);
void exit_rmap(struct mm_struct *);
/*
* Called from mm/vmscan.c to handle paging out
*/
@@ -91,11 +43,6 @@ int try_to_unmap(struct page *);
#else /* !CONFIG_MMU */
#define init_rmap() do {} while (0)
#define exec_rmap(mm) (0)
#define dup_rmap(mm, oldmm) (0)
#define exit_rmap(mm) do {} while (0)
#define page_referenced(page) TestClearPageReferenced(page)
#define try_to_unmap(page) SWAP_FAIL
@@ -207,7 +207,6 @@ struct mm_struct {
* together off init_mm.mmlist, and are protected
* by mmlist_lock
*/
struct anonmm *anonmm; /* For rmap to track anon mem */
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
@@ -432,11 +432,6 @@ struct mm_struct * mm_alloc(void)
if (mm) {
memset(mm, 0, sizeof(*mm));
mm = mm_init(mm);
if (mm && exec_rmap(mm)) {
mm_free_pgd(mm);
free_mm(mm);
mm = NULL;
}
}
return mm;
}
@@ -465,7 +460,6 @@ void mmput(struct mm_struct *mm)
spin_unlock(&mmlist_lock);
exit_aio(mm);
exit_mmap(mm);
exit_rmap(mm);
mmdrop(mm);
}
}
@@ -569,12 +563,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (!mm_init(mm))
goto fail_nomem;
if (dup_rmap(mm, oldmm)) {
mm_free_pgd(mm);
free_mm(mm);
goto fail_nomem;
}
if (init_new_context(tsk,mm))
goto fail_nocontext;
@@ -1298,5 +1286,4 @@ void __init proc_caches_init(void)
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
init_rmap();
}
@@ -1368,7 +1368,7 @@ static int do_swap_page(struct mm_struct * mm,
set_pte(page_table, pte);
page_add_anon_rmap(page, vma, address);
if (write_access || mremap_moved_anon_rmap(page, address)) {
if (write_access) {
if (do_wp_page(mm, vma, address,
page_table, pmd, pte) == VM_FAULT_OOM)
ret = VM_FAULT_OOM;
@@ -15,7 +15,6 @@
#include <linux/swap.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/rmap.h>
#include <linux/security.h>
#include <asm/uaccess.h>
@@ -81,21 +80,6 @@ static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
return pte;
}
static inline int
can_move_one_pte(pte_t *src, unsigned long new_addr)
{
int move = 1;
if (pte_present(*src)) {
unsigned long pfn = pte_pfn(*src);
if (pfn_valid(pfn)) {
struct page *page = pfn_to_page(pfn);
if (PageAnon(page))
move = mremap_move_anon_rmap(page, new_addr);
}
}
return move;
}
static int
move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr)
@@ -142,15 +126,12 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
* page_table_lock, we should re-check the src entry...
*/
if (src) {
if (!dst)
error = -ENOMEM;
else if (!can_move_one_pte(src, new_addr))
error = -EAGAIN;
else {
if (dst) {
pte_t pte;
pte = ptep_clear_flush(vma, old_addr, src);
set_pte(dst, pte);
}
} else
error = -ENOMEM;
pte_unmap_nested(src);
}
if (dst)
@@ -164,7 +145,7 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
static unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long new_addr, unsigned long old_addr,
unsigned long len, int *cows)
unsigned long len)
{
unsigned long offset;
@@ -176,21 +157,7 @@ static unsigned long move_page_tables(struct vm_area_struct *vma,
* only a few pages.. This also makes error recovery easier.
*/
for (offset = 0; offset < len; offset += PAGE_SIZE) {
int ret = move_one_page(vma, old_addr+offset, new_addr+offset);
/*
* The anonmm objrmap can only track anon page movements
* if the page is exclusive to one mm. In the rare case
* when mremap move is applied to a shared page, break
* COW (take a copy of the page) to make it exclusive.
* If shared while on swap, page will be copied when
* brought back in (if it's still shared by then).
*/
if (ret == -EAGAIN) {
ret = make_page_exclusive(vma, old_addr+offset);
offset -= PAGE_SIZE;
(*cows)++;
}
if (ret)
if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0)
break;
cond_resched();
}
@@ -208,7 +175,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long moved_len;
unsigned long excess = 0;
int split = 0;
int cows = 0;
/*
* We'd prefer to avoid failure later on in do_munmap:
@@ -222,22 +188,19 @@ static unsigned long move_vma(struct vm_area_struct *vma,
if (!new_vma)
return -ENOMEM;
moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows);
moved_len = move_page_tables(vma, new_addr, old_addr, old_len);
if (moved_len < old_len) {
/*
* On error, move entries back from new area to old,
* which will succeed since page tables still there,
* and then proceed to unmap new area instead of old.
*/
move_page_tables(new_vma, old_addr, new_addr, moved_len, &cows);
move_page_tables(new_vma, old_addr, new_addr, moved_len);
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
new_addr = -ENOMEM;
}
if (cows) /* Downgrade or remove this message later */
printk(KERN_WARNING "%s: mremap moved %d cows\n",
current->comm, cows);
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
@@ -27,125 +27,11 @@
#include <asm/tlbflush.h>
/*
* struct anonmm: to track a bundle of anonymous memory mappings.
*
* Could be embedded in mm_struct, but mm_struct is rather heavyweight,
* and we may need the anonmm to stay around long after the mm_struct
* and its pgd have been freed: because pages originally faulted into
* that mm have been duped into forked mms, and still need tracking.
*/
struct anonmm {
atomic_t count; /* ref count, including 1 per page */
spinlock_t lock; /* head's locks list; others unused */
struct mm_struct *mm; /* assoc mm_struct, NULL when gone */
struct anonmm *head; /* exec starts new chain from head */
struct list_head list; /* chain of associated anonmms */
};
static kmem_cache_t *anonmm_cachep;
/**
** Functions for creating and destroying struct anonmm.
**/
void __init init_rmap(void)
{
anonmm_cachep = kmem_cache_create("anonmm",
sizeof(struct anonmm), 0, SLAB_PANIC, NULL, NULL);
}
int exec_rmap(struct mm_struct *mm)
{
struct anonmm *anonmm;
anonmm = kmem_cache_alloc(anonmm_cachep, SLAB_KERNEL);
if (unlikely(!anonmm))
return -ENOMEM;
atomic_set(&anonmm->count, 2); /* ref by mm and head */
anonmm->lock = SPIN_LOCK_UNLOCKED; /* this lock is used */
anonmm->mm = mm;
anonmm->head = anonmm;
INIT_LIST_HEAD(&anonmm->list);
mm->anonmm = anonmm;
return 0;
}
int dup_rmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
struct anonmm *anonmm;
struct anonmm *anonhd = oldmm->anonmm->head;
anonmm = kmem_cache_alloc(anonmm_cachep, SLAB_KERNEL);
if (unlikely(!anonmm))
return -ENOMEM;
/*
* copy_mm calls us before dup_mmap has reset the mm fields,
* so reset rss ourselves before adding to anonhd's list,
* to keep away from this mm until it's worth examining.
*/
mm->rss = 0;
atomic_set(&anonmm->count, 1); /* ref by mm */
anonmm->lock = SPIN_LOCK_UNLOCKED; /* this lock is not used */
anonmm->mm = mm;
anonmm->head = anonhd;
spin_lock(&anonhd->lock);
atomic_inc(&anonhd->count); /* ref by anonmm's head */
list_add_tail(&anonmm->list, &anonhd->list);
spin_unlock(&anonhd->lock);
mm->anonmm = anonmm;
return 0;
}
void exit_rmap(struct mm_struct *mm)
{
struct anonmm *anonmm = mm->anonmm;
struct anonmm *anonhd = anonmm->head;
int anonhd_count;
mm->anonmm = NULL;
spin_lock(&anonhd->lock);
anonmm->mm = NULL;
if (atomic_dec_and_test(&anonmm->count)) {
BUG_ON(anonmm == anonhd);
list_del(&anonmm->list);
kmem_cache_free(anonmm_cachep, anonmm);
if (atomic_dec_and_test(&anonhd->count))
BUG();
}
anonhd_count = atomic_read(&anonhd->count);
spin_unlock(&anonhd->lock);
if (anonhd_count == 1) {
BUG_ON(anonhd->mm);
BUG_ON(!list_empty(&anonhd->list));
kmem_cache_free(anonmm_cachep, anonhd);
}
}
static void free_anonmm(struct anonmm *anonmm)
{
struct anonmm *anonhd = anonmm->head;
BUG_ON(anonmm->mm);
BUG_ON(anonmm == anonhd);
spin_lock(&anonhd->lock);
list_del(&anonmm->list);
if (atomic_dec_and_test(&anonhd->count))
BUG();
spin_unlock(&anonhd->lock);
kmem_cache_free(anonmm_cachep, anonmm);
}
static inline void clear_page_anon(struct page *page)
{
struct anonmm *anonmm = (struct anonmm *) page->mapping;
BUG_ON(!page->mapping);
page->mapping = NULL;
ClearPageAnon(page);
if (atomic_dec_and_test(&anonmm->count))
free_anonmm(anonmm);
}
/*
@@ -213,75 +99,7 @@ static int page_referenced_one(struct page *page,
static inline int page_referenced_anon(struct page *page)
{
unsigned int mapcount = page->mapcount;
struct anonmm *anonmm = (struct anonmm *) page->mapping;
struct anonmm *anonhd = anonmm->head;
struct anonmm *new_anonmm = anonmm;
struct list_head *seek_head;
int referenced = 0;
int failed = 0;
spin_lock(&anonhd->lock);
/*
* First try the indicated mm, it's the most likely.
* Make a note to migrate the page if this mm is extinct.
*/
if (!anonmm->mm)
new_anonmm = NULL;
else if (anonmm->mm->rss) {
referenced += page_referenced_one(page,
anonmm->mm, page->index, &mapcount, &failed);
if (!mapcount)
goto out;
}
/*
* Then down the rest of the list, from that as the head. Stop
* when we reach anonhd? No: although a page cannot get dup'ed
* into an older mm, once swapped, its indicated mm may not be
* the oldest, just the first into which it was faulted back.
* If original mm now extinct, note first to contain the page.
*/
seek_head = &anonmm->list;
list_for_each_entry(anonmm, seek_head, list) {
if (!anonmm->mm || !anonmm->mm->rss)
continue;
referenced += page_referenced_one(page,
anonmm->mm, page->index, &mapcount, &failed);
if (!new_anonmm && mapcount < page->mapcount)
new_anonmm = anonmm;
if (!mapcount) {
anonmm = (struct anonmm *) page->mapping;
if (new_anonmm == anonmm)
goto out;
goto migrate;
}
}
/*
* The warning below may appear if page_referenced_anon catches
* the page in between page_add_anon_rmap and its replacement
* demanded by mremap_moved_anon_page: so remove the warning once
* we're convinced that anonmm rmap really is finding its pages.
*/
WARN_ON(!failed);
out:
spin_unlock(&anonhd->lock);
return referenced;
migrate:
/*
* Migrate pages away from an extinct mm, so that its anonmm
* can be freed in due course: we could leave this to happen
* through the natural attrition of try_to_unmap, but that
* would miss locked pages and frequently referenced pages.
*/
spin_unlock(&anonhd->lock);
page->mapping = (void *) new_anonmm;
atomic_inc(&new_anonmm->count);
if (atomic_dec_and_test(&anonmm->count))
free_anonmm(anonmm);
return referenced;
return 1; /* until next patch */
}
/**
@@ -373,8 +191,6 @@ int page_referenced(struct page *page)
void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
struct anonmm *anonmm = vma->vm_mm->anonmm;
BUG_ON(PageReserved(page));
page_map_lock(page);
@@ -382,8 +198,7 @@ void page_add_anon_rmap(struct page *page,
BUG_ON(page->mapping);
SetPageAnon(page);
page->index = address & PAGE_MASK;
page->mapping = (void *) anonmm;
atomic_inc(&anonmm->count);
page->mapping = (void *) vma; /* until next patch */
inc_page_state(nr_mapped);
}
page->mapcount++;
@@ -432,32 +247,6 @@ void page_remove_rmap(struct page *page)
page_map_unlock(page);
}
/**
* mremap_move_anon_rmap - try to note new address of anonymous page
* @page: page about to be moved
* @address: user virtual address at which it is going to be mapped
*
* Returns boolean, true if page is not shared, so address updated.
*
* For mremap's can_move_one_page: to update address when vma is moved,
* provided that anon page is not shared with a parent or child mm.
* If it is shared, then caller must take a copy of the page instead:
* not very clever, but too rare a case to merit cleverness.
*/
int mremap_move_anon_rmap(struct page *page, unsigned long address)
{
int move = 0;
if (page->mapcount == 1) {
page_map_lock(page);
if (page->mapcount == 1) {
page->index = address & PAGE_MASK;
move = 1;
}
page_map_unlock(page);
}
return move;
}
/*
* Subfunctions of try_to_unmap: try_to_unmap_one called
* repeatedly from either try_to_unmap_anon or try_to_unmap_file.
@@ -651,38 +440,7 @@ static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor,
static inline int try_to_unmap_anon(struct page *page)
{
struct anonmm *anonmm = (struct anonmm *) page->mapping;
struct anonmm *anonhd = anonmm->head;
struct list_head *seek_head;
int ret = SWAP_AGAIN;
spin_lock(&anonhd->lock);
/*
* First try the indicated mm, it's the most likely.
*/
if (anonmm->mm && anonmm->mm->rss) {
ret = try_to_unmap_one(page, anonmm->mm, page->index, NULL);
if (ret == SWAP_FAIL || !page->mapcount)
goto out;
}
/*
* Then down the rest of the list, from that as the head. Stop
* when we reach anonhd? No: although a page cannot get dup'ed
* into an older mm, once swapped, its indicated mm may not be
* the oldest, just the first into which it was faulted back.
*/
seek_head = &anonmm->list;
list_for_each_entry(anonmm, seek_head, list) {
if (!anonmm->mm || !anonmm->mm->rss)
continue;
ret = try_to_unmap_one(page, anonmm->mm, page->index, NULL);
if (ret == SWAP_FAIL || !page->mapcount)
goto out;
}
out:
spin_unlock(&anonhd->lock);
return ret;
return SWAP_FAIL; /* until next patch */
}
/**
@@ -537,7 +537,6 @@ static int unuse_process(struct mm_struct * mm,
{
struct vm_area_struct* vma;
unsigned long foundaddr = 0;
int ret = 0;
/*
* Go through process' page directory.
@@ -553,10 +552,12 @@ static int unuse_process(struct mm_struct * mm,
}
}
spin_unlock(&mm->page_table_lock);
if (foundaddr && mremap_moved_anon_rmap(page, foundaddr))
ret = make_page_exclusive(vma, foundaddr);
up_read(&mm->mmap_sem);
return ret;
/*
* Currently unuse_process cannot fail, but leave error handling
* at call sites for now, since we change it from time to time.
*/
return 0;
}
/*