Commit 595cf06f authored by Linus Torvalds's avatar Linus Torvalds

v2.4.13.6 -> v2.4.13.7

  - me: reinstate "delete swap cache on low swap" code
  - David Miller: ksoftirqd startup race fix
  - Hugh Dickins: make tmpfs free swap cache entries proactively
parent 857805c6
VERSION = 2
PATCHLEVEL = 4
SUBLEVEL = 14
EXTRAVERSION =-pre6
EXTRAVERSION =-pre7
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
......
......@@ -282,6 +282,7 @@ typedef struct page {
#define PG_launder 15 /* written out by VM pressure.. */
/* Make it prettier to test the above... */
#define UnlockPage(page) unlock_page(page)
#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags)
#define SetPageUptodate(page) set_bit(PG_uptodate, &(page)->flags)
#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
......@@ -296,13 +297,7 @@ typedef struct page {
#define PageLaunder(page) test_bit(PG_launder, &(page)->flags)
#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags)
extern void __set_page_dirty(struct page *);
static inline void set_page_dirty(struct page * page)
{
if (!test_and_set_bit(PG_dirty, &page->flags))
__set_page_dirty(page);
}
extern void FASTCALL(set_page_dirty(struct page *));
/*
* The first mb is necessary to safely close the critical section opened by the
......@@ -310,14 +305,6 @@ static inline void set_page_dirty(struct page * page)
* the clear_bit and the read of the waitqueue (to avoid SMP races with a
* parallel wait_on_page).
*/
#define UnlockPage(page) do { \
clear_bit(PG_launder, &(page)->flags); \
smp_mb__before_clear_bit(); \
if (!test_and_clear_bit(PG_locked, &(page)->flags)) BUG(); \
smp_mb__after_clear_bit(); \
if (waitqueue_active(&(page)->wait)) \
wake_up(&(page)->wait); \
} while (0)
#define PageError(page) test_bit(PG_error, &(page)->flags)
#define SetPageError(page) set_bit(PG_error, &(page)->flags)
#define ClearPageError(page) clear_bit(PG_error, &(page)->flags)
......@@ -465,6 +452,7 @@ static inline int is_page_cache_freeable(struct page * page)
return page_count(page) - !!page->buffers == 1;
}
extern int can_share_swap_page(struct page *);
extern int remove_exclusive_swap_page(struct page *);
extern void __free_pte(pte_t);
......
......@@ -79,7 +79,8 @@ extern struct page * __find_lock_page (struct address_space * mapping,
extern struct page * find_or_create_page(struct address_space *mapping,
unsigned long index, unsigned int gfp_mask);
extern void lock_page(struct page *page);
extern void FASTCALL(lock_page(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
#define find_lock_page(mapping, index) \
__find_lock_page(mapping, index, page_hash(mapping, index))
extern struct page *find_trylock_page(struct address_space *, unsigned long);
......
......@@ -361,7 +361,7 @@ void __run_task_queue(task_queue *list)
static int ksoftirqd(void * __bind_cpu)
{
int bind_cpu = *(int *) __bind_cpu;
int bind_cpu = (int) (long) __bind_cpu;
int cpu = cpu_logical_map(bind_cpu);
daemonize();
......@@ -401,7 +401,7 @@ static __init int spawn_ksoftirqd(void)
int cpu;
for (cpu = 0; cpu < smp_num_cpus; cpu++) {
if (kernel_thread(ksoftirqd, (void *) &cpu,
if (kernel_thread(ksoftirqd, (void *) (long) cpu,
CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
printk("spawn_ksoftirqd() failed for cpu %d\n", cpu);
else {
......
......@@ -141,17 +141,21 @@ static inline int sync_page(struct page *page)
/*
* Add a page to the dirty page list.
*/
void __set_page_dirty(struct page *page)
void set_page_dirty(struct page *page)
{
struct address_space *mapping = page->mapping;
if (!test_and_set_bit(PG_dirty, &page->flags)) {
struct address_space *mapping = page->mapping;
spin_lock(&pagecache_lock);
list_del(&page->list);
list_add(&page->list, &mapping->dirty_pages);
spin_unlock(&pagecache_lock);
if (mapping) {
spin_lock(&pagecache_lock);
list_del(&page->list);
list_add(&page->list, &mapping->dirty_pages);
spin_unlock(&pagecache_lock);
if (mapping->host)
mark_inode_dirty_pages(mapping->host);
if (mapping->host)
mark_inode_dirty_pages(mapping->host);
}
}
}
/**
......@@ -771,6 +775,17 @@ void ___wait_on_page(struct page *page)
remove_wait_queue(&page->wait, &wait);
}
/*
 * Unlock a locked page and wake up anyone sleeping on it.
 * Replaces the old UnlockPage() macro body (see include/linux/mm.h);
 * out-of-line so callers don't inline the waitqueue handling.
 */
void unlock_page(struct page *page)
{
/* The page is no longer under VM-pressure laundering once unlocked. */
clear_bit(PG_launder, &(page)->flags);
/*
 * Barrier: make sure the PG_launder clear is visible before we drop
 * PG_locked, safely closing the critical section opened by lock_page().
 */
smp_mb__before_clear_bit();
if (!test_and_clear_bit(PG_locked, &(page)->flags))
BUG();	/* unlocking a page that was not locked is a kernel bug */
/*
 * Barrier between clearing the bit and reading the waitqueue, to avoid
 * an SMP race with a parallel wait_on_page() (it could queue itself
 * after our waitqueue_active() check and sleep forever otherwise).
 */
smp_mb__after_clear_bit();
if (waitqueue_active(&(page)->wait))
wake_up(&(page)->wait);
}
/*
* Get a lock on the page, assuming we need to sleep
* to get it..
......@@ -1837,8 +1852,7 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
struct page *page = pte_page(pte);
if (VALID_PAGE(page) && !PageReserved(page) && ptep_test_and_clear_dirty(ptep)) {
flush_tlb_page(vma, address);
if (page->mapping)
set_page_dirty(page);
set_page_dirty(page);
}
}
return 0;
......
......@@ -78,15 +78,8 @@ void __free_pte(pte_t pte)
struct page *page = pte_page(pte);
if ((!VALID_PAGE(page)) || PageReserved(page))
return;
/*
* free_page() used to be able to clear swap cache
* entries. We may now have to do it manually.
*/
if (page->mapping) {
if (pte_dirty(pte))
set_page_dirty(page);
}
if (pte_dirty(pte))
set_page_dirty(page);
free_page_and_swap_cache(page);
}
......@@ -917,36 +910,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
old_page = pte_page(pte);
if (!VALID_PAGE(old_page))
goto bad_wp_page;
/*
* We can avoid the copy if:
* - we're the only user (count == 1)
* - the only other user is the swap cache,
* and the only swap cache user is itself,
* in which case we can just continue to
* use the same swap cache (it will be
* marked dirty).
*/
switch (page_count(old_page)) {
int can_reuse;
case 3:
if (!old_page->buffers)
break;
/* FallThrough */
case 2:
if (!PageSwapCache(old_page))
break;
if (TryLockPage(old_page))
break;
/* Recheck swapcachedness once the page is locked */
can_reuse = remove_exclusive_swap_page(old_page);
UnlockPage(old_page);
if (!can_reuse)
break;
/* FallThrough */
case 1:
if (PageReserved(old_page))
break;
if (can_share_swap_page(old_page)) {
flush_cache_page(vma, address);
establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
spin_unlock(&mm->page_table_lock);
......@@ -1152,12 +1117,20 @@ static int do_swap_page(struct mm_struct * mm,
spin_unlock(&mm->page_table_lock);
return 1;
}
/* The page isn't present yet, go ahead with the fault. */
swap_free(entry);
if (vm_swap_full()) {
lock_page(page);
remove_exclusive_swap_page(page);
UnlockPage(page);
}
mm->rss++;
pte = mk_pte(page, vma->vm_page_prot);
swap_free(entry);
if (write_access && can_share_swap_page(page))
pte = pte_mkdirty(pte_mkwrite(pte));
flush_page_to_ram(page);
flush_icache_page(vma, page);
......
......@@ -137,14 +137,10 @@ static void __free_pages_ok (struct page *page, unsigned int order)
return;
local_freelist:
/*
* This is a little subtle: if the allocation order
* wanted is major than zero we'd better take all the pages
* local since we must deal with fragmentation too and we
* can't rely on the nr_local_pages information.
*/
if (current->nr_local_pages && !current->allocation_order)
if (current->nr_local_pages)
goto back_local_freelist;
if (in_interrupt())
goto back_local_freelist;
list_add(&page->list, &current->local_pages);
page->index = order;
......
......@@ -212,9 +212,7 @@ static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
entry = *ptr;
*ptr = (swp_entry_t){0};
freed++;
/* vmscan will do the actual page freeing later.. */
swap_free (entry);
free_swap_and_cache(entry);
}
return freed;
}
......
......@@ -17,8 +17,16 @@
#include <asm/pgtable.h>
/*
* We may have stale swap cache pages in memory: notice
* them here and get rid of the unnecessary final write.
*/
/*
 * writepage method for swap-backed pages. Called with the page locked;
 * must unlock it (directly or via rw_swap_page) before returning.
 *
 * We may have stale swap cache pages in memory: if nobody else
 * references this page we can drop the swap cache entry here and
 * skip the now-unnecessary final write-out.
 */
static int swap_writepage(struct page *page)
{
/*
 * remove_exclusive_swap_page() frees the swap entry and deletes the
 * page from the swap cache when we are the only user; nothing left
 * to write in that case — just unlock and report success.
 */
if (remove_exclusive_swap_page(page)) {
UnlockPage(page);
return 0;
}
/* Still shared (or removal failed): really write the page to swap. */
rw_swap_page(WRITE, page);
return 0;
}
......
......@@ -223,6 +223,64 @@ void swap_free(swp_entry_t entry)
}
}
/*
* Check if we're the only user of a swap page,
* when the page is locked.
*/
/*
 * Check whether we are the only user of a swap cache page.
 * Caller must hold the page lock (page is assumed to be in the
 * swap cache). Returns 1 if exclusive, 0 otherwise.
 */
static int exclusive_swap_page(struct page *page)
{
int retval = 0;
struct swap_info_struct * p;
swp_entry_t entry;
/* For a swap cache page, page->index encodes its swp_entry_t. */
entry.val = page->index;
p = swap_info_get(entry);	/* validates entry and locks the swap device */
if (p) {
/* Is the only swap cache user the cache itself? */
if (p->swap_map[SWP_OFFSET(entry)] == 1) {
/* Recheck the page count with the pagecache lock held.. */
spin_lock(&pagecache_lock);
/*
 * Expected count: our reference + the swap cache's, with any
 * buffer reference discounted via !!page->buffers.
 */
if (page_count(page) - !!page->buffers == 2)
retval = 1;
spin_unlock(&pagecache_lock);
}
swap_info_put(p);	/* drop the swap device lock taken above */
}
return retval;
}
/*
* We can use this swap cache entry directly
* if there are no other references to it.
*
* Here "exclusive_swap_page()" does the real
* work, but we opportunistically check whether
* we need to get all the locks first..
*/
/*
 * We can use this swap cache entry directly
 * if there are no other references to it.
 *
 * Here "exclusive_swap_page()" does the real
 * work, but we opportunistically check whether
 * we need to get all the locks first..
 *
 * Returns 1 if the page may be reused by the caller, 0 otherwise.
 * The switch falls through from the highest plausible count down:
 * each case peels off one known extra reference before re-testing.
 */
int can_share_swap_page(struct page *page)
{
int retval = 0;
switch (page_count(page)) {
case 3:
/* Count of 3 is only acceptable if buffers hold the extra ref. */
if (!page->buffers)
break;
/* Fallthrough */
case 2:
/* Count of 2 is only acceptable if the swap cache holds one ref. */
if (!PageSwapCache(page))
break;
/* Need the page lock before exclusive_swap_page() may be called. */
if (TryLockPage(page))
break;
retval = exclusive_swap_page(page);
UnlockPage(page);
break;
case 1:
/* We are the sole user — shareable unless the page is reserved. */
if (PageReserved(page))
break;
retval = 1;
}
return retval;
}
/*
* Work out if there are any other processes sharing this
* swap cache page. Free it if you can. Return success.
......@@ -252,6 +310,7 @@ int remove_exclusive_swap_page(struct page *page)
spin_lock(&pagecache_lock);
if (page_count(page) - !!page->buffers == 2) {
__delete_from_swap_cache(page);
SetPageDirty(page);
retval = 1;
}
spin_unlock(&pagecache_lock);
......
......@@ -74,6 +74,9 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
pte = ptep_get_and_clear(page_table);
flush_tlb_page(vma, address);
if (pte_dirty(pte))
set_page_dirty(page);
/*
* Is the page already in the swap cache? If so, then
* we can just drop our reference to it without doing
......@@ -81,8 +84,6 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
*/
if (PageSwapCache(page)) {
entry.val = page->index;
if (pte_dirty(pte))
set_page_dirty(page);
swap_duplicate(entry);
set_swap_pte:
set_pte(page_table, swp_entry_to_pte(entry));
......@@ -110,16 +111,9 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
* Basically, this just makes it possible for us to do
* some real work in the future in "refill_inactive()".
*/
if (page->mapping) {
if (pte_dirty(pte))
set_page_dirty(page);
if (page->mapping)
goto drop_pte;
}
/*
* Check PageDirty as well as pte_dirty: page may
* have been brought back from swap by swapoff.
*/
if (!pte_dirty(pte) && !PageDirty(page))
if (!PageDirty(page))
goto drop_pte;
/*
......@@ -132,7 +126,10 @@ static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct*
entry = get_swap_page();
if (!entry.val)
break;
/* Add it to the swap cache and mark it dirty */
/* Add it to the swap cache and mark it dirty
* (adding to the page cache will clear the dirty
* and uptodate bits, so we need to do it again)
*/
if (add_to_swap_cache(page, entry) == 0) {
SetPageUptodate(page);
set_page_dirty(page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment