Commit 053837fc authored by Nick Piggin's avatar Nick Piggin Committed by Linus Torvalds

[PATCH] mm: migration page refcounting fix

Migration code currently does not take a reference to target page
properly, so between unlocking the pte and trying to take a new
reference to the page with isolate_lru_page, anything could happen to
it.

Fix this by holding the pte lock until we get a chance to elevate the
refcount.

Other small cleanups while we're here.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent e236a166
...@@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page) ...@@ -39,24 +39,3 @@ del_page_from_lru(struct zone *zone, struct page *page)
} }
} }
/*
 * Isolate one page from the LRU lists.
 *
 * - zone->lru_lock must be held
 *
 * Returns 1 if the page was removed from the LRU and its refcount
 * elevated, 0 if it was not on the LRU, and -ENOENT if it is being
 * freed concurrently (the LRU bit is restored in that case).
 */
static inline int __isolate_lru_page(struct page *page)
{
	int ret = 0;

	if (likely(TestClearPageLRU(page))) {
		if (get_page_testone(page)) {
			/*
			 * It is being freed elsewhere; drop our
			 * speculative reference and put the LRU bit back.
			 */
			__put_page(page);
			SetPageLRU(page);
			ret = -ENOENT;
		} else {
			ret = 1;
		}
	}
	return ret;
}
...@@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *)); ...@@ -167,6 +167,7 @@ extern void FASTCALL(lru_cache_add_active(struct page *));
extern void FASTCALL(activate_page(struct page *)); extern void FASTCALL(activate_page(struct page *));
extern void FASTCALL(mark_page_accessed(struct page *)); extern void FASTCALL(mark_page_accessed(struct page *));
extern void lru_add_drain(void); extern void lru_add_drain(void);
extern int lru_add_drain_all(void);
extern int rotate_reclaimable_page(struct page *page); extern int rotate_reclaimable_page(struct page *page);
extern void swap_setup(void); extern void swap_setup(void);
......
...@@ -94,6 +94,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ...@@ -94,6 +94,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* ->private_lock (try_to_unmap_one) * ->private_lock (try_to_unmap_one)
* ->tree_lock (try_to_unmap_one) * ->tree_lock (try_to_unmap_one)
* ->zone.lru_lock (follow_page->mark_page_accessed) * ->zone.lru_lock (follow_page->mark_page_accessed)
* ->zone.lru_lock (check_pte_range->isolate_lru_page)
* ->private_lock (page_remove_rmap->set_page_dirty) * ->private_lock (page_remove_rmap->set_page_dirty)
* ->tree_lock (page_remove_rmap->set_page_dirty) * ->tree_lock (page_remove_rmap->set_page_dirty)
* ->inode_lock (page_remove_rmap->set_page_dirty) * ->inode_lock (page_remove_rmap->set_page_dirty)
......
...@@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -208,6 +208,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
page = vm_normal_page(vma, addr, *pte); page = vm_normal_page(vma, addr, *pte);
if (!page) if (!page)
continue; continue;
/*
* The check for PageReserved here is important to avoid
* handling zero pages and other pages that may have been
* marked special by the system.
*
* If the PageReserved would not be checked here then f.e.
* the location of the zero page could have an influence
* on MPOL_MF_STRICT, zero pages would be counted for
* the per node stats, and there would be useless attempts
* to put zero pages on the migration list.
*/
if (PageReserved(page)) if (PageReserved(page))
continue; continue;
nid = page_to_nid(page); nid = page_to_nid(page);
...@@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -216,11 +227,8 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
if (flags & MPOL_MF_STATS) if (flags & MPOL_MF_STATS)
gather_stats(page, private); gather_stats(page, private);
else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
spin_unlock(ptl);
migrate_page_add(vma, page, private, flags); migrate_page_add(vma, page, private, flags);
spin_lock(ptl);
}
else else
break; break;
} while (pte++, addr += PAGE_SIZE, addr != end); } while (pte++, addr += PAGE_SIZE, addr != end);
...@@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, ...@@ -309,6 +317,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
int err; int err;
struct vm_area_struct *first, *vma, *prev; struct vm_area_struct *first, *vma, *prev;
/* Clear the LRU lists so pages can be isolated */
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
lru_add_drain_all();
first = find_vma(mm, start); first = find_vma(mm, start);
if (!first) if (!first)
return ERR_PTR(-EFAULT); return ERR_PTR(-EFAULT);
...@@ -555,15 +567,8 @@ static void migrate_page_add(struct vm_area_struct *vma, ...@@ -555,15 +567,8 @@ static void migrate_page_add(struct vm_area_struct *vma,
if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) || if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
mapping_writably_mapped(page->mapping) || mapping_writably_mapped(page->mapping) ||
single_mm_mapping(vma->vm_mm, page->mapping)) { single_mm_mapping(vma->vm_mm, page->mapping)) {
int rc = isolate_lru_page(page); if (isolate_lru_page(page))
if (rc == 1)
list_add(&page->lru, pagelist); list_add(&page->lru, pagelist);
/*
* If the isolate attempt was not successful then we just
* encountered an unswappable page. Something must be wrong.
*/
WARN_ON(rc == 0);
} }
} }
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
* mapping->i_mmap_lock * mapping->i_mmap_lock
* anon_vma->lock * anon_vma->lock
* mm->page_table_lock or pte_lock * mm->page_table_lock or pte_lock
* zone->lru_lock (in mark_page_accessed) * zone->lru_lock (in mark_page_accessed, isolate_lru_page)
* swap_lock (in swap_duplicate, swap_info_get) * swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others) * mmlist_lock (in mmput, drain_mmlist and others)
* mapping->private_lock (in __set_page_dirty_buffers) * mapping->private_lock (in __set_page_dirty_buffers)
......
...@@ -174,6 +174,32 @@ void lru_add_drain(void) ...@@ -174,6 +174,32 @@ void lru_add_drain(void)
put_cpu(); put_cpu();
} }
#ifdef CONFIG_NUMA
/*
 * Per-cpu callback for schedule_on_each_cpu(); the dummy argument is
 * required by the callback signature and is unused.
 */
static void lru_add_drain_per_cpu(void *dummy)
{
	lru_add_drain();
}
/*
 * Drain the pagevecs on every CPU so pending pages reach the LRU
 * lists (callers drain before isolating pages for migration).
 *
 * Returns 0 for success
 */
int lru_add_drain_all(void)
{
	return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
}
#else
/*
 * !CONFIG_NUMA: draining the local CPU suffices.
 *
 * Returns 0 for success
 */
int lru_add_drain_all(void)
{
	lru_add_drain();
	return 0;
}
#endif
/* /*
* This path almost never happens for VM activity - pages are normally * This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking. * freed via pagevecs. But it gets used by networking.
......
...@@ -586,7 +586,7 @@ static inline void move_to_lru(struct page *page) ...@@ -586,7 +586,7 @@ static inline void move_to_lru(struct page *page)
} }
/* /*
* Add isolated pages on the list back to the LRU * Add isolated pages on the list back to the LRU.
* *
* returns the number of pages put back. * returns the number of pages put back.
*/ */
...@@ -760,46 +760,33 @@ int migrate_pages(struct list_head *from, struct list_head *to, ...@@ -760,46 +760,33 @@ int migrate_pages(struct list_head *from, struct list_head *to,
return nr_failed + retry; return nr_failed + retry;
} }
static void lru_add_drain_per_cpu(void *dummy)
{
lru_add_drain();
}
/* /*
* Isolate one page from the LRU lists and put it on the * Isolate one page from the LRU lists and put it on the
* indicated list. Do necessary cache draining if the * indicated list with elevated refcount.
* page is not on the LRU lists yet.
* *
* Result: * Result:
* 0 = page not on LRU list * 0 = page not on LRU list
* 1 = page removed from LRU list and added to the specified list. * 1 = page removed from LRU list and added to the specified list.
* -ENOENT = page is being freed elsewhere.
*/ */
int isolate_lru_page(struct page *page) int isolate_lru_page(struct page *page)
{ {
int rc = 0; int ret = 0;
struct zone *zone = page_zone(page);
redo: if (PageLRU(page)) {
struct zone *zone = page_zone(page);
spin_lock_irq(&zone->lru_lock); spin_lock_irq(&zone->lru_lock);
rc = __isolate_lru_page(page); if (TestClearPageLRU(page)) {
if (rc == 1) { ret = 1;
get_page(page);
if (PageActive(page)) if (PageActive(page))
del_page_from_active_list(zone, page); del_page_from_active_list(zone, page);
else else
del_page_from_inactive_list(zone, page); del_page_from_inactive_list(zone, page);
} }
spin_unlock_irq(&zone->lru_lock); spin_unlock_irq(&zone->lru_lock);
if (rc == 0) {
/*
* Maybe this page is still waiting for a cpu to drain it
* from one of the lru lists?
*/
rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
if (rc == 0 && PageLRU(page))
goto redo;
} }
return rc;
return ret;
} }
#endif #endif
...@@ -831,18 +818,20 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src, ...@@ -831,18 +818,20 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
page = lru_to_page(src); page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags); prefetchw_prev_lru_page(page, src, flags);
switch (__isolate_lru_page(page)) { if (!TestClearPageLRU(page))
case 1:
/* Succeeded to isolate page */
list_move(&page->lru, dst);
nr_taken++;
break;
case -ENOENT:
/* Not possible to isolate */
list_move(&page->lru, src);
break;
default:
BUG(); BUG();
list_del(&page->lru);
if (get_page_testone(page)) {
/*
* It is being freed elsewhere
*/
__put_page(page);
SetPageLRU(page);
list_add(&page->lru, src);
continue;
} else {
list_add(&page->lru, dst);
nr_taken++;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment