Commit a54a407f authored by Mel Gorman, committed by Ingo Molnar

mm: Close races between THP migration and PMD numa clearing

THP migration uses the page lock to guard against parallel allocations,
but cases like the following are still open:

  Task A					Task B
  ---------------------				---------------------
  do_huge_pmd_numa_page				do_huge_pmd_numa_page
  lock_page
  mpol_misplaced == -1
  unlock_page
  goto clear_pmdnuma
						lock_page
						mpol_misplaced == 2
						migrate_misplaced_transhuge
  pmd = pmd_mknonnuma
  set_pmd_at

During hours of testing, one machine crashed with weird errors and,
while I have no direct evidence, I suspect something like the race
above happened. This patch extends the hold of the page lock until
pmd_numa has been cleared, so that a migration cannot start in
parallel while pmd_numa is being cleared. It also flushes the old pmd
entry and orders pagetable insertion before rmap insertion.
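
As an aside for readers outside mm/: the window above is a classic
check-then-act race, and the fix is the classic cure of extending the
critical section until the act is done. Below is a loose userspace
analogue, illustrative only; every name in it is made up and none of it
is kernel API (the kernel's pmd_same() revalidation is also omitted):

  /* Build with: cc -pthread race_demo.c */
  #include <pthread.h>
  #include <stdio.h>

  static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
  static int pmd_numa = 1;   /* stand-in for the pmd_numa bit */
  static int migrating;      /* stand-in for a migration in flight */

  /* Racy variant, mirroring the pre-patch Task A: the decision to clear
   * pmd_numa is made under the lock, but the clear itself happens after
   * the unlock, leaving a window for Task B. */
  static void *task_a_racy(void *arg)
  {
      (void)arg;
      pthread_mutex_lock(&page_lock);
      /* "mpol_misplaced() == -1": page is placed correctly */
      pthread_mutex_unlock(&page_lock);
      /* <-- Task B can take the lock and start "migrating" here */
      pmd_numa = 0;          /* "pmd_mknonnuma + set_pmd_at" */
      return NULL;
  }

  /* Fixed variant, mirroring the patch: the lock is held until the
   * pmd_numa state has been cleared. */
  static void *task_a_fixed(void *arg)
  {
      (void)arg;
      pthread_mutex_lock(&page_lock);
      pmd_numa = 0;          /* cleared while migration is excluded */
      pthread_mutex_unlock(&page_lock);
      return NULL;
  }

  /* Task B only starts a migration if it sees pmd_numa still set while
   * holding the page lock. */
  static void *task_b(void *arg)
  {
      (void)arg;
      pthread_mutex_lock(&page_lock);
      if (pmd_numa)
          migrating = 1;     /* "migrate_misplaced_transhuge_page()" */
      pthread_mutex_unlock(&page_lock);
      return NULL;
  }

  int main(void)
  {
      pthread_t a, b;

      (void)task_a_racy; /* swap in for task_a_fixed to open the window */
      pthread_create(&a, NULL, task_a_fixed, NULL);
      pthread_create(&b, NULL, task_b, NULL);
      pthread_join(a, NULL);
      pthread_join(b, NULL);
      printf("migrating=%d pmd_numa=%d\n", migrating, pmd_numa);
      return 0;
  }

With task_a_racy, B can decide to migrate in the marked window while A
clears the state outside any lock, the interleaving shown in the trace
above; with task_a_fixed, the decision and the clear are one atomic step.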
Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 8191acbd
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1304,24 +1304,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         target_nid = mpol_misplaced(page, vma, haddr);
         if (target_nid == -1) {
                 /* If the page was locked, there are no parallel migrations */
-                if (page_locked) {
-                        unlock_page(page);
+                if (page_locked)
                         goto clear_pmdnuma;
-                }
 
-                /* Otherwise wait for potential migrations and retry fault */
+                /*
+                 * Otherwise wait for potential migrations and retry. We do
+                 * relock and check_same as the page may no longer be mapped.
+                 * As the fault is being retried, do not account for it.
+                 */
                 spin_unlock(&mm->page_table_lock);
                 wait_on_page_locked(page);
+                page_nid = -1;
                 goto out;
         }
 
         /* Page is misplaced, serialise migrations and parallel THP splits */
         get_page(page);
         spin_unlock(&mm->page_table_lock);
-        if (!page_locked) {
+        if (!page_locked)
                 lock_page(page);
-                page_locked = true;
-        }
         anon_vma = page_lock_anon_vma_read(page);
 
         /* Confirm the PMD did not change while page_table_lock was released */
@@ -1329,32 +1330,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (unlikely(!pmd_same(pmd, *pmdp))) {
                 unlock_page(page);
                 put_page(page);
+                page_nid = -1;
                 goto out_unlock;
         }
 
-        /* Migrate the THP to the requested node */
+        /*
+         * Migrate the THP to the requested node, returns with page unlocked
+         * and pmd_numa cleared.
+         */
         spin_unlock(&mm->page_table_lock);
         migrated = migrate_misplaced_transhuge_page(mm, vma,
                                 pmdp, pmd, addr, page, target_nid);
         if (migrated)
                 page_nid = target_nid;
-        else
-                goto check_same;
 
         goto out;
-
-check_same:
-        spin_lock(&mm->page_table_lock);
-        if (unlikely(!pmd_same(pmd, *pmdp))) {
-                /* Someone else took our fault */
-                page_nid = -1;
-                goto out_unlock;
-        }
 clear_pmdnuma:
+        BUG_ON(!PageLocked(page));
         pmd = pmd_mknonnuma(pmd);
         set_pmd_at(mm, haddr, pmdp, pmd);
         VM_BUG_ON(pmd_numa(*pmdp));
         update_mmu_cache_pmd(vma, addr, pmdp);
+        unlock_page(page);
 out_unlock:
         spin_unlock(&mm->page_table_lock);
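
The second hunk also leans on a drop/relock/revalidate pattern:
page_table_lock is a spinlock, so it must be released before the
sleeping lock_page() and page_lock_anon_vma_read() calls, and the
pmd_same() check afterwards confirms nothing changed in the window,
backing out to retry the fault otherwise. A minimal sketch of that
shape with userspace mutexes (hypothetical names, not kernel API):

  #include <pthread.h>
  #include <stdbool.h>

  static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;  /* "page_table_lock" */
  static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
  static unsigned long pmd_val;                            /* the watched entry */

  /* Returns true if we still own the fault after reacquiring the locks;
   * false means the entry changed in the unlocked window and the caller
   * should back out and retry. */
  static bool relock_and_revalidate(unsigned long snapshot)
  {
      pthread_mutex_unlock(&ptl);      /* can't "sleep" under the spinlock */
      pthread_mutex_lock(&page_lock);  /* the sleeping lock we needed */
      pthread_mutex_lock(&ptl);
      if (pmd_val != snapshot) {       /* analogue of !pmd_same(pmd, *pmdp) */
          pthread_mutex_unlock(&page_lock);
          return false;                /* entry changed: back out, retry */
      }
      return true;                     /* entry unchanged: proceed, locked */
  }

  int main(void)
  {
      pthread_mutex_lock(&ptl);
      unsigned long snap = pmd_val;
      if (relock_and_revalidate(snap)) {
          /* ... work that needs both locks and a stable entry ... */
          pthread_mutex_unlock(&page_lock);
      }
      pthread_mutex_unlock(&ptl);
      return 0;
  }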
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1713,12 +1713,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
                 unlock_page(new_page);
                 put_page(new_page);             /* Free it */
 
-                unlock_page(page);
+                /* Retake the callers reference and putback on LRU */
+                get_page(page);
                 putback_lru_page(page);
-
-                count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-                isolated = 0;
-
-                goto out;
+                mod_zone_page_state(page_zone(page),
+                         NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
+                goto out_fail;
         }
 
@@ -1735,9 +1735,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
         entry = pmd_mkhuge(entry);
 
-        page_add_new_anon_rmap(new_page, vma, haddr);
-
+        pmdp_clear_flush(vma, haddr, pmd);
         set_pmd_at(mm, haddr, pmd, entry);
+        page_add_new_anon_rmap(new_page, vma, haddr);
         update_mmu_cache_pmd(vma, address, &entry);
         page_remove_rmap(page);
         /*
@@ -1756,7 +1756,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
         count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
         count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-out:
         mod_zone_page_state(page_zone(page),
                         NR_ISOLATED_ANON + page_lru,
                         -HPAGE_PMD_NR);
@@ -1765,6 +1764,10 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 out_fail:
         count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+        entry = pmd_mknonnuma(entry);
+        set_pmd_at(mm, haddr, pmd, entry);
+        update_mmu_cache_pmd(vma, address, &entry);
+
         unlock_page(page);
         put_page(page);
         return 0;
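
The mm/migrate.c side enforces the ordering named in the changelog: the
stale entry is flushed (pmdp_clear_flush()) and the new entry installed
(set_pmd_at()) before the new page is added to the anon rmap, so
anything that finds the page through the rmap also finds a valid
mapping; the failure path likewise reinstates a usable !pmd_numa entry
before the page is unlocked. The publish-before-index idea in
isolation, as a hedged userspace sketch (made-up names, only a loose
analogue of the kernel's page table and rmap):

  #include <stdatomic.h>
  #include <stdio.h>

  struct object { int payload; };

  static _Atomic(struct object *) mapping;     /* plays the PMD */
  static _Atomic(struct object *) rmap_index;  /* plays the anon rmap */

  /* Install the primary mapping first, then publish the object in the
   * reverse index: a walker that finds the object through the index is
   * guaranteed to also see the mapping. */
  static void publish(struct object *obj)
  {
      atomic_store_explicit(&mapping, obj, memory_order_release);
      atomic_store_explicit(&rmap_index, obj, memory_order_release);
  }

  static struct object *find_via_rmap(void)
  {
      /* acquire pairs with the release store to rmap_index above */
      return atomic_load_explicit(&rmap_index, memory_order_acquire);
  }

  int main(void)
  {
      static struct object thp = { .payload = 42 };

      publish(&thp);
      struct object *obj = find_via_rmap();
      if (obj)  /* mapping is visible too, by the release/acquire pairing */
          printf("payload=%d mapped=%d\n", obj->payload,
                 atomic_load_explicit(&mapping, memory_order_relaxed) != NULL);
      return 0;
  }

Reversing the two stores in publish() would recreate the shape of the
pre-patch bug: a walker could find the object via the index while the
primary mapping is not yet in place.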