Commit 77da9389 authored by Matthew Wilcox

mm: Convert collapse_shmem to XArray

I found another victim of the radix tree being hard to use.  Because
there was no call to radix_tree_preload(), khugepaged was allocating
radix_tree_nodes using GFP_ATOMIC.

I also converted a local_irq_save()/local_irq_restore() pair to
local_irq_disable()/local_irq_enable().
Signed-off-by: Matthew Wilcox <willy@infradead.org>
parent aa5dc07f
...@@ -1288,17 +1288,17 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) ...@@ -1288,17 +1288,17 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* *
* Basic scheme is simple, details are more complex: * Basic scheme is simple, details are more complex:
* - allocate and freeze a new huge page; * - allocate and freeze a new huge page;
* - scan over radix tree replacing old pages the new one * - scan page cache replacing old pages with the new one
* + swap in pages if necessary; * + swap in pages if necessary;
* + fill in gaps; * + fill in gaps;
* + keep old pages around in case if rollback is required; * + keep old pages around in case rollback is required;
* - if replacing succeed: * - if replacing succeeds:
* + copy data over; * + copy data over;
* + free old pages; * + free old pages;
* + unfreeze huge page; * + unfreeze huge page;
* - if replacing failed; * - if replacing failed;
* + put all pages back and unfreeze them; * + put all pages back and unfreeze them;
* + restore gaps in the radix-tree; * + restore gaps in the page cache;
* + free huge page; * + free huge page;
*/ */
static void collapse_shmem(struct mm_struct *mm, static void collapse_shmem(struct mm_struct *mm,
...@@ -1306,12 +1306,11 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1306,12 +1306,11 @@ static void collapse_shmem(struct mm_struct *mm,
struct page **hpage, int node) struct page **hpage, int node)
{ {
gfp_t gfp; gfp_t gfp;
struct page *page, *new_page, *tmp; struct page *new_page;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
pgoff_t index, end = start + HPAGE_PMD_NR; pgoff_t index, end = start + HPAGE_PMD_NR;
LIST_HEAD(pagelist); LIST_HEAD(pagelist);
struct radix_tree_iter iter; XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
void **slot;
int nr_none = 0, result = SCAN_SUCCEED; int nr_none = 0, result = SCAN_SUCCEED;
VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
...@@ -1336,48 +1335,49 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1336,48 +1335,49 @@ static void collapse_shmem(struct mm_struct *mm,
__SetPageLocked(new_page); __SetPageLocked(new_page);
BUG_ON(!page_ref_freeze(new_page, 1)); BUG_ON(!page_ref_freeze(new_page, 1));
/* /*
* At this point the new_page is 'frozen' (page_count() is zero), locked * At this point the new_page is 'frozen' (page_count() is zero),
* and not up-to-date. It's safe to insert it into radix tree, because * locked and not up-to-date. It's safe to insert it into the page
* nobody would be able to map it or use it in other way until we * cache, because nobody would be able to map it or use it in other
* unfreeze it. * way until we unfreeze it.
*/ */
index = start; /* This will be less messy when we use multi-index entries */
xa_lock_irq(&mapping->i_pages); do {
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { xas_lock_irq(&xas);
int n = min(iter.index, end) - index; xas_create_range(&xas);
if (!xas_error(&xas))
break;
xas_unlock_irq(&xas);
if (!xas_nomem(&xas, GFP_KERNEL))
goto out;
} while (1);
/* xas_set(&xas, start);
* Handle holes in the radix tree: charge it from shmem and for (index = start; index < end; index++) {
* insert relevant subpage of new_page into the radix-tree. struct page *page = xas_next(&xas);
*/
if (n && !shmem_charge(mapping->host, n)) { VM_BUG_ON(index != xas.xa_index);
if (!page) {
if (!shmem_charge(mapping->host, 1)) {
result = SCAN_FAIL; result = SCAN_FAIL;
break; break;
} }
nr_none += n; xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
for (; index < min(iter.index, end); index++) { nr_none++;
radix_tree_insert(&mapping->i_pages, index, continue;
new_page + (index % HPAGE_PMD_NR));
} }
/* We are done. */
if (index >= end)
break;
page = radix_tree_deref_slot_protected(slot,
&mapping->i_pages.xa_lock);
if (xa_is_value(page) || !PageUptodate(page)) { if (xa_is_value(page) || !PageUptodate(page)) {
xa_unlock_irq(&mapping->i_pages); xas_unlock_irq(&xas);
/* swap in or instantiate fallocated page */ /* swap in or instantiate fallocated page */
if (shmem_getpage(mapping->host, index, &page, if (shmem_getpage(mapping->host, index, &page,
SGP_NOHUGE)) { SGP_NOHUGE)) {
result = SCAN_FAIL; result = SCAN_FAIL;
goto tree_unlocked; goto xa_unlocked;
} }
xa_lock_irq(&mapping->i_pages); xas_lock_irq(&xas);
xas_set(&xas, index);
} else if (trylock_page(page)) { } else if (trylock_page(page)) {
get_page(page); get_page(page);
} else { } else {
...@@ -1397,7 +1397,7 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1397,7 +1397,7 @@ static void collapse_shmem(struct mm_struct *mm,
result = SCAN_TRUNCATED; result = SCAN_TRUNCATED;
goto out_unlock; goto out_unlock;
} }
xa_unlock_irq(&mapping->i_pages); xas_unlock_irq(&xas);
if (isolate_lru_page(page)) { if (isolate_lru_page(page)) {
result = SCAN_DEL_PAGE_LRU; result = SCAN_DEL_PAGE_LRU;
...@@ -1407,17 +1407,16 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1407,17 +1407,16 @@ static void collapse_shmem(struct mm_struct *mm,
if (page_mapped(page)) if (page_mapped(page))
unmap_mapping_pages(mapping, index, 1, false); unmap_mapping_pages(mapping, index, 1, false);
xa_lock_irq(&mapping->i_pages); xas_lock_irq(&xas);
xas_set(&xas, index);
slot = radix_tree_lookup_slot(&mapping->i_pages, index); VM_BUG_ON_PAGE(page != xas_load(&xas), page);
VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
&mapping->i_pages.xa_lock), page);
VM_BUG_ON_PAGE(page_mapped(page), page); VM_BUG_ON_PAGE(page_mapped(page), page);
/* /*
* The page is expected to have page_count() == 3: * The page is expected to have page_count() == 3:
* - we hold a pin on it; * - we hold a pin on it;
* - one reference from radix tree; * - one reference from page cache;
* - one from isolate_lru_page; * - one from isolate_lru_page;
*/ */
if (!page_ref_freeze(page, 3)) { if (!page_ref_freeze(page, 3)) {
...@@ -1432,56 +1431,30 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1432,56 +1431,30 @@ static void collapse_shmem(struct mm_struct *mm,
list_add_tail(&page->lru, &pagelist); list_add_tail(&page->lru, &pagelist);
/* Finally, replace with the new page. */ /* Finally, replace with the new page. */
radix_tree_replace_slot(&mapping->i_pages, slot, xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
new_page + (index % HPAGE_PMD_NR));
slot = radix_tree_iter_resume(slot, &iter);
index++;
continue; continue;
out_lru: out_lru:
xa_unlock_irq(&mapping->i_pages); xas_unlock_irq(&xas);
putback_lru_page(page); putback_lru_page(page);
out_isolate_failed: out_isolate_failed:
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
goto tree_unlocked; goto xa_unlocked;
out_unlock: out_unlock:
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
break; break;
} }
xas_unlock_irq(&xas);
/* xa_unlocked:
* Handle hole in radix tree at the end of the range.
* This code only triggers if there's nothing in radix tree
* beyond 'end'.
*/
if (result == SCAN_SUCCEED && index < end) {
int n = end - index;
if (!shmem_charge(mapping->host, n)) {
result = SCAN_FAIL;
goto tree_locked;
}
for (; index < end; index++) {
radix_tree_insert(&mapping->i_pages, index,
new_page + (index % HPAGE_PMD_NR));
}
nr_none += n;
}
tree_locked:
xa_unlock_irq(&mapping->i_pages);
tree_unlocked:
if (result == SCAN_SUCCEED) { if (result == SCAN_SUCCEED) {
unsigned long flags; struct page *page, *tmp;
struct zone *zone = page_zone(new_page); struct zone *zone = page_zone(new_page);
/* /*
* Replacing old pages with new one has succeed, now we need to * Replacing old pages with new one has succeeded, now we
* copy the content and free old pages. * need to copy the content and free the old pages.
*/ */
list_for_each_entry_safe(page, tmp, &pagelist, lru) { list_for_each_entry_safe(page, tmp, &pagelist, lru) {
copy_highpage(new_page + (page->index % HPAGE_PMD_NR), copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
...@@ -1495,16 +1468,16 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1495,16 +1468,16 @@ static void collapse_shmem(struct mm_struct *mm,
put_page(page); put_page(page);
} }
local_irq_save(flags); local_irq_disable();
__inc_node_page_state(new_page, NR_SHMEM_THPS); __inc_node_page_state(new_page, NR_SHMEM_THPS);
if (nr_none) { if (nr_none) {
__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none); __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none); __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
} }
local_irq_restore(flags); local_irq_enable();
/* /*
* Remove pte page tables, so we can re-faulti * Remove pte page tables, so we can re-fault
* the page as huge. * the page as huge.
*/ */
retract_page_tables(mapping, start); retract_page_tables(mapping, start);
...@@ -1521,37 +1494,37 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1521,37 +1494,37 @@ static void collapse_shmem(struct mm_struct *mm,
khugepaged_pages_collapsed++; khugepaged_pages_collapsed++;
} else { } else {
/* Something went wrong: rollback changes to the radix-tree */ struct page *page;
/* Something went wrong: roll back page cache changes */
shmem_uncharge(mapping->host, nr_none); shmem_uncharge(mapping->host, nr_none);
xa_lock_irq(&mapping->i_pages); xas_lock_irq(&xas);
radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { xas_set(&xas, start);
if (iter.index >= end) xas_for_each(&xas, page, end - 1) {
break;
page = list_first_entry_or_null(&pagelist, page = list_first_entry_or_null(&pagelist,
struct page, lru); struct page, lru);
if (!page || iter.index < page->index) { if (!page || xas.xa_index < page->index) {
if (!nr_none) if (!nr_none)
break; break;
nr_none--; nr_none--;
/* Put holes back where they were */ /* Put holes back where they were */
radix_tree_delete(&mapping->i_pages, iter.index); xas_store(&xas, NULL);
continue; continue;
} }
VM_BUG_ON_PAGE(page->index != iter.index, page); VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
/* Unfreeze the page. */ /* Unfreeze the page. */
list_del(&page->lru); list_del(&page->lru);
page_ref_unfreeze(page, 2); page_ref_unfreeze(page, 2);
radix_tree_replace_slot(&mapping->i_pages, slot, page); xas_store(&xas, page);
slot = radix_tree_iter_resume(slot, &iter); xas_pause(&xas);
xa_unlock_irq(&mapping->i_pages); xas_unlock_irq(&xas);
putback_lru_page(page); putback_lru_page(page);
unlock_page(page); unlock_page(page);
xa_lock_irq(&mapping->i_pages); xas_lock_irq(&xas);
} }
VM_BUG_ON(nr_none); VM_BUG_ON(nr_none);
xa_unlock_irq(&mapping->i_pages); xas_unlock_irq(&xas);
/* Unfreeze new_page, caller would take care about freeing it */ /* Unfreeze new_page, caller would take care about freeing it */
page_ref_unfreeze(new_page, 1); page_ref_unfreeze(new_page, 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment