Commit 78fb9076 authored by Martin Schwidefsky

s390/mm: simplify page table alloc/free code

With the removal of the dynamic reallocation of page tables for
KVM (see git commit 0b46e0a3)
the page table allocation / freeing code can be simplified.

The page table free code can now use the alloc_pgste bit in the
mm context to decide whether a page table is 2K or 4K; there is no
longer a mix of differently sized page tables. This eliminates the
need to use "page->_mapcount == 0" to check for a 4K page table.

Use the lower two bits in page->_mapcount to indicate which
2K fragments of the 4K page are in use.
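
To make this encoding concrete, the following stand-alone sketch (plain
user-space C, not kernel code; the struct and function names are made up
for illustration, while in the kernel the two bits live in the low bits of
page->_mapcount and are updated under mm->context.list_lock) models how
two bits can track which 2K halves of a 4K page have been handed out:

#include <stdio.h>

/* Toy model of one 4K page that holds two 2K page table fragments.
 * Bit 0 of 'mask' = lower 2K in use, bit 1 = upper 2K in use.
 */
struct frag_page {
        unsigned int mask;
};

/* Hand out a free 2K half: returns 0 or 1, or -1 if both are in use. */
static int frag_alloc(struct frag_page *p)
{
        unsigned int bit;

        for (bit = 0; bit < 2; bit++) {
                if (!(p->mask & (1U << bit))) {
                        p->mask |= 1U << bit;
                        return (int) bit;
                }
        }
        return -1;
}

/* Release one 2K half: returns 1 when the whole 4K page is now unused. */
static int frag_free(struct frag_page *p, int bit)
{
        p->mask &= ~(1U << bit);
        return p->mask == 0;
}

int main(void)
{
        struct frag_page p = { .mask = 0 };
        int a = frag_alloc(&p);         /* first 2K fragment */
        int b = frag_alloc(&p);         /* second 2K fragment */

        printf("handed out halves %d and %d, mask=0x%x\n", a, b, p.mask);
        printf("free half %d -> page empty? %d\n", a, frag_free(&p, a));
        printf("free half %d -> page empty? %d\n", b, frag_free(&p, b));
        return 0;
}

The same checks appear in the diff below: page_table_free() keeps the page
on mm->context.pgtable_list as long as one of the two low bits is still
set, and hands the page back to the page allocator only once the whole
mask drops to zero.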

As 31-bit support is gone, the two defines ALLOC_ORDER and FRAG_MASK
no longer need to cover different values; remove them and use the
constants directly where appropriate.
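
As an aside (background, not part of the patch): on 64-bit s390 a region
or segment table has 2048 entries of 8 bytes, i.e. 16K or four 4K pages,
which is why order 2 is the constant that replaces ALLOC_ORDER, while a
page table proper has only 256 entries and therefore fits in a 2K
fragment. A trivial stand-alone check of that arithmetic:

#include <stdio.h>

int main(void)
{
        const unsigned long crst_entries = 2048;  /* region/segment table */
        const unsigned long pte_entries  = 256;   /* page table */
        const unsigned long entry_size   = 8;     /* bytes per entry */
        const unsigned long page_size    = 4096;
        unsigned long bytes = crst_entries * entry_size;
        unsigned long pages = bytes / page_size;
        unsigned int order = 0;

        while ((1UL << order) < pages)
                order++;
        printf("region/segment table: %lu bytes = %lu pages -> order %u\n",
               bytes, pages, order);
        printf("page table: %lu bytes -> half of a 4K page\n",
               pte_entries * entry_size);
        return 0;
}
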
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent 3d8258e4
@@ -10,11 +10,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
@@ -28,12 +24,9 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-#define ALLOC_ORDER	2
-#define FRAG_MASK	0x03
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
-        struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+        struct page *page = alloc_pages(GFP_KERNEL, 2);
 
         if (!page)
                 return NULL;
@@ -42,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-        free_pages((unsigned long) table, ALLOC_ORDER);
+        free_pages((unsigned long) table, 2);
 }
 
 static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
         INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
         spin_lock_init(&gmap->guest_table_lock);
         gmap->mm = mm;
-        page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+        page = alloc_pages(GFP_KERNEL, 2);
         if (!page)
                 goto out_free;
         page->index = 0;
@@ -247,7 +240,7 @@ void gmap_free(struct gmap *gmap)
 
         /* Free all segment & region tables. */
         list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-                __free_pages(page, ALLOC_ORDER);
+                __free_pages(page, 2);
         gmap_radix_tree_free(&gmap->guest_to_host);
         gmap_radix_tree_free(&gmap->host_to_guest);
         down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
         unsigned long *new;
 
         /* since we dont free the gmap table until gmap_free we can unlock */
-        page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+        page = alloc_pages(GFP_KERNEL, 2);
         if (!page)
                 return -ENOMEM;
         new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
         }
         spin_unlock(&gmap->mm->page_table_lock);
         if (page)
-                __free_pages(page, ALLOC_ORDER);
+                __free_pages(page, 2);
         return 0;
 }
 
@@ -795,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 }
 EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
-static inline int page_table_with_pgste(struct page *page)
-{
-        return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-        struct page *page;
-        unsigned long *table;
-
-        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-        if (!page)
-                return NULL;
-        if (!pgtable_page_ctor(page)) {
-                __free_page(page);
-                return NULL;
-        }
-        atomic_set(&page->_mapcount, 0);
-        table = (unsigned long *) page_to_phys(page);
-        clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-        clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
-        return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-        struct page *page;
-
-        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-        pgtable_page_dtor(page);
-        atomic_set(&page->_mapcount, -1);
-        __free_page(page);
-}
-
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
                           unsigned long key, bool nq)
 {
@@ -957,20 +916,6 @@ __initcall(page_table_register_sysctl);
 
 #else /* CONFIG_PGSTE */
 
-static inline int page_table_with_pgste(struct page *page)
-{
-        return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-        return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
 static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
                         unsigned long vmaddr)
 {
@@ -994,24 +939,33 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
  */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
-        unsigned long *uninitialized_var(table);
-        struct page *uninitialized_var(page);
+        unsigned long *table;
+        struct page *page;
         unsigned int mask, bit;
 
-        if (mm_alloc_pgste(mm))
-                return page_table_alloc_pgste(mm);
-        /* Allocate fragments of a 4K page as 1K/2K page table */
-        spin_lock_bh(&mm->context.list_lock);
-        mask = FRAG_MASK;
-        if (!list_empty(&mm->context.pgtable_list)) {
-                page = list_first_entry(&mm->context.pgtable_list,
-                                        struct page, lru);
-                table = (unsigned long *) page_to_phys(page);
-                mask = atomic_read(&page->_mapcount);
-                mask = mask | (mask >> 4);
-        }
-        if ((mask & FRAG_MASK) == FRAG_MASK) {
-                spin_unlock_bh(&mm->context.list_lock);
-                page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-                if (!page)
-                        return NULL;
+        /* Try to get a fragment of a 4K page as a 2K page table */
+        if (!mm_alloc_pgste(mm)) {
+                table = NULL;
+                spin_lock_bh(&mm->context.list_lock);
+                if (!list_empty(&mm->context.pgtable_list)) {
+                        page = list_first_entry(&mm->context.pgtable_list,
+                                                struct page, lru);
+                        mask = atomic_read(&page->_mapcount);
+                        mask = (mask | (mask >> 4)) & 3;
+                        if (mask != 3) {
+                                table = (unsigned long *) page_to_phys(page);
+                                bit = mask & 1;         /* =1 -> second 2K */
+                                if (bit)
+                                        table += PTRS_PER_PTE;
+                                atomic_xor_bits(&page->_mapcount, 1U << bit);
+                                list_del(&page->lru);
+                        }
+                }
+                spin_unlock_bh(&mm->context.list_lock);
+                if (table)
+                        return table;
+        }
+        /* Allocate a fresh page */
+        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+        if (!page)
+                return NULL;
@@ -1019,19 +973,21 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
                 __free_page(page);
                 return NULL;
         }
-        atomic_set(&page->_mapcount, 1);
+        /* Initialize page table */
         table = (unsigned long *) page_to_phys(page);
+        if (mm_alloc_pgste(mm)) {
+                /* Return 4K page table with PGSTEs */
+                atomic_set(&page->_mapcount, 3);
+                clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+                clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+        } else {
+                /* Return the first 2K fragment of the page */
+                atomic_set(&page->_mapcount, 1);
                 clear_table(table, _PAGE_INVALID, PAGE_SIZE);
                 spin_lock_bh(&mm->context.list_lock);
                 list_add(&page->lru, &mm->context.pgtable_list);
-        } else {
-                for (bit = 1; mask & bit; bit <<= 1)
-                        table += PTRS_PER_PTE;
-                mask = atomic_xor_bits(&page->_mapcount, bit);
-                if ((mask & FRAG_MASK) == FRAG_MASK)
-                        list_del(&page->lru);
-        }
-        spin_unlock_bh(&mm->context.list_lock);
+                spin_unlock_bh(&mm->context.list_lock);
+        }
         return table;
 }
 
@@ -1041,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
         unsigned int bit, mask;
 
         page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-        if (page_table_with_pgste(page))
-                return page_table_free_pgste(table);
-        /* Free 1K/2K page table fragment of a 4K page */
-        bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
-        spin_lock_bh(&mm->context.list_lock);
-        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-                list_del(&page->lru);
-        mask = atomic_xor_bits(&page->_mapcount, bit);
-        if (mask & FRAG_MASK)
-                list_add(&page->lru, &mm->context.pgtable_list);
-        spin_unlock_bh(&mm->context.list_lock);
-        if (mask == 0) {
-                pgtable_page_dtor(page);
-                atomic_set(&page->_mapcount, -1);
-                __free_page(page);
-        }
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
-        struct page *page;
-
-        if (bit == FRAG_MASK)
-                return page_table_free_pgste(table);
-        /* Free 1K/2K page table fragment of a 4K page */
-        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-        if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
-                pgtable_page_dtor(page);
-                atomic_set(&page->_mapcount, -1);
-                __free_page(page);
-        }
+        if (!mm_alloc_pgste(mm)) {
+                /* Free 2K page table fragment of a 4K page */
+                bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+                spin_lock_bh(&mm->context.list_lock);
+                mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+                if (mask & 3)
+                        list_add(&page->lru, &mm->context.pgtable_list);
+                else
+                        list_del(&page->lru);
+                spin_unlock_bh(&mm->context.list_lock);
+                if (mask != 0)
+                        return;
+        }
+
+        pgtable_page_dtor(page);
+        atomic_set(&page->_mapcount, -1);
+        __free_page(page);
 }
 
 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 
         mm = tlb->mm;
         page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-        if (page_table_with_pgste(page)) {
+        if (mm_alloc_pgste(mm)) {
                 gmap_unlink(mm, table, vmaddr);
-                table = (unsigned long *) (__pa(table) | FRAG_MASK);
+                table = (unsigned long *) (__pa(table) | 3);
                 tlb_remove_table(tlb, table);
                 return;
         }
-        bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+        bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
         spin_lock_bh(&mm->context.list_lock);
-        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-                list_del(&page->lru);
-        mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
-        if (mask & FRAG_MASK)
+        mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+        if (mask & 3)
                 list_add_tail(&page->lru, &mm->context.pgtable_list);
+        else
+                list_del(&page->lru);
         spin_unlock_bh(&mm->context.list_lock);
-        table = (unsigned long *) (__pa(table) | (bit << 4));
+        table = (unsigned long *) (__pa(table) | (1U << bit));
         tlb_remove_table(tlb, table);
 }
 
 static void __tlb_remove_table(void *_table)
 {
-        const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
-        void *table = (void *)((unsigned long) _table & ~mask);
-        unsigned type = (unsigned long) _table & mask;
+        unsigned int mask = (unsigned long) _table & 3;
+        void *table = (void *)((unsigned long) _table ^ mask);
+        struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 
-        if (type)
-                __page_table_free_rcu(table, type);
-        else
-                free_pages((unsigned long) table, ALLOC_ORDER);
+        switch (mask) {
+        case 0:         /* pmd or pud */
+                free_pages((unsigned long) table, 2);
+                break;
+        case 1:         /* lower 2K of a 4K page table */
+        case 2:         /* higher 2K of a 4K page table */
+                if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+                        break;
+                /* fallthrough */
+        case 3:         /* 4K page table with pgstes */
+                pgtable_page_dtor(page);
+                atomic_set(&page->_mapcount, -1);
+                __free_page(page);
                break;
        }
 }
 
 static void tlb_remove_table_smp_sync(void *arg)