Commit 50aa98ba authored by Martin Schwidefsky's avatar Martin Schwidefsky

[S390] fix recursive locking on page_table_lock

Suzuki Poulose reported the following recursive locking bug on s390:

Here is the stack trace : (see Appendix I for more info)

  [<0000000000406ed6>] _spin_lock+0x52/0x94
  [<0000000000103bde>] crst_table_free+0x14e/0x1a4
  [<00000000001ba684>] __pmd_alloc+0x114/0x1ec
  [<00000000001be8d0>] handle_mm_fault+0x2cc/0xb80
  [<0000000000407d62>] do_dat_exception+0x2b6/0x3a0
  [<0000000000114f8c>] sysc_return+0x0/0x8
  [<00000200001642b2>] 0x200001642b2

The page_table_lock is already acquired in __pmd_alloc (mm/memory.c) and
it tries to populate the pud/pgd with a new pmd allocated. If another
thread populates it before we get a chance, we free the pmd using
pmd_free().

On s390x, pmd_free(even pud_free ) is #defined to crst_table_free(),
which acquires the page_table_lock to protect the crst_table index updates.

Hence this ends up in a recursive locking of the page_table_lock.

The solution suggested by Dave Hansen is to use a new spin lock in the mmu
context to protect the access to the crst_list and the pgtable_list.
Reported-by: default avatarSuzuki Poulose <suzuki@in.ibm.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: default avatarMartin Schwidefsky <schwidefsky@de.ibm.com>
parent c4de0c1a
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define __MMU_H #define __MMU_H
typedef struct { typedef struct {
spinlock_t list_lock;
struct list_head crst_list; struct list_head crst_list;
struct list_head pgtable_list; struct list_head pgtable_list;
unsigned long asce_bits; unsigned long asce_bits;
......
...@@ -140,6 +140,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) ...@@ -140,6 +140,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{ {
spin_lock_init(&mm->context.list_lock);
INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.crst_list);
INIT_LIST_HEAD(&mm->context.pgtable_list); INIT_LIST_HEAD(&mm->context.pgtable_list);
return (pgd_t *) crst_table_alloc(mm, s390_noexec); return (pgd_t *) crst_table_alloc(mm, s390_noexec);
......
...@@ -78,9 +78,9 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) ...@@ -78,9 +78,9 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
} }
page->index = page_to_phys(shadow); page->index = page_to_phys(shadow);
} }
spin_lock(&mm->page_table_lock); spin_lock(&mm->context.list_lock);
list_add(&page->lru, &mm->context.crst_list); list_add(&page->lru, &mm->context.crst_list);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->context.list_lock);
return (unsigned long *) page_to_phys(page); return (unsigned long *) page_to_phys(page);
} }
...@@ -89,9 +89,9 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) ...@@ -89,9 +89,9 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
unsigned long *shadow = get_shadow_table(table); unsigned long *shadow = get_shadow_table(table);
struct page *page = virt_to_page(table); struct page *page = virt_to_page(table);
spin_lock(&mm->page_table_lock); spin_lock(&mm->context.list_lock);
list_del(&page->lru); list_del(&page->lru);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->context.list_lock);
if (shadow) if (shadow)
free_pages((unsigned long) shadow, ALLOC_ORDER); free_pages((unsigned long) shadow, ALLOC_ORDER);
free_pages((unsigned long) table, ALLOC_ORDER); free_pages((unsigned long) table, ALLOC_ORDER);
...@@ -182,7 +182,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ...@@ -182,7 +182,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
unsigned long bits; unsigned long bits;
bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
spin_lock(&mm->page_table_lock); spin_lock(&mm->context.list_lock);
page = NULL; page = NULL;
if (!list_empty(&mm->context.pgtable_list)) { if (!list_empty(&mm->context.pgtable_list)) {
page = list_first_entry(&mm->context.pgtable_list, page = list_first_entry(&mm->context.pgtable_list,
...@@ -191,7 +191,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ...@@ -191,7 +191,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
page = NULL; page = NULL;
} }
if (!page) { if (!page) {
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->context.list_lock);
page = alloc_page(GFP_KERNEL|__GFP_REPEAT); page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (!page) if (!page)
return NULL; return NULL;
...@@ -202,7 +202,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ...@@ -202,7 +202,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
clear_table_pgstes(table); clear_table_pgstes(table);
else else
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
spin_lock(&mm->page_table_lock); spin_lock(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list); list_add(&page->lru, &mm->context.pgtable_list);
} }
table = (unsigned long *) page_to_phys(page); table = (unsigned long *) page_to_phys(page);
...@@ -213,7 +213,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) ...@@ -213,7 +213,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
page->flags |= bits; page->flags |= bits;
if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
list_move_tail(&page->lru, &mm->context.pgtable_list); list_move_tail(&page->lru, &mm->context.pgtable_list);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->context.list_lock);
return table; return table;
} }
...@@ -225,7 +225,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) ...@@ -225,7 +225,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT); page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock); spin_lock(&mm->context.list_lock);
page->flags ^= bits; page->flags ^= bits;
if (page->flags & FRAG_MASK) { if (page->flags & FRAG_MASK) {
/* Page now has some free pgtable fragments. */ /* Page now has some free pgtable fragments. */
...@@ -234,7 +234,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) ...@@ -234,7 +234,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
} else } else
/* All fragments of the 4K page have been freed. */ /* All fragments of the 4K page have been freed. */
list_del(&page->lru); list_del(&page->lru);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->context.list_lock);
if (page) { if (page) {
pgtable_page_dtor(page); pgtable_page_dtor(page);
__free_page(page); __free_page(page);
...@@ -245,7 +245,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) ...@@ -245,7 +245,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{ {
struct page *page; struct page *page;
spin_lock(&mm->page_table_lock); spin_lock(&mm->context.list_lock);
/* Free shadow region and segment tables. */ /* Free shadow region and segment tables. */
list_for_each_entry(page, &mm->context.crst_list, lru) list_for_each_entry(page, &mm->context.crst_list, lru)
if (page->index) { if (page->index) {
...@@ -255,7 +255,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) ...@@ -255,7 +255,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
/* "Free" second halves of page tables. */ /* "Free" second halves of page tables. */
list_for_each_entry(page, &mm->context.pgtable_list, lru) list_for_each_entry(page, &mm->context.pgtable_list, lru)
page->flags &= ~SECOND_HALVES; page->flags &= ~SECOND_HALVES;
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->context.list_lock);
mm->context.noexec = 0; mm->context.noexec = 0;
update_mm(mm, tsk); update_mm(mm, tsk);
} }
......
...@@ -331,6 +331,7 @@ void __init vmem_map_init(void) ...@@ -331,6 +331,7 @@ void __init vmem_map_init(void)
unsigned long start, end; unsigned long start, end;
int i; int i;
spin_lock_init(&init_mm.context.list_lock);
INIT_LIST_HEAD(&init_mm.context.crst_list); INIT_LIST_HEAD(&init_mm.context.crst_list);
INIT_LIST_HEAD(&init_mm.context.pgtable_list); INIT_LIST_HEAD(&init_mm.context.pgtable_list);
init_mm.context.noexec = 0; init_mm.context.noexec = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment