Commit 972d8c37 authored by Arun Sharma, committed by David Mosberger

[PATCH] ia64: fix ia32 virtual memory leaks due to partial-page mappings

Certain IA-32 applications that perform mmap/munmap calls which are not
PAGE_SIZE aligned could see temporary memory leaks (recovered at process
exit time): because the native ia64 page size (up to 64KB) is larger than
the 4KB pages an IA-32 application works with, the kernel did not have
enough information to decide whether a complete native page could be
unmapped. This patch adds a new data structure called the "partial page
list", which helps the kernel keep track of precisely which 4KB sub-pages
are in use by the IA-32 application.

Armed with this data, the kernel can make better decisions at munmap
and mprotect time. No significant performance degradation was observed
in the workloads we tested, and in some cases the performance actually
improved!  This is possibly due to the reduced length of the vma list.
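
To make the bookkeeping concrete, here is a minimal user-space sketch of
the idea (the struct, helper, and the 64KB native page size below are
illustrative assumptions, not the patch's actual code): a per-native-page
bitmap records which 4KB sub-pages are live, so the kernel can tell at
munmap time whether the whole native page can be freed.

	#include <assert.h>
	#include <stdio.h>

	#define IA32_PAGE_SHIFT		12	/* 4KB IA-32 pages */
	#define NATIVE_PAGE_SHIFT	16	/* assume a 64KB ia64 page, the largest "normal" size */
	#define SUB_PAGES	(1 << (NATIVE_PAGE_SHIFT - IA32_PAGE_SHIFT))	/* 16 <= 32 */

	struct pp {				/* stand-in for struct partial_page */
		unsigned int base;		/* which native page this node covers */
		unsigned int bitmap;		/* bit i set => 4KB sub-page i is mapped */
	};

	/* Mark sub-pages [first, first+n) of one native page as mapped/unmapped. */
	static void pp_set(struct pp *pp, unsigned int first, unsigned int n, int mapped)
	{
		unsigned int i;

		assert(first + n <= SUB_PAGES);	/* a 32-bit bitmap suffices for 16 sub-pages */
		for (i = first; i < first + n; i++) {
			if (mapped)
				pp->bitmap |= 1u << i;
			else
				pp->bitmap &= ~(1u << i);
		}
	}

	int main(void)
	{
		struct pp pp = { .base = 0, .bitmap = 0 };

		pp_set(&pp, 0, 3, 1);	/* app maps 12KB: not native-page aligned */
		pp_set(&pp, 0, 2, 0);	/* app then unmaps the first 8KB */

		/* sub-page 2 is still in use, so the 64KB native page must stay */
		printf("native page %s be unmapped\n", pp.bitmap ? "cannot" : "can");
		return 0;
	}

Without the bitmap, the kernel only knew that some of the native page had
been mapped and had to keep the whole page until exit; with it, the page
can be released as soon as the bitmap reaches zero.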
Signed-off-by: Arun Sharma <arun.sharma@intel.com>
Signed-off-by: Gordon Jin <gordon.jin@intel.com>
Signed-off-by: David Mosberger <davidm@hpl.hp.com>
parent d69e4b12
@@ -197,6 +197,10 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
 	}
 	up_write(&current->mm->mmap_sem);
 
+	/* Can't do it in ia64_elf32_init(). Needs to be done before calls to
+	   elf32_map() */
+	current->thread.ppl = ia32_init_pp_list();
+
 	return 0;
 }
@@ -371,7 +371,7 @@ ia32_syscall_table:
 	data8 sys_sched_get_priority_min	/* 160 */
 	data8 sys32_sched_rr_get_interval
 	data8 compat_sys_nanosleep
-	data8 sys_mremap
+	data8 sys32_mremap
 	data8 sys_setresuid	/* 16-bit version */
 	data8 sys32_getresuid16	/* 16-bit version */	/* 165 */
 	data8 sys_ni_syscall	/* vm86 */
@@ -211,6 +211,8 @@ ia32_cpu_init (void)
 static int __init
 ia32_init (void)
 {
+	extern kmem_cache_t *partial_page_cachep;
+
 	ia32_exec_domain.name = "Linux/x86";
 	ia32_exec_domain.handler = NULL;
 	ia32_exec_domain.pers_low = PER_LINUX32;
@@ -218,6 +220,12 @@ ia32_init (void)
 	ia32_exec_domain.signal_map = default_exec_domain.signal_map;
 	ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
 	register_exec_domain(&ia32_exec_domain);
+
+	partial_page_cachep = kmem_cache_create("partial_page_cache",
+						sizeof(struct partial_page), 0, 0, NULL, NULL);
+	if (!partial_page_cachep)
+		panic("Cannot create partial page SLAB cache");
+
 	return 0;
 }
@@ -9,6 +9,7 @@
 #include <linux/binfmts.h>
 #include <linux/compat.h>
+#include <linux/rbtree.h>
 
 #include <asm/processor.h>
@@ -22,6 +23,30 @@
 #define IA32_PAGE_ALIGN(addr)	(((addr) + IA32_PAGE_SIZE - 1) & IA32_PAGE_MASK)
 #define IA32_CLOCKS_PER_SEC	100	/* Cast in stone for IA32 Linux */
 
+/*
+ * partially mapped pages provide precise accounting of which 4k sub pages
+ * are mapped and which ones are not, thereby improving IA-32 compatibility.
+ */
+struct partial_page {
+	struct partial_page	*next;	/* linked list, sorted by address */
+	struct rb_node		pp_rb;
+	/* 64K is the largest "normal" page supported by ia64 ABI. So 4K*32
+	 * should suffice.*/
+	unsigned int		bitmap;
+	unsigned int		base;
+};
+
+struct partial_page_list {
+	struct partial_page	*pp_head;	/* list head, points to the lowest
+						 * addressed partial page */
+	struct rb_root		ppl_rb;
+	struct partial_page	*pp_hint;	/* pp_hint->next is the last
+						 * accessed partial page */
+	atomic_t		pp_count;	/* reference count */
+};
+
+struct partial_page_list* ia32_init_pp_list (void);
+
 /* sigcontext.h */
 
 /*
  * As documented in the iBCS2 standard..
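As an aside on the lookup structures above: the sorted list plus pp_hint is
meant to make the common pattern of consecutive, ascending mmap/munmap calls
cheap, while the rb-tree bounds the worst case. A hedged sketch of a
hint-assisted search over the sorted list follows (illustrative only;
pp_find and its exact semantics are assumptions, and the real code in this
patch also maintains the rb-tree):

	#include <stddef.h>

	struct partial_page {
		struct partial_page *next;	/* sorted by ascending base */
		unsigned int base;
	};

	struct partial_page_list {
		struct partial_page *pp_head;
		struct partial_page *pp_hint;	/* pp_hint->next is the last accessed node */
	};

	/* Find the node whose base equals 'base', resuming from the hint when safe. */
	static struct partial_page *
	pp_find(struct partial_page_list *ppl, unsigned int base)
	{
		struct partial_page *prev = NULL, *pp = ppl->pp_head;

		/* fast path: start after the hint if it cannot overshoot the target */
		if (ppl->pp_hint && ppl->pp_hint->base < base) {
			prev = ppl->pp_hint;
			pp = prev->next;
		}

		for (; pp && pp->base <= base; prev = pp, pp = pp->next) {
			if (pp->base == base) {
				if (prev)
					ppl->pp_hint = prev;	/* prev->next == pp */
				return pp;
			}
		}
		return NULL;	/* list is sorted, so the scan can stop early */
	}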
[One file's diff is collapsed and not shown.]
@@ -439,6 +439,10 @@ copy_thread (int nr, unsigned long clone_flags,
 		ia32_save_state(p);
 		if (clone_flags & CLONE_SETTLS)
 			retval = ia32_clone_tls(p, child_ptregs);
+
+		/* Copy partially mapped page list */
+		if (!retval)
+			retval = ia32_copy_partial_page_list(p, clone_flags);
 	}
 #endif
@@ -672,6 +676,10 @@ flush_thread (void)
 	/* drop floating-point and debug-register state if it exists: */
 	current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
 	ia64_drop_fpu(current);
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(ia64_task_regs(current)))
+		ia32_drop_partial_page_list(current->thread.ppl);
+#endif
 }
 
 /*
@@ -691,6 +699,10 @@ exit_thread (void)
 	if (current->thread.flags & IA64_THREAD_DBG_VALID)
 		pfm_release_debug_registers(current);
 #endif
+#ifdef CONFIG_IA32_SUPPORT
+	if (IS_IA32_PROCESS(ia64_task_regs(current)))
+		ia32_drop_partial_page_list(current->thread.ppl);
+#endif
 }
 
 unsigned long
@@ -18,6 +18,8 @@ extern void ia32_gdt_init (void);
 extern int ia32_exception (struct pt_regs *regs, unsigned long isr);
 extern int ia32_intercept (struct pt_regs *regs, unsigned long isr);
 extern int ia32_clone_tls (struct task_struct *child, struct pt_regs *childregs);
+extern int ia32_copy_partial_page_list (struct task_struct *, unsigned long);
+extern void ia32_drop_partial_page_list (struct partial_page_list *);
 
 # endif /* !CONFIG_IA32_SUPPORT */
@@ -230,6 +230,7 @@ struct desc_struct {
 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
 
+struct partial_page_list;
 #endif
 
 struct thread_struct {
@@ -251,6 +252,7 @@ struct thread_struct {
 	__u64 fdr;			/* IA32 fp except. data reg */
 	__u64 old_k1;			/* old value of ar.k1 */
 	__u64 old_iob;			/* old IOBase value */
+	struct partial_page_list *ppl;	/* partial page list for 4K page size issue */
 	/* cached TLS descriptors. */
 	struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -260,7 +262,8 @@ struct thread_struct {
 	.fir =		0,		\
 	.fdr =		0,		\
 	.old_k1 =	0,		\
-	.old_iob =	0,
+	.old_iob =	0,		\
+	.ppl =		0,
 #else
 # define INIT_THREAD_IA32
 #endif /* CONFIG_IA32_SUPPORT */