Commit 5d6af116 authored by Ingo Molnar, committed by Linus Torvalds

[PATCH] futex-2.5.42-A2

This is my current futex patchset against BK-curr.  It mostly includes
must-have crash/correctness fixes from Martin Wirth, tested and reworked
somewhat by myself:

 - crash fix: futex_close did not detach from the vcache. Detach cleanups.
   (Martin Wirth)

 - memory leak fix: forgotten put_page() in a rare path in __pin_page().
   (Martin Wirth)

 - crash fix: do not do any quickcheck in unqueue_me(). (Martin, me)

 - correctness fix: the fastpath in __pin_page() now handles reserved
   pages the same way get_user_pages() does. (Martin Wirth)

 - queueing improvement: __attach_vcache() now uses list_add_tail() to
   avoid the reversal of the futex queue if a COW happens; a toy
   demonstration follows below. (Martin Wirth)

 - simplified alignment check in sys_futex. (Martin Wirth)

 - comment fix: make it clear how the vcache hash quickcheck works. (me)
Parent: f35e6551
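
The list_add_tail() item is the least obvious of the bunch. Here is a toy
user-space model of it (plain C with a hand-rolled list and hypothetical
names, not the kernel's list.h): when queued waiters are moved to a new
hash chain on COW, head insertion reverses their order, while tail
insertion keeps FIFO order.

#include <stdio.h>

/* Toy stand-in for a futex waiter hanging off a vcache hash chain. */
struct waiter {
	int id;
	struct waiter *next;
};

/* Head insertion: the old list_add() behaviour. */
static void add_head(struct waiter **chain, struct waiter *w)
{
	w->next = *chain;
	*chain = w;
}

/* Tail insertion: the list_add_tail() fix. */
static void add_tail(struct waiter **chain, struct waiter *w)
{
	while (*chain)
		chain = &(*chain)->next;
	w->next = NULL;
	*chain = w;
}

/* Model of a COW re-hash: every waiter moves to a new chain. */
static void move_chain(struct waiter *from, struct waiter **to,
		       void (*add)(struct waiter **, struct waiter *))
{
	struct waiter *next;

	for (; from; from = next) {
		next = from->next;
		add(to, from);
	}
}

static void print_chain(const char *tag, struct waiter *w)
{
	printf("%s:", tag);
	for (; w; w = w->next)
		printf(" %d", w->id);
	printf("\n");
}

int main(void)
{
	struct waiter w[3] = { { 1 }, { 2 }, { 3 } };
	struct waiter *old = NULL, *rev = NULL, *fifo = NULL;
	int i;

	for (i = 0; i < 3; i++)
		add_tail(&old, &w[i]);	/* waiters queue up: 1 2 3 */

	move_chain(old, &rev, add_head);
	print_chain("list_add", rev);	/* 3 2 1: wake order reversed */

	/* reset and redo the move with tail insertion */
	old = NULL;
	for (i = 0; i < 3; i++)
		w[i].next = NULL;
	for (i = 0; i < 3; i++)
		add_tail(&old, &w[i]);

	move_chain(old, &fifo, add_tail);
	print_chain("list_add_tail", fifo);	/* 1 2 3: FIFO preserved */
	return 0;
}
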
--- a/include/linux/vcache.h
+++ b/include/linux/vcache.h
@@ -18,7 +18,7 @@ extern void __attach_vcache(vcache_t *vcache,
 		struct mm_struct *mm,
 		void (*callback)(struct vcache_s *data, struct page *new_page));
 
-extern void detach_vcache(vcache_t *vcache);
+extern void __detach_vcache(vcache_t *vcache);
 
 extern void invalidate_vcache(unsigned long address, struct mm_struct *mm,
 				struct page *new_page);

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -115,8 +115,9 @@ static struct page *__pin_page(unsigned long addr)
 	 * Do a quick atomic lookup first - this is the fastpath.
 	 */
 	page = follow_page(mm, addr, 0);
 	if (likely(page != NULL)) {
-		get_page(page);
+		if (!PageReserved(page))
+			get_page(page);
 		return page;
 	}
@@ -140,8 +141,10 @@ static struct page *__pin_page(unsigned long addr)
 	 * check for races:
 	 */
 	tmp = follow_page(mm, addr, 0);
-	if (tmp != page)
+	if (tmp != page) {
+		put_page(page);
 		goto repeat_lookup;
+	}
 
 	return page;
 }
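
Applied together, the two hunks above leave the lookup logic of
__pin_page() looking roughly like this. This is a kernel-context sketch,
not standalone-compilable; the get_user_pages() slow path between the two
checks is elided.

static struct page *__pin_page(unsigned long addr)
{
	struct mm_struct *mm = current->mm;
	struct page *page, *tmp;

	/*
	 * Do a quick atomic lookup first - this is the fastpath.
	 */
	page = follow_page(mm, addr, 0);
	if (likely(page != NULL)) {
		/* reserved pages carry no refcount - treat them the
		   same way get_user_pages() does */
		if (!PageReserved(page))
			get_page(page);
		return page;
	}

repeat_lookup:
	/* ... slow path: fault the page in (elided) ... */

	/*
	 * check for races:
	 */
	tmp = follow_page(mm, addr, 0);
	if (tmp != page) {
		put_page(page);		/* the leak fix: drop the stale ref */
		goto repeat_lookup;
	}

	return page;
}
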
@@ -176,6 +179,7 @@ static int futex_wake(unsigned long uaddr, int offset, int num)
 		if (this->page == page && this->offset == offset) {
 			list_del_init(i);
+			__detach_vcache(&this->vcache);
 			tell_waiter(this);
 			ret++;
 			if (ret >= num)
 				break;
@@ -235,15 +239,15 @@ static inline int unqueue_me(struct futex_q *q)
 {
 	int ret = 0;
 
-	detach_vcache(&q->vcache);
+	spin_lock(&vcache_lock);
 	spin_lock(&futex_lock);
 	if (!list_empty(&q->list)) {
 		list_del(&q->list);
+		__detach_vcache(&q->vcache);
 		ret = 1;
 	}
 	spin_unlock(&futex_lock);
+	spin_unlock(&vcache_lock);
 
 	return ret;
 }
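
With the vcache lock now taken outside the futex lock and the detach done
under both, futex_close() in the next hunk can simply reuse unqueue_me().
A minimal runnable user-space analogue of the nesting (pthreads; the
names and the queued flag are stand-ins, not kernel API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vcache_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t futex_lock = PTHREAD_MUTEX_INITIALIZER;

static int queued = 1;		/* models !list_empty(&q->list) */

static int unqueue_me(void)
{
	int ret = 0;

	pthread_mutex_lock(&vcache_lock);	/* outer lock, as in the patch */
	pthread_mutex_lock(&futex_lock);	/* inner lock */
	if (queued) {
		queued = 0;	/* models list_del() + __detach_vcache() */
		ret = 1;
	}
	pthread_mutex_unlock(&futex_lock);
	pthread_mutex_unlock(&vcache_lock);
	return ret;
}

int main(void)
{
	printf("first  unqueue: %d\n", unqueue_me());	/* 1: was queued */
	printf("second unqueue: %d\n", unqueue_me());	/* 0: already gone */
	return 0;
}

Holding both locks across the detach keeps the futex queue and the vcache
hash in step with each other, which is what the crash fixes above depend on.
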
@@ -314,13 +318,7 @@ static int futex_close(struct inode *inode, struct file *filp)
 {
 	struct futex_q *q = filp->private_data;
 
-	spin_lock(&futex_lock);
-	if (!list_empty(&q->list)) {
-		list_del(&q->list);
-		/* Noone can be polling on us now. */
-		BUG_ON(waitqueue_active(&q->waiters));
-	}
-	spin_unlock(&futex_lock);
+	unqueue_me(q);
 	unpin_page(q->page);
 	kfree(filp->private_data);
 	return 0;
@@ -436,9 +434,8 @@ asmlinkage int sys_futex(unsigned long uaddr, int op, int val, struct timespec *
 	pos_in_page = uaddr % PAGE_SIZE;
 
-	/* Must be "naturally" aligned, and not on page boundary. */
-	if ((pos_in_page % __alignof__(int)) != 0
-	    || pos_in_page + sizeof(int) > PAGE_SIZE)
+	/* Must be "naturally" aligned */
+	if (pos_in_page % sizeof(int))
 		return -EINVAL;
 
 	switch (op) {
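
The dropped page-boundary clause is implied by the remaining one:
PAGE_SIZE is a multiple of sizeof(int), so an int-aligned offset within a
page can never cross the page's end. A small exhaustive user-space check
(assuming 4096-byte pages):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumption: 4 KB pages */

int main(void)
{
	unsigned long pos;

	for (pos = 0; pos < PAGE_SIZE; pos++) {
		if (pos % sizeof(int))
			continue;	/* sys_futex returns -EINVAL here */
		/* every offset that passes the alignment test also
		   leaves room for a whole int inside the page */
		assert(pos + sizeof(int) <= PAGE_SIZE);
	}
	printf("int-aligned offsets never cross the page boundary\n");
	return 0;
}
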
--- a/mm/vcache.c
+++ b/mm/vcache.c
@@ -41,14 +41,12 @@ void __attach_vcache(vcache_t *vcache,
 	hash_head = hash_vcache(address, mm);
-	list_add(&vcache->hash_entry, hash_head);
+	list_add_tail(&vcache->hash_entry, hash_head);
 }
 
-void detach_vcache(vcache_t *vcache)
+void __detach_vcache(vcache_t *vcache)
 {
-	spin_lock(&vcache_lock);
-	list_del(&vcache->hash_entry);
-	spin_unlock(&vcache_lock);
+	list_del_init(&vcache->hash_entry);
 }
 
 void invalidate_vcache(unsigned long address, struct mm_struct *mm,
@@ -61,12 +59,11 @@ void invalidate_vcache(unsigned long address, struct mm_struct *mm,
 	hash_head = hash_vcache(address, mm);
 	/*
-	 * This is safe, because this path is called with the mm
-	 * semaphore read-held, and the add/remove path calls with the
-	 * mm semaphore write-held. So while other mm's might add new
-	 * entries in parallel, and *this* mm is locked out, so if the
-	 * list is empty now then we do not have to take the vcache
-	 * lock to see it's really empty.
+	 * This is safe, because this path is called with the pagetable
+	 * lock held. So while other mm's might add new entries in
+	 * parallel, *this* mm is locked out, so if the list is empty
+	 * now then we do not have to take the vcache lock to see it's
+	 * really empty.
 	 */
 	if (likely(list_empty(hash_head)))
 		return;