Commit 5d6af116 authored by Ingo Molnar's avatar Ingo Molnar Committed by Linus Torvalds

[PATCH] futex-2.5.42-A2

This is my current futex patchset against BK-curr.  It mostly includes
must-have crash/correctness fixes from Martin Wirth, tested and reworked
somewhat by myself:

 - crash fix: futex_close did not detach from the vcache. Detach cleanups.
   (Martin Wirth)

 - memory leak fix: forgotten put_page() in a rare path in __pin_page().
   (Martin Wirth)

 - crash fix: do not do any quickcheck in unqueue_me(). (Martin, me)

 - correctness fix: the fastpath in __pin_page() now handles reserved
   pages the same way get_user_pages() does. (Martin Wirth)

 - queueing improvement: __attach_vcache() now uses list_add_tail() to
   avoid the reversal of the futex queue if a COW happens. (Martin Wirth)

 - simplified alignment check in sys_futex. (Martin Wirth)

 - comment fix: make it clear how the vcache hash quickcheck works. (me)
parent f35e6551
@@ -18,7 +18,7 @@ extern void __attach_vcache(vcache_t *vcache,
struct mm_struct *mm, struct mm_struct *mm,
void (*callback)(struct vcache_s *data, struct page *new_page)); void (*callback)(struct vcache_s *data, struct page *new_page));
extern void detach_vcache(vcache_t *vcache); extern void __detach_vcache(vcache_t *vcache);
extern void invalidate_vcache(unsigned long address, struct mm_struct *mm, extern void invalidate_vcache(unsigned long address, struct mm_struct *mm,
struct page *new_page); struct page *new_page);
......
@@ -116,6 +116,7 @@ static struct page *__pin_page(unsigned long addr)
*/ */
page = follow_page(mm, addr, 0); page = follow_page(mm, addr, 0);
if (likely(page != NULL)) { if (likely(page != NULL)) {
if (!PageReserved(page))
get_page(page); get_page(page);
return page; return page;
} }
@@ -140,8 +141,10 @@ static struct page *__pin_page(unsigned long addr)
* check for races: * check for races:
*/ */
tmp = follow_page(mm, addr, 0); tmp = follow_page(mm, addr, 0);
if (tmp != page) if (tmp != page) {
put_page(page);
goto repeat_lookup; goto repeat_lookup;
}
return page; return page;
} }
@@ -176,6 +179,7 @@ static int futex_wake(unsigned long uaddr, int offset, int num)
if (this->page == page && this->offset == offset) { if (this->page == page && this->offset == offset) {
list_del_init(i); list_del_init(i);
__detach_vcache(&this->vcache);
tell_waiter(this); tell_waiter(this);
ret++; ret++;
if (ret >= num) if (ret >= num)
@@ -235,15 +239,15 @@ static inline int unqueue_me(struct futex_q *q)
{ {
int ret = 0; int ret = 0;
detach_vcache(&q->vcache); spin_lock(&vcache_lock);
spin_lock(&futex_lock); spin_lock(&futex_lock);
if (!list_empty(&q->list)) { if (!list_empty(&q->list)) {
list_del(&q->list); list_del(&q->list);
__detach_vcache(&q->vcache);
ret = 1; ret = 1;
} }
spin_unlock(&futex_lock); spin_unlock(&futex_lock);
spin_unlock(&vcache_lock);
return ret; return ret;
} }
@@ -314,13 +318,7 @@ static int futex_close(struct inode *inode, struct file *filp)
{ {
struct futex_q *q = filp->private_data; struct futex_q *q = filp->private_data;
spin_lock(&futex_lock); unqueue_me(q);
if (!list_empty(&q->list)) {
list_del(&q->list);
/* Noone can be polling on us now. */
BUG_ON(waitqueue_active(&q->waiters));
}
spin_unlock(&futex_lock);
unpin_page(q->page); unpin_page(q->page);
kfree(filp->private_data); kfree(filp->private_data);
return 0; return 0;
@@ -436,9 +434,8 @@ asmlinkage int sys_futex(unsigned long uaddr, int op, int val, struct timespec *utime)
pos_in_page = uaddr % PAGE_SIZE; pos_in_page = uaddr % PAGE_SIZE;
/* Must be "naturally" aligned, and not on page boundary. */ /* Must be "naturally" aligned */
if ((pos_in_page % __alignof__(int)) != 0 if (pos_in_page % sizeof(int))
|| pos_in_page + sizeof(int) > PAGE_SIZE)
return -EINVAL; return -EINVAL;
switch (op) { switch (op) {
......
@@ -41,14 +41,12 @@ void __attach_vcache(vcache_t *vcache,
hash_head = hash_vcache(address, mm); hash_head = hash_vcache(address, mm);
list_add(&vcache->hash_entry, hash_head); list_add_tail(&vcache->hash_entry, hash_head);
} }
void detach_vcache(vcache_t *vcache) void __detach_vcache(vcache_t *vcache)
{ {
spin_lock(&vcache_lock); list_del_init(&vcache->hash_entry);
list_del(&vcache->hash_entry);
spin_unlock(&vcache_lock);
} }
void invalidate_vcache(unsigned long address, struct mm_struct *mm, void invalidate_vcache(unsigned long address, struct mm_struct *mm,
@@ -61,12 +59,11 @@ void invalidate_vcache(unsigned long address, struct mm_struct *mm,
hash_head = hash_vcache(address, mm); hash_head = hash_vcache(address, mm);
/* /*
* This is safe, because this path is called with the mm * This is safe, because this path is called with the pagetable
* semaphore read-held, and the add/remove path calls with the * lock held. So while other mm's might add new entries in
* mm semaphore write-held. So while other mm's might add new * parallel, *this* mm is locked out, so if the list is empty
* entries in parallel, and *this* mm is locked out, so if the * now then we do not have to take the vcache lock to see it's
* list is empty now then we do not have to take the vcache * really empty.
* lock to see it's really empty.
*/ */
if (likely(list_empty(hash_head))) if (likely(list_empty(hash_head)))
return; return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment