Commit 34d5a4b3 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'locking-urgent-2020-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull futex fix from Thomas Gleixner:
 "Fix for yet another subtle futex issue.

  The futex code used ihold() to prevent inodes from vanishing, but
  ihold() does not guarantee inode persistence. Replace the inode
  pointer with a per boot, machine wide, unique inode identifier.

  The second commit fixes the breakage of the hash mechanism which
  causes a 100% performance regression"

* tag 'locking-urgent-2020-03-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Unbreak futex hashing
  futex: Fix inode life-time issue
parents ec181b7f 8d677436
...@@ -138,6 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) ...@@ -138,6 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_sb = sb; inode->i_sb = sb;
inode->i_blkbits = sb->s_blocksize_bits; inode->i_blkbits = sb->s_blocksize_bits;
inode->i_flags = 0; inode->i_flags = 0;
atomic64_set(&inode->i_sequence, 0);
atomic_set(&inode->i_count, 1); atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops; inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops; inode->i_fop = &no_open_fops;
......
...@@ -698,6 +698,7 @@ struct inode { ...@@ -698,6 +698,7 @@ struct inode {
struct rcu_head i_rcu; struct rcu_head i_rcu;
}; };
atomic64_t i_version; atomic64_t i_version;
atomic64_t i_sequence; /* see futex */
atomic_t i_count; atomic_t i_count;
atomic_t i_dio_count; atomic_t i_dio_count;
atomic_t i_writecount; atomic_t i_writecount;
......
...@@ -31,23 +31,26 @@ struct task_struct; ...@@ -31,23 +31,26 @@ struct task_struct;
union futex_key { union futex_key {
struct { struct {
u64 i_seq;
unsigned long pgoff; unsigned long pgoff;
struct inode *inode; unsigned int offset;
int offset;
} shared; } shared;
struct { struct {
unsigned long address; union {
struct mm_struct *mm; struct mm_struct *mm;
int offset; u64 __tmp;
};
unsigned long address;
unsigned int offset;
} private; } private;
struct { struct {
u64 ptr;
unsigned long word; unsigned long word;
void *ptr; unsigned int offset;
int offset;
} both; } both;
}; };
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
#ifdef CONFIG_FUTEX #ifdef CONFIG_FUTEX
enum { enum {
......
...@@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb) ...@@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
*/ */
static struct futex_hash_bucket *hash_futex(union futex_key *key) static struct futex_hash_bucket *hash_futex(union futex_key *key)
{ {
u32 hash = jhash2((u32*)&key->both.word, u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
(sizeof(key->both.word)+sizeof(key->both.ptr))/4,
key->both.offset); key->both.offset);
return &futex_queues[hash & (futex_hashsize - 1)]; return &futex_queues[hash & (futex_hashsize - 1)];
} }
...@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key) ...@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE: case FUT_OFF_INODE:
ihold(key->shared.inode); /* implies smp_mb(); (B) */ smp_mb(); /* explicit smp_mb(); (B) */
break; break;
case FUT_OFF_MMSHARED: case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies smp_mb(); (B) */ futex_get_mm(key); /* implies smp_mb(); (B) */
...@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key) ...@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE: case FUT_OFF_INODE:
iput(key->shared.inode);
break; break;
case FUT_OFF_MMSHARED: case FUT_OFF_MMSHARED:
mmdrop(key->private.mm); mmdrop(key->private.mm);
...@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, ...@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
return timeout; return timeout;
} }
/*
* Generate a machine wide unique identifier for this inode.
*
* This relies on u64 not wrapping in the life-time of the machine; which with
* 1ns resolution means almost 585 years.
*
* This further relies on the fact that a well formed program will not unmap
* the file while it has a (shared) futex waiting on it. This mapping will have
* a file reference which pins the mount and inode.
*
* If for some reason an inode gets evicted and read back in again, it will get
* a new sequence number and will _NOT_ match, even though it is the exact same
* file.
*
* It is important that match_futex() will never have a false-positive, esp.
* for PI futexes that can mess up the state. The above argues that false-negatives
* are only possible for malformed programs.
*/
static u64 get_inode_sequence_number(struct inode *inode)
{
static atomic64_t i_seq;
u64 old;
/* Does the inode already have a sequence number? */
old = atomic64_read(&inode->i_sequence);
if (likely(old))
return old;
for (;;) {
u64 new = atomic64_add_return(1, &i_seq);
if (WARN_ON_ONCE(!new))
continue;
old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
if (old)
return old;
return new;
}
}
/** /**
* get_futex_key() - Get parameters which are the keys for a futex * get_futex_key() - Get parameters which are the keys for a futex
* @uaddr: virtual address of the futex * @uaddr: virtual address of the futex
...@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, ...@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
* *
* The key words are stored in @key on success. * The key words are stored in @key on success.
* *
* For shared mappings, it's (page->index, file_inode(vma->vm_file), * For shared mappings (when @fshared), the key is:
* offset_within_page). For private mappings, it's (uaddr, current->mm). * ( inode->i_sequence, page->index, offset_within_page )
* We can usually work out the index without swapping in the page. * [ also see get_inode_sequence_number() ]
*
* For private mappings (or when !@fshared), the key is:
* ( current->mm, address, 0 )
*
* This allows (cross process, where applicable) identification of the futex
* without keeping the page pinned for the duration of the FUTEX_WAIT.
* *
* lock_page() might sleep, the caller should not hold a spinlock. * lock_page() might sleep, the caller should not hold a spinlock.
*/ */
...@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a ...@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
key->private.mm = mm; key->private.mm = mm;
key->private.address = address; key->private.address = address;
get_futex_key_refs(key); /* implies smp_mb(); (B) */
} else { } else {
struct inode *inode; struct inode *inode;
...@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a ...@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
goto again; goto again;
} }
/*
* Take a reference unless it is about to be freed. Previously
* this reference was taken by ihold under the page lock
* pinning the inode in place so i_lock was unnecessary. The
* only way for this check to fail is if the inode was
* truncated in parallel which is almost certainly an
* application bug. In such a case, just retry.
*
* We are not calling into get_futex_key_refs() in file-backed
* cases, therefore a successful atomic_inc return below will
* guarantee that get_futex_key() will still imply smp_mb(); (B).
*/
if (!atomic_inc_not_zero(&inode->i_count)) {
rcu_read_unlock();
put_page(page);
goto again;
}
/* Should be impossible but lets be paranoid for now */
if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
err = -EFAULT;
rcu_read_unlock();
iput(inode);
goto out;
}
key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = inode; key->shared.i_seq = get_inode_sequence_number(inode);
key->shared.pgoff = basepage_index(tail); key->shared.pgoff = basepage_index(tail);
rcu_read_unlock(); rcu_read_unlock();
} }
get_futex_key_refs(key); /* implies smp_mb(); (B) */
out: out:
put_page(page); put_page(page);
return err; return err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment