Commit 0cc55a02, authored by Michel Lespinasse, committed by Linus Torvalds

mmap locking API: add mmap_read_trylock_non_owner()

Add a couple of APIs used only by kernel/bpf/stackmap.c:
- mmap_read_trylock_non_owner()
- mmap_read_unlock_non_owner() (may be called from an irq_work handler).

It's still not ideal that bpf/stackmap subverts the lock ownership in this
way.  Thanks to Peter Zijlstra for suggesting this API as the least-ugly
way of addressing this in the short term.
Signed-off-by: Michel Lespinasse <walken@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Laurent Dufour <ldufour@linux.ibm.com>
Cc: Liam Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ying Han <yinghan@google.com>
Link: http://lkml.kernel.org/r/20200520052908.204642-8-walken@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent aaa2cc56
...@@ -56,4 +56,18 @@ static inline void mmap_read_unlock(struct mm_struct *mm) ...@@ -56,4 +56,18 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
} }
/*
 * Try to take mm's mmap_sem for reading without blocking, for a caller
 * that will hand the unlock off to another context.
 *
 * On success the lock is released from lockdep's perspective right away
 * (rwsem_release() on the dep_map); the matching unlock is expected to go
 * through mmap_read_unlock_non_owner().
 *
 * Returns true if the read lock was acquired, false otherwise.
 */
static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
{
	/* Contended: report failure without touching lockdep state. */
	if (!down_read_trylock(&mm->mmap_sem))
		return false;

	/* Drop lockdep ownership now; the unlock may come from elsewhere. */
	rwsem_release(&mm->mmap_sem.dep_map, _RET_IP_);
	return true;
}
/*
 * Release a read lock on mm's mmap_sem using up_read_non_owner().
 *
 * Counterpart of mmap_read_trylock_non_owner(); the bpf stackmap user
 * calls this from its irq_work handler as well as directly.
 */
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	struct rw_semaphore *sem = &mm->mmap_sem;

	up_read_non_owner(sem);
}
#endif /* _LINUX_MMAP_LOCK_H */ #endif /* _LINUX_MMAP_LOCK_H */
...@@ -33,7 +33,7 @@ struct bpf_stack_map { ...@@ -33,7 +33,7 @@ struct bpf_stack_map {
/* irq_work to run up_read() for build_id lookup in nmi context */ /* irq_work to run up_read() for build_id lookup in nmi context */
struct stack_map_irq_work { struct stack_map_irq_work {
struct irq_work irq_work; struct irq_work irq_work;
struct rw_semaphore *sem; struct mm_struct *mm;
}; };
static void do_up_read(struct irq_work *entry) static void do_up_read(struct irq_work *entry)
...@@ -44,8 +44,7 @@ static void do_up_read(struct irq_work *entry) ...@@ -44,8 +44,7 @@ static void do_up_read(struct irq_work *entry)
return; return;
work = container_of(entry, struct stack_map_irq_work, irq_work); work = container_of(entry, struct stack_map_irq_work, irq_work);
up_read_non_owner(work->sem); mmap_read_unlock_non_owner(work->mm);
work->sem = NULL;
} }
static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work); static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
...@@ -317,7 +316,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, ...@@ -317,7 +316,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
* with build_id. * with build_id.
*/ */
if (!user || !current || !current->mm || irq_work_busy || if (!user || !current || !current->mm || irq_work_busy ||
mmap_read_trylock(current->mm) == 0) { !mmap_read_trylock_non_owner(current->mm)) {
/* cannot access current->mm, fall back to ips */ /* cannot access current->mm, fall back to ips */
for (i = 0; i < trace_nr; i++) { for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP; id_offs[i].status = BPF_STACK_BUILD_ID_IP;
...@@ -342,16 +341,10 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, ...@@ -342,16 +341,10 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
} }
if (!work) { if (!work) {
mmap_read_unlock(current->mm); mmap_read_unlock_non_owner(current->mm);
} else { } else {
work->sem = &current->mm->mmap_sem; work->mm = current->mm;
irq_work_queue(&work->irq_work); irq_work_queue(&work->irq_work);
/*
* The irq_work will release the mmap_sem with
* up_read_non_owner(). The rwsem_release() is called
* here to release the lock from lockdep's perspective.
*/
rwsem_release(&current->mm->mmap_sem.dep_map, _RET_IP_);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment