Commit 6119418f authored by Marco Elver's avatar Marco Elver Committed by Paul E. McKenney

kcsan: Avoid blocking producers in prepare_report()

To avoid deadlock in case watchers can be interrupted, we need to ensure
that producers of the struct other_info can never be blocked by an
unrelated consumer. (Likely to occur with KCSAN_INTERRUPT_WATCHER.)

There are several cases that can lead to this scenario, for example:

	1. A watchpoint A was set up by task T1, but interrupted by
	   interrupt I1. Some other thread (task or interrupt) finds
	   watchpoint A, consumes it, and sets other_info. Then I1 also
	   finds some unrelated watchpoint B, consumes it, but is blocked
	   because other_info is in use. T1 cannot consume other_info
	   because I1 never returns -> deadlock.

	2. A watchpoint A was set up by task T1, but interrupted by
	   interrupt I1, which also sets up a watchpoint B. Some other
	   thread finds watchpoint A, and consumes it and sets up
	   other_info with its information. Similarly some other thread
	   finds watchpoint B and consumes it, but is then blocked because
	   other_info is in use. When I1 continues it sees its watchpoint
	   was consumed, and that it must wait for other_info, which
	   currently contains information to be consumed by T1. However, T1
	   cannot unblock other_info because I1 never returns -> deadlock.

To avoid this, we need to ensure that producers of struct other_info
always have a usable other_info entry. This is obviously not the case
with only a single instance of struct other_info, as concurrent
producers must wait for the entry to be released by some consumer (which
may be locked up as illustrated above).

While it would be nice if producers could simply call kmalloc() and
append their instance of struct other_info to a list, we are very
limited in this code path: since KCSAN can instrument the allocators
themselves, calling kmalloc() could lead to deadlock or corrupted
allocator state.

Since producers of the struct other_info will always succeed at
try_consume_watchpoint(), preceding the call into kcsan_report(), we
know that the particular watchpoint slot cannot simply be reused or
consumed by another potential other_info producer. If we move removal of
a watchpoint after reporting (by the consumer of struct other_info), we
can see a consumed watchpoint as a held lock on elements of other_info,
if we create a one-to-one mapping of a watchpoint to an other_info
element.

Therefore, the simplest solution is to create an array of struct
other_info that is as large as the watchpoints array in core.c, and pass
the watchpoint index to kcsan_report() for producers and consumers, and
change watchpoints to be removed after reporting is done.

With a default config on a 64-bit system, the array other_infos consumes
~37KiB. For most systems today this is not a problem. On smaller memory
constrained systems, the config value CONFIG_KCSAN_NUM_WATCHPOINTS can
be reduced appropriately.

Overall, this change is a simplification of the prepare_report() code,
and makes some of the checks (such as checking if at least one access is
a write) redundant.

Tested:
$ tools/testing/selftests/rcutorture/bin/kvm.sh \
	--cpus 12 --duration 10 --kconfig "CONFIG_DEBUG_INFO=y \
	CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n \
	CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n \
	CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y \
	CONFIG_KCSAN_INTERRUPT_WATCHER=y CONFIG_PROVE_LOCKING=y" \
	--configs TREE03
=> No longer hangs and runs to completion as expected.
Reported-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
parent 135c0872
...@@ -69,7 +69,6 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { ...@@ -69,7 +69,6 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = {
* slot=9: [10, 11, 9] * slot=9: [10, 11, 9]
* slot=63: [64, 65, 63] * slot=63: [64, 65, 63]
*/ */
#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT)
#define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS)) #define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS))
/* /*
...@@ -171,12 +170,16 @@ try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint) ...@@ -171,12 +170,16 @@ try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT); return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
} }
/* /* Return true if watchpoint was not touched, false if already consumed. */
* Return true if watchpoint was not touched, false if consumed. static inline bool consume_watchpoint(atomic_long_t *watchpoint)
*/
static inline bool remove_watchpoint(atomic_long_t *watchpoint)
{ {
return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT; return atomic_long_xchg_relaxed(watchpoint, CONSUMED_WATCHPOINT) != CONSUMED_WATCHPOINT;
}
/* Remove the watchpoint -- its slot may be reused after. */
static inline void remove_watchpoint(atomic_long_t *watchpoint)
{
atomic_long_set(watchpoint, INVALID_WATCHPOINT);
} }
static __always_inline struct kcsan_ctx *get_ctx(void) static __always_inline struct kcsan_ctx *get_ctx(void)
...@@ -322,7 +325,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, ...@@ -322,7 +325,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr,
if (consumed) { if (consumed) {
kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE, kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_MAYBE,
KCSAN_REPORT_CONSUMED_WATCHPOINT); KCSAN_REPORT_CONSUMED_WATCHPOINT,
watchpoint - watchpoints);
} else { } else {
/* /*
* The other thread may not print any diagnostics, as it has * The other thread may not print any diagnostics, as it has
...@@ -470,7 +474,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) ...@@ -470,7 +474,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
value_change = KCSAN_VALUE_CHANGE_TRUE; value_change = KCSAN_VALUE_CHANGE_TRUE;
/* Check if this access raced with another. */ /* Check if this access raced with another. */
if (!remove_watchpoint(watchpoint)) { if (!consume_watchpoint(watchpoint)) {
/* /*
* Depending on the access type, map a value_change of MAYBE to * Depending on the access type, map a value_change of MAYBE to
* TRUE (always report) or FALSE (never report). * TRUE (always report) or FALSE (never report).
...@@ -500,7 +504,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) ...@@ -500,7 +504,8 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE) if (is_assert && value_change == KCSAN_VALUE_CHANGE_TRUE)
kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES);
kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL); kcsan_report(ptr, size, type, value_change, KCSAN_REPORT_RACE_SIGNAL,
watchpoint - watchpoints);
} else if (value_change == KCSAN_VALUE_CHANGE_TRUE) { } else if (value_change == KCSAN_VALUE_CHANGE_TRUE) {
/* Inferring a race, since the value should not have changed. */ /* Inferring a race, since the value should not have changed. */
...@@ -510,9 +515,15 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) ...@@ -510,9 +515,15 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type)
if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert)
kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE, kcsan_report(ptr, size, type, KCSAN_VALUE_CHANGE_TRUE,
KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); KCSAN_REPORT_RACE_UNKNOWN_ORIGIN,
watchpoint - watchpoints);
} }
/*
* Remove watchpoint; must be after reporting, since the slot may be
* reused after this point.
*/
remove_watchpoint(watchpoint);
kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS); kcsan_counter_dec(KCSAN_COUNTER_USED_WATCHPOINTS);
out_unlock: out_unlock:
if (!kcsan_interrupt_watcher) if (!kcsan_interrupt_watcher)
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
/* The number of adjacent watchpoints to check. */ /* The number of adjacent watchpoints to check. */
#define KCSAN_CHECK_ADJACENT 1 #define KCSAN_CHECK_ADJACENT 1
#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT)
extern unsigned int kcsan_udelay_task; extern unsigned int kcsan_udelay_task;
extern unsigned int kcsan_udelay_interrupt; extern unsigned int kcsan_udelay_interrupt;
...@@ -136,6 +137,6 @@ enum kcsan_report_type { ...@@ -136,6 +137,6 @@ enum kcsan_report_type {
*/ */
extern void kcsan_report(const volatile void *ptr, size_t size, int access_type, extern void kcsan_report(const volatile void *ptr, size_t size, int access_type,
enum kcsan_value_change value_change, enum kcsan_value_change value_change,
enum kcsan_report_type type); enum kcsan_report_type type, int watchpoint_idx);
#endif /* _KERNEL_KCSAN_KCSAN_H */ #endif /* _KERNEL_KCSAN_KCSAN_H */
...@@ -30,9 +30,7 @@ struct access_info { ...@@ -30,9 +30,7 @@ struct access_info {
/* /*
* Other thread info: communicated from other racing thread to thread that set * Other thread info: communicated from other racing thread to thread that set
* up the watchpoint, which then prints the complete report atomically. Only * up the watchpoint, which then prints the complete report atomically.
* need one struct, as all threads should to be serialized regardless to print
* the reports, with reporting being in the slow-path.
*/ */
struct other_info { struct other_info {
struct access_info ai; struct access_info ai;
...@@ -59,7 +57,11 @@ struct other_info { ...@@ -59,7 +57,11 @@ struct other_info {
struct task_struct *task; struct task_struct *task;
}; };
static struct other_info other_infos[1]; /*
* To never block any producers of struct other_info, we need as many elements
* as we have watchpoints (upper bound on concurrent races to report).
*/
static struct other_info other_infos[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
/* /*
* Information about reported races; used to rate limit reporting. * Information about reported races; used to rate limit reporting.
...@@ -96,10 +98,11 @@ struct report_time { ...@@ -96,10 +98,11 @@ struct report_time {
static struct report_time report_times[REPORT_TIMES_SIZE]; static struct report_time report_times[REPORT_TIMES_SIZE];
/* /*
* This spinlock protects reporting and other_info, since other_info is usually * Spinlock serializing report generation, and access to @other_infos. Although
* required when reporting. * it could make sense to have a finer-grained locking story for @other_infos,
* report generation needs to be serialized either way, so not much is gained.
*/ */
static DEFINE_SPINLOCK(report_lock); static DEFINE_RAW_SPINLOCK(report_lock);
/* /*
* Checks if the race identified by thread frames frame1 and frame2 has * Checks if the race identified by thread frames frame1 and frame2 has
...@@ -395,9 +398,13 @@ static bool print_report(enum kcsan_value_change value_change, ...@@ -395,9 +398,13 @@ static bool print_report(enum kcsan_value_change value_change,
static void release_report(unsigned long *flags, struct other_info *other_info) static void release_report(unsigned long *flags, struct other_info *other_info)
{ {
if (other_info) if (other_info)
other_info->ai.ptr = NULL; /* Mark for reuse. */ /*
* Use size to denote valid/invalid, since KCSAN entirely
* ignores 0-sized accesses.
*/
other_info->ai.size = 0;
spin_unlock_irqrestore(&report_lock, *flags); raw_spin_unlock_irqrestore(&report_lock, *flags);
} }
/* /*
...@@ -435,14 +442,14 @@ static void set_other_info_task_blocking(unsigned long *flags, ...@@ -435,14 +442,14 @@ static void set_other_info_task_blocking(unsigned long *flags,
*/ */
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
} }
spin_unlock_irqrestore(&report_lock, *flags); raw_spin_unlock_irqrestore(&report_lock, *flags);
/* /*
* We cannot call schedule() since we also cannot reliably * We cannot call schedule() since we also cannot reliably
* determine if sleeping here is permitted -- see in_atomic(). * determine if sleeping here is permitted -- see in_atomic().
*/ */
udelay(1); udelay(1);
spin_lock_irqsave(&report_lock, *flags); raw_spin_lock_irqsave(&report_lock, *flags);
if (timeout-- < 0) { if (timeout-- < 0) {
/* /*
* Abort. Reset @other_info->task to NULL, since it * Abort. Reset @other_info->task to NULL, since it
...@@ -454,62 +461,63 @@ static void set_other_info_task_blocking(unsigned long *flags, ...@@ -454,62 +461,63 @@ static void set_other_info_task_blocking(unsigned long *flags,
break; break;
} }
/* /*
* If @ptr nor @current matches, then our information has been * If invalid, or @ptr nor @current matches, then @other_info
* consumed and we may continue. If not, retry. * has been consumed and we may continue. If not, retry.
*/ */
} while (other_info->ai.ptr == ai->ptr && other_info->task == current); } while (other_info->ai.size && other_info->ai.ptr == ai->ptr &&
other_info->task == current);
if (is_running) if (is_running)
set_current_state(TASK_RUNNING); set_current_state(TASK_RUNNING);
} }
/* /* Populate @other_info; requires that the provided @other_info not in use. */
* Depending on the report type either sets other_info and returns false, or static void prepare_report_producer(unsigned long *flags,
* acquires the matching other_info and returns true. If other_info is not const struct access_info *ai,
* required for the report type, simply acquires report_lock and returns true. struct other_info *other_info)
*/
static bool prepare_report(unsigned long *flags, enum kcsan_report_type type,
const struct access_info *ai, struct other_info *other_info)
{ {
if (type != KCSAN_REPORT_CONSUMED_WATCHPOINT && raw_spin_lock_irqsave(&report_lock, *flags);
type != KCSAN_REPORT_RACE_SIGNAL) {
/* other_info not required; just acquire report_lock */
spin_lock_irqsave(&report_lock, *flags);
return true;
}
retry: /*
spin_lock_irqsave(&report_lock, *flags); * The same @other_infos entry cannot be used concurrently, because
* there is a one-to-one mapping to watchpoint slots (@watchpoints in
switch (type) { * core.c), and a watchpoint is only released for reuse after reporting
case KCSAN_REPORT_CONSUMED_WATCHPOINT: * is done by the consumer of @other_info. Therefore, it is impossible
if (other_info->ai.ptr) * for another concurrent prepare_report_producer() to set the same
break; /* still in use, retry */ * @other_info, and are guaranteed exclusivity for the @other_infos
* entry pointed to by @other_info.
*
* To check this property holds, size should never be non-zero here,
* because every consumer of struct other_info resets size to 0 in
* release_report().
*/
WARN_ON(other_info->ai.size);
other_info->ai = *ai; other_info->ai = *ai;
other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 1); other_info->num_stack_entries = stack_trace_save(other_info->stack_entries, NUM_STACK_ENTRIES, 2);
if (IS_ENABLED(CONFIG_KCSAN_VERBOSE)) if (IS_ENABLED(CONFIG_KCSAN_VERBOSE))
set_other_info_task_blocking(flags, ai, other_info); set_other_info_task_blocking(flags, ai, other_info);
spin_unlock_irqrestore(&report_lock, *flags); raw_spin_unlock_irqrestore(&report_lock, *flags);
}
/* /* Awaits producer to fill @other_info and then returns. */
* The other thread will print the summary; other_info may now static bool prepare_report_consumer(unsigned long *flags,
* be consumed. const struct access_info *ai,
*/ struct other_info *other_info)
return false; {
case KCSAN_REPORT_RACE_SIGNAL: raw_spin_lock_irqsave(&report_lock, *flags);
if (!other_info->ai.ptr) while (!other_info->ai.size) { /* Await valid @other_info. */
break; /* no data available yet, retry */ raw_spin_unlock_irqrestore(&report_lock, *flags);
cpu_relax();
raw_spin_lock_irqsave(&report_lock, *flags);
}
/* /* Should always have a matching access based on watchpoint encoding. */
* First check if this is the other_info we are expecting, i.e. if (WARN_ON(!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
* matches based on how watchpoint was encoded. (unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size)))
*/ goto discard;
if (!matching_access((unsigned long)other_info->ai.ptr & WATCHPOINT_ADDR_MASK, other_info->ai.size,
(unsigned long)ai->ptr & WATCHPOINT_ADDR_MASK, ai->size))
break; /* mismatching watchpoint, retry */
if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size, if (!matching_access((unsigned long)other_info->ai.ptr, other_info->ai.size,
(unsigned long)ai->ptr, ai->size)) { (unsigned long)ai->ptr, ai->size)) {
...@@ -518,64 +526,42 @@ static bool prepare_report(unsigned long *flags, enum kcsan_report_type type, ...@@ -518,64 +526,42 @@ static bool prepare_report(unsigned long *flags, enum kcsan_report_type type,
* positive due to watchpoint encoding. * positive due to watchpoint encoding.
*/ */
kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES); kcsan_counter_inc(KCSAN_COUNTER_ENCODING_FALSE_POSITIVES);
goto discard;
}
return true;
/* discard this other_info */ discard:
release_report(flags, other_info); release_report(flags, other_info);
return false; return false;
} }
if (!((ai->access_type | other_info->ai.access_type) & KCSAN_ACCESS_WRITE)) { /*
/* * Depending on the report type either sets @other_info and returns false, or
* While the address matches, this is not the other_info * awaits @other_info and returns true. If @other_info is not required for the
* from the thread that consumed our watchpoint, since * report type, simply acquires @report_lock and returns true.
* neither this nor the access in other_info is a write.
* It is invalid to continue with the report, since we
* only have information about reads.
*
* This can happen due to concurrent races on the same
* address, with at least 4 threads. To avoid locking up
* other_info and all other threads, we have to consume
* it regardless.
*
* A concrete case to illustrate why we might lock up if
* we do not consume other_info:
*
* We have 4 threads, all accessing the same address
* (or matching address ranges). Assume the following
* watcher and watchpoint consumer pairs:
* write1-read1, read2-write2. The first to populate
* other_info is write2, however, write1 consumes it,
* resulting in a report of write1-write2. This report
* is valid, however, now read1 populates other_info;
* read2-read1 is an invalid conflict, yet, no other
* conflicting access is left. Therefore, we must
* consume read1's other_info.
*
* Since this case is assumed to be rare, it is
* reasonable to omit this report: one of the other
* reports includes information about the same shared
* data, and at this point the likelihood that we
* re-report the same race again is high.
*/ */
release_report(flags, other_info); static noinline bool prepare_report(unsigned long *flags,
enum kcsan_report_type type,
const struct access_info *ai,
struct other_info *other_info)
{
switch (type) {
case KCSAN_REPORT_CONSUMED_WATCHPOINT:
prepare_report_producer(flags, ai, other_info);
return false; return false;
} case KCSAN_REPORT_RACE_SIGNAL:
return prepare_report_consumer(flags, ai, other_info);
/* Matching access in other_info. */
return true;
default: default:
BUG(); /* @other_info not required; just acquire @report_lock. */
raw_spin_lock_irqsave(&report_lock, *flags);
return true;
} }
spin_unlock_irqrestore(&report_lock, *flags);
goto retry;
} }
void kcsan_report(const volatile void *ptr, size_t size, int access_type, void kcsan_report(const volatile void *ptr, size_t size, int access_type,
enum kcsan_value_change value_change, enum kcsan_value_change value_change,
enum kcsan_report_type type) enum kcsan_report_type type, int watchpoint_idx)
{ {
unsigned long flags = 0; unsigned long flags = 0;
const struct access_info ai = { const struct access_info ai = {
...@@ -586,7 +572,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, ...@@ -586,7 +572,11 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
.cpu_id = raw_smp_processor_id() .cpu_id = raw_smp_processor_id()
}; };
struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN struct other_info *other_info = type == KCSAN_REPORT_RACE_UNKNOWN_ORIGIN
? NULL : &other_infos[0]; ? NULL : &other_infos[watchpoint_idx];
kcsan_disable_current();
if (WARN_ON(watchpoint_idx < 0 || watchpoint_idx >= ARRAY_SIZE(other_infos)))
goto out;
/* /*
* With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if
...@@ -596,7 +586,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, ...@@ -596,7 +586,6 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
*/ */
lockdep_off(); lockdep_off();
kcsan_disable_current();
if (prepare_report(&flags, type, &ai, other_info)) { if (prepare_report(&flags, type, &ai, other_info)) {
/* /*
* Never report if value_change is FALSE, only if we it is * Never report if value_change is FALSE, only if we it is
...@@ -611,7 +600,8 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, ...@@ -611,7 +600,8 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type,
release_report(&flags, other_info); release_report(&flags, other_info);
} }
kcsan_enable_current();
lockdep_on(); lockdep_on();
out:
kcsan_enable_current();
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment