iio: trigger: Fix a scheduling whilst atomic issue seen on tsc2046

IIO triggers are software IRQ chips that split an incoming IRQ into separate IRQs routed to all devices using the trigger. When all consumers are done then a trigger callback reenable() is called. There are a few circumstances under which this can happen in atomic context. 1) A single user of the trigger that calls the iio_trigger_done() function from interrupt context. 2) A race between disconnecting the last device from a trigger and the trigger itself sucessfully being disabled. To avoid a resulting scheduling whilst atomic, close this second corner by using schedule_work() to ensure the reenable is not done in atomic context. Note that drivers must be careful to manage the interaction of set_state() and reenable() callbacks to ensure appropriate reference counting if they are relying on the same hardware controls. Deliberately taking this the slow path rather than via a fixes tree because the error has hard to hit and I would like it to soak for a while before hitting a release kernel. Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: Pengutronix Kernel Team <kernel@pengutronix.de> Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com> Tested-by: Oleksij Rempel <o.rempel@pengutronix.de> Cc: <Stable@vger.kernel.org> Link: https://lore.kernel.org/r/20211017172209.112387-1-jic23@kernel.org

iio: trigger: Fix a scheduling whilst atomic issue seen on tsc2046
IIO triggers are software IRQ chips that split an incoming IRQ into separate IRQs routed to all devices using the trigger. When all consumers are done then a trigger callback reenable() is called. There are a few circumstances under which this can happen in atomic context. 1) A single user of the trigger that calls the iio_trigger_done() function from interrupt context. 2) A race between disconnecting the last device from a trigger and the trigger itself sucessfully being disabled. To avoid a resulting scheduling whilst atomic, close this second corner by using schedule_work() to ensure the reenable is not done in atomic context. Note that drivers must be careful to manage the interaction of set_state() and reenable() callbacks to ensure appropriate reference counting if they are relying on the same hardware controls. Deliberately taking this the slow path rather than via a fixes tree because the error has hard to hit and I would like it to soak for a while before hitting a release kernel. Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Cc: Pengutronix Kernel Team <kernel@pengutronix.de> Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com> Tested-by: Oleksij Rempel <o.rempel@pengutronix.de> Cc: <Stable@vger.kernel.org> Link: https://lore.kernel.org/r/20211017172209.112387-1-jic23@kernel.org
9020ef65 · Jonathan Cameron · fea251b6 · 9020ef65 · 9020ef65
Commit 9020ef65 authored Oct 17, 2021 by Jonathan Cameron
Hide whitespace changes
Inline Side-by-side

Showing with 37 additions and 1 deletion

drivers/iio/industrialio-trigger.c drivers/iio/industrialio-trigger.c +35 -1

include/linux/iio/trigger.h include/linux/iio/trigger.h +2 -0

No files found.
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -162,6 +162,39 @@ static struct iio_trigger *iio_trigger_acquire_by_name(const char *name)
 	return trig;
 }

+static void iio_reenable_work_fn(struct work_struct *work)
+{
+	struct iio_trigger *trig = container_of(work, struct iio_trigger,
+						reenable_work);
+
+	/*
+	 * This 'might' occur after the trigger state is set to disabled -
+	 * in that case the driver should skip reenabling.
+	 */
+	trig->ops->reenable(trig);
+}
+
+/*
+ * In general, reenable callbacks may need to sleep and this path is
+ * not performance sensitive, so just queue up a work item
+ * to reneable the trigger for us.
+ *
+ * Races that can cause this.
+ * 1) A handler occurs entirely in interrupt context so the counter
+ *    the final decrement is still in this interrupt.
+ * 2) The trigger has been removed, but one last interrupt gets through.
+ *
+ * For (1) we must call reenable, but not in atomic context.
+ * For (2) it should be safe to call reenanble, if drivers never blindly
+ * reenable after state is off.
+ */
+static void iio_trigger_notify_done_atomic(struct iio_trigger *trig)
+{
+	if (atomic_dec_and_test(&trig->use_count) && trig->ops &&
+	    trig->ops->reenable)
+		schedule_work(&trig->reenable_work);
+}
+
 void iio_trigger_poll(struct iio_trigger *trig)
 {
 	int i;
@@ -173,7 +206,7 @@ void iio_trigger_poll(struct iio_trigger *trig)
 			if (trig->subirqs[i].enabled)
 				generic_handle_irq(trig->subirq_base + i);
 			else
-				iio_trigger_notify_done(trig);
+				iio_trigger_notify_done_atomic(trig);
 		}
 	}
 }
@@ -535,6 +568,7 @@ struct iio_trigger *viio_trigger_alloc(struct device *parent,
 	trig->dev.type = &iio_trig_type;
 	trig->dev.bus = &iio_bus_type;
 	device_initialize(&trig->dev);
+	INIT_WORK(&trig->reenable_work, iio_reenable_work_fn);

 	mutex_init(&trig->pool_lock);
 	trig->subirq_base = irq_alloc_descs(-1, 0,

--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -55,6 +55,7 @@ struct iio_trigger_ops {
 * @attached_own_device:[INTERN] if we are using our own device as trigger,
 *			i.e. if we registered a poll function to the same
 *			device as the one providing the trigger.
+ * @reenable_work:	[INTERN] work item used to ensure reenable can sleep.
 **/
 struct iio_trigger {
 	const struct iio_trigger_ops	*ops;
@@ -74,6 +75,7 @@ struct iio_trigger {
 	unsigned long pool[BITS_TO_LONGS(CONFIG_IIO_CONSUMERS_PER_TRIGGER)];
 	struct mutex			pool_lock;
 	bool				attached_own_device;
+	struct work_struct		reenable_work;
 };