Commit 7dadf88f authored by Douglas Anderson, committed by Enric Balletbo i Serra

platform/chrome: cros_ec_spi: Move to real time priority for transfers

In commit 37a18622 ("platform/chrome: cros_ec_spi: Transfer
messages at high priority") we moved transfers to a high priority
workqueue.  This helped make them much more reliable.

...but, we still saw failures.

We were actually finding ourselves competing for time with dm-crypt
which also scheduled work on HIGHPRI workqueues.  While we can
consider reverting the change that made dm-crypt run its work at
HIGHPRI, the argument in commit a1b89132 ("dm crypt: use
WQ_HIGHPRI for the IO and crypt workqueues") is somewhat compelling.
It does make sense for IO to be scheduled at a priority that's higher
than the default user priority.  It also turns out that dm-crypt isn't
alone in using high priority like this.  loop_prepare_queue() does
something similar for loopback devices.

Looking in more detail, it can be seen that the high priority
workqueue isn't actually that high of a priority.  It runs at MIN_NICE
which is _fairly_ high priority but still below all real time
priority.

Should we move cros_ec_spi to real time priority to fix our problems,
or is this just escalating a priority war?  I'll argue here that
cros_ec_spi _does_ belong at real time priority.  Specifically
cros_ec_spi actually needs to run quickly for correctness.  As I
understand it, this is exactly what real time priority is for.

There currently doesn't appear to be any way to use the standard
workqueue APIs with a real time priority, so we'll switch over to
using a kthread worker.  We'll match the priority that the SPI
core uses when it wants to do things on a realtime thread and just use
"MAX_RT_PRIO - 1".

This commit plus the patch ("platform/chrome: cros_ec_spi: Request the
SPI thread be realtime") are enough to get communications very close
to 100% reliable (the only known problem left is when serial console
is turned on, which isn't something that happens in shipping devices).
Specifically this test case now passes (tested on rk3288-veyron-jerry):

  dd if=/dev/zero of=/var/log/foo.txt bs=4M count=512&
  while true; do
    ectool version > /dev/null;
  done

It should be noted that "/var/log" is encrypted (and goes through
dm-crypt) and also passes through a loopback device.
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
parent 26a14267
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/spi/spi.h> #include <linux/spi/spi.h>
#include <uapi/linux/sched/types.h>
/* The header byte, which follows the preamble */ /* The header byte, which follows the preamble */
#define EC_MSG_HEADER 0xec #define EC_MSG_HEADER 0xec
...@@ -67,12 +67,14 @@ ...@@ -67,12 +67,14 @@
* is sent when we want to turn on CS at the start of a transaction. * is sent when we want to turn on CS at the start of a transaction.
* @end_of_msg_delay: used to set the delay_usecs on the spi_transfer that * @end_of_msg_delay: used to set the delay_usecs on the spi_transfer that
* is sent when we want to turn off CS at the end of a transaction. * is sent when we want to turn off CS at the end of a transaction.
* @high_pri_worker: Used to schedule high priority work.
*/ */
struct cros_ec_spi { struct cros_ec_spi {
struct spi_device *spi; struct spi_device *spi;
s64 last_transfer_ns; s64 last_transfer_ns;
unsigned int start_of_msg_delay; unsigned int start_of_msg_delay;
unsigned int end_of_msg_delay; unsigned int end_of_msg_delay;
struct kthread_worker *high_pri_worker;
}; };
typedef int (*cros_ec_xfer_fn_t) (struct cros_ec_device *ec_dev, typedef int (*cros_ec_xfer_fn_t) (struct cros_ec_device *ec_dev,
...@@ -89,7 +91,7 @@ typedef int (*cros_ec_xfer_fn_t) (struct cros_ec_device *ec_dev, ...@@ -89,7 +91,7 @@ typedef int (*cros_ec_xfer_fn_t) (struct cros_ec_device *ec_dev,
*/ */
struct cros_ec_xfer_work_params { struct cros_ec_xfer_work_params {
struct work_struct work; struct kthread_work work;
cros_ec_xfer_fn_t fn; cros_ec_xfer_fn_t fn;
struct cros_ec_device *ec_dev; struct cros_ec_device *ec_dev;
struct cros_ec_command *ec_msg; struct cros_ec_command *ec_msg;
...@@ -632,7 +634,7 @@ static int do_cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, ...@@ -632,7 +634,7 @@ static int do_cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
return ret; return ret;
} }
static void cros_ec_xfer_high_pri_work(struct work_struct *work) static void cros_ec_xfer_high_pri_work(struct kthread_work *work)
{ {
struct cros_ec_xfer_work_params *params; struct cros_ec_xfer_work_params *params;
...@@ -644,12 +646,14 @@ static int cros_ec_xfer_high_pri(struct cros_ec_device *ec_dev, ...@@ -644,12 +646,14 @@ static int cros_ec_xfer_high_pri(struct cros_ec_device *ec_dev,
struct cros_ec_command *ec_msg, struct cros_ec_command *ec_msg,
cros_ec_xfer_fn_t fn) cros_ec_xfer_fn_t fn)
{ {
struct cros_ec_xfer_work_params params; struct cros_ec_spi *ec_spi = ec_dev->priv;
struct cros_ec_xfer_work_params params = {
INIT_WORK_ONSTACK(&params.work, cros_ec_xfer_high_pri_work); .work = KTHREAD_WORK_INIT(params.work,
params.ec_dev = ec_dev; cros_ec_xfer_high_pri_work),
params.ec_msg = ec_msg; .ec_dev = ec_dev,
params.fn = fn; .ec_msg = ec_msg,
.fn = fn,
};
/* /*
* This looks a bit ridiculous. Why do the work on a * This looks a bit ridiculous. Why do the work on a
...@@ -660,9 +664,8 @@ static int cros_ec_xfer_high_pri(struct cros_ec_device *ec_dev, ...@@ -660,9 +664,8 @@ static int cros_ec_xfer_high_pri(struct cros_ec_device *ec_dev,
* context switched out for too long and the EC giving up on * context switched out for too long and the EC giving up on
* the transfer. * the transfer.
*/ */
queue_work(system_highpri_wq, &params.work); kthread_queue_work(ec_spi->high_pri_worker, &params.work);
flush_work(&params.work); kthread_flush_work(&params.work);
destroy_work_on_stack(&params.work);
return params.ret; return params.ret;
} }
...@@ -694,6 +697,40 @@ static void cros_ec_spi_dt_probe(struct cros_ec_spi *ec_spi, struct device *dev) ...@@ -694,6 +697,40 @@ static void cros_ec_spi_dt_probe(struct cros_ec_spi *ec_spi, struct device *dev)
ec_spi->end_of_msg_delay = val; ec_spi->end_of_msg_delay = val;
} }
/*
 * Release callback for the high priority kthread worker.
 *
 * Registered as a device-managed action by cros_ec_spi_devm_high_pri_alloc();
 * @worker is the kthread worker that was created there and is handed
 * straight to kthread_destroy_worker().
 */
static void cros_ec_spi_high_pri_release(void *worker)
{
kthread_destroy_worker(worker);
}
/*
 * Create the kthread worker used for EC transfers and make it realtime.
 *
 * @dev: device used for error logging and for registering the release action.
 * @ec_spi: driver data; its high_pri_worker field is set by this function.
 *
 * Creates a kthread worker named "cros_ec_spi_high_pri", registers
 * cros_ec_spi_high_pri_release() as a device-managed action for it, and then
 * switches the worker's task to SCHED_FIFO at priority MAX_RT_PRIO - 1.
 *
 * Return: the error code of the first step that fails, otherwise the result
 * of sched_setscheduler_nocheck().
 */
static int cros_ec_spi_devm_high_pri_alloc(struct device *dev,
struct cros_ec_spi *ec_spi)
{
/* Same priority the SPI core uses for its own realtime thread. */
struct sched_param sched_priority = {
.sched_priority = MAX_RT_PRIO - 1,
};
int err;
ec_spi->high_pri_worker =
kthread_create_worker(0, "cros_ec_spi_high_pri");
if (IS_ERR(ec_spi->high_pri_worker)) {
err = PTR_ERR(ec_spi->high_pri_worker);
dev_err(dev, "Can't create cros_ec high pri worker: %d\n", err);
return err;
}
/*
 * Tie the worker's lifetime to the device. NOTE(review): the "_or_reset"
 * variant is expected to invoke the release action itself on failure, so
 * no explicit cleanup is done here - confirm against the devres API.
 */
err = devm_add_action_or_reset(dev, cros_ec_spi_high_pri_release,
ec_spi->high_pri_worker);
if (err)
return err;
/* Promote the worker thread to realtime FIFO scheduling. */
err = sched_setscheduler_nocheck(ec_spi->high_pri_worker->task,
SCHED_FIFO, &sched_priority);
if (err)
dev_err(dev, "Can't set cros_ec high pri priority: %d\n", err);
return err;
}
static int cros_ec_spi_probe(struct spi_device *spi) static int cros_ec_spi_probe(struct spi_device *spi)
{ {
struct device *dev = &spi->dev; struct device *dev = &spi->dev;
...@@ -732,6 +769,10 @@ static int cros_ec_spi_probe(struct spi_device *spi) ...@@ -732,6 +769,10 @@ static int cros_ec_spi_probe(struct spi_device *spi)
ec_spi->last_transfer_ns = ktime_get_ns(); ec_spi->last_transfer_ns = ktime_get_ns();
err = cros_ec_spi_devm_high_pri_alloc(dev, ec_spi);
if (err)
return err;
err = cros_ec_register(ec_dev); err = cros_ec_register(ec_dev);
if (err) { if (err) {
dev_err(dev, "cannot register EC\n"); dev_err(dev, "cannot register EC\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment