Commit bf8f91e7 authored by Romain Perier, committed by Herbert Xu

crypto: marvell - Add load balancing between engines

This commit adds support for fine-grained load balancing on
multi-engine IPs. The engine is pre-selected based on its current load
and on the weight of the crypto request that is about to be processed.
The global crypto queue is also moved to each engine. These changes are
required to allow chaining crypto requests at the DMA level. By using
a crypto queue per engine, we make sure that we keep the state of the
tdma chain synchronized with the crypto queue. We also reduce contention
on 'cesa_dev->lock' and improve parallelism.
Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 2786cee8
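For orientation, here is a small standalone C model of the balancing policy this patch implements: each engine keeps an atomic load counter, a request of a given weight is dispatched to the least-loaded engine, and the weight is released again on completion. The snippet is illustrative only; `struct engine`, `select_engine()`, `complete_request()`, `NENGINES` and the weights are made up for the example, while the real driver logic lives in `mv_cesa_select_engine()` and the per-request `complete()` callbacks shown in the diff below.

```c
#include <stdatomic.h>
#include <limits.h>
#include <stdio.h>

#define NENGINES 2	/* made-up engine count for the example */

struct engine {
	atomic_uint load;	/* mirrors mv_cesa_engine::load */
};

static struct engine engines[NENGINES];

/* Pick the engine with the smallest current load and charge it the weight. */
static struct engine *select_engine(unsigned int weight)
{
	struct engine *selected = &engines[0];
	unsigned int min_load = UINT_MAX;
	int i;

	for (i = 0; i < NENGINES; i++) {
		unsigned int load = atomic_load(&engines[i].load);

		if (load < min_load) {
			min_load = load;
			selected = &engines[i];
		}
	}

	atomic_fetch_add(&selected->load, weight);
	return selected;
}

/* On completion, release the weight charged at selection time. */
static void complete_request(struct engine *engine, unsigned int weight)
{
	atomic_fetch_sub(&engine->load, weight);
}

int main(void)
{
	/* Two requests: the second one lands on the other, lighter engine. */
	struct engine *e0 = select_engine(4096);
	struct engine *e1 = select_engine(1024);

	printf("req0 -> engine %ld, req1 -> engine %ld\n",
	       (long)(e0 - engines), (long)(e1 - engines));

	complete_request(e0, 4096);
	complete_request(e1, 1024);
	return 0;
}
```

In the driver, the weight is the request's byte count: it is charged in mv_cesa_select_engine() and released with atomic_sub() in the cipher and hash complete() callbacks.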
drivers/crypto/marvell/cesa.c

@@ -40,16 +40,14 @@ MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if over
 
 struct mv_cesa_dev *cesa_dev;
 
-static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine)
+static void mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine)
 {
 	struct crypto_async_request *req, *backlog;
 	struct mv_cesa_ctx *ctx;
 
-	spin_lock_bh(&cesa_dev->lock);
-	backlog = crypto_get_backlog(&cesa_dev->queue);
-	req = crypto_dequeue_request(&cesa_dev->queue);
+	backlog = crypto_get_backlog(&engine->queue);
+	req = crypto_dequeue_request(&engine->queue);
 	engine->req = req;
-	spin_unlock_bh(&cesa_dev->lock);
 
 	if (!req)
 		return;
@@ -58,7 +56,6 @@ static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine)
 		backlog->complete(backlog, -EINPROGRESS);
 
 	ctx = crypto_tfm_ctx(req->tfm);
-	ctx->ops->prepare(req, engine);
 	ctx->ops->step(req);
 }
 
@@ -96,7 +93,7 @@ static irqreturn_t mv_cesa_int(int irq, void *priv)
 			if (res != -EINPROGRESS) {
 				spin_lock_bh(&engine->lock);
 				engine->req = NULL;
-				mv_cesa_dequeue_req_unlocked(engine);
+				mv_cesa_dequeue_req_locked(engine);
 				spin_unlock_bh(&engine->lock);
 				ctx->ops->complete(req);
 				ctx->ops->cleanup(req);
@@ -116,21 +113,19 @@ int mv_cesa_queue_req(struct crypto_async_request *req,
 		      struct mv_cesa_req *creq)
 {
 	int ret;
-	int i;
+	struct mv_cesa_engine *engine = creq->engine;
 
-	spin_lock_bh(&cesa_dev->lock);
-	ret = crypto_enqueue_request(&cesa_dev->queue, req);
-	spin_unlock_bh(&cesa_dev->lock);
+	spin_lock_bh(&engine->lock);
+	ret = crypto_enqueue_request(&engine->queue, req);
+	spin_unlock_bh(&engine->lock);
 
 	if (ret != -EINPROGRESS)
 		return ret;
 
-	for (i = 0; i < cesa_dev->caps->nengines; i++) {
-		spin_lock_bh(&cesa_dev->engines[i].lock);
-		if (!cesa_dev->engines[i].req)
-			mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]);
-		spin_unlock_bh(&cesa_dev->engines[i].lock);
-	}
+	spin_lock_bh(&engine->lock);
+	if (!engine->req)
+		mv_cesa_dequeue_req_locked(engine);
+	spin_unlock_bh(&engine->lock);
 
 	return -EINPROGRESS;
 }
@@ -425,7 +420,7 @@ static int mv_cesa_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	spin_lock_init(&cesa->lock);
-	crypto_init_queue(&cesa->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN);
+
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
 	cesa->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(cesa->regs))
@@ -498,6 +493,9 @@ static int mv_cesa_probe(struct platform_device *pdev)
 						engine);
 		if (ret)
 			goto err_cleanup;
+
+		crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN);
+		atomic_set(&engine->load, 0);
 	}
 
 	cesa_dev = cesa;
drivers/crypto/marvell/cesa.h

@@ -400,7 +400,6 @@ struct mv_cesa_dev_dma {
  * @regs:	device registers
  * @sram_size:	usable SRAM size
  * @lock:	device lock
- * @queue:	crypto request queue
  * @engines:	array of engines
  * @dma:	dma pools
  *
@@ -412,7 +411,6 @@ struct mv_cesa_dev {
 	struct device *dev;
 	unsigned int sram_size;
 	spinlock_t lock;
-	struct crypto_queue queue;
 	struct mv_cesa_engine *engines;
 	struct mv_cesa_dev_dma *dma;
 };
@@ -431,6 +429,8 @@ struct mv_cesa_dev {
  * @int_mask:	interrupt mask cache
  * @pool:	memory pool pointing to the memory region reserved in
  *		SRAM
+ * @queue:	fifo of the pending crypto requests
+ * @load:	engine load counter, useful for load balancing
  *
  * Structure storing CESA engine information.
  */
@@ -446,11 +446,12 @@ struct mv_cesa_engine {
 	size_t max_req_len;
 	u32 int_mask;
 	struct gen_pool *pool;
+	struct crypto_queue queue;
+	atomic_t load;
 };
 
 /**
  * struct mv_cesa_req_ops - CESA request operations
- * @prepare:	prepare a request to be executed on the specified engine
  * @process:	process a request chunk result (should return 0 if the
  *		operation, -EINPROGRESS if it needs more steps or an error
  *		code)
@@ -460,8 +461,6 @@ struct mv_cesa_engine {
  *		needed.
  */
 struct mv_cesa_req_ops {
-	void (*prepare)(struct crypto_async_request *req,
-			struct mv_cesa_engine *engine);
 	int (*process)(struct crypto_async_request *req, u32 status);
 	void (*step)(struct crypto_async_request *req);
 	void (*cleanup)(struct crypto_async_request *req);
@@ -690,6 +689,26 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op)
 int mv_cesa_queue_req(struct crypto_async_request *req,
 		      struct mv_cesa_req *creq);
 
+static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight)
+{
+	int i;
+	u32 min_load = U32_MAX;
+	struct mv_cesa_engine *selected = NULL;
+
+	for (i = 0; i < cesa_dev->caps->nengines; i++) {
+		struct mv_cesa_engine *engine = cesa_dev->engines + i;
+		u32 load = atomic_read(&engine->load);
+		if (load < min_load) {
+			min_load = load;
+			selected = engine;
+		}
+	}
+
+	atomic_add(weight, &selected->load);
+
+	return selected;
+}
+
 /*
  * Helper function that indicates whether a crypto request needs to be
  * cleaned up or not after being enqueued using mv_cesa_queue_req().
drivers/crypto/marvell/cipher.c

@@ -214,6 +214,7 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req)
 	struct mv_cesa_engine *engine = creq->base.engine;
 	unsigned int ivsize;
 
+	atomic_sub(ablkreq->nbytes, &engine->load);
 	ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq));
 
 	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) {
@@ -231,7 +232,6 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req)
 static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = {
 	.step = mv_cesa_ablkcipher_step,
 	.process = mv_cesa_ablkcipher_process,
-	.prepare = mv_cesa_ablkcipher_prepare,
 	.cleanup = mv_cesa_ablkcipher_req_cleanup,
 	.complete = mv_cesa_ablkcipher_complete,
 };
@@ -456,29 +456,41 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req,
 	return ret;
 }
 
-static int mv_cesa_des_op(struct ablkcipher_request *req,
-			  struct mv_cesa_op_ctx *tmpl)
+static int mv_cesa_ablkcipher_queue_req(struct ablkcipher_request *req,
+					struct mv_cesa_op_ctx *tmpl)
 {
-	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
-	struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
 	int ret;
-
-	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES,
-			      CESA_SA_DESC_CFG_CRYPTM_MSK);
-
-	memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE);
+	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
+	struct mv_cesa_engine *engine;
 
 	ret = mv_cesa_ablkcipher_req_init(req, tmpl);
 	if (ret)
 		return ret;
 
+	engine = mv_cesa_select_engine(req->nbytes);
+	mv_cesa_ablkcipher_prepare(&req->base, engine);
+
 	ret = mv_cesa_queue_req(&req->base, &creq->base);
+
 	if (mv_cesa_req_needs_cleanup(&req->base, ret))
 		mv_cesa_ablkcipher_cleanup(req);
 
 	return ret;
 }
 
+static int mv_cesa_des_op(struct ablkcipher_request *req,
+			  struct mv_cesa_op_ctx *tmpl)
+{
+	struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+
+	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES,
+			      CESA_SA_DESC_CFG_CRYPTM_MSK);
+
+	memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE);
+
+	return mv_cesa_ablkcipher_queue_req(req, tmpl);
+}
+
 static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req)
 {
 	struct mv_cesa_op_ctx tmpl;
@@ -580,24 +592,14 @@ struct crypto_alg mv_cesa_cbc_des_alg = {
 static int mv_cesa_des3_op(struct ablkcipher_request *req,
 			   struct mv_cesa_op_ctx *tmpl)
 {
-	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
 	struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	int ret;
 
 	mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES,
 			      CESA_SA_DESC_CFG_CRYPTM_MSK);
 
 	memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE);
 
-	ret = mv_cesa_ablkcipher_req_init(req, tmpl);
-	if (ret)
-		return ret;
-
-	ret = mv_cesa_queue_req(&req->base, &creq->base);
-	if (mv_cesa_req_needs_cleanup(&req->base, ret))
-		mv_cesa_ablkcipher_cleanup(req);
-
-	return ret;
+	return mv_cesa_ablkcipher_queue_req(req, tmpl);
 }
 
 static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req)
@@ -707,9 +709,8 @@ struct crypto_alg mv_cesa_cbc_des3_ede_alg = {
 static int mv_cesa_aes_op(struct ablkcipher_request *req,
 			  struct mv_cesa_op_ctx *tmpl)
 {
-	struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req);
 	struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	int ret, i;
+	int i;
 	u32 *key;
 	u32 cfg;
 
@@ -732,15 +733,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req,
 			      CESA_SA_DESC_CFG_CRYPTM_MSK |
 			      CESA_SA_DESC_CFG_AES_LEN_MSK);
 
-	ret = mv_cesa_ablkcipher_req_init(req, tmpl);
-	if (ret)
-		return ret;
-
-	ret = mv_cesa_queue_req(&req->base, &creq->base);
-	if (mv_cesa_req_needs_cleanup(&req->base, ret))
-		mv_cesa_ablkcipher_cleanup(req);
-
-	return ret;
+	return mv_cesa_ablkcipher_queue_req(req, tmpl);
 }
 
 static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req)
drivers/crypto/marvell/hash.c

@@ -335,6 +335,8 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
 				result[i] = cpu_to_be32(creq->state[i]);
 		}
 	}
+
+	atomic_sub(ahashreq->nbytes, &engine->load);
 }
 
 static void mv_cesa_ahash_prepare(struct crypto_async_request *req,
@@ -365,7 +367,6 @@ static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req)
 static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = {
 	.step = mv_cesa_ahash_step,
 	.process = mv_cesa_ahash_process,
-	.prepare = mv_cesa_ahash_prepare,
 	.cleanup = mv_cesa_ahash_req_cleanup,
 	.complete = mv_cesa_ahash_complete,
 };
@@ -682,13 +683,13 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached)
 	return ret;
 }
 
-static int mv_cesa_ahash_update(struct ahash_request *req)
+static int mv_cesa_ahash_queue_req(struct ahash_request *req)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+	struct mv_cesa_engine *engine;
 	bool cached = false;
 	int ret;
 
-	creq->len += req->nbytes;
 	ret = mv_cesa_ahash_req_init(req, &cached);
 	if (ret)
 		return ret;
@@ -696,61 +697,48 @@ static int mv_cesa_ahash_update(struct ahash_request *req)
 	if (cached)
 		return 0;
 
+	engine = mv_cesa_select_engine(req->nbytes);
+	mv_cesa_ahash_prepare(&req->base, engine);
+
 	ret = mv_cesa_queue_req(&req->base, &creq->base);
+
 	if (mv_cesa_req_needs_cleanup(&req->base, ret))
 		mv_cesa_ahash_cleanup(req);
 
 	return ret;
 }
 
+static int mv_cesa_ahash_update(struct ahash_request *req)
+{
+	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
+
+	creq->len += req->nbytes;
+
+	return mv_cesa_ahash_queue_req(req);
+}
+
 static int mv_cesa_ahash_final(struct ahash_request *req)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl;
-	bool cached = false;
-	int ret;
 
 	mv_cesa_set_mac_op_total_len(tmpl, creq->len);
 	creq->last_req = true;
 	req->nbytes = 0;
 
-	ret = mv_cesa_ahash_req_init(req, &cached);
-	if (ret)
-		return ret;
-
-	if (cached)
-		return 0;
-
-	ret = mv_cesa_queue_req(&req->base, &creq->base);
-	if (mv_cesa_req_needs_cleanup(&req->base, ret))
-		mv_cesa_ahash_cleanup(req);
-
-	return ret;
+	return mv_cesa_ahash_queue_req(req);
 }
 
 static int mv_cesa_ahash_finup(struct ahash_request *req)
 {
 	struct mv_cesa_ahash_req *creq = ahash_request_ctx(req);
 	struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl;
-	bool cached = false;
-	int ret;
 
 	creq->len += req->nbytes;
 	mv_cesa_set_mac_op_total_len(tmpl, creq->len);
 	creq->last_req = true;
 
-	ret = mv_cesa_ahash_req_init(req, &cached);
-	if (ret)
-		return ret;
-
-	if (cached)
-		return 0;
-
-	ret = mv_cesa_queue_req(&req->base, &creq->base);
-	if (mv_cesa_req_needs_cleanup(&req->base, ret))
-		mv_cesa_ahash_cleanup(req);
-
-	return ret;
+	return mv_cesa_ahash_queue_req(req);
 }
 
 static int mv_cesa_ahash_export(struct ahash_request *req, void *hash,