Commit 9dd6956b authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-6.4/block-2023-04-21' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - drbd patches, bringing us closer to unifying the out-of-tree version
   and the in tree one (Andreas, Christoph)

 - support for auto-quiesce for the s390 dasd driver (Stefan)

 - MD pull request via Song:
      - md/bitmap: Optimal last page size (Jon Derrick)
      - Various raid10 fixes (Yu Kuai, Li Nan)
      - md: add error_handlers for raid0 and linear (Mariusz Tkaczyk)

 - NVMe pull request via Christoph:
      - Drop redundant pci_enable_pcie_error_reporting (Bjorn Helgaas)
      - Validate nvmet module parameters (Chaitanya Kulkarni)
      - Fence TCP socket on receive error (Chris Leech)
      - Fix async event trace event (Keith Busch)
      - Minor cleanups (Chaitanya Kulkarni, zhenwei pi)
      - Fix and cleanup nvmet Identify handling (Damien Le Moal,
        Christoph Hellwig)
      - Fix double blk_mq_complete_request race in the timeout handler
        (Lei Yin)
      - Fix irq lockin...
parents 5b9a7bb7 55793ea5
......@@ -336,18 +336,11 @@ What: /sys/block/<disk>/queue/io_poll_delay
Date: November 2016
Contact: linux-block@vger.kernel.org
Description:
[RW] If polling is enabled, this controls what kind of polling
will be performed. It defaults to -1, which is classic polling.
[RW] This was used to control what kind of polling will be
performed. It is now fixed to -1, which is classic polling.
In this mode, the CPU will repeatedly ask for completions
without giving up any time. If set to 0, a hybrid polling mode
is used, where the kernel will attempt to make an educated guess
at when the IO will complete. Based on this guess, the kernel
will put the process issuing IO to sleep for an amount of time,
before entering a classic poll loop. This mode might be a little
slower than pure classic polling, but it will be more efficient.
If set to a value larger than 0, the kernel will put the process
issuing IO to sleep for this amount of microseconds before
entering classic polling.
without giving up any time.
<deprecated>
What: /sys/block/<disk>/queue/io_timeout
......
......@@ -270,8 +270,7 @@ Request queue based layered devices like dm-rq that wish to support inline
encryption need to create their own blk_crypto_profile for their request_queue,
and expose whatever functionality they choose. When a layered device wants to
pass a clone of that request to another request_queue, blk-crypto will
initialize and prepare the clone as necessary; see
``blk_crypto_insert_cloned_request()``.
initialize and prepare the clone as necessary.
Interaction between inline encryption and blk integrity
=======================================================
......
......@@ -52,6 +52,14 @@ Available fault injection capabilities
status code is NVME_SC_INVALID_OPCODE with no retry. The status code and
retry flag can be set via the debugfs.
- Null test block driver fault injection
inject IO timeouts by setting config items under
/sys/kernel/config/nullb/<disk>/timeout_inject,
inject requeue requests by setting config items under
/sys/kernel/config/nullb/<disk>/requeue_inject, and
inject init_hctx() errors by setting config items under
/sys/kernel/config/nullb/<disk>/init_hctx_fault_inject.
Configure fault-injection capabilities behavior
-----------------------------------------------
......
......@@ -78,6 +78,7 @@ typedef struct dasd_information2_t {
* 0x040: give access to raw eckd data
* 0x080: enable discard support
* 0x100: enable autodisable for IFCC errors (default)
* 0x200: enable requeue of all requests on autoquiesce
*/
#define DASD_FEATURE_READONLY 0x001
#define DASD_FEATURE_USEDIAG 0x002
......@@ -88,6 +89,7 @@ typedef struct dasd_information2_t {
#define DASD_FEATURE_USERAW 0x040
#define DASD_FEATURE_DISCARD 0x080
#define DASD_FEATURE_PATH_AUTODISABLE 0x100
#define DASD_FEATURE_REQUEUEQUIESCE 0x200
#define DASD_FEATURE_DEFAULT DASD_FEATURE_PATH_AUTODISABLE
#define DASD_PARTN_BITS 2
......
......@@ -215,11 +215,6 @@ config BLK_MQ_VIRTIO
depends on VIRTIO
default y
config BLK_MQ_RDMA
bool
depends on INFINIBAND
default y
config BLK_PM
def_bool PM
......
......@@ -30,7 +30,6 @@ obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
......
......@@ -419,6 +419,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
bdev->bd_inode = inode;
bdev->bd_queue = disk->queue;
bdev->bd_stats = alloc_percpu(struct disk_stats);
bdev->bd_has_submit_bio = false;
if (!bdev->bd_stats) {
iput(inode);
return NULL;
......
......@@ -497,15 +497,9 @@ static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp)
bgd = kzalloc(sizeof(*bgd), gfp);
if (!bgd)
return NULL;
return &bgd->pd;
}
static void bfq_cpd_init(struct blkcg_policy_data *cpd)
{
struct bfq_group_data *d = cpd_to_bfqgd(cpd);
d->weight = cgroup_subsys_on_dfl(io_cgrp_subsys) ?
CGROUP_WEIGHT_DFL : BFQ_WEIGHT_LEGACY_DFL;
bgd->weight = CGROUP_WEIGHT_DFL;
return &bgd->pd;
}
static void bfq_cpd_free(struct blkcg_policy_data *cpd)
......@@ -1111,9 +1105,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
struct bfq_group *bfqg;
u64 v;
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx);
if (ret)
return ret;
goto out;
if (sscanf(ctx.body, "%llu", &v) == 1) {
/* require "default" on dfl */
......@@ -1135,7 +1131,7 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
ret = 0;
}
out:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}
......@@ -1301,8 +1297,6 @@ struct blkcg_policy blkcg_policy_bfq = {
.legacy_cftypes = bfq_blkcg_legacy_files,
.cpd_alloc_fn = bfq_cpd_alloc,
.cpd_init_fn = bfq_cpd_init,
.cpd_bind_fn = bfq_cpd_init,
.cpd_free_fn = bfq_cpd_free,
.pd_alloc_fn = bfq_pd_alloc,
......
......@@ -129,7 +129,6 @@
#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
#include "bfq-iosched.h"
#include "blk-wbt.h"
......@@ -649,6 +648,8 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
sched_data->service_tree[i].wsum;
}
}
if (!wsum)
continue;
limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum);
if (entity->allocated >= limit) {
bfq_log_bfqq(bfqq->bfqd, bfqq,
......@@ -6232,7 +6233,7 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
static struct bfq_queue *bfq_init_rq(struct request *rq);
static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head)
blk_insert_t flags)
{
struct request_queue *q = hctx->queue;
struct bfq_data *bfqd = q->elevator->elevator_data;
......@@ -6255,11 +6256,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
trace_block_rq_insert(rq);
if (!bfqq || at_head) {
if (at_head)
list_add(&rq->queuelist, &bfqd->dispatch);
else
list_add_tail(&rq->queuelist, &bfqd->dispatch);
if (flags & BLK_MQ_INSERT_AT_HEAD) {
list_add(&rq->queuelist, &bfqd->dispatch);
} else if (!bfqq) {
list_add_tail(&rq->queuelist, &bfqd->dispatch);
} else {
idle_timer_disabled = __bfq_insert_request(bfqd, rq);
/*
......@@ -6289,14 +6289,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
}
static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,
struct list_head *list, bool at_head)
struct list_head *list,
blk_insert_t flags)
{
while (!list_empty(list)) {
struct request *rq;
rq = list_first_entry(list, struct request, queuelist);
list_del_init(&rq->queuelist);
bfq_insert_request(hctx, rq, at_head);
bfq_insert_request(hctx, rq, flags);
}
}
......
......@@ -20,7 +20,6 @@
#define BFQ_DEFAULT_QUEUE_IOPRIO 4
#define BFQ_WEIGHT_LEGACY_DFL 100
#define BFQ_DEFAULT_GRP_IOPRIO 0
#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
......
......@@ -33,7 +33,6 @@
#include "blk-cgroup.h"
#include "blk-ioprio.h"
#include "blk-throttle.h"
#include "blk-rq-qos.h"
/*
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
......@@ -693,69 +692,93 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
/**
* blkcg_conf_open_bdev - parse and open bdev for per-blkg config update
* @inputp: input string pointer
* blkg_conf_init - initialize a blkg_conf_ctx
* @ctx: blkg_conf_ctx to initialize
* @input: input string
*
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update
* from @input and get and return the matching bdev. *@inputp is
* updated to point past the device node prefix. Returns an ERR_PTR()
* value on error.
* Initialize @ctx which can be used to parse blkg config input string @input.
* Once initialized, @ctx can be used with blkg_conf_open_bdev() and
* blkg_conf_prep(), and must be cleaned up with blkg_conf_exit().
*/
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input)
{
*ctx = (struct blkg_conf_ctx){ .input = input };
}
EXPORT_SYMBOL_GPL(blkg_conf_init);
/**
* blkg_conf_open_bdev - parse and open bdev for per-blkg config update
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
* Use this function iff blkg_conf_prep() can't be used for some reason.
* Parse the device node prefix part, MAJ:MIN, of per-blkg config update from
* @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is
* set to point past the device node prefix.
*
* This function may be called multiple times on @ctx and the extra calls become
* NOOPs. blkg_conf_prep() implicitly calls this function. Use this function
* explicitly if bdev access is needed without resolving the blkcg / policy part
* of @ctx->input. Returns -errno on error.
*/
struct block_device *blkcg_conf_open_bdev(char **inputp)
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
{
char *input = *inputp;
char *input = ctx->input;
unsigned int major, minor;
struct block_device *bdev;
int key_len;
if (ctx->bdev)
return 0;
if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
return ERR_PTR(-EINVAL);
return -EINVAL;
input += key_len;
if (!isspace(*input))
return ERR_PTR(-EINVAL);
return -EINVAL;
input = skip_spaces(input);
bdev = blkdev_get_no_open(MKDEV(major, minor));
if (!bdev)
return ERR_PTR(-ENODEV);
return -ENODEV;
if (bdev_is_partition(bdev)) {
blkdev_put_no_open(bdev);
return ERR_PTR(-ENODEV);
return -ENODEV;
}
*inputp = input;
return bdev;
ctx->body = input;
ctx->bdev = bdev;
return 0;
}
/**
* blkg_conf_prep - parse and prepare for per-blkg config update
* @blkcg: target block cgroup
* @pol: target policy
* @input: input string
* @ctx: blkg_conf_ctx to be filled
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
* Parse per-blkg config update from @ctx->input and initialize @ctx
* accordingly. On success, @ctx->body points to the part of @ctx->input
* following MAJ:MIN, @ctx->bdev points to the target block device and
* @ctx->blkg to the blkg being configured.
*
* Parse per-blkg config update from @input and initialize @ctx with the
* result. @ctx->blkg points to the blkg to be updated and @ctx->body the
* part of @input following MAJ:MIN. This function returns with RCU read
* lock and queue lock held and must be paired with blkg_conf_finish().
* blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this
* function returns with queue lock held and must be followed by
* blkg_conf_exit().
*/
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx)
__acquires(rcu) __acquires(&bdev->bd_queue->queue_lock)
struct blkg_conf_ctx *ctx)
__acquires(&bdev->bd_queue->queue_lock)
{
struct block_device *bdev;
struct gendisk *disk;
struct request_queue *q;
struct blkcg_gq *blkg;
int ret;
bdev = blkcg_conf_open_bdev(&input);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
disk = bdev->bd_disk;
ret = blkg_conf_open_bdev(ctx);
if (ret)
return ret;
disk = ctx->bdev->bd_disk;
q = disk->queue;
/*
......@@ -766,7 +789,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
if (ret)
goto fail;
rcu_read_lock();
spin_lock_irq(&q->queue_lock);
if (!blkcg_policy_enabled(q, pol)) {
......@@ -795,7 +817,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
/* Drop locks to do new blkg allocation with GFP_KERNEL. */
spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
new_blkg = blkg_alloc(pos, disk, GFP_KERNEL);
if (unlikely(!new_blkg)) {
......@@ -809,7 +830,6 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
goto fail_exit_queue;
}
rcu_read_lock();
spin_lock_irq(&q->queue_lock);
if (!blkcg_policy_enabled(q, pol)) {
......@@ -836,20 +856,16 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
}
success:
blk_queue_exit(q);
ctx->bdev = bdev;
ctx->blkg = blkg;
ctx->body = input;
return 0;
fail_preloaded:
radix_tree_preload_end();
fail_unlock:
spin_unlock_irq(&q->queue_lock);
rcu_read_unlock();
fail_exit_queue:
blk_queue_exit(q);
fail:
blkdev_put_no_open(bdev);
/*
* If queue was bypassing, we should retry. Do so after a
* short msleep(). It isn't strictly necessary but queue
......@@ -865,20 +881,27 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
EXPORT_SYMBOL_GPL(blkg_conf_prep);
/**
* blkg_conf_finish - finish up per-blkg config update
* @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
* blkg_conf_exit - clean up per-blkg config update
* @ctx: blkg_conf_ctx initialized with blkg_conf_init()
*
* Finish up after per-blkg config update. This function must be paired
* with blkg_conf_prep().
* Clean up after per-blkg config update. This function must be called on all
* blkg_conf_ctx's initialized with blkg_conf_init().
*/
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock) __releases(rcu)
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
{
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
rcu_read_unlock();
blkdev_put_no_open(ctx->bdev);
if (ctx->blkg) {
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
ctx->blkg = NULL;
}
if (ctx->bdev) {
blkdev_put_no_open(ctx->bdev);
ctx->body = NULL;
ctx->bdev = NULL;
}
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
EXPORT_SYMBOL_GPL(blkg_conf_exit);
static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
{
......@@ -1289,8 +1312,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
blkcg->cpd[i] = cpd;
cpd->blkcg = blkcg;
cpd->plid = i;
if (pol->cpd_init_fn)
pol->cpd_init_fn(cpd);
}
spin_lock_init(&blkcg->lock);
......@@ -1368,14 +1389,8 @@ int blkcg_init_disk(struct gendisk *disk)
if (ret)
goto err_ioprio_exit;
ret = blk_iolatency_init(disk);
if (ret)
goto err_throtl_exit;
return 0;
err_throtl_exit:
blk_throtl_exit(disk);
err_ioprio_exit:
blk_ioprio_exit(disk);
err_destroy_all:
......@@ -1391,30 +1406,9 @@ int blkcg_init_disk(struct gendisk *disk)
void blkcg_exit_disk(struct gendisk *disk)
{
blkg_destroy_all(disk);
rq_qos_exit(disk->queue);
blk_throtl_exit(disk);
}
static void blkcg_bind(struct cgroup_subsys_state *root_css)
{
int i;
mutex_lock(&blkcg_pol_mutex);
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
struct blkcg *blkcg;
if (!pol || !pol->cpd_bind_fn)
continue;
list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node)
if (blkcg->cpd[pol->plid])
pol->cpd_bind_fn(blkcg->cpd[pol->plid]);
}
mutex_unlock(&blkcg_pol_mutex);
}
static void blkcg_exit(struct task_struct *tsk)
{
if (tsk->throttle_disk)
......@@ -1428,7 +1422,6 @@ struct cgroup_subsys io_cgrp_subsys = {
.css_offline = blkcg_css_offline,
.css_free = blkcg_css_free,
.css_rstat_flush = blkcg_rstat_flush,
.bind = blkcg_bind,
.dfl_cftypes = blkcg_files,
.legacy_cftypes = blkcg_legacy_files,
.legacy_name = "blkio",
......@@ -1666,8 +1659,6 @@ int blkcg_policy_register(struct blkcg_policy *pol)
blkcg->cpd[pol->plid] = cpd;
cpd->blkcg = blkcg;
cpd->plid = pol->plid;
if (pol->cpd_init_fn)
pol->cpd_init_fn(cpd);
}
}
......
......@@ -174,9 +174,7 @@ struct blkcg_policy {
/* operations */
blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
blkcg_pol_init_cpd_fn *cpd_init_fn;
blkcg_pol_free_cpd_fn *cpd_free_fn;
blkcg_pol_bind_cpd_fn *cpd_bind_fn;
blkcg_pol_alloc_pd_fn *pd_alloc_fn;
blkcg_pol_init_pd_fn *pd_init_fn;
......@@ -209,15 +207,17 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
struct blkg_conf_ctx {
char *input;
char *body;
struct block_device *bdev;
struct blkcg_gq *blkg;
char *body;
};
struct block_device *blkcg_conf_open_bdev(char **inputp);
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
struct blkg_conf_ctx *ctx);
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
......
......@@ -263,13 +263,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
static void blk_free_queue(struct request_queue *q)
{
if (q->poll_stat)
blk_stat_remove_callback(q, q->poll_cb);
blk_stat_free_callback(q->poll_cb);
blk_free_queue_stats(q->stats);
kfree(q->poll_stat);
if (queue_is_mq(q))
blk_mq_release(q);
......@@ -593,14 +587,14 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
static void __submit_bio(struct bio *bio)
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
if (unlikely(!blk_crypto_bio_prep(&bio)))
return;
if (!disk->fops->submit_bio) {
if (!bio->bi_bdev->bd_has_submit_bio) {
blk_mq_submit_bio(bio);
} else if (likely(bio_queue_enter(bio) == 0)) {
struct gendisk *disk = bio->bi_bdev->bd_disk;
disk->fops->submit_bio(bio);
blk_queue_exit(disk->queue);
}
......@@ -704,7 +698,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
*/
if (current->bio_list)
bio_list_add(&current->bio_list[0], bio);
else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
else if (!bio->bi_bdev->bd_has_submit_bio)
__submit_bio_noacct_mq(bio);
else
__submit_bio_noacct(bio);
......
......@@ -65,6 +65,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
return rq->crypt_ctx;
}
static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
{
return rq->crypt_keyslot;
}
blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
const struct blk_crypto_key *key,
struct blk_crypto_keyslot **slot_ptr);
......@@ -119,6 +124,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq)
return false;
}
static inline bool blk_crypto_rq_has_keyslot(struct request *rq)
{
return false;
}
#endif /* CONFIG_BLK_INLINE_ENCRYPTION */
void __bio_crypt_advance(struct bio *bio, unsigned int bytes);
......@@ -153,14 +163,21 @@ static inline bool blk_crypto_bio_prep(struct bio **bio_ptr)
return true;
}
blk_status_t __blk_crypto_init_request(struct request *rq);
static inline blk_status_t blk_crypto_init_request(struct request *rq)
blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq);
static inline blk_status_t blk_crypto_rq_get_keyslot(struct request *rq)
{
if (blk_crypto_rq_is_encrypted(rq))
return __blk_crypto_init_request(rq);
return __blk_crypto_rq_get_keyslot(rq);
return BLK_STS_OK;
}
void __blk_crypto_rq_put_keyslot(struct request *rq);
static inline void blk_crypto_rq_put_keyslot(struct request *rq)
{
if (blk_crypto_rq_has_keyslot(rq))
__blk_crypto_rq_put_keyslot(rq);
}
void __blk_crypto_free_request(struct request *rq);
static inline void blk_crypto_free_request(struct request *rq)
{
......@@ -188,21 +205,6 @@ static inline int blk_crypto_rq_bio_prep(struct request *rq, struct bio *bio,
return 0;
}
/**
* blk_crypto_insert_cloned_request - Prepare a cloned request to be inserted
* into a request queue.
* @rq: the request being queued
*
* Return: BLK_STS_OK on success, nonzero on error.
*/
static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq)
{
if (blk_crypto_rq_is_encrypted(rq))
return blk_crypto_init_request(rq);
return BLK_STS_OK;
}
#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK
int blk_crypto_fallback_start_using_mode(enum blk_crypto_mode_num mode_num);
......
......@@ -227,14 +227,13 @@ EXPORT_SYMBOL_GPL(blk_crypto_keyslot_index);
* @profile: the crypto profile of the device the key will be used on
* @key: the key that will be used
* @slot_ptr: If a keyslot is allocated, an opaque pointer to the keyslot struct
* will be stored here; otherwise NULL will be stored here.
* will be stored here. blk_crypto_put_keyslot() must be called
* later to release it. Otherwise, NULL will be stored here.
*
* If the device has keyslots, this gets a keyslot that's been programmed with
* the specified key. If the key is already in a slot, this reuses it;
* otherwise this waits for a slot to become idle and programs the key into it.
*
* This must be paired with a call to blk_crypto_put_keyslot().
*
* Context: Process context. Takes and releases profile->lock.
* Return: BLK_STS_OK on success, meaning that either a keyslot was allocated or
* one wasn't needed; or a blk_status_t error on failure.
......@@ -312,20 +311,15 @@ blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
/**
* blk_crypto_put_keyslot() - Release a reference to a keyslot
* @slot: The keyslot to release the reference of (may be NULL).
* @slot: The keyslot to release the reference of
*
* Context: Any context.
*/
void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot)
{
struct blk_crypto_profile *profile;
struct blk_crypto_profile *profile = slot->profile;
unsigned long flags;
if (!slot)
return;
profile = slot->profile;
if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
&profile->idle_slots_lock, flags)) {
list_add_tail(&slot->idle_slot_node, &profile->idle_slots);
......@@ -354,28 +348,16 @@ bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
return true;
}
/**
* __blk_crypto_evict_key() - Evict a key from a device.
* @profile: the crypto profile of the device
* @key: the key to evict. It must not still be used in any I/O.
*
* If the device has keyslots, this finds the keyslot (if any) that contains the
* specified key and calls the driver's keyslot_evict function to evict it.
*
* Otherwise, this just calls the driver's keyslot_evict function if it is
* implemented, passing just the key (without any particular keyslot). This
* allows layered devices to evict the key from their underlying devices.
*
* Context: Process context. Takes and releases profile->lock.
* Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
* if the keyslot is still in use, or another -errno value on other
* error.
/*
* This is an internal function that evicts a key from an inline encryption
* device that can be either a real device or the blk-crypto-fallback "device".
* It is used only by blk_crypto_evict_key(); see that function for details.
*/
int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
const struct blk_crypto_key *key)
{
struct blk_crypto_keyslot *slot;
int err = 0;
int err;
if (profile->num_slots == 0) {
if (profile->ll_ops.keyslot_evict) {
......@@ -389,22 +371,30 @@ int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
blk_crypto_hw_enter(profile);
slot = blk_crypto_find_keyslot(profile, key);
if (!slot)
goto out_unlock;
if (!slot) {
/*
* Not an error, since a key not in use by I/O is not guaranteed
* to be in a keyslot. There can be more keys than keyslots.
*/
err = 0;
goto out;
}
if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
/* BUG: key is still in use by I/O */
err = -EBUSY;
goto out_unlock;
goto out_remove;
}
err = profile->ll_ops.keyslot_evict(profile, key,
blk_crypto_keyslot_index(slot));
if (err)
goto out_unlock;
out_remove:
/*
* Callers free the key even on error, so unlink the key from the hash
* table and clear slot->key even on error.
*/
hlist_del(&slot->hash_node);
slot->key = NULL;
err = 0;
out_unlock:
out:
blk_crypto_hw_exit(profile);
return err;
}
......
......@@ -13,6 +13,7 @@
#include <linux/blkdev.h>
#include <linux/blk-crypto-profile.h>
#include <linux/module.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include "blk-crypto-internal.h"
......@@ -224,27 +225,27 @@ static bool bio_crypt_check_alignment(struct bio *bio)
return true;
}
blk_status_t __blk_crypto_init_request(struct request *rq)
blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq)
{
return blk_crypto_get_keyslot(rq->q->crypto_profile,
rq->crypt_ctx->bc_key,
&rq->crypt_keyslot);
}
/**
* __blk_crypto_free_request - Uninitialize the crypto fields of a request.
*
* @rq: The request whose crypto fields to uninitialize.
*
* Completely uninitializes the crypto fields of a request. If a keyslot has
* been programmed into some inline encryption hardware, that keyslot is
* released. The rq->crypt_ctx is also freed.
*/
void __blk_crypto_free_request(struct request *rq)
void __blk_crypto_rq_put_keyslot(struct request *rq)
{
blk_crypto_put_keyslot(rq->crypt_keyslot);
rq->crypt_keyslot = NULL;
}
void __blk_crypto_free_request(struct request *rq)
{
/* The keyslot, if one was needed, should have been released earlier. */
if (WARN_ON_ONCE(rq->crypt_keyslot))
__blk_crypto_rq_put_keyslot(rq);
mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
blk_crypto_rq_set_defaults(rq);
rq->crypt_ctx = NULL;
}
/**
......@@ -399,30 +400,39 @@ int blk_crypto_start_using_key(struct block_device *bdev,
}
/**
* blk_crypto_evict_key() - Evict a key from any inline encryption hardware
* it may have been programmed into
* @bdev: The block_device who's associated inline encryption hardware this key
* might have been programmed into
* @key: The key to evict
* blk_crypto_evict_key() - Evict a blk_crypto_key from a block_device
* @bdev: a block_device on which I/O using the key may have been done
* @key: the key to evict
*
* For a given block_device, this function removes the given blk_crypto_key from
* the keyslot management structures and evicts it from any underlying hardware
* keyslot(s) or blk-crypto-fallback keyslot it may have been programmed into.
*
* Upper layers (filesystems) must call this function to ensure that a key is
* evicted from any hardware that it might have been programmed into. The key
* must not be in use by any in-flight IO when this function is called.
* Upper layers must call this before freeing the blk_crypto_key. It must be
* called for every block_device the key may have been used on. The key must no
* longer be in use by any I/O when this function is called.
*
* Return: 0 on success or if the key wasn't in any keyslot; -errno on error.
* Context: May sleep.
*/
int blk_crypto_evict_key(struct block_device *bdev,
const struct blk_crypto_key *key)
void blk_crypto_evict_key(struct block_device *bdev,
const struct blk_crypto_key *key)
{
struct request_queue *q = bdev_get_queue(bdev);
int err;
if (blk_crypto_config_supported_natively(bdev, &key->crypto_cfg))
return __blk_crypto_evict_key(q->crypto_profile, key);
err = __blk_crypto_evict_key(q->crypto_profile, key);
else
err = blk_crypto_fallback_evict_key(key);
/*
* If the block_device didn't support the key, then blk-crypto-fallback
* may have been used, so try to evict the key from blk-crypto-fallback.
* An error can only occur here if the key failed to be evicted from a
* keyslot (due to a hardware or driver issue) or is allegedly still in
* use by I/O (due to a kernel bug). Even in these cases, the key is
* still unlinked from the keyslot management structures, and the caller
* is allowed and expected to free it right away. There's nothing
* callers can do to handle errors, so just log them and return void.
*/
return blk_crypto_fallback_evict_key(key);
if (err)
pr_warn_ratelimited("%pg: error %d evicting key\n", bdev, err);
}
EXPORT_SYMBOL_GPL(blk_crypto_evict_key);
......@@ -68,12 +68,10 @@
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/blk-mq.h>
#include <linux/part_stat.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-mq-sched.h"
/* PREFLUSH/FUA sequences */
......@@ -138,11 +136,6 @@ static void blk_flush_restore_request(struct request *rq)
rq->end_io = rq->flush.saved_end_io;
}
static void blk_flush_queue_rq(struct request *rq, bool add_front)
{
blk_mq_add_to_requeue_list(rq, add_front, true);
}
static void blk_account_io_flush(struct request *rq)
{
struct block_device *part = rq->q->disk->part0;
......@@ -195,7 +188,8 @@ static void blk_flush_complete_seq(struct request *rq,
case REQ_FSEQ_DATA:
list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
blk_flush_queue_rq(rq, true);
blk_mq_add_to_requeue_list(rq, BLK_MQ_INSERT_AT_HEAD);
blk_mq_kick_requeue_list(q);
break;
case REQ_FSEQ_DONE:
......@@ -352,7 +346,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
smp_wmb();
req_ref_set(flush_rq, 1);
blk_flush_queue_rq(flush_rq, false);
blk_mq_add_to_requeue_list(flush_rq, 0);
blk_mq_kick_requeue_list(q);
}
static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
......@@ -396,6 +391,7 @@ void blk_insert_flush(struct request *rq)
unsigned long fflags = q->queue_flags; /* may change, cache */
unsigned int policy = blk_flush_policy(fflags, rq);
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
/*
* @policy now records what operations need to be done. Adjust
......@@ -432,7 +428,8 @@ void blk_insert_flush(struct request *rq)
*/
if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
blk_mq_request_bypass_insert(rq, false, true);
blk_mq_request_bypass_insert(rq, 0);
blk_mq_run_hw_queue(hctx, false);
return;
}
......
......@@ -3106,9 +3106,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
return nbytes;
}
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
if (ret)
return ret;
goto err;
iocg = blkg_to_iocg(ctx.blkg);
......@@ -3127,12 +3129,14 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
weight_updated(iocg, &now);
spin_unlock(&iocg->ioc->lock);
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return nbytes;
einval:
blkg_conf_finish(&ctx);
return -EINVAL;
ret = -EINVAL;
err:
blkg_conf_exit(&ctx);
return ret;
}
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
......@@ -3189,19 +3193,22 @@ static const match_table_t qos_tokens = {
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
struct block_device *bdev;
struct blkg_conf_ctx ctx;
struct gendisk *disk;
struct ioc *ioc;
u32 qos[NR_QOS_PARAMS];
bool enable, user;
char *p;
char *body, *p;
int ret;
bdev = blkcg_conf_open_bdev(&input);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
blkg_conf_init(&ctx, input);
disk = bdev->bd_disk;
ret = blkg_conf_open_bdev(&ctx);
if (ret)
goto err;
body = ctx.body;
disk = ctx.bdev->bd_disk;
if (!queue_is_mq(disk->queue)) {
ret = -EOPNOTSUPP;
goto err;
......@@ -3223,7 +3230,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
enable = ioc->enabled;
user = ioc->user_qos_params;
while ((p = strsep(&input, " \t\n"))) {
while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
......@@ -3313,7 +3320,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return nbytes;
einval:
spin_unlock_irq(&ioc->lock);
......@@ -3323,7 +3330,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ret = -EINVAL;
err:
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return ret;
}
......@@ -3376,19 +3383,22 @@ static const match_table_t i_lcoef_tokens = {
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
size_t nbytes, loff_t off)
{
struct block_device *bdev;
struct blkg_conf_ctx ctx;
struct request_queue *q;
struct ioc *ioc;
u64 u[NR_I_LCOEFS];
bool user;
char *p;
char *body, *p;
int ret;
bdev = blkcg_conf_open_bdev(&input);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
blkg_conf_init(&ctx, input);
ret = blkg_conf_open_bdev(&ctx);
if (ret)
goto err;
q = bdev_get_queue(bdev);
body = ctx.body;
q = bdev_get_queue(ctx.bdev);
if (!queue_is_mq(q)) {
ret = -EOPNOTSUPP;
goto err;
......@@ -3396,7 +3406,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
ioc = q_to_ioc(q);
if (!ioc) {
ret = blk_iocost_init(bdev->bd_disk);
ret = blk_iocost_init(ctx.bdev->bd_disk);
if (ret)
goto err;
ioc = q_to_ioc(q);
......@@ -3409,7 +3419,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
memcpy(u, ioc->params.i_lcoefs, sizeof(u));
user = ioc->user_cost_model;
while ((p = strsep(&input, " \t\n"))) {
while ((p = strsep(&body, " \t\n"))) {
substring_t args[MAX_OPT_ARGS];
char buf[32];
int tok;
......@@ -3456,7 +3466,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return nbytes;
einval:
......@@ -3467,7 +3477,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
ret = -EINVAL;
err:
blkdev_put_no_open(bdev);
blkg_conf_exit(&ctx);
return ret;
}
......
......@@ -755,7 +755,7 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
}
}
int blk_iolatency_init(struct gendisk *disk)
static int blk_iolatency_init(struct gendisk *disk)
{
struct blk_iolatency *blkiolat;
int ret;
......@@ -824,6 +824,29 @@ static void iolatency_clear_scaling(struct blkcg_gq *blkg)
}
}
static int blk_iolatency_try_init(struct blkg_conf_ctx *ctx)
{
static DEFINE_MUTEX(init_mutex);
int ret;
ret = blkg_conf_open_bdev(ctx);
if (ret)
return ret;
/*
* blk_iolatency_init() may fail after rq_qos_add() succeeds which can
* confuse iolat_rq_qos() test. Make the test and init atomic.
*/
mutex_lock(&init_mutex);
if (!iolat_rq_qos(ctx->bdev->bd_queue))
ret = blk_iolatency_init(ctx->bdev->bd_disk);
mutex_unlock(&init_mutex);
return ret;
}
static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
......@@ -836,9 +859,15 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
u64 oldval;
int ret;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx);
blkg_conf_init(&ctx, buf);
ret = blk_iolatency_try_init(&ctx);
if (ret)
return ret;
goto out;
ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx);
if (ret)
goto out;
iolat = blkg_to_lat(ctx.blkg);
p = ctx.body;
......@@ -874,7 +903,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf,
iolatency_clear_scaling(blkg);
ret = 0;
out:
blkg_conf_finish(&ctx);
blkg_conf_exit(&ctx);
return ret ?: nbytes;
}
......@@ -967,7 +996,7 @@ static void iolatency_pd_init(struct blkg_policy_data *pd)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
struct blkcg_gq *blkg = lat_to_blkg(iolat);
struct rq_qos *rqos = blkcg_rq_qos(blkg->q);
struct rq_qos *rqos = iolat_rq_qos(blkg->q);
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
u64 now = ktime_to_ns(ktime_get());
int cpu;
......
......@@ -867,6 +867,8 @@ static struct request *attempt_merge(struct request_queue *q,
if (!blk_discard_mergable(req))
elv_merge_requests(q, req, next);
blk_crypto_rq_put_keyslot(next);
/*
* 'next' is going away, so update stats accordingly
*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment