Commit 61564e7b authored by Linus Torvalds

Merge tag 'block-5.16-2021-11-19' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - Flip a cap check to avoid a selinux error (Alistair)

 - Fix for a regression this merge window where we can miss a queue ref
   put (me)

 - Un-mark pstore-blk as broken, as the condition that triggered that
   change has been rectified (Kees)

 - Queue quiesce and sync fixes (Ming)

 - FUA insertion fix (Ming)

 - blk-cgroup error path put fix (Yu)

* tag 'block-5.16-2021-11-19' of git://git.kernel.dk/linux-block:
  blk-mq: don't insert FUA request with data into scheduler queue
  blk-cgroup: fix missing put device in error path from blkg_conf_pref()
  block: avoid to quiesce queue in elevator_init_mq
  Revert "mark pstore-blk as broken"
  blk-mq: cancel blk-mq dispatch work in both blk_cleanup_queue and disk_release()
  block: fix missing queue put in error path
  block: Check ADMIN before NICE for IOPRIO_CLASS_RT
parents b100274c 2b504bd4
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -640,7 +640,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	 */
 	ret = blk_queue_enter(q, 0);
 	if (ret)
-		return ret;
+		goto fail;
 
 	rcu_read_lock();
 	spin_lock_irq(&q->queue_lock);
@@ -676,13 +676,13 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 			new_blkg = blkg_alloc(pos, q, GFP_KERNEL);
 			if (unlikely(!new_blkg)) {
 				ret = -ENOMEM;
-				goto fail;
+				goto fail_exit_queue;
 			}
 
 			if (radix_tree_preload(GFP_KERNEL)) {
 				blkg_free(new_blkg);
 				ret = -ENOMEM;
-				goto fail;
+				goto fail_exit_queue;
 			}
 
 			rcu_read_lock();
@@ -722,9 +722,10 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 fail_unlock:
 	spin_unlock_irq(&q->queue_lock);
 	rcu_read_unlock();
+fail_exit_queue:
+	blk_queue_exit(q);
 fail:
 	blkdev_put_no_open(bdev);
-	blk_queue_exit(q);
 	/*
 	 * If queue was bypassing, we should retry.  Do so after a
 	 * short msleep().  It isn't strictly necessary but queue
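The blk-cgroup change above is the classic layered goto-unwind pattern: each error label releases exactly what was acquired before the corresponding failure point, so the queue reference taken by blk_queue_enter() is dropped only on paths that actually took it, while the bdev reference is dropped on every failure path. Below is a minimal standalone sketch of that pattern; the names (open_device(), enter_queue(), setup_policy() and friends) are hypothetical stand-ins, not kernel API.

/*
 * Minimal sketch of the layered goto-unwind used in blkg_conf_prep().
 * Everything here is a hypothetical stand-in for blkdev_get_no_open()/
 * blkdev_put_no_open() and blk_queue_enter()/blk_queue_exit(); only the
 * unwind structure mirrors the kernel fix.
 */
#include <errno.h>
#include <stdio.h>

struct dev   { int refs; };
struct queue { int refs; };

static struct dev   the_dev;
static struct queue the_queue;

static struct dev *open_device(void)            { the_dev.refs++; return &the_dev; }
static void put_device_ref(struct dev *d)       { d->refs--; }
static struct queue *enter_queue(struct dev *d) { (void)d; the_queue.refs++; return &the_queue; }
static void exit_queue(struct queue *q)         { q->refs--; }
static int setup_policy(struct queue *q)        { (void)q; return -ENOMEM; /* force the error path */ }

static int configure(void)
{
	struct dev *d;
	struct queue *q;
	int ret;

	d = open_device();
	if (!d)
		return -ENODEV;

	q = enter_queue(d);
	if (!q) {
		ret = -EBUSY;
		goto fail;		/* only the device reference is held */
	}

	ret = setup_policy(q);
	if (ret)
		goto fail_exit_queue;	/* queue ref *and* device ref are held */

	exit_queue(q);
	put_device_ref(d);
	return 0;

fail_exit_queue:
	exit_queue(q);			/* undo enter_queue() */
fail:
	put_device_ref(d);		/* undo open_device() */
	return ret;
}

int main(void)
{
	int ret = configure();

	/* Both counters must be back to zero even though setup_policy() failed. */
	printf("ret=%d dev.refs=%d queue.refs=%d\n", ret, the_dev.refs, the_queue.refs);
	return 0;
}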
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -363,8 +363,10 @@ void blk_cleanup_queue(struct request_queue *q)
 	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
 	blk_sync_queue(q);
-	if (queue_is_mq(q))
+	if (queue_is_mq(q)) {
+		blk_mq_cancel_work_sync(q);
 		blk_mq_exit_queue(q);
+	}
 
 	/*
 	 * In theory, request pool of sched_tags belongs to request queue.
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -379,7 +379,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
  * @rq is being submitted.  Analyze what needs to be done and put it on the
  * right queue.
  */
-bool blk_insert_flush(struct request *rq)
+void blk_insert_flush(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	unsigned long fflags = q->queue_flags;	/* may change, cache */
@@ -409,7 +409,7 @@ bool blk_insert_flush(struct request *rq)
 	 */
 	if (!policy) {
 		blk_mq_end_request(rq, 0);
-		return true;
+		return;
 	}
 
 	BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */
@@ -420,8 +420,10 @@ bool blk_insert_flush(struct request *rq)
 	 * for normal execution.
 	 */
 	if ((policy & REQ_FSEQ_DATA) &&
-	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH)))
-		return false;
+	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
+		blk_mq_request_bypass_insert(rq, false, true);
+		return;
+	}
 
 	/*
 	 * @rq should go through flush machinery.  Mark it part of flush
@@ -437,8 +439,6 @@ bool blk_insert_flush(struct request *rq)
 	spin_lock_irq(&fq->mq_flush_lock);
 	blk_flush_complete_seq(rq, fq, REQ_FSEQ_ACTIONS & ~policy, 0);
 	spin_unlock_irq(&fq->mq_flush_lock);
-
-	return true;
 }
 
 /**
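For reference, the blk-flush change above hinges on which REQ_FSEQ_* policy bits a request needs: no bits means it can be completed immediately, data with no pre/post flush means it can be bypass-inserted straight onto the dispatch list (never handed to the I/O scheduler), and anything else goes through the flush state machine. The sketch below mirrors that decision logic with simplified stand-in types and helpers; it is an illustration, not the kernel code.

/*
 * Standalone sketch of the decision made in blk_insert_flush().  The flag
 * names mirror the kernel's REQ_FSEQ_* policy bits, but the request struct
 * and helpers are simplified stand-ins, not kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

enum {
	FSEQ_DATA      = 1 << 0,	/* request carries payload data */
	FSEQ_PREFLUSH  = 1 << 1,	/* cache flush needed before the write */
	FSEQ_POSTFLUSH = 1 << 2,	/* cache flush needed after the write */
};

struct req {
	bool has_data;		/* blk_rq_sectors(rq) != 0 */
	bool wants_preflush;	/* submitter set REQ_PREFLUSH */
	bool wants_fua;		/* submitter set REQ_FUA */
};

/* Which flush steps does this request need, given whether the device has a
 * volatile write cache and native FUA support? */
static unsigned int flush_policy(const struct req *rq, bool dev_has_wc, bool dev_has_fua)
{
	unsigned int policy = 0;

	if (rq->has_data)
		policy |= FSEQ_DATA;
	if (dev_has_wc) {
		if (rq->wants_preflush)
			policy |= FSEQ_PREFLUSH;
		if (rq->wants_fua && !dev_has_fua)
			policy |= FSEQ_POSTFLUSH;	/* emulate FUA with a post-flush */
	}
	return policy;
}

static const char *insertion_path(unsigned int policy)
{
	if (!policy)
		return "complete immediately (nothing to do)";
	if ((policy & FSEQ_DATA) && !(policy & (FSEQ_PREFLUSH | FSEQ_POSTFLUSH)))
		return "bypass-insert to the dispatch list (skip the scheduler)";
	return "run through the flush state machine";
}

int main(void)
{
	/* A FUA write with data on a device with native FUA: after the fix it
	 * is bypass-inserted rather than handed to the scheduler. */
	struct req fua_write = { .has_data = true, .wants_fua = true };
	unsigned int policy = flush_policy(&fua_write, /*wc*/ true, /*fua*/ true);

	printf("%s\n", insertion_path(policy));
	return 0;
}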
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2543,8 +2543,7 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	return NULL;
 }
 
-static inline bool blk_mq_can_use_cached_rq(struct request *rq,
-		struct bio *bio)
+static inline bool blk_mq_can_use_cached_rq(struct request *rq, struct bio *bio)
 {
 	if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
 		return false;
@@ -2565,7 +2564,6 @@ static inline struct request *blk_mq_get_request(struct request_queue *q,
 	bool checked = false;
 
 	if (plug) {
-
 		rq = rq_list_peek(&plug->cached_rq);
 		if (rq && rq->q == q) {
 			if (unlikely(!submit_bio_checks(bio)))
@@ -2587,12 +2585,14 @@ static inline struct request *blk_mq_get_request(struct request_queue *q,
 fallback:
 	if (unlikely(bio_queue_enter(bio)))
 		return NULL;
-	if (!checked && !submit_bio_checks(bio))
-		return NULL;
+	if (unlikely(!checked && !submit_bio_checks(bio)))
+		goto out_put;
 	rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
-	if (!rq)
-		blk_queue_exit(q);
-	return rq;
+	if (rq)
+		return rq;
+out_put:
+	blk_queue_exit(q);
+	return NULL;
 }
 
 /**
@@ -2647,8 +2647,10 @@ void blk_mq_submit_bio(struct bio *bio)
 		return;
 	}
 
-	if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq))
+	if (op_is_flush(bio->bi_opf)) {
+		blk_insert_flush(rq);
 		return;
+	}
 
 	if (plug && (q->nr_hw_queues == 1 ||
 	    blk_mq_is_shared_tags(rq->mq_hctx->flags) ||
@@ -4417,6 +4419,19 @@ unsigned int blk_mq_rq_cpu(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_rq_cpu);
 
+void blk_mq_cancel_work_sync(struct request_queue *q)
+{
+	if (queue_is_mq(q)) {
+		struct blk_mq_hw_ctx *hctx;
+		int i;
+
+		cancel_delayed_work_sync(&q->requeue_work);
+
+		queue_for_each_hw_ctx(q, hctx, i)
+			cancel_delayed_work_sync(&hctx->run_work);
+	}
+}
+
 static int __init blk_mq_init(void)
 {
 	int i;
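The new blk_mq_cancel_work_sync() helper leans on the guarantee of cancel_delayed_work_sync(): on return the work item is neither pending nor running, even if it re-queues itself. A hypothetical, minimal module sketch of that teardown pattern (not part of this series) might look like:

/* Hypothetical demo module: cancel a self-rearming delayed work on exit,
 * which is the same guarantee blk_mq_cancel_work_sync() relies on for the
 * requeue and hctx run works. */
#include <linux/module.h>
#include <linux/printk.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void demo_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(demo_work, demo_fn);

static void demo_fn(struct work_struct *work)
{
	pr_info("demo delayed work ran\n");
	/* re-arm itself, like a periodic dispatch/requeue handler */
	schedule_delayed_work(&demo_work, HZ);
}

static int __init demo_init(void)
{
	schedule_delayed_work(&demo_work, HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	/*
	 * Without the _sync variant, a self-rearming work could still be
	 * queued or running here and fire after the module text is gone.
	 */
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");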
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,6 +128,8 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 void blk_mq_free_plug_rqs(struct blk_plug *plug);
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
+void blk_mq_cancel_work_sync(struct request_queue *q);
+
 void blk_mq_release(struct request_queue *q);
 
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -791,16 +791,6 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_free_queue_stats(q->stats);
 
-	if (queue_is_mq(q)) {
-		struct blk_mq_hw_ctx *hctx;
-		int i;
-
-		cancel_delayed_work_sync(&q->requeue_work);
-
-		queue_for_each_hw_ctx(q, hctx, i)
-			cancel_delayed_work_sync(&hctx->run_work);
-	}
-
 	blk_exit_queue(q);
 
 	blk_queue_free_zone_bitmaps(q);
--- a/block/blk.h
+++ b/block/blk.h
@@ -271,7 +271,7 @@ void __blk_account_io_done(struct request *req, u64 now);
  */
 #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED)
 
-bool blk_insert_flush(struct request *rq);
+void blk_insert_flush(struct request *rq);
 
 int elevator_switch_mq(struct request_queue *q,
 			struct elevator_type *new_e);
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -694,12 +694,18 @@ void elevator_init_mq(struct request_queue *q)
 	if (!e)
 		return;
 
+	/*
+	 * We are called before adding disk, when there isn't any FS I/O,
+	 * so freezing queue plus canceling dispatch work is enough to
+	 * drain any dispatch activities originated from passthrough
+	 * requests, then no need to quiesce queue which may add long boot
+	 * latency, especially when lots of disks are involved.
+	 */
 	blk_mq_freeze_queue(q);
-	blk_mq_quiesce_queue(q);
+	blk_mq_cancel_work_sync(q);
 
 	err = blk_mq_init_sched(q, e);
 
-	blk_mq_unquiesce_queue(q);
 	blk_mq_unfreeze_queue(q);
 
 	if (err) {
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1111,6 +1111,8 @@ static void disk_release(struct device *dev)
 	might_sleep();
 	WARN_ON_ONCE(disk_live(disk));
 
+	blk_mq_cancel_work_sync(disk->queue);
+
 	disk_release_events(disk);
 	kfree(disk->random);
 	xa_destroy(&disk->part_tbl);
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -69,7 +69,14 @@ int ioprio_check_cap(int ioprio)
 
 	switch (class) {
 		case IOPRIO_CLASS_RT:
-			if (!capable(CAP_SYS_NICE) && !capable(CAP_SYS_ADMIN))
+			/*
+			 * Originally this only checked for CAP_SYS_ADMIN,
+			 * which was implicitly allowed for pid 0 by security
+			 * modules such as SELinux. Make sure we check
+			 * CAP_SYS_ADMIN first to avoid a denial/avc for
+			 * possibly missing CAP_SYS_NICE permission.
+			 */
+			if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
 				return -EPERM;
 			fallthrough;
 			/* rt has prio field too */
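What the reordered capability check above gates is the ioprio_set(2) path that requests IOPRIO_CLASS_RT: with CAP_SYS_ADMIN checked first, a caller that holds CAP_SYS_ADMIN but not CAP_SYS_NICE still succeeds, and SELinux no longer logs a spurious denial for the CAP_SYS_NICE probe. A small userspace illustration follows; the IOPRIO_* constants are copied from the kernel UAPI values, and glibc has no wrapper, hence the raw syscall.

/* Userspace sketch of the call path that reaches ioprio_check_cap():
 * requesting a realtime I/O class for the current process. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define IOPRIO_WHO_PROCESS	1
#define IOPRIO_CLASS_RT		1
#define IOPRIO_CLASS_SHIFT	13
#define IOPRIO_PRIO_VALUE(cls, data)	(((cls) << IOPRIO_CLASS_SHIFT) | (data))

int main(void)
{
	/* Priority 4 in the realtime class for the calling process (pid 0 = self). */
	int ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4);

	if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0, ioprio) < 0) {
		/* EPERM: the caller has neither CAP_SYS_ADMIN nor CAP_SYS_NICE. */
		fprintf(stderr, "ioprio_set: %s\n", strerror(errno));
		return 1;
	}
	printf("realtime I/O priority granted\n");
	return 0;
}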
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -173,7 +173,6 @@ config PSTORE_BLK
 	tristate "Log panic/oops to a block device"
 	depends on PSTORE
 	depends on BLOCK
-	depends on BROKEN
 	select PSTORE_ZONE
 	default n
 	help