Commit 77d0b194 authored by Linus Torvalds

Merge tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:

 - Dead code removal for loop/sunvdc (Chengguang)

 - Mark BIDI support for bsg as deprecated, logging a single dmesg
   warning if anyone is actually using it (Christoph)

 - blkcg cleanup, killing a dead function and making the tryget_closest
   variant easier to read (Dennis)

 - Floppy fixes, one fixing a regression in swim3 (Finn)

 - lightnvm use-after-free fix (Gustavo)

 - gdrom leak fix (Wenwen)

 - a set of drbd updates (Lars, Luc, Nathan, Roland)

* tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block: (28 commits)
  block/swim3: Fix regression on PowerBook G3
  block/swim3: Fix -EBUSY error when re-opening device after unmount
  block/swim3: Remove dead return statement
  block/amiflop: Don't log error message on invalid ioctl
  gdrom: fix a memory leak bug
  lightnvm: pblk: fix use-after-free bug
  block: sunvdc: remove redundant code
  block: loop: remove redundant code
  bsg: deprecate BIDI support in bsg
  blkcg: remove unused __blkg_release_rcu()
  blkcg: clean up blkg_tryget_closest()
  drbd: Change drbd_request_detach_interruptible's return type to int
  drbd: Avoid Clang warning about pointless switch statement
  drbd: introduce P_ZEROES (REQ_OP_WRITE_ZEROES on the "wire")
  drbd: skip spurious timeout (ping-timeo) when failing promote
  drbd: don't retry connection if peers do not agree on "authentication" settings
  drbd: fix print_st_err()'s prototype to match the definition
  drbd: avoid spurious self-outdating with concurrent disconnect / down
  drbd: do not block when adjusting "disk-options" while IO is frozen
  drbd: fix comment typos
  ...
parents b79f9f93 427c5ce4
...@@ -2097,8 +2097,12 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg); ...@@ -2097,8 +2097,12 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
*/ */
void bio_clone_blkg_association(struct bio *dst, struct bio *src) void bio_clone_blkg_association(struct bio *dst, struct bio *src)
{ {
rcu_read_lock();
if (src->bi_blkg) if (src->bi_blkg)
__bio_associate_blkg(dst, src->bi_blkg); __bio_associate_blkg(dst, src->bi_blkg);
rcu_read_unlock();
} }
EXPORT_SYMBOL_GPL(bio_clone_blkg_association); EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
#endif /* CONFIG_BLK_CGROUP */ #endif /* CONFIG_BLK_CGROUP */
......
...@@ -438,29 +438,6 @@ static void blkg_destroy_all(struct request_queue *q) ...@@ -438,29 +438,6 @@ static void blkg_destroy_all(struct request_queue *q)
spin_unlock_irq(&q->queue_lock); spin_unlock_irq(&q->queue_lock);
} }
/*
* A group is RCU protected, but having an rcu lock does not mean that one
* can access all the fields of blkg and assume these are valid. For
* example, don't try to follow throtl_data and request queue links.
*
* Having a reference to blkg under an rcu allows accesses to only values
* local to groups like group stats and group rate limits.
*/
void __blkg_release_rcu(struct rcu_head *rcu_head)
{
struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
if (blkg->parent)
blkg_put(blkg->parent);
wb_congested_put(blkg->wb_congested);
blkg_free(blkg);
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);
static int blkcg_reset_stats(struct cgroup_subsys_state *css, static int blkcg_reset_stats(struct cgroup_subsys_state *css,
struct cftype *cftype, u64 val) struct cftype *cftype, u64 val)
{ {
......
...@@ -177,6 +177,10 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode) ...@@ -177,6 +177,10 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
goto out; goto out;
} }
pr_warn_once(
"BIDI support in bsg has been deprecated and might be removed. "
"Please report your use case to linux-scsi@vger.kernel.org\n");
next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0); next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
if (IS_ERR(next_rq)) { if (IS_ERR(next_rq)) {
ret = PTR_ERR(next_rq); ret = PTR_ERR(next_rq);
......
...@@ -1601,8 +1601,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, ...@@ -1601,8 +1601,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
return p->type->read_size; return p->type->read_size;
#endif #endif
default: default:
printk(KERN_DEBUG "fd_ioctl: unknown cmd %d for drive %d.",
cmd, drive);
return -ENOSYS; return -ENOSYS;
} }
return 0; return 0;
......
...@@ -237,6 +237,8 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re ...@@ -237,6 +237,8 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL"); seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
seq_print_rq_state_bit(m, f & EE_TRIM, &sep, "trim");
seq_print_rq_state_bit(m, f & EE_ZEROOUT, &sep, "zero-out");
seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same"); seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
seq_putc(m, '\n'); seq_putc(m, '\n');
} }
......
...@@ -430,7 +430,11 @@ enum { ...@@ -430,7 +430,11 @@ enum {
__EE_MAY_SET_IN_SYNC, __EE_MAY_SET_IN_SYNC,
/* is this a TRIM aka REQ_OP_DISCARD? */ /* is this a TRIM aka REQ_OP_DISCARD? */
__EE_IS_TRIM, __EE_TRIM,
/* explicit zero-out requested, or
* our lower level cannot handle trim,
* and we want to fall back to zeroout instead */
__EE_ZEROOUT,
/* In case a barrier failed, /* In case a barrier failed,
* we need to resubmit without the barrier flag. */ * we need to resubmit without the barrier flag. */
...@@ -472,7 +476,8 @@ enum { ...@@ -472,7 +476,8 @@ enum {
}; };
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
#define EE_IS_TRIM (1<<__EE_IS_TRIM) #define EE_TRIM (1<<__EE_TRIM)
#define EE_ZEROOUT (1<<__EE_ZEROOUT)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED) #define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR) #define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
...@@ -1556,6 +1561,8 @@ extern void start_resync_timer_fn(struct timer_list *t); ...@@ -1556,6 +1561,8 @@ extern void start_resync_timer_fn(struct timer_list *t);
extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
/* drbd_receiver.c */ /* drbd_receiver.c */
extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
sector_t start, unsigned int nr_sectors, int flags);
extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_ack_receiver(struct drbd_thread *thi); extern int drbd_ack_receiver(struct drbd_thread *thi);
extern void drbd_send_ping_wf(struct work_struct *ws); extern void drbd_send_ping_wf(struct work_struct *ws);
...@@ -1609,13 +1616,7 @@ static inline void drbd_tcp_quickack(struct socket *sock) ...@@ -1609,13 +1616,7 @@ static inline void drbd_tcp_quickack(struct socket *sock)
} }
/* sets the number of 512 byte sectors of our virtual device */ /* sets the number of 512 byte sectors of our virtual device */
static inline void drbd_set_my_capacity(struct drbd_device *device, void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
sector_t size)
{
/* set_capacity(device->this_bdev->bd_disk, size); */
set_capacity(device->vdisk, size);
device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
}
/* /*
* used to submit our private bio * used to submit our private bio
......
...@@ -1668,7 +1668,11 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, ...@@ -1668,7 +1668,11 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
(bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) | (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) | (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) | (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
(bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0); (bio_op(bio) == REQ_OP_WRITE_ZEROES ?
((connection->agreed_features & DRBD_FF_WZEROES) ?
(DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0))
: DP_DISCARD)
: 0);
else else
return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0; return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
} }
...@@ -1712,10 +1716,11 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * ...@@ -1712,10 +1716,11 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
} }
p->dp_flags = cpu_to_be32(dp_flags); p->dp_flags = cpu_to_be32(dp_flags);
if (dp_flags & DP_DISCARD) { if (dp_flags & (DP_DISCARD|DP_ZEROES)) {
enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM;
struct p_trim *t = (struct p_trim*)p; struct p_trim *t = (struct p_trim*)p;
t->size = cpu_to_be32(req->i.size); t->size = cpu_to_be32(req->i.size);
err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0); err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0);
goto out; goto out;
} }
if (dp_flags & DP_WSAME) { if (dp_flags & DP_WSAME) {
...@@ -2034,6 +2039,21 @@ void drbd_init_set_defaults(struct drbd_device *device) ...@@ -2034,6 +2039,21 @@ void drbd_init_set_defaults(struct drbd_device *device)
device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
} }
/*
 * Set the size of our virtual block device, in units of 512-byte sectors.
 * Updates both the gendisk capacity and the backing bdev inode size so
 * that the VFS and user space observe a consistent device size.
 * Silent variant: performs no logging (see drbd_set_my_capacity()).
 */
static void _drbd_set_my_capacity(struct drbd_device *device, sector_t size)
{
	set_capacity(device->vdisk, size);
	/* i_size is in bytes; size is in 512-byte sectors, hence << 9. */
	device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
}
/*
 * Public wrapper around _drbd_set_my_capacity(): apply the new size
 * (in 512-byte sectors) and log it in human-readable form plus KB.
 */
void drbd_set_my_capacity(struct drbd_device *device, sector_t size)
{
	char units_buf[10];

	_drbd_set_my_capacity(device, size);

	/* ppsize() renders the KB value with a human-friendly unit suffix. */
	drbd_info(device, "size = %s (%llu KB)\n",
		ppsize(units_buf, size>>1), (unsigned long long)size>>1);
}
void drbd_device_cleanup(struct drbd_device *device) void drbd_device_cleanup(struct drbd_device *device)
{ {
int i; int i;
...@@ -2059,7 +2079,7 @@ void drbd_device_cleanup(struct drbd_device *device) ...@@ -2059,7 +2079,7 @@ void drbd_device_cleanup(struct drbd_device *device)
} }
D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL);
drbd_set_my_capacity(device, 0); _drbd_set_my_capacity(device, 0);
if (device->bitmap) { if (device->bitmap) {
/* maybe never allocated. */ /* maybe never allocated. */
drbd_bm_resize(device, 0, 1); drbd_bm_resize(device, 0, 1);
......
...@@ -127,6 +127,35 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info) ...@@ -127,6 +127,35 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
return 0; return 0;
} }
/*
 * Append a printf-formatted info text to a netlink reply skb, wrapped in a
 * DRBD_NLA_CFG_REPLY nest as a T_info_text attribute.
 *
 * Returns 0 on success, -EMSGSIZE if the skb has no room for the nest or
 * the 256 byte attribute reservation.
 *
 * Messages longer than the reserved 256 bytes are silently truncated
 * (vscnprintf guarantees NUL termination within the buffer).
 */
__printf(2, 3)
static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
{
	va_list args;
	struct nlattr *nla, *txt;
	int err = -EMSGSIZE;
	int len;

	nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
	if (!nla)
		return err;

	/* Reserve worst-case space first; we shrink it after formatting. */
	txt = nla_reserve(skb, T_info_text, 256);
	if (!txt) {
		/* Roll back the (now empty) nest so the skb stays valid. */
		nla_nest_cancel(skb, nla);
		return err;
	}
	va_start(args, fmt);
	len = vscnprintf(nla_data(txt), 256, fmt, args);
	va_end(args);

	/* maybe: retry with larger reserve, if truncated */
	/* Shrink the attribute to the actual text length (+1 for the NUL),
	 * then trim the skb tail back to the aligned end of the attribute. */
	txt->nla_len = nla_attr_size(len+1);
	nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
	nla_nest_end(skb, nla);

	return 0;
}
/* This would be a good candidate for a "pre_doit" hook, /* This would be a good candidate for a "pre_doit" hook,
* and per-family private info->pointers. * and per-family private info->pointers.
* But we need to stay compatible with older kernels. * But we need to stay compatible with older kernels.
...@@ -668,14 +697,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for ...@@ -668,14 +697,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for
if (rv == SS_TWO_PRIMARIES) { if (rv == SS_TWO_PRIMARIES) {
/* Maybe the peer is detected as dead very soon... /* Maybe the peer is detected as dead very soon...
retry at most once more in this case. */ retry at most once more in this case. */
if (try < max_tries) {
int timeo; int timeo;
try = max_tries - 1;
rcu_read_lock(); rcu_read_lock();
nc = rcu_dereference(connection->net_conf); nc = rcu_dereference(connection->net_conf);
timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
rcu_read_unlock(); rcu_read_unlock();
schedule_timeout_interruptible(timeo); schedule_timeout_interruptible(timeo);
if (try < max_tries) }
try = max_tries - 1;
continue; continue;
} }
if (rv < SS_SUCCESS) { if (rv < SS_SUCCESS) {
...@@ -921,7 +951,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct ...@@ -921,7 +951,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
} prev; } prev;
sector_t u_size, size; sector_t u_size, size;
struct drbd_md *md = &device->ldev->md; struct drbd_md *md = &device->ldev->md;
char ppb[10];
void *buffer; void *buffer;
int md_moved, la_size_changed; int md_moved, la_size_changed;
...@@ -999,8 +1028,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct ...@@ -999,8 +1028,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
/* racy, see comments above. */ /* racy, see comments above. */
drbd_set_my_capacity(device, size); drbd_set_my_capacity(device, size);
md->la_size_sect = size; md->la_size_sect = size;
drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
(unsigned long long)size>>1);
} }
if (rv <= DS_ERROR) if (rv <= DS_ERROR)
goto err_out; goto err_out;
...@@ -1234,6 +1261,21 @@ static void fixup_discard_if_not_supported(struct request_queue *q) ...@@ -1234,6 +1261,21 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
} }
} }
/*
 * Fix up max_write_zeroes_sectors after blk_queue_stack_limits():
 * if we can handle "zeroes" efficiently on the protocol,
 * we want to do that, even if our backend does not announce
 * max_write_zeroes_sectors itself.
 */
static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
{
	struct drbd_connection *conn = first_peer_device(device)->connection;

	/* If the peer announces WZEROES support, use it.  Otherwise, rather
	 * send explicit zeroes than rely on some discard-zeroes-data magic. */
	q->limits.max_write_zeroes_sectors =
		(conn->agreed_features & DRBD_FF_WZEROES)
		? DRBD_MAX_BBIO_SECTORS : 0;
}
static void decide_on_write_same_support(struct drbd_device *device, static void decide_on_write_same_support(struct drbd_device *device,
struct request_queue *q, struct request_queue *q,
struct request_queue *b, struct o_qlim *o, struct request_queue *b, struct o_qlim *o,
...@@ -1344,6 +1386,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi ...@@ -1344,6 +1386,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
} }
} }
fixup_discard_if_not_supported(q); fixup_discard_if_not_supported(q);
fixup_write_zeroes(device, q);
} }
void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o) void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
...@@ -1514,6 +1557,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis ...@@ -1514,6 +1557,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
} }
} }
/*
 * Resize the activity log to dc->al_extents, if it differs from the
 * currently configured size.
 *
 * Returns 0 if the AL already has the requested size or was resized
 * successfully; -EBUSY if IO completion is currently blocked (frozen IO),
 * in which case we refuse to touch the AL rather than block "forever";
 * otherwise the error from drbd_check_al_size().
 */
static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc)
{
	int err = -EBUSY;

	/* Fast path: nothing to do if the AL already has the wanted size. */
	if (device->act_log &&
	    device->act_log->nr_elements == dc->al_extents)
		return 0;

	drbd_suspend_io(device);
	/* If IO completion is currently blocked, we would likely wait
	 * "forever" for the activity log to become unused.  So we don't. */
	if (atomic_read(&device->ap_bio_cnt))
		goto out;

	/* Take exclusive ownership of the AL before shrinking/resizing it. */
	wait_event(device->al_wait, lc_try_lock(device->act_log));
	drbd_al_shrink(device);
	err = drbd_check_al_size(device, dc);
	lc_unlock(device->act_log);
	wake_up(&device->al_wait);
out:
	drbd_resume_io(device);
	return err;
}
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{ {
struct drbd_config_context adm_ctx; struct drbd_config_context adm_ctx;
...@@ -1576,15 +1643,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) ...@@ -1576,15 +1643,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
} }
} }
drbd_suspend_io(device); err = disk_opts_check_al_size(device, new_disk_conf);
wait_event(device->al_wait, lc_try_lock(device->act_log));
drbd_al_shrink(device);
err = drbd_check_al_size(device, new_disk_conf);
lc_unlock(device->act_log);
wake_up(&device->al_wait);
drbd_resume_io(device);
if (err) { if (err) {
/* Could be just "busy". Ignore?
* Introduce dedicated error code? */
drbd_msg_put_info(adm_ctx.reply_skb,
"Try again without changing current al-extents setting");
retcode = ERR_NOMEM; retcode = ERR_NOMEM;
goto fail_unlock; goto fail_unlock;
} }
...@@ -1934,8 +1998,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) ...@@ -1934,8 +1998,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
} }
} }
if (device->state.conn < C_CONNECTED && if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
device->state.role == R_PRIMARY && device->ed_uuid && (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) &&
(device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
drbd_err(device, "Can only attach to data with current UUID=%016llX\n", drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
(unsigned long long)device->ed_uuid); (unsigned long long)device->ed_uuid);
...@@ -1950,12 +2014,22 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) ...@@ -1950,12 +2014,22 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
} }
/* Prevent shrinking of consistent devices ! */ /* Prevent shrinking of consistent devices ! */
if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && {
drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0);
drbd_warn(device, "refusing to truncate a consistent device\n"); unsigned long long eff = nbc->md.la_size_sect;
retcode = ERR_DISK_TOO_SMALL; if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
if (nsz == nbc->disk_conf->disk_size) {
drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
} else {
drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
drbd_msg_sprintf_info(adm_ctx.reply_skb,
"To-be-attached device has last effective > current size, and is consistent\n"
"(%llu > %llu sectors). Refusing to attach.", eff, nsz);
retcode = ERR_IMPLICIT_SHRINK;
goto force_diskless_dec; goto force_diskless_dec;
} }
}
}
lock_all_resources(); lock_all_resources();
retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after); retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
...@@ -2654,8 +2728,10 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) ...@@ -2654,8 +2728,10 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force) static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
{ {
enum drbd_conns cstate;
enum drbd_state_rv rv; enum drbd_state_rv rv;
repeat:
rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
force ? CS_HARD : 0); force ? CS_HARD : 0);
...@@ -2673,6 +2749,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection ...@@ -2673,6 +2749,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
break; break;
case SS_CW_FAILED_BY_PEER: case SS_CW_FAILED_BY_PEER:
spin_lock_irq(&connection->resource->req_lock);
cstate = connection->cstate;
spin_unlock_irq(&connection->resource->req_lock);
if (cstate <= C_WF_CONNECTION)
goto repeat;
/* The peer probably wants to see us outdated. */ /* The peer probably wants to see us outdated. */
rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
disk, D_OUTDATED), 0); disk, D_OUTDATED), 0);
......
...@@ -70,6 +70,11 @@ enum drbd_packet { ...@@ -70,6 +70,11 @@ enum drbd_packet {
* we may fall back to an opencoded loop instead. */ * we may fall back to an opencoded loop instead. */
P_WSAME = 0x34, P_WSAME = 0x34,
/* 0x35 already claimed in DRBD 9 */
P_ZEROES = 0x36, /* data sock: zero-out, WRITE_ZEROES */
/* 0x40 .. 0x48 already claimed in DRBD 9 */
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
P_MAX_OPT_CMD = 0x101, P_MAX_OPT_CMD = 0x101,
...@@ -130,6 +135,12 @@ struct p_header100 { ...@@ -130,6 +135,12 @@ struct p_header100 {
#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ #define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */
#define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */ #define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */
#define DP_ZEROES 1024 /* equiv. REQ_OP_WRITE_ZEROES */
/* possible combinations:
* REQ_OP_WRITE_ZEROES: DP_DISCARD | DP_ZEROES
* REQ_OP_WRITE_ZEROES + REQ_NOUNMAP: DP_ZEROES
*/
struct p_data { struct p_data {
u64 sector; /* 64 bits sector number */ u64 sector; /* 64 bits sector number */
...@@ -197,6 +208,42 @@ struct p_block_req { ...@@ -197,6 +208,42 @@ struct p_block_req {
*/ */
#define DRBD_FF_WSAME 4 #define DRBD_FF_WSAME 4
/* supports REQ_OP_WRITE_ZEROES on the "wire" protocol.
*
* We used to map that to "discard" on the sending side, and if we cannot
* guarantee that discard zeroes data, the receiving side would map discard
* back to zero-out.
*
* With the introduction of REQ_OP_WRITE_ZEROES,
* we started to use that for both WRITE_ZEROES and DISCARDS,
* hoping that WRITE_ZEROES would "do what we want",
* UNMAP if possible, zero-out the rest.
*
* The example scenario is some LVM "thin" backend.
*
* While an un-allocated block on dm-thin reads as zeroes, on a dm-thin
* with "skip_block_zeroing=true", after a partial block write allocated
* that block, that same block may well map "undefined old garbage" from
* the backends on LBAs that have not yet been written to.
*
* If we cannot distinguish between zero-out and discard on the receiving
* side, to avoid "undefined old garbage" to pop up randomly at later times
* on supposedly zero-initialized blocks, we'd need to map all discards to
* zero-out on the receiving side. But that would potentially do a full
* alloc on thinly provisioned backends, even when the expectation was to
* unmap/trim/discard/de-allocate.
*
* We need to distinguish on the protocol level, whether we need to guarantee
* zeroes (and thus use zero-out, potentially doing the mentioned full-alloc),
* or if we want to put the emphasis on discard, and only do a "best effort
* zeroing" (by "discarding" blocks aligned to discard-granularity, and zeroing
* only potential unaligned head and tail clippings), to at least *try* to
* avoid "false positives" in an online-verify later, hoping that someone
* set skip_block_zeroing=false.
*/
#define DRBD_FF_WZEROES 8
struct p_connection_features { struct p_connection_features {
u32 protocol_min; u32 protocol_min;
u32 feature_flags; u32 feature_flags;
......
This diff is collapsed.
...@@ -63,7 +63,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio ...@@ -63,7 +63,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
drbd_req_make_private_bio(req, bio_src); drbd_req_make_private_bio(req, bio_src);
req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0) req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
| (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0) | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
| (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0) | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
| (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0); | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
req->device = device; req->device = device;
req->master_bio = bio_src; req->master_bio = bio_src;
...@@ -1155,12 +1155,11 @@ static int drbd_process_write_request(struct drbd_request *req) ...@@ -1155,12 +1155,11 @@ static int drbd_process_write_request(struct drbd_request *req)
return remote; return remote;
} }
static void drbd_process_discard_req(struct drbd_request *req) static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
{ {
struct block_device *bdev = req->device->ldev->backing_bdev; int err = drbd_issue_discard_or_zero_out(req->device,
req->i.sector, req->i.size >> 9, flags);
if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9, if (err)
GFP_NOIO, 0))
req->private_bio->bi_status = BLK_STS_IOERR; req->private_bio->bi_status = BLK_STS_IOERR;
bio_endio(req->private_bio); bio_endio(req->private_bio);
} }
...@@ -1189,9 +1188,11 @@ drbd_submit_req_private_bio(struct drbd_request *req) ...@@ -1189,9 +1188,11 @@ drbd_submit_req_private_bio(struct drbd_request *req)
if (get_ldev(device)) { if (get_ldev(device)) {
if (drbd_insert_fault(device, type)) if (drbd_insert_fault(device, type))
bio_io_error(bio); bio_io_error(bio);
else if (bio_op(bio) == REQ_OP_WRITE_ZEROES || else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
bio_op(bio) == REQ_OP_DISCARD) drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT |
drbd_process_discard_req(req); ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM));
else if (bio_op(bio) == REQ_OP_DISCARD)
drbd_process_discard_or_zeroes_req(req, EE_TRIM);
else else
generic_make_request(bio); generic_make_request(bio);
put_ldev(device); put_ldev(device);
......
...@@ -208,6 +208,7 @@ enum drbd_req_state_bits { ...@@ -208,6 +208,7 @@ enum drbd_req_state_bits {
__RQ_WRITE, __RQ_WRITE,
__RQ_WSAME, __RQ_WSAME,
__RQ_UNMAP, __RQ_UNMAP,
__RQ_ZEROES,
/* Should call drbd_al_complete_io() for this request... */ /* Should call drbd_al_complete_io() for this request... */
__RQ_IN_ACT_LOG, __RQ_IN_ACT_LOG,
...@@ -253,6 +254,7 @@ enum drbd_req_state_bits { ...@@ -253,6 +254,7 @@ enum drbd_req_state_bits {
#define RQ_WRITE (1UL << __RQ_WRITE) #define RQ_WRITE (1UL << __RQ_WRITE)
#define RQ_WSAME (1UL << __RQ_WSAME) #define RQ_WSAME (1UL << __RQ_WSAME)
#define RQ_UNMAP (1UL << __RQ_UNMAP) #define RQ_UNMAP (1UL << __RQ_UNMAP)
#define RQ_ZEROES (1UL << __RQ_ZEROES)
#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) #define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG)
#define RQ_UNPLUG (1UL << __RQ_UNPLUG) #define RQ_UNPLUG (1UL << __RQ_UNPLUG)
#define RQ_POSTPONED (1UL << __RQ_POSTPONED) #define RQ_POSTPONED (1UL << __RQ_POSTPONED)
......
...@@ -688,11 +688,9 @@ request_detach(struct drbd_device *device) ...@@ -688,11 +688,9 @@ request_detach(struct drbd_device *device)
CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO); CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
} }
enum drbd_state_rv int drbd_request_detach_interruptible(struct drbd_device *device)
drbd_request_detach_interruptible(struct drbd_device *device)
{ {
enum drbd_state_rv rv; int ret, rv;
int ret;
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
wait_event_interruptible(device->state_wait, wait_event_interruptible(device->state_wait,
...@@ -1124,7 +1122,7 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st ...@@ -1124,7 +1122,7 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
ns.pdsk = D_UP_TO_DATE; ns.pdsk = D_UP_TO_DATE;
} }
/* Implications of the connection stat on the disk states */ /* Implications of the connection state on the disk states */
disk_min = D_DISKLESS; disk_min = D_DISKLESS;
disk_max = D_UP_TO_DATE; disk_max = D_UP_TO_DATE;
pdsk_min = D_INCONSISTENT; pdsk_min = D_INCONSISTENT;
...@@ -2109,9 +2107,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) ...@@ -2109,9 +2107,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
spin_unlock_irq(&connection->resource->req_lock); spin_unlock_irq(&connection->resource->req_lock);
} }
} }
kref_put(&connection->kref, drbd_destroy_connection);
conn_md_sync(connection); conn_md_sync(connection);
kref_put(&connection->kref, drbd_destroy_connection);
return 0; return 0;
} }
......
...@@ -131,7 +131,7 @@ extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state ...@@ -131,7 +131,7 @@ extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state
enum chg_state_flags, enum chg_state_flags,
struct completion *done); struct completion *done);
extern void print_st_err(struct drbd_device *, union drbd_state, extern void print_st_err(struct drbd_device *, union drbd_state,
union drbd_state, int); union drbd_state, enum drbd_state_rv);
enum drbd_state_rv enum drbd_state_rv
_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, _conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
...@@ -162,8 +162,7 @@ static inline int drbd_request_state(struct drbd_device *device, ...@@ -162,8 +162,7 @@ static inline int drbd_request_state(struct drbd_device *device,
} }
/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */ /* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
enum drbd_state_rv int drbd_request_detach_interruptible(struct drbd_device *device);
drbd_request_detach_interruptible(struct drbd_device *device);
enum drbd_role conn_highest_role(struct drbd_connection *connection); enum drbd_role conn_highest_role(struct drbd_connection *connection);
enum drbd_role conn_highest_peer(struct drbd_connection *connection); enum drbd_role conn_highest_peer(struct drbd_connection *connection);
......
...@@ -153,7 +153,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l ...@@ -153,7 +153,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
/* FIXME do we want to detach for failed REQ_OP_DISCARD? /* FIXME do we want to detach for failed REQ_OP_DISCARD?
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */
if (peer_req->flags & EE_WAS_ERROR) if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR); __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
......
...@@ -616,7 +616,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) ...@@ -616,7 +616,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
default: default:
WARN_ON_ONCE(1); WARN_ON_ONCE(1);
return -EIO; return -EIO;
break;
} }
} }
......
...@@ -633,7 +633,6 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len) ...@@ -633,7 +633,6 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
case VD_OP_GET_EFI: case VD_OP_GET_EFI:
case VD_OP_SET_EFI: case VD_OP_SET_EFI:
return -EOPNOTSUPP; return -EOPNOTSUPP;
break;
}; };
map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO; map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
......
...@@ -995,7 +995,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode) ...@@ -995,7 +995,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
struct swim3 __iomem *sw = fs->swim3; struct swim3 __iomem *sw = fs->swim3;
mutex_lock(&swim3_mutex); mutex_lock(&swim3_mutex);
if (fs->ref_count > 0 && --fs->ref_count == 0) { if (fs->ref_count > 0)
--fs->ref_count;
else if (fs->ref_count == -1)
fs->ref_count = 0;
if (fs->ref_count == 0) {
swim3_action(fs, MOTOR_OFF); swim3_action(fs, MOTOR_OFF);
out_8(&sw->control_bic, 0xff); out_8(&sw->control_bic, 0xff);
swim3_select(fs, RELAX); swim3_select(fs, RELAX);
...@@ -1087,8 +1091,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index) ...@@ -1087,8 +1091,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
struct floppy_state *fs = &floppy_states[index]; struct floppy_state *fs = &floppy_states[index];
int rc = -EBUSY; int rc = -EBUSY;
/* Do this first for message macros */
memset(fs, 0, sizeof(*fs));
fs->mdev = mdev; fs->mdev = mdev;
fs->index = index; fs->index = index;
...@@ -1151,7 +1153,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index) ...@@ -1151,7 +1153,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
swim3_err("%s", "Couldn't request interrupt\n"); swim3_err("%s", "Couldn't request interrupt\n");
pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0); pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
goto out_unmap; goto out_unmap;
return -EBUSY;
} }
timer_setup(&fs->timeout, NULL, 0); timer_setup(&fs->timeout, NULL, 0);
...@@ -1188,14 +1189,15 @@ static int swim3_attach(struct macio_dev *mdev, ...@@ -1188,14 +1189,15 @@ static int swim3_attach(struct macio_dev *mdev,
return rc; return rc;
} }
fs = &floppy_states[floppy_count];
disk = alloc_disk(1); disk = alloc_disk(1);
if (disk == NULL) { if (disk == NULL) {
rc = -ENOMEM; rc = -ENOMEM;
goto out_unregister; goto out_unregister;
} }
fs = &floppy_states[floppy_count];
memset(fs, 0, sizeof(*fs));
disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2, disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2,
BLK_MQ_F_SHOULD_MERGE); BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(disk->queue)) { if (IS_ERR(disk->queue)) {
......
...@@ -873,6 +873,7 @@ static void __exit exit_gdrom(void) ...@@ -873,6 +873,7 @@ static void __exit exit_gdrom(void)
platform_device_unregister(pd); platform_device_unregister(pd);
platform_driver_unregister(&gdrom_driver); platform_driver_unregister(&gdrom_driver);
kfree(gd.toc); kfree(gd.toc);
kfree(gd.cd_info);
} }
module_init(init_gdrom); module_init(init_gdrom);
......
...@@ -418,7 +418,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, ...@@ -418,7 +418,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
if (ret) { if (ret) {
pblk_err(pblk, "I/O submission failed: %d\n", ret); pblk_err(pblk, "I/O submission failed: %d\n", ret);
bio_put(bio); bio_put(bio);
bio_put(bio);
return ret; return ret;
} }
......
...@@ -499,22 +499,33 @@ static inline void blkg_get(struct blkcg_gq *blkg) ...@@ -499,22 +499,33 @@ static inline void blkg_get(struct blkcg_gq *blkg)
*/ */
static inline bool blkg_tryget(struct blkcg_gq *blkg) static inline bool blkg_tryget(struct blkcg_gq *blkg)
{ {
return percpu_ref_tryget(&blkg->refcnt); return blkg && percpu_ref_tryget(&blkg->refcnt);
} }
/** /**
* blkg_tryget_closest - try and get a blkg ref on the closet blkg * blkg_tryget_closest - try and get a blkg ref on the closet blkg
* @blkg: blkg to get * @blkg: blkg to get
* *
* This walks up the blkg tree to find the closest non-dying blkg and returns * This needs to be called rcu protected. As the failure mode here is to walk
* the blkg that it did association with as it may not be the passed in blkg. * up the blkg tree, this ensure that the blkg->parent pointers are always
* valid. This returns the blkg that it ended up taking a reference on or %NULL
* if no reference was taken.
*/ */
static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
{ {
while (blkg && !percpu_ref_tryget(&blkg->refcnt)) struct blkcg_gq *ret_blkg = NULL;
WARN_ON_ONCE(!rcu_read_lock_held());
while (blkg) {
if (blkg_tryget(blkg)) {
ret_blkg = blkg;
break;
}
blkg = blkg->parent; blkg = blkg->parent;
}
return blkg; return ret_blkg;
} }
/** /**
......
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
#endif #endif
extern const char *drbd_buildtag(void); extern const char *drbd_buildtag(void);
#define REL_VERSION "8.4.10" #define REL_VERSION "8.4.11"
#define API_VERSION 1 #define API_VERSION 1
#define PRO_VERSION_MIN 86 #define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 101 #define PRO_VERSION_MAX 101
......
...@@ -191,6 +191,7 @@ static inline void ct_assert_unique_operations(void) ...@@ -191,6 +191,7 @@ static inline void ct_assert_unique_operations(void)
{ {
switch (0) { switch (0) {
#include GENL_MAGIC_INCLUDE_FILE #include GENL_MAGIC_INCLUDE_FILE
case 0:
; ;
} }
} }
...@@ -209,6 +210,7 @@ static inline void ct_assert_unique_top_level_attributes(void) ...@@ -209,6 +210,7 @@ static inline void ct_assert_unique_top_level_attributes(void)
{ {
switch (0) { switch (0) {
#include GENL_MAGIC_INCLUDE_FILE #include GENL_MAGIC_INCLUDE_FILE
case 0:
; ;
} }
} }
...@@ -219,6 +221,7 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ ...@@ -219,6 +221,7 @@ static inline void ct_assert_unique_ ## s_name ## _attributes(void) \
{ \ { \
switch (0) { \ switch (0) { \
s_fields \ s_fields \
case 0: \
; \ ; \
} \ } \
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment