blkcg: move io_service_bytes and io_serviced stats into blkcg_gq

Currently, both cfq-iosched and blk-throttle keep track of io_service_bytes and io_serviced stats. While keeping track of them separately may be useful during development, it doesn't make much sense otherwise. Also, blk-throttle was counting bio's as IOs while cfq-iosched request's, which is more confusing than informative. This patch adds ->stat_bytes and ->stat_ios to blkg (blkcg_gq), removes the counterparts from cfq-iosched and blk-throttle and let them print from the common blkg counters. The common counters are incremented during bio issue in blkcg_bio_issue_check(). The outputs are still filtered by whether the policy has blkg_policy_data on a given blkg, so cfq's output won't show up if it has never been used for a given blkg. The only times when the outputs would differ significantly are when policies are attached on the fly or elevators are switched back and forth. Those are quite exceptional operations and I don't think they warrant keeping separate counters. v3: Update blkio-controller.txt accordingly. v2: Account IOs during bio issues instead of request completions so that bio-based drivers can be handled the same way. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <axboe@fb.com>

blkcg: move io_service_bytes and io_serviced stats into blkcg_gq
Currently, both cfq-iosched and blk-throttle keep track of io_service_bytes and io_serviced stats. While keeping track of them separately may be useful during development, it doesn't make much sense otherwise. Also, blk-throttle was counting bio's as IOs while cfq-iosched request's, which is more confusing than informative. This patch adds ->stat_bytes and ->stat_ios to blkg (blkcg_gq), removes the counterparts from cfq-iosched and blk-throttle and let them print from the common blkg counters. The common counters are incremented during bio issue in blkcg_bio_issue_check(). The outputs are still filtered by whether the policy has blkg_policy_data on a given blkg, so cfq's output won't show up if it has never been used for a given blkg. The only times when the outputs would differ significantly are when policies are attached on the fly or elevators are switched back and forth. Those are quite exceptional operations and I don't think they warrant keeping separate counters. v3: Update blkio-controller.txt accordingly. v2: Account IOs during bio issues instead of request completions so that bio-based drivers can be handled the same way. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <axboe@fb.com>
77ea7338 · Tejun Heo · Jens Axboe · f12c74ca · 77ea7338 · 77ea7338
Commit 77ea7338 authored Aug 18, 2015 by Tejun Heo Committed by Jens Axboe Aug 18, 2015
5 changed files
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -201,7 +201,7 @@ Proportional weight policy files
 	  specifies the number of bytes.

 - blkio.io_serviced
-	- Number of IOs completed to/from the disk by the group. These
+	- Number of IOs (bio) issued to the disk by the group. These
 	  are further divided by the type of operation - read or write, sync
 	  or async. First two fields specify the major and minor number of the
 	  device, third field specifies the operation type and the fourth field
@@ -327,18 +327,11 @@ Note: If both BW and IOPS rules are specified for a device, then IO is
      subjected to both the constraints.

 - blkio.throttle.io_serviced
-	- Number of IOs (bio) completed to/from the disk by the group (as
-	  seen by throttling policy). These are further divided by the type
-	  of operation - read or write, sync or async. First two fields specify
-	  the major and minor number of the device, third field specifies the
-	  operation type and the fourth field specifies the number of IOs.
-
-	  blkio.io_serviced does accounting as seen by CFQ and counts are in
-	  number of requests (struct request). On the other hand,
-	  blkio.throttle.io_serviced counts number of IO in terms of number
-	  of bios as seen by throttling policy.  These bios can later be
-	  merged by elevator and total number of requests completed can be
-	  lesser.
+	- Number of IOs (bio) issued to the disk by the group. These
+	  are further divided by the type of operation - read or write, sync
+	  or async. First two fields specify the major and minor number of the
+	  device, third field specifies the operation type and the fourth field
+	  specifies the number of IOs.

 - blkio.throttle.io_service_bytes
 	- Number of bytes transferred to/from the disk by the group. These
@@ -347,11 +340,6 @@ Note: If both BW and IOPS rules are specified for a device, then IO is
 	  device, third field specifies the operation type and the fourth field
 	  specifies the number of bytes.

-	  These numbers should roughly be same as blkio.io_service_bytes as
-	  updated by CFQ. The difference between two is that
-	  blkio.io_service_bytes will not be updated if CFQ is not operating
-	  on request queue.
-
 Common files among various policies
 -----------------------------------
 - blkio.reset_stats

--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -73,6 +73,9 @@ static void blkg_free(struct blkcg_gq *blkg)

 	if (blkg->blkcg != &blkcg_root)
 		blk_exit_rl(&blkg->rl);
+
+	blkg_rwstat_exit(&blkg->stat_ios);
+	blkg_rwstat_exit(&blkg->stat_bytes);
 	kfree(blkg);
 }

@@ -95,6 +98,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 	if (!blkg)
 		return NULL;

+	if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
+	    blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
+		goto err_free;
+
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
@@ -300,6 +307,7 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
+	struct blkcg_gq *parent = blkg->parent;
 	int i;

 	lockdep_assert_held(blkg->q->queue_lock);
@@ -315,6 +323,12 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 		if (blkg->pd[i] && pol->pd_offline_fn)
 			pol->pd_offline_fn(blkg->pd[i]);
 	}
+
+	if (parent) {
+		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
+		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
+	}
+
 	blkg->online = false;

 	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
@@ -431,6 +445,9 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
 	 * anyway.  If you get hit by a race, retry.
 	 */
 	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+		blkg_rwstat_reset(&blkg->stat_bytes);
+		blkg_rwstat_reset(&blkg->stat_ios);
+
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkcg_policy *pol = blkcg_policy[i];

@@ -579,6 +596,87 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 }
 EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);

+static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
+				    struct blkg_policy_data *pd, int off)
+{
+	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)pd->blkg + off);
+
+	return __blkg_prfill_rwstat(sf, pd, &rwstat);
+}
+
+/**
+ * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
+ * @sf: seq_file to print to
+ * @v: unused
+ *
+ * To be used as cftype->seq_show to print blkg->stat_bytes.
+ * cftype->private must be set to the blkcg_policy.
+ */
+int blkg_print_stat_bytes(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
+			  offsetof(struct blkcg_gq, stat_bytes), true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
+
+/**
+ * blkg_print_stat_bytes - seq_show callback for blkg->stat_ios
+ * @sf: seq_file to print to
+ * @v: unused
+ *
+ * To be used as cftype->seq_show to print blkg->stat_ios.  cftype->private
+ * must be set to the blkcg_policy.
+ */
+int blkg_print_stat_ios(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
+			  offsetof(struct blkcg_gq, stat_ios), true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
+
+static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
+					      struct blkg_policy_data *pd,
+					      int off)
+{
+	struct blkg_rwstat rwstat = blkg_rwstat_recursive_sum(pd->blkg,
+							      NULL, off);
+	return __blkg_prfill_rwstat(sf, pd, &rwstat);
+}
+
+/**
+ * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
+ * @sf: seq_file to print to
+ * @v: unused
+ */
+int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  blkg_prfill_rwstat_field_recursive,
+			  (void *)seq_cft(sf)->private,
+			  offsetof(struct blkcg_gq, stat_bytes), true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
+
+/**
+ * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
+ * @sf: seq_file to print to
+ * @v: unused
+ */
+int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
+{
+	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+			  blkg_prfill_rwstat_field_recursive,
+			  (void *)seq_cft(sf)->private,
+			  offsetof(struct blkcg_gq, stat_ios), true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
+
 /**
 * blkg_stat_recursive_sum - collect hierarchical blkg_stat
 * @blkg: blkg of interest

--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -133,11 +133,6 @@ struct throtl_grp {
 	/* When did we start a new slice */
 	unsigned long slice_start[2];
 	unsigned long slice_end[2];
-
-	/* total bytes transferred */
-	struct blkg_rwstat		service_bytes;
-	/* total IOs serviced, post merge */
-	struct blkg_rwstat		serviced;
 };

 struct throtl_data
@@ -335,11 +330,7 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)

 	tg = kzalloc_node(sizeof(*tg), gfp, node);
 	if (!tg)
-		goto err;
-
-	if (blkg_rwstat_init(&tg->service_bytes, gfp) ||
-	    blkg_rwstat_init(&tg->serviced, gfp))
-		goto err_free_tg;
+		return NULL;

 	throtl_service_queue_init(&tg->service_queue);

@@ -355,13 +346,6 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 	tg->iops[WRITE] = -1;

 	return &tg->pd;
-
-err_free_tg:
-	blkg_rwstat_exit(&tg->serviced);
-	blkg_rwstat_exit(&tg->service_bytes);
-	kfree(tg);
-err:
-	return NULL;
 }

 static void throtl_pd_init(struct blkg_policy_data *pd)
@@ -419,19 +403,9 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
 	struct throtl_grp *tg = pd_to_tg(pd);

 	del_timer_sync(&tg->service_queue.pending_timer);
-	blkg_rwstat_exit(&tg->serviced);
-	blkg_rwstat_exit(&tg->service_bytes);
 	kfree(tg);
 }

-static void throtl_pd_reset_stats(struct blkg_policy_data *pd)
-{
-	struct throtl_grp *tg = pd_to_tg(pd);
-
-	blkg_rwstat_reset(&tg->service_bytes);
-	blkg_rwstat_reset(&tg->serviced);
-}
-
 static struct throtl_grp *
 throtl_rb_first(struct throtl_service_queue *parent_sq)
 {
@@ -839,25 +813,6 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 	return 0;
 }

-static void throtl_update_dispatch_stats(struct blkcg_gq *blkg, u64 bytes,
-					 int rw)
-{
-	struct throtl_grp *tg = blkg_to_tg(blkg);
-	unsigned long flags;
-
-	/*
-	 * Disabling interrupts to provide mutual exclusion between two
-	 * writes on same cpu. It probably is not needed for 64bit. Not
-	 * optimizing that case yet.
-	 */
-	local_irq_save(flags);
-
-	blkg_rwstat_add(&tg->serviced, rw, 1);
-	blkg_rwstat_add(&tg->service_bytes, rw, bytes);
-
-	local_irq_restore(flags);
-}
-
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 {
 	bool rw = bio_data_dir(bio);
@@ -871,17 +826,9 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 	 * more than once as a throttled bio will go through blk-throtl the
 	 * second time when it eventually gets issued.  Set it when a bio
 	 * is being charged to a tg.
-	 *
-	 * Dispatch stats aren't recursive and each @bio should only be
-	 * accounted by the @tg it was originally associated with.  Let's
-	 * update the stats when setting REQ_THROTTLED for the first time
-	 * which is guaranteed to be for the @bio's original tg.
 	 */
-	if (!(bio->bi_rw & REQ_THROTTLED)) {
+	if (!(bio->bi_rw & REQ_THROTTLED))
 		bio->bi_rw |= REQ_THROTTLED;
-		throtl_update_dispatch_stats(tg_to_blkg(tg),
-					     bio->bi_iter.bi_size, bio->bi_rw);
-	}
 }

 /**
@@ -1161,13 +1108,6 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
 	}
 }

-static int tg_print_rwstat(struct seq_file *sf, void *v)
-{
-	blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
-			  &blkcg_policy_throtl, seq_cft(sf)->private, true);
-	return 0;
-}
-
 static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd,
 			      int off)
 {
@@ -1304,13 +1244,13 @@ static struct cftype throtl_files[] = {
 	},
 	{
 		.name = "throttle.io_service_bytes",
-		.private = offsetof(struct throtl_grp, service_bytes),
-		.seq_show = tg_print_rwstat,
+		.private = (unsigned long)&blkcg_policy_throtl,
+		.seq_show = blkg_print_stat_bytes,
 	},
 	{
 		.name = "throttle.io_serviced",
-		.private = offsetof(struct throtl_grp, serviced),
-		.seq_show = tg_print_rwstat,
+		.private = (unsigned long)&blkcg_policy_throtl,
+		.seq_show = blkg_print_stat_ios,
 	},
 	{ }	/* terminate */
 };
@@ -1329,7 +1269,6 @@ static struct blkcg_policy blkcg_policy_throtl = {
 	.pd_init_fn		= throtl_pd_init,
 	.pd_online_fn		= throtl_pd_online,
 	.pd_free_fn		= throtl_pd_free,
-	.pd_reset_stats_fn	= throtl_pd_reset_stats,
 };

 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,

--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -177,10 +177,6 @@ enum wl_type_t {

 struct cfqg_stats {
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-	/* total bytes transferred */
-	struct blkg_rwstat		service_bytes;
-	/* total IOs serviced, post merge */
-	struct blkg_rwstat		serviced;
 	/* number of ios merged */
 	struct blkg_rwstat		merged;
 	/* total time spent on device in ns, may not be accurate w/ queueing */
@@ -696,8 +692,6 @@ static inline void cfqg_stats_update_dispatch(struct cfq_group *cfqg,
 					      uint64_t bytes, int rw)
 {
 	blkg_stat_add(&cfqg->stats.sectors, bytes >> 9);
-	blkg_rwstat_add(&cfqg->stats.serviced, rw, 1);
-	blkg_rwstat_add(&cfqg->stats.service_bytes, rw, bytes);
 }

 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
@@ -717,8 +711,6 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
 static void cfqg_stats_reset(struct cfqg_stats *stats)
 {
 	/* queued stats shouldn't be cleared */
-	blkg_rwstat_reset(&stats->service_bytes);
-	blkg_rwstat_reset(&stats->serviced);
 	blkg_rwstat_reset(&stats->merged);
 	blkg_rwstat_reset(&stats->service_time);
 	blkg_rwstat_reset(&stats->wait_time);
@@ -738,8 +730,6 @@ static void cfqg_stats_reset(struct cfqg_stats *stats)
 static void cfqg_stats_add_aux(struct cfqg_stats *to, struct cfqg_stats *from)
 {
 	/* queued stats shouldn't be cleared */
-	blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes);
-	blkg_rwstat_add_aux(&to->serviced, &from->serviced);
 	blkg_rwstat_add_aux(&to->merged, &from->merged);
 	blkg_rwstat_add_aux(&to->service_time, &from->service_time);
 	blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
@@ -1544,8 +1534,6 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg)
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 static void cfqg_stats_exit(struct cfqg_stats *stats)
 {
-	blkg_rwstat_exit(&stats->service_bytes);
-	blkg_rwstat_exit(&stats->serviced);
 	blkg_rwstat_exit(&stats->merged);
 	blkg_rwstat_exit(&stats->service_time);
 	blkg_rwstat_exit(&stats->wait_time);
@@ -1566,9 +1554,7 @@ static void cfqg_stats_exit(struct cfqg_stats *stats)

 static int cfqg_stats_init(struct cfqg_stats *stats, gfp_t gfp)
 {
-	if (blkg_rwstat_init(&stats->service_bytes, gfp) ||
-	    blkg_rwstat_init(&stats->serviced, gfp) ||
-	    blkg_rwstat_init(&stats->merged, gfp) ||
+	if (blkg_rwstat_init(&stats->merged, gfp) ||
 	    blkg_rwstat_init(&stats->service_time, gfp) ||
 	    blkg_rwstat_init(&stats->wait_time, gfp) ||
 	    blkg_rwstat_init(&stats->queued, gfp) ||
@@ -1994,13 +1980,13 @@ static struct cftype cfq_blkcg_files[] = {
 	},
 	{
 		.name = "io_service_bytes",
-		.private = offsetof(struct cfq_group, stats.service_bytes),
-		.seq_show = cfqg_print_rwstat,
+		.private = (unsigned long)&blkcg_policy_cfq,
+		.seq_show = blkg_print_stat_bytes,
 	},
 	{
 		.name = "io_serviced",
-		.private = offsetof(struct cfq_group, stats.serviced),
-		.seq_show = cfqg_print_rwstat,
+		.private = (unsigned long)&blkcg_policy_cfq,
+		.seq_show = blkg_print_stat_ios,
 	},
 	{
 		.name = "io_service_time",
@@ -2036,13 +2022,13 @@ static struct cftype cfq_blkcg_files[] = {
 	},
 	{
 		.name = "io_service_bytes_recursive",
-		.private = offsetof(struct cfq_group, stats.service_bytes),
-		.seq_show = cfqg_print_rwstat_recursive,
+		.private = (unsigned long)&blkcg_policy_cfq,
+		.seq_show = blkg_print_stat_bytes_recursive,
 	},
 	{
 		.name = "io_serviced_recursive",
-		.private = offsetof(struct cfq_group, stats.serviced),
-		.seq_show = cfqg_print_rwstat_recursive,
+		.private = (unsigned long)&blkcg_policy_cfq,
+		.seq_show = blkg_print_stat_ios_recursive,
 	},
 	{
 		.name = "io_service_time_recursive",

--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -127,6 +127,9 @@ struct blkcg_gq {
 	/* is this blkg online? protected by both blkcg and q locks */
 	bool				online;

+	struct blkg_rwstat		stat_bytes;
+	struct blkg_rwstat		stat_ios;
+
 	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];

 	struct rcu_head			rcu_head;
@@ -190,6 +193,10 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 		       int off);
+int blkg_print_stat_bytes(struct seq_file *sf, void *v);
+int blkg_print_stat_ios(struct seq_file *sf, void *v);
+int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
+int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);

 u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
 			    struct blkcg_policy *pol, int off);
@@ -700,6 +707,13 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,

 	throtl = blk_throtl_bio(q, blkg, bio);

+	if (!throtl) {
+		blkg = blkg ?: q->root_blkg;
+		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags,
+				bio->bi_iter.bi_size);
+		blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1);
+	}
+
 	rcu_read_unlock();
 	return !throtl;
 }