Commit e6269c44 authored by Tejun Heo's avatar Tejun Heo Committed by Jens Axboe

blkcg: add blkg_[rw]stat->aux_cnt and replace cfq_group->dead_stats with it

cgroup stats are local to each cgroup and don't propagate to
ancestors by default.  When recursive stats are necessary, the sum is
calculated over all the descendants.  This initially was for backward
compatibility to support both group-local and recursive stats but this
mode of operation makes general sense as stat update is much hotter
than reporting those stats.

This however ends up losing recursive stats when a child is removed.
To work around this, cfq-iosched adds its stats to its parent
cfq_group->dead_stats which is summed up together when calculating
recursive stats.

It's planned that the core stats will be moved to blkcg_gq, so we want
to move the mechanism for keeping track of the stats of dead children
from cfq to blkcg core.  This patch adds blkg_[rw]stat->aux_cnt which
are atomic64_t's keeping track of auxiliary counts which are excluded
when reading local counts but included for recursive.

blkg_[rw]stat_merge() which were used by cfq to implement dead_stats
are replaced by blkg_[rw]stat_add_aux(), and cfq now forwards stats of
a dead cgroup to the aux counts of parent->stats instead of separate
->dead_stats.

This will also help making blkg_[rw]stats per-cpu.
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent ae118896
...@@ -584,7 +584,7 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); ...@@ -584,7 +584,7 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
* @off: offset to the blkg_stat in @pd * @off: offset to the blkg_stat in @pd
* *
* Collect the blkg_stat specified by @off from @pd and all its online * Collect the blkg_stat specified by @off from @pd and all its online
* descendants and return the sum. The caller must be holding the queue * descendants and their aux counts. The caller must be holding the queue
* lock for online tests. * lock for online tests.
*/ */
u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
...@@ -602,7 +602,8 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) ...@@ -602,7 +602,8 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
struct blkg_stat *stat = (void *)pos_pd + off; struct blkg_stat *stat = (void *)pos_pd + off;
if (pos_blkg->online) if (pos_blkg->online)
sum += blkg_stat_read(stat); sum += blkg_stat_read(stat) +
atomic64_read(&stat->aux_cnt);
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -616,7 +617,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); ...@@ -616,7 +617,7 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
* @off: offset to the blkg_stat in @pd * @off: offset to the blkg_stat in @pd
* *
* Collect the blkg_rwstat specified by @off from @pd and all its online * Collect the blkg_rwstat specified by @off from @pd and all its online
* descendants and return the sum. The caller must be holding the queue * descendants and their aux counts. The caller must be holding the queue
* lock for online tests. * lock for online tests.
*/ */
struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
...@@ -642,7 +643,8 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, ...@@ -642,7 +643,8 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
tmp = blkg_rwstat_read(rwstat); tmp = blkg_rwstat_read(rwstat);
for (i = 0; i < BLKG_RWSTAT_NR; i++) for (i = 0; i < BLKG_RWSTAT_NR; i++)
sum.cnt[i] += tmp.cnt[i]; sum.cnt[i] += tmp.cnt[i] +
atomic64_read(&rwstat->aux_cnt[i]);
} }
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -304,7 +304,6 @@ struct cfq_group { ...@@ -304,7 +304,6 @@ struct cfq_group {
int dispatched; int dispatched;
struct cfq_ttime ttime; struct cfq_ttime ttime;
struct cfqg_stats stats; /* stats for this cfqg */ struct cfqg_stats stats; /* stats for this cfqg */
struct cfqg_stats dead_stats; /* stats pushed from dead children */
/* async queue for each priority case */ /* async queue for each priority case */
struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR]; struct cfq_queue *async_cfqq[2][IOPRIO_BE_NR];
...@@ -736,28 +735,28 @@ static void cfqg_stats_reset(struct cfqg_stats *stats) ...@@ -736,28 +735,28 @@ static void cfqg_stats_reset(struct cfqg_stats *stats)
} }
/* @to += @from */ /* @to += @from */
static void cfqg_stats_merge(struct cfqg_stats *to, struct cfqg_stats *from) static void cfqg_stats_add_aux(struct cfqg_stats *to, struct cfqg_stats *from)
{ {
/* queued stats shouldn't be cleared */ /* queued stats shouldn't be cleared */
blkg_rwstat_merge(&to->service_bytes, &from->service_bytes); blkg_rwstat_add_aux(&to->service_bytes, &from->service_bytes);
blkg_rwstat_merge(&to->serviced, &from->serviced); blkg_rwstat_add_aux(&to->serviced, &from->serviced);
blkg_rwstat_merge(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->merged, &from->merged);
blkg_rwstat_merge(&to->service_time, &from->service_time); blkg_rwstat_add_aux(&to->service_time, &from->service_time);
blkg_rwstat_merge(&to->wait_time, &from->wait_time); blkg_rwstat_add_aux(&to->wait_time, &from->wait_time);
blkg_stat_merge(&from->time, &from->time); blkg_stat_add_aux(&from->time, &from->time);
#ifdef CONFIG_DEBUG_BLK_CGROUP #ifdef CONFIG_DEBUG_BLK_CGROUP
blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time); blkg_stat_add_aux(&to->unaccounted_time, &from->unaccounted_time);
blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum); blkg_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum);
blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples); blkg_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples);
blkg_stat_merge(&to->dequeue, &from->dequeue); blkg_stat_add_aux(&to->dequeue, &from->dequeue);
blkg_stat_merge(&to->group_wait_time, &from->group_wait_time); blkg_stat_add_aux(&to->group_wait_time, &from->group_wait_time);
blkg_stat_merge(&to->idle_time, &from->idle_time); blkg_stat_add_aux(&to->idle_time, &from->idle_time);
blkg_stat_merge(&to->empty_time, &from->empty_time); blkg_stat_add_aux(&to->empty_time, &from->empty_time);
#endif #endif
} }
/* /*
* Transfer @cfqg's stats to its parent's dead_stats so that the ancestors' * Transfer @cfqg's stats to its parent's aux counts so that the ancestors'
* recursive stats can still account for the amount used by this cfqg after * recursive stats can still account for the amount used by this cfqg after
* it's gone. * it's gone.
*/ */
...@@ -770,10 +769,8 @@ static void cfqg_stats_xfer_dead(struct cfq_group *cfqg) ...@@ -770,10 +769,8 @@ static void cfqg_stats_xfer_dead(struct cfq_group *cfqg)
if (unlikely(!parent)) if (unlikely(!parent))
return; return;
cfqg_stats_merge(&parent->dead_stats, &cfqg->stats); cfqg_stats_add_aux(&parent->stats, &cfqg->stats);
cfqg_stats_merge(&parent->dead_stats, &cfqg->dead_stats);
cfqg_stats_reset(&cfqg->stats); cfqg_stats_reset(&cfqg->stats);
cfqg_stats_reset(&cfqg->dead_stats);
} }
#else /* CONFIG_CFQ_GROUP_IOSCHED */ #else /* CONFIG_CFQ_GROUP_IOSCHED */
...@@ -1606,7 +1603,6 @@ static struct blkg_policy_data *cfq_pd_alloc(gfp_t gfp, int node) ...@@ -1606,7 +1603,6 @@ static struct blkg_policy_data *cfq_pd_alloc(gfp_t gfp, int node)
cfq_init_cfqg_base(cfqg); cfq_init_cfqg_base(cfqg);
cfqg_stats_init(&cfqg->stats); cfqg_stats_init(&cfqg->stats);
cfqg_stats_init(&cfqg->dead_stats);
return &cfqg->pd; return &cfqg->pd;
} }
...@@ -1649,38 +1645,11 @@ static void cfq_pd_free(struct blkg_policy_data *pd) ...@@ -1649,38 +1645,11 @@ static void cfq_pd_free(struct blkg_policy_data *pd)
return kfree(pd); return kfree(pd);
} }
/* offset delta from cfqg->stats to cfqg->dead_stats */
static const int dead_stats_off_delta = offsetof(struct cfq_group, dead_stats) -
offsetof(struct cfq_group, stats);
/* to be used by recursive prfill, sums live and dead stats recursively */
static u64 cfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off)
{
u64 sum = 0;
sum += blkg_stat_recursive_sum(pd, off);
sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta);
return sum;
}
/* to be used by recursive prfill, sums live and dead rwstats recursively */
static struct blkg_rwstat cfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd,
int off)
{
struct blkg_rwstat a, b;
a = blkg_rwstat_recursive_sum(pd, off);
b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta);
blkg_rwstat_merge(&a, &b);
return a;
}
static void cfq_pd_reset_stats(struct blkg_policy_data *pd) static void cfq_pd_reset_stats(struct blkg_policy_data *pd)
{ {
struct cfq_group *cfqg = pd_to_cfqg(pd); struct cfq_group *cfqg = pd_to_cfqg(pd);
cfqg_stats_reset(&cfqg->stats); cfqg_stats_reset(&cfqg->stats);
cfqg_stats_reset(&cfqg->dead_stats);
} }
static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd, static struct cfq_group *cfq_lookup_cfqg(struct cfq_data *cfqd,
...@@ -1883,7 +1852,7 @@ static int cfqg_print_rwstat(struct seq_file *sf, void *v) ...@@ -1883,7 +1852,7 @@ static int cfqg_print_rwstat(struct seq_file *sf, void *v)
static u64 cfqg_prfill_stat_recursive(struct seq_file *sf, static u64 cfqg_prfill_stat_recursive(struct seq_file *sf,
struct blkg_policy_data *pd, int off) struct blkg_policy_data *pd, int off)
{ {
u64 sum = cfqg_stat_pd_recursive_sum(pd, off); u64 sum = blkg_stat_recursive_sum(pd, off);
return __blkg_prfill_u64(sf, pd, sum); return __blkg_prfill_u64(sf, pd, sum);
} }
...@@ -1891,7 +1860,7 @@ static u64 cfqg_prfill_stat_recursive(struct seq_file *sf, ...@@ -1891,7 +1860,7 @@ static u64 cfqg_prfill_stat_recursive(struct seq_file *sf,
static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
struct blkg_policy_data *pd, int off) struct blkg_policy_data *pd, int off)
{ {
struct blkg_rwstat sum = cfqg_rwstat_pd_recursive_sum(pd, off); struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd, off);
return __blkg_prfill_rwstat(sf, pd, &sum); return __blkg_prfill_rwstat(sf, pd, &sum);
} }
......
...@@ -53,14 +53,20 @@ struct blkcg { ...@@ -53,14 +53,20 @@ struct blkcg {
#endif #endif
}; };
/*
* blkg_[rw]stat->aux_cnt is excluded for local stats but included for
* recursive. Used to carry stats of dead children.
*/
struct blkg_stat { struct blkg_stat {
struct u64_stats_sync syncp; struct u64_stats_sync syncp;
uint64_t cnt; uint64_t cnt;
atomic64_t aux_cnt;
}; };
struct blkg_rwstat { struct blkg_rwstat {
struct u64_stats_sync syncp; struct u64_stats_sync syncp;
uint64_t cnt[BLKG_RWSTAT_NR]; uint64_t cnt[BLKG_RWSTAT_NR];
atomic64_t aux_cnt[BLKG_RWSTAT_NR];
}; };
/* /*
...@@ -483,6 +489,7 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl, ...@@ -483,6 +489,7 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
static inline void blkg_stat_init(struct blkg_stat *stat) static inline void blkg_stat_init(struct blkg_stat *stat)
{ {
u64_stats_init(&stat->syncp); u64_stats_init(&stat->syncp);
atomic64_set(&stat->aux_cnt, 0);
} }
/** /**
...@@ -504,8 +511,9 @@ static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) ...@@ -504,8 +511,9 @@ static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
* blkg_stat_read - read the current value of a blkg_stat * blkg_stat_read - read the current value of a blkg_stat
* @stat: blkg_stat to read * @stat: blkg_stat to read
* *
* Read the current value of @stat. This function can be called without * Read the current value of @stat. The returned value doesn't include the
* synchroniztion and takes care of u64 atomicity. * aux count. This function can be called without synchroniztion and takes
* care of u64 atomicity.
*/ */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat) static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{ {
...@@ -527,23 +535,31 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat) ...@@ -527,23 +535,31 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
static inline void blkg_stat_reset(struct blkg_stat *stat) static inline void blkg_stat_reset(struct blkg_stat *stat)
{ {
stat->cnt = 0; stat->cnt = 0;
atomic64_set(&stat->aux_cnt, 0);
} }
/** /**
* blkg_stat_merge - merge a blkg_stat into another * blkg_stat_add_aux - add a blkg_stat into another's aux count
* @to: the destination blkg_stat * @to: the destination blkg_stat
* @from: the source * @from: the source
* *
* Add @from's count to @to. * Add @from's count including the aux one to @to's aux count.
*/ */
static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) static inline void blkg_stat_add_aux(struct blkg_stat *to,
struct blkg_stat *from)
{ {
blkg_stat_add(to, blkg_stat_read(from)); atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
&to->aux_cnt);
} }
static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
{ {
int i;
u64_stats_init(&rwstat->syncp); u64_stats_init(&rwstat->syncp);
for (i = 0; i < BLKG_RWSTAT_NR; i++)
atomic64_set(&rwstat->aux_cnt[i], 0);
} }
/** /**
...@@ -614,26 +630,30 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) ...@@ -614,26 +630,30 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
*/ */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{ {
int i;
memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
for (i = 0; i < BLKG_RWSTAT_NR; i++)
atomic64_set(&rwstat->aux_cnt[i], 0);
} }
/** /**
* blkg_rwstat_merge - merge a blkg_rwstat into another * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
* @to: the destination blkg_rwstat * @to: the destination blkg_rwstat
* @from: the source * @from: the source
* *
* Add @from's counts to @to. * Add @from's count including the aux one to @to's aux count.
*/ */
static inline void blkg_rwstat_merge(struct blkg_rwstat *to, static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
struct blkg_rwstat *from) struct blkg_rwstat *from)
{ {
struct blkg_rwstat v = blkg_rwstat_read(from); struct blkg_rwstat v = blkg_rwstat_read(from);
int i; int i;
u64_stats_update_begin(&to->syncp);
for (i = 0; i < BLKG_RWSTAT_NR; i++) for (i = 0; i < BLKG_RWSTAT_NR; i++)
to->cnt[i] += v.cnt[i]; atomic64_add(v.cnt[i] + atomic64_read(&from->aux_cnt[i]),
u64_stats_update_end(&to->syncp); &to->aux_cnt[i]);
} }
#ifdef CONFIG_BLK_DEV_THROTTLING #ifdef CONFIG_BLK_DEV_THROTTLING
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment