Commit b8e24a93 authored by Johannes Weiner, committed by Jens Axboe

block: annotate refault stalls from IO submission

psi tracks the time tasks wait for refaulting pages to become
uptodate, but it does not track the time spent submitting the IO. The
submission part can be significant if backing storage is contended or
when cgroup throttling (io.latency) is in effect - a lot of time is
spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when
the bio is reading userspace workingset pages.

Tested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 73d9c8d4
...@@ -806,6 +806,9 @@ void __bio_add_page(struct bio *bio, struct page *page, ...@@ -806,6 +806,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
bio->bi_iter.bi_size += len; bio->bi_iter.bi_size += len;
bio->bi_vcnt++; bio->bi_vcnt++;
if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
bio_set_flag(bio, BIO_WORKINGSET);
} }
EXPORT_SYMBOL_GPL(__bio_add_page); EXPORT_SYMBOL_GPL(__bio_add_page);
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/blk-cgroup.h> #include <linux/blk-cgroup.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/bpf.h> #include <linux/bpf.h>
#include <linux/psi.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/block.h> #include <trace/events/block.h>
...@@ -1134,6 +1135,10 @@ EXPORT_SYMBOL_GPL(direct_make_request); ...@@ -1134,6 +1135,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
*/ */
blk_qc_t submit_bio(struct bio *bio) blk_qc_t submit_bio(struct bio *bio)
{ {
bool workingset_read = false;
unsigned long pflags;
blk_qc_t ret;
if (blkcg_punt_bio_submit(bio)) if (blkcg_punt_bio_submit(bio))
return BLK_QC_T_NONE; return BLK_QC_T_NONE;
...@@ -1152,6 +1157,8 @@ blk_qc_t submit_bio(struct bio *bio) ...@@ -1152,6 +1157,8 @@ blk_qc_t submit_bio(struct bio *bio)
if (op_is_write(bio_op(bio))) { if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count); count_vm_events(PGPGOUT, count);
} else { } else {
if (bio_flagged(bio, BIO_WORKINGSET))
workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size); task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count); count_vm_events(PGPGIN, count);
} }
...@@ -1166,7 +1173,21 @@ blk_qc_t submit_bio(struct bio *bio) ...@@ -1166,7 +1173,21 @@ blk_qc_t submit_bio(struct bio *bio)
} }
} }
return generic_make_request(bio); /*
* If we're reading data that is part of the userspace
* workingset, count submission time as memory stall. When the
* device is congested, or the submitting cgroup IO-throttled,
* submission can be a significant part of overall IO time.
*/
if (workingset_read)
psi_memstall_enter(&pflags);
ret = generic_make_request(bio);
if (workingset_read)
psi_memstall_leave(&pflags);
return ret;
} }
EXPORT_SYMBOL(submit_bio); EXPORT_SYMBOL(submit_bio);
......
...@@ -209,6 +209,7 @@ enum { ...@@ -209,6 +209,7 @@ enum {
BIO_BOUNCED, /* bio is a bounce bio */ BIO_BOUNCED, /* bio is a bounce bio */
BIO_USER_MAPPED, /* contains user pages */ BIO_USER_MAPPED, /* contains user pages */
BIO_NULL_MAPPED, /* contains invalid user pages */ BIO_NULL_MAPPED, /* contains invalid user pages */
BIO_WORKINGSET, /* contains userspace workingset pages */
BIO_QUIET, /* Make BIO Quiet */ BIO_QUIET, /* Make BIO Quiet */
BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */
BIO_REFFED, /* bio has elevated ->bi_cnt */ BIO_REFFED, /* bio has elevated ->bi_cnt */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment