Commit 17604240, authored by Christoph Hellwig, committed by Jens Axboe

mm: add PSI accounting around ->read_folio and ->readahead calls

PSI tries to account for the cost of bringing back in pages discarded by
the MM LRU management.  Currently the prime place for that is hooked into
the bio submission path, which is a rather bad place:

 - it does not actually account I/O for non-block file systems, of which
   we have many
 - it adds overhead and a layering violation to the block layer

Add the accounting into the two places in the core MM code that read
pages into an address space by calling into ->read_folio and ->readahead
so that the entire file system operations are covered, to broaden
the coverage and allow removing the accounting in the block layer going
forward.

As psi_memstall_enter can deal with nested calls, this will not lead to
double accounting even while the bio annotations are still present.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20220915094200.139713-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent e8848087
...@@ -1173,6 +1173,8 @@ struct readahead_control { ...@@ -1173,6 +1173,8 @@ struct readahead_control {
pgoff_t _index; pgoff_t _index;
unsigned int _nr_pages; unsigned int _nr_pages;
unsigned int _batch_count; unsigned int _batch_count;
bool _workingset;
unsigned long _pflags;
}; };
#define DEFINE_READAHEAD(ractl, f, r, m, i) \ #define DEFINE_READAHEAD(ractl, f, r, m, i) \
......
...@@ -2382,6 +2382,8 @@ static void filemap_get_read_batch(struct address_space *mapping, ...@@ -2382,6 +2382,8 @@ static void filemap_get_read_batch(struct address_space *mapping,
static int filemap_read_folio(struct file *file, filler_t filler, static int filemap_read_folio(struct file *file, filler_t filler,
struct folio *folio) struct folio *folio)
{ {
bool workingset = folio_test_workingset(folio);
unsigned long pflags;
int error; int error;
/* /*
...@@ -2390,8 +2392,13 @@ static int filemap_read_folio(struct file *file, filler_t filler, ...@@ -2390,8 +2392,13 @@ static int filemap_read_folio(struct file *file, filler_t filler,
* fails. * fails.
*/ */
folio_clear_error(folio); folio_clear_error(folio);
/* Start the actual read. The read will unlock the page. */ /* Start the actual read. The read will unlock the page. */
if (unlikely(workingset))
psi_memstall_enter(&pflags);
error = filler(file, folio); error = filler(file, folio);
if (unlikely(workingset))
psi_memstall_leave(&pflags);
if (error) if (error)
return error; return error;
......
...@@ -122,6 +122,7 @@ ...@@ -122,6 +122,7 @@
#include <linux/task_io_accounting_ops.h> #include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/psi.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/mm_inline.h> #include <linux/mm_inline.h>
...@@ -152,6 +153,8 @@ static void read_pages(struct readahead_control *rac) ...@@ -152,6 +153,8 @@ static void read_pages(struct readahead_control *rac)
if (!readahead_count(rac)) if (!readahead_count(rac))
return; return;
if (unlikely(rac->_workingset))
psi_memstall_enter(&rac->_pflags);
blk_start_plug(&plug); blk_start_plug(&plug);
if (aops->readahead) { if (aops->readahead) {
...@@ -179,6 +182,9 @@ static void read_pages(struct readahead_control *rac) ...@@ -179,6 +182,9 @@ static void read_pages(struct readahead_control *rac)
} }
blk_finish_plug(&plug); blk_finish_plug(&plug);
if (unlikely(rac->_workingset))
psi_memstall_leave(&rac->_pflags);
rac->_workingset = false;
BUG_ON(readahead_count(rac)); BUG_ON(readahead_count(rac));
} }
...@@ -252,6 +258,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, ...@@ -252,6 +258,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
} }
if (i == nr_to_read - lookahead_size) if (i == nr_to_read - lookahead_size)
folio_set_readahead(folio); folio_set_readahead(folio);
ractl->_workingset |= folio_test_workingset(folio);
ractl->_nr_pages++; ractl->_nr_pages++;
} }
...@@ -480,11 +487,14 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, ...@@ -480,11 +487,14 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
if (index == mark) if (index == mark)
folio_set_readahead(folio); folio_set_readahead(folio);
err = filemap_add_folio(ractl->mapping, folio, index, gfp); err = filemap_add_folio(ractl->mapping, folio, index, gfp);
if (err) if (err) {
folio_put(folio); folio_put(folio);
else return err;
ractl->_nr_pages += 1UL << order; }
return err;
ractl->_nr_pages += 1UL << order;
ractl->_workingset |= folio_test_workingset(folio);
return 0;
} }
void page_cache_ra_order(struct readahead_control *ractl, void page_cache_ra_order(struct readahead_control *ractl,
...@@ -826,6 +836,10 @@ void readahead_expand(struct readahead_control *ractl, ...@@ -826,6 +836,10 @@ void readahead_expand(struct readahead_control *ractl,
put_page(page); put_page(page);
return; return;
} }
if (unlikely(PageWorkingset(page)) && !ractl->_workingset) {
ractl->_workingset = true;
psi_memstall_enter(&ractl->_pflags);
}
ractl->_nr_pages++; ractl->_nr_pages++;
if (ra) { if (ra) {
ra->size++; ra->size++;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment