Commit 99fbb6bf authored by Matthew Wilcox (Oracle), committed by Andrew Morton

mm: make folios_put() the basis of release_pages()

Patch series "Rearrange batched folio freeing", v3.

Other than the obvious "remove calls to compound_head" changes, the
fundamental belief here is that iterating a linked list is much slower
than iterating an array (5-15x slower in my testing).  There's also an
associated belief that since we iterate the batch of folios three times,
we do better when the array is small (ie 15 entries) than we do with a
batch that is hundreds of entries long, which only gives us the
opportunity for the first pages to fall out of cache by the time we get to
the end.

It is possible we should increase the size of folio_batch.  Hopefully the
bots let us know if this introduces any performance regressions.


This patch (of 3):

By making release_pages() call folios_put(), we can get rid of the calls
to compound_head() for the callers that already know they have folios.  We
can also get rid of the lock_batch tracking as we know the size of the
batch is limited by folio_batch.  This does reduce the maximum number of
pages for which the lruvec lock is held, from SWAP_CLUSTER_MAX (32) to
PAGEVEC_SIZE (15).  I do not expect this to make a significant difference,
but if it does, we can increase PAGEVEC_SIZE to 31.

Link: https://lkml.kernel.org/r/20240227174254.710559-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20240227174254.710559-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 5dad6048
...@@ -36,6 +36,7 @@ struct anon_vma; ...@@ -36,6 +36,7 @@ struct anon_vma;
struct anon_vma_chain; struct anon_vma_chain;
struct user_struct; struct user_struct;
struct pt_regs; struct pt_regs;
struct folio_batch;
extern int sysctl_page_lock_unfairness; extern int sysctl_page_lock_unfairness;
...@@ -1512,6 +1513,8 @@ static inline void folio_put_refs(struct folio *folio, int refs) ...@@ -1512,6 +1513,8 @@ static inline void folio_put_refs(struct folio *folio, int refs)
__folio_put(folio); __folio_put(folio);
} }
void folios_put_refs(struct folio_batch *folios, unsigned int *refs);
/* /*
* union release_pages_arg - an array of pages or folios * union release_pages_arg - an array of pages or folios
* *
...@@ -1534,18 +1537,19 @@ void release_pages(release_pages_arg, int nr); ...@@ -1534,18 +1537,19 @@ void release_pages(release_pages_arg, int nr);
/** /**
* folios_put - Decrement the reference count on an array of folios. * folios_put - Decrement the reference count on an array of folios.
* @folios: The folios. * @folios: The folios.
* @nr: How many folios there are.
* *
* Like folio_put(), but for an array of folios. This is more efficient * Like folio_put(), but for a batch of folios. This is more efficient
* than writing the loop yourself as it will optimise the locks which * than writing the loop yourself as it will optimise the locks which need
* need to be taken if the folios are freed. * to be taken if the folios are freed. The folios batch is returned
* empty and ready to be reused for another batch; there is no need to
* reinitialise it.
* *
* Context: May be called in process or interrupt context, but not in NMI * Context: May be called in process or interrupt context, but not in NMI
* context. May be called while holding a spinlock. * context. May be called while holding a spinlock.
*/ */
static inline void folios_put(struct folio **folios, unsigned int nr) static inline void folios_put(struct folio_batch *folios)
{ {
release_pages(folios, nr); folios_put_refs(folios, NULL);
} }
static inline void put_page(struct page *page) static inline void put_page(struct page *page)
......
...@@ -206,8 +206,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch) ...@@ -206,8 +206,7 @@ static void mlock_folio_batch(struct folio_batch *fbatch)
if (lruvec) if (lruvec)
unlock_page_lruvec_irq(lruvec); unlock_page_lruvec_irq(lruvec);
folios_put(fbatch->folios, folio_batch_count(fbatch)); folios_put(fbatch);
folio_batch_reinit(fbatch);
} }
void mlock_drain_local(void) void mlock_drain_local(void)
......
...@@ -89,7 +89,7 @@ static void __page_cache_release(struct folio *folio) ...@@ -89,7 +89,7 @@ static void __page_cache_release(struct folio *folio)
__folio_clear_lru_flags(folio); __folio_clear_lru_flags(folio);
unlock_page_lruvec_irqrestore(lruvec, flags); unlock_page_lruvec_irqrestore(lruvec, flags);
} }
/* See comment on folio_test_mlocked in release_pages() */ /* See comment on folio_test_mlocked in folios_put() */
if (unlikely(folio_test_mlocked(folio))) { if (unlikely(folio_test_mlocked(folio))) {
long nr_pages = folio_nr_pages(folio); long nr_pages = folio_nr_pages(folio);
...@@ -175,7 +175,7 @@ static void lru_add_fn(struct lruvec *lruvec, struct folio *folio) ...@@ -175,7 +175,7 @@ static void lru_add_fn(struct lruvec *lruvec, struct folio *folio)
* while the LRU lock is held. * while the LRU lock is held.
* *
* (That is not true of __page_cache_release(), and not necessarily * (That is not true of __page_cache_release(), and not necessarily
* true of release_pages(): but those only clear the mlocked flag after * true of folios_put(): but those only clear the mlocked flag after
* folio_put_testzero() has excluded any other users of the folio.) * folio_put_testzero() has excluded any other users of the folio.)
*/ */
if (folio_evictable(folio)) { if (folio_evictable(folio)) {
...@@ -221,8 +221,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn) ...@@ -221,8 +221,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
if (lruvec) if (lruvec)
unlock_page_lruvec_irqrestore(lruvec, flags); unlock_page_lruvec_irqrestore(lruvec, flags);
folios_put(fbatch->folios, folio_batch_count(fbatch)); folios_put(fbatch);
folio_batch_reinit(fbatch);
} }
static void folio_batch_add_and_move(struct folio_batch *fbatch, static void folio_batch_add_and_move(struct folio_batch *fbatch,
...@@ -946,47 +945,30 @@ void lru_cache_disable(void) ...@@ -946,47 +945,30 @@ void lru_cache_disable(void)
} }
/** /**
* release_pages - batched put_page() * folios_put_refs - Reduce the reference count on a batch of folios.
* @arg: array of pages to release * @folios: The folios.
* @nr: number of pages * @refs: The number of refs to subtract from each folio.
* *
* Decrement the reference count on all the pages in @arg. If it * Like folio_put(), but for a batch of folios. This is more efficient
* fell to zero, remove the page from the LRU and free it. * than writing the loop yourself as it will optimise the locks which need
* to be taken if the folios are freed. The folios batch is returned
* empty and ready to be reused for another batch; there is no need
* to reinitialise it. If @refs is NULL, we subtract one from each
* folio refcount.
* *
* Note that the argument can be an array of pages, encoded pages, * Context: May be called in process or interrupt context, but not in NMI
* or folio pointers. We ignore any encoded bits, and turn any of * context. May be called while holding a spinlock.
* them into just a folio that gets free'd.
*/ */
void release_pages(release_pages_arg arg, int nr) void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
{ {
int i; int i;
struct encoded_page **encoded = arg.encoded_pages;
LIST_HEAD(pages_to_free); LIST_HEAD(pages_to_free);
struct lruvec *lruvec = NULL; struct lruvec *lruvec = NULL;
unsigned long flags = 0; unsigned long flags = 0;
unsigned int lock_batch;
for (i = 0; i < nr; i++) { for (i = 0; i < folios->nr; i++) {
unsigned int nr_refs = 1; struct folio *folio = folios->folios[i];
struct folio *folio; unsigned int nr_refs = refs ? refs[i] : 1;
/* Turn any of the argument types into a folio */
folio = page_folio(encoded_page_ptr(encoded[i]));
/* Is our next entry actually "nr_pages" -> "nr_refs" ? */
if (unlikely(encoded_page_flags(encoded[i]) &
ENCODED_PAGE_BIT_NR_PAGES_NEXT))
nr_refs = encoded_nr_pages(encoded[++i]);
/*
* Make sure the IRQ-safe lock-holding time does not get
* excessive with a continuous string of pages from the
* same lruvec. The lock is held only if lruvec != NULL.
*/
if (lruvec && ++lock_batch == SWAP_CLUSTER_MAX) {
unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL;
}
if (is_huge_zero_page(&folio->page)) if (is_huge_zero_page(&folio->page))
continue; continue;
...@@ -1016,13 +998,8 @@ void release_pages(release_pages_arg arg, int nr) ...@@ -1016,13 +998,8 @@ void release_pages(release_pages_arg arg, int nr)
} }
if (folio_test_lru(folio)) { if (folio_test_lru(folio)) {
struct lruvec *prev_lruvec = lruvec;
lruvec = folio_lruvec_relock_irqsave(folio, lruvec, lruvec = folio_lruvec_relock_irqsave(folio, lruvec,
&flags); &flags);
if (prev_lruvec != lruvec)
lock_batch = 0;
lruvec_del_folio(lruvec, folio); lruvec_del_folio(lruvec, folio);
__folio_clear_lru_flags(folio); __folio_clear_lru_flags(folio);
} }
...@@ -1046,6 +1023,47 @@ void release_pages(release_pages_arg arg, int nr) ...@@ -1046,6 +1023,47 @@ void release_pages(release_pages_arg arg, int nr)
mem_cgroup_uncharge_list(&pages_to_free); mem_cgroup_uncharge_list(&pages_to_free);
free_unref_page_list(&pages_to_free); free_unref_page_list(&pages_to_free);
folio_batch_reinit(folios);
}
EXPORT_SYMBOL(folios_put_refs);
/**
* release_pages - batched put_page()
* @arg: array of pages to release
* @nr: number of pages
*
* Decrement the reference count on all the pages in @arg. If it
* fell to zero, remove the page from the LRU and free it.
*
* Note that the argument can be an array of pages, encoded pages,
* or folio pointers. We ignore any encoded bits, and turn any of
* them into just a folio that gets free'd.
*/
void release_pages(release_pages_arg arg, int nr)
{
struct folio_batch fbatch;
int refs[PAGEVEC_SIZE];
struct encoded_page **encoded = arg.encoded_pages;
int i;
folio_batch_init(&fbatch);
for (i = 0; i < nr; i++) {
/* Turn any of the argument types into a folio */
struct folio *folio = page_folio(encoded_page_ptr(encoded[i]));
/* Is our next entry actually "nr_pages" -> "nr_refs" ? */
refs[fbatch.nr] = 1;
if (unlikely(encoded_page_flags(encoded[i]) &
ENCODED_PAGE_BIT_NR_PAGES_NEXT))
refs[fbatch.nr] = encoded_nr_pages(encoded[++i]);
if (folio_batch_add(&fbatch, folio) > 0)
continue;
folios_put_refs(&fbatch, refs);
}
if (fbatch.nr)
folios_put_refs(&fbatch, refs);
} }
EXPORT_SYMBOL(release_pages); EXPORT_SYMBOL(release_pages);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment