Commit f4b7e272 authored by Andrey Ryabinin's avatar Andrey Ryabinin Committed by Linus Torvalds

mm: remove zone_lru_lock() function, access ->lru_lock directly

We have common pattern to access lru_lock from a page pointer:
	zone_lru_lock(page_zone(page))

Which is silly, because it unfolds to this:
	&NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]->zone_pgdat->lru_lock
while we can simply do
	&NODE_DATA(page_to_nid(page))->lru_lock

Remove zone_lru_lock() function, since it's only complicate things.  Use
'page_pgdat(page)->lru_lock' pattern instead.

[aryabinin@virtuozzo.com: a slightly better version of __split_huge_page()]
  Link: http://lkml.kernel.org/r/20190301121651.7741-1-aryabinin@virtuozzo.com
Link: http://lkml.kernel.org/r/20190228083329.31892-2-aryabinin@virtuozzo.comSigned-off-by: default avatarAndrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: default avatarVlastimil Babka <vbabka@suse.cz>
Acked-by: default avatarMel Gorman <mgorman@techsingularity.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent a7ca12f9
......@@ -107,9 +107,9 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
8. LRU
Each memcg has its own private LRU. Now, its handling is under global
VM's control (means that it's handled under global zone_lru_lock).
VM's control (means that it's handled under global pgdat->lru_lock).
Almost all routines around memcg's LRU is called by global LRU's
list management functions under zone_lru_lock().
list management functions under pgdat->lru_lock.
A special function is mem_cgroup_isolate_pages(). This scans
memcg's private LRU and call __isolate_lru_page() to extract a page
......
......@@ -267,11 +267,11 @@ When oom event notifier is registered, event will be delivered.
Other lock order is following:
PG_locked.
mm->page_table_lock
zone_lru_lock
pgdat->lru_lock
lock_page_cgroup.
In many cases, just lock_page_cgroup() is called.
per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
zone_lru_lock, it has no lock of its own.
pgdat->lru_lock, it has no lock of its own.
2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
......
......@@ -80,7 +80,7 @@ struct page {
struct { /* Page cache and anonymous pages */
/**
* @lru: Pageout list, eg. active_list protected by
* zone_lru_lock. Sometimes used as a generic list
* pgdat->lru_lock. Sometimes used as a generic list
* by the page owner.
*/
struct list_head lru;
......
......@@ -730,10 +730,6 @@ typedef struct pglist_data {
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
static inline spinlock_t *zone_lru_lock(struct zone *zone)
{
return &zone->zone_pgdat->lru_lock;
}
static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
{
......
......@@ -775,6 +775,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
unsigned long end_pfn, isolate_mode_t isolate_mode)
{
struct zone *zone = cc->zone;
pg_data_t *pgdat = zone->zone_pgdat;
unsigned long nr_scanned = 0, nr_isolated = 0;
struct lruvec *lruvec;
unsigned long flags = 0;
......@@ -839,8 +840,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
* if contended.
*/
if (!(low_pfn % SWAP_CLUSTER_MAX)
&& compact_unlock_should_abort(zone_lru_lock(zone), flags,
&locked, cc))
&& compact_unlock_should_abort(&pgdat->lru_lock,
flags, &locked, cc))
break;
if (!pfn_valid_within(low_pfn))
......@@ -910,7 +911,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
if (unlikely(__PageMovable(page)) &&
!PageIsolated(page)) {
if (locked) {
spin_unlock_irqrestore(zone_lru_lock(zone),
spin_unlock_irqrestore(&pgdat->lru_lock,
flags);
locked = false;
}
......@@ -940,7 +941,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/* If we already hold the lock, we can skip some rechecking */
if (!locked) {
locked = compact_lock_irqsave(zone_lru_lock(zone),
locked = compact_lock_irqsave(&pgdat->lru_lock,
&flags, cc);
/* Try get exclusive access under lock */
......@@ -965,7 +966,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
}
}
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
lruvec = mem_cgroup_page_lruvec(page, pgdat);
/* Try isolate the page */
if (__isolate_lru_page(page, isolate_mode) != 0)
......@@ -1007,7 +1008,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
*/
if (nr_isolated) {
if (locked) {
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
locked = false;
}
putback_movable_pages(&cc->migratepages);
......@@ -1034,7 +1035,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
isolate_abort:
if (locked)
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
/*
* Updated the cached scanner pfn once the pageblock has been scanned
......
......@@ -98,8 +98,8 @@
* ->swap_lock (try_to_unmap_one)
* ->private_lock (try_to_unmap_one)
* ->i_pages lock (try_to_unmap_one)
* ->zone_lru_lock(zone) (follow_page->mark_page_accessed)
* ->zone_lru_lock(zone) (check_pte_range->isolate_lru_page)
* ->pgdat->lru_lock (follow_page->mark_page_accessed)
* ->pgdat->lru_lock (check_pte_range->isolate_lru_page)
* ->private_lock (page_remove_rmap->set_page_dirty)
* ->i_pages lock (page_remove_rmap->set_page_dirty)
* bdi.wb->list_lock (page_remove_rmap->set_page_dirty)
......
......@@ -2440,11 +2440,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
pgoff_t end, unsigned long flags)
{
struct page *head = compound_head(page);
struct zone *zone = page_zone(head);
pg_data_t *pgdat = page_pgdat(head);
struct lruvec *lruvec;
int i;
lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
lruvec = mem_cgroup_page_lruvec(head, pgdat);
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);
......@@ -2475,7 +2475,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
xa_unlock(&head->mapping->i_pages);
}
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
remap_page(head);
......@@ -2686,7 +2686,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
lru_add_drain();
/* prevent PageLRU to go away from under us, and freeze lru stats */
spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
spin_lock_irqsave(&pgdata->lru_lock, flags);
if (mapping) {
XA_STATE(xas, &mapping->i_pages, page_index(head));
......@@ -2731,7 +2731,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
spin_unlock(&pgdata->split_queue_lock);
fail: if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
spin_unlock_irqrestore(&pgdata->lru_lock, flags);
remap_page(head);
ret = -EBUSY;
}
......
......@@ -2362,13 +2362,13 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
static void lock_page_lru(struct page *page, int *isolated)
{
struct zone *zone = page_zone(page);
pg_data_t *pgdat = page_pgdat(page);
spin_lock_irq(zone_lru_lock(zone));
spin_lock_irq(&pgdat->lru_lock);
if (PageLRU(page)) {
struct lruvec *lruvec;
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
lruvec = mem_cgroup_page_lruvec(page, pgdat);
ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, page_lru(page));
*isolated = 1;
......@@ -2378,17 +2378,17 @@ static void lock_page_lru(struct page *page, int *isolated)
static void unlock_page_lru(struct page *page, int isolated)
{
struct zone *zone = page_zone(page);
pg_data_t *pgdat = page_pgdat(page);
if (isolated) {
struct lruvec *lruvec;
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
lruvec = mem_cgroup_page_lruvec(page, pgdat);
VM_BUG_ON_PAGE(PageLRU(page), page);
SetPageLRU(page);
add_page_to_lru_list(page, lruvec, page_lru(page));
}
spin_unlock_irq(zone_lru_lock(zone));
spin_unlock_irq(&pgdat->lru_lock);
}
static void commit_charge(struct page *page, struct mem_cgroup *memcg,
......@@ -2674,7 +2674,7 @@ void __memcg_kmem_uncharge(struct page *page, int order)
/*
* Because tail pages are not marked as "used", set it. We're under
* zone_lru_lock and migration entries setup in all page mappings.
* pgdat->lru_lock and migration entries setup in all page mappings.
*/
void mem_cgroup_split_huge_fixup(struct page *head)
{
......
......@@ -182,7 +182,7 @@ static void __munlock_isolation_failed(struct page *page)
unsigned int munlock_vma_page(struct page *page)
{
int nr_pages;
struct zone *zone = page_zone(page);
pg_data_t *pgdat = page_pgdat(page);
/* For try_to_munlock() and to serialize with page migration */
BUG_ON(!PageLocked(page));
......@@ -194,7 +194,7 @@ unsigned int munlock_vma_page(struct page *page)
* might otherwise copy PageMlocked to part of the tail pages before
* we clear it in the head page. It also stabilizes hpage_nr_pages().
*/
spin_lock_irq(zone_lru_lock(zone));
spin_lock_irq(&pgdat->lru_lock);
if (!TestClearPageMlocked(page)) {
/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
......@@ -203,17 +203,17 @@ unsigned int munlock_vma_page(struct page *page)
}
nr_pages = hpage_nr_pages(page);
__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
if (__munlock_isolate_lru_page(page, true)) {
spin_unlock_irq(zone_lru_lock(zone));
spin_unlock_irq(&pgdat->lru_lock);
__munlock_isolated_page(page);
goto out;
}
__munlock_isolation_failed(page);
unlock_out:
spin_unlock_irq(zone_lru_lock(zone));
spin_unlock_irq(&pgdat->lru_lock);
out:
return nr_pages - 1;
......@@ -298,7 +298,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
pagevec_init(&pvec_putback);
/* Phase 1: page isolation */
spin_lock_irq(zone_lru_lock(zone));
spin_lock_irq(&zone->zone_pgdat->lru_lock);
for (i = 0; i < nr; i++) {
struct page *page = pvec->pages[i];
......@@ -325,7 +325,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
pvec->pages[i] = NULL;
}
__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
spin_unlock_irq(zone_lru_lock(zone));
spin_unlock_irq(&zone->zone_pgdat->lru_lock);
/* Now we can release pins of pages that we are not munlocking */
pagevec_release(&pvec_putback);
......
......@@ -31,7 +31,7 @@
static struct page *page_idle_get_page(unsigned long pfn)
{
struct page *page;
struct zone *zone;
pg_data_t *pgdat;
if (!pfn_valid(pfn))
return NULL;
......@@ -41,13 +41,13 @@ static struct page *page_idle_get_page(unsigned long pfn)
!get_page_unless_zero(page))
return NULL;
zone = page_zone(page);
spin_lock_irq(zone_lru_lock(zone));
pgdat = page_pgdat(page);
spin_lock_irq(&pgdat->lru_lock);
if (unlikely(!PageLRU(page))) {
put_page(page);
page = NULL;
}
spin_unlock_irq(zone_lru_lock(zone));
spin_unlock_irq(&pgdat->lru_lock);
return page;
}
......
......@@ -27,7 +27,7 @@
* mapping->i_mmap_rwsem
* anon_vma->rwsem
* mm->page_table_lock or pte_lock
* zone_lru_lock (in mark_page_accessed, isolate_lru_page)
* pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
* swap_lock (in swap_duplicate, swap_info_get)
* mmlist_lock (in mmput, drain_mmlist and others)
* mapping->private_lock (in __set_page_dirty_buffers)
......
......@@ -58,16 +58,16 @@ static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
static void __page_cache_release(struct page *page)
{
if (PageLRU(page)) {
struct zone *zone = page_zone(page);
pg_data_t *pgdat = page_pgdat(page);
struct lruvec *lruvec;
unsigned long flags;
spin_lock_irqsave(zone_lru_lock(zone), flags);
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
spin_lock_irqsave(&pgdat->lru_lock, flags);
lruvec = mem_cgroup_page_lruvec(page, pgdat);
VM_BUG_ON_PAGE(!PageLRU(page), page);
__ClearPageLRU(page);
del_page_from_lru_list(page, lruvec, page_off_lru(page));
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
}
__ClearPageWaiters(page);
mem_cgroup_uncharge(page);
......@@ -322,12 +322,12 @@ static inline void activate_page_drain(int cpu)
void activate_page(struct page *page)
{
struct zone *zone = page_zone(page);
pg_data_t *pgdat = page_pgdat(page);
page = compound_head(page);
spin_lock_irq(zone_lru_lock(zone));
__activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat), NULL);
spin_unlock_irq(zone_lru_lock(zone));
spin_lock_irq(&pgdat->lru_lock);
__activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL);
spin_unlock_irq(&pgdat->lru_lock);
}
#endif
......
......@@ -1614,8 +1614,8 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
}
/*
* zone_lru_lock is heavily contended. Some of the functions that
/**
* pgdat->lru_lock is heavily contended. Some of the functions that
* shrink the lists perform better by taking out a batch of pages
* and working on them outside the LRU lock.
*
......@@ -1750,11 +1750,11 @@ int isolate_lru_page(struct page *page)
WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
if (PageLRU(page)) {
struct zone *zone = page_zone(page);
pg_data_t *pgdat = page_pgdat(page);
struct lruvec *lruvec;
spin_lock_irq(zone_lru_lock(zone));
lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
spin_lock_irq(&pgdat->lru_lock);
lruvec = mem_cgroup_page_lruvec(page, pgdat);
if (PageLRU(page)) {
int lru = page_lru(page);
get_page(page);
......@@ -1762,7 +1762,7 @@ int isolate_lru_page(struct page *page)
del_page_from_lru_list(page, lruvec, lru);
ret = 0;
}
spin_unlock_irq(zone_lru_lock(zone));
spin_unlock_irq(&pgdat->lru_lock);
}
return ret;
}
......@@ -1990,9 +1990,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* processes, from rmap.
*
* If the pages are mostly unmapped, the processing is fast and it is
* appropriate to hold zone_lru_lock across the whole operation. But if
* appropriate to hold pgdat->lru_lock across the whole operation. But if
* the pages are mapped, the processing is slow (page_referenced()) so we
* should drop zone_lru_lock around each page. It's impossible to balance
* should drop pgdat->lru_lock around each page. It's impossible to balance
* this, so instead we remove the pages from the LRU while processing them.
* It is safe to rely on PG_active against the non-LRU pages in here because
* nobody will play with that bit on a non-LRU page.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment