Commit 36005bae authored by Tim Chen, committed by Linus Torvalds

mm/swap: allocate swap slots in batches

Currently, swap slots are allocated one page at a time, causing
contention on the swap_info lock that protects the swap partition for
every page being swapped.

This patch adds new functions get_swap_pages and scan_swap_map_slots to
request multiple swap slots at once.  This reduces contention on the
swap_info lock.  Also, scan_swap_map_slots can operate more efficiently,
as swap slots often occur in clusters close to each other on a swap
device and it is quicker to allocate them together.

Link: http://lkml.kernel.org/r/9fec2845544371f62c3763d43510045e33d286a6.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e8c26ab6
...@@ -27,6 +27,7 @@ struct bio; ...@@ -27,6 +27,7 @@ struct bio;
#define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \ #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
SWAP_FLAG_DISCARD_PAGES) SWAP_FLAG_DISCARD_PAGES)
#define SWAP_BATCH 64
static inline int current_is_kswapd(void) static inline int current_is_kswapd(void)
{ {
...@@ -386,6 +387,7 @@ static inline long get_nr_swap_pages(void) ...@@ -386,6 +387,7 @@ static inline long get_nr_swap_pages(void)
extern void si_swapinfo(struct sysinfo *); extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void); extern swp_entry_t get_swap_page(void);
extern swp_entry_t get_swap_page_of_type(int); extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t); extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t); extern int swap_duplicate(swp_entry_t);
......
...@@ -496,7 +496,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si, ...@@ -496,7 +496,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
* Try to get a swap entry from current cpu's swap entry pool (a cluster). This * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
* might involve allocating a new cluster for current CPU too. * might involve allocating a new cluster for current CPU too.
*/ */
static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
unsigned long *offset, unsigned long *scan_base) unsigned long *offset, unsigned long *scan_base)
{ {
struct percpu_cluster *cluster; struct percpu_cluster *cluster;
...@@ -520,7 +520,7 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, ...@@ -520,7 +520,7 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
*scan_base = *offset = si->cluster_next; *scan_base = *offset = si->cluster_next;
goto new_cluster; goto new_cluster;
} else } else
return; return false;
} }
found_free = false; found_free = false;
...@@ -552,16 +552,22 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, ...@@ -552,16 +552,22 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
cluster->next = tmp + 1; cluster->next = tmp + 1;
*offset = tmp; *offset = tmp;
*scan_base = tmp; *scan_base = tmp;
return found_free;
} }
static unsigned long scan_swap_map(struct swap_info_struct *si, static int scan_swap_map_slots(struct swap_info_struct *si,
unsigned char usage) unsigned char usage, int nr,
swp_entry_t slots[])
{ {
struct swap_cluster_info *ci; struct swap_cluster_info *ci;
unsigned long offset; unsigned long offset;
unsigned long scan_base; unsigned long scan_base;
unsigned long last_in_cluster = 0; unsigned long last_in_cluster = 0;
int latency_ration = LATENCY_LIMIT; int latency_ration = LATENCY_LIMIT;
int n_ret = 0;
if (nr > SWAP_BATCH)
nr = SWAP_BATCH;
/* /*
* We try to cluster swap pages by allocating them sequentially * We try to cluster swap pages by allocating them sequentially
...@@ -579,8 +585,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, ...@@ -579,8 +585,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
/* SSD algorithm */ /* SSD algorithm */
if (si->cluster_info) { if (si->cluster_info) {
scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
goto checks; goto checks;
else
goto scan;
} }
if (unlikely(!si->cluster_nr--)) { if (unlikely(!si->cluster_nr--)) {
...@@ -624,8 +632,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, ...@@ -624,8 +632,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
checks: checks:
if (si->cluster_info) { if (si->cluster_info) {
while (scan_swap_map_ssd_cluster_conflict(si, offset)) while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); /* take a break if we already got some slots */
if (n_ret)
goto done;
if (!scan_swap_map_try_ssd_cluster(si, &offset,
&scan_base))
goto scan;
}
} }
if (!(si->flags & SWP_WRITEOK)) if (!(si->flags & SWP_WRITEOK))
goto no_page; goto no_page;
...@@ -650,7 +664,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, ...@@ -650,7 +664,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
if (si->swap_map[offset]) { if (si->swap_map[offset]) {
unlock_cluster(ci); unlock_cluster(ci);
if (!n_ret)
goto scan; goto scan;
else
goto done;
} }
if (offset == si->lowest_bit) if (offset == si->lowest_bit)
...@@ -669,9 +686,43 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, ...@@ -669,9 +686,43 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
inc_cluster_info_page(si, si->cluster_info, offset); inc_cluster_info_page(si, si->cluster_info, offset);
unlock_cluster(ci); unlock_cluster(ci);
si->cluster_next = offset + 1; si->cluster_next = offset + 1;
si->flags -= SWP_SCANNING; slots[n_ret++] = swp_entry(si->type, offset);
/* got enough slots or reach max slots? */
if ((n_ret == nr) || (offset >= si->highest_bit))
goto done;
/* search for next available slot */
/* time to take a break? */
if (unlikely(--latency_ration < 0)) {
if (n_ret)
goto done;
spin_unlock(&si->lock);
cond_resched();
spin_lock(&si->lock);
latency_ration = LATENCY_LIMIT;
}
/* try to get more slots in cluster */
if (si->cluster_info) {
if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
goto checks;
else
goto done;
}
/* non-ssd case */
++offset;
return offset; /* non-ssd case, still more slots in cluster? */
if (si->cluster_nr && !si->swap_map[offset]) {
--si->cluster_nr;
goto checks;
}
done:
si->flags -= SWP_SCANNING;
return n_ret;
scan: scan:
spin_unlock(&si->lock); spin_unlock(&si->lock);
...@@ -709,17 +760,41 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, ...@@ -709,17 +760,41 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
no_page: no_page:
si->flags -= SWP_SCANNING; si->flags -= SWP_SCANNING;
return n_ret;
}
static unsigned long scan_swap_map(struct swap_info_struct *si,
unsigned char usage)
{
swp_entry_t entry;
int n_ret;
n_ret = scan_swap_map_slots(si, usage, 1, &entry);
if (n_ret)
return swp_offset(entry);
else
return 0; return 0;
} }
swp_entry_t get_swap_page(void) int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
{ {
struct swap_info_struct *si, *next; struct swap_info_struct *si, *next;
pgoff_t offset; long avail_pgs;
int n_ret = 0;
if (atomic_long_read(&nr_swap_pages) <= 0) avail_pgs = atomic_long_read(&nr_swap_pages);
if (avail_pgs <= 0)
goto noswap; goto noswap;
atomic_long_dec(&nr_swap_pages);
if (n_goal > SWAP_BATCH)
n_goal = SWAP_BATCH;
if (n_goal > avail_pgs)
n_goal = avail_pgs;
atomic_long_sub(n_goal, &nr_swap_pages);
spin_lock(&swap_avail_lock); spin_lock(&swap_avail_lock);
...@@ -745,14 +820,14 @@ swp_entry_t get_swap_page(void) ...@@ -745,14 +820,14 @@ swp_entry_t get_swap_page(void)
spin_unlock(&si->lock); spin_unlock(&si->lock);
goto nextsi; goto nextsi;
} }
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
/* This is called for allocating swap entry for cache */ n_goal, swp_entries);
offset = scan_swap_map(si, SWAP_HAS_CACHE);
spin_unlock(&si->lock); spin_unlock(&si->lock);
if (offset) if (n_ret)
return swp_entry(si->type, offset); goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n", pr_debug("scan_swap_map of si %d failed to find offset\n",
si->type); si->type);
spin_lock(&swap_avail_lock); spin_lock(&swap_avail_lock);
nextsi: nextsi:
/* /*
...@@ -763,7 +838,8 @@ swp_entry_t get_swap_page(void) ...@@ -763,7 +838,8 @@ swp_entry_t get_swap_page(void)
* up between us dropping swap_avail_lock and taking si->lock. * up between us dropping swap_avail_lock and taking si->lock.
* Since we dropped the swap_avail_lock, the swap_avail_head * Since we dropped the swap_avail_lock, the swap_avail_head
* list may have been modified; so if next is still in the * list may have been modified; so if next is still in the
* swap_avail_head list then try it, otherwise start over. * swap_avail_head list then try it, otherwise start over
* if we have not gotten any slots.
*/ */
if (plist_node_empty(&next->avail_list)) if (plist_node_empty(&next->avail_list))
goto start_over; goto start_over;
...@@ -771,9 +847,19 @@ swp_entry_t get_swap_page(void) ...@@ -771,9 +847,19 @@ swp_entry_t get_swap_page(void)
spin_unlock(&swap_avail_lock); spin_unlock(&swap_avail_lock);
atomic_long_inc(&nr_swap_pages); check_out:
if (n_ret < n_goal)
atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
noswap: noswap:
return (swp_entry_t) {0}; return n_ret;
}
swp_entry_t get_swap_page(void)
{
swp_entry_t entry;
get_swap_pages(1, &entry);
return entry;
} }
/* The only caller of this function is now suspend routine */ /* The only caller of this function is now suspend routine */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment