Commit d1dc6f1b authored by Dan Streetman, committed by Linus Torvalds

frontswap: allow multiple backends

Change frontswap single pointer to a singly linked list of frontswap
implementations.  Update Xen tmem implementation as register no longer
returns anything.

Frontswap only keeps track of a single implementation; any
implementation that registers second (or later) will replace the
previously registered implementation, and gets a pointer to the previous
implementation that the new implementation is expected to pass all
frontswap functions to if it can't handle the function itself.  However
that method doesn't really make much sense, as passing that work on to
every implementation adds unnecessary work to implementations; instead,
frontswap should simply keep a list of all registered implementations
and try each implementation for any function.  Most importantly, neither
of the two currently existing frontswap implementations in the kernel
actually do anything with any previous frontswap implementation that
they replace when registering.

This allows frontswap to successfully manage multiple implementations by
keeping a list of them all.
Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent b05b9f5f
...@@ -381,15 +381,9 @@ static int __init xen_tmem_init(void) ...@@ -381,15 +381,9 @@ static int __init xen_tmem_init(void)
#ifdef CONFIG_FRONTSWAP #ifdef CONFIG_FRONTSWAP
if (tmem_enabled && frontswap) { if (tmem_enabled && frontswap) {
char *s = ""; char *s = "";
struct frontswap_ops *old_ops;
tmem_frontswap_poolid = -1; tmem_frontswap_poolid = -1;
old_ops = frontswap_register_ops(&tmem_frontswap_ops); frontswap_register_ops(&tmem_frontswap_ops);
if (IS_ERR(old_ops) || old_ops) {
if (IS_ERR(old_ops))
return PTR_ERR(old_ops);
s = " (WARNING: frontswap_ops overridden)";
}
pr_info("frontswap enabled, RAM provided by Xen Transcendent Memory%s\n", pr_info("frontswap enabled, RAM provided by Xen Transcendent Memory%s\n",
s); s);
} }
......
...@@ -6,16 +6,16 @@ ...@@ -6,16 +6,16 @@
#include <linux/bitops.h> #include <linux/bitops.h>
struct frontswap_ops { struct frontswap_ops {
void (*init)(unsigned); void (*init)(unsigned); /* this swap type was just swapon'ed */
int (*store)(unsigned, pgoff_t, struct page *); int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
int (*load)(unsigned, pgoff_t, struct page *); int (*load)(unsigned, pgoff_t, struct page *); /* load a page */
void (*invalidate_page)(unsigned, pgoff_t); void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */
void (*invalidate_area)(unsigned); void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */
struct frontswap_ops *next; /* private pointer to next ops */
}; };
extern bool frontswap_enabled; extern bool frontswap_enabled;
extern struct frontswap_ops * extern void frontswap_register_ops(struct frontswap_ops *ops);
frontswap_register_ops(struct frontswap_ops *ops);
extern void frontswap_shrink(unsigned long); extern void frontswap_shrink(unsigned long);
extern unsigned long frontswap_curr_pages(void); extern unsigned long frontswap_curr_pages(void);
extern void frontswap_writethrough(bool); extern void frontswap_writethrough(bool);
......
...@@ -21,11 +21,16 @@ ...@@ -21,11 +21,16 @@
#include <linux/swapfile.h> #include <linux/swapfile.h>
/* /*
* frontswap_ops is set by frontswap_register_ops to contain the pointers * frontswap_ops are added by frontswap_register_ops, and provide the
* to the frontswap "backend" implementation functions. * frontswap "backend" implementation functions. Multiple implementations
* may be registered, but implementations can never deregister. This
* is a simple singly-linked list of all registered implementations.
*/ */
static struct frontswap_ops *frontswap_ops __read_mostly; static struct frontswap_ops *frontswap_ops __read_mostly;
#define for_each_frontswap_ops(ops) \
for ((ops) = frontswap_ops; (ops); (ops) = (ops)->next)
/* /*
* If enabled, frontswap_store will return failure even on success. As * If enabled, frontswap_store will return failure even on success. As
* a result, the swap subsystem will always write the page to swap, in * a result, the swap subsystem will always write the page to swap, in
...@@ -79,15 +84,6 @@ static inline void inc_frontswap_invalidates(void) { } ...@@ -79,15 +84,6 @@ static inline void inc_frontswap_invalidates(void) { }
* on all frontswap functions to not call the backend until the backend * on all frontswap functions to not call the backend until the backend
* has registered. * has registered.
* *
* Specifically when no backend is registered (nobody called
* frontswap_register_ops) all calls to frontswap_init (which is done via
* swapon -> enable_swap_info -> frontswap_init) are registered and remembered
* (via the setting of need_init bitmap) but fail to create tmem_pools. When a
* backend registers with frontswap at some later point the previous
* calls to frontswap_init are executed (by iterating over the need_init
* bitmap) to create tmem_pools and set the respective poolids. All of that is
* guarded by us using atomic bit operations on the 'need_init' bitmap.
*
 * This would not guard us against the user deciding to call swapoff right as
 * we are calling the backend to initialize (so swapon is in action).
 * Fortunately for us, the swapon_mutex has been taken by the callee so we are
...@@ -106,37 +102,64 @@ static inline void inc_frontswap_invalidates(void) { } ...@@ -106,37 +102,64 @@ static inline void inc_frontswap_invalidates(void) { }
* *
* Obviously the opposite (unloading the backend) must be done after all * Obviously the opposite (unloading the backend) must be done after all
* the frontswap_[store|load|invalidate_area|invalidate_page] start * the frontswap_[store|load|invalidate_area|invalidate_page] start
* ignorning or failing the requests - at which point frontswap_ops * ignoring or failing the requests. However, there is currently no way
* would have to be made in some fashion atomic. * to unload a backend once it is registered.
*/ */
static DECLARE_BITMAP(need_init, MAX_SWAPFILES);
/* /*
* Register operations for frontswap, returning previous thus allowing * Register operations for frontswap
* detection of multiple backends and possible nesting.
*/ */
struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) void frontswap_register_ops(struct frontswap_ops *ops)
{ {
struct frontswap_ops *old = frontswap_ops; DECLARE_BITMAP(a, MAX_SWAPFILES);
int i; DECLARE_BITMAP(b, MAX_SWAPFILES);
struct swap_info_struct *si;
unsigned int i;
for (i = 0; i < MAX_SWAPFILES; i++) { bitmap_zero(a, MAX_SWAPFILES);
if (test_and_clear_bit(i, need_init)) { bitmap_zero(b, MAX_SWAPFILES);
struct swap_info_struct *sis = swap_info[i];
/* __frontswap_init _should_ have set it! */ spin_lock(&swap_lock);
if (!sis->frontswap_map) plist_for_each_entry(si, &swap_active_head, list) {
return ERR_PTR(-EINVAL); if (!WARN_ON(!si->frontswap_map))
ops->init(i); set_bit(si->type, a);
} }
spin_unlock(&swap_lock);
/* the new ops needs to know the currently active swap devices */
for_each_set_bit(i, a, MAX_SWAPFILES)
ops->init(i);
/*
* Setting frontswap_ops must happen after the ops->init() calls
* above; cmpxchg implies smp_mb() which will ensure the init is
* complete at this point.
*/
do {
ops->next = frontswap_ops;
} while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next);
spin_lock(&swap_lock);
plist_for_each_entry(si, &swap_active_head, list) {
if (si->frontswap_map)
set_bit(si->type, b);
} }
spin_unlock(&swap_lock);
/* /*
* We MUST have frontswap_ops set _after_ the frontswap_init's * On the very unlikely chance that a swap device was added or
* have been called. Otherwise __frontswap_store might fail. Hence * removed between setting the "a" list bits and the ops init
* the barrier to make sure compiler does not re-order us. * calls, we re-check and do init or invalidate for any changed
* bits.
*/ */
barrier(); if (unlikely(!bitmap_equal(a, b, MAX_SWAPFILES))) {
frontswap_ops = ops; for (i = 0; i < MAX_SWAPFILES; i++) {
return old; if (!test_bit(i, a) && test_bit(i, b))
ops->init(i);
else if (test_bit(i, a) && !test_bit(i, b))
ops->invalidate_area(i);
}
}
} }
EXPORT_SYMBOL(frontswap_register_ops); EXPORT_SYMBOL(frontswap_register_ops);
...@@ -164,6 +187,7 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); ...@@ -164,6 +187,7 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets);
void __frontswap_init(unsigned type, unsigned long *map) void __frontswap_init(unsigned type, unsigned long *map)
{ {
struct swap_info_struct *sis = swap_info[type]; struct swap_info_struct *sis = swap_info[type];
struct frontswap_ops *ops;
BUG_ON(sis == NULL); BUG_ON(sis == NULL);
...@@ -179,26 +203,28 @@ void __frontswap_init(unsigned type, unsigned long *map) ...@@ -179,26 +203,28 @@ void __frontswap_init(unsigned type, unsigned long *map)
* p->frontswap set to something valid to work properly. * p->frontswap set to something valid to work properly.
*/ */
frontswap_map_set(sis, map); frontswap_map_set(sis, map);
if (frontswap_ops)
frontswap_ops->init(type); for_each_frontswap_ops(ops)
else { ops->init(type);
BUG_ON(type >= MAX_SWAPFILES);
set_bit(type, need_init);
}
} }
EXPORT_SYMBOL(__frontswap_init); EXPORT_SYMBOL(__frontswap_init);
bool __frontswap_test(struct swap_info_struct *sis, bool __frontswap_test(struct swap_info_struct *sis,
pgoff_t offset) pgoff_t offset)
{ {
bool ret = false; if (sis->frontswap_map)
return test_bit(offset, sis->frontswap_map);
if (frontswap_ops && sis->frontswap_map) return false;
ret = test_bit(offset, sis->frontswap_map);
return ret;
} }
EXPORT_SYMBOL(__frontswap_test); EXPORT_SYMBOL(__frontswap_test);
static inline void __frontswap_set(struct swap_info_struct *sis,
pgoff_t offset)
{
set_bit(offset, sis->frontswap_map);
atomic_inc(&sis->frontswap_pages);
}
static inline void __frontswap_clear(struct swap_info_struct *sis, static inline void __frontswap_clear(struct swap_info_struct *sis,
pgoff_t offset) pgoff_t offset)
{ {
...@@ -215,39 +241,46 @@ static inline void __frontswap_clear(struct swap_info_struct *sis, ...@@ -215,39 +241,46 @@ static inline void __frontswap_clear(struct swap_info_struct *sis,
*/ */
int __frontswap_store(struct page *page) int __frontswap_store(struct page *page)
{ {
int ret = -1, dup = 0; int ret = -1;
swp_entry_t entry = { .val = page_private(page), }; swp_entry_t entry = { .val = page_private(page), };
int type = swp_type(entry); int type = swp_type(entry);
struct swap_info_struct *sis = swap_info[type]; struct swap_info_struct *sis = swap_info[type];
pgoff_t offset = swp_offset(entry); pgoff_t offset = swp_offset(entry);
struct frontswap_ops *ops;
/* /*
 * Return if no backend registered.
* Don't need to inc frontswap_failed_stores here. * Don't need to inc frontswap_failed_stores here.
*/ */
if (!frontswap_ops) if (!frontswap_ops)
return ret; return -1;
BUG_ON(!PageLocked(page)); BUG_ON(!PageLocked(page));
BUG_ON(sis == NULL); BUG_ON(sis == NULL);
if (__frontswap_test(sis, offset))
dup = 1;
ret = frontswap_ops->store(type, offset, page);
if (ret == 0) {
set_bit(offset, sis->frontswap_map);
inc_frontswap_succ_stores();
if (!dup)
atomic_inc(&sis->frontswap_pages);
} else {
/* /*
failed dup always results in automatic invalidate of * If a dup, we must remove the old page first; we can't leave the
the (older) page from frontswap * old page no matter if the store of the new page succeeds or fails,
* and we can't rely on the new page replacing the old page as we may
* not store to the same implementation that contains the old page.
*/ */
inc_frontswap_failed_stores(); if (__frontswap_test(sis, offset)) {
if (dup) {
__frontswap_clear(sis, offset); __frontswap_clear(sis, offset);
frontswap_ops->invalidate_page(type, offset); for_each_frontswap_ops(ops)
ops->invalidate_page(type, offset);
}
/* Try to store in each implementation, until one succeeds. */
for_each_frontswap_ops(ops) {
ret = ops->store(type, offset, page);
if (!ret) /* successful store */
break;
} }
if (ret == 0) {
__frontswap_set(sis, offset);
inc_frontswap_succ_stores();
} else {
inc_frontswap_failed_stores();
} }
if (frontswap_writethrough_enabled) if (frontswap_writethrough_enabled)
/* report failure so swap also writes to swap device */ /* report failure so swap also writes to swap device */
...@@ -268,14 +301,22 @@ int __frontswap_load(struct page *page) ...@@ -268,14 +301,22 @@ int __frontswap_load(struct page *page)
int type = swp_type(entry); int type = swp_type(entry);
struct swap_info_struct *sis = swap_info[type]; struct swap_info_struct *sis = swap_info[type];
pgoff_t offset = swp_offset(entry); pgoff_t offset = swp_offset(entry);
struct frontswap_ops *ops;
if (!frontswap_ops)
return -1;
BUG_ON(!PageLocked(page)); BUG_ON(!PageLocked(page));
BUG_ON(sis == NULL); BUG_ON(sis == NULL);
/* if (!__frontswap_test(sis, offset))
* __frontswap_test() will check whether there is backend registered return -1;
*/
if (__frontswap_test(sis, offset)) /* Try loading from each implementation, until one succeeds. */
ret = frontswap_ops->load(type, offset, page); for_each_frontswap_ops(ops) {
ret = ops->load(type, offset, page);
if (!ret) /* successful load */
break;
}
if (ret == 0) { if (ret == 0) {
inc_frontswap_loads(); inc_frontswap_loads();
if (frontswap_tmem_exclusive_gets_enabled) { if (frontswap_tmem_exclusive_gets_enabled) {
...@@ -294,16 +335,19 @@ EXPORT_SYMBOL(__frontswap_load); ...@@ -294,16 +335,19 @@ EXPORT_SYMBOL(__frontswap_load);
void __frontswap_invalidate_page(unsigned type, pgoff_t offset) void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
{ {
struct swap_info_struct *sis = swap_info[type]; struct swap_info_struct *sis = swap_info[type];
struct frontswap_ops *ops;
if (!frontswap_ops)
return;
BUG_ON(sis == NULL); BUG_ON(sis == NULL);
/* if (!__frontswap_test(sis, offset))
* __frontswap_test() will check whether there is backend registered return;
*/
if (__frontswap_test(sis, offset)) { for_each_frontswap_ops(ops)
frontswap_ops->invalidate_page(type, offset); ops->invalidate_page(type, offset);
__frontswap_clear(sis, offset); __frontswap_clear(sis, offset);
inc_frontswap_invalidates(); inc_frontswap_invalidates();
}
} }
EXPORT_SYMBOL(__frontswap_invalidate_page); EXPORT_SYMBOL(__frontswap_invalidate_page);
...@@ -314,16 +358,19 @@ EXPORT_SYMBOL(__frontswap_invalidate_page); ...@@ -314,16 +358,19 @@ EXPORT_SYMBOL(__frontswap_invalidate_page);
void __frontswap_invalidate_area(unsigned type) void __frontswap_invalidate_area(unsigned type)
{ {
struct swap_info_struct *sis = swap_info[type]; struct swap_info_struct *sis = swap_info[type];
struct frontswap_ops *ops;
if (!frontswap_ops)
return;
if (frontswap_ops) {
BUG_ON(sis == NULL); BUG_ON(sis == NULL);
if (sis->frontswap_map == NULL) if (sis->frontswap_map == NULL)
return; return;
frontswap_ops->invalidate_area(type);
for_each_frontswap_ops(ops)
ops->invalidate_area(type);
atomic_set(&sis->frontswap_pages, 0); atomic_set(&sis->frontswap_pages, 0);
bitmap_zero(sis->frontswap_map, sis->max); bitmap_zero(sis->frontswap_map, sis->max);
}
clear_bit(type, need_init);
} }
EXPORT_SYMBOL(__frontswap_invalidate_area); EXPORT_SYMBOL(__frontswap_invalidate_area);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment