Commit c43cf3ea authored by Jennifer Herbert, committed by David Vrabel

xen-blkback: safely unmap grants in case they are still in use

Use gnttab_unmap_refs_async() to wait until the mapped pages are no
longer in use before unmapping them.

This allows blkback to use network storage which may retain refs to
pages in queued skbs after the block I/O has completed.
Signed-off-by: Jennifer Herbert <jennifer.herbert@citrix.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Acked-by: Jens Axboe <axboe@kernel.de>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
parent 74528225
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include <asm/xen/hypervisor.h> #include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h> #include <asm/xen/hypercall.h>
#include <xen/balloon.h> #include <xen/balloon.h>
#include <xen/grant_table.h>
#include "common.h" #include "common.h"
/* /*
...@@ -262,6 +263,17 @@ static void put_persistent_gnt(struct xen_blkif *blkif, ...@@ -262,6 +263,17 @@ static void put_persistent_gnt(struct xen_blkif *blkif,
atomic_dec(&blkif->persistent_gnt_in_use); atomic_dec(&blkif->persistent_gnt_in_use);
} }
/*
 * Callback handed to gnttab_unmap_refs_async() by free_persistent_gnts().
 *
 * @result: status reported for the unmap; any non-zero value is fatal.
 * @data:   queue data whose ->data member carries the completion that
 *          free_persistent_gnts() is waiting on.
 */
static void free_persistent_gnts_unmap_callback(int result,
						struct gntab_unmap_queue_data *data)
{
	struct completion *unmap_done = data->data;

	/*
	 * The synchronous code this replaces crashed on an unmap failure;
	 * keep doing so, although a gentler policy may be preferable.
	 */
	BUG_ON(result);

	/* Wake the caller blocked in wait_for_completion(). */
	complete(unmap_done);
}
static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
unsigned int num) unsigned int num)
{ {
...@@ -269,8 +281,17 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, ...@@ -269,8 +281,17 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt; struct persistent_gnt *persistent_gnt;
struct rb_node *n; struct rb_node *n;
int ret = 0;
int segs_to_unmap = 0; int segs_to_unmap = 0;
struct gntab_unmap_queue_data unmap_data;
struct completion unmap_completion;
init_completion(&unmap_completion);
unmap_data.data = &unmap_completion;
unmap_data.done = &free_persistent_gnts_unmap_callback;
unmap_data.pages = pages;
unmap_data.unmap_ops = unmap;
unmap_data.kunmap_ops = NULL;
foreach_grant_safe(persistent_gnt, n, root, node) { foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle == BUG_ON(persistent_gnt->handle ==
...@@ -285,9 +306,11 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, ...@@ -285,9 +306,11 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
!rb_next(&persistent_gnt->node)) { !rb_next(&persistent_gnt->node)) {
ret = gnttab_unmap_refs(unmap, NULL, pages,
segs_to_unmap); unmap_data.count = segs_to_unmap;
BUG_ON(ret); gnttab_unmap_refs_async(&unmap_data);
wait_for_completion(&unmap_completion);
put_free_pages(blkif, pages, segs_to_unmap); put_free_pages(blkif, pages, segs_to_unmap);
segs_to_unmap = 0; segs_to_unmap = 0;
} }
...@@ -653,18 +676,14 @@ void xen_blkbk_free_caches(struct xen_blkif *blkif) ...@@ -653,18 +676,14 @@ void xen_blkbk_free_caches(struct xen_blkif *blkif)
shrink_free_pagepool(blkif, 0 /* All */); shrink_free_pagepool(blkif, 0 /* All */);
} }
/* static unsigned int xen_blkbk_unmap_prepare(
* Unmap the grant references, and also remove the M2P over-rides struct xen_blkif *blkif,
* used in the 'pending_req'. struct grant_page **pages,
*/ unsigned int num,
static void xen_blkbk_unmap(struct xen_blkif *blkif, struct gnttab_unmap_grant_ref *unmap_ops,
struct grant_page *pages[], struct page **unmap_pages)
int num)
{ {
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int i, invcount = 0; unsigned int i, invcount = 0;
int ret;
for (i = 0; i < num; i++) { for (i = 0; i < num; i++) {
if (pages[i]->persistent_gnt != NULL) { if (pages[i]->persistent_gnt != NULL) {
...@@ -674,22 +693,96 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, ...@@ -674,22 +693,96 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif,
if (pages[i]->handle == BLKBACK_INVALID_HANDLE) if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
continue; continue;
unmap_pages[invcount] = pages[i]->page; unmap_pages[invcount] = pages[i]->page;
gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page), gnttab_set_unmap_op(&unmap_ops[invcount], vaddr(pages[i]->page),
GNTMAP_host_map, pages[i]->handle); GNTMAP_host_map, pages[i]->handle);
pages[i]->handle = BLKBACK_INVALID_HANDLE; pages[i]->handle = BLKBACK_INVALID_HANDLE;
if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { invcount++;
ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
invcount);
BUG_ON(ret);
put_free_pages(blkif, unmap_pages, invcount);
invcount = 0;
} }
return invcount;
}
/*
 * Callback handed to gnttab_unmap_refs_async() by
 * xen_blkbk_unmap_and_respond().
 *
 * @result: status reported for the unmap; any non-zero value is fatal.
 * @data:   queue data; ->data is the pending_req being finished, and
 *          ->pages / ->count describe the pages that were unmapped.
 *
 * Returns the unmapped pages via put_free_pages(), puts the response
 * on the ring, frees the request and finally drops the references
 * held on the blkif.
 */
static void xen_blkbk_unmap_and_respond_callback(int result,
						 struct gntab_unmap_queue_data *data)
{
	struct pending_req *req = data->data;
	struct xen_blkif *blkif = req->blkif;

	/*
	 * The synchronous code this replaces crashed on an unmap failure;
	 * keep doing so, although a gentler policy may be preferable.
	 */
	BUG_ON(result);

	put_free_pages(blkif, data->pages, data->count);
	make_response(blkif, req->id, req->operation, req->status);
	free_req(blkif, req);

	/*
	 * The request must be freed before blkif is released, otherwise
	 * free_req could race with the cleanup xen_blkif_free performs
	 * during shutdown.
	 *
	 * NB: pending_free_wq may be woken before drain_complete while a
	 * drain is in progress.  That is harmless with the current
	 * implementation, because no thread can be waiting on
	 * pending_free_wq during a drain, but it must be reconsidered if
	 * that model ever changes.
	 */
	if (atomic_dec_and_test(&blkif->inflight)) {
		if (atomic_read(&blkif->drain))
			complete(&blkif->drain_complete);
	}

	xen_blkif_put(blkif);
}
/*
 * Queue the grants of a finished request for asynchronous unmapping.
 *
 * @req: the pending request whose I/O has completed.
 *
 * xen_blkbk_unmap_prepare() fills req->unmap and req->unmap_pages and
 * returns how many entries it wrote.  The queue data embedded in @req is
 * then pointed at those arrays and passed to gnttab_unmap_refs_async(),
 * with xen_blkbk_unmap_and_respond_callback() set as its ->done handler.
 */
static void xen_blkbk_unmap_and_respond(struct pending_req *req)
{
	struct gntab_unmap_queue_data *unmap_work = &req->gnttab_unmap_data;
	unsigned int nr_to_unmap;

	nr_to_unmap = xen_blkbk_unmap_prepare(req->blkif, req->segments,
					      req->nr_pages,
					      req->unmap, req->unmap_pages);

	unmap_work->data = req;
	unmap_work->done = xen_blkbk_unmap_and_respond_callback;
	unmap_work->unmap_ops = req->unmap;
	unmap_work->kunmap_ops = NULL;
	unmap_work->pages = req->unmap_pages;
	unmap_work->count = nr_to_unmap;

	gnttab_unmap_refs_async(unmap_work);
}
/*
* Unmap the grant references.
*
* This could accumulate ops up to the batch size to reduce the number
* of hypercalls, but since this is only used in error paths there's
* no real need.
*/
static void xen_blkbk_unmap(struct xen_blkif *blkif,
struct grant_page *pages[],
int num)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unsigned int invcount = 0;
int ret;
while (num) {
unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
unmap, unmap_pages);
if (invcount) { if (invcount) {
ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
BUG_ON(ret); BUG_ON(ret);
put_free_pages(blkif, unmap_pages, invcount); put_free_pages(blkif, unmap_pages, invcount);
} }
pages += batch;
num -= batch;
}
} }
static int xen_blkbk_map(struct xen_blkif *blkif, static int xen_blkbk_map(struct xen_blkif *blkif,
...@@ -982,32 +1075,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) ...@@ -982,32 +1075,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
* the grant references associated with 'request' and provide * the grant references associated with 'request' and provide
* the proper response on the ring. * the proper response on the ring.
*/ */
if (atomic_dec_and_test(&pending_req->pendcnt)) { if (atomic_dec_and_test(&pending_req->pendcnt))
struct xen_blkif *blkif = pending_req->blkif; xen_blkbk_unmap_and_respond(pending_req);
xen_blkbk_unmap(blkif,
pending_req->segments,
pending_req->nr_pages);
make_response(blkif, pending_req->id,
pending_req->operation, pending_req->status);
free_req(blkif, pending_req);
/*
* Make sure the request is freed before releasing blkif,
* or there could be a race between free_req and the
* cleanup done in xen_blkif_free during shutdown.
*
* NB: The fact that we might try to wake up pending_free_wq
* before drain_complete (in case there's a drain going on)
* it's not a problem with our current implementation
* because we can assure there's no thread waiting on
* pending_free_wq if there's a drain going on, but it has
* to be taken into account if the current model is changed.
*/
if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
complete(&blkif->drain_complete);
}
xen_blkif_put(blkif);
}
} }
/* /*
......
...@@ -350,6 +350,9 @@ struct pending_req { ...@@ -350,6 +350,9 @@ struct pending_req {
struct grant_page *indirect_pages[MAX_INDIRECT_PAGES]; struct grant_page *indirect_pages[MAX_INDIRECT_PAGES];
struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; struct seg_buf seg[MAX_INDIRECT_SEGMENTS];
struct bio *biolist[MAX_INDIRECT_SEGMENTS]; struct bio *biolist[MAX_INDIRECT_SEGMENTS];
struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS];
struct page *unmap_pages[MAX_INDIRECT_SEGMENTS];
struct gntab_unmap_queue_data gnttab_unmap_data;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment