Commit e9f7bee1 authored by Trond Myklebust, committed by Linus Torvalds

[PATCH] NFS: large non-page-aligned direct I/O clobbers memory

The logic in nfs_direct_read_schedule and nfs_direct_write_schedule can
allow data->npages to be one larger than rpages.  This causes a page
pointer to be written beyond the end of the pagevec in nfs_read_data (or
nfs_write_data).

Fix this by making nfs_(read|write)_alloc() calculate the size of the
pagevec array, and initialise data->npages.

Also get rid of the redundant argument to nfs_commit_alloc().
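
A worked example (illustrative only, not part of the original patch description;
it assumes 4096-byte pages and rsize = 32768) shows how the overrun happens:

    rpages = nfs_max_pages(32768)
           = (32768 + 4096 - 1) >> PAGE_SHIFT
           = 8                              /* pagevec sized for 8 page pointers */

but for a user buffer that starts 512 bytes into a page, say user_addr = 0x10000200:

    npages = nfs_direct_count_pages(0x10000200, 32768)
           = ((0x10000200 + 32768 + 4095) >> 12) - (0x10000200 >> 12)
           = 0x10009 - 0x10000
           = 9

so get_user_pages() stores nine page pointers into an eight-entry pagevec and
clobbers whatever follows it.  Sizing the allocation from pgbase + bytes, as this
patch does, covers the extra page that a misaligned buffer can span.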
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 016eb4a0
@@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
         return atomic_dec_and_test(&dreq->io_count);
 }
 
-/*
- * "size" is never larger than rsize or wsize.
- */
-static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size)
-{
-        int page_count;
-
-        page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-        page_count -= user_addr >> PAGE_SHIFT;
-        BUG_ON(page_count < 0);
-
-        return page_count;
-}
-
-static inline unsigned int nfs_max_pages(unsigned int size)
-{
-        return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-}
-
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
         struct nfs_open_context *ctx = dreq->ctx;
         struct inode *inode = ctx->dentry->d_inode;
         size_t rsize = NFS_SERVER(inode)->rsize;
-        unsigned int rpages = nfs_max_pages(rsize);
         unsigned int pgbase;
         int result;
         ssize_t started = 0;
 
         get_dreq(dreq);
 
-        pgbase = user_addr & ~PAGE_MASK;
         do {
                 struct nfs_read_data *data;
                 size_t bytes;
 
+                pgbase = user_addr & ~PAGE_MASK;
+                bytes = min(rsize,count);
+
                 result = -ENOMEM;
-                data = nfs_readdata_alloc(rpages);
+                data = nfs_readdata_alloc(pgbase + bytes);
                 if (unlikely(!data))
                         break;
 
-                bytes = rsize;
-                if (count < rsize)
-                        bytes = count;
-
-                data->npages = nfs_direct_count_pages(user_addr, bytes);
                 down_read(&current->mm->mmap_sem);
                 result = get_user_pages(current, current->mm, user_addr,
                                         data->npages, 1, 0, data->pagevec, NULL);
@@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
                 started += bytes;
                 user_addr += bytes;
                 pos += bytes;
+                /* FIXME: Remove this unnecessary math from final patch */
                 pgbase += bytes;
                 pgbase &= ~PAGE_MASK;
+                BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
                 count -= bytes;
         } while (count != 0);
@@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 
 static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
 {
-        dreq->commit_data = nfs_commit_alloc(0);
+        dreq->commit_data = nfs_commit_alloc();
         if (dreq->commit_data != NULL)
                 dreq->commit_data->req = (struct nfs_page *) dreq;
 }
@@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
         struct nfs_open_context *ctx = dreq->ctx;
         struct inode *inode = ctx->dentry->d_inode;
         size_t wsize = NFS_SERVER(inode)->wsize;
-        unsigned int wpages = nfs_max_pages(wsize);
         unsigned int pgbase;
         int result;
         ssize_t started = 0;
 
         get_dreq(dreq);
 
-        pgbase = user_addr & ~PAGE_MASK;
         do {
                 struct nfs_write_data *data;
                 size_t bytes;
 
+                pgbase = user_addr & ~PAGE_MASK;
+                bytes = min(wsize,count);
+
                 result = -ENOMEM;
-                data = nfs_writedata_alloc(wpages);
+                data = nfs_writedata_alloc(pgbase + bytes);
                 if (unlikely(!data))
                         break;
 
-                bytes = wsize;
-                if (count < wsize)
-                        bytes = count;
-
-                data->npages = nfs_direct_count_pages(user_addr, bytes);
                 down_read(&current->mm->mmap_sem);
                 result = get_user_pages(current, current->mm, user_addr,
                                         data->npages, 0, 0, data->pagevec, NULL);
@@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
                 started += bytes;
                 user_addr += bytes;
                 pos += bytes;
+
+                /* FIXME: Remove this useless math from the final patch */
                 pgbase += bytes;
                 pgbase &= ~PAGE_MASK;
+                BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
 
                 count -= bytes;
         } while (count != 0);
......
@@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ (32)
 
-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_data *nfs_readdata_alloc(size_t len)
 {
+        unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
         struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
 
         if (p) {
                 memset(p, 0, sizeof(*p));
                 INIT_LIST_HEAD(&p->pages);
+                p->npages = pagecount;
                 if (pagecount <= ARRAY_SIZE(p->page_array))
                         p->pagevec = p->page_array;
                 else {
@@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
         int result;
         struct nfs_read_data *rdata;
 
-        rdata = nfs_readdata_alloc(1);
+        rdata = nfs_readdata_alloc(count);
         if (!rdata)
                 return -ENOMEM;
 
@@ -336,25 +338,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
         struct nfs_page *req = nfs_list_entry(head->next);
         struct page *page = req->wb_page;
         struct nfs_read_data *data;
-        unsigned int rsize = NFS_SERVER(inode)->rsize;
-        unsigned int nbytes, offset;
+        size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+        unsigned int offset;
         int requests = 0;
         LIST_HEAD(list);
 
         nfs_list_remove_request(req);
 
         nbytes = req->wb_bytes;
-        for(;;) {
-                data = nfs_readdata_alloc(1);
+        do {
+                size_t len = min(nbytes,rsize);
+
+                data = nfs_readdata_alloc(len);
                 if (!data)
                         goto out_bad;
                 INIT_LIST_HEAD(&data->pages);
                 list_add(&data->pages, &list);
                 requests++;
-                if (nbytes <= rsize)
-                        break;
-                nbytes -= rsize;
-        }
+                nbytes -= len;
+        } while(nbytes != 0);
         atomic_set(&req->wb_complete, requests);
         ClearPageError(page);
@@ -402,7 +404,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
         if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
                 return nfs_pagein_multi(head, inode);
 
-        data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
+        data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
         if (!data)
                 goto out_bad;
 
......
@@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(void)
 {
         struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
         if (p) {
                 memset(p, 0, sizeof(*p));
                 INIT_LIST_HEAD(&p->pages);
-                if (pagecount <= ARRAY_SIZE(p->page_array))
-                        p->pagevec = p->page_array;
-                else {
-                        p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
-                        if (!p->pagevec) {
-                                mempool_free(p, nfs_commit_mempool);
-                                p = NULL;
-                        }
-                }
         }
         return p;
 }
@@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p)
         mempool_free(p, nfs_commit_mempool);
 }
 
-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
+        unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
         struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
 
         if (p) {
                 memset(p, 0, sizeof(*p));
                 INIT_LIST_HEAD(&p->pages);
+                p->npages = pagecount;
                 if (pagecount <= ARRAY_SIZE(p->page_array))
                         p->pagevec = p->page_array;
                 else {
@@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
         int result, written = 0;
         struct nfs_write_data *wdata;
 
-        wdata = nfs_writedata_alloc(1);
+        wdata = nfs_writedata_alloc(wsize);
         if (!wdata)
                 return -ENOMEM;
 
@@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
         struct nfs_page *req = nfs_list_entry(head->next);
         struct page *page = req->wb_page;
         struct nfs_write_data *data;
-        unsigned int wsize = NFS_SERVER(inode)->wsize;
-        unsigned int nbytes, offset;
+        size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
+        unsigned int offset;
         int requests = 0;
         LIST_HEAD(list);
 
         nfs_list_remove_request(req);
 
         nbytes = req->wb_bytes;
-        for (;;) {
-                data = nfs_writedata_alloc(1);
+        do {
+                size_t len = min(nbytes, wsize);
+
+                data = nfs_writedata_alloc(len);
                 if (!data)
                         goto out_bad;
                 list_add(&data->pages, &list);
                 requests++;
-                if (nbytes <= wsize)
-                        break;
-                nbytes -= wsize;
-        }
+                nbytes -= len;
+        } while (nbytes != 0);
         atomic_set(&req->wb_complete, requests);
         ClearPageError(page);
@@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
         struct nfs_write_data *data;
         unsigned int count;
 
-        data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
+        data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize);
         if (!data)
                 goto out_bad;
 
@@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
         struct nfs_write_data *data;
         struct nfs_page *req;
 
-        data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
+        data = nfs_commit_alloc();
         if (!data)
                 goto out_bad;
 
......
@@ -427,7 +427,7 @@ extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 extern void nfs_writedata_release(void *);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+struct nfs_write_data *nfs_commit_alloc(void);
 void nfs_commit_free(struct nfs_write_data *p);
 #endif
 
@@ -478,7 +478,7 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
 /*
  * Allocate nfs_write_data structures
  */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern struct nfs_write_data *nfs_writedata_alloc(size_t len);
 
 /*
  * linux/fs/nfs/read.c
@@ -492,7 +492,7 @@ extern void nfs_readdata_release(void *data);
 /*
  * Allocate nfs_read_data structures
  */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern struct nfs_read_data *nfs_readdata_alloc(size_t len);
 
 /*
  * linux/fs/nfs3proc.c
......
@@ -729,7 +729,7 @@ struct nfs_read_data {
         struct list_head pages;   /* Coalesced read requests */
         struct nfs_page *req;     /* multi ops per nfs_page */
         struct page **pagevec;
-        unsigned int npages;      /* active pages in pagevec */
+        unsigned int npages;      /* Max length of pagevec */
         struct nfs_readargs args;
         struct nfs_readres  res;
 #ifdef CONFIG_NFS_V4
@@ -748,7 +748,7 @@ struct nfs_write_data {
         struct list_head pages;   /* Coalesced requests we wish to flush */
         struct nfs_page *req;     /* multi ops per nfs_page */
         struct page **pagevec;
-        unsigned int npages;      /* active pages in pagevec */
+        unsigned int npages;      /* Max length of pagevec */
         struct nfs_writeargs args;        /* argument struct */
         struct nfs_writeres res;          /* result struct */
 #ifdef CONFIG_NFS_V4
......