Commit a12587b0 authored by Linus Torvalds

Merge tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

NFS client bugfixes and cleanups for Linux 3.3 (pull 2)

* tag 'nfs-for-3.3-2' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  pnfsblock: alloc short extent before submit bio
  pnfsblock: remove rpc_call_ops from struct parallel_io
  pnfsblock: move find lock page logic out of bl_write_pagelist
  pnfsblock: cleanup bl_mark_sectors_init
  pnfsblock: limit bio page count
  pnfsblock: don't spinlock when freeing block_dev
  pnfsblock: clean up _add_entry
  pnfsblock: set read/write tk_status to pnfs_error
  pnfsblock: acquire im_lock in _preload_range
  NFS4: fix compile warnings in nfs4proc.c
  nfs: check for integer overflow in decode_devicenotify_args()
  NFS: cleanup endian type in decode_ds_addr()
  NFS: add an endian notation
parents adfeb6e9 7c5465d6
...@@ -90,9 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect) ...@@ -90,9 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/ */
struct parallel_io { struct parallel_io {
struct kref refcnt; struct kref refcnt;
struct rpc_call_ops call_ops; void (*pnfs_callback) (void *data, int num_se);
void (*pnfs_callback) (void *data);
void *data; void *data;
int bse_count;
}; };
static inline struct parallel_io *alloc_parallel(void *data) static inline struct parallel_io *alloc_parallel(void *data)
...@@ -103,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data) ...@@ -103,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
if (rv) { if (rv) {
rv->data = data; rv->data = data;
kref_init(&rv->refcnt); kref_init(&rv->refcnt);
rv->bse_count = 0;
} }
return rv; return rv;
} }
...@@ -117,7 +118,7 @@ static void destroy_parallel(struct kref *kref) ...@@ -117,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
struct parallel_io *p = container_of(kref, struct parallel_io, refcnt); struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
dprintk("%s enter\n", __func__); dprintk("%s enter\n", __func__);
p->pnfs_callback(p->data); p->pnfs_callback(p->data, p->bse_count);
kfree(p); kfree(p);
} }
...@@ -146,14 +147,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect, ...@@ -146,14 +147,19 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
{ {
struct bio *bio; struct bio *bio;
npg = min(npg, BIO_MAX_PAGES);
bio = bio_alloc(GFP_NOIO, npg); bio = bio_alloc(GFP_NOIO, npg);
if (!bio) if (!bio && (current->flags & PF_MEMALLOC)) {
return NULL; while (!bio && (npg /= 2))
bio = bio_alloc(GFP_NOIO, npg);
}
if (bio) {
bio->bi_sector = isect - be->be_f_offset + be->be_v_offset; bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
bio->bi_bdev = be->be_mdev; bio->bi_bdev = be->be_mdev;
bio->bi_end_io = end_io; bio->bi_end_io = end_io;
bio->bi_private = par; bio->bi_private = par;
}
return bio; return bio;
} }
...@@ -212,22 +218,15 @@ static void bl_read_cleanup(struct work_struct *work) ...@@ -212,22 +218,15 @@ static void bl_read_cleanup(struct work_struct *work)
} }
static void static void
bl_end_par_io_read(void *data) bl_end_par_io_read(void *data, int unused)
{ {
struct nfs_read_data *rdata = data; struct nfs_read_data *rdata = data;
rdata->task.tk_status = rdata->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work); schedule_work(&rdata->task.u.tk_work);
} }
/*
 * Deliberately empty stand-in for the regular .rpc_call_done callback,
 * which must not be invoked for this I/O path.
 */
static void bl_rpc_do_nothing(struct rpc_task *task, void *calldata)
{
}
static enum pnfs_try_status static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata) bl_read_pagelist(struct nfs_read_data *rdata)
{ {
...@@ -247,8 +246,6 @@ bl_read_pagelist(struct nfs_read_data *rdata) ...@@ -247,8 +246,6 @@ bl_read_pagelist(struct nfs_read_data *rdata)
par = alloc_parallel(rdata); par = alloc_parallel(rdata);
if (!par) if (!par)
goto use_mds; goto use_mds;
par->call_ops = *rdata->mds_ops;
par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_read; par->pnfs_callback = bl_end_par_io_read;
/* At this point, we can no longer jump to use_mds */ /* At this point, we can no longer jump to use_mds */
...@@ -322,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl, ...@@ -322,6 +319,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
{ {
sector_t isect, end; sector_t isect, end;
struct pnfs_block_extent *be; struct pnfs_block_extent *be;
struct pnfs_block_short_extent *se;
dprintk("%s(%llu, %u)\n", __func__, offset, count); dprintk("%s(%llu, %u)\n", __func__, offset, count);
if (count == 0) if (count == 0)
...@@ -334,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl, ...@@ -334,8 +332,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
be = bl_find_get_extent(bl, isect, NULL); be = bl_find_get_extent(bl, isect, NULL);
BUG_ON(!be); /* FIXME */ BUG_ON(!be); /* FIXME */
len = min(end, be->be_f_offset + be->be_length) - isect; len = min(end, be->be_f_offset + be->be_length) - isect;
if (be->be_state == PNFS_BLOCK_INVALID_DATA) if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
bl_mark_for_commit(be, isect, len); /* What if fails? */ se = bl_pop_one_short_extent(be->be_inval);
BUG_ON(!se);
bl_mark_for_commit(be, isect, len, se);
}
isect += len; isect += len;
bl_put_extent(be); bl_put_extent(be);
} }
...@@ -357,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err) ...@@ -357,7 +358,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
end_page_writeback(page); end_page_writeback(page);
page_cache_release(page); page_cache_release(page);
} while (bvec >= bio->bi_io_vec); } while (bvec >= bio->bi_io_vec);
if (!uptodate) {
if (unlikely(!uptodate)) {
if (!wdata->pnfs_error) if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO; wdata->pnfs_error = -EIO;
pnfs_set_lo_fail(wdata->lseg); pnfs_set_lo_fail(wdata->lseg);
...@@ -366,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err) ...@@ -366,7 +368,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
put_parallel(par); put_parallel(par);
} }
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_write(struct bio *bio, int err) static void bl_end_io_write(struct bio *bio, int err)
{ {
struct parallel_io *par = bio->bi_private; struct parallel_io *par = bio->bi_private;
...@@ -392,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work) ...@@ -392,7 +393,7 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__); dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work); task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task); wdata = container_of(task, struct nfs_write_data, task);
if (!wdata->pnfs_error) { if (likely(!wdata->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */ /* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg), mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
wdata->args.offset, wdata->args.count); wdata->args.offset, wdata->args.count);
...@@ -401,11 +402,16 @@ static void bl_write_cleanup(struct work_struct *work) ...@@ -401,11 +402,16 @@ static void bl_write_cleanup(struct work_struct *work)
} }
/* Called when last of bios associated with a bl_write_pagelist call finishes */ /* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data) static void bl_end_par_io_write(void *data, int num_se)
{ {
struct nfs_write_data *wdata = data; struct nfs_write_data *wdata = data;
wdata->task.tk_status = 0; if (unlikely(wdata->pnfs_error)) {
bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
num_se);
}
wdata->task.tk_status = wdata->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC; wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work); schedule_work(&wdata->task.u.tk_work);
...@@ -484,6 +490,55 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read) ...@@ -484,6 +490,55 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
return ret; return ret;
} }
/*
 * Find or create the page-cache page at @index in @inode's mapping and mark
 * it as being under writeback so the caller can write it out.
 *
 * @inode:    inode whose i_mapping is searched
 * @index:    page index within that mapping
 * @cow_read: passed through unchanged to init_page_for_write() when the
 *            page is not yet uptodate
 *
 * Returns:
 *   ERR_PTR(-ENOMEM) if no page could be found or created,
 *   NULL if the page is dirty or already under writeback (caller should
 *        skip it; the page has been released here),
 *   otherwise the page itself, unlocked and with writeback set.
 *
 * No page_cache_release() is done on the success path, so the caller
 * presumably owns the page reference and must drop it -- confirm at the
 * call site.
 */
static struct page *
bl_find_get_zeroing_page(struct inode *inode, pgoff_t index,
struct pnfs_block_extent *cow_read)
{
struct page *page;
/* Tracks whether this function currently holds the page lock. */
int locked = 0;
/* Try a plain lookup first; a page obtained this way is not locked by us. */
page = find_get_page(inode->i_mapping, index);
if (page)
goto check_page;
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (unlikely(!page)) {
dprintk("%s oom\n", __func__);
return ERR_PTR(-ENOMEM);
}
/* The code treats a page returned by find_or_create_page() as locked. */
locked = 1;
check_page:
/* PageDirty: Other will write this out
* PageWriteback: Other is writing this out
* PageUptodate: It was read before
*/
if (PageDirty(page) || PageWriteback(page)) {
print_page(page);
if (locked)
unlock_page(page);
page_cache_release(page);
return NULL;
}
/*
 * The first check above may have run without the page lock. Take the
 * lock and repeat the dirty/writeback test before touching the page.
 */
if (!locked) {
lock_page(page);
locked = 1;
goto check_page;
}
if (!PageUptodate(page)) {
/* New page, readin or zero it */
/* NOTE(review): the result of init_page_for_write() is not checked here. */
init_page_for_write(page, cow_read);
}
/* Mark writeback while still holding the lock, then drop the lock. */
set_page_writeback(page);
unlock_page(page);
return page;
}
static enum pnfs_try_status static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync) bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{ {
...@@ -508,9 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) ...@@ -508,9 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
*/ */
par = alloc_parallel(wdata); par = alloc_parallel(wdata);
if (!par) if (!par)
return PNFS_NOT_ATTEMPTED; goto out_mds;
par->call_ops = *wdata->mds_ops;
par->call_ops.rpc_call_done = bl_rpc_do_nothing;
par->pnfs_callback = bl_end_par_io_write; par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */ /* At this point, have to be more careful with error handling */
...@@ -518,12 +571,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) ...@@ -518,12 +571,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read); be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) { if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__); dprintk("%s no matching extents!\n", __func__);
wdata->pnfs_error = -EINVAL; goto out_mds;
goto out;
} }
/* First page inside INVALID extent */ /* First page inside INVALID extent */
if (be->be_state == PNFS_BLOCK_INVALID_DATA) { if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
if (likely(!bl_push_one_short_extent(be->be_inval)))
par->bse_count++;
else
goto out_mds;
temp = offset >> PAGE_CACHE_SHIFT; temp = offset >> PAGE_CACHE_SHIFT;
npg_zero = do_div(temp, npg_per_block); npg_zero = do_div(temp, npg_per_block);
isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) & isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
...@@ -543,36 +599,16 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) ...@@ -543,36 +599,16 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
dprintk("%s zero %dth page: index %lu isect %llu\n", dprintk("%s zero %dth page: index %lu isect %llu\n",
__func__, npg_zero, index, __func__, npg_zero, index,
(unsigned long long)isect); (unsigned long long)isect);
page = page = bl_find_get_zeroing_page(wdata->inode, index,
find_or_create_page(wdata->inode->i_mapping, index, cow_read);
GFP_NOFS); if (unlikely(IS_ERR(page))) {
if (!page) { wdata->pnfs_error = PTR_ERR(page);
dprintk("%s oom\n", __func__);
wdata->pnfs_error = -ENOMEM;
goto out; goto out;
} } else if (page == NULL)
/* PageDirty: Other will write this out
* PageWriteback: Other is writing this out
* PageUptodate: It was read before
* sector_initialized: already written out
*/
if (PageDirty(page) || PageWriteback(page)) {
print_page(page);
unlock_page(page);
page_cache_release(page);
goto next_page; goto next_page;
}
if (!PageUptodate(page)) {
/* New page, readin or zero it */
init_page_for_write(page, cow_read);
}
set_page_writeback(page);
unlock_page(page);
ret = bl_mark_sectors_init(be->be_inval, isect, ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS, PAGE_CACHE_SECTORS);
NULL);
if (unlikely(ret)) { if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n", dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret); __func__, ret);
...@@ -581,6 +617,19 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) ...@@ -581,6 +617,19 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
wdata->pnfs_error = ret; wdata->pnfs_error = ret;
goto out; goto out;
} }
if (likely(!bl_push_one_short_extent(be->be_inval)))
par->bse_count++;
else {
end_page_writeback(page);
page_cache_release(page);
wdata->pnfs_error = -ENOMEM;
goto out;
}
/* FIXME: This should be done in bi_end_io */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE);
bio = bl_add_page_to_bio(bio, npg_zero, WRITE, bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
isect, page, be, isect, page, be,
bl_end_io_write_zero, par); bl_end_io_write_zero, par);
...@@ -589,10 +638,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) ...@@ -589,10 +638,6 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
bio = NULL; bio = NULL;
goto out; goto out;
} }
/* FIXME: This should be done in bi_end_io */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE);
next_page: next_page:
isect += PAGE_CACHE_SECTORS; isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS; extent_length -= PAGE_CACHE_SECTORS;
...@@ -616,13 +661,21 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) ...@@ -616,13 +661,21 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
wdata->pnfs_error = -EINVAL; wdata->pnfs_error = -EINVAL;
goto out; goto out;
} }
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
if (likely(!bl_push_one_short_extent(
be->be_inval)))
par->bse_count++;
else {
wdata->pnfs_error = -ENOMEM;
goto out;
}
}
extent_length = be->be_length - extent_length = be->be_length -
(isect - be->be_f_offset); (isect - be->be_f_offset);
} }
if (be->be_state == PNFS_BLOCK_INVALID_DATA) { if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
ret = bl_mark_sectors_init(be->be_inval, isect, ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS, PAGE_CACHE_SECTORS);
NULL);
if (unlikely(ret)) { if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n", dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret); __func__, ret);
...@@ -664,6 +717,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync) ...@@ -664,6 +717,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
bl_submit_bio(WRITE, bio); bl_submit_bio(WRITE, bio);
put_parallel(par); put_parallel(par);
return PNFS_ATTEMPTED; return PNFS_ATTEMPTED;
out_mds:
bl_put_extent(be);
kfree(par);
return PNFS_NOT_ATTEMPTED;
} }
/* FIXME - range ignored */ /* FIXME - range ignored */
...@@ -690,11 +747,17 @@ static void ...@@ -690,11 +747,17 @@ static void
release_inval_marks(struct pnfs_inval_markings *marks) release_inval_marks(struct pnfs_inval_markings *marks)
{ {
struct pnfs_inval_tracking *pos, *temp; struct pnfs_inval_tracking *pos, *temp;
struct pnfs_block_short_extent *se, *stemp;
list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) { list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
list_del(&pos->it_link); list_del(&pos->it_link);
kfree(pos); kfree(pos);
} }
list_for_each_entry_safe(se, stemp, &marks->im_extents, bse_node) {
list_del(&se->bse_node);
kfree(se);
}
return; return;
} }
...@@ -779,16 +842,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata) ...@@ -779,16 +842,13 @@ bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
static void free_blk_mountid(struct block_mount_id *mid) static void free_blk_mountid(struct block_mount_id *mid)
{ {
if (mid) { if (mid) {
struct pnfs_block_dev *dev; struct pnfs_block_dev *dev, *tmp;
spin_lock(&mid->bm_lock);
while (!list_empty(&mid->bm_devlist)) { /* No need to take bm_lock as we are last user freeing bm_devlist */
dev = list_first_entry(&mid->bm_devlist, list_for_each_entry_safe(dev, tmp, &mid->bm_devlist, bm_node) {
struct pnfs_block_dev,
bm_node);
list_del(&dev->bm_node); list_del(&dev->bm_node);
bl_free_block_dev(dev); bl_free_block_dev(dev);
} }
spin_unlock(&mid->bm_lock);
kfree(mid); kfree(mid);
} }
} }
......
...@@ -70,6 +70,7 @@ struct pnfs_inval_markings { ...@@ -70,6 +70,7 @@ struct pnfs_inval_markings {
spinlock_t im_lock; spinlock_t im_lock;
struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */ struct my_tree im_tree; /* Sectors that need LAYOUTCOMMIT */
sector_t im_block_size; /* Server blocksize in sectors */ sector_t im_block_size; /* Server blocksize in sectors */
struct list_head im_extents; /* Short extents for INVAL->RW conversion */
}; };
struct pnfs_inval_tracking { struct pnfs_inval_tracking {
...@@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize) ...@@ -105,6 +106,7 @@ BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
{ {
spin_lock_init(&marks->im_lock); spin_lock_init(&marks->im_lock);
INIT_LIST_HEAD(&marks->im_tree.mtt_stub); INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
INIT_LIST_HEAD(&marks->im_extents);
marks->im_block_size = blocksize; marks->im_block_size = blocksize;
marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS, marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
blocksize); blocksize);
...@@ -186,8 +188,7 @@ struct pnfs_block_extent * ...@@ -186,8 +188,7 @@ struct pnfs_block_extent *
bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect, bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
struct pnfs_block_extent **cow_read); struct pnfs_block_extent **cow_read);
int bl_mark_sectors_init(struct pnfs_inval_markings *marks, int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
sector_t offset, sector_t length, sector_t offset, sector_t length);
sector_t **pages);
void bl_put_extent(struct pnfs_block_extent *be); void bl_put_extent(struct pnfs_block_extent *be);
struct pnfs_block_extent *bl_alloc_extent(void); struct pnfs_block_extent *bl_alloc_extent(void);
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect); int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
...@@ -200,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, ...@@ -200,6 +201,11 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
int bl_add_merge_extent(struct pnfs_block_layout *bl, int bl_add_merge_extent(struct pnfs_block_layout *bl,
struct pnfs_block_extent *new); struct pnfs_block_extent *new);
int bl_mark_for_commit(struct pnfs_block_extent *be, int bl_mark_for_commit(struct pnfs_block_extent *be,
sector_t offset, sector_t length); sector_t offset, sector_t length,
struct pnfs_block_short_extent *new);
int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
struct pnfs_block_short_extent *
bl_pop_one_short_extent(struct pnfs_inval_markings *marks);
void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */ #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
...@@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag, ...@@ -110,13 +110,7 @@ static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
return 0; return 0;
} else { } else {
struct pnfs_inval_tracking *new; struct pnfs_inval_tracking *new;
if (storage)
new = storage; new = storage;
else {
new = kmalloc(sizeof(*new), GFP_NOFS);
if (!new)
return -ENOMEM;
}
new->it_sector = s; new->it_sector = s;
new->it_tags = (1 << tag); new->it_tags = (1 << tag);
list_add(&new->it_link, &pos->it_link); list_add(&new->it_link, &pos->it_link);
...@@ -139,11 +133,13 @@ static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length) ...@@ -139,11 +133,13 @@ static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
} }
/* Ensure that future operations on given range of tree will not malloc */ /* Ensure that future operations on given range of tree will not malloc */
static int _preload_range(struct my_tree *tree, u64 offset, u64 length) static int _preload_range(struct pnfs_inval_markings *marks,
u64 offset, u64 length)
{ {
u64 start, end, s; u64 start, end, s;
int count, i, used = 0, status = -ENOMEM; int count, i, used = 0, status = -ENOMEM;
struct pnfs_inval_tracking **storage; struct pnfs_inval_tracking **storage;
struct my_tree *tree = &marks->im_tree;
dprintk("%s(%llu, %llu) enter\n", __func__, offset, length); dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
start = normalize(offset, tree->mtt_step_size); start = normalize(offset, tree->mtt_step_size);
...@@ -161,12 +157,11 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length) ...@@ -161,12 +157,11 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
goto out_cleanup; goto out_cleanup;
} }
/* Now need lock - HOW??? */ spin_lock_bh(&marks->im_lock);
for (s = start; s < end; s += tree->mtt_step_size) for (s = start; s < end; s += tree->mtt_step_size)
used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]); used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);
spin_unlock_bh(&marks->im_lock);
/* Unlock - HOW??? */
status = 0; status = 0;
out_cleanup: out_cleanup:
...@@ -179,41 +174,14 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length) ...@@ -179,41 +174,14 @@ static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
return status; return status;
} }
/*
 * Insert @offset into @array, a list of sector_t values kept in ascending
 * order and terminated by a ~0 sentinel.  Does nothing when @array is NULL
 * or when @offset is already present.  The caller must have left room for
 * one more element; no capacity check is done here.
 */
static void set_needs_init(sector_t *array, sector_t offset)
{
	sector_t *slot, *tail;

	dprintk("%s enter\n", __func__);
	if (!array)
		return;

	/* Advance to the first entry that is not smaller than offset. */
	for (slot = array; *slot < offset; slot++)
		;

	/* Already recorded. */
	if (*slot == offset)
		return;

	/* Reached the sentinel: append and re-terminate. */
	if (*slot == ~0) {
		slot[0] = offset;
		slot[1] = ~0;
		return;
	}

	/* Insert in the middle: shift the tail (sentinel included) up by one. */
	dprintk("%s Adding %llu\n", __func__, (u64)offset);
	for (tail = slot; *tail != ~0; tail++)
		;
	tail++;
	memmove(slot + 1, slot, (char *)tail - (char *)slot);
	*slot = offset;
}
/* We are relying on page lock to serialize this */ /* We are relying on page lock to serialize this */
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect) int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
{ {
int rv; int rv;
spin_lock(&marks->im_lock); spin_lock_bh(&marks->im_lock);
rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED); rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
spin_unlock(&marks->im_lock); spin_unlock_bh(&marks->im_lock);
return rv; return rv;
} }
...@@ -253,78 +221,39 @@ static int is_range_written(struct pnfs_inval_markings *marks, ...@@ -253,78 +221,39 @@ static int is_range_written(struct pnfs_inval_markings *marks,
{ {
int rv; int rv;
spin_lock(&marks->im_lock); spin_lock_bh(&marks->im_lock);
rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN); rv = _range_has_tag(&marks->im_tree, start, end, EXTENT_WRITTEN);
spin_unlock(&marks->im_lock); spin_unlock_bh(&marks->im_lock);
return rv; return rv;
} }
/* Marks sectors in [offset, offset + length) as having been initialized. /* Marks sectors in [offset, offset + length) as having been initialized.
* All lengths are step-aligned, where step is min(pagesize, blocksize). * All lengths are step-aligned, where step is min(pagesize, blocksize).
* Notes where partial block is initialized, and helps prepare it for * Currently assumes offset is page-aligned
* complete initialization later.
*/ */
/* Currently assumes offset is page-aligned */
int bl_mark_sectors_init(struct pnfs_inval_markings *marks, int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
sector_t offset, sector_t length, sector_t offset, sector_t length)
sector_t **pages)
{ {
sector_t s, start, end; sector_t start, end;
sector_t *array = NULL; /* Pages to mark */
dprintk("%s(offset=%llu,len=%llu) enter\n", dprintk("%s(offset=%llu,len=%llu) enter\n",
__func__, (u64)offset, (u64)length); __func__, (u64)offset, (u64)length);
s = max((sector_t) 3,
2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
dprintk("%s set max=%llu\n", __func__, (u64)s);
if (pages) {
array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
if (!array)
goto outerr;
array[0] = ~0;
}
start = normalize(offset, marks->im_block_size); start = normalize(offset, marks->im_block_size);
end = normalize_up(offset + length, marks->im_block_size); end = normalize_up(offset + length, marks->im_block_size);
if (_preload_range(&marks->im_tree, start, end - start)) if (_preload_range(marks, start, end - start))
goto outerr; goto outerr;
spin_lock(&marks->im_lock); spin_lock_bh(&marks->im_lock);
for (s = normalize_up(start, PAGE_CACHE_SECTORS);
s < offset; s += PAGE_CACHE_SECTORS) {
dprintk("%s pre-area pages\n", __func__);
/* Portion of used block is not initialized */
if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
set_needs_init(array, s);
}
if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length)) if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
goto out_unlock; goto out_unlock;
for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS); spin_unlock_bh(&marks->im_lock);
s < end; s += PAGE_CACHE_SECTORS) {
dprintk("%s post-area pages\n", __func__);
if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
set_needs_init(array, s);
}
spin_unlock(&marks->im_lock);
if (pages) {
if (array[0] == ~0) {
kfree(array);
*pages = NULL;
} else
*pages = array;
}
return 0; return 0;
out_unlock: out_unlock:
spin_unlock(&marks->im_lock); spin_unlock_bh(&marks->im_lock);
outerr: outerr:
if (pages) {
kfree(array);
*pages = NULL;
}
return -ENOMEM; return -ENOMEM;
} }
...@@ -338,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks, ...@@ -338,9 +267,9 @@ static int mark_written_sectors(struct pnfs_inval_markings *marks,
dprintk("%s(offset=%llu,len=%llu) enter\n", __func__, dprintk("%s(offset=%llu,len=%llu) enter\n", __func__,
(u64)offset, (u64)length); (u64)offset, (u64)length);
spin_lock(&marks->im_lock); spin_lock_bh(&marks->im_lock);
status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length); status = _set_range(&marks->im_tree, EXTENT_WRITTEN, offset, length);
spin_unlock(&marks->im_lock); spin_unlock_bh(&marks->im_lock);
return status; return status;
} }
...@@ -440,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl, ...@@ -440,20 +369,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
/* Note the range described by offset, length is guaranteed to be contained /* Note the range described by offset, length is guaranteed to be contained
* within be. * within be.
* new will be freed, either by this function or add_to_commitlist if they
* decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
*/ */
int bl_mark_for_commit(struct pnfs_block_extent *be, int bl_mark_for_commit(struct pnfs_block_extent *be,
sector_t offset, sector_t length) sector_t offset, sector_t length,
struct pnfs_block_short_extent *new)
{ {
sector_t new_end, end = offset + length; sector_t new_end, end = offset + length;
struct pnfs_block_short_extent *new;
struct pnfs_block_layout *bl = container_of(be->be_inval, struct pnfs_block_layout *bl = container_of(be->be_inval,
struct pnfs_block_layout, struct pnfs_block_layout,
bl_inval); bl_inval);
new = kmalloc(sizeof(*new), GFP_NOFS);
if (!new)
return -ENOMEM;
mark_written_sectors(be->be_inval, offset, length); mark_written_sectors(be->be_inval, offset, length);
/* We want to add the range to commit list, but it must be /* We want to add the range to commit list, but it must be
* block-normalized, and verified that the normalized range has * block-normalized, and verified that the normalized range has
...@@ -483,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be, ...@@ -483,9 +410,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
new->bse_mdev = be->be_mdev; new->bse_mdev = be->be_mdev;
spin_lock(&bl->bl_ext_lock); spin_lock(&bl->bl_ext_lock);
/* new will be freed, either by add_to_commitlist if it decides not
* to use it, or after LAYOUTCOMMIT uses it in the commitlist.
*/
add_to_commitlist(bl, new); add_to_commitlist(bl, new);
spin_unlock(&bl->bl_ext_lock); spin_unlock(&bl->bl_ext_lock);
return 0; return 0;
...@@ -933,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl, ...@@ -933,3 +857,53 @@ clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
} }
} }
} }
int bl_push_one_short_extent(struct pnfs_inval_markings *marks)
{
struct pnfs_block_short_extent *new;
new = kmalloc(sizeof(*new), GFP_NOFS);
if (unlikely(!new))
return -ENOMEM;
spin_lock_bh(&marks->im_lock);
list_add(&new->bse_node, &marks->im_extents);
spin_unlock_bh(&marks->im_lock);
return 0;
}
struct pnfs_block_short_extent *
bl_pop_one_short_extent(struct pnfs_inval_markings *marks)
{
struct pnfs_block_short_extent *rv = NULL;
spin_lock_bh(&marks->im_lock);
if (!list_empty(&marks->im_extents)) {
rv = list_entry((&marks->im_extents)->next,
struct pnfs_block_short_extent, bse_node);
list_del_init(&rv->bse_node);
}
spin_unlock_bh(&marks->im_lock);
return rv;
}
/*
 * Free up to @num_to_free short extents from the front of
 * @marks->im_extents.  A non-positive count is a no-op.  Running out of
 * list entries before @num_to_free have been freed trips BUG_ON().
 *
 * NOTE(review): im_lock is taken with plain spin_lock() here, whereas
 * bl_push_one_short_extent() and bl_pop_one_short_extent() use
 * spin_lock_bh().  Presumably this relies on the calling context --
 * confirm against the callers.
 */
void bl_free_short_extents(struct pnfs_inval_markings *marks, int num_to_free)
{
	struct pnfs_block_short_extent *cur = NULL, *next;

	if (num_to_free <= 0)
		return;

	spin_lock(&marks->im_lock);
	list_for_each_entry_safe(cur, next, &marks->im_extents, bse_node) {
		list_del(&cur->bse_node);
		kfree(cur);
		num_to_free--;
		if (!num_to_free)
			break;
	}
	spin_unlock(&marks->im_lock);

	/* The caller asked for more entries than were parked on the list. */
	BUG_ON(num_to_free > 0);
}
...@@ -162,7 +162,7 @@ struct cb_layoutrecallargs { ...@@ -162,7 +162,7 @@ struct cb_layoutrecallargs {
}; };
}; };
extern unsigned nfs4_callback_layoutrecall( extern __be32 nfs4_callback_layoutrecall(
struct cb_layoutrecallargs *args, struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps); void *dummy, struct cb_process_state *cps);
......
...@@ -305,6 +305,10 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, ...@@ -305,6 +305,10 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
n = ntohl(*p++); n = ntohl(*p++);
if (n <= 0) if (n <= 0)
goto out; goto out;
if (n > ULONG_MAX / sizeof(*args->devs)) {
status = htonl(NFS4ERR_BADXDR);
goto out;
}
args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
if (!args->devs) { if (!args->devs) {
......
...@@ -382,7 +382,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) ...@@ -382,7 +382,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
{ {
struct nfs4_pnfs_ds_addr *da = NULL; struct nfs4_pnfs_ds_addr *da = NULL;
char *buf, *portstr; char *buf, *portstr;
u32 port; __be16 port;
int nlen, rlen; int nlen, rlen;
int tmp[2]; int tmp[2];
__be32 *p; __be32 *p;
......
...@@ -3587,7 +3587,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu ...@@ -3587,7 +3587,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
res.acl_flags |= NFS4_ACL_LEN_REQUEST; res.acl_flags |= NFS4_ACL_LEN_REQUEST;
resp_buf = page_address(pages[0]); resp_buf = page_address(pages[0]);
dprintk("%s buf %p buflen %ld npages %d args.acl_len %ld\n", dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
__func__, buf, buflen, npages, args.acl_len); __func__, buf, buflen, npages, args.acl_len);
ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode),
&msg, &args.seq_args, &res.seq_res, 0); &msg, &args.seq_args, &res.seq_res, 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment