Commit 9e203936 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

* 'for-linus' of git://git.open-osd.org/linux-open-osd:
  ore: Must support none-PAGE-aligned IO
  ore: fix BUG_ON, too few sgs when reading
  ore: Fix crash in case of an IO error.
  ore: FIX breakage when MISC_FILESYSTEMS is not set
parents 993ecff8 724577ca
...@@ -218,6 +218,8 @@ source "fs/exofs/Kconfig" ...@@ -218,6 +218,8 @@ source "fs/exofs/Kconfig"
endif # MISC_FILESYSTEMS endif # MISC_FILESYSTEMS
source "fs/exofs/Kconfig.ore"
menuconfig NETWORK_FILESYSTEMS menuconfig NETWORK_FILESYSTEMS
bool "Network File Systems" bool "Network File Systems"
default y default y
......
# Note: ORE needs to "select ASYNC_XOR". So as not to force multiple selects
# on every ORE user, we do it like this: any user should add itself here,
# at the "depends on EXOFS_FS || ..." line, with an ||. The dependencies are
# selected here, and we default to "ON", so in effect it is as if ORE had been
# selected by any of the users.
config ORE
tristate
depends on EXOFS_FS || PNFS_OBJLAYOUT
select ASYNC_XOR
default SCSI_OSD_ULD
config EXOFS_FS config EXOFS_FS
tristate "exofs: OSD based file system support" tristate "exofs: OSD based file system support"
depends on SCSI_OSD_ULD depends on SCSI_OSD_ULD
......
# ORE - Objects Raid Engine (libore.ko)
#
# Note: ORE needs to "select ASYNC_XOR". So as not to force multiple selects
# on every ORE user, we do it like this: any user should add itself here,
# at the "depends on EXOFS_FS || ..." line, with an ||. The dependencies are
# selected here, and we default to "ON", so in effect it is as if ORE had been
# selected by any of the users.
config ORE
tristate
depends on EXOFS_FS || PNFS_OBJLAYOUT
select ASYNC_XOR
default SCSI_OSD_ULD
...@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, ...@@ -266,7 +266,7 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
/* first/last seg is split */ /* first/last seg is split */
num_raid_units += layout->group_width; num_raid_units += layout->group_width;
sgs_per_dev = div_u64(num_raid_units, data_devs); sgs_per_dev = div_u64(num_raid_units, data_devs) + 2;
} else { } else {
/* For Writes add parity pages array. */ /* For Writes add parity pages array. */
max_par_pages = num_raid_units * pages_in_unit * max_par_pages = num_raid_units * pages_in_unit *
...@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error) ...@@ -445,10 +445,10 @@ int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
u64 residual = ios->reading ? u64 residual = ios->reading ?
or->in.residual : or->out.residual; or->in.residual : or->out.residual;
u64 offset = (ios->offset + ios->length) - residual; u64 offset = (ios->offset + ios->length) - residual;
struct ore_dev *od = ios->oc->ods[ unsigned dev = per_dev->dev - ios->oc->first_dev;
per_dev->dev - ios->oc->first_dev]; struct ore_dev *od = ios->oc->ods[dev];
on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri, on_dev_error(ios, od, dev, osi.osd_err_pri,
offset, residual); offset, residual);
} }
if (osi.osd_err_pri >= acumulated_osd_err) { if (osi.osd_err_pri >= acumulated_osd_err) {
......
...@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios) ...@@ -328,8 +328,8 @@ static int _alloc_read_4_write(struct ore_io_state *ios)
/* @si contains info of the to-be-inserted page. Update of @si should be /* @si contains info of the to-be-inserted page. Update of @si should be
* maintained by caller. Specificaly si->dev, si->obj_offset, ... * maintained by caller. Specificaly si->dev, si->obj_offset, ...
*/ */
static int _add_to_read_4_write(struct ore_io_state *ios, static int _add_to_r4w(struct ore_io_state *ios, struct ore_striping_info *si,
struct ore_striping_info *si, struct page *page) struct page *page, unsigned pg_len)
{ {
struct request_queue *q; struct request_queue *q;
struct ore_per_dev_state *per_dev; struct ore_per_dev_state *per_dev;
...@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios, ...@@ -366,17 +366,60 @@ static int _add_to_read_4_write(struct ore_io_state *ios,
_ore_add_sg_seg(per_dev, gap, true); _ore_add_sg_seg(per_dev, gap, true);
} }
q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev)); q = osd_request_queue(ore_comp_dev(read_ios->oc, per_dev->dev));
added_len = bio_add_pc_page(q, per_dev->bio, page, PAGE_SIZE, 0); added_len = bio_add_pc_page(q, per_dev->bio, page, pg_len,
if (unlikely(added_len != PAGE_SIZE)) { si->obj_offset % PAGE_SIZE);
if (unlikely(added_len != pg_len)) {
ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n", ORE_DBGMSG("Failed to bio_add_pc_page bi_vcnt=%d\n",
per_dev->bio->bi_vcnt); per_dev->bio->bi_vcnt);
return -ENOMEM; return -ENOMEM;
} }
per_dev->length += PAGE_SIZE; per_dev->length += pg_len;
return 0; return 0;
} }
/* read the beginning of an unaligned first page */
static int _add_to_r4w_first_page(struct ore_io_state *ios, struct page *page)
{
struct ore_striping_info si;
unsigned pg_len;
ore_calc_stripe_info(ios->layout, ios->offset, 0, &si);
pg_len = si.obj_offset % PAGE_SIZE;
si.obj_offset -= pg_len;
ORE_DBGMSG("offset=0x%llx len=0x%x index=0x%lx dev=%x\n",
_LLU(si.obj_offset), pg_len, page->index, si.dev);
return _add_to_r4w(ios, &si, page, pg_len);
}
/*
 * Read the end of an incomplete last page.
 *
 * @offset: in/out. On entry, the position at which the stripe info is
 *          computed. On return it has been advanced by the number of bytes
 *          queued for reading, i.e. up to the end of the page that contains
 *          si.unit_off.
 *
 * The page to read into is looked up in ios->sp2d->_1p_stripes, indexed by
 * the page number inside the unit and by the device order within the stripe.
 * That page must already exist (BUG_ON otherwise).
 *
 * Returns the result of _add_to_r4w().
 */
static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)
{
	struct ore_striping_info last_si;
	struct page *last_page;
	unsigned tail_len;
	unsigned pg_idx;
	unsigned dev_idx;

	ore_calc_stripe_info(ios->layout, *offset, 0, &last_si);

	/* Row: which page of the unit the offset falls in */
	pg_idx = last_si.unit_off / PAGE_SIZE;
	/* Column: position of the device as computed by _dev_order() */
	dev_idx = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
			     ios->layout->mirrors_p1, last_si.par_dev,
			     last_si.dev);
	last_page = ios->sp2d->_1p_stripes[pg_idx].pages[dev_idx];

	/* Remaining bytes from unit_off up to the end of its page */
	tail_len = PAGE_SIZE - (last_si.unit_off % PAGE_SIZE);
	*offset += tail_len;

	ORE_DBGMSG("p=%d, c=%d next-offset=0x%llx len=0x%x dev=%x par_dev=%d\n",
		   pg_idx, dev_idx, _LLU(*offset), tail_len, last_si.dev,
		   last_si.par_dev);

	BUG_ON(!last_page);

	return _add_to_r4w(ios, &last_si, last_page, tail_len);
}
static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret) static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
{ {
struct bio_vec *bv; struct bio_vec *bv;
...@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios) ...@@ -444,9 +487,13 @@ static int _read_4_write(struct ore_io_state *ios)
struct page **pp = &_1ps->pages[c]; struct page **pp = &_1ps->pages[c];
bool uptodate; bool uptodate;
if (*pp) if (*pp) {
if (ios->offset % PAGE_SIZE)
/* Read the remainder of the page */
_add_to_r4w_first_page(ios, *pp);
/* to-be-written pages start here */ /* to-be-written pages start here */
goto read_last_stripe; goto read_last_stripe;
}
*pp = ios->r4w->get_page(ios->private, offset, *pp = ios->r4w->get_page(ios->private, offset,
&uptodate); &uptodate);
...@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios) ...@@ -454,7 +501,7 @@ static int _read_4_write(struct ore_io_state *ios)
return -ENOMEM; return -ENOMEM;
if (!uptodate) if (!uptodate)
_add_to_read_4_write(ios, &read_si, *pp); _add_to_r4w(ios, &read_si, *pp, PAGE_SIZE);
/* Mark read-pages to be cache_released */ /* Mark read-pages to be cache_released */
_1ps->page_is_read[c] = true; _1ps->page_is_read[c] = true;
...@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios) ...@@ -465,8 +512,11 @@ static int _read_4_write(struct ore_io_state *ios)
} }
read_last_stripe: read_last_stripe:
offset = ios->offset + (ios->length + PAGE_SIZE - 1) / offset = ios->offset + ios->length;
PAGE_SIZE * PAGE_SIZE; if (offset % PAGE_SIZE)
_add_to_r4w_last_page(ios, &offset);
/* offset will be aligned to next page */
last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe) last_stripe_end = div_u64(offset + bytes_in_stripe - 1, bytes_in_stripe)
* bytes_in_stripe; * bytes_in_stripe;
if (offset == last_stripe_end) /* Optimize for the aligned case */ if (offset == last_stripe_end) /* Optimize for the aligned case */
...@@ -503,7 +553,7 @@ static int _read_4_write(struct ore_io_state *ios) ...@@ -503,7 +553,7 @@ static int _read_4_write(struct ore_io_state *ios)
/* Mark read-pages to be cache_released */ /* Mark read-pages to be cache_released */
_1ps->page_is_read[c] = true; _1ps->page_is_read[c] = true;
if (!uptodate) if (!uptodate)
_add_to_read_4_write(ios, &read_si, page); _add_to_r4w(ios, &read_si, page, PAGE_SIZE);
} }
offset += PAGE_SIZE; offset += PAGE_SIZE;
...@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios, ...@@ -551,7 +601,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
unsigned cur_len) unsigned cur_len)
{ {
if (ios->reading) { if (ios->reading) {
BUG_ON(per_dev->cur_sg >= ios->sgs_per_dev); if (per_dev->cur_sg >= ios->sgs_per_dev) {
ORE_DBGMSG("cur_sg(%d) >= sgs_per_dev(%d)\n" ,
per_dev->cur_sg, ios->sgs_per_dev);
return -ENOMEM;
}
_ore_add_sg_seg(per_dev, cur_len, true); _ore_add_sg_seg(per_dev, cur_len, true);
} else { } else {
struct __stripe_pages_2d *sp2d = ios->sp2d; struct __stripe_pages_2d *sp2d = ios->sp2d;
...@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) ...@@ -612,8 +666,6 @@ int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
return -ENOMEM; return -ENOMEM;
} }
BUG_ON(ios->offset % PAGE_SIZE);
/* Round io down to last full strip */ /* Round io down to last full strip */
first_stripe = div_u64(ios->offset, stripe_size); first_stripe = div_u64(ios->offset, stripe_size);
last_stripe = div_u64(ios->offset + ios->length, stripe_size); last_stripe = div_u64(ios->offset + ios->length, stripe_size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment