Commit c28cfd60 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

* 'for-linus' of git://git.open-osd.org/linux-open-osd: (21 commits)
  ore: Enable RAID5 mounts
  exofs: Support for RAID5 read-4-write interface.
  ore: RAID5 Write
  ore: RAID5 read
  fs/Makefile: Always inspect exofs/
  ore: Make ore_calc_stripe_info EXPORT_SYMBOL
  ore/exofs: Change ore_check_io API
  ore/exofs: Define new ore_verify_layout
  ore: Support for partial component table
  ore: Support for short read/writes
  exofs: Support for short read/writes
  ore: Remove check for ios->kern_buff in _prepare_for_striping to later
  ore: cleanup: Embed an ore_striping_info inside ore_io_state
  ore: Only IO one group at a time (API change)
  ore/exofs: Change the type of the devices array (API change)
  ore: Make ore_striping_info and ore_calc_stripe_info public
  exofs: Remove unused data_map member from exofs_sb_info
  exofs: Rename struct ore_components comps => oc
  exofs/super.c: local functions should be static
  exofs/ore.c: local functions should be static
  ...
parents dfa4a423 44231e68
......@@ -11,10 +11,6 @@
# it under the terms of the GNU General Public version 2 License as
# published by the Free Software Foundation
#
# FIXME: SCSI_OSD_INITIATOR should select CONFIG (HMAC) SHA1 somehow.
# How is it done properly?
#
config SCSI_OSD_INITIATOR
tristate "OSD-Initiator library"
depends on SCSI
......
......@@ -120,6 +120,6 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
obj-$(CONFIG_EXOFS_FS) += exofs/
obj-$(y) += exofs/ # Multiple mods, used by nfs/objlayout
obj-$(CONFIG_CEPH_FS) += ceph/
obj-$(CONFIG_PSTORE) += pstore/
......@@ -13,7 +13,8 @@
#
# ore module library
obj-$(CONFIG_ORE) += ore.o
libore-y := ore.o ore_raid.o
obj-$(CONFIG_ORE) += libore.o
exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
obj-$(CONFIG_EXOFS_FS) += exofs.o
# Note ORE needs to "select ASYNC_XOR". So Not to force multiple selects
# for every ORE user we do it like this. Any user should add itself here
# at the "depends on EXOFS_FS || ..." with an ||. The dependencies are
# selected here, and we default to "ON". So in effect it is like been
# selected by any of the users.
config ORE
tristate
depends on EXOFS_FS
select ASYNC_XOR
default SCSI_OSD_ULD
config EXOFS_FS
tristate "exofs: OSD based file system support"
depends on SCSI_OSD_ULD
select ORE
help
EXOFS is a file system that uses an OSD storage device,
as its backing storage.
......
......@@ -53,6 +53,10 @@
/* u64 has problems with printk this will cast it to unsigned long long */
#define _LLU(x) (unsigned long long)(x)
struct exofs_dev {
struct ore_dev ored;
unsigned did;
};
/*
* our extension to the in-memory superblock
*/
......@@ -66,13 +70,9 @@ struct exofs_sb_info {
u32 s_next_generation; /* next gen # to use */
atomic_t s_curr_pending; /* number of pending commands */
struct pnfs_osd_data_map data_map; /* Default raid to use
* FIXME: Needed ?
*/
struct ore_layout layout; /* Default files layout */
struct ore_comp one_comp; /* id & cred of partition id=0*/
struct ore_components comps; /* comps for the partition */
struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
struct ore_components oc; /* comps for the partition */
};
/*
......@@ -86,7 +86,7 @@ struct exofs_i_info {
uint32_t i_dir_start_lookup; /* which page to start lookup */
uint64_t i_commit_size; /* the object's written length */
struct ore_comp one_comp; /* same component for all devices */
struct ore_components comps; /* inode view of the device table */
struct ore_components oc; /* inode view of the device table */
};
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
......@@ -207,7 +207,7 @@ extern const struct inode_operations exofs_fast_symlink_inode_operations;
* bigger and that the device table repeats twice.
* See: exofs_read_lookup_dev_table()
*/
static inline void exofs_init_comps(struct ore_components *comps,
static inline void exofs_init_comps(struct ore_components *oc,
struct ore_comp *one_comp,
struct exofs_sb_info *sbi, osd_id oid)
{
......@@ -217,13 +217,15 @@ static inline void exofs_init_comps(struct ore_components *comps,
one_comp->obj.id = oid;
exofs_make_credential(one_comp->cred, &one_comp->obj);
comps->numdevs = sbi->comps.numdevs;
comps->single_comp = EC_SINGLE_COMP;
comps->comps = one_comp;
oc->first_dev = 0;
oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 *
sbi->layout.group_count;
oc->single_comp = EC_SINGLE_COMP;
oc->comps = one_comp;
/* Round robin device view of the table */
first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->comps.numdevs;
comps->ods = sbi->comps.ods + first_dev;
first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs;
oc->ods = &sbi->oc.ods[first_dev];
}
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (C) from 2011
* Boaz Harrosh <bharrosh@panasas.com>
*
* This file is part of the objects raid engine (ore).
*
* It is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* You should have received a copy of the GNU General Public License
* along with "ore". If not, write to the Free Software Foundation, Inc:
* "Free Software Foundation <info@fsf.org>"
*/
#include <scsi/osd_ore.h>
#define ORE_ERR(fmt, a...) printk(KERN_ERR "ore: " fmt, ##a)
#ifdef CONFIG_EXOFS_DEBUG
#define ORE_DBGMSG(fmt, a...) \
printk(KERN_NOTICE "ore @%s:%d: " fmt, __func__, __LINE__, ##a)
#else
#define ORE_DBGMSG(fmt, a...) \
do { if (0) printk(fmt, ##a); } while (0)
#endif
/* u64 has problems with printk this will cast it to unsigned long long */
#define _LLU(x) (unsigned long long)(x)
#define ORE_DBGMSG2(M...) do {} while (0)
/* #define ORE_DBGMSG2 ORE_DBGMSG */
/* Calculate the component order in a stripe. eg the logical data unit
* address within the stripe of @dev given the @par_dev of this stripe.
*/
static inline unsigned _dev_order(unsigned devs_in_group, unsigned mirrors_p1,
unsigned par_dev, unsigned dev)
{
unsigned first_dev = dev - dev % devs_in_group;
dev -= first_dev;
par_dev -= first_dev;
if (devs_in_group == par_dev) /* The raid 0 case */
return dev / mirrors_p1;
/* raid4/5/6 case */
return ((devs_in_group + dev - par_dev - mirrors_p1) % devs_in_group) /
mirrors_p1;
}
/* ios_raid.c stuff needed by ios.c */
int _ore_post_alloc_raid_stuff(struct ore_io_state *ios);
void _ore_free_raid_stuff(struct ore_io_state *ios);
void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len,
bool not_last);
int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si,
struct ore_per_dev_state *per_dev, unsigned cur_len);
void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d,
struct ore_striping_info *si, struct page *page);
static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d,
struct ore_striping_info *si, struct page *page)
{
if (!sp2d) /* Inline the fast path */
return; /* Hay no raid stuff */
_ore_add_stripe_page(sp2d, si, page);
}
/* ios.c stuff needed by ios_raid.c */
int _ore_get_io_state(struct ore_layout *layout,
struct ore_components *oc, unsigned numdevs,
unsigned sgs_per_dev, unsigned num_par_pages,
struct ore_io_state **pios);
int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
unsigned pgbase, struct page **pages,
struct ore_per_dev_state *per_dev, int cur_len);
int _ore_read_mirror(struct ore_io_state *ios, unsigned cur_comp);
int ore_io_execute(struct ore_io_state *ios);
This diff is collapsed.
......@@ -34,15 +34,30 @@ struct ore_comp {
struct ore_layout {
/* Our way of looking at the data_map */
enum pnfs_osd_raid_algorithm4
raid_algorithm;
unsigned stripe_unit;
unsigned mirrors_p1;
unsigned group_width;
unsigned parity;
u64 group_depth;
unsigned group_count;
/* Cached often needed calculations filled in by
* ore_verify_layout
*/
unsigned long max_io_length; /* Max length that should be passed to
* ore_get_rw_state
*/
};
struct ore_dev {
struct osd_dev *od;
};
struct ore_components {
unsigned first_dev; /* First logical device no */
unsigned numdevs; /* Num of devices in array */
/* If @single_comp == EC_SINGLE_COMP, @comps points to a single
* component. else there are @numdevs components
......@@ -51,20 +66,60 @@ struct ore_components {
EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff
} single_comp;
struct ore_comp *comps;
struct osd_dev **ods; /* osd_dev array */
/* Array of pointers to ore_dev-* . User will usually have these pointed
* too a bigger struct which contain an "ore_dev ored" member and use
* container_of(oc->ods[i], struct foo_dev, ored) to access the bigger
* structure.
*/
struct ore_dev **ods;
};
/* ore_comp_dev Recievies a logical device index */
static inline struct osd_dev *ore_comp_dev(
const struct ore_components *oc, unsigned i)
{
BUG_ON((i < oc->first_dev) || (oc->first_dev + oc->numdevs <= i));
return oc->ods[i - oc->first_dev]->od;
}
static inline void ore_comp_set_dev(
struct ore_components *oc, unsigned i, struct osd_dev *od)
{
oc->ods[i - oc->first_dev]->od = od;
}
struct ore_striping_info {
u64 offset;
u64 obj_offset;
u64 length;
u64 first_stripe_start; /* only used in raid writes */
u64 M; /* for truncate */
unsigned bytes_in_stripe;
unsigned dev;
unsigned par_dev;
unsigned unit_off;
unsigned cur_pg;
unsigned cur_comp;
};
struct ore_io_state;
typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private);
struct _ore_r4w_op {
/* @Priv given here is passed ios->private */
struct page * (*get_page)(void *priv, u64 page_index, bool *uptodate);
void (*put_page)(void *priv, struct page *page);
};
struct ore_io_state {
struct kref kref;
struct ore_striping_info si;
void *private;
ore_io_done_fn done;
struct ore_layout *layout;
struct ore_components *comps;
struct ore_components *oc;
/* Global read/write IO*/
loff_t offset;
......@@ -84,6 +139,16 @@ struct ore_io_state {
bool reading;
/* House keeping of Parity pages */
bool extra_part_alloc;
struct page **parity_pages;
unsigned max_par_pages;
unsigned cur_par_page;
unsigned sgs_per_dev;
struct __stripe_pages_2d *sp2d;
struct ore_io_state *ios_read_4_write;
const struct _ore_r4w_op *r4w;
/* Variable array of size numdevs */
unsigned numdevs;
struct ore_per_dev_state {
......@@ -91,7 +156,10 @@ struct ore_io_state {
struct bio *bio;
loff_t offset;
unsigned length;
unsigned last_sgs_total;
unsigned dev;
struct osd_sg_entry *sglist;
unsigned cur_sg;
} per_dev[];
};
......@@ -102,6 +170,9 @@ static inline unsigned ore_io_state_size(unsigned numdevs)
}
/* ore.c */
int ore_verify_layout(unsigned total_comps, struct ore_layout *layout);
void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
u64 length, struct ore_striping_info *si);
int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
bool is_reading, u64 offset, u64 length,
struct ore_io_state **ios);
......@@ -109,7 +180,10 @@ int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
struct ore_io_state **ios);
void ore_put_io_state(struct ore_io_state *ios);
int ore_check_io(struct ore_io_state *ios, u64 *resid);
typedef void (*ore_on_dev_error)(struct ore_io_state *ios, struct ore_dev *od,
unsigned dev_index, enum osd_err_priority oep,
u64 dev_offset, u64 dev_len);
int ore_check_io(struct ore_io_state *ios, ore_on_dev_error rep);
int ore_create(struct ore_io_state *ios);
int ore_remove(struct ore_io_state *ios);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment