Commit 51042122 authored by Sage Weil

ceph: fix endian conversions for ceph_pg

The endian conversions don't quite work with the old union ceph_pg.  Just
make it a regular struct, and make each field __le.  This is simpler and it
has the added bonus of actually working.
Signed-off-by: Sage Weil <sage@newdream.net>
parent 63f2d211
...@@ -99,7 +99,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) ...@@ -99,7 +99,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
u64 len = 1, olen; u64 len = 1, olen;
u64 tmp; u64 tmp;
struct ceph_object_layout ol; struct ceph_object_layout ol;
union ceph_pg pgid; struct ceph_pg pgid;
/* copy and validate */ /* copy and validate */
if (copy_from_user(&dl, arg, sizeof(dl))) if (copy_from_user(&dl, arg, sizeof(dl)))
...@@ -121,7 +121,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) ...@@ -121,7 +121,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout,
osdc->osdmap); osdc->osdmap);
pgid.pg64 = le64_to_cpu(ol.ol_pgid); pgid = ol.ol_pgid;
dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
if (dl.osd >= 0) { if (dl.osd >= 0) {
struct ceph_entity_addr *a = struct ceph_entity_addr *a =
......
...@@ -520,7 +520,7 @@ static int __map_osds(struct ceph_osd_client *osdc, ...@@ -520,7 +520,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
struct ceph_osd_request *req) struct ceph_osd_request *req)
{ {
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
union ceph_pg pgid; struct ceph_pg pgid;
int o = -1; int o = -1;
int err; int err;
struct ceph_osd *newosd = NULL; struct ceph_osd *newosd = NULL;
...@@ -530,7 +530,7 @@ static int __map_osds(struct ceph_osd_client *osdc, ...@@ -530,7 +530,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
&req->r_file_layout, osdc->osdmap); &req->r_file_layout, osdc->osdmap);
if (err) if (err)
return err; return err;
pgid.pg64 = le64_to_cpu(reqhead->layout.ol_pgid); pgid = reqhead->layout.ol_pgid;
o = ceph_calc_pg_primary(osdc->osdmap, pgid); o = ceph_calc_pg_primary(osdc->osdmap, pgid);
if ((req->r_osd && req->r_osd->o_osd == o && if ((req->r_osd && req->r_osd->o_osd == o &&
...@@ -538,8 +538,8 @@ static int __map_osds(struct ceph_osd_client *osdc, ...@@ -538,8 +538,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
(req->r_osd == NULL && o == -1)) (req->r_osd == NULL && o == -1))
return 0; /* no change */ return 0; /* no change */
dout("map_osds tid %llu pgid %llx pool %d osd%d (was osd%d)\n", dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n",
req->r_tid, pgid.pg64, pgid.pg.pool, o, req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
req->r_osd ? req->r_osd->o_osd : -1); req->r_osd ? req->r_osd->o_osd : -1);
if (req->r_osd) { if (req->r_osd) {
......
...@@ -366,19 +366,33 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) ...@@ -366,19 +366,33 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
/* /*
* Insert a new pg_temp mapping * Insert a new pg_temp mapping
*/ */
/*
 * Order two placement-group ids.
 *
 * struct ceph_pg is a packed struct of little-endian fields, so the two
 * ids are compared as the little-endian integer their bytes encode: the
 * last byte is the most significant one.  On a little-endian host this
 * gives exactly the same ordering as loading each id as a native u64.
 *
 * The bytes are inspected individually instead of via *(u64 *)&l because
 * a packed struct only guarantees byte alignment, and because a native
 * 64-bit load would make the sort order depend on host endianness.
 * NOTE(review): callers only appear to need a consistent total order;
 * confirm nothing relies on the old big-endian-host ordering.
 *
 * Returns -1 if l sorts before r, 1 if l sorts after r, 0 if they are equal.
 */
static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
{
	const unsigned char *lb = (const unsigned char *)&l;
	const unsigned char *rb = (const unsigned char *)&r;
	int i = sizeof(l);

	/* walk from the most significant (last) byte down to the first */
	while (i-- > 0) {
		if (lb[i] < rb[i])
			return -1;
		if (lb[i] > rb[i])
			return 1;
	}
	return 0;
}
static int __insert_pg_mapping(struct ceph_pg_mapping *new, static int __insert_pg_mapping(struct ceph_pg_mapping *new,
struct rb_root *root) struct rb_root *root)
{ {
struct rb_node **p = &root->rb_node; struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL; struct rb_node *parent = NULL;
struct ceph_pg_mapping *pg = NULL; struct ceph_pg_mapping *pg = NULL;
int c;
while (*p) { while (*p) {
parent = *p; parent = *p;
pg = rb_entry(parent, struct ceph_pg_mapping, node); pg = rb_entry(parent, struct ceph_pg_mapping, node);
if (new->pgid < pg->pgid) c = pgid_cmp(new->pgid, pg->pgid);
if (c < 0)
p = &(*p)->rb_left; p = &(*p)->rb_left;
else if (new->pgid > pg->pgid) else if (c > 0)
p = &(*p)->rb_right; p = &(*p)->rb_right;
else else
return -EEXIST; return -EEXIST;
...@@ -467,11 +481,11 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ...@@ -467,11 +481,11 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_32_safe(p, end, len, bad); ceph_decode_32_safe(p, end, len, bad);
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
int n, j; int n, j;
u64 pgid; struct ceph_pg pgid;
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
pgid = ceph_decode_64(p); ceph_decode_copy(p, &pgid, sizeof(pgid));
n = ceph_decode_32(p); n = ceph_decode_32(p);
ceph_decode_need(p, end, n * sizeof(u32), bad); ceph_decode_need(p, end, n * sizeof(u32), bad);
pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
...@@ -487,7 +501,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ...@@ -487,7 +501,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
err = __insert_pg_mapping(pg, &map->pg_temp); err = __insert_pg_mapping(pg, &map->pg_temp);
if (err) if (err)
goto bad; goto bad;
dout(" added pg_temp %llx len %d\n", pgid, len); dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len);
} }
/* crush */ /* crush */
...@@ -659,19 +673,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -659,19 +673,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
while (len--) { while (len--) {
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
int j; int j;
u64 pgid; struct ceph_pg pgid;
u32 pglen; u32 pglen;
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
pgid = ceph_decode_64(p); ceph_decode_copy(p, &pgid, sizeof(pgid));
pglen = ceph_decode_32(p); pglen = ceph_decode_32(p);
/* remove any? */ /* remove any? */
while (rbp && rb_entry(rbp, struct ceph_pg_mapping, while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
node)->pgid <= pgid) { node)->pgid, pgid) <= 0) {
struct rb_node *cur = rbp; struct rb_node *cur = rbp;
rbp = rb_next(rbp); rbp = rb_next(rbp);
dout(" removed pg_temp %llx\n", dout(" removed pg_temp %llx\n",
rb_entry(cur, struct ceph_pg_mapping, node)->pgid); *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
node)->pgid);
rb_erase(cur, &map->pg_temp); rb_erase(cur, &map->pg_temp);
} }
...@@ -690,14 +705,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -690,14 +705,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
err = __insert_pg_mapping(pg, &map->pg_temp); err = __insert_pg_mapping(pg, &map->pg_temp);
if (err) if (err)
goto bad; goto bad;
dout(" added pg_temp %llx len %d\n", pgid, pglen); dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
pglen);
} }
} }
while (rbp) { while (rbp) {
struct rb_node *cur = rbp; struct rb_node *cur = rbp;
rbp = rb_next(rbp); rbp = rb_next(rbp);
dout(" removed pg_temp %llx\n", dout(" removed pg_temp %llx\n",
rb_entry(cur, struct ceph_pg_mapping, node)->pgid); *(u64 *)&rb_entry(cur, struct ceph_pg_mapping,
node)->pgid);
rb_erase(cur, &map->pg_temp); rb_erase(cur, &map->pg_temp);
} }
...@@ -782,16 +799,19 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, ...@@ -782,16 +799,19 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
struct ceph_osdmap *osdmap) struct ceph_osdmap *osdmap)
{ {
unsigned num, num_mask; unsigned num, num_mask;
union ceph_pg pgid; struct ceph_pg pgid;
s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred); s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
int poolid = le32_to_cpu(fl->fl_pg_pool); int poolid = le32_to_cpu(fl->fl_pg_pool);
struct ceph_pg_pool_info *pool; struct ceph_pg_pool_info *pool;
unsigned ps;
if (poolid >= osdmap->num_pools) if (poolid >= osdmap->num_pools)
return -EIO; return -EIO;
pool = &osdmap->pg_pool[poolid];
pool = &osdmap->pg_pool[poolid];
ps = ceph_full_name_hash(oid, strlen(oid));
if (preferred >= 0) { if (preferred >= 0) {
ps += preferred;
num = le32_to_cpu(pool->v.lpg_num); num = le32_to_cpu(pool->v.lpg_num);
num_mask = pool->lpg_num_mask; num_mask = pool->lpg_num_mask;
} else { } else {
...@@ -799,22 +819,17 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, ...@@ -799,22 +819,17 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
num_mask = pool->pg_num_mask; num_mask = pool->pg_num_mask;
} }
pgid.pg64 = 0; /* start with it zeroed out */ pgid.ps = cpu_to_le16(ps);
pgid.pg.ps = ceph_full_name_hash(oid, strlen(oid)); pgid.preferred = cpu_to_le16(preferred);
pgid.pg.preferred = preferred; pgid.pool = fl->fl_pg_pool;
if (preferred >= 0)
pgid.pg.ps += preferred;
pgid.pg.pool = le32_to_cpu(fl->fl_pg_pool);
if (preferred >= 0) if (preferred >= 0)
dout("calc_object_layout '%s' pgid %d.%xp%d (%llx)\n", oid, dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps,
pgid.pg.pool, pgid.pg.ps, (int)preferred, pgid.pg64); (int)preferred);
else else
dout("calc_object_layout '%s' pgid %d.%x (%llx)\n", oid, dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
pgid.pg.pool, pgid.pg.ps, pgid.pg64);
ol->ol_pgid = cpu_to_le64(pgid.pg64); ol->ol_pgid = pgid;
ol->ol_stripe_unit = fl->fl_object_stripe_unit; ol->ol_stripe_unit = fl->fl_object_stripe_unit;
return 0; return 0;
} }
...@@ -822,21 +837,24 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol, ...@@ -822,21 +837,24 @@ int ceph_calc_object_layout(struct ceph_object_layout *ol,
* Calculate raw osd vector for the given pgid. Return pointer to osd * Calculate raw osd vector for the given pgid. Return pointer to osd
* array, or NULL on failure. * array, or NULL on failure.
*/ */
static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid, static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
int *osds, int *num) int *osds, int *num)
{ {
struct rb_node *n = osdmap->pg_temp.rb_node; struct rb_node *n = osdmap->pg_temp.rb_node;
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
struct ceph_pg_pool_info *pool; struct ceph_pg_pool_info *pool;
int ruleno; int ruleno;
unsigned pps; /* placement ps */ unsigned poolid, ps, pps;
int preferred;
int c;
/* pg_temp? */ /* pg_temp? */
while (n) { while (n) {
pg = rb_entry(n, struct ceph_pg_mapping, node); pg = rb_entry(n, struct ceph_pg_mapping, node);
if (pgid.pg64 < pg->pgid) c = pgid_cmp(pgid, pg->pgid);
if (c < 0)
n = n->rb_left; n = n->rb_left;
else if (pgid.pg64 > pg->pgid) else if (c > 0)
n = n->rb_right; n = n->rb_right;
else { else {
*num = pg->len; *num = pg->len;
...@@ -845,36 +863,40 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid, ...@@ -845,36 +863,40 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, union ceph_pg pgid,
} }
/* crush */ /* crush */
if (pgid.pg.pool >= osdmap->num_pools) poolid = le32_to_cpu(pgid.pool);
ps = le16_to_cpu(pgid.ps);
preferred = (s16)le16_to_cpu(pgid.preferred);
if (poolid >= osdmap->num_pools)
return NULL; return NULL;
pool = &osdmap->pg_pool[pgid.pg.pool]; pool = &osdmap->pg_pool[poolid];
ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
pool->v.type, pool->v.size); pool->v.type, pool->v.size);
if (ruleno < 0) { if (ruleno < 0) {
pr_err("no crush rule pool %d type %d size %d\n", pr_err("no crush rule pool %d type %d size %d\n",
pgid.pg.pool, pool->v.type, pool->v.size); poolid, pool->v.type, pool->v.size);
return NULL; return NULL;
} }
if (pgid.pg.preferred >= 0) if (preferred >= 0)
pps = ceph_stable_mod(pgid.pg.ps, pps = ceph_stable_mod(ps,
le32_to_cpu(pool->v.lpgp_num), le32_to_cpu(pool->v.lpgp_num),
pool->lpgp_num_mask); pool->lpgp_num_mask);
else else
pps = ceph_stable_mod(pgid.pg.ps, pps = ceph_stable_mod(ps,
le32_to_cpu(pool->v.pgp_num), le32_to_cpu(pool->v.pgp_num),
pool->pgp_num_mask); pool->pgp_num_mask);
pps += pgid.pg.pool; pps += poolid;
*num = crush_do_rule(osdmap->crush, ruleno, pps, osds, *num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
min_t(int, pool->v.size, *num), min_t(int, pool->v.size, *num),
pgid.pg.preferred, osdmap->osd_weight); preferred, osdmap->osd_weight);
return osds; return osds;
} }
/* /*
* Return primary osd for given pgid, or -1 if none. * Return primary osd for given pgid, or -1 if none.
*/ */
int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid) int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
{ {
int rawosds[10], *osds; int rawosds[10], *osds;
int i, num = ARRAY_SIZE(rawosds); int i, num = ARRAY_SIZE(rawosds);
......
...@@ -25,7 +25,7 @@ struct ceph_pg_pool_info { ...@@ -25,7 +25,7 @@ struct ceph_pg_pool_info {
struct ceph_pg_mapping { struct ceph_pg_mapping {
struct rb_node node; struct rb_node node;
u64 pgid; struct ceph_pg pgid;
int len; int len;
int osds[]; int osds[];
}; };
...@@ -118,6 +118,7 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol, ...@@ -118,6 +118,7 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
const char *oid, const char *oid,
struct ceph_file_layout *fl, struct ceph_file_layout *fl,
struct ceph_osdmap *osdmap); struct ceph_osdmap *osdmap);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, union ceph_pg pgid); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid);
#endif #endif
...@@ -55,13 +55,10 @@ struct ceph_timespec { ...@@ -55,13 +55,10 @@ struct ceph_timespec {
* placement group. * placement group.
* we encode this into one __le64. * we encode this into one __le64.
*/ */
union ceph_pg { struct ceph_pg {
__u64 pg64; __le16 preferred; /* preferred primary osd */
struct { __le16 ps; /* placement seed */
__s16 preferred; /* preferred primary osd */ __le32 pool; /* object pool */
__u16 ps; /* placement seed */
__u32 pool; /* object pool */
} __attribute__ ((packed)) pg;
} __attribute__ ((packed)); } __attribute__ ((packed));
/* /*
...@@ -117,7 +114,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) ...@@ -117,7 +114,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask)
* object layout - how a given object should be stored. * object layout - how a given object should be stored.
*/ */
struct ceph_object_layout { struct ceph_object_layout {
__le64 ol_pgid; /* raw pg, with _full_ ps precision. */ struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */
__le32 ol_stripe_unit; /* for per-object parity, if any */ __le32 ol_stripe_unit; /* for per-object parity, if any */
} __attribute__ ((packed)); } __attribute__ ((packed));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment