Commit 8adc8b3d authored by Sage Weil's avatar Sage Weil

libceph: fix pg_temp mapping update

The incremental map updates have a record for each pg_temp mapping that is
to be add/updated (len > 0) or removed (len == 0).  The old code was
written as if the updates were a complete enumeration; that was just wrong.
Update the code to remove 0-length entries and drop the rbtree traversal.

This avoids misdirected (and hung) requests that manifest as server
errors like

[WRN] client4104 10.0.1.219:0/275025290 misdirected client4104.1:129 0.1 to osd0 not [1,0] in e11/11
Signed-off-by: default avatarSage Weil <sage@newdream.net>
parent 782e182e
...@@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new, ...@@ -339,6 +339,7 @@ static int __insert_pg_mapping(struct ceph_pg_mapping *new,
struct ceph_pg_mapping *pg = NULL; struct ceph_pg_mapping *pg = NULL;
int c; int c;
dout("__insert_pg_mapping %llx %p\n", *(u64 *)&new->pgid, new);
while (*p) { while (*p) {
parent = *p; parent = *p;
pg = rb_entry(parent, struct ceph_pg_mapping, node); pg = rb_entry(parent, struct ceph_pg_mapping, node);
...@@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, ...@@ -366,16 +367,33 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root,
while (n) { while (n) {
pg = rb_entry(n, struct ceph_pg_mapping, node); pg = rb_entry(n, struct ceph_pg_mapping, node);
c = pgid_cmp(pgid, pg->pgid); c = pgid_cmp(pgid, pg->pgid);
if (c < 0) if (c < 0) {
n = n->rb_left; n = n->rb_left;
else if (c > 0) } else if (c > 0) {
n = n->rb_right; n = n->rb_right;
else } else {
dout("__lookup_pg_mapping %llx got %p\n",
*(u64 *)&pgid, pg);
return pg; return pg;
}
} }
return NULL; return NULL;
} }
static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid)
{
struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid);
if (pg) {
dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg);
rb_erase(&pg->node, root);
kfree(pg);
return 0;
}
dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid);
return -ENOENT;
}
/* /*
* rbtree of pg pool info * rbtree of pg pool info
*/ */
...@@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -711,7 +729,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
void *start = *p; void *start = *p;
int err = -EINVAL; int err = -EINVAL;
u16 version; u16 version;
struct rb_node *rbp;
ceph_decode_16_safe(p, end, version, bad); ceph_decode_16_safe(p, end, version, bad);
if (version > CEPH_OSDMAP_INC_VERSION) { if (version > CEPH_OSDMAP_INC_VERSION) {
...@@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -861,7 +878,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
} }
/* new_pg_temp */ /* new_pg_temp */
rbp = rb_first(&map->pg_temp);
ceph_decode_32_safe(p, end, len, bad); ceph_decode_32_safe(p, end, len, bad);
while (len--) { while (len--) {
struct ceph_pg_mapping *pg; struct ceph_pg_mapping *pg;
...@@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -872,18 +888,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
ceph_decode_copy(p, &pgid, sizeof(pgid)); ceph_decode_copy(p, &pgid, sizeof(pgid));
pglen = ceph_decode_32(p); pglen = ceph_decode_32(p);
/* remove any? */
while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping,
node)->pgid, pgid) <= 0) {
struct ceph_pg_mapping *cur =
rb_entry(rbp, struct ceph_pg_mapping, node);
rbp = rb_next(rbp);
dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
rb_erase(&cur->node, &map->pg_temp);
kfree(cur);
}
if (pglen) { if (pglen) {
/* insert */ /* insert */
ceph_decode_need(p, end, pglen*sizeof(u32), bad); ceph_decode_need(p, end, pglen*sizeof(u32), bad);
...@@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, ...@@ -903,17 +907,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
} }
dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid,
pglen); pglen);
} else {
/* remove */
__remove_pg_mapping(&map->pg_temp, pgid);
} }
} }
while (rbp) {
struct ceph_pg_mapping *cur =
rb_entry(rbp, struct ceph_pg_mapping, node);
rbp = rb_next(rbp);
dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid);
rb_erase(&cur->node, &map->pg_temp);
kfree(cur);
}
/* ignore the rest */ /* ignore the rest */
*p = end; *p = end;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment