Commit b50c2de5, authored by Yan, Zheng; committed by Ilya Dryomov

ceph: choose readdir frag based on previous readdir reply

The dirfragtree is lazily updated, so it is not always accurate. An
infinite loop can happen in the following circumstance:

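- client sends a request to read frag A
- frag A has been fragmented into frags B and C, so the mds fills the
  reply with the contents of frag B
- client wants to read the next frag, C. ceph_choose_frag(frag value of C)
  returns frag A (the client's fragtree is stale), so the client asks for
  frag A again and the cycle repeats.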

The fix is to use the previous readdir reply to calculate the next
readdir frag when possible.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent e010dd0a
...@@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -294,7 +294,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
int i; int i;
int err; int err;
u32 ftype; unsigned frag = -1;
struct ceph_mds_reply_info_parsed *rinfo; struct ceph_mds_reply_info_parsed *rinfo;
dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos); dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
...@@ -341,7 +341,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -341,7 +341,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* do we have the correct frag content buffered? */ /* do we have the correct frag content buffered? */
if (need_send_readdir(fi, ctx->pos)) { if (need_send_readdir(fi, ctx->pos)) {
struct ceph_mds_request *req; struct ceph_mds_request *req;
unsigned frag;
int op = ceph_snap(inode) == CEPH_SNAPDIR ? int op = ceph_snap(inode) == CEPH_SNAPDIR ?
CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR; CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
...@@ -352,6 +351,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -352,6 +351,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
} }
if (is_hash_order(ctx->pos)) { if (is_hash_order(ctx->pos)) {
/* fragtree isn't always accurate. choose frag
* based on previous reply when possible. */
if (frag == (unsigned)-1)
frag = ceph_choose_frag(ci, fpos_hash(ctx->pos), frag = ceph_choose_frag(ci, fpos_hash(ctx->pos),
NULL, NULL); NULL, NULL);
} else { } else {
...@@ -480,6 +482,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -480,6 +482,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
struct ceph_vino vino; struct ceph_vino vino;
ino_t ino; ino_t ino;
u32 ftype;
BUG_ON(rde->offset < ctx->pos); BUG_ON(rde->offset < ctx->pos);
...@@ -502,15 +505,17 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) ...@@ -502,15 +505,17 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
ctx->pos++; ctx->pos++;
} }
if (fi->next_offset > 2) {
ceph_mdsc_put_request(fi->last_readdir); ceph_mdsc_put_request(fi->last_readdir);
fi->last_readdir = NULL; fi->last_readdir = NULL;
if (fi->next_offset > 2) {
frag = fi->frag;
goto more; goto more;
} }
/* more frags? */ /* more frags? */
if (!ceph_frag_is_rightmost(fi->frag)) { if (!ceph_frag_is_rightmost(fi->frag)) {
unsigned frag = ceph_frag_next(fi->frag); frag = ceph_frag_next(fi->frag);
if (is_hash_order(ctx->pos)) { if (is_hash_order(ctx->pos)) {
loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag), loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
fi->next_offset, true); fi->next_offset, true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment