Commit 893ab747 authored by wang di's avatar wang di Committed by Greg Kroah-Hartman

staging: lustre: lmv: try all stripes for unknown hash functions

For an unknown hash type, LMV should try all stripes to locate
the name entry. But it only does so for lookup and unlink, i.e.
we can only list and unlink entries under a striped dir with an
unknown hash type.
Signed-off-by: wang di <di.wang@intel.com>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-4921
Reviewed-on: http://review.whamcloud.com/10041
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent b14b3ba5
...@@ -383,6 +383,7 @@ struct lmv_user_mds_data { ...@@ -383,6 +383,7 @@ struct lmv_user_mds_data {
}; };
enum lmv_hash_type { enum lmv_hash_type {
LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
LMV_HASH_TYPE_ALL_CHARS = 1, LMV_HASH_TYPE_ALL_CHARS = 1,
LMV_HASH_TYPE_FNV_1A_64 = 2, LMV_HASH_TYPE_FNV_1A_64 = 2,
}; };
......
...@@ -483,6 +483,9 @@ extern char obd_jobid_var[]; ...@@ -483,6 +483,9 @@ extern char obd_jobid_var[];
#define OBD_FAIL_UPDATE_OBJ_NET 0x1700 #define OBD_FAIL_UPDATE_OBJ_NET 0x1700
#define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701 #define OBD_FAIL_UPDATE_OBJ_NET_REP 0x1701
/* LMV */
#define OBD_FAIL_UNKNOWN_LMV_STRIPE 0x1901
/* Assign references to moved code to reduce code changes */ /* Assign references to moved code to reduce code changes */
#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id) #define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id)
#define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id) #define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id)
......
...@@ -402,10 +402,28 @@ static int lmv_intent_lookup(struct obd_export *exp, ...@@ -402,10 +402,28 @@ static int lmv_intent_lookup(struct obd_export *exp,
struct mdt_body *body; struct mdt_body *body;
int rc = 0; int rc = 0;
/*
* If it returns ERR_PTR(-EBADFD) then it is an unknown hash type
* it will try all stripes to locate the object
*/
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
if (IS_ERR(tgt)) if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD))
return PTR_ERR(tgt); return PTR_ERR(tgt);
/*
* Both migrating dir and unknown hash dir need to try
* all of sub-stripes
*/
if (lsm && !lmv_is_known_hash_type(lsm)) {
struct lmv_oinfo *oinfo = &lsm->lsm_md_oinfo[0];
op_data->op_fid1 = oinfo->lmo_fid;
op_data->op_mds = oinfo->lmo_mds;
tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
}
if (!fid_is_sane(&op_data->op_fid2)) if (!fid_is_sane(&op_data->op_fid2))
fid_zero(&op_data->op_fid2); fid_zero(&op_data->op_fid2);
...@@ -435,27 +453,39 @@ static int lmv_intent_lookup(struct obd_export *exp, ...@@ -435,27 +453,39 @@ static int lmv_intent_lookup(struct obd_export *exp,
} }
return rc; return rc;
} else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm && } else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm &&
lsm->lsm_md_magic & LMV_HASH_FLAG_MIGRATION) { lmv_need_try_all_stripes(lsm)) {
/* /*
* For migrating directory, if it can not find the child in * For migrating and unknown hash type directory, it will
* the source directory(master stripe), try the targeting * try to target the entry on other stripes
* directory(stripe 1)
*/ */
tgt = lmv_find_target(lmv, &lsm->lsm_md_oinfo[1].lmo_fid); int stripe_index;
if (IS_ERR(tgt))
return PTR_ERR(tgt); for (stripe_index = 1;
stripe_index < lsm->lsm_md_stripe_count &&
ptlrpc_req_finished(*reqp); it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) {
it->it_request = NULL; struct lmv_oinfo *oinfo;
*reqp = NULL;
/* release the previous request */
CDEBUG(D_INODE, "For migrating dir, try target dir "DFID"\n", ptlrpc_req_finished(*reqp);
PFID(&lsm->lsm_md_oinfo[1].lmo_fid)); it->it_request = NULL;
*reqp = NULL;
op_data->op_fid1 = lsm->lsm_md_oinfo[1].lmo_fid;
it->it_disposition &= ~DISP_ENQ_COMPLETE; oinfo = &lsm->lsm_md_oinfo[stripe_index];
rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it, tgt = lmv_find_target(lmv, &oinfo->lmo_fid);
flags, reqp, cb_blocking, extra_lock_flags); if (IS_ERR(tgt))
return PTR_ERR(tgt);
CDEBUG(D_INODE, "Try other stripes " DFID"\n",
PFID(&oinfo->lmo_fid));
op_data->op_fid1 = oinfo->lmo_fid;
it->it_disposition &= ~DISP_ENQ_COMPLETE;
rc = md_intent_lock(tgt->ltd_exp, op_data, lmm,
lmmsize, it, flags, reqp,
cb_blocking, extra_lock_flags);
if (rc)
return rc;
}
} }
/* /*
......
...@@ -147,6 +147,18 @@ lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name, ...@@ -147,6 +147,18 @@ lsm_name_to_stripe_info(const struct lmv_stripe_md *lsm, const char *name,
return &lsm->lsm_md_oinfo[stripe_index]; return &lsm->lsm_md_oinfo[stripe_index];
} }
/*
 * Report whether the hash type recorded in @lsm is one of the two
 * recognized values, LMV_HASH_TYPE_FNV_1A_64 or LMV_HASH_TYPE_ALL_CHARS.
 * Any other value (including one with extra flag bits set) yields false.
 */
static inline bool lmv_is_known_hash_type(const struct lmv_stripe_md *lsm)
{
	switch (lsm->lsm_md_hash_type) {
	case LMV_HASH_TYPE_FNV_1A_64:
	case LMV_HASH_TYPE_ALL_CHARS:
		return true;
	default:
		return false;
	}
}
/*
 * Report whether a name must be searched for in every stripe of the
 * directory described by @lsm: true when the hash type is not a known
 * one, or when LMV_HASH_FLAG_MIGRATION is set in the hash type.
 */
static inline bool lmv_need_try_all_stripes(const struct lmv_stripe_md *lsm)
{
	if (!lmv_is_known_hash_type(lsm))
		return true;

	return (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION) != 0;
}
struct lmv_tgt_desc struct lmv_tgt_desc
*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, *lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
struct lu_fid *fid); struct lu_fid *fid);
......
...@@ -102,8 +102,8 @@ int lmv_name_to_stripe_index(__u32 lmv_hash_type, unsigned int stripe_count, ...@@ -102,8 +102,8 @@ int lmv_name_to_stripe_index(__u32 lmv_hash_type, unsigned int stripe_count,
idx = lmv_hash_fnv1a(stripe_count, name, namelen); idx = lmv_hash_fnv1a(stripe_count, name, namelen);
break; break;
default: default:
CERROR("Unknown hash type 0x%x\n", hash_type); idx = -EBADFD;
return -EINVAL; break;
} }
CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name, CDEBUG(D_INFO, "name %.*s hash_type %d idx %d\n", namelen, name,
...@@ -1697,6 +1697,23 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm, ...@@ -1697,6 +1697,23 @@ lmv_locate_target_for_name(struct lmv_obd *lmv, struct lmv_stripe_md *lsm,
return tgt; return tgt;
} }
/**
* Locate mds by fid or name
*
* For a striped directory (lsm != NULL), it will locate the stripe
* by name hash (see lsm_name_to_stripe_info()). Note: if the hash_type
* is unknown, it will return -EBADFD, and lmv_intent_lookup() might need
* to walk through all of the stripes to locate the entry.
*
* For a normal directory, it will locate the MDS by FID directly.
* \param[in] lmv LMV device
* \param[in] op_data client MD stack parameters, name, namelen
* mds_num etc.
* \param[in] fid object FID used to locate MDS.
*
* retval pointer to the lmv_tgt_desc if succeed.
* ERR_PTR(errno) if failed.
*/
struct lmv_tgt_desc struct lmv_tgt_desc
*lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data, *lmv_locate_mds(struct lmv_obd *lmv, struct md_op_data *op_data,
struct lu_fid *fid) struct lu_fid *fid)
...@@ -2351,45 +2368,94 @@ static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data, ...@@ -2351,45 +2368,94 @@ static int lmv_readpage(struct obd_export *exp, struct md_op_data *op_data,
return rc; return rc;
} }
/**
* Unlink a file/directory
*
* Unlink a file or directory under the parent dir. The unlink request
* usually will be sent to the MDT where the child is located, but if
* the client does not have the child FID then request will be sent to the
* MDT where the parent is located.
*
* If the parent is a striped directory then it also needs to locate the
* stripe in which the name of the child is located, and replace the parent
* FID (@op->op_fid1) with the stripe FID. Note: if the stripe is unknown,
* it will walk through all of the sub-stripes until the child is finally
* unlinked.
*
* \param[in] exp export refer to LMV
* \param[in] op_data different parameters transferred between client
* MD stacks, name, namelen, FIDs etc.
* op_fid1 is the parent FID, op_fid2 is the child
* FID.
* \param[out] request point to the request of unlink.
*
* retval 0 if succeed
* negative errno if failed.
*/
static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
struct ptlrpc_request **request) struct ptlrpc_request **request)
{ {
struct obd_device *obd = exp->exp_obd; struct lmv_stripe_md *lsm = op_data->op_mea1;
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv; struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *parent_tgt = NULL; struct lmv_tgt_desc *parent_tgt = NULL;
struct lmv_tgt_desc *tgt = NULL; struct lmv_tgt_desc *tgt = NULL;
struct mdt_body *body; struct mdt_body *body;
int stripe_index = 0;
int rc; int rc;
rc = lmv_check_connect(obd); rc = lmv_check_connect(obd);
if (rc) if (rc)
return rc; return rc;
retry: retry_unlink:
/* Send unlink requests to the MDT where the child is located */ /* For striped dir, we need to locate the parent as well */
if (likely(!fid_is_zero(&op_data->op_fid2))) { if (lsm) {
tgt = lmv_find_target(lmv, &op_data->op_fid2); struct lmv_tgt_desc *tmp;
if (IS_ERR(tgt))
return PTR_ERR(tgt);
/* For striped dir, we need to locate the parent as well */ LASSERT(op_data->op_name && op_data->op_namelen);
if (op_data->op_mea1) {
struct lmv_tgt_desc *tmp; tmp = lmv_locate_target_for_name(lmv, lsm,
op_data->op_name,
LASSERT(op_data->op_name && op_data->op_namelen); op_data->op_namelen,
tmp = lmv_locate_target_for_name(lmv, op_data->op_mea1, &op_data->op_fid1,
op_data->op_name, &op_data->op_mds);
op_data->op_namelen,
&op_data->op_fid1, /*
&op_data->op_mds); * return -EBADFD means unknown hash type, might
if (IS_ERR(tmp)) * need try all sub-stripe here
return PTR_ERR(tmp); */
if (IS_ERR(tmp) && PTR_ERR(tmp) != -EBADFD)
return PTR_ERR(tmp);
/*
* Note: both migrating dir and unknown hash dir need to
* try all of sub-stripes, so we need start search the
* name from stripe 0, but migrating dir is already handled
* inside lmv_locate_target_for_name(), so we only check
* unknown hash type directory here
*/
if (!lmv_is_known_hash_type(lsm)) {
struct lmv_oinfo *oinfo;
oinfo = &lsm->lsm_md_oinfo[stripe_index];
op_data->op_fid1 = oinfo->lmo_fid;
op_data->op_mds = oinfo->lmo_mds;
} }
} else {
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
} }
try_next_stripe:
/* Send unlink requests to the MDT where the child is located */
if (likely(!fid_is_zero(&op_data->op_fid2)))
tgt = lmv_find_target(lmv, &op_data->op_fid2);
else if (lsm)
tgt = lmv_get_target(lmv, op_data->op_mds, NULL);
else
tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
if (IS_ERR(tgt))
return PTR_ERR(tgt);
op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid()); op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid()); op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
op_data->op_cap = cfs_curproc_cap_pack(); op_data->op_cap = cfs_curproc_cap_pack();
...@@ -2425,9 +2491,28 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, ...@@ -2425,9 +2491,28 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx); PFID(&op_data->op_fid1), PFID(&op_data->op_fid2), tgt->ltd_idx);
rc = md_unlink(tgt->ltd_exp, op_data, request); rc = md_unlink(tgt->ltd_exp, op_data, request);
if (rc != 0 && rc != -EREMOTE) if (rc != 0 && rc != -EREMOTE && rc != -ENOENT)
return rc; return rc;
/* Try next stripe if it is needed. */
if (rc == -ENOENT && lsm && lmv_need_try_all_stripes(lsm)) {
struct lmv_oinfo *oinfo;
stripe_index++;
if (stripe_index >= lsm->lsm_md_stripe_count)
return rc;
oinfo = &lsm->lsm_md_oinfo[stripe_index];
op_data->op_fid1 = oinfo->lmo_fid;
op_data->op_mds = oinfo->lmo_mds;
ptlrpc_req_finished(*request);
*request = NULL;
goto try_next_stripe;
}
body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
if (!body) if (!body)
return -EPROTO; return -EPROTO;
...@@ -2463,7 +2548,7 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data, ...@@ -2463,7 +2548,7 @@ static int lmv_unlink(struct obd_export *exp, struct md_op_data *op_data,
ptlrpc_req_finished(*request); ptlrpc_req_finished(*request);
*request = NULL; *request = NULL;
goto retry; goto retry_unlink;
} }
static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) static int lmv_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
...@@ -2683,7 +2768,10 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm, ...@@ -2683,7 +2768,10 @@ static int lmv_unpack_md_v1(struct obd_export *exp, struct lmv_stripe_md *lsm,
lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic); lsm->lsm_md_magic = le32_to_cpu(lmm1->lmv_magic);
lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count); lsm->lsm_md_stripe_count = le32_to_cpu(lmm1->lmv_stripe_count);
lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index); lsm->lsm_md_master_mdt_index = le32_to_cpu(lmm1->lmv_master_mdt_index);
lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type); if (OBD_FAIL_CHECK(OBD_FAIL_UNKNOWN_LMV_STRIPE))
lsm->lsm_md_hash_type = LMV_HASH_TYPE_UNKNOWN;
else
lsm->lsm_md_hash_type = le32_to_cpu(lmm1->lmv_hash_type);
lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version); lsm->lsm_md_layout_version = le32_to_cpu(lmm1->lmv_layout_version);
fid_le_to_cpu(&lsm->lsm_md_master_fid, &lmm1->lmv_master_fid); fid_le_to_cpu(&lsm->lsm_md_master_fid, &lmm1->lmv_master_fid);
cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name, cplen = strlcpy(lsm->lsm_md_pool_name, lmm1->lmv_pool_name,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment