Commit e97b71de authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: fix ioctl magic
  ceph: Behave better when handling file lock replies.
  ceph: pass lock information by struct file_lock instead of as individual params.
  ceph: Handle file locks in replies from the MDS.
  ceph: avoid possible null deref in readdir after dir llseek
parents 38971ce2 1cd275f6
......@@ -114,8 +114,8 @@ static int __dcache_readdir(struct file *filp,
spin_lock(&dcache_lock);
/* start at beginning? */
if (filp->f_pos == 2 || (last &&
filp->f_pos < ceph_dentry(last)->offset)) {
if (filp->f_pos == 2 || last == NULL ||
filp->f_pos < ceph_dentry(last)->offset) {
if (list_empty(&parent->d_subdirs))
goto out_unlock;
p = parent->d_subdirs.prev;
......
......@@ -4,7 +4,7 @@
#include <linux/ioctl.h>
#include <linux/types.h>
#define CEPH_IOCTL_MAGIC 0x98
#define CEPH_IOCTL_MAGIC 0x97
/* just use u64 to align sanely on all archs */
struct ceph_ioctl_layout {
......
......@@ -11,40 +11,68 @@
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
u64 pid, u64 pid_ns,
int cmd, u64 start, u64 length, u8 wait)
int cmd, u8 wait, struct file_lock *fl)
{
struct inode *inode = file->f_dentry->d_inode;
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
int err;
u64 length = 0;
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
req->r_inode = igrab(inode);
/* mds requires start and length rather than start and end */
if (LLONG_MAX == fl->fl_end)
length = 0;
else
length = fl->fl_end - fl->fl_start + 1;
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
"length: %llu, wait: %d, type`: %d", (int)lock_type,
(int)operation, pid, start, length, wait, cmd);
(int)operation, (u64)fl->fl_pid, fl->fl_start,
length, wait, fl->fl_type);
req->r_args.filelock_change.rule = lock_type;
req->r_args.filelock_change.type = cmd;
req->r_args.filelock_change.pid = cpu_to_le64(pid);
req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
/* This should be adjusted, but I'm not sure if
namespaces actually get id numbers*/
req->r_args.filelock_change.pid_namespace =
cpu_to_le64((u64)pid_ns);
req->r_args.filelock_change.start = cpu_to_le64(start);
cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
req->r_args.filelock_change.length = cpu_to_le64(length);
req->r_args.filelock_change.wait = wait;
err = ceph_mdsc_do_request(mdsc, inode, req);
if ( operation == CEPH_MDS_OP_GETFILELOCK){
fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
fl->fl_type = F_RDLCK;
else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
fl->fl_type = F_WRLCK;
else
fl->fl_type = F_UNLCK;
fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
le64_to_cpu(req->r_reply_info.filelock_reply->length);
if (length >= 1)
fl->fl_end = length -1;
else
fl->fl_end = 0;
}
ceph_mdsc_put_request(req);
dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
"length: %llu, wait: %d, type`: %d err code %d", (int)lock_type,
(int)operation, pid, start, length, wait, cmd, err);
"length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
(int)operation, (u64)fl->fl_pid, fl->fl_start,
length, wait, fl->fl_type, err);
return err;
}
......@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
*/
int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
{
u64 length;
u8 lock_cmd;
int err;
u8 wait = 0;
......@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
else
lock_cmd = CEPH_LOCK_UNLOCK;
if (LLONG_MAX == fl->fl_end)
length = 0;
else
length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
(u64)fl->fl_pid,
(u64)(unsigned long)fl->fl_nspid,
lock_cmd, fl->fl_start,
length, wait);
err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
if (!err) {
dout("mds locked, locking locally");
err = posix_lock_file(file, fl, NULL);
if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
/* undo! This should only happen if the kernel detects
* local deadlock. */
ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
(u64)fl->fl_pid,
(u64)(unsigned long)fl->fl_nspid,
CEPH_LOCK_UNLOCK, fl->fl_start,
length, 0);
dout("got %d on posix_lock_file, undid lock", err);
if ( op != CEPH_MDS_OP_GETFILELOCK ){
dout("mds locked, locking locally");
err = posix_lock_file(file, fl, NULL);
if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
/* undo! This should only happen if the kernel detects
* local deadlock. */
ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on posix_lock_file, undid lock", err);
}
}
} else {
dout("mds returned error code %d", err);
}
......@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
{
u64 length;
u8 lock_cmd;
int err;
u8 wait = 1;
......@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
lock_cmd = CEPH_LOCK_EXCL;
else
lock_cmd = CEPH_LOCK_UNLOCK;
/* mds requires start and length rather than start and end */
if (LLONG_MAX == fl->fl_end)
length = 0;
else
length = fl->fl_end - fl->fl_start + 1;
err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
file, (u64)fl->fl_pid,
(u64)(unsigned long)fl->fl_nspid,
lock_cmd, fl->fl_start,
length, wait);
file, lock_cmd, wait, fl);
if (!err) {
err = flock_lock_file_wait(file, fl);
if (err) {
ceph_lock_message(CEPH_LOCK_FLOCK,
CEPH_MDS_OP_SETFILELOCK,
file, (u64)fl->fl_pid,
(u64)(unsigned long)fl->fl_nspid,
CEPH_LOCK_UNLOCK, fl->fl_start,
length, 0);
file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
} else {
......
......@@ -201,6 +201,38 @@ static int parse_reply_info_dir(void **p, void *end,
return err;
}
/*
* parse fcntl F_GETLK results
*/
static int parse_reply_info_filelock(void **p, void *end,
struct ceph_mds_reply_info_parsed *info)
{
if (*p + sizeof(*info->filelock_reply) > end)
goto bad;
info->filelock_reply = *p;
*p += sizeof(*info->filelock_reply);
if (unlikely(*p != end))
goto bad;
return 0;
bad:
return -EIO;
}
/*
* parse extra results
*/
static int parse_reply_info_extra(void **p, void *end,
struct ceph_mds_reply_info_parsed *info)
{
if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
return parse_reply_info_filelock(p, end, info);
else
return parse_reply_info_dir(p, end, info);
}
/*
* parse entire mds reply
*/
......@@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
goto out_bad;
}
/* dir content */
/* extra */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
err = parse_reply_info_dir(&p, p+len, info);
err = parse_reply_info_extra(&p, p+len, info);
if (err < 0)
goto out_bad;
}
......@@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
mutex_lock(&session->s_mutex);
if (err < 0) {
pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds);
pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
ceph_msg_dump(msg);
goto out_err;
}
......@@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
mutex_lock(&req->r_fill_mutex);
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
if (result == 0 && rinfo->dir_nr)
if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
rinfo->dir_nr)
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
......
......@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in {
};
/*
* parsed info about an mds reply, including information about the
* target inode and/or its parent directory and dentry, and directory
* contents (for readdir results).
* parsed info about an mds reply, including information about
* either: 1) the target inode and/or its parent directory and dentry,
* and directory contents (for readdir results), or
* 2) the file range lock info (for fcntl F_GETLK results).
*/
struct ceph_mds_reply_info_parsed {
struct ceph_mds_reply_head *head;
/* trace */
struct ceph_mds_reply_info_in diri, targeti;
struct ceph_mds_reply_dirfrag *dirfrag;
char *dname;
u32 dname_len;
struct ceph_mds_reply_lease *dlease;
struct ceph_mds_reply_dirfrag *dir_dir;
int dir_nr;
char **dir_dname;
u32 *dir_dname_len;
struct ceph_mds_reply_lease **dir_dlease;
struct ceph_mds_reply_info_in *dir_in;
u8 dir_complete, dir_end;
/* extra */
union {
/* for fcntl F_GETLK results */
struct ceph_filelock *filelock_reply;
/* for readdir results */
struct {
struct ceph_mds_reply_dirfrag *dir_dir;
int dir_nr;
char **dir_dname;
u32 *dir_dname_len;
struct ceph_mds_reply_lease **dir_dlease;
struct ceph_mds_reply_info_in *dir_in;
u8 dir_complete, dir_end;
};
};
/* encoded blob describing snapshot contexts for certain
operations (e.g., open) */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment