Commit 1d9d7cbf authored by Linus Torvalds

Merge tag 'ceph-for-5.2-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "On the filesystem side we have:

   - a fix to enforce quotas set above the mount point (Luis Henriques)

   - support for exporting snapshots through NFS (Zheng Yan)

   - proper statx implementation (Jeff Layton). statx flags are mapped
     to MDS caps, with AT_STATX_{DONT,FORCE}_SYNC taken into account.

   - some follow-up dentry name handling fixes, in particular
     elimination of our hand-rolled helper and the switch to __getname()
     as suggested by Al (Jeff Layton)

   - a set of MDS client cleanups in preparation for async MDS requests
     in the future (Jeff Layton)

   - a fix to sync the filesystem before remounting (Jeff Layton)

  On the rbd side, work is on-going on object-map and fast-diff image
  features"

* tag 'ceph-for-5.2-rc1' of git://github.com/ceph/ceph-client: (29 commits)
  ceph: flush dirty inodes before proceeding with remount
  ceph: fix unaligned access in ceph_send_cap_releases
  libceph: make ceph_pr_addr take an struct ceph_entity_addr pointer
  libceph: fix unaligned accesses in ceph_entity_addr handling
  rbd: don't assert on writes to snapshots
  rbd: client_mutex is never nested
  ceph: print inode number in __caps_issued_mask debugging messages
  ceph: just call get_session in __ceph_lookup_mds_session
  ceph: simplify arguments and return semantics of try_get_cap_refs
  ceph: fix comment over ceph_drop_caps_for_unlink
  ceph: move wait for mds request into helper function
  ceph: have ceph_mdsc_do_request call ceph_mdsc_submit_request
  ceph: after an MDS request, do callback and completions
  ceph: use pathlen values returned by set_request_path_attr
  ceph: use __getname/__putname in ceph_mdsc_build_path
  ceph: use ceph_mdsc_build_path instead of clone_dentry_name
  ceph: fix potential use-after-free in ceph_mdsc_build_path
  ceph: dump granular cap info in "caps" debugfs file
  ceph: make iterate_session_caps a public symbol
  ceph: fix NULL pointer deref when debugging is enabled
  ...
parents 2c45e7fb 00abf69d
...@@ -934,7 +934,7 @@ static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) ...@@ -934,7 +934,7 @@ static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
struct rbd_client *rbdc; struct rbd_client *rbdc;
int ret; int ret;
mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING); mutex_lock(&client_mutex);
rbdc = rbd_client_find(ceph_opts); rbdc = rbd_client_find(ceph_opts);
if (rbdc) { if (rbdc) {
ceph_destroy_options(ceph_opts); ceph_destroy_options(ceph_opts);
...@@ -1326,7 +1326,7 @@ static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off, ...@@ -1326,7 +1326,7 @@ static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
zero_bvecs(&obj_req->bvec_pos, off, bytes); zero_bvecs(&obj_req->bvec_pos, off, bytes);
break; break;
default: default:
rbd_assert(0); BUG();
} }
} }
...@@ -1581,7 +1581,7 @@ static void rbd_obj_request_destroy(struct kref *kref) ...@@ -1581,7 +1581,7 @@ static void rbd_obj_request_destroy(struct kref *kref)
kfree(obj_request->bvec_pos.bvecs); kfree(obj_request->bvec_pos.bvecs);
break; break;
default: default:
rbd_assert(0); BUG();
} }
kfree(obj_request->img_extents); kfree(obj_request->img_extents);
...@@ -1781,7 +1781,7 @@ static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which) ...@@ -1781,7 +1781,7 @@ static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
&obj_req->bvec_pos); &obj_req->bvec_pos);
break; break;
default: default:
rbd_assert(0); BUG();
} }
} }
...@@ -2036,7 +2036,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req) ...@@ -2036,7 +2036,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
ret = rbd_obj_setup_zeroout(obj_req); ret = rbd_obj_setup_zeroout(obj_req);
break; break;
default: default:
rbd_assert(0); BUG();
} }
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -2383,7 +2383,7 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req) ...@@ -2383,7 +2383,7 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
&obj_req->bvec_pos); &obj_req->bvec_pos);
break; break;
default: default:
rbd_assert(0); BUG();
} }
} else { } else {
ret = rbd_img_fill_from_bvecs(child_img_req, ret = rbd_img_fill_from_bvecs(child_img_req,
...@@ -2515,7 +2515,7 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) ...@@ -2515,7 +2515,7 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
num_osd_ops += count_zeroout_ops(obj_req); num_osd_ops += count_zeroout_ops(obj_req);
break; break;
default: default:
rbd_assert(0); BUG();
} }
obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops); obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
...@@ -2542,7 +2542,7 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) ...@@ -2542,7 +2542,7 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
__rbd_obj_setup_zeroout(obj_req, which); __rbd_obj_setup_zeroout(obj_req, which);
break; break;
default: default:
rbd_assert(0); BUG();
} }
ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO); ret = ceph_osdc_alloc_messages(obj_req->osd_req, GFP_NOIO);
...@@ -3842,8 +3842,12 @@ static void rbd_queue_workfn(struct work_struct *work) ...@@ -3842,8 +3842,12 @@ static void rbd_queue_workfn(struct work_struct *work)
goto err_rq; goto err_rq;
} }
rbd_assert(op_type == OBJ_OP_READ || if (op_type != OBJ_OP_READ && rbd_dev->spec->snap_id != CEPH_NOSNAP) {
rbd_dev->spec->snap_id == CEPH_NOSNAP); rbd_warn(rbd_dev, "%s on read-only snapshot",
obj_op_name(op_type));
result = -EIO;
goto err;
}
/* /*
* Quit early if the mapped snapshot no longer exists. It's * Quit early if the mapped snapshot no longer exists. It's
......
...@@ -892,8 +892,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) ...@@ -892,8 +892,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
int have = ci->i_snap_caps; int have = ci->i_snap_caps;
if ((have & mask) == mask) { if ((have & mask) == mask) {
dout("__ceph_caps_issued_mask %p snap issued %s" dout("__ceph_caps_issued_mask ino 0x%lx snap issued %s"
" (mask %s)\n", &ci->vfs_inode, " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(have), ceph_cap_string(have),
ceph_cap_string(mask)); ceph_cap_string(mask));
return 1; return 1;
...@@ -904,8 +904,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) ...@@ -904,8 +904,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
if (!__cap_is_valid(cap)) if (!__cap_is_valid(cap))
continue; continue;
if ((cap->issued & mask) == mask) { if ((cap->issued & mask) == mask) {
dout("__ceph_caps_issued_mask %p cap %p issued %s" dout("__ceph_caps_issued_mask ino 0x%lx cap %p issued %s"
" (mask %s)\n", &ci->vfs_inode, cap, " (mask %s)\n", ci->vfs_inode.i_ino, cap,
ceph_cap_string(cap->issued), ceph_cap_string(cap->issued),
ceph_cap_string(mask)); ceph_cap_string(mask));
if (touch) if (touch)
...@@ -916,8 +916,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch) ...@@ -916,8 +916,8 @@ int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
/* does a combination of caps satisfy mask? */ /* does a combination of caps satisfy mask? */
have |= cap->issued; have |= cap->issued;
if ((have & mask) == mask) { if ((have & mask) == mask) {
dout("__ceph_caps_issued_mask %p combo issued %s" dout("__ceph_caps_issued_mask ino 0x%lx combo issued %s"
" (mask %s)\n", &ci->vfs_inode, " (mask %s)\n", ci->vfs_inode.i_ino,
ceph_cap_string(cap->issued), ceph_cap_string(cap->issued),
ceph_cap_string(mask)); ceph_cap_string(mask));
if (touch) { if (touch) {
...@@ -2257,8 +2257,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -2257,8 +2257,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (datasync) if (datasync)
goto out; goto out;
inode_lock(inode);
dirty = try_flush_caps(inode, &flush_tid); dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
...@@ -2273,7 +2271,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -2273,7 +2271,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_event_interruptible(ci->i_cap_wq, ret = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid)); caps_are_flushed(inode, flush_tid));
} }
inode_unlock(inode);
out: out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret; return ret;
...@@ -2528,9 +2525,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got, ...@@ -2528,9 +2525,14 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
* to (when applicable), and check against max_size here as well. * to (when applicable), and check against max_size here as well.
* Note that caller is responsible for ensuring max_size increases are * Note that caller is responsible for ensuring max_size increases are
* requested from the MDS. * requested from the MDS.
*
* Returns 0 if caps were not able to be acquired (yet), a 1 if they were,
* or a negative error code.
*
* FIXME: how does a 0 return differ from -EAGAIN?
*/ */
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
loff_t endoff, bool nonblock, int *got, int *err) loff_t endoff, bool nonblock, int *got)
{ {
struct inode *inode = &ci->vfs_inode; struct inode *inode = &ci->vfs_inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
...@@ -2550,8 +2552,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ...@@ -2550,8 +2552,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
if ((file_wanted & need) != need) { if ((file_wanted & need) != need) {
dout("try_get_cap_refs need %s file_wanted %s, EBADF\n", dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
ceph_cap_string(need), ceph_cap_string(file_wanted)); ceph_cap_string(need), ceph_cap_string(file_wanted));
*err = -EBADF; ret = -EBADF;
ret = 1;
goto out_unlock; goto out_unlock;
} }
...@@ -2572,10 +2573,8 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ...@@ -2572,10 +2573,8 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
dout("get_cap_refs %p endoff %llu > maxsize %llu\n", dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
inode, endoff, ci->i_max_size); inode, endoff, ci->i_max_size);
if (endoff > ci->i_requested_max_size) { if (endoff > ci->i_requested_max_size)
*err = -EAGAIN; ret = -EAGAIN;
ret = 1;
}
goto out_unlock; goto out_unlock;
} }
/* /*
...@@ -2610,8 +2609,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ...@@ -2610,8 +2609,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
* task isn't in TASK_RUNNING state * task isn't in TASK_RUNNING state
*/ */
if (nonblock) { if (nonblock) {
*err = -EAGAIN; ret = -EAGAIN;
ret = 1;
goto out_unlock; goto out_unlock;
} }
...@@ -2640,8 +2638,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ...@@ -2640,8 +2638,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
if (session_readonly) { if (session_readonly) {
dout("get_cap_refs %p needed %s but mds%d readonly\n", dout("get_cap_refs %p needed %s but mds%d readonly\n",
inode, ceph_cap_string(need), ci->i_auth_cap->mds); inode, ceph_cap_string(need), ci->i_auth_cap->mds);
*err = -EROFS; ret = -EROFS;
ret = 1;
goto out_unlock; goto out_unlock;
} }
...@@ -2650,16 +2647,14 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ...@@ -2650,16 +2647,14 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
if (READ_ONCE(mdsc->fsc->mount_state) == if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) { CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode); dout("get_cap_refs %p forced umount\n", inode);
*err = -EIO; ret = -EIO;
ret = 1;
goto out_unlock; goto out_unlock;
} }
mds_wanted = __ceph_caps_mds_wanted(ci, false); mds_wanted = __ceph_caps_mds_wanted(ci, false);
if (need & ~(mds_wanted & need)) { if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped" dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode); " (session killed?)\n", inode);
*err = -ESTALE; ret = -ESTALE;
ret = 1;
goto out_unlock; goto out_unlock;
} }
if (!(file_wanted & ~mds_wanted)) if (!(file_wanted & ~mds_wanted))
...@@ -2710,7 +2705,7 @@ static void check_max_size(struct inode *inode, loff_t endoff) ...@@ -2710,7 +2705,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
bool nonblock, int *got) bool nonblock, int *got)
{ {
int ret, err = 0; int ret;
BUG_ON(need & ~CEPH_CAP_FILE_RD); BUG_ON(need & ~CEPH_CAP_FILE_RD);
BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED)); BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
...@@ -2718,15 +2713,8 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, ...@@ -2718,15 +2713,8 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
if (ret < 0) if (ret < 0)
return ret; return ret;
ret = try_get_cap_refs(ci, need, want, 0, nonblock, got, &err); ret = try_get_cap_refs(ci, need, want, 0, nonblock, got);
if (ret) { return ret == -EAGAIN ? 0 : ret;
if (err == -EAGAIN) {
ret = 0;
} else if (err < 0) {
ret = err;
}
}
return ret;
} }
/* /*
...@@ -2737,7 +2725,7 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want, ...@@ -2737,7 +2725,7 @@ int ceph_try_get_caps(struct ceph_inode_info *ci, int need, int want,
int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
loff_t endoff, int *got, struct page **pinned_page) loff_t endoff, int *got, struct page **pinned_page)
{ {
int _got, ret, err = 0; int _got, ret;
ret = ceph_pool_perm_check(ci, need); ret = ceph_pool_perm_check(ci, need);
if (ret < 0) if (ret < 0)
...@@ -2747,21 +2735,19 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, ...@@ -2747,21 +2735,19 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
if (endoff > 0) if (endoff > 0)
check_max_size(&ci->vfs_inode, endoff); check_max_size(&ci->vfs_inode, endoff);
err = 0;
_got = 0; _got = 0;
ret = try_get_cap_refs(ci, need, want, endoff, ret = try_get_cap_refs(ci, need, want, endoff,
false, &_got, &err); false, &_got);
if (ret) { if (ret == -EAGAIN) {
if (err == -EAGAIN) continue;
continue; } else if (!ret) {
if (err < 0) int err;
ret = err;
} else {
DEFINE_WAIT_FUNC(wait, woken_wake_function); DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(&ci->i_cap_wq, &wait); add_wait_queue(&ci->i_cap_wq, &wait);
while (!try_get_cap_refs(ci, need, want, endoff, while (!(err = try_get_cap_refs(ci, need, want, endoff,
true, &_got, &err)) { true, &_got))) {
if (signal_pending(current)) { if (signal_pending(current)) {
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
break; break;
...@@ -2770,19 +2756,14 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, ...@@ -2770,19 +2756,14 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
} }
remove_wait_queue(&ci->i_cap_wq, &wait); remove_wait_queue(&ci->i_cap_wq, &wait);
if (err == -EAGAIN) if (err == -EAGAIN)
continue; continue;
if (err < 0)
ret = err;
} }
if (ret < 0) { if (ret == -ESTALE) {
if (err == -ESTALE) { /* session was killed, try renew caps */
/* session was killed, try renew caps */ ret = ceph_renew_caps(&ci->vfs_inode);
ret = ceph_renew_caps(&ci->vfs_inode); if (ret == 0)
if (ret == 0) continue;
continue;
}
return ret; return ret;
} }
...@@ -4099,7 +4080,7 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode) ...@@ -4099,7 +4080,7 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
} }
/* /*
* For a soon-to-be unlinked file, drop the AUTH_RDCACHE caps. If it * For a soon-to-be unlinked file, drop the LINK caps. If it
* looks like the link count will hit 0, drop any other caps (other * looks like the link count will hit 0, drop any other caps (other
* than PIN) we don't specifically want (due to the file still being * than PIN) we don't specifically want (due to the file still being
* open). * open).
......
...@@ -37,7 +37,7 @@ static int mdsmap_show(struct seq_file *s, void *p) ...@@ -37,7 +37,7 @@ static int mdsmap_show(struct seq_file *s, void *p)
struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr;
int state = mdsmap->m_info[i].state; int state = mdsmap->m_info[i].state;
seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, seq_printf(s, "\tmds%d\t%s\t(%s)\n", i,
ceph_pr_addr(&addr->in_addr), ceph_pr_addr(addr),
ceph_mds_state_name(state)); ceph_mds_state_name(state));
} }
return 0; return 0;
...@@ -88,7 +88,7 @@ static int mdsc_show(struct seq_file *s, void *p) ...@@ -88,7 +88,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_dentry, req->r_dentry,
path ? path : ""); path ? path : "");
spin_unlock(&req->r_dentry->d_lock); spin_unlock(&req->r_dentry->d_lock);
kfree(path); ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path1) { } else if (req->r_path1) {
seq_printf(s, " #%llx/%s", req->r_ino1.ino, seq_printf(s, " #%llx/%s", req->r_ino1.ino,
req->r_path1); req->r_path1);
...@@ -108,7 +108,7 @@ static int mdsc_show(struct seq_file *s, void *p) ...@@ -108,7 +108,7 @@ static int mdsc_show(struct seq_file *s, void *p)
req->r_old_dentry, req->r_old_dentry,
path ? path : ""); path ? path : "");
spin_unlock(&req->r_old_dentry->d_lock); spin_unlock(&req->r_old_dentry->d_lock);
kfree(path); ceph_mdsc_free_path(path, pathlen);
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) { } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
if (req->r_ino2.ino) if (req->r_ino2.ino)
seq_printf(s, " #%llx/%s", req->r_ino2.ino, seq_printf(s, " #%llx/%s", req->r_ino2.ino,
...@@ -124,18 +124,48 @@ static int mdsc_show(struct seq_file *s, void *p) ...@@ -124,18 +124,48 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0; return 0;
} }
/*
 * Row printer used by caps_show() through ceph_iterate_session_caps():
 * emits one line holding the inode number plus the cap's issued and
 * implemented masks rendered as strings.
 *
 * @p is the seq_file the row is written to.  Always returns 0.
 */
static int caps_show_cb(struct inode *inode, struct ceph_cap *cap, void *p)
{
	struct seq_file *seq = p;

	seq_printf(seq, "0x%-17lx%-17s%-17s\n",
		   inode->i_ino,
		   ceph_cap_string(cap->issued),
		   ceph_cap_string(cap->implemented));

	return 0;
}
static int caps_show(struct seq_file *s, void *p) static int caps_show(struct seq_file *s, void *p)
{ {
struct ceph_fs_client *fsc = s->private; struct ceph_fs_client *fsc = s->private;
int total, avail, used, reserved, min; struct ceph_mds_client *mdsc = fsc->mdsc;
int total, avail, used, reserved, min, i;
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
seq_printf(s, "total\t\t%d\n" seq_printf(s, "total\t\t%d\n"
"avail\t\t%d\n" "avail\t\t%d\n"
"used\t\t%d\n" "used\t\t%d\n"
"reserved\t%d\n" "reserved\t%d\n"
"min\t%d\n", "min\t\t%d\n\n",
total, avail, used, reserved, min); total, avail, used, reserved, min);
seq_printf(s, "ino issued implemented\n");
seq_printf(s, "-----------------------------------------------\n");
mutex_lock(&mdsc->mutex);
for (i = 0; i < mdsc->max_sessions; i++) {
struct ceph_mds_session *session;
session = __ceph_lookup_mds_session(mdsc, i);
if (!session)
continue;
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
ceph_iterate_session_caps(session, caps_show_cb, s);
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
mutex_lock(&mdsc->mutex);
}
mutex_unlock(&mdsc->mutex);
return 0; return 0;
} }
......
...@@ -22,18 +22,77 @@ struct ceph_nfs_confh { ...@@ -22,18 +22,77 @@ struct ceph_nfs_confh {
u64 ino, parent_ino; u64 ino, parent_ino;
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * fh for snapped inode
 *
 * Written by ceph_encode_snapfh() and read back by __snapfh_to_dentry().
 * The encoder reports the handle length as
 * sizeof(struct ceph_nfs_snapfh) >> 2, i.e. in 32-bit words.
 */
struct ceph_nfs_snapfh {
u64 ino;	/* ceph_ino() of the encoded inode */
u64 snapid;	/* ceph_snap() of the encoded inode */
u64 parent_ino;	/* ino of the dentry's parent dir; the decoder treats
		 * parent_ino == ino as "no separate parent recorded" */
u32 hash;	/* ceph_dentry_hash() of the dentry in that parent,
		 * or 0 when no parent was recorded */
} __attribute__ ((packed));
/*
 * Encode a file handle for an inode whose snap id is not CEPH_NOSNAP
 * (a snapped inode or a snapdir).
 *
 * @inode:        inode to encode
 * @rawfh:        output buffer, interpreted as struct ceph_nfs_snapfh
 * @max_len:      in: buffer size in 32-bit words; out: words needed/used
 * @parent_inode: unused here; the parent is derived from an alias dentry
 *
 * Returns FILEID_BTRFS_WITH_PARENT on success, FILEID_INVALID (with
 * *max_len set to the required length) when the buffer is too small, or
 * -EINVAL when no alias dentry can be found or when a non-directory has
 * no usable parent.
 */
static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
			      struct inode *parent_inode)
{
	static const int snap_handle_length =
		sizeof(struct ceph_nfs_snapfh) >> 2;
	struct ceph_nfs_snapfh *sfh = (void *)rawfh;
	u64 snapid = ceph_snap(inode);
	int ret;
	bool no_parent = true;

	if (*max_len < snap_handle_length) {
		*max_len = snap_handle_length;
		ret = FILEID_INVALID;
		goto out;
	}

	ret = -EINVAL;
	if (snapid != CEPH_SNAPDIR) {
		struct inode *dir;
		struct dentry *dentry = d_find_alias(inode);

		if (!dentry)
			goto out;

		rcu_read_lock();
		dir = d_inode_rcu(dentry->d_parent);
		/* Only record a parent that is not itself a snapdir. */
		if (ceph_snap(dir) != CEPH_SNAPDIR) {
			sfh->parent_ino = ceph_ino(dir);
			sfh->hash = ceph_dentry_hash(dir, dentry);
			no_parent = false;
		}
		rcu_read_unlock();
		dput(dentry);
	}

	if (no_parent) {
		if (!S_ISDIR(inode->i_mode))
			goto out;
		/*
		 * Mark "no separate parent" by making parent_ino equal to
		 * ino.  Take the value from the inode: sfh->ino has not been
		 * written yet at this point, so reading it back would copy
		 * whatever the caller's buffer happened to contain.
		 */
		sfh->parent_ino = ceph_ino(inode);
		sfh->hash = 0;
	}
	sfh->ino = ceph_ino(inode);
	sfh->snapid = snapid;

	*max_len = snap_handle_length;
	ret = FILEID_BTRFS_WITH_PARENT;
out:
	dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
	return ret;
}
static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
struct inode *parent_inode) struct inode *parent_inode)
{ {
const static int handle_length =
sizeof(struct ceph_nfs_fh) >> 2;
const static int connected_handle_length =
sizeof(struct ceph_nfs_confh) >> 2;
int type; int type;
struct ceph_nfs_fh *fh = (void *)rawfh;
struct ceph_nfs_confh *cfh = (void *)rawfh;
int connected_handle_length = sizeof(*cfh)/4;
int handle_length = sizeof(*fh)/4;
/* don't re-export snaps */
if (ceph_snap(inode) != CEPH_NOSNAP) if (ceph_snap(inode) != CEPH_NOSNAP)
return -EINVAL; return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
if (parent_inode && (*max_len < connected_handle_length)) { if (parent_inode && (*max_len < connected_handle_length)) {
*max_len = connected_handle_length; *max_len = connected_handle_length;
...@@ -44,6 +103,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, ...@@ -44,6 +103,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
} }
if (parent_inode) { if (parent_inode) {
struct ceph_nfs_confh *cfh = (void *)rawfh;
dout("encode_fh %llx with parent %llx\n", dout("encode_fh %llx with parent %llx\n",
ceph_ino(inode), ceph_ino(parent_inode)); ceph_ino(inode), ceph_ino(parent_inode));
cfh->ino = ceph_ino(inode); cfh->ino = ceph_ino(inode);
...@@ -51,6 +111,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, ...@@ -51,6 +111,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
*max_len = connected_handle_length; *max_len = connected_handle_length;
type = FILEID_INO32_GEN_PARENT; type = FILEID_INO32_GEN_PARENT;
} else { } else {
struct ceph_nfs_fh *fh = (void *)rawfh;
dout("encode_fh %llx\n", ceph_ino(inode)); dout("encode_fh %llx\n", ceph_ino(inode));
fh->ino = ceph_ino(inode); fh->ino = ceph_ino(inode);
*max_len = handle_length; *max_len = handle_length;
...@@ -59,7 +120,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, ...@@ -59,7 +120,7 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
return type; return type;
} }
static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
{ {
struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
struct inode *inode; struct inode *inode;
...@@ -81,7 +142,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) ...@@ -81,7 +142,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
mask = CEPH_STAT_CAP_INODE; mask = CEPH_STAT_CAP_INODE;
if (ceph_security_xattr_wanted(d_inode(sb->s_root))) if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
mask |= CEPH_CAP_XATTR_SHARED; mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask); req->r_args.lookupino.mask = cpu_to_le32(mask);
req->r_ino1 = vino; req->r_ino1 = vino;
req->r_num_caps = 1; req->r_num_caps = 1;
...@@ -91,16 +152,114 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) ...@@ -91,16 +152,114 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
ihold(inode); ihold(inode);
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
if (!inode) if (!inode)
return ERR_PTR(-ESTALE); return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
if (inode->i_nlink == 0) {
iput(inode);
return ERR_PTR(-ESTALE);
}
} }
return inode;
}
/*
 * Look up an inode by number via __lookup_inode() and reject inodes whose
 * link count is zero.
 *
 * Returns the inode (with the reference obtained by __lookup_inode()),
 * the ERR_PTR from __lookup_inode() on failure, or ERR_PTR(-ESTALE) when
 * the inode has i_nlink == 0 (its reference is dropped first).
 */
struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
{
	struct inode *found = __lookup_inode(sb, ino);

	if (!IS_ERR(found) && found->i_nlink == 0) {
		iput(found);
		found = ERR_PTR(-ESTALE);
	}
	return found;
}
static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
{
struct inode *inode = __lookup_inode(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (inode->i_nlink == 0) {
iput(inode);
return ERR_PTR(-ESTALE);
}
return d_obtain_alias(inode); return d_obtain_alias(inode);
} }
/*
 * Convert a snapshot file handle into a dentry.
 *
 * @sb:          superblock the handle belongs to
 * @sfh:         decoded snapshot file handle
 * @want_parent: true to resolve the handle's parent, false for the inode
 *               itself
 *
 * The target vino is derived from the handle, looked up in the inode
 * cache first, and otherwise fetched with a CEPH_MDS_OP_LOOKUPINO
 * request.  Returns a dentry from d_obtain_alias()/d_obtain_root(), an
 * ERR_PTR if the request could not be created, or ERR_PTR(-ESTALE) when
 * no matching inode was obtained.
 */
static struct dentry *__snapfh_to_dentry(struct super_block *sb,
					 struct ceph_nfs_snapfh *sfh,
					 bool want_parent)
{
	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
	struct ceph_mds_request *req;
	struct inode *inode;
	struct ceph_vino vino;
	int mask;
	int err;
	bool unlinked = false;

	if (want_parent) {
		vino.ino = sfh->parent_ino;
		if (sfh->snapid == CEPH_SNAPDIR)
			vino.snap = CEPH_NOSNAP;
		else if (sfh->ino == sfh->parent_ino)
			/* handle was encoded without a separate parent */
			vino.snap = CEPH_SNAPDIR;
		else
			vino.snap = sfh->snapid;
	} else {
		vino.ino = sfh->ino;
		vino.snap = sfh->snapid;
	}

	/* Fast path: the inode is already known locally. */
	inode = ceph_find_inode(sb, vino);
	if (inode)
		return d_obtain_alias(inode);

	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
				       USE_ANY_MDS);
	if (IS_ERR(req))
		return ERR_CAST(req);

	mask = CEPH_STAT_CAP_INODE;
	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
		mask |= CEPH_CAP_XATTR_SHARED;
	req->r_args.lookupino.mask = cpu_to_le32(mask);
	if (vino.snap < CEPH_NOSNAP) {
		req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
		if (!want_parent && sfh->ino != sfh->parent_ino) {
			req->r_args.lookupino.parent =
					cpu_to_le64(sfh->parent_ino);
			req->r_args.lookupino.hash =
					cpu_to_le32(sfh->hash);
		}
	}

	req->r_ino1 = vino;
	req->r_num_caps = 1;
	err = ceph_mdsc_do_request(mdsc, NULL, req);
	inode = req->r_target_inode;
	if (inode) {
		if (vino.snap == CEPH_SNAPDIR) {
			if (inode->i_nlink == 0)
				unlinked = true;
			inode = ceph_get_snapdir(inode);
		} else if (ceph_snap(inode) == vino.snap) {
			/* keep our own reference past the request put below */
			ihold(inode);
		} else {
			/* mds does not support lookup snapped inode */
			err = -EOPNOTSUPP;
			inode = NULL;
		}
	}
	ceph_mdsc_put_request(req);

	/* err is only reported in the debug output; callers see -ESTALE. */
	if (want_parent) {
		dout("snapfh_to_parent %llx.%llx err=%d\n",
		     vino.ino, vino.snap, err);
	} else {
		dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d\n",
		      vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
	}
	if (!inode)
		return ERR_PTR(-ESTALE);
	/* see comments in ceph_get_parent() */
	return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
}
/* /*
* convert regular fh to dentry * convert regular fh to dentry
*/ */
...@@ -110,6 +269,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb, ...@@ -110,6 +269,11 @@ static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
{ {
struct ceph_nfs_fh *fh = (void *)fid->raw; struct ceph_nfs_fh *fh = (void *)fid->raw;
if (fh_type == FILEID_BTRFS_WITH_PARENT) {
struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
return __snapfh_to_dentry(sb, sfh, false);
}
if (fh_type != FILEID_INO32_GEN && if (fh_type != FILEID_INO32_GEN &&
fh_type != FILEID_INO32_GEN_PARENT) fh_type != FILEID_INO32_GEN_PARENT)
return NULL; return NULL;
...@@ -163,13 +327,49 @@ static struct dentry *__get_parent(struct super_block *sb, ...@@ -163,13 +327,49 @@ static struct dentry *__get_parent(struct super_block *sb,
static struct dentry *ceph_get_parent(struct dentry *child) static struct dentry *ceph_get_parent(struct dentry *child)
{ {
/* don't re-export snaps */ struct inode *inode = d_inode(child);
if (ceph_snap(d_inode(child)) != CEPH_NOSNAP) struct dentry *dn;
return ERR_PTR(-EINVAL);
if (ceph_snap(inode) != CEPH_NOSNAP) {
dout("get_parent %p ino %llx.%llx\n", struct inode* dir;
child, ceph_vinop(d_inode(child))); bool unlinked = false;
return __get_parent(child->d_sb, child, 0); /* do not support non-directory */
if (!d_is_dir(child)) {
dn = ERR_PTR(-EINVAL);
goto out;
}
dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
if (IS_ERR(dir)) {
dn = ERR_CAST(dir);
goto out;
}
/* There can be multiple paths to access snapped inode.
* For simplicity, treat snapdir of head inode as parent */
if (ceph_snap(inode) != CEPH_SNAPDIR) {
struct inode *snapdir = ceph_get_snapdir(dir);
if (dir->i_nlink == 0)
unlinked = true;
iput(dir);
if (IS_ERR(snapdir)) {
dn = ERR_CAST(snapdir);
goto out;
}
dir = snapdir;
}
/* If directory has already been deleted, further get_parent
* will fail. Do not mark snapdir dentry as disconnected,
* this prevents exportfs from doing further get_parent. */
if (unlinked)
dn = d_obtain_root(dir);
else
dn = d_obtain_alias(dir);
} else {
dn = __get_parent(child->d_sb, child, 0);
}
out:
dout("get_parent %p ino %llx.%llx err=%ld\n",
child, ceph_vinop(inode), (IS_ERR(dn) ? PTR_ERR(dn) : 0));
return dn;
} }
/* /*
...@@ -182,6 +382,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, ...@@ -182,6 +382,11 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
struct ceph_nfs_confh *cfh = (void *)fid->raw; struct ceph_nfs_confh *cfh = (void *)fid->raw;
struct dentry *dentry; struct dentry *dentry;
if (fh_type == FILEID_BTRFS_WITH_PARENT) {
struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
return __snapfh_to_dentry(sb, sfh, true);
}
if (fh_type != FILEID_INO32_GEN_PARENT) if (fh_type != FILEID_INO32_GEN_PARENT)
return NULL; return NULL;
if (fh_len < sizeof(*cfh) / 4) if (fh_len < sizeof(*cfh) / 4)
...@@ -194,14 +399,115 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, ...@@ -194,14 +399,115 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
return dentry; return dentry;
} }
/*
 * Look up the name of @child inside @parent when @child is not a
 * CEPH_NOSNAP inode, and copy it NUL-terminated into @name.
 *
 * Two parent/child combinations are handled; both require that parent and
 * child share the same ceph inode number:
 *   - child is the snapdir and parent is the CEPH_NOSNAP directory:
 *     the name is the mount's configured snapdir_name;
 *   - parent is the snapdir: the snapshots are listed with LSSNAP requests
 *     until an entry whose snapid equals the child's snapid is found.
 *
 * Returns 0 on success, -EINVAL for any other parent/child combination,
 * -ENOENT if the listing ended without a match, -ENOMEM if the continuation
 * name could not be duplicated, or the error from creating/issuing the
 * MDS request.
 *
 * NOTE(review): nothing here bounds the writes into @name (strcpy/memcpy);
 * this assumes the caller's buffer is large enough for any snapshot or
 * snapdir name -- confirm against the caller.
 */
static int __get_snap_name(struct dentry *parent, char *name,
struct dentry *child)
{
struct inode *inode = d_inode(child);
struct inode *dir = d_inode(parent);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_mds_request *req = NULL;
/* name of the last entry of the previous reply, used to continue listing */
char *last_name = NULL;
/* presumably skips the "." and ".." slots -- TODO confirm */
unsigned next_offset = 2;
int err = -EINVAL;
/* parent and child must refer to the same inode number */
if (ceph_ino(inode) != ceph_ino(dir))
goto out;
/* child is the snapdir itself: only valid under the non-snapshot dir */
if (ceph_snap(inode) == CEPH_SNAPDIR) {
if (ceph_snap(dir) == CEPH_NOSNAP) {
strcpy(name, fsc->mount_options->snapdir_name);
err = 0;
}
goto out;
}
/* child is a snapshot: its parent has to be the snapdir */
if (ceph_snap(dir) != CEPH_SNAPDIR)
goto out;
/* one LSSNAP request per iteration, until a match or end of listing */
while (1) {
struct ceph_mds_reply_info_parsed *rinfo;
struct ceph_mds_reply_dir_entry *rde;
int i;
req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
USE_AUTH_MDS);
if (IS_ERR(req)) {
err = PTR_ERR(req);
/* clear req so the cleanup at out: does not put an ERR_PTR */
req = NULL;
goto out;
}
err = ceph_alloc_readdir_reply_buffer(req, inode);
if (err)
goto out;
req->r_direct_mode = USE_AUTH_MDS;
req->r_readdir_offset = next_offset;
req->r_args.readdir.flags =
cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
if (last_name) {
/*
 * Hand the continuation name to the request; last_name is
 * cleared so the kfree() at out: does not free it as well
 * (the request presumably frees r_path2 -- confirm).
 */
req->r_path2 = last_name;
last_name = NULL;
}
/* the request gets its own references on dir and parent */
req->r_inode = dir;
ihold(dir);
req->r_dentry = dget(parent);
/* the request is issued with the directory inode locked */
inode_lock(dir);
err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
inode_unlock(dir);
if (err < 0)
goto out;
rinfo = &req->r_reply_info;
/* scan this batch for the entry whose snapid matches the child */
for (i = 0; i < rinfo->dir_nr; i++) {
rde = rinfo->dir_entries + i;
BUG_ON(!rde->inode.in);
if (ceph_snap(inode) ==
le64_to_cpu(rde->inode.in->snapid)) {
memcpy(name, rde->name, rde->name_len);
name[rde->name_len] = '\0';
err = 0;
goto out;
}
}
/* listing exhausted without a match: falls through to -ENOENT */
if (rinfo->dir_end)
break;
BUG_ON(rinfo->dir_nr <= 0);
/* remember where this batch ended so the next request continues there */
rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
next_offset += rinfo->dir_nr;
last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
if (!last_name) {
err = -ENOMEM;
goto out;
}
ceph_mdsc_put_request(req);
req = NULL;
}
err = -ENOENT;
out:
/* drop the in-flight request (if any) and any unused continuation name */
if (req)
ceph_mdsc_put_request(req);
kfree(last_name);
dout("get_snap_name %p ino %llx.%llx err=%d\n",
child, ceph_vinop(inode), err);
return err;
}
static int ceph_get_name(struct dentry *parent, char *name, static int ceph_get_name(struct dentry *parent, char *name,
struct dentry *child) struct dentry *child)
{ {
struct ceph_mds_client *mdsc; struct ceph_mds_client *mdsc;
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct inode *inode = d_inode(child);
int err; int err;
mdsc = ceph_inode_to_client(d_inode(child))->mdsc; if (ceph_snap(inode) != CEPH_NOSNAP)
return __get_snap_name(parent, name, child);
mdsc = ceph_inode_to_client(inode)->mdsc;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
USE_ANY_MDS); USE_ANY_MDS);
if (IS_ERR(req)) if (IS_ERR(req))
...@@ -209,8 +515,8 @@ static int ceph_get_name(struct dentry *parent, char *name, ...@@ -209,8 +515,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
inode_lock(d_inode(parent)); inode_lock(d_inode(parent));
req->r_inode = d_inode(child); req->r_inode = inode;
ihold(d_inode(child)); ihold(inode);
req->r_ino2 = ceph_vino(d_inode(parent)); req->r_ino2 = ceph_vino(d_inode(parent));
req->r_parent = d_inode(parent); req->r_parent = d_inode(parent);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
...@@ -224,10 +530,10 @@ static int ceph_get_name(struct dentry *parent, char *name, ...@@ -224,10 +530,10 @@ static int ceph_get_name(struct dentry *parent, char *name,
memcpy(name, rinfo->dname, rinfo->dname_len); memcpy(name, rinfo->dname, rinfo->dname_len);
name[rinfo->dname_len] = 0; name[rinfo->dname_len] = 0;
dout("get_name %p ino %llx.%llx name %s\n", dout("get_name %p ino %llx.%llx name %s\n",
child, ceph_vinop(d_inode(child)), name); child, ceph_vinop(inode), name);
} else { } else {
dout("get_name %p ino %llx.%llx err %d\n", dout("get_name %p ino %llx.%llx err %d\n",
child, ceph_vinop(d_inode(child)), err); child, ceph_vinop(inode), err);
} }
ceph_mdsc_put_request(req); ceph_mdsc_put_request(req);
......
...@@ -929,7 +929,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, ...@@ -929,7 +929,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n", dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n",
(write ? "write" : "read"), file, pos, (unsigned)count, (write ? "write" : "read"), file, pos, (unsigned)count,
snapc, snapc->seq); snapc, snapc ? snapc->seq : 0);
ret = filemap_write_and_wait_range(inode->i_mapping, ret = filemap_write_and_wait_range(inode->i_mapping,
pos, pos + count - 1); pos, pos + count - 1);
......
...@@ -2266,43 +2266,72 @@ int ceph_permission(struct inode *inode, int mask) ...@@ -2266,43 +2266,72 @@ int ceph_permission(struct inode *inode, int mask)
return err; return err;
} }
/*
 * Map a statx request mask (STATX_* bits in @want) to the mask of shared
 * caps needed to answer it.  STATX_CTIME appears in every group below, so
 * requesting it alone pulls in all four shared caps.
 */
static int statx_to_caps(u32 want)
{
	const u32 auth_attrs = STATX_MODE | STATX_UID | STATX_GID | STATX_CTIME;
	const u32 link_attrs = STATX_NLINK | STATX_CTIME;
	const u32 file_attrs = STATX_ATIME | STATX_MTIME | STATX_CTIME |
			       STATX_SIZE | STATX_BLOCKS;
	const u32 xattr_attrs = STATX_CTIME;
	int caps = 0;

	if (want & auth_attrs)
		caps |= CEPH_CAP_AUTH_SHARED;

	if (want & link_attrs)
		caps |= CEPH_CAP_LINK_SHARED;

	if (want & file_attrs)
		caps |= CEPH_CAP_FILE_SHARED;

	if (want & xattr_attrs)
		caps |= CEPH_CAP_XATTR_SHARED;

	return caps;
}
/* /*
* Get all attributes. Hopefully somedata we'll have a statlite() * Get all the attributes. If we have sufficient caps for the requested attrs,
* and can limit the fields we require to be accurate. * then we can avoid talking to the MDS at all.
*/ */
int ceph_getattr(const struct path *path, struct kstat *stat, int ceph_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags) u32 request_mask, unsigned int flags)
{ {
struct inode *inode = d_inode(path->dentry); struct inode *inode = d_inode(path->dentry);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
int err; int err = 0;
err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false); /* Skip the getattr altogether if we're asked not to sync */
if (!err) { if (!(flags & AT_STATX_DONT_SYNC)) {
generic_fillattr(inode, stat); err = ceph_do_getattr(inode, statx_to_caps(request_mask),
stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); flags & AT_STATX_FORCE_SYNC);
if (ceph_snap(inode) == CEPH_NOSNAP) if (err)
stat->dev = inode->i_sb->s_dev; return err;
}
generic_fillattr(inode, stat);
stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
if (ceph_snap(inode) == CEPH_NOSNAP)
stat->dev = inode->i_sb->s_dev;
else
stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0;
if (S_ISDIR(inode->i_mode)) {
if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb),
RBYTES))
stat->size = ci->i_rbytes;
else else
stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; stat->size = ci->i_files + ci->i_subdirs;
stat->blocks = 0;
if (S_ISDIR(inode->i_mode)) { stat->blksize = 65536;
if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), /*
RBYTES)) * Some applications rely on the number of st_nlink
stat->size = ci->i_rbytes; * value on directories to be either 0 (if unlinked)
else * or 2 + number of subdirectories.
stat->size = ci->i_files + ci->i_subdirs; */
stat->blocks = 0; if (stat->nlink == 1)
stat->blksize = 65536; /* '.' + '..' + subdirs */
/* stat->nlink = 1 + 1 + ci->i_subdirs;
* Some applications rely on the number of st_nlink
* value on directories to be either 0 (if unlinked)
* or 2 + number of subdirectories.
*/
if (stat->nlink == 1)
/* '.' + '..' + subdirs */
stat->nlink = 1 + 1 + ci->i_subdirs;
}
} }
/* Mask off any higher bits (e.g. btime) until we have support */
stat->result_mask = request_mask & STATX_BASIC_STATS;
return err; return err;
} }
...@@ -237,15 +237,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) ...@@ -237,15 +237,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO; err = -EIO;
} else if (op == CEPH_MDS_OP_SETFILELOCK) {
/*
* increasing i_filelock_ref closes race window between
* handling request reply and adding file_lock struct to
* inode. Otherwise, i_auth_cap may get trimmed in the
* window. Caller function will decrease the counter.
*/
fl->fl_ops = &ceph_fl_lock_ops;
atomic_inc(&ci->i_filelock_ref);
} }
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (err < 0) { if (err < 0) {
...@@ -299,10 +290,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) ...@@ -299,10 +290,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) { if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
err = -EIO; err = -EIO;
} else {
/* see comment in ceph_lock */
fl->fl_ops = &ceph_fl_lock_ops;
atomic_inc(&ci->i_filelock_ref);
} }
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
if (err < 0) { if (err < 0) {
......
...@@ -550,15 +550,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) ...@@ -550,15 +550,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
int mds) int mds)
{ {
struct ceph_mds_session *session;
if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
return NULL; return NULL;
session = mdsc->sessions[mds]; return get_session(mdsc->sessions[mds]);
dout("lookup_mds_session %p %d\n", session,
refcount_read(&session->s_ref));
get_session(session);
return session;
} }
static bool __have_session(struct ceph_mds_client *mdsc, int mds) static bool __have_session(struct ceph_mds_client *mdsc, int mds)
...@@ -1284,9 +1278,9 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, ...@@ -1284,9 +1278,9 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
* *
* Caller must hold session s_mutex. * Caller must hold session s_mutex.
*/ */
static int iterate_session_caps(struct ceph_mds_session *session, int ceph_iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *, struct ceph_cap *, int (*cb)(struct inode *, struct ceph_cap *,
void *), void *arg) void *), void *arg)
{ {
struct list_head *p; struct list_head *p;
struct ceph_cap *cap; struct ceph_cap *cap;
...@@ -1451,7 +1445,7 @@ static void remove_session_caps(struct ceph_mds_session *session) ...@@ -1451,7 +1445,7 @@ static void remove_session_caps(struct ceph_mds_session *session)
LIST_HEAD(dispose); LIST_HEAD(dispose);
dout("remove_session_caps on %p\n", session); dout("remove_session_caps on %p\n", session);
iterate_session_caps(session, remove_session_caps_cb, fsc); ceph_iterate_session_caps(session, remove_session_caps_cb, fsc);
wake_up_all(&fsc->mdsc->cap_flushing_wq); wake_up_all(&fsc->mdsc->cap_flushing_wq);
...@@ -1534,8 +1528,8 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, ...@@ -1534,8 +1528,8 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
static void wake_up_session_caps(struct ceph_mds_session *session, int ev) static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
{ {
dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
iterate_session_caps(session, wake_up_session_cb, ceph_iterate_session_caps(session, wake_up_session_cb,
(void *)(unsigned long)ev); (void *)(unsigned long)ev);
} }
/* /*
...@@ -1768,7 +1762,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc, ...@@ -1768,7 +1762,7 @@ int ceph_trim_caps(struct ceph_mds_client *mdsc,
session->s_mds, session->s_nr_caps, max_caps, trim_caps); session->s_mds, session->s_nr_caps, max_caps, trim_caps);
if (trim_caps > 0) { if (trim_caps > 0) {
session->s_trim_caps = trim_caps; session->s_trim_caps = trim_caps;
iterate_session_caps(session, trim_caps_cb, session); ceph_iterate_session_caps(session, trim_caps_cb, session);
dout("trim_caps mds%d done: %d / %d, trimmed %d\n", dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
session->s_mds, session->s_nr_caps, max_caps, session->s_mds, session->s_nr_caps, max_caps,
trim_caps - session->s_trim_caps); trim_caps - session->s_trim_caps);
...@@ -1861,7 +1855,8 @@ static void ceph_send_cap_releases(struct ceph_mds_client *mdsc, ...@@ -1861,7 +1855,8 @@ static void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
num_cap_releases--; num_cap_releases--;
head = msg->front.iov_base; head = msg->front.iov_base;
le32_add_cpu(&head->num, 1); put_unaligned_le32(get_unaligned_le32(&head->num) + 1,
&head->num);
item = msg->front.iov_base + msg->front.iov_len; item = msg->front.iov_base + msg->front.iov_len;
item->ino = cpu_to_le64(cap->cap_ino); item->ino = cpu_to_le64(cap->cap_ino);
item->cap_id = cpu_to_le64(cap->cap_id); item->cap_id = cpu_to_le64(cap->cap_id);
...@@ -2089,43 +2084,29 @@ static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) ...@@ -2089,43 +2084,29 @@ static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
* Encode hidden .snap dirs as a double /, i.e. * Encode hidden .snap dirs as a double /, i.e.
* foo/.snap/bar -> foo//bar * foo/.snap/bar -> foo//bar
*/ */
char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
int stop_on_nosnap) int stop_on_nosnap)
{ {
struct dentry *temp; struct dentry *temp;
char *path; char *path;
int len, pos; int pos;
unsigned seq; unsigned seq;
u64 base;
if (!dentry) if (!dentry)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
retry: path = __getname();
len = 0;
seq = read_seqbegin(&rename_lock);
rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp);) {
struct inode *inode = d_inode(temp);
if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
len++; /* slash only */
else if (stop_on_nosnap && inode &&
ceph_snap(inode) == CEPH_NOSNAP)
break;
else
len += 1 + temp->d_name.len;
temp = temp->d_parent;
}
rcu_read_unlock();
if (len)
len--; /* no leading '/' */
path = kmalloc(len+1, GFP_NOFS);
if (!path) if (!path)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
pos = len; retry:
path[pos] = 0; /* trailing null */ pos = PATH_MAX - 1;
path[pos] = '\0';
seq = read_seqbegin(&rename_lock);
rcu_read_lock(); rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) { temp = dentry;
for (;;) {
struct inode *inode; struct inode *inode;
spin_lock(&temp->d_lock); spin_lock(&temp->d_lock);
...@@ -2143,83 +2124,54 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, ...@@ -2143,83 +2124,54 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
spin_unlock(&temp->d_lock); spin_unlock(&temp->d_lock);
break; break;
} }
strncpy(path + pos, temp->d_name.name, memcpy(path + pos, temp->d_name.name, temp->d_name.len);
temp->d_name.len);
} }
spin_unlock(&temp->d_lock); spin_unlock(&temp->d_lock);
if (pos)
path[--pos] = '/';
temp = temp->d_parent; temp = temp->d_parent;
/* Are we at the root? */
if (IS_ROOT(temp))
break;
/* Are we out of buffer? */
if (--pos < 0)
break;
path[pos] = '/';
} }
base = ceph_ino(d_inode(temp));
rcu_read_unlock(); rcu_read_unlock();
if (pos != 0 || read_seqretry(&rename_lock, seq)) { if (pos < 0 || read_seqretry(&rename_lock, seq)) {
pr_err("build_path did not end path lookup where " pr_err("build_path did not end path lookup where "
"expected, namelen is %d, pos is %d\n", len, pos); "expected, pos is %d\n", pos);
/* presumably this is only possible if racing with a /* presumably this is only possible if racing with a
rename of one of the parent directories (we can not rename of one of the parent directories (we can not
lock the dentries above us to prevent this, but lock the dentries above us to prevent this, but
retrying should be harmless) */ retrying should be harmless) */
kfree(path);
goto retry; goto retry;
} }
*base = ceph_ino(d_inode(temp)); *pbase = base;
*plen = len; *plen = PATH_MAX - 1 - pos;
dout("build_path on %p %d built %llx '%.*s'\n", dout("build_path on %p %d built %llx '%.*s'\n",
dentry, d_count(dentry), *base, len, path); dentry, d_count(dentry), base, *plen, path + pos);
return path; return path + pos;
}
/* Duplicate the dentry->d_name.name safely */
static int clone_dentry_name(struct dentry *dentry, const char **ppath,
int *ppathlen)
{
u32 len;
char *name;
retry:
len = READ_ONCE(dentry->d_name.len);
name = kmalloc(len + 1, GFP_NOFS);
if (!name)
return -ENOMEM;
spin_lock(&dentry->d_lock);
if (dentry->d_name.len != len) {
spin_unlock(&dentry->d_lock);
kfree(name);
goto retry;
}
memcpy(name, dentry->d_name.name, len);
spin_unlock(&dentry->d_lock);
name[len] = '\0';
*ppath = name;
*ppathlen = len;
return 0;
} }
static int build_dentry_path(struct dentry *dentry, struct inode *dir, static int build_dentry_path(struct dentry *dentry, struct inode *dir,
const char **ppath, int *ppathlen, u64 *pino, const char **ppath, int *ppathlen, u64 *pino,
bool *pfreepath, bool parent_locked) bool *pfreepath, bool parent_locked)
{ {
int ret;
char *path; char *path;
rcu_read_lock(); rcu_read_lock();
if (!dir) if (!dir)
dir = d_inode_rcu(dentry->d_parent); dir = d_inode_rcu(dentry->d_parent);
if (dir && ceph_snap(dir) == CEPH_NOSNAP) { if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
*pino = ceph_ino(dir); *pino = ceph_ino(dir);
rcu_read_unlock(); rcu_read_unlock();
if (parent_locked) { *ppath = dentry->d_name.name;
*ppath = dentry->d_name.name; *ppathlen = dentry->d_name.len;
*ppathlen = dentry->d_name.len;
} else {
ret = clone_dentry_name(dentry, ppath, ppathlen);
if (ret)
return ret;
*pfreepath = true;
}
return 0; return 0;
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -2331,9 +2283,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ...@@ -2331,9 +2283,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
(!!req->r_inode_drop + !!req->r_dentry_drop + (!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop); !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop) if (req->r_dentry_drop)
len += req->r_dentry->d_name.len; len += pathlen1;
if (req->r_old_dentry_drop) if (req->r_old_dentry_drop)
len += req->r_old_dentry->d_name.len; len += pathlen2;
msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
if (!msg) { if (!msg) {
...@@ -2410,10 +2362,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, ...@@ -2410,10 +2362,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
out_free2: out_free2:
if (freepath2) if (freepath2)
kfree((char *)path2); ceph_mdsc_free_path((char *)path2, pathlen2);
out_free1: out_free1:
if (freepath1) if (freepath1)
kfree((char *)path1); ceph_mdsc_free_path((char *)path1, pathlen1);
out: out:
return msg; return msg;
} }
...@@ -2427,8 +2379,7 @@ static void complete_request(struct ceph_mds_client *mdsc, ...@@ -2427,8 +2379,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
{ {
if (req->r_callback) if (req->r_callback)
req->r_callback(mdsc, req); req->r_callback(mdsc, req);
else complete_all(&req->r_completion);
complete_all(&req->r_completion);
} }
/* /*
...@@ -2670,28 +2621,11 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds) ...@@ -2670,28 +2621,11 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
} }
} }
void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir,
struct ceph_mds_request *req) struct ceph_mds_request *req)
{
dout("submit_request on %p\n", req);
mutex_lock(&mdsc->mutex);
__register_request(mdsc, req, NULL);
__do_request(mdsc, req);
mutex_unlock(&mdsc->mutex);
}
/*
* Synchrously perform an mds request. Take care of all of the
* session setup, forwarding, retry details.
*/
int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir,
struct ceph_mds_request *req)
{ {
int err; int err;
dout("do_request on %p\n", req);
/* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
if (req->r_inode) if (req->r_inode)
ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
...@@ -2701,18 +2635,21 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, ...@@ -2701,18 +2635,21 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN); CEPH_CAP_PIN);
/* issue */ dout("submit_request on %p for inode %p\n", req, dir);
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
__register_request(mdsc, req, dir); __register_request(mdsc, req, dir);
__do_request(mdsc, req); __do_request(mdsc, req);
err = req->r_err;
mutex_unlock(&mdsc->mutex);
return err;
}
if (req->r_err) { static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
err = req->r_err; struct ceph_mds_request *req)
goto out; {
} int err;
/* wait */ /* wait */
mutex_unlock(&mdsc->mutex);
dout("do_request waiting\n"); dout("do_request waiting\n");
if (!req->r_timeout && req->r_wait_for_completion) { if (!req->r_timeout && req->r_wait_for_completion) {
err = req->r_wait_for_completion(mdsc, req); err = req->r_wait_for_completion(mdsc, req);
...@@ -2753,8 +2690,26 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, ...@@ -2753,8 +2690,26 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
err = req->r_err; err = req->r_err;
} }
out:
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
return err;
}
/*
* Synchronously perform an mds request. Take care of all of the
* session setup, forwarding, retry details.
*/
int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir,
struct ceph_mds_request *req)
{
int err;
dout("do_request on %p\n", req);
/* issue */
err = ceph_mdsc_submit_request(mdsc, dir, req);
if (!err)
err = ceph_mdsc_wait_request(mdsc, req);
dout("do_request %p done, result %d\n", req, err); dout("do_request %p done, result %d\n", req, err);
return err; return err;
} }
...@@ -3485,7 +3440,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, ...@@ -3485,7 +3440,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
ceph_pagelist_encode_string(pagelist, path, pathlen); ceph_pagelist_encode_string(pagelist, path, pathlen);
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
out_freepath: out_freepath:
kfree(path); ceph_mdsc_free_path(path, pathlen);
} }
out_err: out_err:
...@@ -3642,7 +3597,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, ...@@ -3642,7 +3597,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
recon_state.msg_version = 2; recon_state.msg_version = 2;
} }
/* trsaverse this session's caps */ /* trsaverse this session's caps */
err = iterate_session_caps(session, encode_caps_cb, &recon_state); err = ceph_iterate_session_caps(session, encode_caps_cb, &recon_state);
spin_lock(&session->s_cap_lock); spin_lock(&session->s_cap_lock);
session->s_cap_reconnect = 0; session->s_cap_reconnect = 0;
...@@ -4125,6 +4080,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ...@@ -4125,6 +4080,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
mdsc->max_sessions = 0; mdsc->max_sessions = 0;
mdsc->stopping = 0; mdsc->stopping = 0;
atomic64_set(&mdsc->quotarealms_count, 0); atomic64_set(&mdsc->quotarealms_count, 0);
mdsc->quotarealms_inodes = RB_ROOT;
mutex_init(&mdsc->quotarealms_inodes_mutex);
mdsc->last_snap_seq = 0; mdsc->last_snap_seq = 0;
init_rwsem(&mdsc->snap_rwsem); init_rwsem(&mdsc->snap_rwsem);
mdsc->snap_realms = RB_ROOT; mdsc->snap_realms = RB_ROOT;
...@@ -4216,6 +4173,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) ...@@ -4216,6 +4173,8 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
* their inode/dcache refs * their inode/dcache refs
*/ */
ceph_msgr_flush(); ceph_msgr_flush();
ceph_cleanup_quotarealms_inodes(mdsc);
} }
/* /*
......
...@@ -325,6 +325,18 @@ struct ceph_snapid_map { ...@@ -325,6 +325,18 @@ struct ceph_snapid_map {
unsigned long last_used; unsigned long last_used;
}; };
/*
 * Entry in the mdsc->quotarealms_inodes rb-tree, keyed by inode number.
 * Each entry tracks a quotarealm inode that is not visible from the
 * filesystem mountpoint, but is required to handle, e.g. quotas.
 */
struct ceph_quotarealm_inode {
/* linkage into mdsc->quotarealms_inodes */
struct rb_node node;
/* inode number; the tree's sort key */
u64 ino;
unsigned long timeout; /* jiffies until which a failed lookup is not retried; 0 if none */
/* taken around the lookup of this entry's inode */
struct mutex mutex;
/* inode returned by a successful lookup; NULL until one succeeds */
struct inode *inode;
};
/* /*
* mds client state * mds client state
*/ */
...@@ -344,6 +356,12 @@ struct ceph_mds_client { ...@@ -344,6 +356,12 @@ struct ceph_mds_client {
int stopping; /* true if shutting down */ int stopping; /* true if shutting down */
atomic64_t quotarealms_count; /* # realms with quota */ atomic64_t quotarealms_count; /* # realms with quota */
/*
* We keep a tree of inodes that are not visible from the mountpoint but
* that we need in order to track quota realms.
*/
struct rb_root quotarealms_inodes;
struct mutex quotarealms_inodes_mutex;
/* /*
* snap_rwsem will cover cap linkage into snaprealms, and * snap_rwsem will cover cap linkage into snaprealms, and
...@@ -447,8 +465,9 @@ extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, ...@@ -447,8 +465,9 @@ extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
struct inode *dir); struct inode *dir);
extern struct ceph_mds_request * extern struct ceph_mds_request *
ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode);
extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
struct ceph_mds_request *req); struct inode *dir,
struct ceph_mds_request *req);
extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir, struct inode *dir,
struct ceph_mds_request *req); struct ceph_mds_request *req);
...@@ -468,8 +487,18 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc, ...@@ -468,8 +487,18 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session); struct ceph_mds_session *session);
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc); extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr); extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
int (*cb)(struct inode *,
struct ceph_cap *, void *),
void *arg);
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
/*
 * Release a path obtained from ceph_mdsc_build_path().
 *
 * @path points @len bytes before the terminating NUL that sits at offset
 * PATH_MAX - 1 of the underlying buffer, so the start of that buffer is
 * recovered by stepping back (PATH_MAX - 1 - len) bytes before handing it
 * to __putname().  A NULL @path is ignored.
 */
static inline void ceph_mdsc_free_path(char *path, int len)
{
	if (!path)
		return;

	__putname(path - (PATH_MAX - 1 - len));
}
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
int stop_on_nosnap); int stop_on_nosnap);
......
...@@ -205,7 +205,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -205,7 +205,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
i+1, n, global_id, mds, inc, i+1, n, global_id, mds, inc,
ceph_pr_addr(&addr.in_addr), ceph_pr_addr(&addr),
ceph_mds_state_name(state)); ceph_mds_state_name(state));
if (mds < 0 || state <= 0) if (mds < 0 || state <= 0)
......
...@@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc) ...@@ -22,7 +22,16 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
static inline bool ceph_has_realms_with_quotas(struct inode *inode) static inline bool ceph_has_realms_with_quotas(struct inode *inode)
{ {
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
return atomic64_read(&mdsc->quotarealms_count) > 0; struct super_block *sb = mdsc->fsc->sb;
if (atomic64_read(&mdsc->quotarealms_count) > 0)
return true;
/* if root is the real CephFS root, we don't have quota realms */
if (sb->s_root->d_inode &&
(sb->s_root->d_inode->i_ino == CEPH_INO_ROOT))
return false;
/* otherwise, we can't know for sure */
return true;
} }
void ceph_handle_quota(struct ceph_mds_client *mdsc, void ceph_handle_quota(struct ceph_mds_client *mdsc,
...@@ -68,6 +77,108 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, ...@@ -68,6 +77,108 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
iput(inode); iput(inode);
} }
/*
 * Find the quotarealm entry for @ino in mdsc->quotarealms_inodes, creating
 * and inserting a fresh one (no inode, no timeout) when none exists yet.
 * The whole search/insert runs under mdsc->quotarealms_inodes_mutex.
 *
 * Returns the entry, or NULL if a new entry was needed and could not be
 * allocated.
 */
static struct ceph_quotarealm_inode *
find_quotarealm_inode(struct ceph_mds_client *mdsc, u64 ino)
{
	struct ceph_quotarealm_inode *entry = NULL;
	struct rb_node *parent = NULL;
	struct rb_node **link;

	mutex_lock(&mdsc->quotarealms_inodes_mutex);

	/* Descend the tree, remembering the link slot for a later insert. */
	link = &mdsc->quotarealms_inodes.rb_node;
	while (*link) {
		struct ceph_quotarealm_inode *cur;

		parent = *link;
		cur = container_of(parent, struct ceph_quotarealm_inode, node);
		if (ino == cur->ino) {
			entry = cur;
			break;
		}
		link = (ino < cur->ino) ? &parent->rb_left : &parent->rb_right;
	}

	if (!entry) {
		/* Not found, create a new one and insert it */
		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry) {
			pr_warn("Failed to alloc quotarealms_inode\n");
		} else {
			entry->ino = ino;
			entry->inode = NULL;
			entry->timeout = 0;
			mutex_init(&entry->mutex);
			rb_link_node(&entry->node, parent, link);
			rb_insert_color(&entry->node,
					&mdsc->quotarealms_inodes);
		}
	}

	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
	return entry;
}
/*
 * Try to look up the inode of @realm when it is not visible in the
 * filesystem mountpoint.  Results are cached per inode number in the
 * mdsc's quotarealm tree, whose entries are only freed by
 * ceph_cleanup_quotarealms_inodes().
 *
 * An entry is kept even when the lookup fails; in that case a timeout is
 * recorded and further lookups for the same inode are skipped until it has
 * passed, which avoids useless lookup requests.
 *
 * Returns the cached or freshly looked-up inode, NULL if no entry could be
 * obtained or a recent failure is still being honoured, or the ERR_PTR
 * returned by ceph_lookup_inode().
 */
static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc,
					     struct super_block *sb,
					     struct ceph_snap_realm *realm)
{
	struct ceph_quotarealm_inode *entry;
	struct inode *found = NULL;

	entry = find_quotarealm_inode(mdsc, realm->ino);
	if (!entry)
		return NULL;

	mutex_lock(&entry->mutex);

	if (entry->inode) {
		/* A request has already returned the inode */
		mutex_unlock(&entry->mutex);
		return entry->inode;
	}

	/* Check if this inode lookup has failed recently */
	if (entry->timeout && time_before_eq(jiffies, entry->timeout))
		goto unlock;

	found = ceph_lookup_inode(sb, realm->ino);
	if (!IS_ERR(found)) {
		entry->timeout = 0;
		entry->inode = found;
	} else {
		pr_warn("Can't lookup inode %llx (err: %ld)\n",
			realm->ino, PTR_ERR(found));
		entry->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */
	}

unlock:
	mutex_unlock(&entry->mutex);
	return found;
}
/*
 * Tear down the mdsc's tree of quotarealm inodes: every entry is unlinked
 * from the tree, its inode reference is dropped and the entry is freed.
 * The teardown is done with mdsc->quotarealms_inodes_mutex held.
 */
void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc)
{
	struct rb_root *tree = &mdsc->quotarealms_inodes;
	struct rb_node *first;

	mutex_lock(&mdsc->quotarealms_inodes_mutex);
	for (first = rb_first(tree); first; first = rb_first(tree)) {
		struct ceph_quotarealm_inode *entry =
			rb_entry(first, struct ceph_quotarealm_inode, node);

		rb_erase(first, tree);
		/* inode may still be NULL if no lookup ever succeeded */
		iput(entry->inode);
		kfree(entry);
	}
	mutex_unlock(&mdsc->quotarealms_inodes_mutex);
}
/* /*
* This function walks through the snaprealm for an inode and returns the * This function walks through the snaprealm for an inode and returns the
* ceph_snap_realm for the first snaprealm that has quotas set (either max_files * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
...@@ -76,9 +187,15 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, ...@@ -76,9 +187,15 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
* *
* Note that the caller is responsible for calling ceph_put_snap_realm() on the * Note that the caller is responsible for calling ceph_put_snap_realm() on the
* returned realm. * returned realm.
*
* Callers of this function need to hold mdsc->snap_rwsem. However, if there's
* a need to do an inode lookup, this rwsem will be temporarily dropped. Hence
* the 'retry' argument: if rwsem needs to be dropped and 'retry' is 'false'
* this function will return -EAGAIN; otherwise, the snaprealms walk-through
* will be restarted.
*/ */
static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
struct inode *inode) struct inode *inode, bool retry)
{ {
struct ceph_inode_info *ci = NULL; struct ceph_inode_info *ci = NULL;
struct ceph_snap_realm *realm, *next; struct ceph_snap_realm *realm, *next;
...@@ -88,6 +205,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, ...@@ -88,6 +205,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
if (ceph_snap(inode) != CEPH_NOSNAP) if (ceph_snap(inode) != CEPH_NOSNAP)
return NULL; return NULL;
restart:
realm = ceph_inode(inode)->i_snap_realm; realm = ceph_inode(inode)->i_snap_realm;
if (realm) if (realm)
ceph_get_snap_realm(mdsc, realm); ceph_get_snap_realm(mdsc, realm);
...@@ -95,11 +213,25 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc, ...@@ -95,11 +213,25 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) " pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
"null i_snap_realm\n", ceph_vinop(inode)); "null i_snap_realm\n", ceph_vinop(inode));
while (realm) { while (realm) {
bool has_inode;
spin_lock(&realm->inodes_with_caps_lock); spin_lock(&realm->inodes_with_caps_lock);
in = realm->inode ? igrab(realm->inode) : NULL; has_inode = realm->inode;
in = has_inode ? igrab(realm->inode) : NULL;
spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&realm->inodes_with_caps_lock);
if (!in) if (has_inode && !in)
break; break;
if (!in) {
up_read(&mdsc->snap_rwsem);
in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
down_read(&mdsc->snap_rwsem);
if (IS_ERR_OR_NULL(in))
break;
ceph_put_snap_realm(mdsc, realm);
if (!retry)
return ERR_PTR(-EAGAIN);
goto restart;
}
ci = ceph_inode(in); ci = ceph_inode(in);
has_quota = __ceph_has_any_quota(ci); has_quota = __ceph_has_any_quota(ci);
...@@ -125,9 +257,22 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) ...@@ -125,9 +257,22 @@ bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
struct ceph_snap_realm *old_realm, *new_realm; struct ceph_snap_realm *old_realm, *new_realm;
bool is_same; bool is_same;
restart:
/*
* We need to lookup 2 quota realms atomically, i.e. with snap_rwsem.
* However, get_quota_realm may drop it temporarily. By setting the
* 'retry' parameter to 'false', we'll get -EAGAIN if the rwsem was
* dropped and we can then restart the whole operation.
*/
down_read(&mdsc->snap_rwsem); down_read(&mdsc->snap_rwsem);
old_realm = get_quota_realm(mdsc, old); old_realm = get_quota_realm(mdsc, old, true);
new_realm = get_quota_realm(mdsc, new); new_realm = get_quota_realm(mdsc, new, false);
if (PTR_ERR(new_realm) == -EAGAIN) {
up_read(&mdsc->snap_rwsem);
if (old_realm)
ceph_put_snap_realm(mdsc, old_realm);
goto restart;
}
is_same = (old_realm == new_realm); is_same = (old_realm == new_realm);
up_read(&mdsc->snap_rwsem); up_read(&mdsc->snap_rwsem);
...@@ -166,6 +311,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, ...@@ -166,6 +311,7 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
return false; return false;
down_read(&mdsc->snap_rwsem); down_read(&mdsc->snap_rwsem);
restart:
realm = ceph_inode(inode)->i_snap_realm; realm = ceph_inode(inode)->i_snap_realm;
if (realm) if (realm)
ceph_get_snap_realm(mdsc, realm); ceph_get_snap_realm(mdsc, realm);
...@@ -173,12 +319,23 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op, ...@@ -173,12 +319,23 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) " pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
"null i_snap_realm\n", ceph_vinop(inode)); "null i_snap_realm\n", ceph_vinop(inode));
while (realm) { while (realm) {
bool has_inode;
spin_lock(&realm->inodes_with_caps_lock); spin_lock(&realm->inodes_with_caps_lock);
in = realm->inode ? igrab(realm->inode) : NULL; has_inode = realm->inode;
in = has_inode ? igrab(realm->inode) : NULL;
spin_unlock(&realm->inodes_with_caps_lock); spin_unlock(&realm->inodes_with_caps_lock);
if (!in) if (has_inode && !in)
break; break;
if (!in) {
up_read(&mdsc->snap_rwsem);
in = lookup_quotarealm_inode(mdsc, inode->i_sb, realm);
down_read(&mdsc->snap_rwsem);
if (IS_ERR_OR_NULL(in))
break;
ceph_put_snap_realm(mdsc, realm);
goto restart;
}
ci = ceph_inode(in); ci = ceph_inode(in);
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
if (op == QUOTA_CHECK_MAX_FILES_OP) { if (op == QUOTA_CHECK_MAX_FILES_OP) {
...@@ -314,7 +471,7 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf) ...@@ -314,7 +471,7 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
bool is_updated = false; bool is_updated = false;
down_read(&mdsc->snap_rwsem); down_read(&mdsc->snap_rwsem);
realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root)); realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root), true);
up_read(&mdsc->snap_rwsem); up_read(&mdsc->snap_rwsem);
if (!realm) if (!realm)
return false; return false;
......
...@@ -845,6 +845,12 @@ static void ceph_umount_begin(struct super_block *sb) ...@@ -845,6 +845,12 @@ static void ceph_umount_begin(struct super_block *sb)
return; return;
} }
/*
 * Remount handler: call sync_filesystem() on the superblock and report
 * success.  Neither 'flags' nor 'data' is examined here, and the value
 * returned by sync_filesystem() is discarded.
 */
static int ceph_remount(struct super_block *sb, int *flags, char *data)
{
	(void)sync_filesystem(sb);

	return 0;
}
static const struct super_operations ceph_super_ops = { static const struct super_operations ceph_super_ops = {
.alloc_inode = ceph_alloc_inode, .alloc_inode = ceph_alloc_inode,
.destroy_inode = ceph_destroy_inode, .destroy_inode = ceph_destroy_inode,
...@@ -853,6 +859,7 @@ static const struct super_operations ceph_super_ops = { ...@@ -853,6 +859,7 @@ static const struct super_operations ceph_super_ops = {
.drop_inode = ceph_drop_inode, .drop_inode = ceph_drop_inode,
.sync_fs = ceph_sync_fs, .sync_fs = ceph_sync_fs,
.put_super = ceph_put_super, .put_super = ceph_put_super,
.remount_fs = ceph_remount,
.show_options = ceph_show_options, .show_options = ceph_show_options,
.statfs = ceph_statfs, .statfs = ceph_statfs,
.umount_begin = ceph_umount_begin, .umount_begin = ceph_umount_begin,
......
...@@ -1083,6 +1083,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); ...@@ -1083,6 +1083,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
/* export.c */ /* export.c */
extern const struct export_operations ceph_export_ops; extern const struct export_operations ceph_export_ops;
struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino);
/* locks.c */ /* locks.c */
extern __init void ceph_flock_init(void); extern __init void ceph_flock_init(void);
...@@ -1133,5 +1134,6 @@ extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, ...@@ -1133,5 +1134,6 @@ extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen); loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf); struct kstatfs *buf);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */ #endif /* _FS_CEPH_SUPER_H */
...@@ -436,6 +436,12 @@ union ceph_mds_request_args { ...@@ -436,6 +436,12 @@ union ceph_mds_request_args {
__le64 length; /* num bytes to lock from start */ __le64 length; /* num bytes to lock from start */
__u8 wait; /* will caller wait for lock to become available? */ __u8 wait; /* will caller wait for lock to become available? */
} __attribute__ ((packed)) filelock_change; } __attribute__ ((packed)) filelock_change;
struct {
__le32 mask; /* CEPH_CAP_* */
__le64 snapid;
__le64 parent;
__le32 hash;
} __attribute__ ((packed)) lookupino;
} __attribute__ ((packed)); } __attribute__ ((packed));
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
......
...@@ -323,7 +323,8 @@ struct ceph_connection { ...@@ -323,7 +323,8 @@ struct ceph_connection {
}; };
extern const char *ceph_pr_addr(const struct sockaddr_storage *ss); extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
extern int ceph_parse_ips(const char *c, const char *end, extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr, struct ceph_entity_addr *addr,
int max_count, int *count); int max_count, int *count);
......
...@@ -110,17 +110,16 @@ struct ceph_object_id { ...@@ -110,17 +110,16 @@ struct ceph_object_id {
int name_len; int name_len;
}; };
#define __CEPH_OID_INITIALIZER(oid) { .name = (oid).inline_name }
#define CEPH_DEFINE_OID_ONSTACK(oid) \
struct ceph_object_id oid = __CEPH_OID_INITIALIZER(oid)
static inline void ceph_oid_init(struct ceph_object_id *oid) static inline void ceph_oid_init(struct ceph_object_id *oid)
{ {
oid->name = oid->inline_name; *oid = (struct ceph_object_id) __CEPH_OID_INITIALIZER(*oid);
oid->name_len = 0;
} }
#define CEPH_OID_INIT_ONSTACK(oid) \
({ ceph_oid_init(&oid); oid; })
#define CEPH_DEFINE_OID_ONSTACK(oid) \
struct ceph_object_id oid = CEPH_OID_INIT_ONSTACK(oid)
static inline bool ceph_oid_empty(const struct ceph_object_id *oid) static inline bool ceph_oid_empty(const struct ceph_object_id *oid)
{ {
return oid->name == oid->inline_name && !oid->name_len; return oid->name == oid->inline_name && !oid->name_len;
......
...@@ -271,7 +271,7 @@ static int decode_locker(void **p, void *end, struct ceph_locker *locker) ...@@ -271,7 +271,7 @@ static int decode_locker(void **p, void *end, struct ceph_locker *locker)
dout("%s %s%llu cookie %s addr %s\n", __func__, dout("%s %s%llu cookie %s addr %s\n", __func__,
ENTITY_NAME(locker->id.name), locker->id.cookie, ENTITY_NAME(locker->id.name), locker->id.cookie,
ceph_pr_addr(&locker->info.addr.in_addr)); ceph_pr_addr(&locker->info.addr));
return 0; return 0;
} }
......
...@@ -46,7 +46,7 @@ static int monmap_show(struct seq_file *s, void *p) ...@@ -46,7 +46,7 @@ static int monmap_show(struct seq_file *s, void *p)
seq_printf(s, "\t%s%lld\t%s\n", seq_printf(s, "\t%s%lld\t%s\n",
ENTITY_NAME(inst->name), ENTITY_NAME(inst->name),
ceph_pr_addr(&inst->addr.in_addr)); ceph_pr_addr(&inst->addr));
} }
return 0; return 0;
} }
...@@ -82,7 +82,7 @@ static int osdmap_show(struct seq_file *s, void *p) ...@@ -82,7 +82,7 @@ static int osdmap_show(struct seq_file *s, void *p)
char sb[64]; char sb[64];
seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n", seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
i, ceph_pr_addr(&addr->in_addr), i, ceph_pr_addr(addr),
((map->osd_weight[i]*100) >> 16), ((map->osd_weight[i]*100) >> 16),
ceph_osdmap_state_str(sb, sizeof(sb), state), ceph_osdmap_state_str(sb, sizeof(sb), state),
((ceph_get_primary_affinity(map, i)*100) >> 16)); ((ceph_get_primary_affinity(map, i)*100) >> 16));
......
...@@ -186,17 +186,18 @@ static atomic_t addr_str_seq = ATOMIC_INIT(0); ...@@ -186,17 +186,18 @@ static atomic_t addr_str_seq = ATOMIC_INIT(0);
static struct page *zero_page; /* used in certain error cases */ static struct page *zero_page; /* used in certain error cases */
const char *ceph_pr_addr(const struct sockaddr_storage *ss) const char *ceph_pr_addr(const struct ceph_entity_addr *addr)
{ {
int i; int i;
char *s; char *s;
struct sockaddr_in *in4 = (struct sockaddr_in *) ss; struct sockaddr_storage ss = addr->in_addr; /* align */
struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss; struct sockaddr_in *in4 = (struct sockaddr_in *)&ss;
struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)&ss;
i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK; i = atomic_inc_return(&addr_str_seq) & ADDR_STR_COUNT_MASK;
s = addr_str[i]; s = addr_str[i];
switch (ss->ss_family) { switch (ss.ss_family) {
case AF_INET: case AF_INET:
snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr, snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%hu", &in4->sin_addr,
ntohs(in4->sin_port)); ntohs(in4->sin_port));
...@@ -209,7 +210,7 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss) ...@@ -209,7 +210,7 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss)
default: default:
snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)", snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %hu)",
ss->ss_family); ss.ss_family);
} }
return s; return s;
...@@ -449,7 +450,7 @@ static void set_sock_callbacks(struct socket *sock, ...@@ -449,7 +450,7 @@ static void set_sock_callbacks(struct socket *sock,
*/ */
static int ceph_tcp_connect(struct ceph_connection *con) static int ceph_tcp_connect(struct ceph_connection *con)
{ {
struct sockaddr_storage *paddr = &con->peer_addr.in_addr; struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */
struct socket *sock; struct socket *sock;
unsigned int noio_flag; unsigned int noio_flag;
int ret; int ret;
...@@ -458,7 +459,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) ...@@ -458,7 +459,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
/* sock_create_kern() allocates with GFP_KERNEL */ /* sock_create_kern() allocates with GFP_KERNEL */
noio_flag = memalloc_noio_save(); noio_flag = memalloc_noio_save();
ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock); SOCK_STREAM, IPPROTO_TCP, &sock);
memalloc_noio_restore(noio_flag); memalloc_noio_restore(noio_flag);
if (ret) if (ret)
...@@ -471,18 +472,18 @@ static int ceph_tcp_connect(struct ceph_connection *con) ...@@ -471,18 +472,18 @@ static int ceph_tcp_connect(struct ceph_connection *con)
set_sock_callbacks(sock, con); set_sock_callbacks(sock, con);
dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr)); dout("connect %s\n", ceph_pr_addr(&con->peer_addr));
con_sock_state_connecting(con); con_sock_state_connecting(con);
ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss),
O_NONBLOCK); O_NONBLOCK);
if (ret == -EINPROGRESS) { if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n", dout("connect %s EINPROGRESS sk_state = %u\n",
ceph_pr_addr(&con->peer_addr.in_addr), ceph_pr_addr(&con->peer_addr),
sock->sk->sk_state); sock->sk->sk_state);
} else if (ret < 0) { } else if (ret < 0) {
pr_err("connect %s error %d\n", pr_err("connect %s error %d\n",
ceph_pr_addr(&con->peer_addr.in_addr), ret); ceph_pr_addr(&con->peer_addr), ret);
sock_release(sock); sock_release(sock);
return ret; return ret;
} }
...@@ -669,8 +670,7 @@ static void reset_connection(struct ceph_connection *con) ...@@ -669,8 +670,7 @@ static void reset_connection(struct ceph_connection *con)
void ceph_con_close(struct ceph_connection *con) void ceph_con_close(struct ceph_connection *con)
{ {
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
dout("con_close %p peer %s\n", con, dout("con_close %p peer %s\n", con, ceph_pr_addr(&con->peer_addr));
ceph_pr_addr(&con->peer_addr.in_addr));
con->state = CON_STATE_CLOSED; con->state = CON_STATE_CLOSED;
con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */
...@@ -694,7 +694,7 @@ void ceph_con_open(struct ceph_connection *con, ...@@ -694,7 +694,7 @@ void ceph_con_open(struct ceph_connection *con,
struct ceph_entity_addr *addr) struct ceph_entity_addr *addr)
{ {
mutex_lock(&con->mutex); mutex_lock(&con->mutex);
dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr)); dout("con_open %p %s\n", con, ceph_pr_addr(addr));
WARN_ON(con->state != CON_STATE_CLOSED); WARN_ON(con->state != CON_STATE_CLOSED);
con->state = CON_STATE_PREOPEN; con->state = CON_STATE_PREOPEN;
...@@ -1788,21 +1788,22 @@ static int verify_hello(struct ceph_connection *con) ...@@ -1788,21 +1788,22 @@ static int verify_hello(struct ceph_connection *con)
{ {
if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
pr_err("connect to %s got bad banner\n", pr_err("connect to %s got bad banner\n",
ceph_pr_addr(&con->peer_addr.in_addr)); ceph_pr_addr(&con->peer_addr));
con->error_msg = "protocol error, bad banner"; con->error_msg = "protocol error, bad banner";
return -1; return -1;
} }
return 0; return 0;
} }
static bool addr_is_blank(struct sockaddr_storage *ss) static bool addr_is_blank(struct ceph_entity_addr *addr)
{ {
struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; struct sockaddr_storage ss = addr->in_addr; /* align */
struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr;
struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr;
switch (ss->ss_family) { switch (ss.ss_family) {
case AF_INET: case AF_INET:
return addr->s_addr == htonl(INADDR_ANY); return addr4->s_addr == htonl(INADDR_ANY);
case AF_INET6: case AF_INET6:
return ipv6_addr_any(addr6); return ipv6_addr_any(addr6);
default: default:
...@@ -1810,25 +1811,25 @@ static bool addr_is_blank(struct sockaddr_storage *ss) ...@@ -1810,25 +1811,25 @@ static bool addr_is_blank(struct sockaddr_storage *ss)
} }
} }
static int addr_port(struct sockaddr_storage *ss) static int addr_port(struct ceph_entity_addr *addr)
{ {
switch (ss->ss_family) { switch (get_unaligned(&addr->in_addr.ss_family)) {
case AF_INET: case AF_INET:
return ntohs(((struct sockaddr_in *)ss)->sin_port); return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port));
case AF_INET6: case AF_INET6:
return ntohs(((struct sockaddr_in6 *)ss)->sin6_port); return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port));
} }
return 0; return 0;
} }
static void addr_set_port(struct sockaddr_storage *ss, int p) static void addr_set_port(struct ceph_entity_addr *addr, int p)
{ {
switch (ss->ss_family) { switch (get_unaligned(&addr->in_addr.ss_family)) {
case AF_INET: case AF_INET:
((struct sockaddr_in *)ss)->sin_port = htons(p); put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port);
break; break;
case AF_INET6: case AF_INET6:
((struct sockaddr_in6 *)ss)->sin6_port = htons(p); put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port);
break; break;
} }
} }
...@@ -1836,21 +1837,18 @@ static void addr_set_port(struct sockaddr_storage *ss, int p) ...@@ -1836,21 +1837,18 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
/* /*
* Unlike other *_pton function semantics, zero indicates success. * Unlike other *_pton function semantics, zero indicates success.
*/ */
static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr,
char delim, const char **ipend) char delim, const char **ipend)
{ {
struct sockaddr_in *in4 = (struct sockaddr_in *) ss; memset(&addr->in_addr, 0, sizeof(addr->in_addr));
struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
memset(ss, 0, sizeof(*ss)); if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) {
put_unaligned(AF_INET, &addr->in_addr.ss_family);
if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
ss->ss_family = AF_INET;
return 0; return 0;
} }
if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) { if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) {
ss->ss_family = AF_INET6; put_unaligned(AF_INET6, &addr->in_addr.ss_family);
return 0; return 0;
} }
...@@ -1862,7 +1860,7 @@ static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss, ...@@ -1862,7 +1860,7 @@ static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
*/ */
#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER #ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
static int ceph_dns_resolve_name(const char *name, size_t namelen, static int ceph_dns_resolve_name(const char *name, size_t namelen,
struct sockaddr_storage *ss, char delim, const char **ipend) struct ceph_entity_addr *addr, char delim, const char **ipend)
{ {
const char *end, *delim_p; const char *end, *delim_p;
char *colon_p, *ip_addr = NULL; char *colon_p, *ip_addr = NULL;
...@@ -1891,7 +1889,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen, ...@@ -1891,7 +1889,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
/* do dns_resolve upcall */ /* do dns_resolve upcall */
ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL); ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
if (ip_len > 0) if (ip_len > 0)
ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL); ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL);
else else
ret = -ESRCH; ret = -ESRCH;
...@@ -1900,13 +1898,13 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen, ...@@ -1900,13 +1898,13 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
*ipend = end; *ipend = end;
pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name, pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
ret, ret ? "failed" : ceph_pr_addr(ss)); ret, ret ? "failed" : ceph_pr_addr(addr));
return ret; return ret;
} }
#else #else
static inline int ceph_dns_resolve_name(const char *name, size_t namelen, static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
struct sockaddr_storage *ss, char delim, const char **ipend) struct ceph_entity_addr *addr, char delim, const char **ipend)
{ {
return -EINVAL; return -EINVAL;
} }
...@@ -1917,13 +1915,13 @@ static inline int ceph_dns_resolve_name(const char *name, size_t namelen, ...@@ -1917,13 +1915,13 @@ static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
* then try to extract a hostname to resolve using userspace DNS upcall. * then try to extract a hostname to resolve using userspace DNS upcall.
*/ */
static int ceph_parse_server_name(const char *name, size_t namelen, static int ceph_parse_server_name(const char *name, size_t namelen,
struct sockaddr_storage *ss, char delim, const char **ipend) struct ceph_entity_addr *addr, char delim, const char **ipend)
{ {
int ret; int ret;
ret = ceph_pton(name, namelen, ss, delim, ipend); ret = ceph_pton(name, namelen, addr, delim, ipend);
if (ret) if (ret)
ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend); ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend);
return ret; return ret;
} }
...@@ -1942,7 +1940,6 @@ int ceph_parse_ips(const char *c, const char *end, ...@@ -1942,7 +1940,6 @@ int ceph_parse_ips(const char *c, const char *end,
dout("parse_ips on '%.*s'\n", (int)(end-c), c); dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) { for (i = 0; i < max_count; i++) {
const char *ipend; const char *ipend;
struct sockaddr_storage *ss = &addr[i].in_addr;
int port; int port;
char delim = ','; char delim = ',';
...@@ -1951,7 +1948,7 @@ int ceph_parse_ips(const char *c, const char *end, ...@@ -1951,7 +1948,7 @@ int ceph_parse_ips(const char *c, const char *end,
p++; p++;
} }
ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend); ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
if (ret) if (ret)
goto bad; goto bad;
ret = -EINVAL; ret = -EINVAL;
...@@ -1982,9 +1979,9 @@ int ceph_parse_ips(const char *c, const char *end, ...@@ -1982,9 +1979,9 @@ int ceph_parse_ips(const char *c, const char *end,
port = CEPH_MON_PORT; port = CEPH_MON_PORT;
} }
addr_set_port(ss, port); addr_set_port(&addr[i], port);
dout("parse_ips got %s\n", ceph_pr_addr(ss)); dout("parse_ips got %s\n", ceph_pr_addr(&addr[i]));
if (p == end) if (p == end)
break; break;
...@@ -2023,12 +2020,12 @@ static int process_banner(struct ceph_connection *con) ...@@ -2023,12 +2020,12 @@ static int process_banner(struct ceph_connection *con)
*/ */
if (memcmp(&con->peer_addr, &con->actual_peer_addr, if (memcmp(&con->peer_addr, &con->actual_peer_addr,
sizeof(con->peer_addr)) != 0 && sizeof(con->peer_addr)) != 0 &&
!(addr_is_blank(&con->actual_peer_addr.in_addr) && !(addr_is_blank(&con->actual_peer_addr) &&
con->actual_peer_addr.nonce == con->peer_addr.nonce)) { con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
pr_warn("wrong peer, want %s/%d, got %s/%d\n", pr_warn("wrong peer, want %s/%d, got %s/%d\n",
ceph_pr_addr(&con->peer_addr.in_addr), ceph_pr_addr(&con->peer_addr),
(int)le32_to_cpu(con->peer_addr.nonce), (int)le32_to_cpu(con->peer_addr.nonce),
ceph_pr_addr(&con->actual_peer_addr.in_addr), ceph_pr_addr(&con->actual_peer_addr),
(int)le32_to_cpu(con->actual_peer_addr.nonce)); (int)le32_to_cpu(con->actual_peer_addr.nonce));
con->error_msg = "wrong peer at address"; con->error_msg = "wrong peer at address";
return -1; return -1;
...@@ -2037,16 +2034,16 @@ static int process_banner(struct ceph_connection *con) ...@@ -2037,16 +2034,16 @@ static int process_banner(struct ceph_connection *con)
/* /*
* did we learn our address? * did we learn our address?
*/ */
if (addr_is_blank(&con->msgr->inst.addr.in_addr)) { if (addr_is_blank(&con->msgr->inst.addr)) {
int port = addr_port(&con->msgr->inst.addr.in_addr); int port = addr_port(&con->msgr->inst.addr);
memcpy(&con->msgr->inst.addr.in_addr, memcpy(&con->msgr->inst.addr.in_addr,
&con->peer_addr_for_me.in_addr, &con->peer_addr_for_me.in_addr,
sizeof(con->peer_addr_for_me.in_addr)); sizeof(con->peer_addr_for_me.in_addr));
addr_set_port(&con->msgr->inst.addr.in_addr, port); addr_set_port(&con->msgr->inst.addr, port);
encode_my_addr(con->msgr); encode_my_addr(con->msgr);
dout("process_banner learned my addr is %s\n", dout("process_banner learned my addr is %s\n",
ceph_pr_addr(&con->msgr->inst.addr.in_addr)); ceph_pr_addr(&con->msgr->inst.addr));
} }
return 0; return 0;
...@@ -2097,7 +2094,7 @@ static int process_connect(struct ceph_connection *con) ...@@ -2097,7 +2094,7 @@ static int process_connect(struct ceph_connection *con)
pr_err("%s%lld %s feature set mismatch," pr_err("%s%lld %s feature set mismatch,"
" my %llx < server's %llx, missing %llx\n", " my %llx < server's %llx, missing %llx\n",
ENTITY_NAME(con->peer_name), ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), ceph_pr_addr(&con->peer_addr),
sup_feat, server_feat, server_feat & ~sup_feat); sup_feat, server_feat, server_feat & ~sup_feat);
con->error_msg = "missing required protocol features"; con->error_msg = "missing required protocol features";
reset_connection(con); reset_connection(con);
...@@ -2107,7 +2104,7 @@ static int process_connect(struct ceph_connection *con) ...@@ -2107,7 +2104,7 @@ static int process_connect(struct ceph_connection *con)
pr_err("%s%lld %s protocol version mismatch," pr_err("%s%lld %s protocol version mismatch,"
" my %d != server's %d\n", " my %d != server's %d\n",
ENTITY_NAME(con->peer_name), ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), ceph_pr_addr(&con->peer_addr),
le32_to_cpu(con->out_connect.protocol_version), le32_to_cpu(con->out_connect.protocol_version),
le32_to_cpu(con->in_reply.protocol_version)); le32_to_cpu(con->in_reply.protocol_version));
con->error_msg = "protocol version mismatch"; con->error_msg = "protocol version mismatch";
...@@ -2141,7 +2138,7 @@ static int process_connect(struct ceph_connection *con) ...@@ -2141,7 +2138,7 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.connect_seq)); le32_to_cpu(con->in_reply.connect_seq));
pr_err("%s%lld %s connection reset\n", pr_err("%s%lld %s connection reset\n",
ENTITY_NAME(con->peer_name), ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr)); ceph_pr_addr(&con->peer_addr));
reset_connection(con); reset_connection(con);
con_out_kvec_reset(con); con_out_kvec_reset(con);
ret = prepare_write_connect(con); ret = prepare_write_connect(con);
...@@ -2198,7 +2195,7 @@ static int process_connect(struct ceph_connection *con) ...@@ -2198,7 +2195,7 @@ static int process_connect(struct ceph_connection *con)
pr_err("%s%lld %s protocol feature mismatch," pr_err("%s%lld %s protocol feature mismatch,"
" my required %llx > server's %llx, need %llx\n", " my required %llx > server's %llx, need %llx\n",
ENTITY_NAME(con->peer_name), ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), ceph_pr_addr(&con->peer_addr),
req_feat, server_feat, req_feat & ~server_feat); req_feat, server_feat, req_feat & ~server_feat);
con->error_msg = "missing required protocol features"; con->error_msg = "missing required protocol features";
reset_connection(con); reset_connection(con);
...@@ -2405,7 +2402,7 @@ static int read_partial_message(struct ceph_connection *con) ...@@ -2405,7 +2402,7 @@ static int read_partial_message(struct ceph_connection *con)
if ((s64)seq - (s64)con->in_seq < 1) { if ((s64)seq - (s64)con->in_seq < 1) {
pr_info("skipping %s%lld %s seq %lld expected %lld\n", pr_info("skipping %s%lld %s seq %lld expected %lld\n",
ENTITY_NAME(con->peer_name), ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), ceph_pr_addr(&con->peer_addr),
seq, con->in_seq + 1); seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len - con->in_base_pos = -front_len - middle_len - data_len -
sizeof_footer(con); sizeof_footer(con);
...@@ -2984,10 +2981,10 @@ static void ceph_con_workfn(struct work_struct *work) ...@@ -2984,10 +2981,10 @@ static void ceph_con_workfn(struct work_struct *work)
static void con_fault(struct ceph_connection *con) static void con_fault(struct ceph_connection *con)
{ {
dout("fault %p state %lu to peer %s\n", dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); con, con->state, ceph_pr_addr(&con->peer_addr));
pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); ceph_pr_addr(&con->peer_addr), con->error_msg);
con->error_msg = NULL; con->error_msg = NULL;
WARN_ON(con->state != CON_STATE_CONNECTING && WARN_ON(con->state != CON_STATE_CONNECTING &&
......
...@@ -76,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end) ...@@ -76,7 +76,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
m->num_mon); m->num_mon);
for (i = 0; i < m->num_mon; i++) for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i, dout("monmap_decode mon%d is %s\n", i,
ceph_pr_addr(&m->mon_inst[i].addr.in_addr)); ceph_pr_addr(&m->mon_inst[i].addr));
return m; return m;
bad: bad:
...@@ -203,7 +203,7 @@ static void reopen_session(struct ceph_mon_client *monc) ...@@ -203,7 +203,7 @@ static void reopen_session(struct ceph_mon_client *monc)
{ {
if (!monc->hunting) if (!monc->hunting)
pr_info("mon%d %s session lost, hunting for new mon\n", pr_info("mon%d %s session lost, hunting for new mon\n",
monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr.in_addr)); monc->cur_mon, ceph_pr_addr(&monc->con.peer_addr));
__close_session(monc); __close_session(monc);
__open_session(monc); __open_session(monc);
...@@ -1178,7 +1178,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc, ...@@ -1178,7 +1178,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
__resend_generic_request(monc); __resend_generic_request(monc);
pr_info("mon%d %s session established\n", monc->cur_mon, pr_info("mon%d %s session established\n", monc->cur_mon,
ceph_pr_addr(&monc->con.peer_addr.in_addr)); ceph_pr_addr(&monc->con.peer_addr));
} }
out: out:
......
...@@ -4926,7 +4926,7 @@ static int decode_watcher(void **p, void *end, struct ceph_watch_item *item) ...@@ -4926,7 +4926,7 @@ static int decode_watcher(void **p, void *end, struct ceph_watch_item *item)
dout("%s %s%llu cookie %llu addr %s\n", __func__, dout("%s %s%llu cookie %llu addr %s\n", __func__,
ENTITY_NAME(item->name), item->cookie, ENTITY_NAME(item->name), item->cookie,
ceph_pr_addr(&item->addr.in_addr)); ceph_pr_addr(&item->addr));
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment