Commit 6467dfdf authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ceph-for-6.11-rc1' of https://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "A small patchset to address bogus I/O errors and ultimately an
  assertion failure in the face of watch errors with -o exclusive
  mappings in RBD marked for stable and some assorted CephFS fixes"

* tag 'ceph-for-6.11-rc1' of https://github.com/ceph/ceph-client:
  rbd: don't assume rbd_is_lock_owner() for exclusive mappings
  rbd: don't assume RBD_LOCK_STATE_LOCKED for exclusive mappings
  rbd: rename RBD_LOCK_STATE_RELEASING and releasing_wait
  ceph: fix incorrect kmalloc size of pagevec mempool
  ceph: periodically flush the cap releases
  ceph: convert comma to semicolon in __ceph_dentry_dir_lease_touch()
  ceph: use cap_wait_list only if debugfs is enabled
parents 732c2753 3ceccb14
...@@ -362,7 +362,7 @@ enum rbd_watch_state { ...@@ -362,7 +362,7 @@ enum rbd_watch_state {
enum rbd_lock_state { enum rbd_lock_state {
RBD_LOCK_STATE_UNLOCKED, RBD_LOCK_STATE_UNLOCKED,
RBD_LOCK_STATE_LOCKED, RBD_LOCK_STATE_LOCKED,
RBD_LOCK_STATE_RELEASING, RBD_LOCK_STATE_QUIESCING,
}; };
/* WatchNotify::ClientId */ /* WatchNotify::ClientId */
...@@ -422,7 +422,7 @@ struct rbd_device { ...@@ -422,7 +422,7 @@ struct rbd_device {
struct list_head running_list; struct list_head running_list;
struct completion acquire_wait; struct completion acquire_wait;
int acquire_err; int acquire_err;
struct completion releasing_wait; struct completion quiescing_wait;
spinlock_t object_map_lock; spinlock_t object_map_lock;
u8 *object_map; u8 *object_map;
...@@ -525,7 +525,7 @@ static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev) ...@@ -525,7 +525,7 @@ static bool __rbd_is_lock_owner(struct rbd_device *rbd_dev)
lockdep_assert_held(&rbd_dev->lock_rwsem); lockdep_assert_held(&rbd_dev->lock_rwsem);
return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED || return rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED ||
rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING; rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING;
} }
static bool rbd_is_lock_owner(struct rbd_device *rbd_dev) static bool rbd_is_lock_owner(struct rbd_device *rbd_dev)
...@@ -3457,13 +3457,14 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req) ...@@ -3457,13 +3457,14 @@ static void rbd_lock_del_request(struct rbd_img_request *img_req)
lockdep_assert_held(&rbd_dev->lock_rwsem); lockdep_assert_held(&rbd_dev->lock_rwsem);
spin_lock(&rbd_dev->lock_lists_lock); spin_lock(&rbd_dev->lock_lists_lock);
if (!list_empty(&img_req->lock_item)) { if (!list_empty(&img_req->lock_item)) {
rbd_assert(!list_empty(&rbd_dev->running_list));
list_del_init(&img_req->lock_item); list_del_init(&img_req->lock_item);
need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_RELEASING && need_wakeup = (rbd_dev->lock_state == RBD_LOCK_STATE_QUIESCING &&
list_empty(&rbd_dev->running_list)); list_empty(&rbd_dev->running_list));
} }
spin_unlock(&rbd_dev->lock_lists_lock); spin_unlock(&rbd_dev->lock_lists_lock);
if (need_wakeup) if (need_wakeup)
complete(&rbd_dev->releasing_wait); complete(&rbd_dev->quiescing_wait);
} }
static int rbd_img_exclusive_lock(struct rbd_img_request *img_req) static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
...@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req) ...@@ -3476,11 +3477,6 @@ static int rbd_img_exclusive_lock(struct rbd_img_request *img_req)
if (rbd_lock_add_request(img_req)) if (rbd_lock_add_request(img_req))
return 1; return 1;
if (rbd_dev->opts->exclusive) {
WARN_ON(1); /* lock got released? */
return -EROFS;
}
/* /*
* Note the use of mod_delayed_work() in rbd_acquire_lock() * Note the use of mod_delayed_work() in rbd_acquire_lock()
* and cancel_delayed_work() in wake_lock_waiters(). * and cancel_delayed_work() in wake_lock_waiters().
...@@ -4181,16 +4177,16 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) ...@@ -4181,16 +4177,16 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
/* /*
* Ensure that all in-flight IO is flushed. * Ensure that all in-flight IO is flushed.
*/ */
rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; rbd_dev->lock_state = RBD_LOCK_STATE_QUIESCING;
rbd_assert(!completion_done(&rbd_dev->releasing_wait)); rbd_assert(!completion_done(&rbd_dev->quiescing_wait));
if (list_empty(&rbd_dev->running_list)) if (list_empty(&rbd_dev->running_list))
return true; return true;
up_write(&rbd_dev->lock_rwsem); up_write(&rbd_dev->lock_rwsem);
wait_for_completion(&rbd_dev->releasing_wait); wait_for_completion(&rbd_dev->quiescing_wait);
down_write(&rbd_dev->lock_rwsem); down_write(&rbd_dev->lock_rwsem);
if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) if (rbd_dev->lock_state != RBD_LOCK_STATE_QUIESCING)
return false; return false;
rbd_assert(list_empty(&rbd_dev->running_list)); rbd_assert(list_empty(&rbd_dev->running_list));
...@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev) ...@@ -4601,6 +4597,10 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
rbd_warn(rbd_dev, "failed to update lock cookie: %d", rbd_warn(rbd_dev, "failed to update lock cookie: %d",
ret); ret);
if (rbd_dev->opts->exclusive)
rbd_warn(rbd_dev,
"temporarily releasing lock on exclusive mapping");
/* /*
* Lock cookie cannot be updated on older OSDs, so do * Lock cookie cannot be updated on older OSDs, so do
* a manual release and queue an acquire. * a manual release and queue an acquire.
...@@ -5376,7 +5376,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec) ...@@ -5376,7 +5376,7 @@ static struct rbd_device *__rbd_dev_create(struct rbd_spec *spec)
INIT_LIST_HEAD(&rbd_dev->acquiring_list); INIT_LIST_HEAD(&rbd_dev->acquiring_list);
INIT_LIST_HEAD(&rbd_dev->running_list); INIT_LIST_HEAD(&rbd_dev->running_list);
init_completion(&rbd_dev->acquire_wait); init_completion(&rbd_dev->acquire_wait);
init_completion(&rbd_dev->releasing_wait); init_completion(&rbd_dev->quiescing_wait);
spin_lock_init(&rbd_dev->object_map_lock); spin_lock_init(&rbd_dev->object_map_lock);
...@@ -6582,11 +6582,6 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) ...@@ -6582,11 +6582,6 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
if (ret) if (ret)
return ret; return ret;
/*
* The lock may have been released by now, unless automatic lock
* transitions are disabled.
*/
rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev));
return 0; return 0;
} }
......
...@@ -3067,10 +3067,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, ...@@ -3067,10 +3067,13 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
flags, &_got); flags, &_got);
WARN_ON_ONCE(ret == -EAGAIN); WARN_ON_ONCE(ret == -EAGAIN);
if (!ret) { if (!ret) {
#ifdef CONFIG_DEBUG_FS
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct cap_wait cw; struct cap_wait cw;
#endif
DEFINE_WAIT_FUNC(wait, woken_wake_function); DEFINE_WAIT_FUNC(wait, woken_wake_function);
#ifdef CONFIG_DEBUG_FS
cw.ino = ceph_ino(inode); cw.ino = ceph_ino(inode);
cw.tgid = current->tgid; cw.tgid = current->tgid;
cw.need = need; cw.need = need;
...@@ -3079,6 +3082,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, ...@@ -3079,6 +3082,7 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
spin_lock(&mdsc->caps_list_lock); spin_lock(&mdsc->caps_list_lock);
list_add(&cw.list, &mdsc->cap_wait_list); list_add(&cw.list, &mdsc->cap_wait_list);
spin_unlock(&mdsc->caps_list_lock); spin_unlock(&mdsc->caps_list_lock);
#endif
/* make sure used fmode not timeout */ /* make sure used fmode not timeout */
ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS); ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
...@@ -3097,9 +3101,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need, ...@@ -3097,9 +3101,11 @@ int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
remove_wait_queue(&ci->i_cap_wq, &wait); remove_wait_queue(&ci->i_cap_wq, &wait);
ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS); ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
#ifdef CONFIG_DEBUG_FS
spin_lock(&mdsc->caps_list_lock); spin_lock(&mdsc->caps_list_lock);
list_del(&cw.list); list_del(&cw.list);
spin_unlock(&mdsc->caps_list_lock); spin_unlock(&mdsc->caps_list_lock);
#endif
if (ret == -EAGAIN) if (ret == -EAGAIN)
continue; continue;
......
...@@ -1589,7 +1589,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di) ...@@ -1589,7 +1589,7 @@ void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di)
} }
spin_lock(&mdsc->dentry_list_lock); spin_lock(&mdsc->dentry_list_lock);
__dentry_dir_lease_touch(mdsc, di), __dentry_dir_lease_touch(mdsc, di);
spin_unlock(&mdsc->dentry_list_lock); spin_unlock(&mdsc->dentry_list_lock);
} }
......
...@@ -5446,6 +5446,8 @@ static void delayed_work(struct work_struct *work) ...@@ -5446,6 +5446,8 @@ static void delayed_work(struct work_struct *work)
} }
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
ceph_flush_cap_releases(mdsc, s);
mutex_lock(&s->s_mutex); mutex_lock(&s->s_mutex);
if (renew_caps) if (renew_caps)
send_renew_caps(mdsc, s); send_renew_caps(mdsc, s);
...@@ -5505,7 +5507,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) ...@@ -5505,7 +5507,9 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
mdsc->last_renew_caps = jiffies; mdsc->last_renew_caps = jiffies;
INIT_LIST_HEAD(&mdsc->cap_delay_list); INIT_LIST_HEAD(&mdsc->cap_delay_list);
#ifdef CONFIG_DEBUG_FS
INIT_LIST_HEAD(&mdsc->cap_wait_list); INIT_LIST_HEAD(&mdsc->cap_wait_list);
#endif
spin_lock_init(&mdsc->cap_delay_lock); spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
INIT_LIST_HEAD(&mdsc->snap_flush_list); INIT_LIST_HEAD(&mdsc->snap_flush_list);
......
...@@ -416,6 +416,8 @@ struct ceph_quotarealm_inode { ...@@ -416,6 +416,8 @@ struct ceph_quotarealm_inode {
struct inode *inode; struct inode *inode;
}; };
#ifdef CONFIG_DEBUG_FS
struct cap_wait { struct cap_wait {
struct list_head list; struct list_head list;
u64 ino; u64 ino;
...@@ -424,6 +426,8 @@ struct cap_wait { ...@@ -424,6 +426,8 @@ struct cap_wait {
int want; int want;
}; };
#endif
enum { enum {
CEPH_MDSC_STOPPING_BEGIN = 1, CEPH_MDSC_STOPPING_BEGIN = 1,
CEPH_MDSC_STOPPING_FLUSHING = 2, CEPH_MDSC_STOPPING_FLUSHING = 2,
...@@ -512,7 +516,9 @@ struct ceph_mds_client { ...@@ -512,7 +516,9 @@ struct ceph_mds_client {
spinlock_t caps_list_lock; spinlock_t caps_list_lock;
struct list_head caps_list; /* unused (reserved or struct list_head caps_list; /* unused (reserved or
unreserved) */ unreserved) */
#ifdef CONFIG_DEBUG_FS
struct list_head cap_wait_list; struct list_head cap_wait_list;
#endif
int caps_total_count; /* total caps allocated */ int caps_total_count; /* total caps allocated */
int caps_use_count; /* in use */ int caps_use_count; /* in use */
int caps_use_max; /* max used caps */ int caps_use_max; /* max used caps */
......
...@@ -961,7 +961,8 @@ static int __init init_caches(void) ...@@ -961,7 +961,8 @@ static int __init init_caches(void)
if (!ceph_mds_request_cachep) if (!ceph_mds_request_cachep)
goto bad_mds_req; goto bad_mds_req;
ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10,
(CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *));
if (!ceph_wb_pagevec_pool) if (!ceph_wb_pagevec_pool)
goto bad_pagevec_pool; goto bad_pagevec_pool;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment