Commit 5aea3dcd authored by Ilya Dryomov

libceph: a major OSD client update

This is a major sync up, up to ~Jewel.  The highlights are:

- per-session request trees (vs a global per-client tree)
- per-session locking (vs a global per-client rwlock)
- homeless OSD session
- no ad-hoc global per-client lists
- support for pool quotas
- foundation for watch/notify v2 support
- foundation for map check (pool deletion detection) support
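
As a rough sketch of how the pieces above fit together: osdc->lock (the
rwsem that replaces map_sem) protects the osdmap and the tree of OSD
sessions, each ceph_osd carries a mutex protecting its per-session
request tree, and requests that currently map to no OSD are parked on
the special homeless session instead of ad-hoc global lists.  The
helpers below, lookup_session() and insert_request(), are hypothetical
stand-ins, not code from this commit:

  /*
   * Sketch only: per-session request tracking under the new locking
   * scheme.  lookup_session() and insert_request() are hypothetical
   * stand-ins, not functions from this commit.
   */
  static void submit_sketch(struct ceph_osd_client *osdc,
                            struct ceph_osd_request *req)
  {
          struct ceph_osd *osd;

          down_read(&osdc->lock);          /* osdmap + osds tree */
          osd = lookup_session(osdc, req); /* &osdc->homeless_osd if unmapped */
          mutex_lock(&osd->lock);          /* protects osd->o_requests */
          insert_request(&osd->o_requests, req); /* rb-tree keyed by r_tid */
          mutex_unlock(&osd->lock);
          up_read(&osdc->lock);
  }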

The switchover is incomplete: lingering requests can be set up and
torn down but aren't ever reestablished.  This functionality is
restored with the introduction of the new lingering infrastructure
(ceph_osd_linger_request, linger_work, etc) in a later commit.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 9dd2845c
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -193,12 +193,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	if (copy_from_user(&dl, arg, sizeof(dl)))
 		return -EFAULT;
 
-	down_read(&osdc->map_sem);
+	down_read(&osdc->lock);
 	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
 					  &dl.object_no, &dl.object_offset,
 					  &olen);
 	if (r < 0) {
-		up_read(&osdc->map_sem);
+		up_read(&osdc->lock);
 		return -EIO;
 	}
 	dl.file_offset -= dl.object_offset;
@@ -217,7 +217,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	r = ceph_object_locator_to_pg(osdc->osdmap, &oid, &oloc, &pgid);
 	if (r < 0) {
-		up_read(&osdc->map_sem);
+		up_read(&osdc->lock);
 		return r;
 	}
@@ -230,7 +230,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 	} else {
 		memset(&dl.osd_addr, 0, sizeof(dl.osd_addr));
 	}
-	up_read(&osdc->map_sem);
+	up_read(&osdc->lock);
 
 	/* send result back to user */
 	if (copy_to_user(arg, &dl, sizeof(dl)))
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -75,7 +75,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
 	char buf[128];
 
 	dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
-	down_read(&osdc->map_sem);
+	down_read(&osdc->lock);
 	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
 	if (pool_name) {
 		size_t len = strlen(pool_name);
@@ -107,7 +107,7 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
 			ret = -ERANGE;
 		}
 	}
-	up_read(&osdc->map_sem);
+	up_read(&osdc->lock);
 	return ret;
 }
@@ -141,13 +141,13 @@ static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
 	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
 	const char *pool_name;
 
-	down_read(&osdc->map_sem);
+	down_read(&osdc->lock);
 	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
 	if (pool_name)
 		ret = snprintf(val, size, "%s", pool_name);
 	else
 		ret = snprintf(val, size, "%lld", (unsigned long long)pool);
-	up_read(&osdc->map_sem);
+	up_read(&osdc->lock);
 	return ret;
 }
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -33,12 +33,13 @@ struct ceph_osd {
 	int o_incarnation;
 	struct rb_node o_node;
 	struct ceph_connection o_con;
-	struct list_head o_requests;
+	struct rb_root o_requests;
 	struct list_head o_linger_requests;
 	struct list_head o_osd_lru;
 	struct ceph_auth_handshake o_auth;
 	unsigned long lru_ttl;
 	struct list_head o_keepalive_item;
+	struct mutex lock;
 };
 
 #define CEPH_OSD_SLAB_OPS	2
@@ -144,8 +145,6 @@ struct ceph_osd_request_target {
 struct ceph_osd_request {
 	u64 r_tid;                   /* unique for this client */
 	struct rb_node r_node;
-	struct list_head r_req_lru_item;
-	struct list_head r_osd_item;
 	struct list_head r_linger_item;
 	struct list_head r_linger_osd_item;
 	struct ceph_osd *r_osd;
@@ -219,19 +218,16 @@ struct ceph_osd_client {
 	struct ceph_client *client;
 
 	struct ceph_osdmap *osdmap;    /* current map */
-	struct rw_semaphore map_sem;
-	struct mutex request_mutex;
+	struct rw_semaphore lock;
 
 	struct rb_root osds;           /* osds */
 	struct list_head osd_lru;      /* idle osds */
 	spinlock_t osd_lru_lock;
-	u64 last_tid;                  /* tid of last request */
-	struct rb_root requests;       /* pending requests */
-	struct list_head req_lru;      /* in-flight lru */
-	struct list_head req_unsent;   /* unsent/need-resend queue */
-	struct list_head req_notarget; /* map to no osd */
 	struct list_head req_linger;   /* lingering requests */
-	int num_requests;
+	struct ceph_osd homeless_osd;
+	atomic64_t last_tid;           /* tid of last request */
+	atomic_t num_requests;
+	atomic_t num_homeless;
 
 	struct delayed_work timeout_work;
 	struct delayed_work osds_timeout_work;
 #ifdef CONFIG_DEBUG_FS
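
A side note on the header change above: with last_tid now an
atomic64_t and the counters atomic_t, tid allocation and request
accounting can proceed without the old request_mutex.  A plausible
usage sketch (assumed, not necessarily the exact code in this commit):

  req->r_tid = atomic64_inc_return(&osdc->last_tid); /* lockless tid */
  atomic_inc(&osdc->num_requests);
  if (osd == &osdc->homeless_osd)          /* request maps to no OSD */
          atomic_inc(&osdc->num_homeless);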
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -182,21 +182,39 @@ static void dump_request(struct seq_file *s, struct ceph_osd_request *req)
 	seq_putc(s, '\n');
 }
 
+static void dump_requests(struct seq_file *s, struct ceph_osd *osd)
+{
+	struct rb_node *n;
+
+	mutex_lock(&osd->lock);
+	for (n = rb_first(&osd->o_requests); n; n = rb_next(n)) {
+		struct ceph_osd_request *req =
+		    rb_entry(n, struct ceph_osd_request, r_node);
+
+		dump_request(s, req);
+	}
+	mutex_unlock(&osd->lock);
+}
+
 static int osdc_show(struct seq_file *s, void *pp)
 {
 	struct ceph_client *client = s->private;
 	struct ceph_osd_client *osdc = &client->osdc;
-	struct rb_node *p;
+	struct rb_node *n;
 
-	mutex_lock(&osdc->request_mutex);
-	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
-		struct ceph_osd_request *req;
-
-		req = rb_entry(p, struct ceph_osd_request, r_node);
-		dump_request(s, req);
+	down_read(&osdc->lock);
+	seq_printf(s, "REQUESTS %d homeless %d\n",
+		   atomic_read(&osdc->num_requests),
+		   atomic_read(&osdc->num_homeless));
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+		dump_requests(s, osd);
 	}
-	mutex_unlock(&osdc->request_mutex);
+	dump_requests(s, &osdc->homeless_osd);
+	up_read(&osdc->lock);
 
 	return 0;
 }
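
Given the seq_printf() format in osdc_show() above, the debugfs dump
now opens with a summary line before listing each session's requests,
along the lines of (illustrative values):

  REQUESTS 2 homeless 1
  ... one line per in-flight request, as emitted by dump_request() ...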
The remaining diff (net/ceph/osd_client.c) is collapsed.