Commit 22e8bd51 authored by Ilya Dryomov

rbd: support for object-map and fast-diff

Speed up reads, discards and zeroouts through RBD_OBJ_FLAG_MAY_EXIST
and RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT based on object map.

Invalid object maps are not trusted, but still updated.  Note that we
never iterate, resize or invalidate object maps.  If object-map feature
is enabled but object map fails to load, we just fail the requester
(either "rbd map" or I/O, by way of post-acquire action).
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent da5ef6be
This diff is collapsed.
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
/* For format version 2, rbd image 'foo' consists of objects /* For format version 2, rbd image 'foo' consists of objects
* rbd_id.foo - id of image * rbd_id.foo - id of image
* rbd_header.<id> - image metadata * rbd_header.<id> - image metadata
* rbd_object_map.<id> - optional image object map
* rbd_data.<id>.0000000000000000 * rbd_data.<id>.0000000000000000
* rbd_data.<id>.0000000000000001 * rbd_data.<id>.0000000000000001
* ... - data * ... - data
...@@ -25,6 +26,7 @@ ...@@ -25,6 +26,7 @@
*/ */
#define RBD_HEADER_PREFIX "rbd_header." #define RBD_HEADER_PREFIX "rbd_header."
#define RBD_OBJECT_MAP_PREFIX "rbd_object_map."
#define RBD_ID_PREFIX "rbd_id." #define RBD_ID_PREFIX "rbd_id."
#define RBD_V2_DATA_FORMAT "%s.%016llx" #define RBD_V2_DATA_FORMAT "%s.%016llx"
...@@ -39,6 +41,14 @@ enum rbd_notify_op { ...@@ -39,6 +41,14 @@ enum rbd_notify_op {
RBD_NOTIFY_OP_HEADER_UPDATE = 3, RBD_NOTIFY_OP_HEADER_UPDATE = 3,
}; };
/*
 * Per-object state values.
 * NOTE(review): presumably these are the states recorded in the image
 * object map (rbd_object_map.<id>) -- confirm against the rbd driver.
 */
#define OBJECT_NONEXISTENT 0
#define OBJECT_EXISTS 1
#define OBJECT_PENDING 2
#define OBJECT_EXISTS_CLEAN 3
/*
 * Image flag bits.
 * NOTE(review): by name, a set bit marks the object map / fast-diff
 * data as invalid -- confirm where the flags are read.
 */
#define RBD_FLAG_OBJECT_MAP_INVALID (1ULL << 0)
#define RBD_FLAG_FAST_DIFF_INVALID (1ULL << 1)
/* /*
* For format version 1, rbd image 'foo' consists of objects * For format version 1, rbd image 'foo' consists of objects
* foo.rbd - image metadata * foo.rbd - image metadata
......
...@@ -52,4 +52,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc, ...@@ -52,4 +52,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
char *lock_name, u8 *type, char **tag, char *lock_name, u8 *type, char **tag,
struct ceph_locker **lockers, u32 *num_lockers); struct ceph_locker **lockers, u32 *num_lockers);
/*
 * Set up op @which of @req as a call to the "assert_locked" method of the
 * "lock" object class, with @lock_name, @type, @cookie and @tag encoded as
 * the call's input data.  Returns 0 on success or a negative error code.
 */
int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
char *lock_name, u8 type, char *cookie, char *tag);
#endif #endif
...@@ -66,4 +66,6 @@ int ceph_extent_to_file(struct ceph_file_layout *l, ...@@ -66,4 +66,6 @@ int ceph_extent_to_file(struct ceph_file_layout *l,
struct ceph_file_extent **file_extents, struct ceph_file_extent **file_extents,
u32 *num_file_extents); u32 *num_file_extents);
/*
 * Compute an object count from @size bytes and the striping parameters of
 * layout @l (stripe_unit, stripe_count, object_size).
 */
u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size);
#endif #endif
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/ceph/cls_lock_client.h> #include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/decode.h> #include <linux/ceph/decode.h>
#include <linux/ceph/libceph.h>
/** /**
* ceph_cls_lock - grab rados lock for object * ceph_cls_lock - grab rados lock for object
...@@ -378,3 +379,47 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc, ...@@ -378,3 +379,47 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
return ret; return ret;
} }
EXPORT_SYMBOL(ceph_cls_lock_info); EXPORT_SYMBOL(ceph_cls_lock_info);
/**
 * ceph_cls_assert_locked - add a "lock" class assert_locked call to a request
 * @req: OSD request to add the op to
 * @which: index of the op within @req
 * @lock_name: name of the lock
 * @type: lock type, encoded as a single byte
 * @cookie: lock cookie string
 * @tag: lock tag string
 *
 * The encoded input must fit in a single page.
 *
 * Return: 0 on success, -E2BIG if the encoded input would exceed PAGE_SIZE,
 * or the error returned by osd_req_op_cls_init() or
 * ceph_alloc_page_vector().
 */
int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
			   char *lock_name, u8 type, char *cookie, char *tag)
{
	int lock_name_len = strlen(lock_name);
	int cookie_str_len = strlen(cookie);
	int tag_str_len = strlen(tag);
	struct page **page_vec;
	void *cur, *buf_end;
	int buf_size;
	int err;

	/*
	 * Total encoded size, listed in wire order: encoding header,
	 * length-prefixed name, one-byte type, length-prefixed cookie,
	 * length-prefixed tag.
	 */
	buf_size = sizeof(u8) + CEPH_ENCODING_START_BLK_LEN +
		   sizeof(__le32) + lock_name_len +
		   sizeof(__le32) + cookie_str_len +
		   sizeof(__le32) + tag_str_len;
	if (buf_size > PAGE_SIZE)
		return -E2BIG;

	err = osd_req_op_cls_init(req, which, "lock", "assert_locked");
	if (err)
		return err;

	page_vec = ceph_alloc_page_vector(1, GFP_NOIO);
	if (IS_ERR(page_vec))
		return PTR_ERR(page_vec);

	cur = page_address(page_vec[0]);
	buf_end = cur + buf_size;

	/* encode cls_lock_assert_op struct */
	ceph_start_encoding(&cur, 1, 1,
			    buf_size - CEPH_ENCODING_START_BLK_LEN);
	ceph_encode_string(&cur, buf_end, lock_name, lock_name_len);
	ceph_encode_8(&cur, type);
	ceph_encode_string(&cur, buf_end, cookie, cookie_str_len);
	ceph_encode_string(&cur, buf_end, tag, tag_str_len);
	/* the size computed up front must match what was actually encoded */
	WARN_ON(cur != buf_end);

	osd_req_op_cls_request_data_pages(req, which, page_vec, buf_size,
					  0, false, true);
	return 0;
}
EXPORT_SYMBOL(ceph_cls_assert_locked);
...@@ -259,3 +259,20 @@ int ceph_extent_to_file(struct ceph_file_layout *l, ...@@ -259,3 +259,20 @@ int ceph_extent_to_file(struct ceph_file_layout *l,
return 0; return 0;
} }
EXPORT_SYMBOL(ceph_extent_to_file); EXPORT_SYMBOL(ceph_extent_to_file);
/**
 * ceph_get_num_objects - count the objects covered by @size bytes
 * @l: file layout supplying stripe_unit, stripe_count and object_size
 * @size: size in bytes
 *
 * One period is stripe_count * object_size bytes and spans stripe_count
 * objects.  The count starts from whole periods (rounded up) and is then
 * reduced by the objects of the last period that a short tail does not
 * reach.
 *
 * Return: the computed number of objects.
 */
u64 ceph_get_num_objects(struct ceph_file_layout *l, u64 size)
{
	u64 stripe_count = l->stripe_count;
	u64 period = stripe_count * l->object_size;
	u64 tail_bytes;
	u64 num_objs;

	div64_u64_rem(size, period, &tail_bytes);
	num_objs = DIV64_U64_ROUND_UP(size, period) * stripe_count;

	/*
	 * No partial period, or the tail fills at least one stripe unit
	 * in every object of the last period: nothing to subtract.
	 */
	if (tail_bytes == 0 || tail_bytes >= stripe_count * l->stripe_unit)
		return num_objs;

	/* only the first DIV_ROUND_UP(tail, stripe_unit) objects are touched */
	return num_objs - (stripe_count -
			   DIV_ROUND_UP_ULL(tail_bytes, l->stripe_unit));
}
EXPORT_SYMBOL(ceph_get_num_objects);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment