Commit 00e3f5cc authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull Ceph updates from Sage Weil:
 "The two main changes are aio support in CephFS, and a series that
  fixes several issues in the authentication key timeout/renewal code.

  On top of that are a variety of cleanups and minor bug fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  libceph: remove outdated comment
  libceph: kill off ceph_x_ticket_handler::validity
  libceph: invalidate AUTH in addition to a service ticket
  libceph: fix authorizer invalidation, take 2
  libceph: clear messenger auth_retry flag if we fault
  libceph: fix ceph_msg_revoke()
  libceph: use list_for_each_entry_safe
  ceph: use i_size_{read,write} to get/set i_size
  ceph: re-send AIO write request when getting -EOLDSNAP error
  ceph: Asynchronous IO support
  ceph: Avoid to propagate the invalid page point
  ceph: fix double page_unlock() in page_mkwrite()
  rbd: delete an unnecessary check before rbd_dev_destroy()
  libceph: use list_next_entry instead of list_entry_next
  ceph: ceph_frag_contains_value can be boolean
  ceph: remove unused functions in ceph_frag.h
parents 772950ed 7e01726a
......@@ -5185,8 +5185,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
out_err:
rbd_dev_unparent(rbd_dev);
if (parent)
rbd_dev_destroy(parent);
rbd_dev_destroy(parent);
return ret;
}
......
......@@ -1108,7 +1108,7 @@ static int ceph_update_writeable_page(struct file *file,
return 0;
/* past end of file? */
i_size = inode->i_size; /* caller holds i_mutex */
i_size = i_size_read(inode);
if (page_off >= i_size ||
(pos_in_page == 0 && (pos+len) >= i_size &&
......@@ -1149,7 +1149,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
page = grab_cache_page_write_begin(mapping, index, 0);
if (!page)
return -ENOMEM;
*pagep = page;
dout("write_begin file %p inode %p page %p %d~%d\n", file,
inode, page, (int)pos, (int)len);
......@@ -1184,8 +1183,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
zero_user_segment(page, from+copied, len);
/* did file size increase? */
/* (no need for i_size_read(); we caller holds i_mutex */
if (pos+copied > inode->i_size)
if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
if (!PageUptodate(page))
......@@ -1378,11 +1376,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
if ((off > size) ||
(page->mapping != inode->i_mapping))
(page->mapping != inode->i_mapping)) {
unlock_page(page);
goto out;
}
ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
if (ret == 0) {
if (ret >= 0) {
/* success. we'll keep the page locked. */
set_page_dirty(page);
ret = VM_FAULT_LOCKED;
......@@ -1393,8 +1393,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
}
out:
if (ret != VM_FAULT_LOCKED)
unlock_page(page);
if (ret == VM_FAULT_LOCKED ||
ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty;
......
......@@ -106,7 +106,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
aux.size = inode->i_size;
aux.size = i_size_read(inode);
memcpy(buffer, &aux, sizeof(aux));
......@@ -117,9 +117,7 @@ static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
uint64_t *size)
{
const struct ceph_inode_info* ci = cookie_netfs_data;
const struct inode* inode = &ci->vfs_inode;
*size = inode->i_size;
*size = i_size_read(&ci->vfs_inode);
}
static enum fscache_checkaux ceph_fscache_inode_check_aux(
......@@ -134,7 +132,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
aux.size = inode->i_size;
aux.size = i_size_read(inode);
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
......
This diff is collapsed.
......@@ -548,7 +548,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
(truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
dout("size %lld -> %llu\n", inode->i_size, size);
inode->i_size = size;
i_size_write(inode, size);
inode->i_blocks = (size + (1<<9) - 1) >> 9;
ci->i_reported_size = size;
if (truncate_seq != ci->i_truncate_seq) {
......@@ -808,7 +808,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
spin_unlock(&ci->i_ceph_lock);
err = -EINVAL;
if (WARN_ON(symlen != inode->i_size))
if (WARN_ON(symlen != i_size_read(inode)))
goto out;
err = -ENOMEM;
......@@ -1549,7 +1549,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
inode->i_size = size;
i_size_write(inode, size);
inode->i_blocks = (size + (1 << 9) - 1) >> 9;
/* tell the MDS if we are approaching max_size */
......@@ -1911,7 +1911,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_size, attr->ia_size);
if ((issued & CEPH_CAP_FILE_EXCL) &&
attr->ia_size > inode->i_size) {
inode->i_size = attr->ia_size;
i_size_write(inode, attr->ia_size);
inode->i_blocks =
(attr->ia_size + (1 << 9) - 1) >> 9;
inode->i_ctime = attr->ia_ctime;
......
......@@ -40,46 +40,11 @@ static inline __u32 ceph_frag_mask_shift(__u32 f)
return 24 - ceph_frag_bits(f);
}
static inline int ceph_frag_contains_value(__u32 f, __u32 v)
static inline bool ceph_frag_contains_value(__u32 f, __u32 v)
{
return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
}
static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
{
/* is sub as specific as us, and contained by us? */
return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
(ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
}
static inline __u32 ceph_frag_parent(__u32 f)
{
return ceph_frag_make(ceph_frag_bits(f) - 1,
ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
}
static inline int ceph_frag_is_left_child(__u32 f)
{
return ceph_frag_bits(f) > 0 &&
(ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
}
static inline int ceph_frag_is_right_child(__u32 f)
{
return ceph_frag_bits(f) > 0 &&
(ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1;
}
static inline __u32 ceph_frag_sibling(__u32 f)
{
return ceph_frag_make(ceph_frag_bits(f),
ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
}
static inline __u32 ceph_frag_left_child(__u32 f)
{
return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
}
static inline __u32 ceph_frag_right_child(__u32 f)
{
return ceph_frag_make(ceph_frag_bits(f)+1,
ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
}
static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
{
int newbits = ceph_frag_bits(f) + by;
......
......@@ -220,6 +220,7 @@ struct ceph_connection {
struct ceph_entity_addr actual_peer_addr;
/* message out temps */
struct ceph_msg_header out_hdr;
struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */
bool out_msg_done;
......@@ -229,7 +230,6 @@ struct ceph_connection {
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
bool out_kvec_is_msg; /* kvec refers to out_msg */
int out_more; /* there is more data after the kvecs */
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
......
......@@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
void *ticket_buf = NULL;
void *tp, *tpend;
void **ptp;
struct ceph_timespec new_validity;
struct ceph_crypto_key new_session_key;
struct ceph_buffer *new_ticket_blob;
unsigned long new_expires, new_renew_after;
......@@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
ceph_decode_timespec(&validity, &new_validity);
ceph_decode_timespec(&validity, dp);
dp += sizeof(struct ceph_timespec);
new_expires = get_seconds() + validity.tv_sec;
new_renew_after = new_expires - (validity.tv_sec / 4);
dout(" expires=%lu renew_after=%lu\n", new_expires,
......@@ -233,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
ceph_buffer_put(th->ticket_blob);
th->session_key = new_session_key;
th->ticket_blob = new_ticket_blob;
th->validity = new_validity;
th->secret_id = new_secret_id;
th->expires = new_expires;
th->renew_after = new_renew_after;
th->have_key = true;
dout(" got ticket service %d (%s) secret_id %lld len %d\n",
type, ceph_entity_type_name(type), th->secret_id,
(int)th->ticket_blob->vec.iov_len);
......@@ -384,6 +383,24 @@ static int ceph_x_encode_ticket(struct ceph_x_ticket_handler *th,
return -ERANGE;
}
static bool need_key(struct ceph_x_ticket_handler *th)
{
if (!th->have_key)
return true;
return get_seconds() >= th->renew_after;
}
static bool have_key(struct ceph_x_ticket_handler *th)
{
if (th->have_key) {
if (get_seconds() >= th->expires)
th->have_key = false;
}
return th->have_key;
}
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
{
int want = ac->want_keys;
......@@ -402,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
continue;
th = get_ticket_handler(ac, service);
if (IS_ERR(th)) {
*pneed |= service;
continue;
}
if (get_seconds() >= th->renew_after)
if (need_key(th))
*pneed |= service;
if (get_seconds() >= th->expires)
if (!have_key(th))
xi->have_keys &= ~service;
}
}
static int ceph_x_build_request(struct ceph_auth_client *ac,
void *buf, void *end)
{
......@@ -667,14 +682,26 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
ac->private = NULL;
}
static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type)
static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
{
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
memset(&th->validity, 0, sizeof(th->validity));
th->have_key = false;
}
static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type)
{
/*
* We are to invalidate a service ticket in the hopes of
* getting a new, hopefully more valid, one. But, we won't get
* it unless our AUTH ticket is good, so invalidate AUTH ticket
* as well, just in case.
*/
invalidate_ticket(ac, peer_type);
invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
}
static int calcu_signature(struct ceph_x_authorizer *au,
......
......@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler {
unsigned int service;
struct ceph_crypto_key session_key;
struct ceph_timespec validity;
bool have_key;
u64 secret_id;
struct ceph_buffer *ticket_blob;
......
......@@ -23,9 +23,6 @@
#include <linux/ceph/pagelist.h>
#include <linux/export.h>
#define list_entry_next(pos, member) \
list_entry(pos->member.next, typeof(*pos), member)
/*
* Ceph uses the messenger to exchange ceph_msg messages with other
* hosts in the system. The messenger provides ordered and reliable
......@@ -672,6 +669,8 @@ static void reset_connection(struct ceph_connection *con)
}
con->in_seq = 0;
con->in_seq_acked = 0;
con->out_skip = 0;
}
/*
......@@ -771,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
static void con_out_kvec_reset(struct ceph_connection *con)
{
BUG_ON(con->out_skip);
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
......@@ -779,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
int index;
int index = con->out_kvec_left;
index = con->out_kvec_left;
BUG_ON(con->out_skip);
BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
con->out_kvec[index].iov_len = size;
......@@ -790,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
/*
* Chop off a kvec from the end. Return residual number of bytes for
* that kvec, i.e. how many bytes would have been written if the kvec
* hadn't been nuked.
*/
static int con_out_kvec_skip(struct ceph_connection *con)
{
int off = con->out_kvec_cur - con->out_kvec;
int skip = 0;
if (con->out_kvec_bytes > 0) {
skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
BUG_ON(con->out_kvec_bytes < skip);
BUG_ON(!con->out_kvec_left);
con->out_kvec_bytes -= skip;
con->out_kvec_left--;
}
return skip;
}
#ifdef CONFIG_BLOCK
/*
......@@ -1042,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
/* Move on to the next page */
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
cursor->page = list_entry_next(cursor->page, lru);
cursor->page = list_next_entry(cursor->page, lru);
cursor->last_piece = cursor->resid <= PAGE_SIZE;
return true;
......@@ -1166,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
if (!cursor->resid && cursor->total_resid) {
WARN_ON(!cursor->last_piece);
BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
cursor->data = list_entry_next(cursor->data, links);
cursor->data = list_next_entry(cursor->data, links);
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
}
......@@ -1197,7 +1219,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
......@@ -1225,7 +1246,6 @@ static void prepare_write_message(struct ceph_connection *con)
u32 crc;
con_out_kvec_reset(con);
con->out_kvec_is_msg = true;
con->out_msg_done = false;
/* Sneak an ack in there first? If we can get it into the same
......@@ -1265,18 +1285,19 @@ static void prepare_write_message(struct ceph_connection *con)
/* tag + hdr + front + middle */
con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
/* fill in crc (except data pages), footer */
/* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
con->out_msg->footer.flags = 0;
memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
/* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
if (m->middle) {
......@@ -1288,6 +1309,7 @@ static void prepare_write_message(struct ceph_connection *con)
dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
con->out_msg->footer.flags = 0;
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
......@@ -1492,7 +1514,6 @@ static int write_partial_kvec(struct ceph_connection *con)
}
}
con->out_kvec_left = 0;
con->out_kvec_is_msg = false;
ret = 1;
out:
dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
......@@ -1584,6 +1605,7 @@ static int write_partial_skip(struct ceph_connection *con)
{
int ret;
dout("%s %p %d left\n", __func__, con, con->out_skip);
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
......@@ -2506,13 +2528,13 @@ static int try_write(struct ceph_connection *con)
more_kvec:
/* kvec data queued? */
if (con->out_skip) {
ret = write_partial_skip(con);
if (con->out_kvec_left) {
ret = write_partial_kvec(con);
if (ret <= 0)
goto out;
}
if (con->out_kvec_left) {
ret = write_partial_kvec(con);
if (con->out_skip) {
ret = write_partial_skip(con);
if (ret <= 0)
goto out;
}
......@@ -2805,13 +2827,17 @@ static bool con_backoff(struct ceph_connection *con)
static void con_fault_finish(struct ceph_connection *con)
{
dout("%s %p\n", __func__, con);
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
*/
if (con->auth_retry && con->ops->invalidate_authorizer) {
dout("calling invalidate_authorizer()\n");
con->ops->invalidate_authorizer(con);
if (con->auth_retry) {
dout("auth_retry %d, invalidating\n", con->auth_retry);
if (con->ops->invalidate_authorizer)
con->ops->invalidate_authorizer(con);
con->auth_retry = 0;
}
if (con->ops->fault)
......@@ -3050,16 +3076,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
ceph_msg_put(msg);
}
if (con->out_msg == msg) {
dout("%s %p msg %p - was sending\n", __func__, con, msg);
con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
BUG_ON(con->out_skip);
/* footer */
if (con->out_msg_done) {
con->out_skip += con_out_kvec_skip(con);
} else {
BUG_ON(!msg->data_length);
if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
con->out_skip += sizeof(msg->footer);
else
con->out_skip += sizeof(msg->old_footer);
}
/* data, middle, front */
if (msg->data_length)
con->out_skip += msg->cursor.total_resid;
if (msg->middle)
con->out_skip += con_out_kvec_skip(con);
con->out_skip += con_out_kvec_skip(con);
dout("%s %p msg %p - was sending, will write %d skip %d\n",
__func__, con, msg, con->out_kvec_bytes, con->out_skip);
msg->hdr.seq = 0;
con->out_msg = NULL;
ceph_msg_put(msg);
}
mutex_unlock(&con->mutex);
}
......@@ -3361,9 +3402,7 @@ static void ceph_msg_free(struct ceph_msg *m)
static void ceph_msg_release(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
LIST_HEAD(data);
struct list_head *links;
struct list_head *next;
struct ceph_msg_data *data, *next;
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
......@@ -3376,12 +3415,8 @@ static void ceph_msg_release(struct kref *kref)
m->middle = NULL;
}
list_splice_init(&m->data, &data);
list_for_each_safe(links, next, &data) {
struct ceph_msg_data *data;
data = list_entry(links, struct ceph_msg_data, links);
list_del_init(links);
list_for_each_entry_safe(data, next, &m->data, links) {
list_del_init(&data->links);
ceph_msg_data_destroy(data);
}
m->data_length = 0;
......
......@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc)
return monc->client->have_fsid && monc->auth->global_id > 0;
}
/*
* The monitor responds with mount ack indicate mount success. The
* included client ticket allows the client to talk to MDSs and OSDs.
*/
static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment