Commit e779137a authored by Andy Grover

RDS: break out rdma and data ops into nested structs in rds_message

Clearly separate rdma-related variables in rm from data-related ones.
This is in anticipation of adding atomic support.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
parent 8690bfa1
......@@ -83,11 +83,11 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
ib_dma_unmap_sg(ic->i_cm_id->device,
rm->m_sg, rm->m_nents,
DMA_TO_DEVICE);
rm->data.m_sg, rm->data.m_nents,
DMA_TO_DEVICE);
if (rm->m_rdma_op) {
rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
if (rm->rdma.m_rdma_op) {
rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
/* If the user asked for a completion notification on this
* message, we can implement three different semantics:
......@@ -111,10 +111,10 @@ static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
*/
rds_ib_send_rdma_complete(rm, wc_status);
if (rm->m_rdma_op->r_write)
rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
if (rm->rdma.m_rdma_op->r_write)
rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
else
rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
}
/* If anyone waited for this message to get flushed out, wake
......@@ -244,8 +244,8 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
rm = rds_send_get_message(conn, send->s_op);
if (rm) {
if (rm->m_rdma_op)
rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
if (rm->rdma.m_rdma_op)
rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
rds_ib_send_rdma_complete(rm, wc.status);
rds_message_put(rm);
}
......@@ -532,18 +532,20 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_inc.i_hdr.h_flags,
be32_to_cpu(rm->m_inc.i_hdr.h_len));
*/
if (rm->m_nents) {
rm->m_count = ib_dma_map_sg(dev,
rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
if (rm->m_count == 0) {
if (rm->data.m_nents) {
rm->data.m_count = ib_dma_map_sg(dev,
rm->data.m_sg,
rm->data.m_nents,
DMA_TO_DEVICE);
rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count);
if (rm->data.m_count == 0) {
rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
ret = -ENOMEM; /* XXX ? */
goto out;
}
} else {
rm->m_count = 0;
rm->data.m_count = 0;
}
ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
......@@ -559,10 +561,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
/* If it has a RDMA op, tell the peer we did it. This is
* used by the peer to release use-once RDMA MRs. */
if (rm->m_rdma_op) {
if (rm->rdma.m_rdma_op) {
struct rds_ext_header_rdma ext_hdr;
ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
}
......@@ -590,7 +592,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos];
first = send;
prev = NULL;
scat = &rm->m_sg[sg];
scat = &rm->data.m_sg[sg];
sent = 0;
i = 0;
......@@ -600,7 +602,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
* or when requested by the user. Right now, we let
* the application choose.
*/
if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
send_flags = IB_SEND_FENCE;
/*
......@@ -619,7 +621,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
}
/* if there's data reference it with a chain of work reqs */
for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) {
unsigned int len;
send = &ic->i_sends[pos];
......@@ -697,7 +699,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
sent += sizeof(struct rds_header);
/* if we finished the message then send completion owns it */
if (scat == &rm->m_sg[rm->m_count]) {
if (scat == &rm->data.m_sg[rm->data.m_count]) {
prev->s_rm = ic->i_rm;
prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
......
......@@ -83,11 +83,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
ib_dma_unmap_sg(ic->i_cm_id->device,
rm->m_sg, rm->m_nents,
rm->data.m_sg, rm->data.m_nents,
DMA_TO_DEVICE);
if (rm->m_rdma_op) {
rds_iw_send_unmap_rdma(ic, rm->m_rdma_op);
if (rm->rdma.m_rdma_op) {
rds_iw_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
/* If the user asked for a completion notification on this
* message, we can implement three different semantics:
......@@ -111,10 +111,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
*/
rds_iw_send_rdma_complete(rm, wc_status);
if (rm->m_rdma_op->r_write)
rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
if (rm->rdma.m_rdma_op->r_write)
rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
else
rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
}
/* If anyone waited for this message to get flushed out, wake
......@@ -563,18 +563,20 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_inc.i_hdr.h_flags,
be32_to_cpu(rm->m_inc.i_hdr.h_len));
*/
if (rm->m_nents) {
rm->m_count = ib_dma_map_sg(dev,
rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
if (rm->m_count == 0) {
if (rm->data.m_nents) {
rm->data.m_count = ib_dma_map_sg(dev,
rm->data.m_sg,
rm->data.m_nents,
DMA_TO_DEVICE);
rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count);
if (rm->data.m_count == 0) {
rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
ret = -ENOMEM; /* XXX ? */
goto out;
}
} else {
rm->m_count = 0;
rm->data.m_count = 0;
}
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
......@@ -590,10 +592,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
/* If it has a RDMA op, tell the peer we did it. This is
* used by the peer to release use-once RDMA MRs. */
if (rm->m_rdma_op) {
if (rm->rdma.m_rdma_op) {
struct rds_ext_header_rdma ext_hdr;
ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
}
......@@ -621,7 +623,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
send = &ic->i_sends[pos];
first = send;
prev = NULL;
scat = &rm->m_sg[sg];
scat = &rm->data.m_sg[sg];
sent = 0;
i = 0;
......@@ -631,7 +633,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
* or when requested by the user. Right now, we let
* the application choose.
*/
if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
send_flags = IB_SEND_FENCE;
/*
......@@ -650,7 +652,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
}
/* if there's data reference it with a chain of work reqs */
for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) {
unsigned int len;
send = &ic->i_sends[pos];
......@@ -728,7 +730,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
sent += sizeof(struct rds_header);
/* if we finished the message then send completion owns it */
if (scat == &rm->m_sg[rm->m_count]) {
if (scat == &rm->data.m_sg[rm->data.m_count]) {
prev->s_rm = ic->i_rm;
prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
......
......@@ -63,17 +63,17 @@ static void rds_message_purge(struct rds_message *rm)
if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
return;
for (i = 0; i < rm->m_nents; i++) {
rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i]));
for (i = 0; i < rm->data.m_nents; i++) {
rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.m_sg[i]));
/* XXX will have to put_page for page refs */
__free_page(sg_page(&rm->m_sg[i]));
__free_page(sg_page(&rm->data.m_sg[i]));
}
rm->m_nents = 0;
rm->data.m_nents = 0;
if (rm->m_rdma_op)
rds_rdma_free_op(rm->m_rdma_op);
if (rm->m_rdma_mr)
rds_mr_put(rm->m_rdma_mr);
if (rm->rdma.m_rdma_op)
rds_rdma_free_op(rm->rdma.m_rdma_op);
if (rm->rdma.m_rdma_mr)
rds_mr_put(rm->rdma.m_rdma_mr);
}
void rds_message_inc_purge(struct rds_incoming *inc)
......@@ -224,7 +224,7 @@ struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp)
goto out;
if (nents)
sg_init_table(rm->m_sg, nents);
sg_init_table(rm->data.m_sg, nents);
atomic_set(&rm->m_refcount, 1);
INIT_LIST_HEAD(&rm->m_sock_item);
INIT_LIST_HEAD(&rm->m_conn_item);
......@@ -245,10 +245,10 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
rm->m_nents = ceil(total_len, PAGE_SIZE);
rm->data.m_nents = ceil(total_len, PAGE_SIZE);
for (i = 0; i < rm->m_nents; ++i) {
sg_set_page(&rm->m_sg[i],
for (i = 0; i < rm->data.m_nents; ++i) {
sg_set_page(&rm->data.m_sg[i],
virt_to_page(page_addrs[i]),
PAGE_SIZE, 0);
}
......@@ -278,7 +278,7 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
/*
* now allocate and copy in the data payload.
*/
sg = rm->m_sg;
sg = rm->data.m_sg;
iov = first_iov;
iov_off = 0;
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
......@@ -289,7 +289,7 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
GFP_HIGHUSER);
if (ret)
goto out;
rm->m_nents++;
rm->data.m_nents++;
sg_off = 0;
}
......@@ -348,7 +348,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
iov = first_iov;
iov_off = 0;
sg = rm->m_sg;
sg = rm->data.m_sg;
vec_off = 0;
copied = 0;
......
......@@ -643,14 +643,14 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
struct rds_rdma_op *op;
if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
rm->m_rdma_op)
rm->rdma.m_rdma_op)
return -EINVAL;
op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
if (IS_ERR(op))
return PTR_ERR(op);
rds_stats_inc(s_send_rdma);
rm->m_rdma_op = op;
rm->rdma.m_rdma_op = op;
return 0;
}
......@@ -679,6 +679,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
*/
r_key = rds_rdma_cookie_key(rm->m_rdma_cookie);
spin_lock_irqsave(&rs->rs_rdma_lock, flags);
mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
if (!mr)
......@@ -689,7 +690,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
if (mr) {
mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
rm->m_rdma_mr = mr;
rm->rdma.m_rdma_mr = mr;
}
return err;
}
......@@ -707,5 +708,5 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
rm->m_rdma_cookie != 0)
return -EINVAL;
return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr);
}
......@@ -259,12 +259,18 @@ struct rds_message {
*/
spinlock_t m_rs_lock;
struct rds_sock *m_rs;
struct rds_rdma_op *m_rdma_op;
rds_rdma_cookie_t m_rdma_cookie;
struct rds_mr *m_rdma_mr;
unsigned int m_nents;
unsigned int m_count;
struct scatterlist m_sg[0];
struct {
struct {
struct rds_rdma_op *m_rdma_op;
struct rds_mr *m_rdma_mr;
} rdma;
struct {
unsigned int m_nents;
unsigned int m_count;
struct scatterlist m_sg[0];
} data;
};
};
/*
......
......@@ -166,7 +166,7 @@ int rds_send_xmit(struct rds_connection *conn)
rm = conn->c_xmit_rm;
if (rm &&
conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
conn->c_xmit_sg == rm->m_nents) {
conn->c_xmit_sg == rm->data.m_nents) {
conn->c_xmit_rm = NULL;
conn->c_xmit_sg = 0;
conn->c_xmit_hdr_off = 0;
......@@ -236,7 +236,7 @@ int rds_send_xmit(struct rds_connection *conn)
* connection.
* Therefore, we never retransmit messages with RDMA ops.
*/
if (rm->m_rdma_op &&
if (rm->rdma.m_rdma_op &&
test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
spin_lock_irqsave(&conn->c_lock, flags);
if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
......@@ -268,8 +268,8 @@ int rds_send_xmit(struct rds_connection *conn)
* keep this simple and require that the transport either
* send the whole rdma or none of it.
*/
if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
if (rm->rdma.m_rdma_op && !conn->c_xmit_rdma_sent) {
ret = conn->c_trans->xmit_rdma(conn, rm->rdma.m_rdma_op);
if (ret)
break;
conn->c_xmit_rdma_sent = 1;
......@@ -279,7 +279,7 @@ int rds_send_xmit(struct rds_connection *conn)
}
if (conn->c_xmit_hdr_off < sizeof(struct rds_header) ||
conn->c_xmit_sg < rm->m_nents) {
conn->c_xmit_sg < rm->data.m_nents) {
ret = conn->c_trans->xmit(conn, rm,
conn->c_xmit_hdr_off,
conn->c_xmit_sg,
......@@ -295,7 +295,7 @@ int rds_send_xmit(struct rds_connection *conn)
ret -= tmp;
}
sg = &rm->m_sg[conn->c_xmit_sg];
sg = &rm->data.m_sg[conn->c_xmit_sg];
while (ret) {
tmp = min_t(int, ret, sg->length -
conn->c_xmit_data_off);
......@@ -306,7 +306,7 @@ int rds_send_xmit(struct rds_connection *conn)
sg++;
conn->c_xmit_sg++;
BUG_ON(ret != 0 &&
conn->c_xmit_sg == rm->m_nents);
conn->c_xmit_sg == rm->data.m_nents);
}
}
}
......@@ -419,7 +419,7 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
spin_lock_irqsave(&rm->m_rs_lock, flags);
ro = rm->m_rdma_op;
ro = rm->rdma.m_rdma_op;
if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
ro && ro->r_notify && ro->r_notifier) {
notifier = ro->r_notifier;
......@@ -453,7 +453,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
{
struct rds_rdma_op *ro;
ro = rm->m_rdma_op;
ro = rm->rdma.m_rdma_op;
if (ro && ro->r_notify && ro->r_notifier) {
ro->r_notifier->n_status = status;
list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue);
......@@ -477,7 +477,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
spin_lock_irqsave(&conn->c_lock, flags);
list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
if (rm->m_rdma_op == op) {
if (rm->rdma.m_rdma_op == op) {
atomic_inc(&rm->m_refcount);
found = rm;
goto out;
......@@ -485,7 +485,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
}
list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
if (rm->m_rdma_op == op) {
if (rm->rdma.m_rdma_op == op) {
atomic_inc(&rm->m_refcount);
found = rm;
break;
......@@ -545,7 +545,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
spin_lock(&rs->rs_lock);
if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
struct rds_rdma_op *ro = rm->m_rdma_op;
struct rds_rdma_op *ro = rm->rdma.m_rdma_op;
struct rds_notifier *notifier;
list_del_init(&rm->m_sock_item);
......@@ -557,7 +557,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
&rs->rs_notify_queue);
if (!notifier->n_status)
notifier->n_status = status;
rm->m_rdma_op->r_notifier = NULL;
rm->rdma.m_rdma_op->r_notifier = NULL;
}
was_on_sock = 1;
rm->m_rs = NULL;
......@@ -874,11 +874,11 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
if (ret)
goto out;
if ((rm->m_rdma_cookie || rm->m_rdma_op) &&
if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op) &&
!conn->c_trans->xmit_rdma) {
if (printk_ratelimit())
printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
rm->m_rdma_op, conn->c_trans->xmit_rdma);
rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
ret = -EOPNOTSUPP;
goto out;
}
......
......@@ -166,21 +166,21 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
goto out;
}
while (sg < rm->m_nents) {
while (sg < rm->data.m_nents) {
ret = tc->t_sock->ops->sendpage(tc->t_sock,
sg_page(&rm->m_sg[sg]),
rm->m_sg[sg].offset + off,
rm->m_sg[sg].length - off,
sg_page(&rm->data.m_sg[sg]),
rm->data.m_sg[sg].offset + off,
rm->data.m_sg[sg].length - off,
MSG_DONTWAIT|MSG_NOSIGNAL);
rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]),
rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off,
rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.m_sg[sg]),
rm->data.m_sg[sg].offset + off, rm->data.m_sg[sg].length - off,
ret);
if (ret <= 0)
break;
off += ret;
done += ret;
if (off == rm->m_sg[sg].length) {
if (off == rm->data.m_sg[sg].length) {
off = 0;
sg++;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment