Commit a046d57d, authored by Ursula Braun, committed by David S. Miller

smc: CLC handshake (incl. preparation steps)

* CLC (Connection Layer Control) handshake
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6812baab
obj-$(CONFIG_SMC) += smc.o
smc-y := af_smc.o smc_pnet.o smc_ib.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o
This diff is collapsed.
......@@ -28,6 +28,12 @@ enum smc_state { /* possible states of an SMC socket */
/* Per-socket state of an SMC socket: the generic struct sock, the internal
 * TCP socket that carries the CLC handshake, and the bookkeeping used while
 * listening for and accepting connections.
 * NOTE(review): sk is the first member; smc_sk() presumably depends on that
 * to convert a struct sock pointer - confirm before reordering members.
 */
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct sockaddr *addr; /* inet connect address */
struct smc_sock *listen_smc; /* listen parent */
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
bool use_fallback; /* fallback to tcp */
};
......@@ -40,4 +46,20 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */
#ifdef CONFIG_XFRM
/* Report whether the internal TCP socket of @smc has an entry in either of
 * its two sk_policy slots.
 */
static inline bool using_ipsec(struct smc_sock *smc)
{
	struct sock *tcp_sk = smc->clcsock->sk;

	if (tcp_sk->sk_policy[0])
		return true;
	if (tcp_sk->sk_policy[1])
		return true;
	return false;
}
#else
/* Without CONFIG_XFRM there is no sk_policy to inspect: always false. */
static inline bool using_ipsec(struct smc_sock *smc)
{
	return false;
}
#endif
int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet,
u8 *prefix_len);
#endif /* __SMC_H */
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
* Copyright IBM Corp. 2016
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
#include <linux/in.h>
#include <net/sock.h>
#include <net/tcp.h>
#include "smc.h"
#include "smc_clc.h"
#include "smc_ib.h"
/* Wait for data on the tcp-socket, analyze received data
 *
 * @smc:           SMC socket whose internal TCP socket (clcsock) is read
 * @buf:           receive buffer; holds the complete CLC message on success
 * @buflen:        size of @buf in bytes
 * @expected_type: CLC message type the caller waits for; a DECLINE message
 *                 is always accepted as well
 *
 * The header is peeked first so that exactly one CLC message is consumed
 * from the TCP byte stream. The length announced by the peer is validated
 * against the known message sizes AND against @buflen before the message
 * is read, so a peer can never make us receive beyond the caller's buffer.
 *
 * Returns:
 * 0 if success and it was not a decline that we received.
 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise (-EPROTO also if
 * the message does not fit into @buflen).
 * On every error path the positive errno is stored in smc->sk.sk_err.
 */
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
		     u8 expected_type)
{
	struct sock *clc_sk = smc->clcsock->sk;
	struct smc_clc_msg_hdr *clcm = buf;
	struct msghdr msg = {NULL, 0};
	int reason_code = 0;
	struct kvec vec;
	int len, datlen;
	int krflags;

	/* the peek below writes a full header into buf; the kvec describes
	 * only buflen bytes, so a smaller buffer must be rejected up front
	 */
	if (buflen < (int)sizeof(struct smc_clc_msg_hdr)) {
		smc->sk.sk_err = EPROTO;
		reason_code = -EPROTO;
		goto out;
	}

	/* peek the first few bytes to determine length of data to receive
	 * so we don't consume any subsequent CLC message or payload data
	 * in the TCP byte stream
	 */
	vec.iov_base = buf;
	vec.iov_len = buflen;
	krflags = MSG_PEEK | MSG_WAITALL;
	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
			     sizeof(struct smc_clc_msg_hdr), krflags);
	if (signal_pending(current)) {
		reason_code = -EINTR;
		clc_sk->sk_err = EINTR;
		smc->sk.sk_err = EINTR;
		goto out;
	}
	if (clc_sk->sk_err) {
		reason_code = -clc_sk->sk_err;
		smc->sk.sk_err = clc_sk->sk_err;
		goto out;
	}
	if (!len) { /* peer has performed orderly shutdown */
		smc->sk.sk_err = ECONNRESET;
		reason_code = -ECONNRESET;
		goto out;
	}
	if (len < 0) {
		smc->sk.sk_err = -len;
		reason_code = len;
		goto out;
	}

	/* len is positive from here on; datlen comes from a 16 bit field and
	 * is therefore non-negative, so the int casts below are safe
	 */
	datlen = ntohs(clcm->length);
	if ((len < (int)sizeof(struct smc_clc_msg_hdr)) ||
	    (datlen < (int)sizeof(struct smc_clc_msg_decline)) ||
	    (datlen > (int)sizeof(struct smc_clc_msg_accept_confirm)) ||
	    (datlen > buflen) || /* must fit into the caller's buffer */
	    memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
	    ((clcm->type != SMC_CLC_DECLINE) &&
	     (clcm->type != expected_type))) {
		smc->sk.sk_err = EPROTO;
		reason_code = -EPROTO;
		goto out;
	}

	/* receive the complete CLC message */
	vec.iov_base = buf;
	vec.iov_len = buflen;
	memset(&msg, 0, sizeof(struct msghdr));
	krflags = MSG_WAITALL;
	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
	if (len < datlen) {
		smc->sk.sk_err = EPROTO;
		reason_code = -EPROTO;
		goto out;
	}
	if (clcm->type == SMC_CLC_DECLINE)
		reason_code = SMC_CLC_DECL_REPLY;
out:
	return reason_code;
}
/* send CLC DECLINE message across internal TCP socket
 *
 * @smc:            SMC socket whose clcsock is used for sending
 * @peer_diag_info: diagnosis code placed (big endian) in the message
 * @out_of_sync:    non-zero sets the header flag bit
 *
 * Returns the result of kernel_sendmsg() unchanged. On a negative result
 * the positive errno is stored in smc->sk.sk_err; on a short send EPROTO
 * is stored there.
 */
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
			 u8 out_of_sync)
{
	struct smc_clc_msg_decline decl;
	struct msghdr mhdr;
	struct kvec iov;
	int sent;

	/* build the message: header, sender id, diagnosis, trailer */
	memset(&decl, 0, sizeof(decl));
	memcpy(decl.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	decl.hdr.type = SMC_CLC_DECLINE;
	decl.hdr.length = htons(sizeof(decl));
	decl.hdr.version = SMC_CLC_V1;
	decl.hdr.flag = !!out_of_sync;
	memcpy(decl.id_for_peer, local_systemid, sizeof(local_systemid));
	decl.peer_diagnosis = htonl(peer_diag_info);
	memcpy(decl.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

	memset(&mhdr, 0, sizeof(mhdr));
	iov.iov_base = &decl;
	iov.iov_len = sizeof(decl);
	sent = kernel_sendmsg(smc->clcsock, &mhdr, &iov, 1, sizeof(decl));

	if (sent < 0)
		smc->sk.sk_err = -sent;
	else if (sent < (int)sizeof(decl))
		smc->sk.sk_err = EPROTO;

	return sent;
}
/* send CLC PROPOSAL message across internal TCP socket
 *
 * @smc:      SMC socket whose clcsock is used for sending
 * @smcibdev: IB device supplying gid and mac for the message
 * @ibport:   1-based port number; entry [ibport - 1] of the device's gid
 *            and mac arrays is used
 *
 * Returns:
 * 0 on success.
 * SMC_CLC_DECL_CNFERR if smc_netinfo_by_tcpsk() fails (nothing is sent).
 * -ENETUNREACH if fewer bytes than the whole message were sent.
 * A negative errno if kernel_sendmsg() failed.
 * On send errors the positive errno is also stored in smc->sk.sk_err.
 */
int smc_clc_send_proposal(struct smc_sock *smc,
			  struct smc_ib_device *smcibdev,
			  u8 ibport)
{
	struct smc_clc_msg_proposal pclc;
	int reason_code = 0;
	struct msghdr msg;
	struct kvec vec;
	int len, rc;

	/* send SMC Proposal CLC message */
	memset(&pclc, 0, sizeof(pclc));
	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	pclc.hdr.type = SMC_CLC_PROPOSAL;
	pclc.hdr.length = htons(sizeof(pclc));
	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */
	memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
	memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1],
	       sizeof(smcibdev->mac[ibport - 1]));

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
				  &pclc.prefix_len);
	if (rc)
		return SMC_CLC_DECL_CNFERR; /* configuration error */
	memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	memset(&msg, 0, sizeof(msg));
	vec.iov_base = &pclc;
	vec.iov_len = sizeof(pclc);
	/* due to the few bytes needed for clc-handshake this cannot block */
	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));

	/* Test for a negative result first: comparing the int directly with
	 * sizeof() would convert it to an unsigned value and let a failed
	 * send pass as success.
	 */
	if (len < 0) {
		int tcp_err = smc->clcsock->sk->sk_err;

		/* prefer the error recorded on the TCP socket, but never
		 * report a failed send as 0
		 */
		smc->sk.sk_err = tcp_err ? tcp_err : -len;
		reason_code = -smc->sk.sk_err;
	} else if (len < (int)sizeof(pclc)) {
		reason_code = -ENETUNREACH;
		smc->sk.sk_err = -reason_code;
	}

	return reason_code;
}
/* send CLC CONFIRM message across internal TCP socket
 *
 * @smc: SMC socket whose clcsock is used for sending
 *
 * Returns:
 * 0 on success.
 * -ENETUNREACH if fewer bytes than the whole message were sent.
 * A negative errno if kernel_sendmsg() failed.
 * On errors the positive errno is also stored in smc->sk.sk_err.
 */
int smc_clc_send_confirm(struct smc_sock *smc)
{
	struct smc_clc_msg_accept_confirm cclc;
	int reason_code = 0;
	struct msghdr msg;
	struct kvec vec;
	int len;

	/* send SMC Confirm CLC msg */
	memset(&cclc, 0, sizeof(cclc));
	memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	cclc.hdr.type = SMC_CLC_CONFIRM;
	cclc.hdr.length = htons(sizeof(cclc));
	cclc.hdr.version = SMC_CLC_V1;		/* SMC version */
	memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));

	/* tbd in follow-on patch: fill in link-related values */

	/* tbd in follow-on patch: fill in rmb-related values */

	cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */

	memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

	memset(&msg, 0, sizeof(msg));
	vec.iov_base = &cclc;
	vec.iov_len = sizeof(cclc);
	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));

	/* Test for a negative result first: comparing the int directly with
	 * sizeof() would convert it to an unsigned value and let a failed
	 * send pass as success.
	 */
	if (len < 0) {
		int tcp_err = smc->clcsock->sk->sk_err;

		/* prefer the error recorded on the TCP socket, but never
		 * report a failed send as 0
		 */
		smc->sk.sk_err = tcp_err ? tcp_err : -len;
		reason_code = -smc->sk.sk_err;
	} else if (len < (int)sizeof(cclc)) {
		reason_code = -ENETUNREACH;
		smc->sk.sk_err = -reason_code;
	}

	return reason_code;
}
/* send CLC ACCEPT message across internal TCP socket
 *
 * @new_smc: SMC socket whose clcsock is used for sending
 *
 * Returns 0 if the whole message was sent. Otherwise an error is stored in
 * new_smc->sk.sk_err (EPROTO for a short send; for a failed send the error
 * of the TCP socket, or the errno returned by kernel_sendmsg() if the TCP
 * socket recorded none) and the result of sock_error() on the SMC socket
 * is returned.
 */
int smc_clc_send_accept(struct smc_sock *new_smc)
{
	struct smc_clc_msg_accept_confirm aclc;
	struct msghdr msg;
	struct kvec vec;
	int rc = 0;
	int len;

	memset(&aclc, 0, sizeof(aclc));
	memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
	aclc.hdr.type = SMC_CLC_ACCEPT;
	aclc.hdr.length = htons(sizeof(aclc));
	aclc.hdr.version = SMC_CLC_V1;		/* SMC version */
	memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));

	/* tbd in follow-on patch: fill in link-related values */

	/* tbd in follow-on patch: fill in rmb-related values */

	aclc.conn_idx = 1;			/* as long as 1 RMB = 1 RMBE */
	memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));

	memset(&msg, 0, sizeof(msg));
	vec.iov_base = &aclc;
	vec.iov_len = sizeof(aclc);
	len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));

	/* Test for a negative result first: comparing the int directly with
	 * sizeof() would convert it to an unsigned value and let a failed
	 * send pass as success.
	 */
	if (len < 0) {
		int tcp_err = new_smc->clcsock->sk->sk_err;

		/* prefer the error recorded on the TCP socket, but never
		 * report a failed send as 0
		 */
		new_smc->sk.sk_err = tcp_err ? tcp_err : -len;
		rc = sock_error(&new_smc->sk);
	} else if (len < (int)sizeof(aclc)) {
		new_smc->sk.sk_err = EPROTO;
		rc = sock_error(&new_smc->sk);
	}

	return rc;
}
/*
* Shared Memory Communications over RDMA (SMC-R) and RoCE
*
* CLC (connection layer control) handshake over initial TCP socket to
* prepare for RDMA traffic
*
* Copyright IBM Corp. 2016
*
* Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
*/
#ifndef _SMC_CLC_H
#define _SMC_CLC_H
#include <rdma/ib_verbs.h>
#include "smc.h"
#define SMC_CLC_PROPOSAL 0x01
#define SMC_CLC_ACCEPT 0x02
#define SMC_CLC_CONFIRM 0x03
#define SMC_CLC_DECLINE 0x04
/* eye catcher "SMCR" EBCDIC for CLC messages */
static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
#define SMC_CLC_V1 0x1 /* SMC version */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
#define SMC_CLC_DECL_TIMEOUT 0x02000000 /* timeout */
#define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */
#define SMC_CLC_DECL_IPSEC 0x03030000 /* IPsec usage */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_REPLY 0x06000000 /* reply to a received decline */
#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
/* Common header at the start of every CLC message.
 * length is in network byte order and holds the size of the whole message
 * (the senders store htons(sizeof(<message struct>)) in it).
 * The last byte is a bitfield whose member order is swapped per bitfield
 * endianness so that both variants describe the same bits.
 */
struct smc_clc_msg_hdr { /* header1 of clc messages */
u8 eyecatcher[4]; /* eye catcher */
u8 type; /* proposal / accept / confirm / decline */
__be16 length;
#if defined(__BIG_ENDIAN_BITFIELD)
u8 version : 4,
flag : 1,
rsvd : 3;
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 rsvd : 3,
flag : 1,
version : 4;
#endif
} __packed; /* format defined in RFC7609 */
/* Trailer closing every CLC message; the senders fill it with
 * SMC_EYECATCHER, the same bytes as in the header.
 */
struct smc_clc_msg_trail { /* trailer of clc messages */
u8 eyecatcher[4];
};
/* Sender identification that follows the header in proposal and
 * accept/confirm messages. id_for_peer is filled from local_systemid by
 * all three senders; only the proposal sender also fills gid and mac, from
 * the IB device port.
 */
struct smc_clc_msg_local { /* header2 of clc messages */
u8 id_for_peer[SMC_SYSTEMID_LEN]; /* unique system id */
u8 gid[16]; /* gid of ib_device port */
u8 mac[6]; /* mac of ib_device port */
};
/* CLC proposal message. outgoing_subnet and prefix_len are filled by
 * smc_netinfo_by_tcpsk() from the internal TCP socket.
 * NOTE(review): unlike the header and the accept/confirm message this
 * struct is __aligned(4), not __packed, so the wire layout depends on the
 * members needing no padding - confirm against SMC_SYSTEMID_LEN.
 */
struct smc_clc_msg_proposal { /* clc proposal message */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
__be16 iparea_offset; /* offset to IP address information area */
__be32 outgoing_subnet; /* subnet mask */
u8 prefix_len; /* number of significant bits in mask */
u8 reserved[2];
u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
/* CLC accept and confirm messages share this layout; hdr.type tells them
 * apart. Its size is also the upper bound smc_clc_wait_msg() enforces on
 * the length field of any received CLC message.
 * The rmbe_size/qp_mtu byte is a bitfield whose member order is swapped
 * per bitfield endianness so that both variants describe the same bits.
 */
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
struct smc_clc_msg_local lcl;
u8 qpn[3]; /* QP number */
__be32 rmb_rkey; /* RMB rkey */
u8 conn_idx; /* Connection index, which RMBE in RMB */
__be32 rmbe_alert_token;/* unique connection id */
#if defined(__BIG_ENDIAN_BITFIELD)
u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */
qp_mtu : 4; /* QP mtu */
#elif defined(__LITTLE_ENDIAN_BITFIELD)
u8 qp_mtu : 4,
rmbe_size : 4;
#endif
u8 reserved;
__be64 rmb_dma_addr; /* RMB virtual address */
u8 reserved2;
u8 psn[3]; /* initial packet sequence number */
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __packed; /* format defined in RFC7609 */
/* CLC decline message. Its size is also the lower bound
 * smc_clc_wait_msg() enforces on the length field of any received CLC
 * message. peer_diagnosis is stored big endian (htonl) by the sender.
 * NOTE(review): declared __aligned(4) rather than __packed - the wire
 * layout depends on the members needing no padding; confirm.
 */
struct smc_clc_msg_decline { /* clc decline message */
struct smc_clc_msg_hdr hdr;
u8 id_for_peer[SMC_SYSTEMID_LEN]; /* sender peer_id */
__be32 peer_diagnosis; /* diagnosis information */
u8 reserved2[4];
struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */
} __aligned(4);
struct smc_sock;
struct smc_ib_device;
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
u8 out_of_sync);
int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev,
u8 ibport);
int smc_clc_send_confirm(struct smc_sock *smc);
int smc_clc_send_accept(struct smc_sock *smc);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment