Commit a4484705 authored by David S. Miller's avatar David S. Miller

Merge branch 'tcp-smc-rendezvous'

Ursula Braun says:

====================
TCP experimental option for SMC rendezvous

SMC-capability is to be negotiated with a TCP experimental option.
As requested during code review of our previous approach using
netfilter hooks, here's a new version. It touches tcp-code in the
first patch and exploits the new tcp flag in the smc-code.

Changelog:

V3:
* move include for linux/unaligned/access_ok.h to tcp_input.c

V2:
* switch to current jump labels API
* remove static key checking in smc_set_capability()
  (comment from Eric Dumazet)
* use inet_request_sock parameter for smc_set_option_cond()
* smc_listen_work(): replace local variable lgr_lock_taken by new labels
                     and separate this change into a prerequisite first
                     patch
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3fc27b71 c5c1cc9c
...@@ -98,7 +98,8 @@ struct tcp_options_received { ...@@ -98,7 +98,8 @@ struct tcp_options_received {
tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */ tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */
dsack : 1, /* D-SACK is scheduled */ dsack : 1, /* D-SACK is scheduled */
wscale_ok : 1, /* Wscale seen on SYN packet */ wscale_ok : 1, /* Wscale seen on SYN packet */
sack_ok : 4, /* SACK seen on SYN packet */ sack_ok : 3, /* SACK seen on SYN packet */
smc_ok : 1, /* SMC seen on SYN packet */
snd_wscale : 4, /* Window scaling received from sender */ snd_wscale : 4, /* Window scaling received from sender */
rcv_wscale : 4; /* Window scaling to send to receiver */ rcv_wscale : 4; /* Window scaling to send to receiver */
u8 num_sacks; /* Number of SACK blocks */ u8 num_sacks; /* Number of SACK blocks */
...@@ -110,6 +111,9 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) ...@@ -110,6 +111,9 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{ {
rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
rx_opt->wscale_ok = rx_opt->snd_wscale = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
#if IS_ENABLED(CONFIG_SMC)
rx_opt->smc_ok = 0;
#endif
} }
/* This is the max number of SACKS that we'll generate and process. It's safe /* This is the max number of SACKS that we'll generate and process. It's safe
...@@ -229,7 +233,8 @@ struct tcp_sock { ...@@ -229,7 +233,8 @@ struct tcp_sock {
syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
save_syn:1, /* Save headers of SYN packet */ save_syn:1, /* Save headers of SYN packet */
is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
syn_smc:1; /* SYN includes SMC */
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
/* RTT measurement */ /* RTT measurement */
......
...@@ -92,7 +92,8 @@ struct inet_request_sock { ...@@ -92,7 +92,8 @@ struct inet_request_sock {
wscale_ok : 1, wscale_ok : 1,
ecn_ok : 1, ecn_ok : 1,
acked : 1, acked : 1,
no_srccheck: 1; no_srccheck: 1,
smc_ok : 1;
kmemcheck_bitfield_end(flags); kmemcheck_bitfield_end(flags);
u32 ir_mark; u32 ir_mark;
union { union {
......
...@@ -191,6 +191,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -191,6 +191,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
* experimental options. See draft-ietf-tcpm-experimental-options-00.txt * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
*/ */
#define TCPOPT_FASTOPEN_MAGIC 0xF989 #define TCPOPT_FASTOPEN_MAGIC 0xF989
#define TCPOPT_SMC_MAGIC 0xE2D4C3D9
/* /*
* TCP option lengths * TCP option lengths
...@@ -203,6 +204,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -203,6 +204,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_MD5SIG 18 #define TCPOLEN_MD5SIG 18
#define TCPOLEN_FASTOPEN_BASE 2 #define TCPOLEN_FASTOPEN_BASE 2
#define TCPOLEN_EXP_FASTOPEN_BASE 4 #define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_EXP_SMC_BASE 6
/* But this is what stacks really send out. */ /* But this is what stacks really send out. */
#define TCPOLEN_TSTAMP_ALIGNED 12 #define TCPOLEN_TSTAMP_ALIGNED 12
...@@ -213,6 +215,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -213,6 +215,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_SACK_PERBLOCK 8 #define TCPOLEN_SACK_PERBLOCK 8
#define TCPOLEN_MD5SIG_ALIGNED 20 #define TCPOLEN_MD5SIG_ALIGNED 20
#define TCPOLEN_MSS_ALIGNED 4 #define TCPOLEN_MSS_ALIGNED 4
#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8
/* Flags in tp->nonagle */ /* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
...@@ -2108,4 +2111,8 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) ...@@ -2108,4 +2111,8 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{ {
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1); return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
} }
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
#endif /* _TCP_H */ #endif /* _TCP_H */
...@@ -270,6 +270,7 @@ ...@@ -270,6 +270,7 @@
#include <linux/time.h> #include <linux/time.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/errqueue.h> #include <linux/errqueue.h>
#include <linux/static_key.h>
#include <net/icmp.h> #include <net/icmp.h>
#include <net/inet_common.h> #include <net/inet_common.h>
...@@ -302,6 +303,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem); ...@@ -302,6 +303,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated); EXPORT_SYMBOL(tcp_memory_allocated);
#if IS_ENABLED(CONFIG_SMC)
DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
EXPORT_SYMBOL(tcp_have_smc);
#endif
/* /*
* Current number of TCP sockets. * Current number of TCP sockets.
*/ */
......
...@@ -76,6 +76,8 @@ ...@@ -76,6 +76,8 @@
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <linux/errqueue.h> #include <linux/errqueue.h>
#include <trace/events/tcp.h> #include <trace/events/tcp.h>
#include <linux/unaligned/access_ok.h>
#include <linux/static_key.h>
int sysctl_tcp_fack __read_mostly; int sysctl_tcp_fack __read_mostly;
int sysctl_tcp_max_reordering __read_mostly = 300; int sysctl_tcp_max_reordering __read_mostly = 300;
...@@ -3737,6 +3739,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, ...@@ -3737,6 +3739,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
foc->exp = exp_opt; foc->exp = exp_opt;
} }
/* Detect the SMC experimental TCP option (TCPOPT_EXP + SMC magic) in a
 * received segment and record it in @opt_rx->smc_ok.  Only SYN segments
 * are considered; @ptr points at the option payload and @opsize is the
 * full option length as parsed by the caller.
 */
static void smc_parse_options(const struct tcphdr *th,
			      struct tcp_options_received *opt_rx,
			      const unsigned char *ptr,
			      int opsize)
{
#if IS_ENABLED(CONFIG_SMC)
	/* static key keeps this a NOP until the SMC module is loaded */
	if (!static_branch_unlikely(&tcp_have_smc))
		return;

	/* SMC is signalled on SYNs only; option length must be even and
	 * large enough to carry the 32-bit magic value.
	 */
	if (th->syn && !(opsize & 1) && opsize >= TCPOLEN_EXP_SMC_BASE &&
	    get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
		opt_rx->smc_ok = 1;
#endif
}
/* Look for tcp options. Normally only called on SYN and SYNACK packets. /* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when * But, this can also be called on packets in the established flow when
* the fast version below fails. * the fast version below fails.
...@@ -3844,6 +3861,9 @@ void tcp_parse_options(const struct net *net, ...@@ -3844,6 +3861,9 @@ void tcp_parse_options(const struct net *net,
tcp_parse_fastopen_option(opsize - tcp_parse_fastopen_option(opsize -
TCPOLEN_EXP_FASTOPEN_BASE, TCPOLEN_EXP_FASTOPEN_BASE,
ptr + 2, th->syn, foc, true); ptr + 2, th->syn, foc, true);
else
smc_parse_options(th, opt_rx, ptr,
opsize);
break; break;
} }
...@@ -5598,6 +5618,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, ...@@ -5598,6 +5618,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
return false; return false;
} }
/* Active-open side: if we advertised SMC on our SYN but the peer's
 * SYN-ACK did not carry the SMC option, drop SMC capability for this
 * connection so it falls back to plain TCP.
 */
static void smc_check_reset_syn(struct tcp_sock *tp)
{
#if IS_ENABLED(CONFIG_SMC)
	if (!static_branch_unlikely(&tcp_have_smc))
		return;

	if (tp->syn_smc && !tp->rx_opt.smc_ok)
		tp->syn_smc = 0;
#endif
}
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
const struct tcphdr *th) const struct tcphdr *th)
{ {
...@@ -5704,6 +5734,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5704,6 +5734,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* is initialized. */ * is initialized. */
tp->copied_seq = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt;
smc_check_reset_syn(tp);
smp_mb(); smp_mb();
tcp_finish_connect(sk, skb); tcp_finish_connect(sk, skb);
...@@ -6157,6 +6189,9 @@ static void tcp_openreq_init(struct request_sock *req, ...@@ -6157,6 +6189,9 @@ static void tcp_openreq_init(struct request_sock *req,
ireq->ir_rmt_port = tcp_hdr(skb)->source; ireq->ir_rmt_port = tcp_hdr(skb)->source;
ireq->ir_num = ntohs(tcp_hdr(skb)->dest); ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
ireq->ir_mark = inet_request_mark(sk, skb); ireq->ir_mark = inet_request_mark(sk, skb);
#if IS_ENABLED(CONFIG_SMC)
ireq->smc_ok = rx_opt->smc_ok;
#endif
} }
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/static_key.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/inet_common.h> #include <net/inet_common.h>
#include <net/xfrm.h> #include <net/xfrm.h>
...@@ -416,6 +417,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) ...@@ -416,6 +417,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
} }
EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
/* Passive-open side: when creating the child socket, clear its SMC
 * capability if the listener was SMC-capable but the connecting peer's
 * SYN did not carry the SMC option (recorded in the request sock).
 */
static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
				    struct request_sock *req,
				    struct tcp_sock *newtp)
{
#if IS_ENABLED(CONFIG_SMC)
	struct inet_request_sock *ireq = inet_rsk(req);

	if (!static_branch_unlikely(&tcp_have_smc))
		return;

	if (oldtp->syn_smc && !ireq->smc_ok)
		newtp->syn_smc = 0;
#endif
}
/* This is not only more efficient than what we used to do, it eliminates /* This is not only more efficient than what we used to do, it eliminates
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
* *
...@@ -433,6 +449,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, ...@@ -433,6 +449,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct tcp_request_sock *treq = tcp_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk = inet_csk(newsk); struct inet_connection_sock *newicsk = inet_csk(newsk);
struct tcp_sock *newtp = tcp_sk(newsk); struct tcp_sock *newtp = tcp_sk(newsk);
struct tcp_sock *oldtp = tcp_sk(sk);
smc_check_reset_syn_req(oldtp, req, newtp);
/* Now setup tcp_sock */ /* Now setup tcp_sock */
newtp->pred_flags = 0; newtp->pred_flags = 0;
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/static_key.h>
#include <trace/events/tcp.h> #include <trace/events/tcp.h>
...@@ -422,6 +423,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) ...@@ -422,6 +423,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_MD5 (1 << 2) #define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3) #define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8) #define OPTION_FAST_OPEN_COOKIE (1 << 8)
#define OPTION_SMC (1 << 9)
/* Emit the SMC experimental option (2 NOPs + kind/len + 32-bit magic,
 * 8 bytes total) at @ptr when OPTION_SMC was selected in @options.
 * Writes nothing when SMC is disabled or not requested.
 */
static void smc_options_write(__be32 *ptr, u16 *options)
{
#if IS_ENABLED(CONFIG_SMC)
	if (!static_branch_unlikely(&tcp_have_smc))
		return;

	if (likely(!(OPTION_SMC & *options)))
		return;

	/* pad with NOPs so the 6-byte option occupies two aligned words */
	*ptr++ = htonl((TCPOPT_NOP << 24) |
		       (TCPOPT_NOP << 16) |
		       (TCPOPT_EXP << 8) |
		       (TCPOLEN_EXP_SMC_BASE));
	*ptr++ = htonl(TCPOPT_SMC_MAGIC);
#endif
}
struct tcp_out_options { struct tcp_out_options {
u16 options; /* bit field of OPTION_* */ u16 options; /* bit field of OPTION_* */
...@@ -540,6 +557,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, ...@@ -540,6 +557,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
} }
ptr += (len + 3) >> 2; ptr += (len + 3) >> 2;
} }
smc_options_write(ptr, &options);
}
/* SYN path: request the SMC option when the socket is SMC-capable and
 * enough TCP option space remains; reserves the aligned option size.
 */
static void smc_set_option(const struct tcp_sock *tp,
			   struct tcp_out_options *opts,
			   unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
	if (!static_branch_unlikely(&tcp_have_smc))
		return;

	if (tp->syn_smc && *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
		opts->options |= OPTION_SMC;
		*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
	}
#endif
}
/* SYN-ACK path: like smc_set_option(), but additionally requires that
 * the peer's SYN advertised SMC (ireq->smc_ok) before reserving space
 * for the option.
 */
static void smc_set_option_cond(const struct tcp_sock *tp,
				const struct inet_request_sock *ireq,
				struct tcp_out_options *opts,
				unsigned int *remaining)
{
#if IS_ENABLED(CONFIG_SMC)
	if (!static_branch_unlikely(&tcp_have_smc))
		return;

	if (tp->syn_smc && ireq->smc_ok &&
	    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
		opts->options |= OPTION_SMC;
		*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
	}
#endif
}
/* Compute TCP options for SYN packets. This is not the final /* Compute TCP options for SYN packets. This is not the final
...@@ -607,11 +659,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, ...@@ -607,11 +659,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
} }
} }
smc_set_option(tp, opts, &remaining);
return MAX_TCP_OPTION_SPACE - remaining; return MAX_TCP_OPTION_SPACE - remaining;
} }
/* Set up TCP options for SYN-ACKs. */ /* Set up TCP options for SYN-ACKs. */
static unsigned int tcp_synack_options(struct request_sock *req, static unsigned int tcp_synack_options(const struct sock *sk,
struct request_sock *req,
unsigned int mss, struct sk_buff *skb, unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts, struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5, const struct tcp_md5sig_key *md5,
...@@ -667,6 +722,8 @@ static unsigned int tcp_synack_options(struct request_sock *req, ...@@ -667,6 +722,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
} }
} }
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
return MAX_TCP_OPTION_SPACE - remaining; return MAX_TCP_OPTION_SPACE - remaining;
} }
...@@ -3195,8 +3252,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, ...@@ -3195,8 +3252,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif #endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
sizeof(*th); foc) + sizeof(*th);
skb_push(skb, tcp_header_size); skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb); skb_reset_transport_header(skb);
......
...@@ -390,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc) ...@@ -390,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0; int rc = 0;
u8 ibport; u8 ibport;
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
goto out_connected;
}
/* IPSec connections opt out of SMC-R optimizations */ /* IPSec connections opt out of SMC-R optimizations */
if (using_ipsec(smc)) { if (using_ipsec(smc)) {
reason_code = SMC_CLC_DECL_IPSEC; reason_code = SMC_CLC_DECL_IPSEC;
...@@ -555,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, ...@@ -555,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
} }
smc_copy_sock_settings_to_clc(smc); smc_copy_sock_settings_to_clc(smc);
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_connect(smc->clcsock, addr, alen, flags); rc = kernel_connect(smc->clcsock, addr, alen, flags);
if (rc) if (rc)
goto out; goto out;
...@@ -759,6 +766,12 @@ static void smc_listen_work(struct work_struct *work) ...@@ -759,6 +766,12 @@ static void smc_listen_work(struct work_struct *work)
u8 prefix_len; u8 prefix_len;
u8 ibport; u8 ibport;
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
new_smc->use_fallback = true;
goto out_connected;
}
/* do inband token exchange - /* do inband token exchange -
*wait for and receive SMC Proposal CLC message *wait for and receive SMC Proposal CLC message
*/ */
...@@ -808,7 +821,7 @@ static void smc_listen_work(struct work_struct *work) ...@@ -808,7 +821,7 @@ static void smc_listen_work(struct work_struct *work)
rc = local_contact; rc = local_contact;
if (rc == -ENOMEM) if (rc == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
goto decline_rdma; goto decline_rdma_unlock;
} }
link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
...@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work) ...@@ -816,7 +829,7 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_buf_create(new_smc); rc = smc_buf_create(new_smc);
if (rc) { if (rc) {
reason_code = SMC_CLC_DECL_MEM; reason_code = SMC_CLC_DECL_MEM;
goto decline_rdma; goto decline_rdma_unlock;
} }
smc_close_init(new_smc); smc_close_init(new_smc);
...@@ -831,7 +844,7 @@ static void smc_listen_work(struct work_struct *work) ...@@ -831,7 +844,7 @@ static void smc_listen_work(struct work_struct *work)
buf_desc->mr_rx[SMC_SINGLE_LINK]); buf_desc->mr_rx[SMC_SINGLE_LINK]);
if (rc) { if (rc) {
reason_code = SMC_CLC_DECL_INTERR; reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma; goto decline_rdma_unlock;
} }
} }
} }
...@@ -839,15 +852,15 @@ static void smc_listen_work(struct work_struct *work) ...@@ -839,15 +852,15 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_clc_send_accept(new_smc, local_contact); rc = smc_clc_send_accept(new_smc, local_contact);
if (rc) if (rc)
goto out_err; goto out_err_unlock;
/* receive SMC Confirm CLC message */ /* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
SMC_CLC_CONFIRM); SMC_CLC_CONFIRM);
if (reason_code < 0) if (reason_code < 0)
goto out_err; goto out_err_unlock;
if (reason_code > 0) if (reason_code > 0)
goto decline_rdma; goto decline_rdma_unlock;
smc_conn_save_peer_info(new_smc, &cclc); smc_conn_save_peer_info(new_smc, &cclc);
if (local_contact == SMC_FIRST_CONTACT) if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, &cclc); smc_link_save_peer_info(link, &cclc);
...@@ -855,34 +868,34 @@ static void smc_listen_work(struct work_struct *work) ...@@ -855,34 +868,34 @@ static void smc_listen_work(struct work_struct *work)
rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc); rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
if (rc) { if (rc) {
reason_code = SMC_CLC_DECL_INTERR; reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma; goto decline_rdma_unlock;
} }
if (local_contact == SMC_FIRST_CONTACT) { if (local_contact == SMC_FIRST_CONTACT) {
rc = smc_ib_ready_link(link); rc = smc_ib_ready_link(link);
if (rc) { if (rc) {
reason_code = SMC_CLC_DECL_INTERR; reason_code = SMC_CLC_DECL_INTERR;
goto decline_rdma; goto decline_rdma_unlock;
} }
/* QP confirmation over RoCE fabric */ /* QP confirmation over RoCE fabric */
reason_code = smc_serv_conf_first_link(new_smc); reason_code = smc_serv_conf_first_link(new_smc);
if (reason_code < 0) { if (reason_code < 0) {
/* peer is not aware of a problem */ /* peer is not aware of a problem */
rc = reason_code; rc = reason_code;
goto out_err; goto out_err_unlock;
} }
if (reason_code > 0) if (reason_code > 0)
goto decline_rdma; goto decline_rdma_unlock;
} }
smc_tx_init(new_smc); smc_tx_init(new_smc);
mutex_unlock(&smc_create_lgr_pending);
out_connected: out_connected:
sk_refcnt_debug_inc(newsmcsk); sk_refcnt_debug_inc(newsmcsk);
if (newsmcsk->sk_state == SMC_INIT) if (newsmcsk->sk_state == SMC_INIT)
newsmcsk->sk_state = SMC_ACTIVE; newsmcsk->sk_state = SMC_ACTIVE;
enqueue: enqueue:
mutex_unlock(&smc_create_lgr_pending);
lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) { if (lsmc->sk.sk_state == SMC_LISTEN) {
smc_accept_enqueue(&lsmc->sk, newsmcsk); smc_accept_enqueue(&lsmc->sk, newsmcsk);
...@@ -896,6 +909,8 @@ static void smc_listen_work(struct work_struct *work) ...@@ -896,6 +909,8 @@ static void smc_listen_work(struct work_struct *work)
sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
return; return;
decline_rdma_unlock:
mutex_unlock(&smc_create_lgr_pending);
decline_rdma: decline_rdma:
/* RDMA setup failed, switch back to TCP */ /* RDMA setup failed, switch back to TCP */
smc_conn_free(&new_smc->conn); smc_conn_free(&new_smc->conn);
...@@ -907,6 +922,8 @@ static void smc_listen_work(struct work_struct *work) ...@@ -907,6 +922,8 @@ static void smc_listen_work(struct work_struct *work)
} }
goto out_connected; goto out_connected;
out_err_unlock:
mutex_unlock(&smc_create_lgr_pending);
out_err: out_err:
newsmcsk->sk_state = SMC_CLOSED; newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn); smc_conn_free(&new_smc->conn);
...@@ -963,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog) ...@@ -963,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog)
* them to the clc socket -- copy smc socket options to clc socket * them to the clc socket -- copy smc socket options to clc socket
*/ */
smc_copy_sock_settings_to_clc(smc); smc_copy_sock_settings_to_clc(smc);
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
rc = kernel_listen(smc->clcsock, backlog); rc = kernel_listen(smc->clcsock, backlog);
if (rc) if (rc)
...@@ -1405,6 +1423,7 @@ static int __init smc_init(void) ...@@ -1405,6 +1423,7 @@ static int __init smc_init(void)
goto out_sock; goto out_sock;
} }
static_branch_enable(&tcp_have_smc);
return 0; return 0;
out_sock: out_sock:
...@@ -1429,6 +1448,7 @@ static void __exit smc_exit(void) ...@@ -1429,6 +1448,7 @@ static void __exit smc_exit(void)
list_del_init(&lgr->list); list_del_init(&lgr->list);
smc_lgr_free(lgr); /* free link group */ smc_lgr_free(lgr); /* free link group */
} }
static_branch_disable(&tcp_have_smc);
smc_ib_unregister_client(); smc_ib_unregister_client();
sock_unregister(PF_SMC); sock_unregister(PF_SMC);
proto_unregister(&smc_proto); proto_unregister(&smc_proto);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment