Commit 042a4197 authored by David S. Miller

Merge branch 'for_net-next-5.1/rds-tos-v4' of...

Merge branch 'for_net-next-5.1/rds-tos-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux

Santosh Shilimkar says:

====================
rds: add tos support

RDS applications make use of tos to classify database traffic.
This feature has been used in shipping products from 2.6.32 based
kernels. It is tied to the RDS v4.1 protocol version, and compatibility
is negotiated as part of connection setup.

The patchset keeps full backward compatibility using the existing connection
negotiation scheme. Currently the feature is exploited by the RDMA
transport; for the TCP transport, all user tos values are mapped to the
same default class (0).

For RDMA transports, the RDMA CM service type API is used to
set up different SLs (service levels), and the IB fabric is configured
for tos mapping using the Subnet Manager (SL to VL mappings).
Similarly, for a RoCE fabric, the user priority is mapped to different
DSCP code points, which are associated with different switch queues
in the fabric.

The original code was developed by Bang Nguyen in a downstream kernel back in
the 2.6.32 kernel days, and it has evolved significantly over time.

Thanks to Yanjun for testing with various combinations of hosts, such as
v3.1<->v4.1, v4.1<->v3.1, v4.1 upstream to shipping v4.1, etc.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents e90b1fd8 fd261ce6
......@@ -69,6 +69,12 @@
#define RDS_TRANS_COUNT 3
#define RDS_TRANS_NONE (~0)
/* IOCTLS commands for SOL_RDS */
#define SIOCRDSSETTOS (SIOCPROTOPRIVATE)
#define SIOCRDSGETTOS (SIOCPROTOPRIVATE + 1)
typedef __u8 rds_tos_t;
/*
* Control message types for SOL_RDS.
*
......@@ -149,6 +155,7 @@ struct rds_info_connection {
__be32 faddr;
__u8 transport[TRANSNAMSIZ]; /* null term ascii */
__u8 flags;
__u8 tos;
} __attribute__((packed));
struct rds6_info_connection {
......@@ -171,6 +178,7 @@ struct rds_info_message {
__be16 lport;
__be16 fport;
__u8 flags;
__u8 tos;
} __attribute__((packed));
struct rds6_info_message {
......@@ -214,6 +222,7 @@ struct rds_info_tcp_socket {
__u32 last_sent_nxt;
__u32 last_expected_una;
__u32 last_seen_una;
__u8 tos;
} __attribute__((packed));
struct rds6_info_tcp_socket {
......@@ -240,6 +249,7 @@ struct rds_info_rdma_connection {
__u32 max_send_sge;
__u32 rdma_mr_max;
__u32 rdma_mr_size;
__u8 tos;
};
struct rds6_info_rdma_connection {
......@@ -253,6 +263,7 @@ struct rds6_info_rdma_connection {
__u32 max_send_sge;
__u32 rdma_mr_max;
__u32 rdma_mr_size;
__u8 tos;
};
/* RDS message Receive Path Latency points */
......
......@@ -254,7 +254,40 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
return -ENOIOCTLCMD;
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
rds_tos_t utos, tos = 0;
switch (cmd) {
case SIOCRDSSETTOS:
if (get_user(utos, (rds_tos_t __user *)arg))
return -EFAULT;
if (rs->rs_transport &&
rs->rs_transport->get_tos_map)
tos = rs->rs_transport->get_tos_map(utos);
else
return -ENOIOCTLCMD;
spin_lock_bh(&rds_sock_lock);
if (rs->rs_tos || rs->rs_conn) {
spin_unlock_bh(&rds_sock_lock);
return -EINVAL;
}
rs->rs_tos = tos;
spin_unlock_bh(&rds_sock_lock);
break;
case SIOCRDSGETTOS:
spin_lock_bh(&rds_sock_lock);
tos = rs->rs_tos;
spin_unlock_bh(&rds_sock_lock);
if (put_user(tos, (rds_tos_t __user *)arg))
return -EFAULT;
break;
default:
return -ENOIOCTLCMD;
}
return 0;
}
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
......@@ -650,6 +683,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
spin_lock_init(&rs->rs_rdma_lock);
rs->rs_rdma_keys = RB_ROOT;
rs->rs_rx_traces = 0;
rs->rs_tos = 0;
rs->rs_conn = NULL;
spin_lock_bh(&rds_sock_lock);
list_add_tail(&rs->rs_item, &rds_sock_list);
......
......@@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
int dev_if)
u8 tos, int dev_if)
{
struct rds_connection *conn, *ret = NULL;
......@@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
ipv6_addr_equal(&conn->c_laddr, laddr) &&
conn->c_trans == trans &&
conn->c_tos == tos &&
net == rds_conn_net(conn) &&
conn->c_dev_if == dev_if) {
ret = conn;
......@@ -139,6 +140,7 @@ static void __rds_conn_path_init(struct rds_connection *conn,
atomic_set(&cp->cp_state, RDS_CONN_DOWN);
cp->cp_send_gen = 0;
cp->cp_reconnect_jiffies = 0;
cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION;
INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker);
INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker);
INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker);
......@@ -159,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
gfp_t gfp,
gfp_t gfp, u8 tos,
int is_outgoing,
int dev_if)
{
......@@ -171,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
if (conn &&
conn->c_loopback &&
conn->c_trans != &rds_loop_transport &&
......@@ -205,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
conn->c_faddr = *faddr;
conn->c_dev_if = dev_if;
conn->c_tos = tos;
#if IS_ENABLED(CONFIG_IPV6)
/* If the local address is link local, set c_bound_if to be the
......@@ -297,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_connection *found;
found = rds_conn_lookup(net, head, laddr, faddr, trans,
dev_if);
tos, dev_if);
if (found) {
struct rds_conn_path *cp;
int i;
......@@ -332,10 +335,10 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_connection *rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans, gfp_t gfp,
int dev_if)
struct rds_transport *trans, u8 tos,
gfp_t gfp, int dev_if)
{
return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create);
......@@ -343,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
gfp_t gfp, int dev_if)
u8 tos, gfp_t gfp, int dev_if)
{
return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
......
......@@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
iinfo->src_addr = conn->c_laddr.s6_addr32[3];
iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
iinfo->tos = conn->c_tos;
memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
......@@ -514,6 +515,15 @@ void rds_ib_exit(void)
rds_ib_mr_exit();
}
/*
* rds_ib_get_tos_map - map a user-supplied tos value for the IB transport.
* @tos: tos value supplied by the caller.
*
* Returns @tos unchanged (identity mapping).
*/
static u8 rds_ib_get_tos_map(u8 tos)
{
/* 1:1 user to transport map for RDMA transport.
* In future, if custom map is desired, hook can export
* user configurable map.
*/
return tos;
}
struct rds_transport rds_ib_transport = {
.laddr_check = rds_ib_laddr_check,
.xmit_path_complete = rds_ib_xmit_path_complete,
......@@ -536,6 +546,7 @@ struct rds_transport rds_ib_transport = {
.sync_mr = rds_ib_sync_mr,
.free_mr = rds_ib_free_mr,
.flush_mrs = rds_ib_flush_mrs,
.get_tos_map = rds_ib_get_tos_map,
.t_owner = THIS_MODULE,
.t_name = "infiniband",
.t_unloading = rds_ib_is_unloading,
......
......@@ -67,7 +67,9 @@ struct rds_ib_conn_priv_cmn {
u8 ricpc_protocol_major;
u8 ricpc_protocol_minor;
__be16 ricpc_protocol_minor_mask; /* bitmask */
__be32 ricpc_reserved1;
u8 ricpc_dp_toss;
u8 ripc_reserved1;
__be16 ripc_reserved2;
__be64 ricpc_ack_seq;
__be32 ricpc_credit; /* non-zero enables flow ctl */
};
......
......@@ -133,23 +133,24 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
rds_ib_set_flow_control(conn, be32_to_cpu(credit));
}
if (conn->c_version < RDS_PROTOCOL(3, 1)) {
pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
&conn->c_laddr, &conn->c_faddr,
RDS_PROTOCOL_MAJOR(conn->c_version),
RDS_PROTOCOL_MINOR(conn->c_version));
set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
rds_conn_destroy(conn);
return;
} else {
pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c> version %u.%u%s\n",
ic->i_active_side ? "Active" : "Passive",
&conn->c_laddr, &conn->c_faddr,
RDS_PROTOCOL_MAJOR(conn->c_version),
RDS_PROTOCOL_MINOR(conn->c_version),
ic->i_flowctl ? ", flow control" : "");
if (conn->c_version < RDS_PROTOCOL_VERSION) {
if (conn->c_version != RDS_PROTOCOL_COMPAT_VERSION) {
pr_notice("RDS/IB: Connection <%pI6c,%pI6c> version %u.%u no longer supported\n",
&conn->c_laddr, &conn->c_faddr,
RDS_PROTOCOL_MAJOR(conn->c_version),
RDS_PROTOCOL_MINOR(conn->c_version));
rds_conn_destroy(conn);
return;
}
}
pr_notice("RDS/IB: %s conn connected <%pI6c,%pI6c,%d> version %u.%u%s\n",
ic->i_active_side ? "Active" : "Passive",
&conn->c_laddr, &conn->c_faddr, conn->c_tos,
RDS_PROTOCOL_MAJOR(conn->c_version),
RDS_PROTOCOL_MINOR(conn->c_version),
ic->i_flowctl ? ", flow control" : "");
atomic_set(&ic->i_cq_quiesce, 0);
/* Init rings and fill recv. this needs to wait until protocol
......@@ -184,6 +185,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
NULL);
}
conn->c_proposed_version = conn->c_version;
rds_connect_complete(conn);
}
......@@ -220,6 +222,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
dp->ricp_v6.dp_ack_seq =
cpu_to_be64(rds_ib_piggyb_ack(ic));
dp->ricp_v6.dp_cmn.ricpc_dp_toss = conn->c_tos;
conn_param->private_data = &dp->ricp_v6;
conn_param->private_data_len = sizeof(dp->ricp_v6);
......@@ -234,6 +237,7 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
dp->ricp_v4.dp_ack_seq =
cpu_to_be64(rds_ib_piggyb_ack(ic));
dp->ricp_v4.dp_cmn.ricpc_dp_toss = conn->c_tos;
conn_param->private_data = &dp->ricp_v4;
conn_param->private_data_len = sizeof(dp->ricp_v4);
......@@ -389,10 +393,9 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
break;
default:
rdsdebug("Fatal QP Event %u (%s) "
"- connection %pI6c->%pI6c, reconnecting\n",
event->event, ib_event_msg(event->event),
&conn->c_laddr, &conn->c_faddr);
rdsdebug("Fatal QP Event %u (%s) - connection %pI6c->%pI6c, reconnecting\n",
event->event, ib_event_msg(event->event),
&conn->c_laddr, &conn->c_faddr);
rds_conn_drop(conn);
break;
}
......@@ -660,13 +663,16 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
/* Even if len is crap *now* I still want to check it. -ASG */
if (event->param.conn.private_data_len < data_len || major == 0)
return RDS_PROTOCOL_3_0;
return RDS_PROTOCOL_4_0;
common = be16_to_cpu(mask) & RDS_IB_SUPPORTED_PROTOCOLS;
if (major == 3 && common) {
version = RDS_PROTOCOL_3_0;
if (major == 4 && common) {
version = RDS_PROTOCOL_4_0;
while ((common >>= 1) != 0)
version++;
} else if (RDS_PROTOCOL_COMPAT_VERSION ==
RDS_PROTOCOL(major, minor)) {
version = RDS_PROTOCOL_COMPAT_VERSION;
} else {
if (isv6)
printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI6c using incompatible protocol version %u.%u\n",
......@@ -729,8 +735,10 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
/* Check whether the remote protocol version matches ours. */
version = rds_ib_protocol_compatible(event, isv6);
if (!version)
if (!version) {
err = RDS_RDMA_REJ_INCOMPAT;
goto out;
}
dp = event->param.conn.private_data;
if (isv6) {
......@@ -771,15 +779,16 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
daddr6 = &d_mapped_addr;
}
rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid "
"0x%llx\n", saddr6, daddr6,
RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
rdsdebug("saddr %pI6c daddr %pI6c RDSv%u.%u lguid 0x%llx fguid 0x%llx, tos:%d\n",
saddr6, daddr6, RDS_PROTOCOL_MAJOR(version),
RDS_PROTOCOL_MINOR(version),
(unsigned long long)be64_to_cpu(lguid),
(unsigned long long)be64_to_cpu(fguid));
(unsigned long long)be64_to_cpu(fguid), dp_cmn->ricpc_dp_toss);
/* RDS/IB is not currently netns aware, thus init_net */
conn = rds_conn_create(&init_net, daddr6, saddr6,
&rds_ib_transport, GFP_KERNEL, ifindex);
&rds_ib_transport, dp_cmn->ricpc_dp_toss,
GFP_KERNEL, ifindex);
if (IS_ERR(conn)) {
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
conn = NULL;
......@@ -846,7 +855,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
if (conn)
mutex_unlock(&conn->c_cm_lock);
if (err)
rdma_reject(cm_id, NULL, 0);
rdma_reject(cm_id, &err, sizeof(int));
return destroy;
}
......@@ -861,7 +870,7 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
/* If the peer doesn't do protocol negotiation, we must
* default to RDSv3.0 */
rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
rds_ib_set_protocol(conn, RDS_PROTOCOL_4_1);
ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */
ret = rds_ib_setup_qp(conn);
......@@ -870,7 +879,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6)
goto out;
}
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
rds_ib_cm_fill_conn_param(conn, &conn_param, &dp,
conn->c_proposed_version,
UINT_MAX, UINT_MAX, isv6);
ret = rdma_connect(cm_id, &conn_param);
if (ret)
......
......@@ -986,9 +986,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
} else {
/* We expect errors as the qp is drained during shutdown */
if (rds_conn_up(conn) || rds_conn_connecting(conn))
rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
wc->status,
conn->c_tos, wc->status,
ib_wc_status_msg(wc->status));
}
......
......@@ -305,8 +305,9 @@ void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
/* We expect errors as the qp is drained during shutdown */
if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c> had status %u (%s), disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr, wc->status,
rds_ib_conn_error(conn, "send completion on <%pI6c,%pI6c,%d> had status %u (%s), disconnecting and reconnecting\n",
&conn->c_laddr, &conn->c_faddr,
conn->c_tos, wc->status,
ib_wc_status_msg(wc->status));
}
}
......
......@@ -51,6 +51,8 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
struct rds_connection *conn = cm_id->context;
struct rds_transport *trans;
int ret = 0;
int *err;
u8 len;
rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
event->event, rdma_event_msg(event->event));
......@@ -81,6 +83,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_ADDR_RESOLVED:
rdma_set_service_type(cm_id, conn->c_tos);
/* XXX do we need to clean up if this fails? */
ret = rdma_resolve_route(cm_id,
RDS_RDMA_RESOLVE_TIMEOUT_MS);
......@@ -106,8 +109,19 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
break;
case RDMA_CM_EVENT_REJECTED:
if (!conn)
break;
err = (int *)rdma_consumer_reject_data(cm_id, event, &len);
if (!err || (err && ((*err) == RDS_RDMA_REJ_INCOMPAT))) {
pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
&conn->c_laddr, &conn->c_faddr);
conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
conn->c_tos = 0;
rds_conn_drop(conn);
}
rdsdebug("Connection rejected: %s\n",
rdma_reject_msg(cm_id, event->status));
break;
/* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
......
......@@ -11,6 +11,12 @@
#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000
/* The reject reason below exists for a legacy interoperability issue with
* non-Linux RDS endpoints, where older version incompatibility is conveyed
* via the value 1. For future version(s), a properly encoded reject reason
* should be used.
*/
#define RDS_RDMA_REJ_INCOMPAT 1
int rds_rdma_conn_connect(struct rds_connection *conn);
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
......
......@@ -19,10 +19,13 @@
*/
#define RDS_PROTOCOL_3_0 0x0300
#define RDS_PROTOCOL_3_1 0x0301
#define RDS_PROTOCOL_4_0 0x0400
#define RDS_PROTOCOL_4_1 0x0401
#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1
#define RDS_PROTOCOL_MAJOR(v) ((v) >> 8)
#define RDS_PROTOCOL_MINOR(v) ((v) & 255)
#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
#define RDS_PROTOCOL_COMPAT_VERSION RDS_PROTOCOL_3_1
/* The following ports, 16385, 18634, 18635, are registered with IANA as
* the ports to be used for RDS over TCP and UDP. Currently, only RDS over
......@@ -151,9 +154,13 @@ struct rds_connection {
struct rds_cong_map *c_fcong;
/* Protocol version */
unsigned int c_proposed_version;
unsigned int c_version;
possible_net_t c_net;
/* TOS */
u8 c_tos;
struct list_head c_map_item;
unsigned long c_map_queued;
......@@ -567,6 +574,7 @@ struct rds_transport {
void (*free_mr)(void *trans_private, int invalidate);
void (*flush_mrs)(void);
bool (*t_unloading)(struct rds_connection *conn);
u8 (*get_tos_map)(u8 tos);
};
/* Bind hash table key length. It is the sum of the size of a struct
......@@ -648,6 +656,7 @@ struct rds_sock {
u8 rs_rx_traces;
u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
struct rds_msg_zcopy_queue rs_zcookie_queue;
u8 rs_tos;
};
static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
......@@ -756,13 +765,14 @@ void rds_conn_exit(void);
struct rds_connection *rds_conn_create(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans, gfp_t gfp,
struct rds_transport *trans,
u8 tos, gfp_t gfp,
int dev_if);
struct rds_connection *rds_conn_create_outgoing(struct net *net,
const struct in6_addr *laddr,
const struct in6_addr *faddr,
struct rds_transport *trans,
gfp_t gfp, int dev_if);
u8 tos, gfp_t gfp, int dev_if);
void rds_conn_shutdown(struct rds_conn_path *cpath);
void rds_conn_destroy(struct rds_connection *conn);
void rds_conn_drop(struct rds_connection *conn);
......
......@@ -782,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc,
minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
minfo.len = be32_to_cpu(inc->i_hdr.h_len);
minfo.tos = inc->i_conn->c_tos;
if (flip) {
minfo.laddr = daddr;
......
......@@ -1277,12 +1277,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* rds_conn_create has a spinlock that runs with IRQ off.
* Caching the conn in the socket helps a lot. */
if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr) &&
rs->rs_tos == rs->rs_conn->c_tos) {
conn = rs->rs_conn;
else {
} else {
conn = rds_conn_create_outgoing(sock_net(sock->sk),
&rs->rs_bound_addr, &daddr,
rs->rs_transport,
rs->rs_transport, rs->rs_tos,
sock->sk->sk_allocation,
scope_id);
if (IS_ERR(conn)) {
......
......@@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
tsinfo.last_expected_una = tc->t_last_expected_una;
tsinfo.last_seen_una = tc->t_last_seen_una;
tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
}
......@@ -452,6 +453,12 @@ static void rds_tcp_destroy_conns(void)
static void rds_tcp_exit(void);
/*
* rds_tcp_get_tos_map - map a user-supplied tos value for the TCP transport.
* @tos: tos value supplied by the caller; not used by this mapping.
*
* Always returns 0, so every user tos value collapses to the same class.
*/
static u8 rds_tcp_get_tos_map(u8 tos)
{
/* all user tos mapped to default 0 for TCP transport */
return 0;
}
struct rds_transport rds_tcp_transport = {
.laddr_check = rds_tcp_laddr_check,
.xmit_path_prepare = rds_tcp_xmit_path_prepare,
......@@ -466,6 +473,7 @@ struct rds_transport rds_tcp_transport = {
.inc_free = rds_tcp_inc_free,
.stats_info_copy = rds_tcp_stats_info_copy,
.exit = rds_tcp_exit,
.get_tos_map = rds_tcp_get_tos_map,
.t_owner = THIS_MODULE,
.t_name = "tcp",
.t_type = RDS_TRANS_TCP,
......
......@@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock)
conn = rds_conn_create(sock_net(sock->sk),
my_addr, peer_addr,
&rds_tcp_transport, GFP_KERNEL, dev_if);
&rds_tcp_transport, 0, GFP_KERNEL, dev_if);
if (IS_ERR(conn)) {
ret = PTR_ERR(conn);
......
......@@ -93,6 +93,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
}
rcu_read_unlock();
cp->cp_conn->c_proposed_version = RDS_PROTOCOL_VERSION;
}
EXPORT_SYMBOL_GPL(rds_connect_path_complete);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment