Commit 1e2b44e7, authored by Ka-Cheong Poon, committed by David S. Miller

rds: Enable RDS IPv6 support

This patch enables RDS to use IPv6 addresses. For RDS/TCP, the
listener is now an IPv6 endpoint which accepts both IPv4 and IPv6
connection requests.  RDS/RDMA/IB uses a private data (struct
rds_ib_connect_private) exchange between endpoints at RDS connection
establishment time to support RDMA. This private data exchange uses a
32 bit integer to represent an IP address. This needs to be changed in
order to support IPv6. A new private data struct
rds6_ib_connect_private is introduced to handle this. To ensure
backward compatibility, an IPv6 capable RDS stack uses another RDMA
listener port (RDS_CM_PORT) to accept IPv6 connections, and it
continues to use the original RDS_PORT for IPv4 RDS connections. When
it needs to communicate with an IPv6 peer, it uses RDS_CM_PORT to
send the connection setup request.

v5: Fixed syntax problem (David Miller).

v4: Changed port history comments in rds.h (Sowmini Varadhan).

v3: Added support to set up IPv4 connection using mapped address
    (David Miller).
    Added support to set up connections between link-local and
    non-link-local addresses.
    Various review comments from Santosh Shilimkar and Sowmini Varadhan.

v2: Fixed bound and peer address scope mismatch issue.
    Added back rds_connect() IPv6 changes.
Signed-off-by: Ka-Cheong Poon <ka-cheong.poon@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent eee2fa6a
...@@ -142,16 +142,33 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, ...@@ -142,16 +142,33 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
uaddr_len = sizeof(*sin6); uaddr_len = sizeof(*sin6);
} }
} else { } else {
/* If socket is not yet bound, set the return address family /* If socket is not yet bound and the socket is connected,
* to be AF_UNSPEC (value 0) and the address size to be that * set the return address family to be the same as the
* of an IPv4 address. * connected address, but with 0 address value. If it is not
* connected, set the family to be AF_UNSPEC (value 0) and
* the address size to be that of an IPv4 address.
*/ */
if (ipv6_addr_any(&rs->rs_bound_addr)) { if (ipv6_addr_any(&rs->rs_bound_addr)) {
if (ipv6_addr_any(&rs->rs_conn_addr)) {
sin = (struct sockaddr_in *)uaddr; sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin)); memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_UNSPEC; sin->sin_family = AF_UNSPEC;
return sizeof(*sin); return sizeof(*sin);
} }
if (ipv6_addr_type(&rs->rs_conn_addr) &
IPV6_ADDR_MAPPED) {
sin = (struct sockaddr_in *)uaddr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
return sizeof(*sin);
}
sin6 = (struct sockaddr_in6 *)uaddr;
memset(sin6, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
return sizeof(*sin6);
}
if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
sin = (struct sockaddr_in *)uaddr; sin = (struct sockaddr_in *)uaddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
...@@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, ...@@ -484,16 +501,18 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
{ {
struct sock *sk = sock->sk; struct sock *sk = sock->sk;
struct sockaddr_in *sin; struct sockaddr_in *sin;
struct sockaddr_in6 *sin6;
struct rds_sock *rs = rds_sk_to_rs(sk); struct rds_sock *rs = rds_sk_to_rs(sk);
int addr_type;
int ret = 0; int ret = 0;
lock_sock(sk); lock_sock(sk);
switch (addr_len) { switch (uaddr->sa_family) {
case sizeof(struct sockaddr_in): case AF_INET:
sin = (struct sockaddr_in *)uaddr; sin = (struct sockaddr_in *)uaddr;
if (sin->sin_family != AF_INET) { if (addr_len < sizeof(struct sockaddr_in)) {
ret = -EAFNOSUPPORT; ret = -EINVAL;
break; break;
} }
if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
...@@ -509,14 +528,58 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, ...@@ -509,14 +528,58 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
rs->rs_conn_port = sin->sin_port; rs->rs_conn_port = sin->sin_port;
break; break;
case sizeof(struct sockaddr_in6): case AF_INET6:
ret = -EPROTONOSUPPORT; sin6 = (struct sockaddr_in6 *)uaddr;
if (addr_len < sizeof(struct sockaddr_in6)) {
ret = -EINVAL;
break; break;
}
addr_type = ipv6_addr_type(&sin6->sin6_addr);
if (!(addr_type & IPV6_ADDR_UNICAST)) {
__be32 addr4;
default: if (!(addr_type & IPV6_ADDR_MAPPED)) {
ret = -EPROTOTYPE;
break;
}
/* It is a mapped address. Need to do some sanity
* checks.
*/
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(addr4))) {
ret = -EPROTOTYPE;
break;
}
}
if (addr_type & IPV6_ADDR_LINKLOCAL) {
/* If socket is already bound to a link local address,
* the peer address must be on the same link.
*/
if (sin6->sin6_scope_id == 0 ||
(!ipv6_addr_any(&rs->rs_bound_addr) &&
rs->rs_bound_scope_id &&
sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
ret = -EINVAL; ret = -EINVAL;
break; break;
} }
/* Remember the connected address scope ID. It will
* be checked against the binding local address when
* the socket is bound.
*/
rs->rs_bound_scope_id = sin6->sin6_scope_id;
}
rs->rs_conn_addr = sin6->sin6_addr;
rs->rs_conn_port = sin6->sin6_port;
break;
default:
ret = -EAFNOSUPPORT;
break;
}
release_sock(sk); release_sock(sk);
return ret; return ret;
......
...@@ -127,9 +127,10 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr, ...@@ -127,9 +127,10 @@ static int rds_add_bound(struct rds_sock *rs, const struct in6_addr *addr,
if (!rhashtable_insert_fast(&bind_hash_table, if (!rhashtable_insert_fast(&bind_hash_table,
&rs->rs_bound_node, ht_parms)) { &rs->rs_bound_node, ht_parms)) {
*port = rs->rs_bound_port; *port = rs->rs_bound_port;
rs->rs_bound_scope_id = scope_id;
ret = 0; ret = 0;
rdsdebug("rs %p binding to %pI4:%d\n", rdsdebug("rs %p binding to %pI6c:%d\n",
rs, &addr, (int)ntohs(*port)); rs, addr, (int)ntohs(*port));
break; break;
} else { } else {
rs->rs_bound_addr = in6addr_any; rs->rs_bound_addr = in6addr_any;
...@@ -164,23 +165,53 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ...@@ -164,23 +165,53 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct in6_addr v6addr, *binding_addr; struct in6_addr v6addr, *binding_addr;
struct rds_transport *trans; struct rds_transport *trans;
__u32 scope_id = 0; __u32 scope_id = 0;
int addr_type;
int ret = 0; int ret = 0;
__be16 port; __be16 port;
/* We only allow an RDS socket to be bound to an IPv4 address. IPv6 /* We allow an RDS socket to be bound to either IPv4 or IPv6
* address support will be added later. * address.
*/ */
if (addr_len == sizeof(struct sockaddr_in)) { if (uaddr->sa_family == AF_INET) {
struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
if (sin->sin_family != AF_INET || if (addr_len < sizeof(struct sockaddr_in) ||
sin->sin_addr.s_addr == htonl(INADDR_ANY)) sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
return -EINVAL; return -EINVAL;
ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
binding_addr = &v6addr; binding_addr = &v6addr;
port = sin->sin_port; port = sin->sin_port;
} else if (addr_len == sizeof(struct sockaddr_in6)) { } else if (uaddr->sa_family == AF_INET6) {
return -EPROTONOSUPPORT; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;
if (addr_len < sizeof(struct sockaddr_in6))
return -EINVAL;
addr_type = ipv6_addr_type(&sin6->sin6_addr);
if (!(addr_type & IPV6_ADDR_UNICAST)) {
__be32 addr4;
if (!(addr_type & IPV6_ADDR_MAPPED))
return -EINVAL;
/* It is a mapped address. Need to do some sanity
* checks.
*/
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(addr4)))
return -EINVAL;
}
/* The scope ID must be specified for link local address. */
if (addr_type & IPV6_ADDR_LINKLOCAL) {
if (sin6->sin6_scope_id == 0)
return -EINVAL;
scope_id = sin6->sin6_scope_id;
}
binding_addr = &sin6->sin6_addr;
port = sin6->sin6_port;
} else { } else {
return -EINVAL; return -EINVAL;
} }
...@@ -191,6 +222,16 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ...@@ -191,6 +222,16 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
/* Socket is connected. The binding address should have the same
* scope ID as the connected address, except the case when one is
* non-link local address (scope_id is 0).
*/
if (!ipv6_addr_any(&rs->rs_conn_addr) && scope_id &&
rs->rs_bound_scope_id &&
scope_id != rs->rs_bound_scope_id) {
ret = -EINVAL;
goto out;
}
ret = rds_add_bound(rs, binding_addr, &port, scope_id); ret = rds_add_bound(rs, binding_addr, &port, scope_id);
if (ret) if (ret)
......
/* /*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/export.h> #include <linux/export.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/inet6_hashtables.h> #include <net/inet6_hashtables.h>
#include <net/addrconf.h>
#include "rds.h" #include "rds.h"
#include "loop.h" #include "loop.h"
...@@ -200,6 +201,15 @@ static struct rds_connection *__rds_conn_create(struct net *net, ...@@ -200,6 +201,15 @@ static struct rds_connection *__rds_conn_create(struct net *net,
conn->c_isv6 = !ipv6_addr_v4mapped(laddr); conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
conn->c_faddr = *faddr; conn->c_faddr = *faddr;
conn->c_dev_if = dev_if; conn->c_dev_if = dev_if;
/* If the local address is link local, set c_bound_if to be the
* index used for this connection. Otherwise, set it to 0 as
* the socket is not bound to an interface. c_bound_if is used
* to look up a socket when a packet is received
*/
if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL)
conn->c_bound_if = dev_if;
else
conn->c_bound_if = 0;
rds_conn_net_set(conn, net); rds_conn_net_set(conn, net);
...@@ -486,7 +496,15 @@ void rds_conn_destroy(struct rds_connection *conn) ...@@ -486,7 +496,15 @@ void rds_conn_destroy(struct rds_connection *conn)
} }
EXPORT_SYMBOL_GPL(rds_conn_destroy); EXPORT_SYMBOL_GPL(rds_conn_destroy);
static void rds_conn_message_info(struct socket *sock, unsigned int len, static void __rds_inc_msg_cp(struct rds_incoming *inc,
struct rds_info_iterator *iter,
void *saddr, void *daddr, int flip)
{
rds_inc_info_copy(inc, iter, *(__be32 *)saddr,
*(__be32 *)daddr, flip);
}
static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter, struct rds_info_iterator *iter,
struct rds_info_lengths *lens, struct rds_info_lengths *lens,
int want_send) int want_send)
...@@ -524,17 +542,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, ...@@ -524,17 +542,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
/* XXX too lazy to maintain counts.. */ /* XXX too lazy to maintain counts.. */
list_for_each_entry(rm, list, m_conn_item) { list_for_each_entry(rm, list, m_conn_item) {
__be32 laddr;
__be32 faddr;
total++; total++;
laddr = conn->c_laddr.s6_addr32[3];
faddr = conn->c_faddr.s6_addr32[3];
if (total <= len) if (total <= len)
rds_inc_info_copy(&rm->m_inc, __rds_inc_msg_cp(&rm->m_inc,
iter, iter,
laddr, &conn->c_laddr,
faddr, &conn->c_faddr,
0); 0);
} }
...@@ -548,6 +561,14 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, ...@@ -548,6 +561,14 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
lens->each = sizeof(struct rds_info_message); lens->each = sizeof(struct rds_info_message);
} }
/* Thin wrapper around rds_conn_message_info_cmn(): forwards sock, len,
 * iter, lens and want_send unchanged and returns nothing itself.
 */
static void rds_conn_message_info(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter,
struct rds_info_lengths *lens,
int want_send)
{
rds_conn_message_info_cmn(sock, len, iter, lens, want_send);
}
static void rds_conn_message_info_send(struct socket *sock, unsigned int len, static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
struct rds_info_iterator *iter, struct rds_info_iterator *iter,
struct rds_info_lengths *lens) struct rds_info_lengths *lens)
...@@ -655,6 +676,9 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) ...@@ -655,6 +676,9 @@ static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer)
struct rds_info_connection *cinfo = buffer; struct rds_info_connection *cinfo = buffer;
struct rds_connection *conn = cp->cp_conn; struct rds_connection *conn = cp->cp_conn;
if (conn->c_isv6)
return 0;
cinfo->next_tx_seq = cp->cp_next_tx_seq; cinfo->next_tx_seq = cp->cp_next_tx_seq;
cinfo->next_rx_seq = cp->cp_next_rx_seq; cinfo->next_rx_seq = cp->cp_next_rx_seq;
cinfo->laddr = conn->c_laddr.s6_addr32[3]; cinfo->laddr = conn->c_laddr.s6_addr32[3];
......
/* /*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/module.h> #include <linux/module.h>
#include <net/addrconf.h>
#include "rds_single_path.h" #include "rds_single_path.h"
#include "rds.h" #include "rds.h"
...@@ -295,6 +296,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, ...@@ -295,6 +296,8 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
/* We will only ever look at IB transports */ /* We will only ever look at IB transports */
if (conn->c_trans != &rds_ib_transport) if (conn->c_trans != &rds_ib_transport)
return 0; return 0;
if (conn->c_isv6)
return 0;
iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->src_addr = conn->c_laddr.s6_addr32[3];
iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
...@@ -330,7 +333,6 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len, ...@@ -330,7 +333,6 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
sizeof(struct rds_info_rdma_connection)); sizeof(struct rds_info_rdma_connection));
} }
/* /*
* Early RDS/IB was built to only bind to an address if there is an IPoIB * Early RDS/IB was built to only bind to an address if there is an IPoIB
* device with that address set. * device with that address set.
...@@ -346,8 +348,12 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, ...@@ -346,8 +348,12 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
{ {
int ret; int ret;
struct rdma_cm_id *cm_id; struct rdma_cm_id *cm_id;
struct sockaddr_in6 sin6;
struct sockaddr_in sin; struct sockaddr_in sin;
struct sockaddr *sa;
bool isv4;
isv4 = ipv6_addr_v4mapped(addr);
/* Create a CMA ID and try to bind it. This catches both /* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs. * IB and iWARP capable NICs.
*/ */
...@@ -356,20 +362,53 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, ...@@ -356,20 +362,53 @@ static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr,
if (IS_ERR(cm_id)) if (IS_ERR(cm_id))
return PTR_ERR(cm_id); return PTR_ERR(cm_id);
if (isv4) {
memset(&sin, 0, sizeof(sin)); memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET; sin.sin_family = AF_INET;
sin.sin_addr.s_addr = addr->s6_addr32[3]; sin.sin_addr.s_addr = addr->s6_addr32[3];
sa = (struct sockaddr *)&sin;
} else {
memset(&sin6, 0, sizeof(sin6));
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = *addr;
sin6.sin6_scope_id = scope_id;
sa = (struct sockaddr *)&sin6;
/* XXX Do a special IPv6 link local address check here. The
* reason is that rdma_bind_addr() always succeeds with an IPv6
* link local address regardless of whether it is actually
* configured in the system.
*/
if (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) {
struct net_device *dev;
if (scope_id == 0)
return -EADDRNOTAVAIL;
/* Use init_net for now as RDS is not network
* name space aware.
*/
dev = dev_get_by_index(&init_net, scope_id);
if (!dev)
return -EADDRNOTAVAIL;
if (!ipv6_chk_addr(&init_net, addr, dev, 1)) {
dev_put(dev);
return -EADDRNOTAVAIL;
}
dev_put(dev);
}
}
/* rdma_bind_addr will only succeed for IB & iWARP devices */ /* rdma_bind_addr will only succeed for IB & iWARP devices */
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); ret = rdma_bind_addr(cm_id, sa);
/* due to this, we will claim to support iWARP devices unless we /* due to this, we will claim to support iWARP devices unless we
check node_type. */ check node_type. */
if (ret || !cm_id->device || if (ret || !cm_id->device ||
cm_id->device->node_type != RDMA_NODE_IB_CA) cm_id->device->node_type != RDMA_NODE_IB_CA)
ret = -EADDRNOTAVAIL; ret = -EADDRNOTAVAIL;
rdsdebug("addr %pI6c ret %d node type %d\n", rdsdebug("addr %pI6c%%%u ret %d node type %d\n",
addr, ret, addr, scope_id, ret,
cm_id->device ? cm_id->device->node_type : -1); cm_id->device ? cm_id->device->node_type : -1);
rdma_destroy_id(cm_id); rdma_destroy_id(cm_id);
......
...@@ -678,7 +678,7 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6) ...@@ -678,7 +678,7 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event, bool isv6)
return version; return version;
} }
/* Given an IPv6 address, find the IB net_device which hosts that address and /* Given an IPv6 address, find the net_device which hosts that address and
* return its index. This is used by the rds_ib_cm_handle_connect() code to * return its index. This is used by the rds_ib_cm_handle_connect() code to
* find the interface index of where an incoming request comes from when * find the interface index of where an incoming request comes from when
* the request is using a link local address. * the request is using a link local address.
...@@ -695,8 +695,7 @@ static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr) ...@@ -695,8 +695,7 @@ static u32 __rds_find_ifindex(struct net *net, const struct in6_addr *addr)
rcu_read_lock(); rcu_read_lock();
for_each_netdev_rcu(net, dev) { for_each_netdev_rcu(net, dev) {
if (dev->type == ARPHRD_INFINIBAND && if (ipv6_chk_addr(net, addr, dev, 1)) {
ipv6_chk_addr(net, addr, dev, 0)) {
idx = dev->ifindex; idx = dev->ifindex;
break; break;
} }
...@@ -736,7 +735,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, ...@@ -736,7 +735,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
dp_cmn = &dp->ricp_v6.dp_cmn; dp_cmn = &dp->ricp_v6.dp_cmn;
saddr6 = &dp->ricp_v6.dp_saddr; saddr6 = &dp->ricp_v6.dp_saddr;
daddr6 = &dp->ricp_v6.dp_daddr; daddr6 = &dp->ricp_v6.dp_daddr;
/* If the local address is link local, need to find the /* If either address is link local, need to find the
* interface index in order to create a proper RDS * interface index in order to create a proper RDS
* connection. * connection.
*/ */
...@@ -748,6 +747,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, ...@@ -748,6 +747,14 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
err = -EOPNOTSUPP; err = -EOPNOTSUPP;
goto out; goto out;
} }
} else if (ipv6_addr_type(saddr6) & IPV6_ADDR_LINKLOCAL) {
/* Use our address to find the correct index. */
ifindex = __rds_find_ifindex(&init_net, daddr6);
/* No index found... Need to bail out. */
if (ifindex == 0) {
err = -EOPNOTSUPP;
goto out;
}
} }
} else { } else {
dp_cmn = &dp->ricp_v4.dp_cmn; dp_cmn = &dp->ricp_v4.dp_cmn;
...@@ -886,6 +893,9 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp) ...@@ -886,6 +893,9 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp)
/* XXX I wonder what affect the port space has */ /* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */ /* delegate cm event handler to rdma_transport */
if (conn->c_isv6)
handler = rds6_rdma_cm_event_handler;
else
handler = rds_rdma_cm_event_handler; handler = rds_rdma_cm_event_handler;
ic->i_cm_id = rdma_create_id(&init_net, handler, conn, ic->i_cm_id = rdma_create_id(&init_net, handler, conn,
RDMA_PS_TCP, IB_QPT_RC); RDMA_PS_TCP, IB_QPT_RC);
......
...@@ -37,7 +37,9 @@ ...@@ -37,7 +37,9 @@
#include "rdma_transport.h" #include "rdma_transport.h"
#include "ib.h" #include "ib.h"
/* Global IPv4 and IPv6 RDS RDMA listener cm_id */
static struct rdma_cm_id *rds_rdma_listen_id; static struct rdma_cm_id *rds_rdma_listen_id;
static struct rdma_cm_id *rds6_rdma_listen_id;
static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event, struct rdma_cm_event *event,
...@@ -153,6 +155,12 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, ...@@ -153,6 +155,12 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
return rds_rdma_cm_event_handler_cmn(cm_id, event, false); return rds_rdma_cm_event_handler_cmn(cm_id, event, false);
} }
/* RDMA CM event handler counterpart of rds_rdma_cm_event_handler().
 * Delegates to rds_rdma_cm_event_handler_cmn() and returns its result,
 * passing true as the third argument where the non-"rds6" handler passes
 * false (presumably the "is IPv6" flag -- confirm against the common
 * handler's signature).
 */
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
return rds_rdma_cm_event_handler_cmn(cm_id, event, true);
}
static int rds_rdma_listen_init_common(rdma_cm_event_handler handler, static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
struct sockaddr *sa, struct sockaddr *sa,
struct rdma_cm_id **ret_cm_id) struct rdma_cm_id **ret_cm_id)
...@@ -206,6 +214,7 @@ static int rds_rdma_listen_init_common(rdma_cm_event_handler handler, ...@@ -206,6 +214,7 @@ static int rds_rdma_listen_init_common(rdma_cm_event_handler handler,
static int rds_rdma_listen_init(void) static int rds_rdma_listen_init(void)
{ {
int ret; int ret;
struct sockaddr_in6 sin6;
struct sockaddr_in sin; struct sockaddr_in sin;
sin.sin_family = PF_INET; sin.sin_family = PF_INET;
...@@ -214,7 +223,21 @@ static int rds_rdma_listen_init(void) ...@@ -214,7 +223,21 @@ static int rds_rdma_listen_init(void)
ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler, ret = rds_rdma_listen_init_common(rds_rdma_cm_event_handler,
(struct sockaddr *)&sin, (struct sockaddr *)&sin,
&rds_rdma_listen_id); &rds_rdma_listen_id);
if (ret != 0)
return ret; return ret;
sin6.sin6_family = PF_INET6;
sin6.sin6_addr = in6addr_any;
sin6.sin6_port = htons(RDS_CM_PORT);
sin6.sin6_scope_id = 0;
sin6.sin6_flowinfo = 0;
ret = rds_rdma_listen_init_common(rds6_rdma_cm_event_handler,
(struct sockaddr *)&sin6,
&rds6_rdma_listen_id);
/* Keep going even when IPv6 is not enabled in the system. */
if (ret != 0)
rdsdebug("Cannot set up IPv6 RDMA listener\n");
return 0;
} }
static void rds_rdma_listen_stop(void) static void rds_rdma_listen_stop(void)
...@@ -224,6 +247,11 @@ static void rds_rdma_listen_stop(void) ...@@ -224,6 +247,11 @@ static void rds_rdma_listen_stop(void)
rdma_destroy_id(rds_rdma_listen_id); rdma_destroy_id(rds_rdma_listen_id);
rds_rdma_listen_id = NULL; rds_rdma_listen_id = NULL;
} }
if (rds6_rdma_listen_id) {
rdsdebug("cm %p\n", rds6_rdma_listen_id);
rdma_destroy_id(rds6_rdma_listen_id);
rds6_rdma_listen_id = NULL;
}
} }
static int rds_rdma_init(void) static int rds_rdma_init(void)
......
...@@ -6,11 +6,16 @@ ...@@ -6,11 +6,16 @@
#include <rdma/rdma_cm.h> #include <rdma/rdma_cm.h>
#include "rds.h" #include "rds.h"
/* RDMA_CM also uses 16385 as the listener port. */
#define RDS_CM_PORT 16385
#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000
int rds_rdma_conn_connect(struct rds_connection *conn); int rds_rdma_conn_connect(struct rds_connection *conn);
int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event); struct rdma_cm_event *event);
int rds6_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event);
/* from ib.c */ /* from ib.c */
extern struct rds_transport rds_ib_transport; extern struct rds_transport rds_ib_transport;
......
...@@ -24,14 +24,15 @@ ...@@ -24,14 +24,15 @@
#define RDS_PROTOCOL_MINOR(v) ((v) & 255) #define RDS_PROTOCOL_MINOR(v) ((v) & 255)
#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
/* /* The following ports, 16385, 18634, 18635, are registered with IANA as
* XXX randomly chosen, but at least seems to be unused: * the ports to be used for RDS over TCP and UDP. Currently, only RDS over
* # 18464-18768 Unassigned * TCP and RDS over IB/RDMA are implemented. 18634 is the historical value
* We should do better. We want a reserved port to discourage unpriv'ed * used for the RDMA_CM listener port. RDS/TCP uses port 16385. After
* userspace from listening. * IPv6 work, RDMA_CM also uses 16385 as the listener port. 18634 is kept
* to ensure compatibility with older RDS modules. Those ports are defined
* in each transport's header file.
*/ */
#define RDS_PORT 18634 #define RDS_PORT 18634
#define RDS_CM_PORT 16385
#ifdef ATOMIC64_INIT #ifdef ATOMIC64_INIT
#define KERNEL_HAS_ATOMIC64 #define KERNEL_HAS_ATOMIC64
...@@ -140,7 +141,8 @@ struct rds_connection { ...@@ -140,7 +141,8 @@ struct rds_connection {
struct hlist_node c_hash_node; struct hlist_node c_hash_node;
struct in6_addr c_laddr; struct in6_addr c_laddr;
struct in6_addr c_faddr; struct in6_addr c_faddr;
int c_dev_if; /* c_laddrs's interface index */ int c_dev_if; /* ifindex used for this conn */
int c_bound_if; /* ifindex of c_laddr */
unsigned int c_loopback:1, unsigned int c_loopback:1,
c_isv6:1, c_isv6:1,
c_ping_triggered:1, c_ping_triggered:1,
...@@ -736,7 +738,7 @@ void rds_cong_remove_socket(struct rds_sock *); ...@@ -736,7 +738,7 @@ void rds_cong_remove_socket(struct rds_sock *);
void rds_cong_exit(void); void rds_cong_exit(void);
struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
/* conn.c */ /* connection.c */
extern u32 rds_gen_num; extern u32 rds_gen_num;
int rds_conn_init(void); int rds_conn_init(void);
void rds_conn_exit(void); void rds_conn_exit(void);
...@@ -874,6 +876,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); ...@@ -874,6 +876,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
void rds_inc_info_copy(struct rds_incoming *inc, void rds_inc_info_copy(struct rds_incoming *inc,
struct rds_info_iterator *iter, struct rds_info_iterator *iter,
__be32 saddr, __be32 daddr, int flip); __be32 saddr, __be32 daddr, int flip);
void rds6_inc_info_copy(struct rds_incoming *inc,
struct rds_info_iterator *iter,
struct in6_addr *saddr, struct in6_addr *daddr,
int flip);
/* send.c */ /* send.c */
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len); int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len);
......
...@@ -364,7 +364,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, ...@@ -364,7 +364,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr,
goto out; goto out;
} }
rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_dev_if); rs = rds_find_bound(daddr, inc->i_hdr.h_dport, conn->c_bound_if);
if (!rs) { if (!rs) {
rds_stats_inc(s_recv_drop_no_sock); rds_stats_inc(s_recv_drop_no_sock);
goto out; goto out;
......
...@@ -1091,10 +1091,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ...@@ -1091,10 +1091,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
switch (namelen) { switch (usin->sin_family) {
case sizeof(*usin): case AF_INET:
if (usin->sin_family != AF_INET || if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) { IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
ret = -EINVAL; ret = -EINVAL;
...@@ -1104,10 +1103,45 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ...@@ -1104,10 +1103,45 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
dport = usin->sin_port; dport = usin->sin_port;
break; break;
case sizeof(*sin6): { case AF_INET6: {
ret = -EPROTONOSUPPORT; int addr_type;
if (namelen < sizeof(*sin6)) {
ret = -EINVAL;
goto out;
}
addr_type = ipv6_addr_type(&sin6->sin6_addr);
if (!(addr_type & IPV6_ADDR_UNICAST)) {
__be32 addr4;
if (!(addr_type & IPV6_ADDR_MAPPED)) {
ret = -EINVAL;
goto out;
}
/* It is a mapped address. Need to do some
* sanity checks.
*/
addr4 = sin6->sin6_addr.s6_addr32[3];
if (addr4 == htonl(INADDR_ANY) ||
addr4 == htonl(INADDR_BROADCAST) ||
IN_MULTICAST(ntohl(addr4))) {
return -EINVAL;
goto out;
}
}
if (addr_type & IPV6_ADDR_LINKLOCAL) {
if (sin6->sin6_scope_id == 0) {
ret = -EINVAL;
goto out; goto out;
} }
scope_id = sin6->sin6_scope_id;
}
daddr = sin6->sin6_addr;
dport = sin6->sin6_port;
break;
}
default: default:
ret = -EINVAL; ret = -EINVAL;
...@@ -1138,6 +1172,19 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) ...@@ -1138,6 +1172,19 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
goto out; goto out;
} }
/* If the socket is already bound to a link local address,
* it can only send to peers on the same link. But allow
* communicating between link local and non-link local addresses.
*/
if (scope_id != rs->rs_bound_scope_id) {
if (!scope_id) {
scope_id = rs->rs_bound_scope_id;
} else if (rs->rs_bound_scope_id) {
release_sock(sk);
ret = -EINVAL;
goto out;
}
}
} }
release_sock(sk); release_sock(sk);
......
/* /*
* Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
* *
* This software is available to you under a choice of one of two * This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU * licenses. You may choose to be licensed under the terms of the GNU
...@@ -46,7 +46,12 @@ ...@@ -46,7 +46,12 @@
/* only for info exporting */ /* only for info exporting */
static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
static LIST_HEAD(rds_tcp_tc_list); static LIST_HEAD(rds_tcp_tc_list);
/* rds_tcp_tc_count counts only IPv4 connections.
* rds6_tcp_tc_count counts both IPv4 and IPv6 connections.
*/
static unsigned int rds_tcp_tc_count; static unsigned int rds_tcp_tc_count;
static unsigned int rds6_tcp_tc_count;
/* Track rds_tcp_connection structs so they can be cleaned up */ /* Track rds_tcp_connection structs so they can be cleaned up */
static DEFINE_SPINLOCK(rds_tcp_conn_lock); static DEFINE_SPINLOCK(rds_tcp_conn_lock);
...@@ -113,6 +118,8 @@ void rds_tcp_restore_callbacks(struct socket *sock, ...@@ -113,6 +118,8 @@ void rds_tcp_restore_callbacks(struct socket *sock,
/* done under the callback_lock to serialize with write_space */ /* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock); spin_lock(&rds_tcp_tc_list_lock);
list_del_init(&tc->t_list_item); list_del_init(&tc->t_list_item);
rds6_tcp_tc_count--;
if (!tc->t_cpath->cp_conn->c_isv6)
rds_tcp_tc_count--; rds_tcp_tc_count--;
spin_unlock(&rds_tcp_tc_list_lock); spin_unlock(&rds_tcp_tc_list_lock);
...@@ -200,6 +207,8 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) ...@@ -200,6 +207,8 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
/* done under the callback_lock to serialize with write_space */ /* done under the callback_lock to serialize with write_space */
spin_lock(&rds_tcp_tc_list_lock); spin_lock(&rds_tcp_tc_list_lock);
list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); list_add_tail(&tc->t_list_item, &rds_tcp_tc_list);
rds6_tcp_tc_count++;
if (!tc->t_cpath->cp_conn->c_isv6)
rds_tcp_tc_count++; rds_tcp_tc_count++;
spin_unlock(&rds_tcp_tc_list_lock); spin_unlock(&rds_tcp_tc_list_lock);
...@@ -221,6 +230,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) ...@@ -221,6 +230,9 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
write_unlock_bh(&sock->sk->sk_callback_lock); write_unlock_bh(&sock->sk->sk_callback_lock);
} }
/* Handle RDS_INFO_TCP_SOCKETS socket option. It only returns IPv4
* connections for backward compatibility.
*/
static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
struct rds_info_iterator *iter, struct rds_info_iterator *iter,
struct rds_info_lengths *lens) struct rds_info_lengths *lens)
...@@ -228,8 +240,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, ...@@ -228,8 +240,6 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
struct rds_info_tcp_socket tsinfo; struct rds_info_tcp_socket tsinfo;
struct rds_tcp_connection *tc; struct rds_tcp_connection *tc;
unsigned long flags; unsigned long flags;
struct sockaddr_in sin;
struct socket *sock;
spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
...@@ -237,16 +247,15 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len, ...@@ -237,16 +247,15 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
goto out; goto out;
list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
struct inet_sock *inet = inet_sk(tc->t_sock->sk);
sock = tc->t_sock; if (tc->t_cpath->cp_conn->c_isv6)
if (sock) { continue;
sock->ops->getname(sock, (struct sockaddr *)&sin, 0);
tsinfo.local_addr = sin.sin_addr.s_addr; tsinfo.local_addr = inet->inet_saddr;
tsinfo.local_port = sin.sin_port; tsinfo.local_port = inet->inet_sport;
sock->ops->getname(sock, (struct sockaddr *)&sin, 1); tsinfo.peer_addr = inet->inet_daddr;
tsinfo.peer_addr = sin.sin_addr.s_addr; tsinfo.peer_port = inet->inet_dport;
tsinfo.peer_port = sin.sin_port;
}
tsinfo.hdr_rem = tc->t_tinc_hdr_rem; tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
tsinfo.data_rem = tc->t_tinc_data_rem; tsinfo.data_rem = tc->t_tinc_data_rem;
...@@ -494,14 +503,19 @@ static __net_init int rds_tcp_init_net(struct net *net) ...@@ -494,14 +503,19 @@ static __net_init int rds_tcp_init_net(struct net *net)
err = -ENOMEM; err = -ENOMEM;
goto fail; goto fail;
} }
rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, true);
if (!rtn->rds_tcp_listen_sock) {
pr_warn("could not set up IPv6 listen sock\n");
/* Try IPv4 as some systems disable IPv6 */
rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net, false);
if (!rtn->rds_tcp_listen_sock) { if (!rtn->rds_tcp_listen_sock) {
pr_warn("could not set up listen sock\n");
unregister_net_sysctl_table(rtn->rds_tcp_sysctl); unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
rtn->rds_tcp_sysctl = NULL; rtn->rds_tcp_sysctl = NULL;
err = -EAFNOSUPPORT; err = -EAFNOSUPPORT;
goto fail; goto fail;
} }
}
INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
return 0; return 0;
......
...@@ -67,7 +67,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); ...@@ -67,7 +67,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
void rds_tcp_state_change(struct sock *sk); void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */ /* tcp_listen.c */
struct socket *rds_tcp_listen_init(struct net *); struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
void rds_tcp_listen_data_ready(struct sock *sk); void rds_tcp_listen_data_ready(struct sock *sk);
int rds_tcp_accept_one(struct socket *sock); int rds_tcp_accept_one(struct socket *sock);
......
...@@ -89,9 +89,11 @@ void rds_tcp_state_change(struct sock *sk) ...@@ -89,9 +89,11 @@ void rds_tcp_state_change(struct sock *sk)
int rds_tcp_conn_path_connect(struct rds_conn_path *cp) int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
{ {
struct socket *sock = NULL; struct socket *sock = NULL;
struct sockaddr_in6 sin6;
struct sockaddr_in sin; struct sockaddr_in sin;
struct sockaddr *addr; struct sockaddr *addr;
int addrlen; int addrlen;
bool isv6;
int ret; int ret;
struct rds_connection *conn = cp->cp_conn; struct rds_connection *conn = cp->cp_conn;
struct rds_tcp_connection *tc = cp->cp_transport_data; struct rds_tcp_connection *tc = cp->cp_transport_data;
...@@ -108,18 +110,36 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) ...@@ -108,18 +110,36 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
mutex_unlock(&tc->t_conn_path_lock); mutex_unlock(&tc->t_conn_path_lock);
return 0; return 0;
} }
if (ipv6_addr_v4mapped(&conn->c_laddr)) {
ret = sock_create_kern(rds_conn_net(conn), PF_INET, ret = sock_create_kern(rds_conn_net(conn), PF_INET,
SOCK_STREAM, IPPROTO_TCP, &sock); SOCK_STREAM, IPPROTO_TCP, &sock);
isv6 = false;
} else {
ret = sock_create_kern(rds_conn_net(conn), PF_INET6,
SOCK_STREAM, IPPROTO_TCP, &sock);
isv6 = true;
}
if (ret < 0) if (ret < 0)
goto out; goto out;
rds_tcp_tune(sock); rds_tcp_tune(sock);
if (isv6) {
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = conn->c_laddr;
sin6.sin6_port = 0;
sin6.sin6_flowinfo = 0;
sin6.sin6_scope_id = conn->c_dev_if;
addr = (struct sockaddr *)&sin6;
addrlen = sizeof(sin6);
} else {
sin.sin_family = AF_INET; sin.sin_family = AF_INET;
sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3]; sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
sin.sin_port = 0; sin.sin_port = 0;
addr = (struct sockaddr *)&sin; addr = (struct sockaddr *)&sin;
addrlen = sizeof(sin); addrlen = sizeof(sin);
}
ret = sock->ops->bind(sock, addr, addrlen); ret = sock->ops->bind(sock, addr, addrlen);
if (ret) { if (ret) {
...@@ -128,11 +148,21 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) ...@@ -128,11 +148,21 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
goto out; goto out;
} }
if (isv6) {
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = conn->c_faddr;
sin6.sin6_port = htons(RDS_TCP_PORT);
sin6.sin6_flowinfo = 0;
sin6.sin6_scope_id = conn->c_dev_if;
addr = (struct sockaddr *)&sin6;
addrlen = sizeof(sin6);
} else {
sin.sin_family = AF_INET; sin.sin_family = AF_INET;
sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3]; sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
sin.sin_port = htons(RDS_TCP_PORT); sin.sin_port = htons(RDS_TCP_PORT);
addr = (struct sockaddr *)&sin; addr = (struct sockaddr *)&sin;
addrlen = sizeof(sin); addrlen = sizeof(sin);
}
/* /*
* once we call connect() we can start getting callbacks and they * once we call connect() we can start getting callbacks and they
......
...@@ -131,6 +131,8 @@ int rds_tcp_accept_one(struct socket *sock) ...@@ -131,6 +131,8 @@ int rds_tcp_accept_one(struct socket *sock)
struct rds_tcp_connection *rs_tcp = NULL; struct rds_tcp_connection *rs_tcp = NULL;
int conn_state; int conn_state;
struct rds_conn_path *cp; struct rds_conn_path *cp;
struct in6_addr *my_addr, *peer_addr;
int dev_if;
if (!sock) /* module unload or netns delete in progress */ if (!sock) /* module unload or netns delete in progress */
return -ENETUNREACH; return -ENETUNREACH;
...@@ -163,15 +165,29 @@ int rds_tcp_accept_one(struct socket *sock) ...@@ -163,15 +165,29 @@ int rds_tcp_accept_one(struct socket *sock)
inet = inet_sk(new_sock->sk); inet = inet_sk(new_sock->sk);
my_addr = &new_sock->sk->sk_v6_rcv_saddr;
peer_addr = &new_sock->sk->sk_v6_daddr;
rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n", rdsdebug("accepted tcp %pI6c:%u -> %pI6c:%u\n",
&new_sock->sk->sk_v6_rcv_saddr, ntohs(inet->inet_sport), my_addr, ntohs(inet->inet_sport),
&new_sock->sk->sk_v6_daddr, ntohs(inet->inet_dport)); peer_addr, ntohs(inet->inet_dport));
/* sk_bound_dev_if is not set if the peer address is not a link local
* address. In that case mcast_oif happens to be set, so use it
* instead.
*/
if ((ipv6_addr_type(my_addr) & IPV6_ADDR_LINKLOCAL) &&
!(ipv6_addr_type(peer_addr) & IPV6_ADDR_LINKLOCAL)) {
struct ipv6_pinfo *inet6;
inet6 = inet6_sk(new_sock->sk);
dev_if = inet6->mcast_oif;
} else {
dev_if = new_sock->sk->sk_bound_dev_if;
}
conn = rds_conn_create(sock_net(sock->sk), conn = rds_conn_create(sock_net(sock->sk),
&new_sock->sk->sk_v6_rcv_saddr, &new_sock->sk->sk_v6_rcv_saddr,
&new_sock->sk->sk_v6_daddr, &new_sock->sk->sk_v6_daddr,
&rds_tcp_transport, GFP_KERNEL, &rds_tcp_transport, GFP_KERNEL, dev_if);
new_sock->sk->sk_bound_dev_if);
if (IS_ERR(conn)) { if (IS_ERR(conn)) {
ret = PTR_ERR(conn); ret = PTR_ERR(conn);
...@@ -256,15 +272,22 @@ void rds_tcp_listen_data_ready(struct sock *sk) ...@@ -256,15 +272,22 @@ void rds_tcp_listen_data_ready(struct sock *sk)
ready(sk); ready(sk);
} }
struct socket *rds_tcp_listen_init(struct net *net) struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
{ {
struct sockaddr_in sin;
struct socket *sock = NULL; struct socket *sock = NULL;
struct sockaddr_storage ss;
struct sockaddr_in6 *sin6;
struct sockaddr_in *sin;
int addr_len;
int ret; int ret;
ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); ret = sock_create_kern(net, isv6 ? PF_INET6 : PF_INET, SOCK_STREAM,
if (ret < 0) IPPROTO_TCP, &sock);
if (ret < 0) {
rdsdebug("could not create %s listener socket: %d\n",
isv6 ? "IPv6" : "IPv4", ret);
goto out; goto out;
}
sock->sk->sk_reuse = SK_CAN_REUSE; sock->sk->sk_reuse = SK_CAN_REUSE;
rds_tcp_nonagle(sock); rds_tcp_nonagle(sock);
...@@ -274,13 +297,28 @@ struct socket *rds_tcp_listen_init(struct net *net) ...@@ -274,13 +297,28 @@ struct socket *rds_tcp_listen_init(struct net *net)
sock->sk->sk_data_ready = rds_tcp_listen_data_ready; sock->sk->sk_data_ready = rds_tcp_listen_data_ready;
write_unlock_bh(&sock->sk->sk_callback_lock); write_unlock_bh(&sock->sk->sk_callback_lock);
sin.sin_family = PF_INET; if (isv6) {
sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); sin6 = (struct sockaddr_in6 *)&ss;
sin.sin_port = (__force u16)htons(RDS_TCP_PORT); sin6->sin6_family = PF_INET6;
sin6->sin6_addr = in6addr_any;
sin6->sin6_port = (__force u16)htons(RDS_TCP_PORT);
sin6->sin6_scope_id = 0;
sin6->sin6_flowinfo = 0;
addr_len = sizeof(*sin6);
} else {
sin = (struct sockaddr_in *)&ss;
sin->sin_family = PF_INET;
sin->sin_addr.s_addr = INADDR_ANY;
sin->sin_port = (__force u16)htons(RDS_TCP_PORT);
addr_len = sizeof(*sin);
}
ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
if (ret < 0) if (ret < 0) {
rdsdebug("could not bind %s listener socket: %d\n",
isv6 ? "IPv6" : "IPv4", ret);
goto out; goto out;
}
ret = sock->ops->listen(sock, 64); ret = sock->ops->listen(sock, 64);
if (ret < 0) if (ret < 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment