Commit e6f61130 authored by Doug Ledford

Merge branches 'misc-4.7-2', 'ipoib' and 'ib-router' into k.o/for-4.7

infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
ib_cm.o iw_cm.o ib_addr.o \
$(infiniband-y) $(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
...@@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ ...@@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \ device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o ib_cm-y := cm.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
...@@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o ...@@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
rdma_ucm-y := ucma.o rdma_ucm-y := ucma.o
ib_addr-y := addr.o
ib_umad-y := user_mad.o ib_umad-y := user_mad.o
ib_ucm-y := ucm.o ib_ucm-y := ucm.o
......
...@@ -46,10 +46,10 @@ ...@@ -46,10 +46,10 @@
#include <net/ip6_route.h> #include <net/ip6_route.h>
#include <rdma/ib_addr.h> #include <rdma/ib_addr.h>
#include <rdma/ib.h> #include <rdma/ib.h>
#include <rdma/rdma_netlink.h>
#include <net/netlink.h>
MODULE_AUTHOR("Sean Hefty"); #include "core_priv.h"
MODULE_DESCRIPTION("IB Address Translation");
MODULE_LICENSE("Dual BSD/GPL");
struct addr_req { struct addr_req {
struct list_head list; struct list_head list;
...@@ -62,8 +62,11 @@ struct addr_req { ...@@ -62,8 +62,11 @@ struct addr_req {
struct rdma_dev_addr *addr, void *context); struct rdma_dev_addr *addr, void *context);
unsigned long timeout; unsigned long timeout;
int status; int status;
u32 seq;
}; };
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
static void process_req(struct work_struct *work); static void process_req(struct work_struct *work);
static DEFINE_MUTEX(lock); static DEFINE_MUTEX(lock);
...@@ -71,6 +74,126 @@ static LIST_HEAD(req_list); ...@@ -71,6 +74,126 @@ static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req); static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq; static struct workqueue_struct *addr_wq;
/*
 * Attribute policy handed to nla_parse() when validating an IP-resolve
 * response.  Only LS_NLA_TYPE_DGID is described: a binary attribute whose
 * .len is sizeof(struct rdma_nla_ls_gid).
 *
 * NOTE(review): for NLA_BINARY the netlink policy documentation describes
 * .len as a maximum payload length, so a shorter DGID would presumably still
 * pass validation - confirm against the kernel version in use.
 */
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
[LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
.len = sizeof(struct rdma_nla_ls_gid)},
};
/*
 * Decide whether an IP-resolve response is usable.
 *
 * Returns false when the sender flagged the message with RDMA_NL_LS_F_ERR or
 * when nla_parse() rejects the attributes under ib_nl_addr_policy; returns
 * true otherwise.  The parsed attribute table is only used for validation
 * and is discarded.
 */
static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
{
	struct nlattr *attrs[LS_NLA_TYPE_MAX] = {};

	if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
		return false;

	return nla_parse(attrs, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
			 nlmsg_len(nlh), ib_nl_addr_policy) == 0;
}
static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
{
const struct nlattr *head, *curr;
union ib_gid gid;
struct addr_req *req;
int len, rem;
int found = 0;
head = (const struct nlattr *)nlmsg_data(nlh);
len = nlmsg_len(nlh);
nla_for_each_attr(curr, head, len, rem) {
if (curr->nla_type == LS_NLA_TYPE_DGID)
memcpy(&gid, nla_data(curr), nla_len(curr));
}
mutex_lock(&lock);
list_for_each_entry(req, &req_list, list) {
if (nlh->nlmsg_seq != req->seq)
continue;
/* We set the DGID part, the rest was set earlier */
rdma_addr_set_dgid(req->addr, &gid);
req->status = 0;
found = 1;
break;
}
mutex_unlock(&lock);
if (!found)
pr_info("Couldn't find request waiting for DGID: %pI6\n",
&gid);
}
/*
 * Netlink callback for RDMA_NL_LS_OP_IP_RESOLVE responses.
 *
 * Rejects with -EPERM any message that has NLM_F_REQUEST set, that has no
 * socket in its netlink control block, or whose sender fails the
 * CAP_NET_ADMIN check.  Otherwise the response is validated and, if good,
 * applied to the matching pending request.  Returns skb->len.
 */
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
			     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;

	/* Same checks, same order as a single short-circuit || chain. */
	if (nlh->nlmsg_flags & NLM_F_REQUEST)
		return -EPERM;
	if (!(NETLINK_CB(skb).sk))
		return -EPERM;
	if (!netlink_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	if (ib_nl_is_good_ip_resp(nlh))
		ib_nl_process_good_ip_rsep(nlh);

	return skb->len;
}
/*
 * Multicast an IP-resolve request to the RDMA_NL_GROUP_LS netlink group.
 *
 * @dev_addr: supplies bound_dev_if, placed in the family header as ifindex
 * @daddr:    destination IP address (struct in_addr for AF_INET, otherwise
 *            struct in6_addr)
 * @seq:      netlink sequence number, later matched against the response
 * @family:   AF_INET selects the IPv4 attribute, anything else IPv6
 *
 * Returns -ENOMEM if the skb cannot be allocated, the nla_put() error if the
 * address attribute cannot be appended, and -ENODATA otherwise - including
 * after a successful send, so that the caller keeps the request queued and
 * retries until the response from userspace has filled in the answer.
 */
static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
			     const void *daddr,
			     u32 seq, u16 family)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	struct rdma_ls_ip_resolve_header *header;
	void *data;
	size_t size;
	int attrtype;
	int len;
	int ret;

	if (family == AF_INET) {
		size = sizeof(struct in_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
	} else {
		size = sizeof(struct in6_addr);
		attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
	}

	/*
	 * Reserve room for the address payload itself.  sizeof(size) would
	 * be the width of a size_t, which is smaller than an IPv6 address.
	 */
	len = nla_total_size(size);
	len += NLMSG_ALIGN(sizeof(*header));

	skb = nlmsg_new(len, GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
			    RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
	if (!data) {
		nlmsg_free(skb);
		return -ENODATA;
	}

	/* Construct the family header first */
	header = (struct rdma_ls_ip_resolve_header *)
		skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
	header->ifindex = dev_addr->bound_dev_if;

	/* Do not send a request that is missing its address attribute. */
	ret = nla_put(skb, attrtype, size, daddr);
	if (ret) {
		nlmsg_free(skb);
		return ret;
	}

	/* Repair the nlmsg header length */
	nlmsg_end(skb, nlh);
	ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);

	/* Make the request retry, so when we get the response from userspace
	 * we will have something.
	 */
	return -ENODATA;
}
int rdma_addr_size(struct sockaddr *addr) int rdma_addr_size(struct sockaddr *addr)
{ {
switch (addr->sa_family) { switch (addr->sa_family) {
...@@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req) ...@@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock); mutex_unlock(&lock);
} }
/*
 * Resolve the hardware address through the userspace netlink service.
 *
 * Returns -EADDRNOTAVAIL when ibnl_chk_listeners(RDMA_NL_GROUP_LS) reports
 * non-zero (presumably: nobody is subscribed to the group - confirm against
 * that helper).  Otherwise the locally known fields of @dev_addr are filled
 * from dst->dev and the request is sent; the return value is whatever
 * ib_nl_ip_send_msg() returns.
 */
static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
			  const void *daddr, u32 seq, u16 family)
{
	if (!ibnl_chk_listeners(RDMA_NL_GROUP_LS)) {
		/* We fill in what we can, the response will fill the rest */
		rdma_copy_addr(dev_addr, dst->dev, NULL);
		return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
	}

	return -EADDRNOTAVAIL;
}
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const void *daddr) const void *daddr)
{ {
...@@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, ...@@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
return ret; return ret;
} }
/*
 * Report whether the route behind @dst goes through a gateway.
 *
 * For AF_INET the enclosing struct rtable's rt_uses_gateway is returned; for
 * any other family @dst is treated as embedded in a struct rt6_info and the
 * RTF_GATEWAY bit of rt6i_flags decides.
 */
static bool has_gateway(struct dst_entry *dst, sa_family_t family)
{
	if (family != AF_INET) {
		struct rt6_info *route6 = container_of(dst, struct rt6_info, dst);

		return route6->rt6i_flags & RTF_GATEWAY;
	}

	return container_of(dst, struct rtable, dst)->rt_uses_gateway;
}
/*
 * Pick the hardware-address resolution path for @dst_in.
 *
 * The raw destination address is taken from the sockaddr according to its
 * family (sin_addr.s_addr for AF_INET, sin6_addr otherwise).  A route that
 * has a gateway and leaves through an ARPHRD_INFINIBAND device is resolved
 * via netlink with ib_nl_fetch_ha(); everything else goes through
 * dst_fetch_ha().
 */
static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
		    const struct sockaddr *dst_in, u32 seq)
{
	sa_family_t family = dst_in->sa_family;
	const void *daddr;

	if (family == AF_INET)
		daddr = &((const struct sockaddr_in *)dst_in)->sin_addr.s_addr;
	else
		daddr = &((const struct sockaddr_in6 *)dst_in)->sin6_addr;

	/* Anything that is not "gateway + ARPHRD_INFINIBAND" is not an IB router */
	if (!has_gateway(dst, family) || dst->dev->type != ARPHRD_INFINIBAND)
		return dst_fetch_ha(dst, dev_addr, daddr);

	return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
}
static int addr4_resolve(struct sockaddr_in *src_in, static int addr4_resolve(struct sockaddr_in *src_in,
const struct sockaddr_in *dst_in, const struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr, struct rdma_dev_addr *addr,
...@@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in, ...@@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_family = AF_INET; src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr; src_in->sin_addr.s_addr = fl4.saddr;
/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
* routable) and we could set the network type accordingly. * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
* type accordingly.
*/ */
if (rt->rt_uses_gateway) if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
addr->network = RDMA_NETWORK_IPV4; addr->network = RDMA_NETWORK_IPV4;
addr->hoplimit = ip4_dst_hoplimit(&rt->dst); addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
...@@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, ...@@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
src_in->sin6_addr = fl6.saddr; src_in->sin6_addr = fl6.saddr;
} }
/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't /* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
* routable) and we could set the network type accordingly. * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
* type accordingly.
*/ */
if (rt->rt6i_flags & RTF_GATEWAY) if (rt->rt6i_flags & RTF_GATEWAY &&
ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
addr->network = RDMA_NETWORK_IPV6; addr->network = RDMA_NETWORK_IPV6;
addr->hoplimit = ip6_dst_hoplimit(dst); addr->hoplimit = ip6_dst_hoplimit(dst);
...@@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, ...@@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
static int addr_resolve_neigh(struct dst_entry *dst, static int addr_resolve_neigh(struct dst_entry *dst,
const struct sockaddr *dst_in, const struct sockaddr *dst_in,
struct rdma_dev_addr *addr) struct rdma_dev_addr *addr,
u32 seq)
{ {
if (dst->dev->flags & IFF_LOOPBACK) { if (dst->dev->flags & IFF_LOOPBACK) {
int ret; int ret;
...@@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, ...@@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
} }
/* If the device doesn't do ARP internally */ /* If the device doesn't do ARP internally */
if (!(dst->dev->flags & IFF_NOARP)) { if (!(dst->dev->flags & IFF_NOARP))
const struct sockaddr_in *dst_in4 = return fetch_ha(dst, addr, dst_in, seq);
(const struct sockaddr_in *)dst_in;
const struct sockaddr_in6 *dst_in6 =
(const struct sockaddr_in6 *)dst_in;
return dst_fetch_ha(dst, addr,
dst_in->sa_family == AF_INET ?
(const void *)&dst_in4->sin_addr.s_addr :
(const void *)&dst_in6->sin6_addr);
}
return rdma_copy_addr(addr, dst->dev, NULL); return rdma_copy_addr(addr, dst->dev, NULL);
} }
...@@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst, ...@@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
static int addr_resolve(struct sockaddr *src_in, static int addr_resolve(struct sockaddr *src_in,
const struct sockaddr *dst_in, const struct sockaddr *dst_in,
struct rdma_dev_addr *addr, struct rdma_dev_addr *addr,
bool resolve_neigh) bool resolve_neigh,
u32 seq)
{ {
struct net_device *ndev; struct net_device *ndev;
struct dst_entry *dst; struct dst_entry *dst;
...@@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in, ...@@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in,
return ret; return ret;
if (resolve_neigh) if (resolve_neigh)
ret = addr_resolve_neigh(&rt->dst, dst_in, addr); ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
ndev = rt->dst.dev; ndev = rt->dst.dev;
dev_hold(ndev); dev_hold(ndev);
...@@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in, ...@@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in,
return ret; return ret;
if (resolve_neigh) if (resolve_neigh)
ret = addr_resolve_neigh(dst, dst_in, addr); ret = addr_resolve_neigh(dst, dst_in, addr, seq);
ndev = dst->dev; ndev = dst->dev;
dev_hold(ndev); dev_hold(ndev);
...@@ -412,7 +575,7 @@ static void process_req(struct work_struct *work) ...@@ -412,7 +575,7 @@ static void process_req(struct work_struct *work)
src_in = (struct sockaddr *) &req->src_addr; src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr; dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr, req->status = addr_resolve(src_in, dst_in, req->addr,
true); true, req->seq);
if (req->status && time_after_eq(jiffies, req->timeout)) if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT; req->status = -ETIMEDOUT;
else if (req->status == -ENODATA) else if (req->status == -ENODATA)
...@@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client, ...@@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->context = context; req->context = context;
req->client = client; req->client = client;
atomic_inc(&client->refcount); atomic_inc(&client->refcount);
req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
req->status = addr_resolve(src_in, dst_in, addr, true); req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
switch (req->status) { switch (req->status) {
case 0: case 0:
req->timeout = jiffies; req->timeout = jiffies;
...@@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr, ...@@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
src_in->sa_family = dst_addr->sa_family; src_in->sa_family = dst_addr->sa_family;
} }
return addr_resolve(src_in, dst_addr, addr, false); return addr_resolve(src_in, dst_addr, addr, false, 0);
} }
EXPORT_SYMBOL(rdma_resolve_ip_route); EXPORT_SYMBOL(rdma_resolve_ip_route);
...@@ -634,7 +798,7 @@ static struct notifier_block nb = { ...@@ -634,7 +798,7 @@ static struct notifier_block nb = {
.notifier_call = netevent_callback .notifier_call = netevent_callback
}; };
static int __init addr_init(void) int addr_init(void)
{ {
addr_wq = create_singlethread_workqueue("ib_addr"); addr_wq = create_singlethread_workqueue("ib_addr");
if (!addr_wq) if (!addr_wq)
...@@ -642,15 +806,13 @@ static int __init addr_init(void) ...@@ -642,15 +806,13 @@ static int __init addr_init(void)
register_netevent_notifier(&nb); register_netevent_notifier(&nb);
rdma_addr_register_client(&self); rdma_addr_register_client(&self);
return 0; return 0;
} }
static void __exit addr_cleanup(void) void addr_cleanup(void)
{ {
rdma_addr_unregister_client(&self); rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb); unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq); destroy_workqueue(addr_wq);
} }
module_init(addr_init);
module_exit(addr_cleanup);
...@@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev, ...@@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
return _upper == upper; return _upper == upper;
} }
int addr_init(void);
void addr_cleanup(void);
int ib_mad_init(void);
void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb);
int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb);
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct netlink_callback *cb);
#endif /* _CORE_PRIV_H */ #endif /* _CORE_PRIV_H */
...@@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, ...@@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
} }
EXPORT_SYMBOL(ib_get_net_dev_by_params); EXPORT_SYMBOL(ib_get_net_dev_by_params);
/*
 * Netlink callback table for the RDMA_NL_LS client, indexed by LS opcode.
 * Each opcode's .dump hook points at the handler for that message type:
 * path-resolve responses, timeout updates and IP-resolve responses.
 */
static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.dump = ib_nl_handle_resolve_resp,
.module = THIS_MODULE },
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
.dump = ib_nl_handle_set_timeout,
.module = THIS_MODULE },
[RDMA_NL_LS_OP_IP_RESOLVE] = {
.dump = ib_nl_handle_ip_res_resp,
.module = THIS_MODULE },
};
/*
 * Register ibnl_ls_cb_table as the RDMA_NL_LS netlink client.
 * Returns the result of ibnl_add_client() unchanged.
 */
static int ib_add_ibnl_clients(void)
{
return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
ibnl_ls_cb_table);
}
/* Unregister the RDMA_NL_LS netlink client. */
static void ib_remove_ibnl_clients(void)
{
ibnl_remove_client(RDMA_NL_LS);
}
static int __init ib_core_init(void) static int __init ib_core_init(void)
{ {
int ret; int ret;
...@@ -983,10 +1006,41 @@ static int __init ib_core_init(void) ...@@ -983,10 +1006,41 @@ static int __init ib_core_init(void)
goto err_sysfs; goto err_sysfs;
} }
ret = addr_init();
if (ret) {
pr_warn("Could't init IB address resolution\n");
goto err_ibnl;
}
ret = ib_mad_init();
if (ret) {
pr_warn("Couldn't init IB MAD\n");
goto err_addr;
}
ret = ib_sa_init();
if (ret) {
pr_warn("Couldn't init SA\n");
goto err_mad;
}
if (ib_add_ibnl_clients()) {
pr_warn("Couldn't register ibnl clients\n");
goto err_sa;
}
ib_cache_setup(); ib_cache_setup();
return 0; return 0;
err_sa:
ib_sa_cleanup();
err_mad:
ib_mad_cleanup();
err_addr:
addr_cleanup();
err_ibnl:
ibnl_cleanup();
err_sysfs: err_sysfs:
class_unregister(&ib_class); class_unregister(&ib_class);
err_comp: err_comp:
...@@ -999,6 +1053,10 @@ static int __init ib_core_init(void) ...@@ -999,6 +1053,10 @@ static int __init ib_core_init(void)
static void __exit ib_core_cleanup(void) static void __exit ib_core_cleanup(void)
{ {
ib_cache_cleanup(); ib_cache_cleanup();
ib_remove_ibnl_clients();
ib_sa_cleanup();
ib_mad_cleanup();
addr_cleanup();
ibnl_cleanup(); ibnl_cleanup();
class_unregister(&ib_class); class_unregister(&ib_class);
destroy_workqueue(ib_comp_wq); destroy_workqueue(ib_comp_wq);
......
...@@ -47,11 +47,7 @@ ...@@ -47,11 +47,7 @@
#include "smi.h" #include "smi.h"
#include "opa_smi.h" #include "opa_smi.h"
#include "agent.h" #include "agent.h"
#include "core_priv.h"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("kernel IB MAD API");
MODULE_AUTHOR("Hal Rosenstock");
MODULE_AUTHOR("Sean Hefty");
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
...@@ -3316,7 +3312,7 @@ static struct ib_client mad_client = { ...@@ -3316,7 +3312,7 @@ static struct ib_client mad_client = {
.remove = ib_mad_remove_device .remove = ib_mad_remove_device
}; };
static int __init ib_mad_init_module(void) int ib_mad_init(void)
{ {
mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE); mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE); mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
...@@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void) ...@@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void)
return 0; return 0;
} }
static void __exit ib_mad_cleanup_module(void) void ib_mad_cleanup(void)
{ {
ib_unregister_client(&mad_client); ib_unregister_client(&mad_client);
} }
module_init(ib_mad_init_module);
module_exit(ib_mad_cleanup_module);
...@@ -93,6 +93,18 @@ enum { ...@@ -93,6 +93,18 @@ enum {
struct mcast_member; struct mcast_member;
/*
 * There are 4 types of join states:
 * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
 *
 * Each enumerator is both an index into mcast_group.members[] and the bit
 * position of that membership type within a join_state byte (bit 0 is
 * FULLMEMBER_JOIN).  NUM_JOIN_MEMBERSHIP_TYPES is the count and sizes the
 * members[] array.
 *
 * NOTE(review): SENDONLY_NONMEBER_JOIN is spelled without the second 'M'
 * ("NONMEBER"); renaming it requires updating any users elsewhere.
 */
enum {
FULLMEMBER_JOIN,
NONMEMBER_JOIN,
SENDONLY_NONMEBER_JOIN,
SENDONLY_FULLMEMBER_JOIN,
NUM_JOIN_MEMBERSHIP_TYPES,
};
struct mcast_group { struct mcast_group {
struct ib_sa_mcmember_rec rec; struct ib_sa_mcmember_rec rec;
struct rb_node node; struct rb_node node;
...@@ -102,7 +114,7 @@ struct mcast_group { ...@@ -102,7 +114,7 @@ struct mcast_group {
struct list_head pending_list; struct list_head pending_list;
struct list_head active_list; struct list_head active_list;
struct mcast_member *last_join; struct mcast_member *last_join;
int members[3]; int members[NUM_JOIN_MEMBERSHIP_TYPES];
atomic_t refcount; atomic_t refcount;
enum mcast_group_state state; enum mcast_group_state state;
struct ib_sa_query *query; struct ib_sa_query *query;
...@@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member) ...@@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member)
} }
/* /*
* A multicast group has three types of members: full member, non member, and * A multicast group has four types of members: full member, non member,
* send only member. We need to keep track of the number of members of each * sendonly non member and sendonly full member.
* We need to keep track of the number of members of each
* type based on their join state. Adjust the number of members the belong to * type based on their join state. Adjust the number of members the belong to
* the specified join states. * the specified join states.
*/ */
...@@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc) ...@@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{ {
int i; int i;
for (i = 0; i < 3; i++, join_state >>= 1) for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
if (join_state & 0x1) if (join_state & 0x1)
group->members[i] += inc; group->members[i] += inc;
} }
...@@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group) ...@@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group)
u8 leave_state = 0; u8 leave_state = 0;
int i; int i;
for (i = 0; i < 3; i++) for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++)
if (!group->members[i]) if (!group->members[i])
leave_state |= (0x1 << i); leave_state |= (0x1 << i);
......
...@@ -53,10 +53,6 @@ ...@@ -53,10 +53,6 @@
#include "sa.h" #include "sa.h"
#include "core_priv.h" #include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 #define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
...@@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query { ...@@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query {
struct ib_sa_query sa_query; struct ib_sa_query sa_query;
}; };
/*
 * State for one SA ClassPortInfo query.  The embedded sa_query is what the
 * generic SA code works with; container_of() on it recovers this structure.
 */
struct ib_sa_classport_info_query {
/* Completion hook: (status, unpacked record or NULL, context). */
void (*callback)(int, struct ib_class_port_info *, void *);
/* Opaque pointer passed back as the callback's last argument. */
void *context;
struct ib_sa_query sa_query;
};
struct ib_sa_mcmember_query { struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context; void *context;
...@@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = { ...@@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = {
.size_bits = 2*64 }, .size_bits = 2*64 },
}; };
/*
 * Expands to the struct-side designated initializers of an ib_field entry
 * for one member of struct ib_class_port_info: its byte offset, its size in
 * bytes and a "ib_class_port_info:<member>" name string.  The wire-side
 * position is supplied by the table entry that uses the macro.
 */
#define CLASSPORTINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
.struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
.field_name = "ib_class_port_info:" #field
/*
 * Field map used by ib_unpack() to convert a ClassPortInfo attribute from
 * its MAD wire layout into struct ib_class_port_info.  Every entry gives the
 * field's position in the packed buffer as (offset_words, offset_bits,
 * size_bits); the offsets in this table are consistent with 32-bit words
 * (e.g. a 128-bit GID at word 2 is followed by the next field at word 6).
 */
static const struct ib_field classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 8 },
{ CLASSPORTINFO_REC_FIELD(class_version),
.offset_words = 0,
.offset_bits = 8,
.size_bits = 8 },
{ CLASSPORTINFO_REC_FIELD(capability_mask),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
.offset_words = 1,
.offset_bits = 0,
.size_bits = 32 },
/* Redirection information */
{ CLASSPORTINFO_REC_FIELD(redirect_gid),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 128 },
{ CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(redirect_lid),
.offset_words = 7,
.offset_bits = 0,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(redirect_pkey),
.offset_words = 7,
.offset_bits = 16,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(redirect_qp),
.offset_words = 8,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(redirect_qkey),
.offset_words = 9,
.offset_bits = 0,
.size_bits = 32 },
/* Trap destination information */
{ CLASSPORTINFO_REC_FIELD(trap_gid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 128 },
{ CLASSPORTINFO_REC_FIELD(trap_tcslfl),
.offset_words = 14,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(trap_lid),
.offset_words = 15,
.offset_bits = 0,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(trap_pkey),
.offset_words = 15,
.offset_bits = 16,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(trap_hlqp),
.offset_words = 16,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(trap_qkey),
.offset_words = 17,
.offset_bits = 0,
.size_bits = 32 },
};
#define GUIDINFO_REC_FIELD(field) \ #define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
...@@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work) ...@@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work)
spin_unlock_irqrestore(&ib_nl_request_lock, flags); spin_unlock_irqrestore(&ib_nl_request_lock, flags);
} }
static int ib_nl_handle_set_timeout(struct sk_buff *skb, int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb) struct netlink_callback *cb)
{ {
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
int timeout, delta, abs_delta; int timeout, delta, abs_delta;
...@@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) ...@@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
return 1; return 1;
} }
static int ib_nl_handle_resolve_resp(struct sk_buff *skb, int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb) struct netlink_callback *cb)
{ {
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh; const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
unsigned long flags; unsigned long flags;
...@@ -838,15 +916,6 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb, ...@@ -838,15 +916,6 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
return skb->len; return skb->len;
} }
static struct ibnl_client_cbs ib_sa_cb_table[] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.dump = ib_nl_handle_resolve_resp,
.module = THIS_MODULE },
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
.dump = ib_nl_handle_set_timeout,
.module = THIS_MODULE },
};
static void free_sm_ah(struct kref *kref) static void free_sm_ah(struct kref *kref)
{ {
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
...@@ -1645,6 +1714,97 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, ...@@ -1645,6 +1714,97 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
} }
EXPORT_SYMBOL(ib_sa_guid_info_rec_query); EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
/* Support get SA ClassPortInfo */
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
if (mad) {
struct ib_class_port_info rec;
ib_unpack(classport_info_rec_table,
ARRAY_SIZE(classport_info_rec_table),
mad->data, &rec);
query->callback(status, &rec, query->context);
} else {
query->callback(status, NULL, query->context);
}
}
/* Release hook: free the ClassPortInfo query that embeds @sa_query. */
static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
{
	struct ib_sa_classport_info_query *cpi_query =
		container_of(sa_query, struct ib_sa_classport_info_query, sa_query);

	kfree(cpi_query);
}
/*
 * Issue an SA ClassPortInfo GET on @device / @port_num.
 *
 * @client:     SA client on whose behalf the query runs; a reference is
 *              taken for the lifetime of the query
 * @timeout_ms: passed through to send_mad()
 * @gfp_mask:   allocation flags for the query and its MAD
 * @callback:   invoked on completion with (status, record or NULL, context);
 *              when NULL no completion handler is installed
 * @context:    opaque value handed back to @callback
 * @sa_query:   receives the in-flight query, or NULL if sending failed
 *
 * Returns -ENODEV if @device has no SA client data, -ENOMEM if the query
 * cannot be allocated, the alloc_mad() error if MAD allocation fails, and
 * otherwise the value returned by send_mad() (negative on failure).
 */
int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
				   struct ib_device *device, u8 port_num,
				   int timeout_ms, gfp_t gfp_mask,
				   void (*callback)(int status,
						    struct ib_class_port_info *resp,
						    void *context),
				   void *context,
				   struct ib_sa_query **sa_query)
{
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_classport_info_query *cpi_query;
	struct ib_mad_agent *mad_agent;
	struct ib_sa_port *sa_port;
	struct ib_sa_mad *sa_mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	sa_port = &sa_dev->port[port_num - sa_dev->start_port];
	mad_agent = sa_port->agent;

	cpi_query = kzalloc(sizeof(*cpi_query), gfp_mask);
	if (!cpi_query)
		return -ENOMEM;

	cpi_query->sa_query.port = sa_port;
	ret = alloc_mad(&cpi_query->sa_query, gfp_mask);
	if (ret)
		goto out_free_query;

	ib_sa_client_get(client);
	cpi_query->sa_query.client = client;
	cpi_query->callback = callback;
	cpi_query->context = context;

	sa_mad = cpi_query->sa_query.mad_buf->mad;
	init_mad(sa_mad, mad_agent);

	if (callback)
		cpi_query->sa_query.callback = ib_sa_classport_info_rec_callback;
	else
		cpi_query->sa_query.callback = NULL;
	cpi_query->sa_query.release = ib_sa_portclass_info_rec_release;

	/* support GET only */
	sa_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
	sa_mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
	sa_mad->sa_hdr.comp_mask = 0;

	/* Publish the handle before sending, then retract it on failure. */
	*sa_query = &cpi_query->sa_query;

	ret = send_mad(&cpi_query->sa_query, timeout_ms, gfp_mask);
	if (ret >= 0)
		return ret;

	*sa_query = NULL;
	ib_sa_client_put(cpi_query->sa_query.client);
	free_mad(&cpi_query->sa_query);

out_free_query:
	kfree(cpi_query);
	return ret;
}
static void send_handler(struct ib_mad_agent *agent, static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc) struct ib_mad_send_wc *mad_send_wc)
{ {
...@@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data) ...@@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
kfree(sa_dev); kfree(sa_dev);
} }
static int __init ib_sa_init(void) int ib_sa_init(void)
{ {
int ret; int ret;
...@@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void) ...@@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void)
goto err3; goto err3;
} }
if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
ib_sa_cb_table)) {
pr_err("Failed to add netlink callback\n");
ret = -EINVAL;
goto err4;
}
INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
return 0; return 0;
err4:
destroy_workqueue(ib_nl_wq);
err3: err3:
mcast_cleanup(); mcast_cleanup();
err2: err2:
...@@ -1839,9 +1992,8 @@ static int __init ib_sa_init(void) ...@@ -1839,9 +1992,8 @@ static int __init ib_sa_init(void)
return ret; return ret;
} }
static void __exit ib_sa_cleanup(void) void ib_sa_cleanup(void)
{ {
ibnl_remove_client(RDMA_NL_LS);
cancel_delayed_work(&ib_nl_timed_work); cancel_delayed_work(&ib_nl_timed_work);
flush_workqueue(ib_nl_wq); flush_workqueue(ib_nl_wq);
destroy_workqueue(ib_nl_wq); destroy_workqueue(ib_nl_wq);
...@@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void) ...@@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void)
ib_unregister_client(&sa_client); ib_unregister_client(&sa_client);
idr_destroy(&query_idr); idr_destroy(&query_idr);
} }
module_init(ib_sa_init);
module_exit(ib_sa_cleanup);
...@@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp, ...@@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp,
* Set the most significant bit of CM2 to indicate support for * Set the most significant bit of CM2 to indicate support for
* congestion statistics * congestion statistics
*/ */
p->reserved[0] = dd->psxmitwait_supported << 7; ib_set_cpi_capmask2(p,
dd->psxmitwait_supported <<
(31 - IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE));
/* /*
* Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec. * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
*/ */
p->resp_time_value = 18; ib_set_cpi_resp_time(p, 18);
return reply((struct ib_smp *) pmp); return reply((struct ib_smp *) pmp);
} }
......
...@@ -92,6 +92,8 @@ enum { ...@@ -92,6 +92,8 @@ enum {
IPOIB_FLAG_UMCAST = 10, IPOIB_FLAG_UMCAST = 10,
IPOIB_STOP_NEIGH_GC = 11, IPOIB_STOP_NEIGH_GC = 11,
IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_NEIGH_TBL_FLUSH = 12,
IPOIB_FLAG_DEV_ADDR_SET = 13,
IPOIB_FLAG_DEV_ADDR_CTRL = 14,
IPOIB_MAX_BACKOFF_SECONDS = 16, IPOIB_MAX_BACKOFF_SECONDS = 16,
...@@ -392,6 +394,7 @@ struct ipoib_dev_priv { ...@@ -392,6 +394,7 @@ struct ipoib_dev_priv {
struct ipoib_ethtool_st ethtool; struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer; struct timer_list poll_timer;
unsigned max_send_sge; unsigned max_send_sge;
bool sm_fullmember_sendonly_support;
}; };
struct ipoib_ah { struct ipoib_ah {
...@@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work); ...@@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work);
void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev);
int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
struct ipoib_dev_priv *ipoib_intf_alloc(const char *format); struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port); int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
......
...@@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv) ...@@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv)
return 0; return 0;
} }
/*
 * Returns true if the device address of the ipoib interface has changed and
 * the new address is a valid one (i.e. it is present in the GID table);
 * returns false otherwise.
 *
 * As a side effect, the subnet prefix of priv->local_gid and of the netdev
 * hardware address is refreshed from GID index 0, and the
 * IPOIB_FLAG_DEV_ADDR_SET / IPOIB_FLAG_DEV_ADDR_CTRL flags are updated.
 *
 * The netdev address lock is taken with bottom halves disabled: this function
 * runs in process context, while the same lock may be acquired from softirq
 * context by the networking core, so leaving BHs enabled risks a deadlock.
 * The lock is dropped around ib_find_gid().
 */
static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
{
	union ib_gid search_gid;
	union ib_gid gid0;
	union ib_gid *netdev_gid;
	int err;
	u16 index;
	u8 port;
	bool gid_valid;
	bool ret = false;

	/* The GID part of the hardware address starts at byte offset 4 */
	netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
	if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
		return false;

	netif_addr_lock_bh(priv->dev);

	/* The subnet prefix may have changed, update it now so we won't have
	 * to do it later
	 */
	priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
	netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix;
	search_gid.global.subnet_prefix = gid0.global.subnet_prefix;

	search_gid.global.interface_id = priv->local_gid.global.interface_id;

	netif_addr_unlock_bh(priv->dev);

	err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
			  priv->dev, &port, &index);

	netif_addr_lock_bh(priv->dev);

	if (search_gid.global.interface_id !=
	    priv->local_gid.global.interface_id)
		/* There was a change while we were looking up the gid, bail
		 * here and let the next work sort this out
		 */
		goto out;

	/* The next section of code needs some background:
	 * Per IB spec the port GUID can't change if the HCA is powered on.
	 * The port GUID is the basis for the GID at index 0, which is the
	 * basis for the default device address of an ipoib interface.
	 *
	 * So it seems the flow should be:
	 * if user_changed_dev_addr && gid in gid tbl
	 *	set bit dev_addr_set
	 *	return true
	 * else
	 *	return false
	 *
	 * The issue is that there are devices that don't follow the spec:
	 * they change the port GUID while the HCA is powered. So, in order
	 * not to break userspace applications, we need to check whether the
	 * user wanted to control the device address, and we assume that if
	 * the user sets the device address back to be based on GID index 0,
	 * they no longer wish to control it.
	 *
	 * If the user doesn't control the device address,
	 * IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed, it means
	 * the port GUID has changed and the GID at index 0 has changed,
	 * so we need to change priv->local_gid and priv->dev->dev_addr
	 * to reflect the new GID.
	 */

	/* port and index are only meaningful when ib_find_gid() succeeded */
	gid_valid = !err && port == priv->port;

	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
		if (gid_valid) {
			set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
			/* An address based on GID index 0 means the user is
			 * not (or no longer) controlling the address.
			 */
			if (index == 0)
				clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
					  &priv->flags);
			else
				set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
			ret = true;
		}
	} else if (gid_valid) {
		ret = true;
	} else if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
		/* Address was valid, is not user controlled and vanished
		 * from the table: follow the new GID at index 0.
		 */
		memcpy(&priv->local_gid, &gid0, sizeof(priv->local_gid));
		memcpy(priv->dev->dev_addr + 4, &gid0,
		       sizeof(priv->local_gid));
		ret = true;
	}

out:
	netif_addr_unlock_bh(priv->dev);

	return ret;
}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
enum ipoib_flush_level level, enum ipoib_flush_level level,
int nesting) int nesting)
...@@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ...@@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
level != IPOIB_FLUSH_HEAVY) { level != IPOIB_FLUSH_HEAVY) {
/* Make sure the dev_addr is set even if not flushing */
if (level == IPOIB_FLUSH_LIGHT)
ipoib_dev_addr_changed_valid(priv);
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return; return;
} }
...@@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ...@@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
update_parent_pkey(priv); update_parent_pkey(priv);
else else
update_child_pkey(priv); update_child_pkey(priv);
} } else if (level == IPOIB_FLUSH_LIGHT)
ipoib_dev_addr_changed_valid(priv);
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
return; return;
} }
...@@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ...@@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
if (level >= IPOIB_FLUSH_NORMAL) if (level >= IPOIB_FLUSH_NORMAL)
ipoib_ib_dev_up(dev); ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task); if (ipoib_dev_addr_changed_valid(priv))
ipoib_mcast_restart_task(&priv->restart_task);
} }
} }
......
...@@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params( ...@@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
struct ib_device *dev, u8 port, u16 pkey, struct ib_device *dev, u8 port, u16 pkey,
const union ib_gid *gid, const struct sockaddr *addr, const union ib_gid *gid, const struct sockaddr *addr,
void *client_data); void *client_data);
static int ipoib_set_mac(struct net_device *dev, void *addr);
static struct ib_client ipoib_client = { static struct ib_client ipoib_client = {
.name = "ipoib", .name = "ipoib",
...@@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev) ...@@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
priv->sm_fullmember_sendonly_support = false;
if (ipoib_ib_dev_open(dev)) { if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0; return 0;
...@@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev) ...@@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
spin_unlock_irq(&priv->lock); spin_unlock_irq(&priv->lock);
} }
/*
 * Context handed to classport_info_query_cb() by
 * ipoib_check_sm_sendonly_fullmember_support().
 */
struct classport_info_context {
/* interface whose sm_fullmember_sendonly_support flag the callback updates */
struct ipoib_dev_priv *priv;
/* completed by the callback; the issuer waits on it before freeing the context */
struct completion done;
/* passed to ib_sa_classport_info_rec_query() as its sa_query output argument */
struct ib_sa_query *sa_query;
};
/*
 * Completion callback for the SA ClassPortInfo query.
 *
 * @status:  query status; non-zero means the query failed.
 * @rec:     the returned ClassPortInfo, may be NULL.
 * @context: the struct classport_info_context supplied by the issuer.
 *
 * Records in priv->sm_fullmember_sendonly_support whether the response
 * advertises IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT, then signals the
 * waiter through the context's completion.
 */
static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
				    void *context)
{
	struct classport_info_context *cb_ctx = context;
	struct ipoib_dev_priv *priv;

	/* Without a context there is neither a priv to update nor a waiter to
	 * complete; warn and bail out instead of dereferencing NULL.
	 */
	if (WARN_ON(!context))
		return;

	priv = cb_ctx->priv;

	if (status || !rec) {
		pr_debug("device: %s failed query classport_info status: %d\n",
			 priv->dev->name, status);
		/* keeps the default, will try next mcast_restart */
		priv->sm_fullmember_sendonly_support = false;
		goto out;
	}

	if (ib_get_cpi_capmask2(rec) &
	    IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
		pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
			 priv->dev->name);
		priv->sm_fullmember_sendonly_support = true;
	} else {
		pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
			 priv->dev->name);
		priv->sm_fullmember_sendonly_support = false;
	}

out:
	complete(&cb_ctx->done);
}
/*
 * Issue an SA ClassPortInfo query for @priv's port and block until
 * classport_info_query_cb() has processed the answer.
 *
 * Returns -ENOMEM if the context cannot be allocated, otherwise the value
 * returned by ib_sa_classport_info_rec_query() (negative on failure).
 */
int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
{
	struct classport_info_context *ctx;
	int rc;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->priv = priv;
	init_completion(&ctx->done);

	rc = ib_sa_classport_info_rec_query(&ipoib_sa_client,
					    priv->ca, priv->port, 3000,
					    GFP_KERNEL,
					    classport_info_query_cb,
					    ctx,
					    &ctx->sa_query);
	if (rc < 0)
		pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
			priv->dev->name, rc);
	else
		/* wait for the callback to finish before returning */
		wait_for_completion(&ctx->done);

	kfree(ctx);
	return rc;
}
void ipoib_flush_paths(struct net_device *dev) void ipoib_flush_paths(struct net_device *dev)
{ {
struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_dev_priv *priv = netdev_priv(dev);
...@@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { ...@@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_get_vf_config = ipoib_get_vf_config, .ndo_get_vf_config = ipoib_get_vf_config,
.ndo_get_vf_stats = ipoib_get_vf_stats, .ndo_get_vf_stats = ipoib_get_vf_stats,
.ndo_set_vf_guid = ipoib_set_vf_guid, .ndo_set_vf_guid = ipoib_set_vf_guid,
.ndo_set_mac_address = ipoib_set_mac,
}; };
static const struct net_device_ops ipoib_netdev_ops_vf = { static const struct net_device_ops ipoib_netdev_ops_vf = {
...@@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev) ...@@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_umcast); return device_create_file(&dev->dev, &dev_attr_umcast);
} }
/*
 * Install the interface ID of @gid as the interface ID of @priv's local GID
 * and copy the resulting GID into the netdev hardware address (offset 4).
 * IPOIB_FLAG_DEV_ADDR_SET is cleared; ipoib_dev_addr_changed_valid() sets it
 * again once the address is found in the GID table.
 *
 * For a parent interface (IPOIB_FLAG_SUBINTERFACE not set) the same update is
 * applied to every child interface.
 *
 * The netdev address lock is taken with bottom halves disabled because it may
 * also be acquired from softirq context by the networking core.
 */
static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
{
	struct ipoib_dev_priv *child_priv;
	struct net_device *netdev = priv->dev;

	netif_addr_lock_bh(netdev);

	memcpy(&priv->local_gid.global.interface_id,
	       &gid->global.interface_id,
	       sizeof(gid->global.interface_id));
	memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
	clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);

	netif_addr_unlock_bh(netdev);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		down_read(&priv->vlan_rwsem);
		list_for_each_entry(child_priv, &priv->child_intfs, list)
			set_base_guid(child_priv, gid);
		up_read(&priv->vlan_rwsem);
	}
}
/*
 * Validate a requested link-layer address against the current one.
 *
 * @dev: the ipoib net device.
 * @ss:  the requested address; the GID is read from ss->__data + 4.
 *
 * Returns 0 if the address is acceptable, -EINVAL otherwise.
 *
 * The netdev address lock is taken with bottom halves disabled because it may
 * also be acquired from softirq context by the networking core.
 */
static int ipoib_check_lladdr(struct net_device *dev,
			      struct sockaddr_storage *ss)
{
	union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
	int ret = 0;

	netif_addr_lock_bh(dev);

	/* Make sure the QPN, reserved and subnet prefix match the current
	 * lladdr, it also makes sure the lladdr is unicast.
	 */
	if (memcmp(dev->dev_addr, ss->__data,
		   4 + sizeof(gid->global.subnet_prefix)) ||
	    gid->global.interface_id == 0)
		ret = -EINVAL;

	netif_addr_unlock_bh(dev);

	return ret;
}
/*
 * ndo_set_mac_address handler.
 *
 * Rejects the change with -EBUSY while the device is running unless
 * IFF_LIVE_ADDR_CHANGE is set, validates the new address with
 * ipoib_check_lladdr(), installs its GID via set_base_guid() and queues a
 * light flush. Returns 0 on success or the validation error.
 */
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
	struct sockaddr_storage *new_addr = addr;
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	int rc;

	if (netif_running(dev) && !(dev->priv_flags & IFF_LIVE_ADDR_CHANGE))
		return -EBUSY;

	rc = ipoib_check_lladdr(dev, new_addr);
	if (!rc) {
		/* the GID follows the 4 leading bytes of the address */
		set_base_guid(priv, (union ib_gid *)(new_addr->__data + 4));
		queue_work(ipoib_workqueue, &priv->flush_light);
	}

	return rc;
}
static ssize_t create_child(struct device *dev, static ssize_t create_child(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr,
const char *buf, size_t count) const char *buf, size_t count)
...@@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format, ...@@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto device_init_failed; goto device_init_failed;
} else } else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
result = ipoib_dev_init(priv->dev, hca, port); result = ipoib_dev_init(priv->dev, hca, port);
if (result < 0) { if (result < 0) {
......
...@@ -64,6 +64,9 @@ struct ipoib_mcast_iter { ...@@ -64,6 +64,9 @@ struct ipoib_mcast_iter {
unsigned int send_only; unsigned int send_only;
}; };
/* join state that allows creating mcg with sendonly member request */
#define SENDONLY_FULLMEMBER_JOIN 8
/* /*
* This should be called with the priv->lock held * This should be called with the priv->lock held
*/ */
...@@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work) ...@@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task); carrier_on_task);
struct ib_port_attr attr; struct ib_port_attr attr;
int ret;
if (ib_query_port(priv->ca, priv->port, &attr) || if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) { attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return; return;
} }
/*
* Check whether sendonly MCGs can be joined with the sendonly-fullmember
* join state. This is done here, after successfully joining the broadcast
* group, because the broadcast group must always be joined first and is
* always re-joined if the SM changes substantially.
*/
ret = ipoib_check_sm_sendonly_fullmember_support(priv);
if (ret < 0)
pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
priv->dev->name, ret);
/* /*
* Take rtnl_lock to avoid racing with ipoib_stop() and * Take rtnl_lock to avoid racing with ipoib_stop() and
...@@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) ...@@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
rec.hop_limit = priv->broadcast->mcmember.hop_limit; rec.hop_limit = priv->broadcast->mcmember.hop_limit;
/* /*
* Send-only IB Multicast joins do not work at the core * Send-only IB Multicast joins work at the core IB layer but
* IB layer yet, so we can't use them here. However, * require specific SM support.
* we are emulating an Ethernet multicast send, which * We can use such joins here only if the current SM supports that feature.
* does not require a multicast subscription and will * However, if not, we emulate an Ethernet multicast send,
* still send properly. The most appropriate thing to * which does not require a multicast subscription and will
* still send properly. The most appropriate thing to
* do is to create the group if it doesn't exist as that * do is to create the group if it doesn't exist as that
* most closely emulates the behavior, from a user space * most closely emulates the behavior, from a user space
* application perspecitive, of Ethernet multicast * application perspective, of Ethernet multicast operation.
* operation. For now, we do a full join, maybe later
* when the core IB layers support send only joins we
* will use them.
*/ */
#if 0 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) priv->sm_fullmember_sendonly_support)
rec.join_state = 4; /* SM supports sendonly-fullmember, otherwise fallback to full-member */
#endif rec.join_state = SENDONLY_FULLMEMBER_JOIN;
} }
spin_unlock_irq(&priv->lock); spin_unlock_irq(&priv->lock);
...@@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work) ...@@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
return; return;
} }
priv->local_lid = port_attr.lid; priv->local_lid = port_attr.lid;
netif_addr_lock(dev);
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL)) if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
ipoib_warn(priv, "ib_query_gid() failed\n"); netif_addr_unlock(dev);
else return;
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); }
netif_addr_unlock(dev);
spin_lock_irq(&priv->lock); spin_lock_irq(&priv->lock);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
......
...@@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler, ...@@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler,
queue_work(ipoib_workqueue, &priv->flush_normal); queue_work(ipoib_workqueue, &priv->flush_normal);
} else if (record->event == IB_EVENT_PKEY_CHANGE) { } else if (record->event == IB_EVENT_PKEY_CHANGE) {
queue_work(ipoib_workqueue, &priv->flush_heavy); queue_work(ipoib_workqueue, &priv->flush_heavy);
} else if (record->event == IB_EVENT_GID_CHANGE &&
!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
queue_work(ipoib_workqueue, &priv->flush_light);
} }
} }
...@@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, ...@@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
priv->pkey = pkey; priv->pkey = pkey;
memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
priv->dev->broadcast[8] = pkey >> 8; priv->dev->broadcast[8] = pkey >> 8;
priv->dev->broadcast[9] = pkey & 0xff; priv->dev->broadcast[9] = pkey & 0xff;
......
...@@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad) ...@@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
memset(cif, 0, sizeof(*cif)); memset(cif, 0, sizeof(*cif));
cif->base_version = 1; cif->base_version = 1;
cif->class_version = 1; cif->class_version = 1;
cif->resp_time_value = 20;
ib_set_cpi_resp_time(cif, 20);
mad->mad_hdr.status = 0; mad->mad_hdr.status = 0;
} }
......
...@@ -239,12 +239,15 @@ struct ib_vendor_mad { ...@@ -239,12 +239,15 @@ struct ib_vendor_mad {
#define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001) #define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001)
#define IB_CLASS_PORT_INFO_RESP_TIME_MASK 0x1F
#define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5
struct ib_class_port_info { struct ib_class_port_info {
u8 base_version; u8 base_version;
u8 class_version; u8 class_version;
__be16 capability_mask; __be16 capability_mask;
u8 reserved[3]; /* 27 bits for cap_mask2, 5 bits for resp_time */
u8 resp_time_value; __be32 cap_mask2_resp_time;
u8 redirect_gid[16]; u8 redirect_gid[16];
__be32 redirect_tcslfl; __be32 redirect_tcslfl;
__be16 redirect_lid; __be16 redirect_lid;
...@@ -259,6 +262,59 @@ struct ib_class_port_info { ...@@ -259,6 +262,59 @@ struct ib_class_port_info {
__be32 trap_qkey; __be32 trap_qkey;
}; };
/**
* ib_get_cpi_resp_time - Returns the resp_time value from
* cap_mask2_resp_time in ib_class_port_info.
* @cpi: A struct ib_class_port_info mad.
*/
static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi)
{
return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) &
IB_CLASS_PORT_INFO_RESP_TIME_MASK);
}
/**
 * ib_set_cpi_resp_time - Store the response time in the
 * cap_mask2_resp_time field of an ib_class_port_info.
 * @cpi: A struct ib_class_port_info.
 * @rtime: The response time to set.
 *
 * Only the bits of @rtime covered by IB_CLASS_PORT_INFO_RESP_TIME_MASK are
 * stored; the remaining bits of the field are left unchanged.
 */
static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi,
					u8 rtime)
{
	u32 word = be32_to_cpu(cpi->cap_mask2_resp_time);

	word &= ~IB_CLASS_PORT_INFO_RESP_TIME_MASK;
	word |= rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK;
	cpi->cap_mask2_resp_time = cpu_to_be32(word);
}
/**
 * ib_get_cpi_capmask2 - Extract the capmask2 value from the
 * cap_mask2_resp_time field of an ib_class_port_info.
 * @cpi: A struct ib_class_port_info mad.
 *
 * capmask2 is the part of the CPU-order value above the
 * IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE response time bits.
 */
static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi)
{
	u32 word = be32_to_cpu(cpi->cap_mask2_resp_time);

	return word >> IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE;
}
/**
 * ib_set_cpi_capmask2 - Store capmask2 in the cap_mask2_resp_time
 * field of an ib_class_port_info.
 * @cpi: A struct ib_class_port_info.
 * @capmask2: The capmask2 to set.
 *
 * @capmask2 is shifted above the response time bits; the response time bits
 * already present in the field are preserved.
 */
static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi,
				       u32 capmask2)
{
	u32 word = be32_to_cpu(cpi->cap_mask2_resp_time);

	word &= IB_CLASS_PORT_INFO_RESP_TIME_MASK;
	word |= capmask2 << IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE;
	cpi->cap_mask2_resp_time = cpu_to_be32(word);
}
struct ib_mad_notice_attr { struct ib_mad_notice_attr {
u8 generic_type; u8 generic_type;
u8 prod_type_msb; u8 prod_type_msb;
......
...@@ -94,6 +94,8 @@ enum ib_sa_selector { ...@@ -94,6 +94,8 @@ enum ib_sa_selector {
IB_SA_BEST = 3 IB_SA_BEST = 3
}; };
#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12)
/* /*
* Structures for SA records are named "struct ib_sa_xxx_rec." No * Structures for SA records are named "struct ib_sa_xxx_rec." No
* attempt is made to pack structures to match the physical layout of * attempt is made to pack structures to match the physical layout of
...@@ -439,4 +441,14 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client, ...@@ -439,4 +441,14 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context, void *context,
struct ib_sa_query **sa_query); struct ib_sa_query **sa_query);
/* Query the SA for its ClassPortInfo */
int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_class_port_info *resp,
void *context),
void *context,
struct ib_sa_query **sa_query);
#endif /* IB_SA_H */ #endif /* IB_SA_H */
...@@ -135,10 +135,12 @@ enum { ...@@ -135,10 +135,12 @@ enum {
* Local service operations: * Local service operations:
* RESOLVE - The client requests the local service to resolve a path. * RESOLVE - The client requests the local service to resolve a path.
* SET_TIMEOUT - The local service requests the client to set the timeout. * SET_TIMEOUT - The local service requests the client to set the timeout.
* IP_RESOLVE - The client requests the local service to resolve an IP to GID.
*/ */
enum { enum {
RDMA_NL_LS_OP_RESOLVE = 0, RDMA_NL_LS_OP_RESOLVE = 0,
RDMA_NL_LS_OP_SET_TIMEOUT, RDMA_NL_LS_OP_SET_TIMEOUT,
RDMA_NL_LS_OP_IP_RESOLVE,
RDMA_NL_LS_NUM_OPS RDMA_NL_LS_NUM_OPS
}; };
...@@ -176,6 +178,10 @@ struct rdma_ls_resolve_header { ...@@ -176,6 +178,10 @@ struct rdma_ls_resolve_header {
__u8 path_use; __u8 path_use;
}; };
/*
 * Local service header carrying a single interface index.
 * NOTE(review): presumably the request header for RDMA_NL_LS_OP_IP_RESOLVE -
 * confirm against the netlink handler that consumes it.
 */
struct rdma_ls_ip_resolve_header {
__u32 ifindex;
};
/* Local service attribute type */ /* Local service attribute type */
#define RDMA_NLA_F_MANDATORY (1 << 13) #define RDMA_NLA_F_MANDATORY (1 << 13)
#define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \ #define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \
...@@ -193,6 +199,8 @@ struct rdma_ls_resolve_header { ...@@ -193,6 +199,8 @@ struct rdma_ls_resolve_header {
* TCLASS u8 * TCLASS u8
* PKEY u16 cpu * PKEY u16 cpu
* QOS_CLASS u16 cpu * QOS_CLASS u16 cpu
* IPV4 u32 BE
* IPV6 u8[16] BE
*/ */
enum { enum {
LS_NLA_TYPE_UNSPEC = 0, LS_NLA_TYPE_UNSPEC = 0,
...@@ -204,6 +212,8 @@ enum { ...@@ -204,6 +212,8 @@ enum {
LS_NLA_TYPE_TCLASS, LS_NLA_TYPE_TCLASS,
LS_NLA_TYPE_PKEY, LS_NLA_TYPE_PKEY,
LS_NLA_TYPE_QOS_CLASS, LS_NLA_TYPE_QOS_CLASS,
LS_NLA_TYPE_IPV4,
LS_NLA_TYPE_IPV6,
LS_NLA_TYPE_MAX LS_NLA_TYPE_MAX
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment