Commit e6f61130 authored by Doug Ledford's avatar Doug Ledford

Merge branches 'misc-4.7-2', 'ipoib' and 'ib-router' into k.o/for-4.7

infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o
user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \
ib_cm.o iw_cm.o ib_addr.o \
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
......@@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
ib_sa-y := sa_query.o multicast.o
ib_cm-y := cm.o
iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
......@@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
ib_umad-y := user_mad.o
ib_ucm-y := ucm.o
......
This diff is collapsed.
......@@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
return _upper == upper;
}
int addr_init(void);
void addr_cleanup(void);
int ib_mad_init(void);
void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb);
int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb);
int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
struct netlink_callback *cb);
#endif /* _CORE_PRIV_H */
......@@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.dump = ib_nl_handle_resolve_resp,
.module = THIS_MODULE },
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
.dump = ib_nl_handle_set_timeout,
.module = THIS_MODULE },
[RDMA_NL_LS_OP_IP_RESOLVE] = {
.dump = ib_nl_handle_ip_res_resp,
.module = THIS_MODULE },
};
static int ib_add_ibnl_clients(void)
{
return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
ibnl_ls_cb_table);
}
static void ib_remove_ibnl_clients(void)
{
ibnl_remove_client(RDMA_NL_LS);
}
static int __init ib_core_init(void)
{
int ret;
......@@ -983,10 +1006,41 @@ static int __init ib_core_init(void)
goto err_sysfs;
}
ret = addr_init();
if (ret) {
pr_warn("Could't init IB address resolution\n");
goto err_ibnl;
}
ret = ib_mad_init();
if (ret) {
pr_warn("Couldn't init IB MAD\n");
goto err_addr;
}
ret = ib_sa_init();
if (ret) {
pr_warn("Couldn't init SA\n");
goto err_mad;
}
if (ib_add_ibnl_clients()) {
pr_warn("Couldn't register ibnl clients\n");
goto err_sa;
}
ib_cache_setup();
return 0;
err_sa:
ib_sa_cleanup();
err_mad:
ib_mad_cleanup();
err_addr:
addr_cleanup();
err_ibnl:
ibnl_cleanup();
err_sysfs:
class_unregister(&ib_class);
err_comp:
......@@ -999,6 +1053,10 @@ static int __init ib_core_init(void)
static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
ib_remove_ibnl_clients();
ib_sa_cleanup();
ib_mad_cleanup();
addr_cleanup();
ibnl_cleanup();
class_unregister(&ib_class);
destroy_workqueue(ib_comp_wq);
......
......@@ -47,11 +47,7 @@
#include "smi.h"
#include "opa_smi.h"
#include "agent.h"
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("kernel IB MAD API");
MODULE_AUTHOR("Hal Rosenstock");
MODULE_AUTHOR("Sean Hefty");
#include "core_priv.h"
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
......@@ -3316,7 +3312,7 @@ static struct ib_client mad_client = {
.remove = ib_mad_remove_device
};
static int __init ib_mad_init_module(void)
int ib_mad_init(void)
{
mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
......@@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void)
return 0;
}
static void __exit ib_mad_cleanup_module(void)
void ib_mad_cleanup(void)
{
ib_unregister_client(&mad_client);
}
module_init(ib_mad_init_module);
module_exit(ib_mad_cleanup_module);
......@@ -93,6 +93,18 @@ enum {
struct mcast_member;
/*
* There are 4 types of join states:
* FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
*/
enum {
FULLMEMBER_JOIN,
NONMEMBER_JOIN,
SENDONLY_NONMEBER_JOIN,
SENDONLY_FULLMEMBER_JOIN,
NUM_JOIN_MEMBERSHIP_TYPES,
};
struct mcast_group {
struct ib_sa_mcmember_rec rec;
struct rb_node node;
......@@ -102,7 +114,7 @@ struct mcast_group {
struct list_head pending_list;
struct list_head active_list;
struct mcast_member *last_join;
int members[3];
int members[NUM_JOIN_MEMBERSHIP_TYPES];
atomic_t refcount;
enum mcast_group_state state;
struct ib_sa_query *query;
......@@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member)
}
/*
* A multicast group has three types of members: full member, non member, and
* send only member. We need to keep track of the number of members of each
* A multicast group has four types of members: full member, non member,
* sendonly non member and sendonly full member.
* We need to keep track of the number of members of each
* type based on their join state. Adjust the number of members the belong to
* the specified join states.
*/
......@@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
int i;
for (i = 0; i < 3; i++, join_state >>= 1)
for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
if (join_state & 0x1)
group->members[i] += inc;
}
......@@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group)
u8 leave_state = 0;
int i;
for (i = 0; i < 3; i++)
for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++)
if (!group->members[i])
leave_state |= (0x1 << i);
......
......@@ -53,10 +53,6 @@
#include "sa.h"
#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
MODULE_LICENSE("Dual BSD/GPL");
#define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100
#define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000
#define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000
......@@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query {
struct ib_sa_query sa_query;
};
struct ib_sa_classport_info_query {
void (*callback)(int, struct ib_class_port_info *, void *);
void *context;
struct ib_sa_query sa_query;
};
struct ib_sa_mcmember_query {
void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
void *context;
......@@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = {
.size_bits = 2*64 },
};
#define CLASSPORTINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_class_port_info, field), \
.struct_size_bytes = sizeof((struct ib_class_port_info *)0)->field, \
.field_name = "ib_class_port_info:" #field
static const struct ib_field classport_info_rec_table[] = {
{ CLASSPORTINFO_REC_FIELD(base_version),
.offset_words = 0,
.offset_bits = 0,
.size_bits = 8 },
{ CLASSPORTINFO_REC_FIELD(class_version),
.offset_words = 0,
.offset_bits = 8,
.size_bits = 8 },
{ CLASSPORTINFO_REC_FIELD(capability_mask),
.offset_words = 0,
.offset_bits = 16,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
.offset_words = 1,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(redirect_gid),
.offset_words = 2,
.offset_bits = 0,
.size_bits = 128 },
{ CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
.offset_words = 6,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(redirect_lid),
.offset_words = 7,
.offset_bits = 0,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(redirect_pkey),
.offset_words = 7,
.offset_bits = 16,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(redirect_qp),
.offset_words = 8,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(redirect_qkey),
.offset_words = 9,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(trap_gid),
.offset_words = 10,
.offset_bits = 0,
.size_bits = 128 },
{ CLASSPORTINFO_REC_FIELD(trap_tcslfl),
.offset_words = 14,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(trap_lid),
.offset_words = 15,
.offset_bits = 0,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(trap_pkey),
.offset_words = 15,
.offset_bits = 16,
.size_bits = 16 },
{ CLASSPORTINFO_REC_FIELD(trap_hlqp),
.offset_words = 16,
.offset_bits = 0,
.size_bits = 32 },
{ CLASSPORTINFO_REC_FIELD(trap_qkey),
.offset_words = 17,
.offset_bits = 0,
.size_bits = 32 },
};
#define GUIDINFO_REC_FIELD(field) \
.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
.struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
......@@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work)
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
}
static int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb)
int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
int timeout, delta, abs_delta;
......@@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
return 1;
}
static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb)
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
unsigned long flags;
......@@ -838,15 +916,6 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
return skb->len;
}
static struct ibnl_client_cbs ib_sa_cb_table[] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.dump = ib_nl_handle_resolve_resp,
.module = THIS_MODULE },
[RDMA_NL_LS_OP_SET_TIMEOUT] = {
.dump = ib_nl_handle_set_timeout,
.module = THIS_MODULE },
};
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
......@@ -1645,6 +1714,97 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
}
EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
/* Support get SA ClassPortInfo */
static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
int status,
struct ib_sa_mad *mad)
{
struct ib_sa_classport_info_query *query =
container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
if (mad) {
struct ib_class_port_info rec;
ib_unpack(classport_info_rec_table,
ARRAY_SIZE(classport_info_rec_table),
mad->data, &rec);
query->callback(status, &rec, query->context);
} else {
query->callback(status, NULL, query->context);
}
}
static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
{
kfree(container_of(sa_query, struct ib_sa_classport_info_query,
sa_query));
}
int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_class_port_info *resp,
void *context),
void *context,
struct ib_sa_query **sa_query)
{
struct ib_sa_classport_info_query *query;
struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
struct ib_sa_port *port;
struct ib_mad_agent *agent;
struct ib_sa_mad *mad;
int ret;
if (!sa_dev)
return -ENODEV;
port = &sa_dev->port[port_num - sa_dev->start_port];
agent = port->agent;
query = kzalloc(sizeof(*query), gfp_mask);
if (!query)
return -ENOMEM;
query->sa_query.port = port;
ret = alloc_mad(&query->sa_query, gfp_mask);
if (ret)
goto err1;
ib_sa_client_get(client);
query->sa_query.client = client;
query->callback = callback;
query->context = context;
mad = query->sa_query.mad_buf->mad;
init_mad(mad, agent);
query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
query->sa_query.release = ib_sa_portclass_info_rec_release;
/* support GET only */
mad->mad_hdr.method = IB_MGMT_METHOD_GET;
mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
mad->sa_hdr.comp_mask = 0;
*sa_query = &query->sa_query;
ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
if (ret < 0)
goto err2;
return ret;
err2:
*sa_query = NULL;
ib_sa_client_put(query->sa_query.client);
free_mad(&query->sa_query);
err1:
kfree(query);
return ret;
}
EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
......@@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
kfree(sa_dev);
}
static int __init ib_sa_init(void)
int ib_sa_init(void)
{
int ret;
......@@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void)
goto err3;
}
if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
ib_sa_cb_table)) {
pr_err("Failed to add netlink callback\n");
ret = -EINVAL;
goto err4;
}
INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
return 0;
err4:
destroy_workqueue(ib_nl_wq);
err3:
mcast_cleanup();
err2:
......@@ -1839,9 +1992,8 @@ static int __init ib_sa_init(void)
return ret;
}
static void __exit ib_sa_cleanup(void)
void ib_sa_cleanup(void)
{
ibnl_remove_client(RDMA_NL_LS);
cancel_delayed_work(&ib_nl_timed_work);
flush_workqueue(ib_nl_wq);
destroy_workqueue(ib_nl_wq);
......@@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void)
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}
module_init(ib_sa_init);
module_exit(ib_sa_cleanup);
......@@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp,
* Set the most significant bit of CM2 to indicate support for
* congestion statistics
*/
p->reserved[0] = dd->psxmitwait_supported << 7;
ib_set_cpi_capmask2(p,
dd->psxmitwait_supported <<
(31 - IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE));
/*
* Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
*/
p->resp_time_value = 18;
ib_set_cpi_resp_time(p, 18);
return reply((struct ib_smp *) pmp);
}
......
......@@ -92,6 +92,8 @@ enum {
IPOIB_FLAG_UMCAST = 10,
IPOIB_STOP_NEIGH_GC = 11,
IPOIB_NEIGH_TBL_FLUSH = 12,
IPOIB_FLAG_DEV_ADDR_SET = 13,
IPOIB_FLAG_DEV_ADDR_CTRL = 14,
IPOIB_MAX_BACKOFF_SECONDS = 16,
......@@ -392,6 +394,7 @@ struct ipoib_dev_priv {
struct ipoib_ethtool_st ethtool;
struct timer_list poll_timer;
unsigned max_send_sge;
bool sm_fullmember_sendonly_support;
};
struct ipoib_ah {
......@@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work);
void ipoib_mark_paths_invalid(struct net_device *dev);
void ipoib_flush_paths(struct net_device *dev);
int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
......
......@@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv)
return 0;
}
/*
* returns true if the device address of the ipoib interface has changed and the
* new address is a valid one (i.e in the gid table), return false otherwise.
*/
static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
{
union ib_gid search_gid;
union ib_gid gid0;
union ib_gid *netdev_gid;
int err;
u16 index;
u8 port;
bool ret = false;
netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
return false;
netif_addr_lock(priv->dev);
/* The subnet prefix may have changed, update it now so we won't have
* to do it later
*/
priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix;
search_gid.global.subnet_prefix = gid0.global.subnet_prefix;
search_gid.global.interface_id = priv->local_gid.global.interface_id;
netif_addr_unlock(priv->dev);
err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
priv->dev, &port, &index);
netif_addr_lock(priv->dev);
if (search_gid.global.interface_id !=
priv->local_gid.global.interface_id)
/* There was a change while we were looking up the gid, bail
* here and let the next work sort this out
*/
goto out;
/* The next section of code needs some background:
* Per IB spec the port GUID can't change if the HCA is powered on.
* port GUID is the basis for GID at index 0 which is the basis for
* the default device address of a ipoib interface.
*
* so it seems the flow should be:
* if user_changed_dev_addr && gid in gid tbl
* set bit dev_addr_set
* return true
* else
* return false
*
* The issue is that there are devices that don't follow the spec,
* they change the port GUID when the HCA is powered, so in order
* not to break userspace applications, We need to check if the
* user wanted to control the device address and we assume that
* if he sets the device address back to be based on GID index 0,
* he no longer wishs to control it.
*
* If the user doesn't control the the device address,
* IPOIB_FLAG_DEV_ADDR_SET is set and ib_find_gid failed it means
* the port GUID has changed and GID at index 0 has changed
* so we need to change priv->local_gid and priv->dev->dev_addr
* to reflect the new GID.
*/
if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
if (!err && port == priv->port) {
set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
if (index == 0)
clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
&priv->flags);
else
set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
ret = true;
} else {
ret = false;
}
} else {
if (!err && port == priv->port) {
ret = true;
} else {
if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
memcpy(&priv->local_gid, &gid0,
sizeof(priv->local_gid));
memcpy(priv->dev->dev_addr + 4, &gid0,
sizeof(priv->local_gid));
ret = true;
}
}
}
out:
netif_addr_unlock(priv->dev);
return ret;
}
static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
enum ipoib_flush_level level,
int nesting)
......@@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
level != IPOIB_FLUSH_HEAVY) {
/* Make sure the dev_addr is set even if not flushing */
if (level == IPOIB_FLUSH_LIGHT)
ipoib_dev_addr_changed_valid(priv);
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
......@@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
update_parent_pkey(priv);
else
update_child_pkey(priv);
}
} else if (level == IPOIB_FLUSH_LIGHT)
ipoib_dev_addr_changed_valid(priv);
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
return;
}
......@@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
if (level >= IPOIB_FLUSH_NORMAL)
ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task);
if (ipoib_dev_addr_changed_valid(priv))
ipoib_mcast_restart_task(&priv->restart_task);
}
}
......
......@@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
struct ib_device *dev, u8 port, u16 pkey,
const union ib_gid *gid, const struct sockaddr *addr,
void *client_data);
static int ipoib_set_mac(struct net_device *dev, void *addr);
static struct ib_client ipoib_client = {
.name = "ipoib",
......@@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
priv->sm_fullmember_sendonly_support = false;
if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
......@@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
spin_unlock_irq(&priv->lock);
}
struct classport_info_context {
struct ipoib_dev_priv *priv;
struct completion done;
struct ib_sa_query *sa_query;
};
static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
void *context)
{
struct classport_info_context *cb_ctx = context;
struct ipoib_dev_priv *priv;
WARN_ON(!context);
priv = cb_ctx->priv;
if (status || !rec) {
pr_debug("device: %s failed query classport_info status: %d\n",
priv->dev->name, status);
/* keeps the default, will try next mcast_restart */
priv->sm_fullmember_sendonly_support = false;
goto out;
}
if (ib_get_cpi_capmask2(rec) &
IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
priv->dev->name);
priv->sm_fullmember_sendonly_support = true;
} else {
pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
priv->dev->name);
priv->sm_fullmember_sendonly_support = false;
}
out:
complete(&cb_ctx->done);
}
int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
{
struct classport_info_context *callback_context;
int ret;
callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
if (!callback_context)
return -ENOMEM;
callback_context->priv = priv;
init_completion(&callback_context->done);
ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
priv->ca, priv->port, 3000,
GFP_KERNEL,
classport_info_query_cb,
callback_context,
&callback_context->sa_query);
if (ret < 0) {
pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
priv->dev->name, ret);
kfree(callback_context);
return ret;
}
/* waiting for the callback to finish before returnning */
wait_for_completion(&callback_context->done);
kfree(callback_context);
return ret;
}
void ipoib_flush_paths(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
......@@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
.ndo_get_vf_config = ipoib_get_vf_config,
.ndo_get_vf_stats = ipoib_get_vf_stats,
.ndo_set_vf_guid = ipoib_set_vf_guid,
.ndo_set_mac_address = ipoib_set_mac,
};
static const struct net_device_ops ipoib_netdev_ops_vf = {
......@@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_umcast);
}
static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
{
struct ipoib_dev_priv *child_priv;
struct net_device *netdev = priv->dev;
netif_addr_lock(netdev);
memcpy(&priv->local_gid.global.interface_id,
&gid->global.interface_id,
sizeof(gid->global.interface_id));
memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
netif_addr_unlock(netdev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
down_read(&priv->vlan_rwsem);
list_for_each_entry(child_priv, &priv->child_intfs, list)
set_base_guid(child_priv, gid);
up_read(&priv->vlan_rwsem);
}
}
static int ipoib_check_lladdr(struct net_device *dev,
struct sockaddr_storage *ss)
{
union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
int ret = 0;
netif_addr_lock(dev);
/* Make sure the QPN, reserved and subnet prefix match the current
* lladdr, it also makes sure the lladdr is unicast.
*/
if (memcmp(dev->dev_addr, ss->__data,
4 + sizeof(gid->global.subnet_prefix)) ||
gid->global.interface_id == 0)
ret = -EINVAL;
netif_addr_unlock(dev);
return ret;
}
static int ipoib_set_mac(struct net_device *dev, void *addr)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sockaddr_storage *ss = addr;
int ret;
if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
return -EBUSY;
ret = ipoib_check_lladdr(dev, ss);
if (ret)
return ret;
set_base_guid(priv, (union ib_gid *)(ss->__data + 4));
queue_work(ipoib_workqueue, &priv->flush_light);
return 0;
}
static ssize_t create_child(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
......@@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto device_init_failed;
} else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
result = ipoib_dev_init(priv->dev, hca, port);
if (result < 0) {
......
......@@ -64,6 +64,9 @@ struct ipoib_mcast_iter {
unsigned int send_only;
};
/* join state that allows creating mcg with sendonly member request */
#define SENDONLY_FULLMEMBER_JOIN 8
/*
* This should be called with the priv->lock held
*/
......@@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
carrier_on_task);
struct ib_port_attr attr;
int ret;
if (ib_query_port(priv->ca, priv->port, &attr) ||
attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
return;
}
/*
* Check if can send sendonly MCG's with sendonly-fullmember join state.
* It done here after the successfully join to the broadcast group,
* because the broadcast group must always be joined first and is always
* re-joined if the SM changes substantially.
*/
ret = ipoib_check_sm_sendonly_fullmember_support(priv);
if (ret < 0)
pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
priv->dev->name, ret);
/*
* Take rtnl_lock to avoid racing with ipoib_stop() and
......@@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
rec.hop_limit = priv->broadcast->mcmember.hop_limit;
/*
* Send-only IB Multicast joins do not work at the core
* IB layer yet, so we can't use them here. However,
* we are emulating an Ethernet multicast send, which
* does not require a multicast subscription and will
* still send properly. The most appropriate thing to
* Send-only IB Multicast joins work at the core IB layer but
* require specific SM support.
* We can use such joins here only if the current SM supports that feature.
* However, if not, we emulate an Ethernet multicast send,
* which does not require a multicast subscription and will
* still send properly. The most appropriate thing to
* do is to create the group if it doesn't exist as that
* most closely emulates the behavior, from a user space
* application perspecitive, of Ethernet multicast
* operation. For now, we do a full join, maybe later
* when the core IB layers support send only joins we
* will use them.
* application perspective, of Ethernet multicast operation.
*/
#if 0
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
rec.join_state = 4;
#endif
if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
priv->sm_fullmember_sendonly_support)
/* SM supports sendonly-fullmember, otherwise fallback to full-member */
rec.join_state = SENDONLY_FULLMEMBER_JOIN;
}
spin_unlock_irq(&priv->lock);
......@@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
return;
}
priv->local_lid = port_attr.lid;
netif_addr_lock(dev);
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
netif_addr_unlock(dev);
return;
}
netif_addr_unlock(dev);
spin_lock_irq(&priv->lock);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
......
......@@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler,
queue_work(ipoib_workqueue, &priv->flush_normal);
} else if (record->event == IB_EVENT_PKEY_CHANGE) {
queue_work(ipoib_workqueue, &priv->flush_heavy);
} else if (record->event == IB_EVENT_GID_CHANGE &&
!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
queue_work(ipoib_workqueue, &priv->flush_light);
}
}
......@@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
priv->pkey = pkey;
memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
priv->dev->broadcast[8] = pkey >> 8;
priv->dev->broadcast[9] = pkey & 0xff;
......
......@@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
memset(cif, 0, sizeof(*cif));
cif->base_version = 1;
cif->class_version = 1;
cif->resp_time_value = 20;
ib_set_cpi_resp_time(cif, 20);
mad->mad_hdr.status = 0;
}
......
......@@ -239,12 +239,15 @@ struct ib_vendor_mad {
#define IB_MGMT_CLASSPORTINFO_ATTR_ID cpu_to_be16(0x0001)
#define IB_CLASS_PORT_INFO_RESP_TIME_MASK 0x1F
#define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5
struct ib_class_port_info {
u8 base_version;
u8 class_version;
__be16 capability_mask;
u8 reserved[3];
u8 resp_time_value;
/* 27 bits for cap_mask2, 5 bits for resp_time */
__be32 cap_mask2_resp_time;
u8 redirect_gid[16];
__be32 redirect_tcslfl;
__be16 redirect_lid;
......@@ -259,6 +262,59 @@ struct ib_class_port_info {
__be32 trap_qkey;
};
/**
* ib_get_cpi_resp_time - Returns the resp_time value from
* cap_mask2_resp_time in ib_class_port_info.
* @cpi: A struct ib_class_port_info mad.
*/
static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi)
{
return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) &
IB_CLASS_PORT_INFO_RESP_TIME_MASK);
}
/**
* ib_set_cpi_resptime - Sets the response time in an
* ib_class_port_info mad.
* @cpi: A struct ib_class_port_info.
* @rtime: The response time to set.
*/
static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi,
u8 rtime)
{
cpi->cap_mask2_resp_time =
(cpi->cap_mask2_resp_time &
cpu_to_be32(~IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
cpu_to_be32(rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK);
}
/**
* ib_get_cpi_capmask2 - Returns the capmask2 value from
* cap_mask2_resp_time in ib_class_port_info.
* @cpi: A struct ib_class_port_info mad.
*/
static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi)
{
return (be32_to_cpu(cpi->cap_mask2_resp_time) >>
IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
}
/**
* ib_set_cpi_capmask2 - Sets the capmask2 in an
* ib_class_port_info mad.
* @cpi: A struct ib_class_port_info.
* @capmask2: The capmask2 to set.
*/
static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi,
u32 capmask2)
{
cpi->cap_mask2_resp_time =
(cpi->cap_mask2_resp_time &
cpu_to_be32(IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
cpu_to_be32(capmask2 <<
IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
}
struct ib_mad_notice_attr {
u8 generic_type;
u8 prod_type_msb;
......
......@@ -94,6 +94,8 @@ enum ib_sa_selector {
IB_SA_BEST = 3
};
#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT BIT(12)
/*
* Structures for SA records are named "struct ib_sa_xxx_rec." No
* attempt is made to pack structures to match the physical layout of
......@@ -439,4 +441,14 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
void *context,
struct ib_sa_query **sa_query);
/* Support get SA ClassPortInfo */
int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num,
int timeout_ms, gfp_t gfp_mask,
void (*callback)(int status,
struct ib_class_port_info *resp,
void *context),
void *context,
struct ib_sa_query **sa_query);
#endif /* IB_SA_H */
......@@ -135,10 +135,12 @@ enum {
* Local service operations:
* RESOLVE - The client requests the local service to resolve a path.
* SET_TIMEOUT - The local service requests the client to set the timeout.
* IP_RESOLVE - The client requests the local service to resolve an IP to GID.
*/
enum {
RDMA_NL_LS_OP_RESOLVE = 0,
RDMA_NL_LS_OP_SET_TIMEOUT,
RDMA_NL_LS_OP_IP_RESOLVE,
RDMA_NL_LS_NUM_OPS
};
......@@ -176,6 +178,10 @@ struct rdma_ls_resolve_header {
__u8 path_use;
};
struct rdma_ls_ip_resolve_header {
__u32 ifindex;
};
/* Local service attribute type */
#define RDMA_NLA_F_MANDATORY (1 << 13)
#define RDMA_NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \
......@@ -193,6 +199,8 @@ struct rdma_ls_resolve_header {
* TCLASS u8
* PKEY u16 cpu
* QOS_CLASS u16 cpu
* IPV4 u32 BE
* IPV6 u8[16] BE
*/
enum {
LS_NLA_TYPE_UNSPEC = 0,
......@@ -204,6 +212,8 @@ enum {
LS_NLA_TYPE_TCLASS,
LS_NLA_TYPE_PKEY,
LS_NLA_TYPE_QOS_CLASS,
LS_NLA_TYPE_IPV4,
LS_NLA_TYPE_IPV6,
LS_NLA_TYPE_MAX
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment