Commit 335a64a5 authored by Or Gerlitz, committed by Roland Dreier

IPoIB: Allow setting policy to ignore multicast groups

The kernel IB stack allows (through the RDMA CM) userspace
applications to join and use multicast groups from the IPoIB MGID
range.  This allows multicast traffic to be handled directly from
userspace QPs, without going through the kernel stack, which gives
better performance for some applications.

However, to fully interoperate with IP multicast, such userspace
applications need to participate in IGMP reports and queries, or else
routers may not forward the multicast traffic to the system where the
application is running.  The simplest way to do this is to share the
kernel IGMP implementation by using the IP_ADD_MEMBERSHIP option to
join multicast groups that are being handled directly in userspace.

However, in such cases, the actual multicast traffic should not also
be handled by the IPoIB interface, because that would burn resources
handling multicast packets that will just be discarded in the kernel.

To handle this, this patch adds lookup on the database used for IB
multicast group reference counting when IPoIB is joining multicast
groups, and if a multicast group is already handled by user space,
then the IPoIB kernel driver ignores the group.  This is controlled by
a per-interface policy flag.  When the flag is set, IPoIB will not
join and attach its QP to a multicast group which already has an entry
in the database; when the flag is cleared, IPoIB will behave as before
this change.

For each IPoIB interface, the /sys/class/net/$intf/umcast attribute
controls the policy flag.  The default value is off/0.
Signed-off-by: Or Gerlitz <ogerlitz@voltaire.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 55a98e95
...@@ -86,6 +86,7 @@ enum { ...@@ -86,6 +86,7 @@ enum {
IPOIB_MCAST_STARTED = 8, IPOIB_MCAST_STARTED = 8,
IPOIB_FLAG_NETIF_STOPPED = 9, IPOIB_FLAG_NETIF_STOPPED = 9,
IPOIB_FLAG_ADMIN_CM = 10, IPOIB_FLAG_ADMIN_CM = 10,
IPOIB_FLAG_UMCAST = 11,
IPOIB_MAX_BACKOFF_SECONDS = 16, IPOIB_MAX_BACKOFF_SECONDS = 16,
...@@ -384,6 +385,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) ...@@ -384,6 +385,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
int ipoib_open(struct net_device *dev); int ipoib_open(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
void ipoib_send(struct net_device *dev, struct sk_buff *skb, void ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_ah *address, u32 qpn); struct ipoib_ah *address, u32 qpn);
......
...@@ -1019,6 +1019,37 @@ static ssize_t show_pkey(struct device *dev, ...@@ -1019,6 +1019,37 @@ static ssize_t show_pkey(struct device *dev,
} }
static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
/*
 * sysfs "show" handler: report the state of the IPOIB_FLAG_UMCAST bit.
 *
 * Looks up the IPoIB private data behind @dev, tests IPOIB_FLAG_UMCAST in
 * priv->flags and prints the result into @buf as a decimal integer followed
 * by a newline.  @attr is not used.
 *
 * Returns the value returned by sprintf().
 */
static ssize_t show_umcast(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct net_device *ndev = to_net_dev(dev);
	struct ipoib_dev_priv *priv = netdev_priv(ndev);
	int umcast_on = test_bit(IPOIB_FLAG_UMCAST, &priv->flags);

	return sprintf(buf, "%d\n", umcast_on);
}
/*
 * sysfs "store" handler: set or clear the IPOIB_FLAG_UMCAST bit.
 *
 * @buf is parsed with simple_strtoul() using base 0 (auto-detected radix).
 * A parsed value of zero clears IPOIB_FLAG_UMCAST in priv->flags; any other
 * value sets it and logs a message through ipoib_warn().  @attr is not used.
 *
 * Always returns @count, i.e. the whole write is reported as consumed.
 */
static ssize_t set_umcast(struct device *dev,
			  struct device_attribute *attr,
			  const char *buf, size_t count)
{
	struct net_device *ndev = to_net_dev(dev);
	struct ipoib_dev_priv *priv = netdev_priv(ndev);
	unsigned long requested = simple_strtoul(buf, NULL, 0);

	/* Zero disables the policy; nothing to report in that case. */
	if (requested == 0) {
		clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
		return count;
	}

	set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
	ipoib_warn(priv, "ignoring multicast groups joined directly "
		   "by userspace\n");

	return count;
}
/*
 * "umcast" device attribute: mode bits S_IWUSR | S_IRUGO (owner-writable,
 * readable by everyone), read via show_umcast() and written via set_umcast().
 */
static DEVICE_ATTR(umcast, S_IWUSR | S_IRUGO, show_umcast, set_umcast);
/*
 * Create the "umcast" attribute file on the struct device embedded in @dev.
 *
 * Returns whatever device_create_file() returns.
 */
int ipoib_add_umcast_attr(struct net_device *dev)
{
	struct device *sysfs_dev = &dev->dev;

	return device_create_file(sysfs_dev, &dev_attr_umcast);
}
static ssize_t create_child(struct device *dev, static ssize_t create_child(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr,
const char *buf, size_t count) const char *buf, size_t count)
...@@ -1136,6 +1167,8 @@ static struct net_device *ipoib_add_port(const char *format, ...@@ -1136,6 +1167,8 @@ static struct net_device *ipoib_add_port(const char *format,
goto sysfs_failed; goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev)) if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed; goto sysfs_failed;
if (ipoib_add_umcast_attr(priv->dev))
goto sysfs_failed;
if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
goto sysfs_failed; goto sysfs_failed;
if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
......
...@@ -761,6 +761,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ...@@ -761,6 +761,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
struct ipoib_mcast *mcast, *tmcast; struct ipoib_mcast *mcast, *tmcast;
LIST_HEAD(remove_list); LIST_HEAD(remove_list);
unsigned long flags; unsigned long flags;
struct ib_sa_mcmember_rec rec;
ipoib_dbg_mcast(priv, "restarting multicast task\n"); ipoib_dbg_mcast(priv, "restarting multicast task\n");
...@@ -794,6 +795,14 @@ void ipoib_mcast_restart_task(struct work_struct *work) ...@@ -794,6 +795,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
struct ipoib_mcast *nmcast; struct ipoib_mcast *nmcast;
/* ignore group which is directly joined by userspace */
if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
!ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid "
IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
continue;
}
/* Not found or send-only group, let's add a new entry */ /* Not found or send-only group, let's add a new entry */
ipoib_dbg_mcast(priv, "adding multicast entry for mgid " ipoib_dbg_mcast(priv, "adding multicast entry for mgid "
IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid));
......
...@@ -119,6 +119,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ...@@ -119,6 +119,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
goto sysfs_failed; goto sysfs_failed;
if (ipoib_add_pkey_attr(priv->dev)) if (ipoib_add_pkey_attr(priv->dev))
goto sysfs_failed; goto sysfs_failed;
if (ipoib_add_umcast_attr(priv->dev))
goto sysfs_failed;
if (device_create_file(&priv->dev->dev, &dev_attr_parent)) if (device_create_file(&priv->dev->dev, &dev_attr_parent))
goto sysfs_failed; goto sysfs_failed;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment