Commit da65e9ff authored by Shay Drory, committed by Jakub Kicinski

devlink: Expose port function commands to control RoCE

Expose port function commands to enable / disable RoCE, this is used to
control the port RoCE device capabilities.

When RoCE is disabled for a function of the port, the function cannot create
any RoCE specific resources (e.g. GID table).
It also reduces system memory utilization. For example, disabling RoCE on a
VF/SF saves 1 Mbyte of system memory per function.

Example of a PCI VF port which supports function configuration:
Set RoCE of the VF's port function.

$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0
vfnum 1
    function:
        hw_addr 00:00:00:00:00:00 roce enable

$ devlink port function set pci/0000:06:00.0/2 roce disable

$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0
vfnum 1
    function:
        hw_addr 00:00:00:00:00:00 roce disable
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 875cd5ee
......@@ -110,7 +110,7 @@ devlink ports for both the controllers.
Function configuration
======================
A user can configure the function attribute before enumerating the PCI
Users can configure one or more function attributes before enumerating the PCI
function. Usually it means, user should configure function attribute
before a bus specific device for the function is created. However, when
SRIOV is enabled, virtual function devices are created on the PCI bus.
......@@ -122,6 +122,9 @@ A user may set the hardware address of the function using
`devlink port function set hw_addr` command. For Ethernet port function
this means a MAC address.
Users may also set the RoCE capability of the function using
`devlink port function set roce` command.
Function attributes
===================
......@@ -162,6 +165,35 @@ device created for the PCI VF/SF.
function:
hw_addr 00:00:00:00:88:88
RoCE capability setup
---------------------
Not all PCI VFs/SFs require RoCE capability.
When RoCE capability is disabled, it saves system memory per PCI VF/SF.
When a user disables RoCE capability for a VF/SF, user applications cannot send or
receive any RoCE packets through this VF/SF and the RoCE GID table for this PCI
function will be empty.
When RoCE capability is disabled in the device using port function attribute,
VF/SF driver cannot override it.
- Get RoCE capability of the VF device::
$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
function:
hw_addr 00:00:00:00:00:00 roce enable
- Set RoCE capability of the VF device::
$ devlink port function set pci/0000:06:00.0/2 roce disable
$ devlink port show pci/0000:06:00.0/2
pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1
function:
hw_addr 00:00:00:00:00:00 roce disable
Subfunction
============
......
......@@ -1451,6 +1451,24 @@ struct devlink_ops {
int (*port_function_hw_addr_set)(struct devlink_port *port,
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
/**
* @port_fn_roce_get: Port function's roce get function.
*
* Query RoCE state of a function managed by the devlink port.
* Return -EOPNOTSUPP if port function RoCE handling is not supported.
*/
int (*port_fn_roce_get)(struct devlink_port *devlink_port,
bool *is_enable,
struct netlink_ext_ack *extack);
/**
* @port_fn_roce_set: Port function's roce set function.
*
* Enable/Disable the RoCE state of a function managed by the devlink
* port.
* Return -EOPNOTSUPP if port function RoCE handling is not supported.
*/
int (*port_fn_roce_set)(struct devlink_port *devlink_port,
bool enable, struct netlink_ext_ack *extack);
/**
* port_new() - Add a new port function of a specified flavor
* @devlink: Devlink instance
......
......@@ -658,11 +658,21 @@ enum devlink_resource_unit {
DEVLINK_RESOURCE_UNIT_ENTRY,
};
/* Bit positions of the port function capabilities that are exchanged in the
 * DEVLINK_PORT_FN_ATTR_CAPS bitfield32 attribute.
 */
enum devlink_port_fn_attr_cap {
/* Bit index of the RoCE capability */
DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT,
/* Add new caps above */
/* One past the last defined capability bit */
__DEVLINK_PORT_FN_ATTR_CAPS_MAX,
};
/* Bitmask form of DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT */
#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT)
enum devlink_port_function_attr {
DEVLINK_PORT_FUNCTION_ATTR_UNSPEC,
DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */
DEVLINK_PORT_FN_ATTR_STATE, /* u8 */
DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */
DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */
__DEVLINK_PORT_FUNCTION_ATTR_MAX,
DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1
......
......@@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
/* Mask with every bit below __DEVLINK_PORT_FN_ATTR_CAPS_MAX set, i.e. all
 * capability bits currently defined in enum devlink_port_fn_attr_cap.
 */
#define DEVLINK_PORT_FN_CAPS_VALID_MASK \
(_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1)
/* Netlink policy for the nested port function attributes, indexed by
 * enum devlink_port_function_attr.
 */
static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = {
[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY },
/* State is a u8 bounded to the INACTIVE..ACTIVE range */
[DEVLINK_PORT_FN_ATTR_STATE] =
NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE,
DEVLINK_PORT_FN_STATE_ACTIVE),
/* Capabilities are a bitfield32 checked against DEVLINK_PORT_FN_CAPS_VALID_MASK */
[DEVLINK_PORT_FN_ATTR_CAPS] =
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
......@@ -680,6 +685,60 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb,
return 0;
}
/* Record one capability in @caps: the capability's bit is always added to
 * the selector, and added to the value only when @is_enable is true.
 */
static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
				     u32 cap, bool is_enable)
{
	caps->selector |= cap;
	caps->value |= is_enable ? cap : 0;
}
/* Query the RoCE state of the port function and record it in @caps.
 *
 * Nothing is recorded, and 0 is returned, when the driver has no
 * port_fn_roce_get callback or when the callback returns -EOPNOTSUPP.
 * Any other callback error is returned to the caller.
 */
static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
				     struct devlink_port *devlink_port,
				     struct nla_bitfield32 *caps,
				     struct netlink_ext_ack *extack)
{
	bool roce_on;
	int rc;

	if (!ops->port_fn_roce_get)
		return 0;

	rc = ops->port_fn_roce_get(devlink_port, &roce_on, extack);
	/* An unsupported query is not an error; the cap is just not reported. */
	if (rc == -EOPNOTSUPP)
		return 0;
	if (rc)
		return rc;

	devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, roce_on);
	return 0;
}
/* Collect the port function capabilities and, if at least one was reported,
 * emit them into @msg as a DEVLINK_PORT_FN_ATTR_CAPS bitfield32 attribute.
 *
 * *@msg_updated is set to true only after the attribute has been added
 * successfully; it is left untouched otherwise.
 * Returns 0 on success or a negative error code.
 */
static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
				     struct devlink_port *devlink_port,
				     struct sk_buff *msg,
				     struct netlink_ext_ack *extack,
				     bool *msg_updated)
{
	struct nla_bitfield32 fn_caps = {};
	int rc;

	rc = devlink_port_fn_roce_fill(ops, devlink_port, &fn_caps, extack);
	if (rc)
		return rc;

	/* An empty selector means no capability was reported: add nothing. */
	if (fn_caps.selector) {
		rc = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS,
					fn_caps.value, fn_caps.selector);
		if (!rc)
			*msg_updated = true;
	}

	return rc;
}
static int
devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb,
struct genl_info *info,
......@@ -1263,6 +1322,35 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
return 0;
}
/* Forward a RoCE enable/disable request to the driver's port_fn_roce_set
 * callback and return its result. The callback pointer is not checked here.
 */
static int
devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
			 struct netlink_ext_ack *extack)
{
	return devlink_port->devlink->ops->port_fn_roce_set(devlink_port,
							     enable, extack);
}
static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
struct nla_bitfield32 caps;
u32 caps_value;
int err;
caps = nla_get_bitfield32(attr);
caps_value = caps.value & caps.selector;
if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) {
err = devlink_port_fn_roce_set(devlink_port,
caps_value & DEVLINK_PORT_FN_CAP_ROCE,
extack);
if (err)
return err;
}
return 0;
}
static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
......@@ -1281,6 +1369,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
&msg_updated);
if (err)
goto out;
err = devlink_port_fn_caps_fill(ops, port, msg, extack,
&msg_updated);
if (err)
goto out;
err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
......@@ -1653,6 +1745,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
struct netlink_ext_ack *extack)
{
const struct devlink_ops *ops = devlink_port->devlink->ops;
struct nlattr *attr;
if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
!ops->port_function_hw_addr_set) {
......@@ -1665,6 +1758,18 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
"Function does not support state setting");
return -EOPNOTSUPP;
}
attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
if (attr) {
struct nla_bitfield32 caps;
caps = nla_get_bitfield32(attr);
if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE &&
!ops->port_fn_roce_set) {
NL_SET_ERR_MSG_ATTR(extack, attr,
"Port doesn't support RoCE function attribute");
return -EOPNOTSUPP;
}
}
return 0;
}
......@@ -1692,6 +1797,14 @@ static int devlink_port_function_set(struct devlink_port *port,
if (err)
return err;
}
attr = tb[DEVLINK_PORT_FN_ATTR_CAPS];
if (attr) {
err = devlink_port_fn_caps_set(port, attr, extack);
if (err)
return err;
}
/* Keep this as the last function attribute set, so that when
* multiple port function attributes are set along with state,
* Those can be applied first before activating the state.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment