Commit ca4f3f18 authored by David S. Miller's avatar David S. Miller

Merge branch 'devlink-locking'

Jakub Kicinski says:

====================
devlink: hold the instance lock in eswitch callbacks

Series number 2 in the effort to hold the devlink instance lock
in call driver callbacks. We have the following drivers using
this API:

 - bnxt, nfp, netdevsim - their own locking is removed / simplified
   by this series; all of them needed a lock to protect from changes
   to the number of VFs while switching modes, now the VF config bus
   callback takes the devlink instance lock via devl_lock();
 - ice - appears not to allow changing modes while SR-IOV enabled,
   so nothing to do there;
 - liquidio - does not contain any locking;
 - octeontx2/af - is very special but at least doesn't have locking
   so doesn't get in the way either;
 - mlx5 has a wealth of locks - I chickened out and dropped the lock
   in the callbacks so that I can leave the driver be, for now.

The last one is obviously not ideal, but I would prefer to transition
the API already as it make take longer.

v2: use a wrapper in mlx5 and extend the comment
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents e7dc00f3 14e426bf
......@@ -13470,7 +13470,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
#ifdef CONFIG_BNXT_SRIOV
init_waitqueue_head(&bp->sriov_cfg_wait);
mutex_init(&bp->sriov_lock);
#endif
if (BNXT_SUPPORTS_TPA(bp)) {
bp->gro_func = bnxt_gro_func_5730x;
......
......@@ -2072,12 +2072,6 @@ struct bnxt {
wait_queue_head_t sriov_cfg_wait;
bool sriov_cfg;
#define BNXT_SRIOV_CFG_WAIT_TMO msecs_to_jiffies(10000)
/* lock to protect VF-rep creation/cleanup via
* multiple paths such as ->sriov_configure() and
* devlink ->eswitch_mode_set()
*/
struct mutex sriov_lock;
#endif
#if BITS_PER_LONG == 32
......
......@@ -846,7 +846,7 @@ void bnxt_sriov_disable(struct bnxt *bp)
return;
/* synchronize VF and VF-rep create and destroy */
mutex_lock(&bp->sriov_lock);
devl_lock(bp->dl);
bnxt_vf_reps_destroy(bp);
if (pci_vfs_assigned(bp->pdev)) {
......@@ -859,7 +859,7 @@ void bnxt_sriov_disable(struct bnxt *bp)
/* Free the HW resources reserved for various VF's */
bnxt_hwrm_func_vf_resource_free(bp, num_vfs);
}
mutex_unlock(&bp->sriov_lock);
devl_unlock(bp->dl);
bnxt_free_vf_resources(bp);
......
......@@ -559,44 +559,34 @@ int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
int rc = 0;
mutex_lock(&bp->sriov_lock);
if (bp->eswitch_mode == mode) {
netdev_info(bp->dev, "already in %s eswitch mode\n",
mode == DEVLINK_ESWITCH_MODE_LEGACY ?
"legacy" : "switchdev");
rc = -EINVAL;
goto done;
return -EINVAL;
}
switch (mode) {
case DEVLINK_ESWITCH_MODE_LEGACY:
bnxt_vf_reps_destroy(bp);
break;
return 0;
case DEVLINK_ESWITCH_MODE_SWITCHDEV:
if (bp->hwrm_spec_code < 0x10803) {
netdev_warn(bp->dev, "FW does not support SRIOV E-Switch SWITCHDEV mode\n");
rc = -ENOTSUPP;
goto done;
return -ENOTSUPP;
}
if (pci_num_vf(bp->pdev) == 0) {
netdev_info(bp->dev, "Enable VFs before setting switchdev mode\n");
rc = -EPERM;
goto done;
return -EPERM;
}
rc = bnxt_vf_reps_create(bp);
break;
return bnxt_vf_reps_create(bp);
default:
rc = -EINVAL;
goto done;
return -EINVAL;
}
done:
mutex_unlock(&bp->sriov_lock);
return rc;
}
#endif
......@@ -3337,6 +3337,27 @@ static int eswitch_devlink_esw_mode_check(const struct mlx5_eswitch *esw)
!mlx5_core_is_ecpf_esw_manager(esw->dev)) ? -EOPNOTSUPP : 0;
}
/* FIXME: devl_unlock() followed by devl_lock() inside driver callback
* is never correct and prone to races. It's a transitional workaround,
* never repeat this pattern.
*
* This code MUST be fixed before removing devlink_mutex as it is safe
* to do only because of that mutex.
*/
static void mlx5_eswtich_mode_callback_enter(struct devlink *devlink,
struct mlx5_eswitch *esw)
{
devl_unlock(devlink);
down_write(&esw->mode_lock);
}
static void mlx5_eswtich_mode_callback_exit(struct devlink *devlink,
struct mlx5_eswitch *esw)
{
up_write(&esw->mode_lock);
devl_lock(devlink);
}
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
......@@ -3351,6 +3372,15 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
if (esw_mode_from_devlink(mode, &mlx5_mode))
return -EINVAL;
/* FIXME: devl_unlock() followed by devl_lock() inside driver callback
* is never correct and prone to races. It's a transitional workaround,
* never repeat this pattern.
*
* This code MUST be fixed before removing devlink_mutex as it is safe
* to do only because of that mutex.
*/
devl_unlock(devlink);
mlx5_lag_disable_change(esw->dev);
err = mlx5_esw_try_lock(esw);
if (err < 0) {
......@@ -3381,6 +3411,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
mlx5_esw_unlock(esw);
enable_lag:
mlx5_lag_enable_change(esw->dev);
devl_lock(devlink);
return err;
}
......@@ -3393,14 +3424,14 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
if (IS_ERR(esw))
return PTR_ERR(esw);
down_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_enter(devlink, esw);
err = eswitch_devlink_esw_mode_check(esw);
if (err)
goto unlock;
err = esw_mode_to_devlink(esw->mode, mode);
unlock:
up_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_exit(devlink, esw);
return err;
}
......@@ -3447,7 +3478,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
if (IS_ERR(esw))
return PTR_ERR(esw);
down_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_enter(devlink, esw);
err = eswitch_devlink_esw_mode_check(esw);
if (err)
goto out;
......@@ -3484,11 +3515,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
goto out;
esw->offloads.inline_mode = mlx5_mode;
up_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_exit(devlink, esw);
return 0;
out:
up_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_exit(devlink, esw);
return err;
}
......@@ -3501,14 +3532,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
if (IS_ERR(esw))
return PTR_ERR(esw);
down_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_enter(devlink, esw);
err = eswitch_devlink_esw_mode_check(esw);
if (err)
goto unlock;
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
unlock:
up_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_exit(devlink, esw);
return err;
}
......@@ -3524,7 +3555,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
if (IS_ERR(esw))
return PTR_ERR(esw);
down_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_enter(devlink, esw);
err = eswitch_devlink_esw_mode_check(esw);
if (err)
goto unlock;
......@@ -3570,7 +3601,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
}
unlock:
up_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_exit(devlink, esw);
return err;
}
......@@ -3584,15 +3615,14 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
if (IS_ERR(esw))
return PTR_ERR(esw);
down_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_enter(devlink, esw);
err = eswitch_devlink_esw_mode_check(esw);
if (err)
goto unlock;
*encap = esw->offloads.encap;
unlock:
up_write(&esw->mode_lock);
mlx5_eswtich_mode_callback_exit(devlink, esw);
return err;
}
......
......@@ -144,13 +144,8 @@ static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
struct netlink_ext_ack *extack)
{
struct nfp_pf *pf = devlink_priv(devlink);
int ret;
devl_lock(devlink);
ret = nfp_app_eswitch_mode_set(pf->app, mode);
devl_unlock(devlink);
return ret;
return nfp_app_eswitch_mode_set(pf->app, mode);
}
static const struct nfp_devlink_versions_simple {
......
This diff is collapsed.
......@@ -261,7 +261,6 @@ struct nsim_dev {
struct dentry *take_snapshot;
struct dentry *nodes_ddir;
struct mutex vfs_lock; /* Protects vfconfigs */
struct nsim_vf_config *vfconfigs;
struct bpf_offload_dev *bpf_dev;
......@@ -274,7 +273,6 @@ struct nsim_dev {
struct list_head bpf_bound_maps;
struct netdev_phys_item_id switch_id;
struct list_head port_list;
struct mutex port_list_lock; /* protects port list */
bool fw_update_status;
u32 fw_update_overwrite_mask;
u32 max_macs;
......
......@@ -1490,6 +1490,10 @@ int devl_port_register(struct devlink *devlink,
unsigned int port_index);
void devl_port_unregister(struct devlink_port *devlink_port);
int devl_rate_leaf_create(struct devlink_port *port, void *priv);
void devl_rate_leaf_destroy(struct devlink_port *devlink_port);
void devl_rate_nodes_destroy(struct devlink *devlink);
struct ib_device;
struct net *devlink_net(const struct devlink *devlink);
......
......@@ -2868,15 +2868,11 @@ static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
{
struct devlink_rate *devlink_rate;
/* Take the lock to sync with devlink_rate_nodes_destroy() */
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
if (devlink_rate_is_node(devlink_rate)) {
mutex_unlock(&devlink->lock);
NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
return -EBUSY;
}
mutex_unlock(&devlink->lock);
return 0;
}
......@@ -8735,14 +8731,12 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_get_doit,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_ESWITCH_SET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_eswitch_set_doit,
.flags = GENL_ADMIN_PERM,
.internal_flags = DEVLINK_NL_FLAG_NO_LOCK,
},
{
.cmd = DEVLINK_CMD_DPIPE_TABLE_GET,
......@@ -9548,30 +9542,26 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
/**
* devlink_rate_leaf_create - create devlink rate leaf
*
* devl_rate_leaf_create - create devlink rate leaf
* @devlink_port: devlink port object to create rate object on
* @priv: driver private data
*
* Create devlink rate object of type leaf on provided @devlink_port.
* Throws call trace if @devlink_port already has a devlink rate object.
*
* Context: Takes and release devlink->lock <mutex>.
*
* Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
*/
int
devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
{
struct devlink *devlink = devlink_port->devlink;
struct devlink_rate *devlink_rate;
devl_assert_locked(devlink_port->devlink);
if (WARN_ON(devlink_port->devlink_rate))
return -EBUSY;
devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
if (!devlink_rate)
return -ENOMEM;
mutex_lock(&devlink->lock);
WARN_ON(devlink_port->devlink_rate);
devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
devlink_rate->devlink = devlink;
devlink_rate->devlink_port = devlink_port;
......@@ -9579,12 +9569,42 @@ devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
list_add_tail(&devlink_rate->list, &devlink->rate_list);
devlink_port->devlink_rate = devlink_rate;
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
mutex_unlock(&devlink->lock);
return 0;
}
EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
int
devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
{
struct devlink *devlink = devlink_port->devlink;
int ret;
mutex_lock(&devlink->lock);
ret = devl_rate_leaf_create(devlink_port, priv);
mutex_unlock(&devlink->lock);
return ret;
}
EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
{
struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
devl_assert_locked(devlink_port->devlink);
if (!devlink_rate)
return;
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
if (devlink_rate->parent)
refcount_dec(&devlink_rate->parent->refcnt);
list_del(&devlink_rate->list);
devlink_port->devlink_rate = NULL;
kfree(devlink_rate);
}
EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
/**
* devlink_rate_leaf_destroy - destroy devlink rate leaf
*
......@@ -9601,32 +9621,25 @@ void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
return;
mutex_lock(&devlink->lock);
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
if (devlink_rate->parent)
refcount_dec(&devlink_rate->parent->refcnt);
list_del(&devlink_rate->list);
devlink_port->devlink_rate = NULL;
devl_rate_leaf_destroy(devlink_port);
mutex_unlock(&devlink->lock);
kfree(devlink_rate);
}
EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
/**
* devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
*
* devl_rate_nodes_destroy - destroy all devlink rate nodes on device
* @devlink: devlink instance
*
* Unset parent for all rate objects and destroy all rate nodes
* on specified device.
*
* Context: Takes and release devlink->lock <mutex>.
*/
void devlink_rate_nodes_destroy(struct devlink *devlink)
void devl_rate_nodes_destroy(struct devlink *devlink)
{
static struct devlink_rate *devlink_rate, *tmp;
const struct devlink_ops *ops = devlink->ops;
mutex_lock(&devlink->lock);
devl_assert_locked(devlink);
list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
if (!devlink_rate->parent)
continue;
......@@ -9647,6 +9660,23 @@ void devlink_rate_nodes_destroy(struct devlink *devlink)
kfree(devlink_rate);
}
}
}
EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
/**
* devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
*
* @devlink: devlink instance
*
* Unset parent for all rate objects and destroy all rate nodes
* on specified device.
*
* Context: Takes and release devlink->lock <mutex>.
*/
void devlink_rate_nodes_destroy(struct devlink *devlink)
{
mutex_lock(&devlink->lock);
devl_rate_nodes_destroy(devlink);
mutex_unlock(&devlink->lock);
}
EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment