Commit 55fdbb01 authored by David S. Miller's avatar David S. Miller

Merge branch 'mlx5-next'

Saeed Mahameed says:

====================
Mellanox 100G mlx5 seamless error recovery

This series from Mohamad improves the driver load/unload flows
to seamlessly handle pci errors and device internal errors recovery
reset flows.

Current pci and internal error handling is too heavy and is done
with a full restart of the driver by unregistering mlx5 interfaces
(mlx5e netedevs and mlx5_ib) which will cause losing all the current
interfaces and mlx5 core configurations.

To improve this, we add new callback functions of mlx5 interface
object (attach/detach) to be called upon reset flows when errors are
detected rather than calling register and unregister interfaces.

On their side, interfaces such as (mlx5e and mlx5_ib) can choose to implement
those callback, if not, the old heavy reset will be called for that interface.

For non-interface mlx5 modules such as sriov and eswitch, we refactored
and reorganized the code in a way that the software state objects are created
only once on driver load.  Those software state objects are kept upon reset recovery
flows and only freed once on driver unload. On seamless soft reset flows, only
hardware resources are released on stop and re-allocated on start according to the
current soft state.

In this series only mlx5e interface implements attach/detach callbacks
so that the netdevice will be kept alive on reset. On detach only hardware resources
are released and the netdevice will be marked as detached to the stack. Once
attached again it will re-allocate the hardware resources according to the current
netdevice state, and all the configurations and the software state will be kept or restored
after recovery.

Note: I will be out of office all next week, in case of any updates
or V2 is required, Tariq will post the new series, I hope it is ok.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 48b4e51f f1ee87fe
......@@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o
fs_counters.o rl.o lag.o dev.o
mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
......
/*
* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
static LIST_HEAD(intf_list);
static LIST_HEAD(mlx5_dev_list);
/* intf dev list mutex */
static DEFINE_MUTEX(mlx5_intf_mutex);
struct mlx5_device_context {
struct list_head list;
struct mlx5_interface *intf;
void *context;
unsigned long state;
};
enum {
MLX5_INTERFACE_ADDED,
MLX5_INTERFACE_ATTACHED,
};
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
if (!mlx5_lag_intf_add(intf, priv))
return;
dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
if (!dev_ctx)
return;
dev_ctx->intf = intf;
dev_ctx->context = intf->add(dev);
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
if (intf->attach)
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
if (dev_ctx->context) {
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
spin_unlock_irq(&priv->ctx_lock);
} else {
kfree(dev_ctx);
}
}
static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf == intf)
return dev_ctx;
return NULL;
}
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
dev_ctx = mlx5_get_device(intf, priv);
if (!dev_ctx)
return;
spin_lock_irq(&priv->ctx_lock);
list_del(&dev_ctx->list);
spin_unlock_irq(&priv->ctx_lock);
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
intf->remove(dev, dev_ctx->context);
kfree(dev_ctx);
}
static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
dev_ctx = mlx5_get_device(intf, priv);
if (!dev_ctx)
return;
if (intf->attach) {
if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
return;
intf->attach(dev, dev_ctx->context);
set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
return;
dev_ctx->context = intf->add(dev);
set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
}
void mlx5_attach_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx5_attach_interface(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
}
static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
struct mlx5_device_context *dev_ctx;
struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
dev_ctx = mlx5_get_device(intf, priv);
if (!dev_ctx)
return;
if (intf->detach) {
if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
return;
intf->detach(dev, dev_ctx->context);
clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
} else {
if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
return;
intf->remove(dev, dev_ctx->context);
clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
}
}
void mlx5_detach_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx5_detach_interface(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
}
bool mlx5_device_registered(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv;
bool found = false;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
if (priv == &dev->priv)
found = true;
mutex_unlock(&mlx5_intf_mutex);
return found;
}
int mlx5_register_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_add_tail(&priv->dev_list, &mlx5_dev_list);
list_for_each_entry(intf, &intf_list, list)
mlx5_add_device(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
return 0;
}
void mlx5_unregister_device(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_interface *intf;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx5_remove_device(intf, priv);
list_del(&priv->dev_list);
mutex_unlock(&mlx5_intf_mutex);
}
int mlx5_register_interface(struct mlx5_interface *intf)
{
struct mlx5_priv *priv;
if (!intf->add || !intf->remove)
return -EINVAL;
mutex_lock(&mlx5_intf_mutex);
list_add_tail(&intf->list, &intf_list);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
mlx5_add_device(intf, priv);
mutex_unlock(&mlx5_intf_mutex);
return 0;
}
EXPORT_SYMBOL(mlx5_register_interface);
void mlx5_unregister_interface(struct mlx5_interface *intf)
{
struct mlx5_priv *priv;
mutex_lock(&mlx5_intf_mutex);
list_for_each_entry(priv, &mlx5_dev_list, dev_list)
mlx5_remove_device(intf, priv);
list_del(&intf->list);
mutex_unlock(&mlx5_intf_mutex);
}
EXPORT_SYMBOL(mlx5_unregister_interface);
void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
struct mlx5_priv *priv = &mdev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
void *result = NULL;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
if ((dev_ctx->intf->protocol == protocol) &&
dev_ctx->intf->get_dev) {
result = dev_ctx->intf->get_dev(dev_ctx->context);
break;
}
spin_unlock_irqrestore(&priv->ctx_lock, flags);
return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);
/* Must be called with intf_mutex held */
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
struct mlx5_interface *intf;
list_for_each_entry(intf, &intf_list, list)
if (intf->protocol == protocol) {
mlx5_add_device(intf, &dev->priv);
break;
}
}
/* Must be called with intf_mutex held */
void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
{
struct mlx5_interface *intf;
list_for_each_entry(intf, &intf_list, list)
if (intf->protocol == protocol) {
mlx5_remove_device(intf, &dev->priv);
break;
}
}
static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
{
return (u16)((dev->pdev->bus->number << 8) |
PCI_SLOT(dev->pdev->devfn));
}
/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
{
u16 pci_id = mlx5_gen_pci_id(dev);
struct mlx5_core_dev *res = NULL;
struct mlx5_core_dev *tmp_dev;
struct mlx5_priv *priv;
list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
res = tmp_dev;
break;
}
}
return res;
}
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_device_context *dev_ctx;
unsigned long flags;
spin_lock_irqsave(&priv->ctx_lock, flags);
list_for_each_entry(dev_ctx, &priv->ctx_list, list)
if (dev_ctx->intf->event)
dev_ctx->intf->event(dev, dev_ctx->context, event, param);
spin_unlock_irqrestore(&priv->ctx_lock, flags);
}
void mlx5_dev_list_lock(void)
{
mutex_lock(&mlx5_intf_mutex);
}
void mlx5_dev_list_unlock(void)
{
mutex_unlock(&mlx5_intf_mutex);
}
int mlx5_dev_list_trylock(void)
{
return mutex_trylock(&mlx5_intf_mutex);
}
......@@ -844,9 +844,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
void mlx5e_update_stats_work(struct work_struct *work);
void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile, void *ppriv);
struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile,
void *ppriv);
void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
struct rtnl_link_stats64 *
mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
......
......@@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
return 0;
}
static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
{
int i;
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
}
if (priv->fs.vlan.filter_disabled &&
!(priv->netdev->flags & IFF_PROMISC))
mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
}
static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
{
int i;
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
}
if (priv->fs.vlan.filter_disabled &&
!(priv->netdev->flags & IFF_PROMISC))
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
}
#define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
......@@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
if (err)
goto err_free_g;
err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
if (err)
goto err_destroy_vlan_flow_groups;
mlx5e_add_vlan_rules(priv);
return 0;
err_destroy_vlan_flow_groups:
mlx5e_destroy_groups(ft);
err_free_g:
kfree(ft->g);
err_destroy_vlan_table:
......@@ -1043,6 +1069,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
{
mlx5e_del_vlan_rules(priv);
mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
}
......@@ -1100,7 +1127,6 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
{
mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
mlx5e_destroy_vlan_table(priv);
mlx5e_destroy_l2_table(priv);
mlx5e_destroy_ttc_table(priv);
......
......@@ -1883,6 +1883,9 @@ int mlx5e_close(struct net_device *netdev)
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
if (!netif_device_present(netdev))
return -ENODEV;
mutex_lock(&priv->state_lock);
err = mlx5e_close_locked(netdev);
mutex_unlock(&priv->state_lock);
......@@ -3401,13 +3404,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.max_tc = MLX5E_MAX_NUM_TC,
};
void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile, void *ppriv)
struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile,
void *ppriv)
{
int nch = profile->max_nch(mdev);
struct net_device *netdev;
struct mlx5e_priv *priv;
int nch = profile->max_nch(mdev);
int err;
netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
nch * profile->max_tc,
......@@ -3425,12 +3428,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
priv->wq = create_singlethread_workqueue("mlx5e");
if (!priv->wq)
goto err_free_netdev;
goto err_cleanup_nic;
return netdev;
err_cleanup_nic:
profile->cleanup(priv);
free_netdev(netdev);
return NULL;
}
int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
{
const struct mlx5e_profile *profile;
struct mlx5e_priv *priv;
int err;
priv = netdev_priv(netdev);
profile = priv->profile;
clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
err = mlx5e_create_umr_mkey(priv);
if (err) {
mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
goto err_destroy_wq;
goto out;
}
err = profile->init_tx(priv);
......@@ -3453,20 +3475,16 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
mlx5e_set_dev_port_mtu(netdev);
err = register_netdev(netdev);
if (err) {
mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
goto err_dealloc_q_counters;
}
if (profile->enable)
profile->enable(priv);
return priv;
rtnl_lock();
if (netif_running(netdev))
mlx5e_open(netdev);
netif_device_attach(netdev);
rtnl_unlock();
err_dealloc_q_counters:
mlx5e_destroy_q_counter(priv);
profile->cleanup_rx(priv);
return 0;
err_close_drop_rq:
mlx5e_close_drop_rq(priv);
......@@ -3477,13 +3495,8 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
err_destroy_umr_mkey:
mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
err_destroy_wq:
destroy_workqueue(priv->wq);
err_free_netdev:
free_netdev(netdev);
return NULL;
out:
return err;
}
static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
......@@ -3509,16 +3522,80 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
}
}
void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
const struct mlx5e_profile *profile = priv->profile;
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
if (profile->disable)
profile->disable(priv);
flush_workqueue(priv->wq);
rtnl_lock();
if (netif_running(netdev))
mlx5e_close(netdev);
netif_device_detach(netdev);
rtnl_unlock();
mlx5e_destroy_q_counter(priv);
profile->cleanup_rx(priv);
mlx5e_close_drop_rq(priv);
profile->cleanup_tx(priv);
mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
cancel_delayed_work_sync(&priv->update_stats_work);
}
/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying
* hardware contexts and to connect it to the current netdev.
*/
static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
struct net_device *netdev = priv->netdev;
int err;
if (netif_device_present(netdev))
return 0;
err = mlx5e_create_mdev_resources(mdev);
if (err)
return err;
err = mlx5e_attach_netdev(mdev, netdev);
if (err) {
mlx5e_destroy_mdev_resources(mdev);
return err;
}
return 0;
}
static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
{
struct mlx5e_priv *priv = vpriv;
struct net_device *netdev = priv->netdev;
if (!netif_device_present(netdev))
return;
mlx5e_detach_netdev(mdev, netdev);
mlx5e_destroy_mdev_resources(mdev);
}
static void *mlx5e_add(struct mlx5_core_dev *mdev)
{
struct mlx5_eswitch *esw = mdev->priv.eswitch;
int total_vfs = MLX5_TOTAL_VPORTS(mdev);
void *ppriv = NULL;
void *ret;
if (mlx5e_check_required_hca_cap(mdev))
return NULL;
void *priv;
int vport;
int err;
struct net_device *netdev;
if (mlx5e_create_mdev_resources(mdev))
err = mlx5e_check_required_hca_cap(mdev);
if (err)
return NULL;
mlx5e_register_vport_rep(mdev);
......@@ -3526,12 +3603,39 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
if (MLX5_CAP_GEN(mdev, vport_group_manager))
ppriv = &esw->offloads.vport_reps[0];
ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
if (!ret) {
mlx5e_destroy_mdev_resources(mdev);
return NULL;
netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
goto err_unregister_reps;
}
priv = netdev_priv(netdev);
err = mlx5e_attach(mdev, priv);
if (err) {
mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
goto err_destroy_netdev;
}
err = register_netdev(netdev);
if (err) {
mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
goto err_detach;
}
return ret;
return priv;
err_detach:
mlx5e_detach(mdev, priv);
err_destroy_netdev:
mlx5e_destroy_netdev(mdev, priv);
err_unregister_reps:
for (vport = 1; vport < total_vfs; vport++)
mlx5_eswitch_unregister_vport_rep(esw, vport);
return NULL;
}
void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
......@@ -3539,30 +3643,11 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
const struct mlx5e_profile *profile = priv->profile;
struct net_device *netdev = priv->netdev;
set_bit(MLX5E_STATE_DESTROYING, &priv->state);
if (profile->disable)
profile->disable(priv);
flush_workqueue(priv->wq);
if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) {
netif_device_detach(netdev);
mlx5e_close(netdev);
} else {
unregister_netdev(netdev);
}
mlx5e_destroy_q_counter(priv);
profile->cleanup_rx(priv);
mlx5e_close_drop_rq(priv);
profile->cleanup_tx(priv);
mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
cancel_delayed_work_sync(&priv->update_stats_work);
unregister_netdev(netdev);
destroy_workqueue(priv->wq);
if (profile->cleanup)
profile->cleanup(priv);
if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state))
free_netdev(netdev);
free_netdev(netdev);
}
static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
......@@ -3572,12 +3657,11 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
struct mlx5e_priv *priv = vpriv;
int vport;
mlx5e_destroy_netdev(mdev, priv);
for (vport = 1; vport < total_vfs; vport++)
mlx5_eswitch_unregister_vport_rep(esw, vport);
mlx5e_destroy_mdev_resources(mdev);
mlx5e_detach(mdev, vpriv);
mlx5e_destroy_netdev(mdev, priv);
}
static void *mlx5e_get_netdev(void *vpriv)
......@@ -3590,6 +3674,8 @@ static void *mlx5e_get_netdev(void *vpriv)
static struct mlx5_interface mlx5e_interface = {
.add = mlx5e_add,
.remove = mlx5e_remove,
.attach = mlx5e_attach,
.detach = mlx5e_detach,
.event = mlx5e_async_event,
.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
.get_dev = mlx5e_get_netdev,
......
......@@ -413,19 +413,50 @@ static struct mlx5e_profile mlx5e_rep_profile = {
int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep)
{
rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
if (!rep->priv_data) {
mlx5_core_warn(esw->dev, "Failed to create representor for vport %d\n",
rep->vport);
struct net_device *netdev;
int err;
netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
return -EINVAL;
}
rep->priv_data = netdev_priv(netdev);
err = mlx5e_attach_netdev(esw->dev, netdev);
if (err) {
pr_warn("Failed to attach representor netdev for vport %d\n",
rep->vport);
goto err_destroy_netdev;
}
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
goto err_detach_netdev;
}
return 0;
err_detach_netdev:
mlx5e_detach_netdev(esw->dev, netdev);
err_destroy_netdev:
mlx5e_destroy_netdev(esw->dev, rep->priv_data);
return err;
}
void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
struct mlx5_eswitch_rep *rep)
{
struct mlx5e_priv *priv = rep->priv_data;
struct net_device *netdev = priv->netdev;
mlx5e_detach_netdev(esw->dev, netdev);
mlx5e_destroy_netdev(esw->dev, priv);
}
......@@ -109,6 +109,16 @@ struct vport_egress {
struct mlx5_flow_rule *drop_rule;
};
struct mlx5_vport_info {
u8 mac[ETH_ALEN];
u16 vlan;
u8 qos;
u64 node_guid;
int link_state;
bool spoofchk;
bool trusted;
};
struct mlx5_vport {
struct mlx5_core_dev *dev;
int vport;
......@@ -121,10 +131,8 @@ struct mlx5_vport {
struct vport_ingress ingress;
struct vport_egress egress;
u16 vlan;
u8 qos;
bool spoofchk;
bool trusted;
struct mlx5_vport_info info;
bool enabled;
u16 enabled_events;
};
......@@ -204,6 +212,8 @@ struct mlx5_eswitch {
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
void mlx5_eswitch_attach(struct mlx5_eswitch *esw);
void mlx5_eswitch_detach(struct mlx5_eswitch *esw);
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
......
......@@ -277,7 +277,7 @@ static void mlx5_do_bond_work(struct work_struct *work)
bond_work);
int status;
status = mutex_trylock(&mlx5_intf_mutex);
status = mlx5_dev_list_trylock();
if (!status) {
/* 1 sec delay. */
mlx5_queue_bond_work(ldev, HZ);
......@@ -285,7 +285,7 @@ static void mlx5_do_bond_work(struct work_struct *work)
}
mlx5_do_bond(ldev);
mutex_unlock(&mlx5_intf_mutex);
mlx5_dev_list_unlock();
}
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
......@@ -466,35 +466,21 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
mutex_unlock(&lag_mutex);
}
static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
{
return (u16)((dev->pdev->bus->number << 8) |
PCI_SLOT(dev->pdev->devfn));
}
/* Must be called with intf_mutex held */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
struct mlx5_lag *ldev = NULL;
struct mlx5_core_dev *tmp_dev;
struct mlx5_priv *priv;
u16 pci_id;
if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
!MLX5_CAP_GEN(dev, lag_master) ||
(MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
return;
pci_id = mlx5_gen_pci_id(dev);
mlx5_core_for_each_priv(priv) {
tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
if ((dev != tmp_dev) &&
(mlx5_gen_pci_id(tmp_dev) == pci_id)) {
ldev = tmp_dev->priv.lag;
break;
}
}
tmp_dev = mlx5_get_next_phys_dev(dev);
if (tmp_dev)
ldev = tmp_dev->priv.lag;
if (!ldev) {
ldev = mlx5_lag_dev_alloc();
......
......@@ -46,9 +46,6 @@
extern int mlx5_core_debug_mask;
extern struct list_head mlx5_dev_list;
extern struct mutex mlx5_intf_mutex;
#define mlx5_core_dbg(__dev, format, ...) \
dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \
(__dev)->priv.name, __func__, __LINE__, current->pid, \
......@@ -73,9 +70,6 @@ do { \
#define mlx5_core_info(__dev, format, ...) \
dev_info(&(__dev)->pdev->dev, format, ##__VA_ARGS__)
#define mlx5_core_for_each_priv(__priv) \
list_for_each_entry(__priv, &mlx5_dev_list, dev_list)
enum {
MLX5_CMD_DATA, /* print command payload only */
MLX5_CMD_TIME, /* print command execution time */
......@@ -89,6 +83,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param);
void mlx5_enter_error_state(struct mlx5_core_dev *dev);
void mlx5_disable_device(struct mlx5_core_dev *dev);
int mlx5_sriov_init(struct mlx5_core_dev *dev);
void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
int mlx5_sriov_attach(struct mlx5_core_dev *dev);
void mlx5_sriov_detach(struct mlx5_core_dev *dev);
int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
......@@ -102,8 +100,19 @@ void mlx5_cq_tasklet_cb(unsigned long data);
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
void mlx5_lag_remove(struct mlx5_core_dev *dev);
void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
void mlx5_attach_device(struct mlx5_core_dev *dev);
void mlx5_detach_device(struct mlx5_core_dev *dev);
bool mlx5_device_registered(struct mlx5_core_dev *dev);
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
int mlx5_dev_list_trylock(void);
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
......
......@@ -326,6 +326,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
{
struct fw_page *fwp;
struct rb_node *p;
u32 func_id;
u32 npages;
u32 i = 0;
......@@ -334,12 +335,16 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
/* No hard feelings, we want our pages back! */
npages = MLX5_GET(manage_pages_in, in, input_num_entries);
func_id = MLX5_GET(manage_pages_in, in, function_id);
p = rb_first(&dev->priv.page_root);
while (p && i < npages) {
fwp = rb_entry(p, struct fw_page, rb_node);
MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
p = rb_next(p);
if (fwp->func_id != func_id)
continue;
MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
i++;
}
......@@ -540,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
int prev_vfs_pages = dev->priv.vfs_pages;
/* In case of internal error we will free the pages manually later */
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
mlx5_core_warn(dev, "Skipping wait for vf pages stage");
return 0;
}
mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
dev->priv.name);
while (dev->priv.vfs_pages) {
......
......@@ -44,108 +44,132 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
return !!sriov->num_vfs;
}
static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs)
static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int vf;
for (vf = 1; vf <= num_vfs; vf++) {
err = mlx5_core_enable_hca(dev, vf);
if (sriov->enabled_vfs) {
mlx5_core_warn(dev,
"failed to enable SRIOV on device, already enabled with %d vfs\n",
sriov->enabled_vfs);
return -EBUSY;
}
#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
if (err) {
mlx5_core_warn(dev,
"failed to enable eswitch SRIOV (%d)\n", err);
return err;
}
#endif
for (vf = 0; vf < num_vfs; vf++) {
err = mlx5_core_enable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1);
} else {
sriov->vfs_ctx[vf - 1].enabled = 1;
mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1);
mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
continue;
}
sriov->vfs_ctx[vf].enabled = 1;
sriov->enabled_vfs++;
mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
}
return 0;
}
static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs)
static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int vf;
for (vf = 1; vf <= num_vfs; vf++) {
if (sriov->vfs_ctx[vf - 1].enabled) {
if (mlx5_core_disable_hca(dev, vf))
mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1);
else
sriov->vfs_ctx[vf - 1].enabled = 0;
if (!sriov->enabled_vfs)
return;
for (vf = 0; vf < sriov->num_vfs; vf++) {
if (!sriov->vfs_ctx[vf].enabled)
continue;
err = mlx5_core_disable_hca(dev, vf + 1);
if (err) {
mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
continue;
}
sriov->vfs_ctx[vf].enabled = 0;
sriov->enabled_vfs--;
}
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_disable_sriov(dev->priv.eswitch);
#endif
if (mlx5_wait_for_vf_pages(dev))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
}
static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs)
static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
if (pci_num_vf(pdev))
pci_disable_sriov(pdev);
enable_vfs(dev, num_vfs);
int err = 0;
err = pci_enable_sriov(pdev, num_vfs);
if (err) {
dev_warn(&pdev->dev, "enable sriov failed %d\n", err);
goto ex;
if (pci_num_vf(pdev)) {
mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n");
return -EBUSY;
}
return 0;
err = pci_enable_sriov(pdev, num_vfs);
if (err)
mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
ex:
disable_vfs(dev, num_vfs);
return err;
}
static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs)
static void mlx5_pci_disable_sriov(struct pci_dev *pdev)
{
pci_disable_sriov(pdev);
}
static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int err = 0;
kfree(sriov->vfs_ctx);
sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC);
if (!sriov->vfs_ctx)
return -ENOMEM;
err = mlx5_device_enable_sriov(dev, num_vfs);
if (err) {
mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
return err;
}
sriov->enabled_vfs = num_vfs;
err = mlx5_core_create_vfs(pdev, num_vfs);
err = mlx5_pci_enable_sriov(pdev, num_vfs);
if (err) {
kfree(sriov->vfs_ctx);
sriov->vfs_ctx = NULL;
mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err);
mlx5_device_disable_sriov(dev);
return err;
}
sriov->num_vfs = num_vfs;
return 0;
}
static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs)
static void mlx5_sriov_disable(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
sriov->num_vfs = num_vfs;
}
static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov;
sriov = &dev->priv.sriov;
disable_vfs(dev, sriov->num_vfs);
if (mlx5_wait_for_vf_pages(dev))
mlx5_core_warn(dev, "timeout claiming VFs pages\n");
mlx5_pci_disable_sriov(pdev);
mlx5_device_disable_sriov(dev);
sriov->num_vfs = 0;
}
int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int err;
int err = 0;
mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
if (!mlx5_core_is_pf(dev))
......@@ -156,92 +180,57 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
return -EINVAL;
}
mlx5_core_cleanup_vfs(dev);
if (num_vfs)
err = mlx5_sriov_enable(pdev, num_vfs);
else
mlx5_sriov_disable(pdev);
if (!num_vfs) {
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_disable_sriov(dev->priv.eswitch);
#endif
kfree(sriov->vfs_ctx);
sriov->vfs_ctx = NULL;
if (!pci_vfs_assigned(pdev))
pci_disable_sriov(pdev);
else
mlx5_core_info(dev, "unloading PF driver while leaving orphan VFs\n");
return 0;
}
return err ? err : num_vfs;
}
err = mlx5_core_sriov_enable(pdev, num_vfs);
if (err) {
mlx5_core_warn(dev, "mlx5_core_sriov_enable failed %d\n", err);
return err;
}
int mlx5_sriov_attach(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
mlx5_core_init_vfs(dev, num_vfs);
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
#endif
if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
return 0;
return num_vfs;
/* If sriov VFs exist in PCI level, enable them in device level */
return mlx5_device_enable_sriov(dev, sriov->num_vfs);
}
static int sync_required(struct pci_dev *pdev)
void mlx5_sriov_detach(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int cur_vfs = pci_num_vf(pdev);
if (cur_vfs != sriov->num_vfs) {
mlx5_core_warn(dev, "current VFs %d, registered %d - sync needed\n",
cur_vfs, sriov->num_vfs);
return 1;
}
if (!mlx5_core_is_pf(dev))
return;
return 0;
mlx5_device_disable_sriov(dev);
}
int mlx5_sriov_init(struct mlx5_core_dev *dev)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
struct pci_dev *pdev = dev->pdev;
int cur_vfs;
int total_vfs;
if (!mlx5_core_is_pf(dev))
return 0;
if (!sync_required(dev->pdev))
return 0;
cur_vfs = pci_num_vf(pdev);
sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
total_vfs = pci_sriov_get_totalvfs(pdev);
sriov->num_vfs = pci_num_vf(pdev);
sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
if (!sriov->vfs_ctx)
return -ENOMEM;
sriov->enabled_vfs = cur_vfs;
mlx5_core_init_vfs(dev, cur_vfs);
#ifdef CONFIG_MLX5_CORE_EN
if (cur_vfs)
mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs,
SRIOV_LEGACY);
#endif
enable_vfs(dev, cur_vfs);
return 0;
}
int mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
{
struct pci_dev *pdev = dev->pdev;
int err;
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
if (!mlx5_core_is_pf(dev))
return 0;
return;
err = mlx5_core_sriov_configure(pdev, 0);
if (err)
return err;
return 0;
kfree(sriov->vfs_ctx);
}
......@@ -828,8 +828,6 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
int mlx5_sriov_init(struct mlx5_core_dev *dev);
int mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
s32 npages);
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
......@@ -932,6 +930,8 @@ enum {
struct mlx5_interface {
void * (*add)(struct mlx5_core_dev *dev);
void (*remove)(struct mlx5_core_dev *dev, void *context);
int (*attach)(struct mlx5_core_dev *dev, void *context);
void (*detach)(struct mlx5_core_dev *dev, void *context);
void (*event)(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param);
void * (*get_dev)(void *context);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment