Commit dd3cd3ca authored by Jakub Kicinski

Merge tag 'aux-sysfs-irqs' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux

Saeed Mahameed says:

====================
aux-sysfs-irqs

Shay says:
==========
Introduce auxiliary bus IRQs sysfs

Today, PCI PFs and VFs, which are anchored on the PCI bus, expose their
IRQ information in the <pci_device>/msi_irqs/<irq_num> sysfs files. PCI
subfunctions (SFs) are similar to PFs and VFs, but they are anchored on
the auxiliary bus, where no such IRQ information is exposed. This leaves
users without visibility into which IRQs are used by the SFs, making it
impossible to debug interrupt issues or to map interrupts to SFs for
performance tuning.

Additionally, the SFs are multifunctional devices supporting RDMA,
network devices, clocks, and more, similar to their peer PCI PFs and
VFs. Therefore, it is desirable to have SFs' IRQ information available
at the bus/device level.

To overcome the above limitations, this short series extends the
auxiliary bus to display IRQ information in sysfs, similar to that of
PFs and VFs.

It adds an 'irqs' directory under the auxiliary device and an
<irq_num> sysfs file within it for each IRQ the device uses.

For example:
$ ls /sys/bus/auxiliary/devices/mlx5_core.sf.1/irqs/
50  51  52  53  54  55  56  57  58

Patch summary:
patch-1 extends the auxiliary bus to expose the irqs used by an
        auxiliary device
patch-2 makes the mlx5 driver expose the irqs of PCI SF devices via the
        auxiliary bus
==========

* tag 'aux-sysfs-irqs' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux:
  net/mlx5: Expose SFs IRQs
  driver core: auxiliary bus: show auxiliary device IRQs
  RDMA/mlx5: Add Qcounters req_transport_retries_exceeded/req_rnr_retries_exceeded
  net/mlx5: Reimplement write combining test
====================

Link: https://patch.msgid.link/20240711213140.256997-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents e7afb958 0477d516
What: /sys/bus/auxiliary/devices/.../irqs/
Date: April, 2024
Contact: Shay Drory <shayd@nvidia.com>
Description:
The /sys/devices/.../irqs directory contains a variable set of
files, each named after an IRQ number, similar to the PCI PF
or VF IRQ numbers found in the msi_irqs directory.
These irq files are added and removed dynamically when an IRQ
is requested and freed, respectively, for the PCI SF.
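
For illustration, a minimal userspace sketch (not part of this series) that enumerates an SF's IRQs by reading this directory; the device name is borrowed from the cover-letter example above:

#include <dirent.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical SF device name, taken from the example above */
	const char *path = "/sys/bus/auxiliary/devices/mlx5_core.sf.1/irqs";
	struct dirent *de;
	DIR *dir = opendir(path);

	if (!dir) {
		perror("opendir");
		return 1;
	}
	while ((de = readdir(dir)) != NULL) {
		if (de->d_name[0] != '.')	/* skip "." and ".." */
			printf("SF IRQ: %s\n", de->d_name);
	}
	closedir(dir);
	return 0;
}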
@@ -16,6 +16,7 @@ obj-$(CONFIG_NUMA) += node.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o
ifeq ($(CONFIG_SYSFS),y)
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_AUXILIARY_BUS) += auxiliary_sysfs.o
endif
obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o
obj-$(CONFIG_REGMAP) += regmap/
......
@@ -287,6 +287,7 @@ int auxiliary_device_init(struct auxiliary_device *auxdev)
dev->bus = &auxiliary_bus_type;
device_initialize(&auxdev->dev);
mutex_init(&auxdev->sysfs.lock);
return 0;
}
EXPORT_SYMBOL_GPL(auxiliary_device_init);
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
*/
#include <linux/auxiliary_bus.h>
#include <linux/slab.h>
#define AUXILIARY_MAX_IRQ_NAME 11	/* fits a decimal 32-bit IRQ number plus NUL */
struct auxiliary_irq_info {
struct device_attribute sysfs_attr;
char name[AUXILIARY_MAX_IRQ_NAME];
};
static struct attribute *auxiliary_irq_attrs[] = {
NULL
};
static const struct attribute_group auxiliary_irqs_group = {
.name = "irqs",
.attrs = auxiliary_irq_attrs,
};
static int auxiliary_irq_dir_prepare(struct auxiliary_device *auxdev)
{
int ret = 0;
guard(mutex)(&auxdev->sysfs.lock);
if (auxdev->sysfs.irq_dir_exists)
return 0;
ret = devm_device_add_group(&auxdev->dev, &auxiliary_irqs_group);
if (ret)
return ret;
auxdev->sysfs.irq_dir_exists = true;
xa_init(&auxdev->sysfs.irqs);
return 0;
}
/**
* auxiliary_device_sysfs_irq_add - add a sysfs entry for the given IRQ
* @auxdev: auxiliary bus device to add the sysfs entry.
* @irq: The associated interrupt number.
*
* This function should be called after the auxiliary device has
* successfully received the IRQ.
* The driver is responsible for adding a unique IRQ for the auxiliary
* device. This function may safely be invoked concurrently from multiple
* threads for distinct IRQs of the auxiliary devices, but it must not be
* invoked again for an IRQ that has already been added.
*
* Return: zero on success or an error code on failure.
*/
int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq)
{
struct auxiliary_irq_info *info __free(kfree) = NULL;
struct device *dev = &auxdev->dev;
int ret;
ret = auxiliary_irq_dir_prepare(auxdev);
if (ret)
return ret;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
sysfs_attr_init(&info->sysfs_attr.attr);
snprintf(info->name, AUXILIARY_MAX_IRQ_NAME, "%d", irq);
ret = xa_insert(&auxdev->sysfs.irqs, irq, info, GFP_KERNEL);
if (ret)
return ret;
info->sysfs_attr.attr.name = info->name;
ret = sysfs_add_file_to_group(&dev->kobj, &info->sysfs_attr.attr,
auxiliary_irqs_group.name);
if (ret)
goto sysfs_add_err;
xa_store(&auxdev->sysfs.irqs, irq, no_free_ptr(info), GFP_KERNEL);
return 0;
sysfs_add_err:
xa_erase(&auxdev->sysfs.irqs, irq);
return ret;
}
EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_add);
/**
* auxiliary_device_sysfs_irq_remove - remove a sysfs entry for the given IRQ
* @auxdev: auxiliary bus device to remove the sysfs entry from.
* @irq: the IRQ to remove.
*
* This function should be called to remove an IRQ sysfs entry.
* The driver must invoke this API when the IRQ is released by the device.
*/
void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq)
{
struct auxiliary_irq_info *info __free(kfree) = xa_load(&auxdev->sysfs.irqs, irq);
struct device *dev = &auxdev->dev;
if (!info) {
dev_err(&auxdev->dev, "IRQ %d doesn't exist\n", irq);
return;
}
sysfs_remove_file_from_group(&dev->kobj, &info->sysfs_attr.attr,
auxiliary_irqs_group.name);
xa_erase(&auxdev->sysfs.irqs, irq);
}
EXPORT_SYMBOL_GPL(auxiliary_device_sysfs_irq_remove);
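
For illustration, a hedged sketch of how an auxiliary driver might pair these helpers with request_irq()/free_irq(); the my_sf_* functions and the "my_sf" name are hypothetical, not part of this series:

#include <linux/auxiliary_bus.h>
#include <linux/interrupt.h>

/* Hypothetical example: publish an IRQ in sysfs once it is requested */
static int my_sf_irq_setup(struct auxiliary_device *auxdev, int irq,
			   irq_handler_t handler, void *ctx)
{
	int ret;

	ret = request_irq(irq, handler, 0, "my_sf", ctx);
	if (ret)
		return ret;

	/* Creates /sys/bus/auxiliary/devices/<dev>/irqs/<irq> */
	ret = auxiliary_device_sysfs_irq_add(auxdev, irq);
	if (ret)
		free_irq(irq, ctx);
	return ret;
}

static void my_sf_irq_teardown(struct auxiliary_device *auxdev, int irq,
			       void *ctx)
{
	/* Remove the sysfs entry when the IRQ is released */
	auxiliary_device_sysfs_irq_remove(auxdev, irq);
	free_irq(irq, ctx);
}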
@@ -83,6 +83,8 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(resp_remote_access_errors),
INIT_Q_COUNTER(resp_cqe_flush_error),
INIT_Q_COUNTER(req_cqe_flush_error),
INIT_Q_COUNTER(req_transport_retries_exceeded),
INIT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter roce_accl_cnts[] = {
@@ -102,6 +104,8 @@ static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
......
@@ -1810,7 +1810,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
}
resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (dev->wc_support)
if (mlx5_wc_support_get(dev->mdev))
resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
log_bf_reg_size);
resp->cache_line_size = cache_line_size();
@@ -2337,7 +2337,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
switch (command) {
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_ALLOC_WC:
if (!dev->wc_support)
if (!mlx5_wc_support_get(dev->mdev))
return -EPERM;
fallthrough;
case MLX5_IB_MMAP_NC_PAGE:
@@ -3612,7 +3612,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
return -EOPNOTSUPP;
if (!to_mdev(c->ibucontext.device)->wc_support &&
if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
return -EOPNOTSUPP;
@@ -3766,18 +3766,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
return err;
}
static int mlx5_ib_enable_driver(struct ib_device *dev)
{
struct mlx5_ib_dev *mdev = to_mdev(dev);
int ret;
ret = mlx5_ib_test_wc(mdev);
mlx5_ib_dbg(mdev, "Write-Combining %s",
mdev->wc_support ? "supported" : "not supported");
return ret;
}
static const struct ib_device_ops mlx5_ib_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_MLX5,
@@ -3808,7 +3796,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.device_group = &mlx5_attr_group,
.enable_driver = mlx5_ib_enable_driver,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
......
@@ -30,10 +30,8 @@
* SOFTWARE.
*/
#include <linux/io.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include <linux/jiffies.h>
/*
* Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
@@ -95,199 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
return 0;
return page_size;
}
#define WR_ID_BF 0xBF
#define WR_ID_END 0xBAD
#define TEST_WC_NUM_WQES 255
#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
bool signaled)
{
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_ctrl_seg *ctrl;
struct mlx5_bf *bf = &qp->bf;
__be32 mmio_wqe[16] = {};
unsigned long flags;
unsigned int idx;
if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
return -EIO;
spin_lock_irqsave(&qp->sq.lock, flags);
idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
ctrl->opmod_idx_opcode =
cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
(qp->trans_qp.base.mqp.qpn << 8));
qp->sq.wrid[idx] = wr_id;
qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
qp->sq.wqe_head[idx] = qp->sq.head + 1;
qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
MLX5_SEND_WQE_BB);
qp->sq.w_list[idx].next = qp->sq.cur_post;
qp->sq.head++;
memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
MLX5_WQE_CTRL_CQ_UPDATE;
/* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
*/
wmb();
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
/* Make sure doorbell record is visible to the HCA before
* we hit doorbell
*/
wmb();
__iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe,
sizeof(mmio_wqe) / 8);
bf->offset ^= bf->buf_size;
spin_unlock_irqrestore(&qp->sq.lock, flags);
return 0;
}
static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
{
int ret;
struct ib_wc wc = {};
unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
do {
ret = ib_poll_cq(cq, 1, &wc);
if (ret < 0 || wc.status)
return ret < 0 ? ret : -EINVAL;
if (ret)
break;
} while (!time_after(jiffies, end));
if (!ret)
return -ETIMEDOUT;
if (wc.wr_id != WR_ID_BF)
ret = 0;
return ret;
}
static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
int err, i;
for (i = 0; i < TEST_WC_NUM_WQES; i++) {
err = post_send_nop(dev, qp, WR_ID_BF, false);
if (err)
return err;
}
return post_send_nop(dev, qp, WR_ID_END, true);
}
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
{
struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
struct ib_qp_init_attr qp_init_attr = {
.cap = { .max_send_wr = TEST_WC_NUM_WQES },
.qp_type = IB_QPT_UD,
.sq_sig_type = IB_SIGNAL_REQ_WR,
.create_flags = MLX5_IB_QP_CREATE_WC_TEST,
};
struct ib_qp_attr qp_attr = { .port_num = 1 };
struct ib_device *ibdev = &dev->ib_dev;
struct ib_qp *qp;
struct ib_cq *cq;
struct ib_pd *pd;
int ret;
if (!MLX5_CAP_GEN(dev->mdev, bf))
return 0;
if (!dev->mdev->roce.roce_en &&
port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
if (mlx5_core_is_pf(dev->mdev))
dev->wc_support = arch_can_pci_mmap_wc();
return 0;
}
ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
if (ret)
goto print_err;
if (!dev->wc_bfreg.wc)
goto out1;
pd = ib_alloc_pd(ibdev, 0);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto out1;
}
cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto out2;
}
qp_init_attr.recv_cq = cq;
qp_init_attr.send_cq = cq;
qp = ib_create_qp(pd, &qp_init_attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto out3;
}
qp_attr.qp_state = IB_QPS_INIT;
ret = ib_modify_qp(qp, &qp_attr,
IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
IB_QP_QKEY);
if (ret)
goto out4;
qp_attr.qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
if (ret)
goto out4;
qp_attr.qp_state = IB_QPS_RTS;
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
if (ret)
goto out4;
ret = test_wc_do_send(dev, qp);
if (ret < 0)
goto out4;
ret = test_wc_poll_cq_result(dev, cq);
if (ret > 0) {
dev->wc_support = true;
ret = 0;
}
out4:
ib_destroy_qp(qp);
out3:
ib_destroy_cq(cq);
out2:
ib_dealloc_pd(pd);
out1:
mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
print_err:
if (ret)
mlx5_ib_err(
dev,
"Error %d while trying to test write-combining support\n",
ret);
return ret;
}
@@ -341,7 +341,6 @@ struct mlx5_ib_flow_db {
* rely on the range reserved for that use in the ib_qp_create_flags enum.
*/
#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1)
struct wr_list {
u16 opcode;
@@ -1123,7 +1122,6 @@ struct mlx5_ib_dev {
u8 ib_active:1;
u8 is_rep:1;
u8 lag_active:1;
u8 wc_support:1;
u8 fill_delay;
struct umr_common umrc;
/* sync used page count stats
@@ -1149,7 +1147,6 @@ struct mlx5_ib_dev {
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg wc_bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
......
@@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
qp->bf.bfreg = &dev->fp_bfreg;
else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
qp->bf.bfreg = &dev->wc_bfreg;
else
qp->bf.bfreg = &dev->bfreg;
@@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
return;
}
if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
/*
* Special case, if condition didn't meet, it won't be error,
* just different in-kernel flow.
*/
*flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
return;
}
mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
}
@@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
IB_QP_CREATE_PCI_WRITE_END_PADDING,
MLX5_CAP_GEN(mdev, end_pad), qp);
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
qp_type != MLX5_IB_QPT_REG_UMR, qp);
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
true, qp);
@@ -4609,10 +4597,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
return true;
/* Internal QP used for wc testing, with NOPs in wq */
if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
return true;
return false;
}
......
@@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
fw_reset.o qos.o lib/tout.o lib/aso.o
fw_reset.o qos.o lib/tout.o lib/aso.o wc.o
#
# Netdev basic
......
@@ -714,7 +714,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
err1:
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
mlx5_ctrl_irq_release(table->ctrl_irq);
mlx5_ctrl_irq_release(dev, table->ctrl_irq);
return err;
}
@@ -730,7 +730,7 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
cleanup_async_eq(dev, &table->cmd_eq, "cmd");
mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
mlx5_ctrl_irq_release(table->ctrl_irq);
mlx5_ctrl_irq_release(dev, table->ctrl_irq);
}
struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev)
@@ -918,7 +918,7 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx)
af_desc.is_managed = 1;
cpumask_copy(&af_desc.mask, cpu_online_mask);
cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus);
irq = mlx5_irq_affinity_request(pool, &af_desc);
irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
if (IS_ERR(irq))
return PTR_ERR(irq);
......
@@ -112,15 +112,18 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req
/**
* mlx5_irq_affinity_request - request an IRQ according to the given mask.
* @dev: mlx5 core device which is requesting the IRQ.
* @pool: IRQ pool to request from.
* @af_desc: affinity descriptor for this IRQ.
*
* This function returns a pointer to an IRQ, or an ERR_PTR in case of error.
*/
struct mlx5_irq *
mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool,
struct irq_affinity_desc *af_desc)
{
struct mlx5_irq *least_loaded_irq, *new_irq;
int ret;
mutex_lock(&pool->lock);
least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask);
@@ -153,6 +156,16 @@ mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *
mlx5_irq_read_locked(least_loaded_irq) / MLX5_EQ_REFS_PER_IRQ);
unlock:
mutex_unlock(&pool->lock);
if (mlx5_irq_pool_is_sf_pool(pool)) {
ret = auxiliary_device_sysfs_irq_add(mlx5_sf_coredev_to_adev(dev),
mlx5_irq_get_irq(least_loaded_irq));
if (ret) {
mlx5_core_err(dev, "Failed to create sysfs entry for irq %d, ret = %d\n",
mlx5_irq_get_irq(least_loaded_irq), ret);
mlx5_irq_put(least_loaded_irq);
least_loaded_irq = ERR_PTR(ret);
}
}
return least_loaded_irq;
}
@@ -164,6 +177,9 @@ void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *i
cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq));
synchronize_irq(pci_irq_vector(pool->dev->pdev,
mlx5_irq_get_index(irq)));
if (mlx5_irq_pool_is_sf_pool(pool))
auxiliary_device_sysfs_irq_remove(mlx5_sf_coredev_to_adev(dev),
mlx5_irq_get_irq(irq));
if (mlx5_irq_put(irq))
if (pool->irqs_per_cpu)
cpu_put(pool, cpu);
......
@@ -1819,6 +1819,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mutex_init(&dev->intf_state_mutex);
lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&dev->mlx5e_res.uplink_netdev_lock);
mutex_init(&dev->wc_state_lock);
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1916,6 +1917,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
mutex_destroy(&priv->alloc_mutex);
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->wc_state_lock);
mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock);
mutex_destroy(&dev->intf_state_mutex);
lockdep_unregister_key(&dev->lock_key);
......
@@ -320,6 +320,12 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev)
return dev->coredev_type == MLX5_COREDEV_SF;
}
static inline struct auxiliary_device *
mlx5_sf_coredev_to_adev(struct mlx5_core_dev *mdev)
{
return container_of(mdev->device, struct auxiliary_device, dev);
}
int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx);
void mlx5_mdev_uninit(struct mlx5_core_dev *dev);
int mlx5_init_one(struct mlx5_core_dev *dev);
......
@@ -25,7 +25,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int devfn,
int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
struct irq_affinity_desc *af_desc,
struct cpu_rmap **rmap);
@@ -36,13 +36,15 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq);
int mlx5_irq_get_index(struct mlx5_irq *irq);
int mlx5_irq_get_irq(const struct mlx5_irq *irq);
struct mlx5_irq_pool;
#ifdef CONFIG_MLX5_SF
struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
struct cpumask *used_cpus, u16 vecidx);
struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
struct irq_affinity_desc *af_desc);
struct mlx5_irq *
mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool,
struct irq_affinity_desc *af_desc);
void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq);
#else
static inline
@@ -53,7 +55,8 @@ struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev,
}
static inline struct mlx5_irq *
mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool,
struct irq_affinity_desc *af_desc)
{
return ERR_PTR(-EOPNOTSUPP);
}
@@ -61,6 +64,7 @@ mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *
static inline
void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq)
{
mlx5_irq_release_vector(irq);
}
#endif
#endif /* __MLX5_IRQ_H__ */
@@ -368,6 +368,11 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
return irq->mask;
}
int mlx5_irq_get_irq(const struct mlx5_irq *irq)
{
return irq->map.virq;
}
int mlx5_irq_get_index(struct mlx5_irq *irq)
{
return irq->map.index;
@@ -441,11 +446,12 @@ static void _mlx5_irq_release(struct mlx5_irq *irq)
/**
* mlx5_ctrl_irq_release - release a ctrl IRQ back to the system.
* @dev: mlx5 device that is releasing the IRQ.
* @ctrl_irq: ctrl IRQ to be released.
*/
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq)
{
_mlx5_irq_release(ctrl_irq);
mlx5_irq_affinity_irq_release(dev, ctrl_irq);
}
/**
@@ -474,7 +480,7 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
/* Allocate the IRQ in index 0. The vector was already allocated */
irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
} else {
irq = mlx5_irq_affinity_request(pool, &af_desc);
irq = mlx5_irq_affinity_request(dev, pool, &af_desc);
}
return irq;
......
(The diff for the new mlx5_core wc.c file is collapsed in this view.)
@@ -58,6 +58,9 @@
* in
* @name: Match name found by the auxiliary device driver,
* @id: unique identifier if multiple devices of the same name are exported,
* @irqs: xarray that contains the irq indices used by the device,
* @lock: Synchronize irq sysfs creation,
* @irq_dir_exists: whether "irqs" directory exists,
*
* An auxiliary_device represents a part of its parent device's functionality.
* It is given a name that, combined with the registering drivers
@@ -139,6 +142,11 @@ struct auxiliary_device {
struct device dev;
const char *name;
u32 id;
struct {
struct xarray irqs;
struct mutex lock; /* Synchronize irq sysfs creation */
bool irq_dir_exists;
} sysfs;
};
/**
@@ -212,8 +220,24 @@ int auxiliary_device_init(struct auxiliary_device *auxdev);
int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname);
#define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME)
#ifdef CONFIG_SYSFS
int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq);
void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev,
int irq);
#else /* CONFIG_SYSFS */
static inline int
auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq)
{
return 0;
}
static inline void
auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) {}
#endif
static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev)
{
mutex_destroy(&auxdev->sysfs.lock);
put_device(&auxdev->dev);
}
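
Since auxiliary_device_init() now initializes sysfs.lock and auxiliary_device_uninit() destroys it, existing callers need no changes. A minimal sketch of the unchanged registration flow, assuming name, id, dev.parent and dev.release are already set (my_register_sf is a hypothetical helper):

static int my_register_sf(struct auxiliary_device *auxdev)
{
	int ret;

	/* Initializes auxdev->dev and, with this series, auxdev->sysfs.lock */
	ret = auxiliary_device_init(auxdev);
	if (ret)
		return ret;

	ret = auxiliary_device_add(auxdev);
	if (ret) {
		/* Destroys sysfs.lock before dropping the device reference */
		auxiliary_device_uninit(auxdev);
		return ret;
	}
	return 0;
}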
......
@@ -766,6 +766,12 @@ struct mlx5_hca_cap {
u32 max[MLX5_UN_SZ_DW(hca_cap_union)];
};
enum mlx5_wc_state {
MLX5_WC_STATE_UNINITIALIZED,
MLX5_WC_STATE_UNSUPPORTED,
MLX5_WC_STATE_SUPPORTED,
};
struct mlx5_core_dev {
struct device *device;
enum mlx5_coredev_type coredev_type;
@@ -824,6 +830,9 @@ struct mlx5_core_dev {
#endif
u64 num_ipsec_offloads;
struct mlx5_sd *sd;
enum mlx5_wc_state wc_state;
/* sync write combining state */
struct mutex wc_state_lock;
};
struct mlx5_db {
@@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev)
enum {
MLX5_OCTWORD = 16,
};
bool mlx5_wc_support_get(struct mlx5_core_dev *mdev);
#endif /* MLX5_DRIVER_H */
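
The wc.c diff itself is collapsed above, but the mlx5_wc_state enum and wc_state_lock added here suggest a run-once, cached probe. A minimal sketch of that pattern (an assumption, not the verbatim mlx5 code; mlx5_core_test_wc is a hypothetical helper name):

bool mlx5_wc_support_get(struct mlx5_core_dev *mdev)
{
	mutex_lock(&mdev->wc_state_lock);
	/* Run the write-combining probe once and cache the verdict */
	if (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED)
		mlx5_core_test_wc(mdev);	/* hypothetical helper */
	mutex_unlock(&mdev->wc_state_lock);

	return mdev->wc_state == MLX5_WC_STATE_SUPPORTED;
}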
@@ -5642,7 +5642,11 @@ struct mlx5_ifc_query_q_counter_out_bits {
u8 local_ack_timeout_err[0x20];
u8 reserved_at_320[0xa0];
u8 reserved_at_320[0x60];
u8 req_rnr_retries_exceeded[0x20];
u8 reserved_at_3a0[0x20];
u8 resp_local_length_error[0x20];
......