Commit 9526f360 authored by Manoj N. Kumar's avatar Manoj N. Kumar Committed by Martin K. Petersen

cxlflash: Fix regression issue with re-ordering patch

While running 'sg_reset -H' back to back the following exception was seen:

[  735.115695] Faulting instruction address: 0xd0000000098c0864
cpu 0x0: Vector: 300 (Data Access) at [c000000ffffafa80]
    pc: d0000000098c0864: cxlflash_async_err_irq+0x84/0x5c0 [cxlflash]
    lr: c00000000013aed0: handle_irq_event_percpu+0xa0/0x310
    sp: c000000ffffafd00
   msr: 9000000000009033
   dar: 2010000
 dsisr: 40000000
  current = 0xc000000001510880
  paca    = 0xc00000000fb80000   softe: 0        irq_happened: 0x01
    pid   = 0, comm = swapper/0

Linux version 4.5.0-491-26f710d+

enter ? for help
[c000000ffffafe10] c00000000013aed0 handle_irq_event_percpu+0xa0/0x310
[c000000ffffafed0] c00000000013b1a8 handle_irq_event+0x68/0xc0
[c000000ffffaff00] c0000000001404ec handle_fasteoi_irq+0xec/0x2a0
[c000000ffffaff30] c00000000013a084 generic_handle_irq+0x54/0x80
[c000000ffffaff60] c000000000011130 __do_irq+0x80/0x1d0
[c000000ffffaff90] c000000000024d40 call_do_irq+0x14/0x24
[c000000001573a20] c000000000011318 do_IRQ+0x98/0x140
[c000000001573a70] c000000000002594 hardware_interrupt_common+0x114/0x180

This exception is being hit because the async_err interrupt path performs
an MMIO to read the interrupt status register. The MMIO region in this
case is not available.

Commit 6ded8b3c ("cxlflash: Unmap problem state area before detaching
master context") re-ordered the sequence in which term_mc() and stop_afu()
are called. This introduces a window for interrupts to come in with the
problem space area unmapped, that did not exist previously.

The fix is to separate the disabling of all AFU interrupts to a distinct
function, term_intr() so that it is the first thing that is done in the
tear down process.

To keep the initialization process symmetric, separate the AFU interrupt
setup also to a distinct function: init_intr().

Fixes: 6ded8b3c ("cxlflash: Unmap problem state area before detaching master context")
Signed-off-by: default avatarManoj N. Kumar <manoj@linux.vnet.ibm.com>
Acked-by: default avatarMatthew R. Ochs <mrochs@linux.vnet.ibm.com>
Reviewed-by: default avatarJohannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: default avatarUma Krishnan <ukrishn@linux.vnet.ibm.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 5ec8a175
...@@ -683,28 +683,23 @@ static void stop_afu(struct cxlflash_cfg *cfg) ...@@ -683,28 +683,23 @@ static void stop_afu(struct cxlflash_cfg *cfg)
} }
/** /**
* term_mc() - terminates the master context * term_intr() - disables all AFU interrupts
* @cfg: Internal structure associated with the host. * @cfg: Internal structure associated with the host.
* @level: Depth of allocation, where to begin waterfall tear down. * @level: Depth of allocation, where to begin waterfall tear down.
* *
* Safe to call with AFU/MC in partially allocated/initialized state. * Safe to call with AFU/MC in partially allocated/initialized state.
*/ */
static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level) static void term_intr(struct cxlflash_cfg *cfg, enum undo_level level)
{ {
int rc = 0;
struct afu *afu = cfg->afu; struct afu *afu = cfg->afu;
struct device *dev = &cfg->dev->dev; struct device *dev = &cfg->dev->dev;
if (!afu || !cfg->mcctx) { if (!afu || !cfg->mcctx) {
dev_err(dev, "%s: returning from term_mc with NULL afu or MC\n", dev_err(dev, "%s: returning with NULL afu or MC\n", __func__);
__func__);
return; return;
} }
switch (level) { switch (level) {
case UNDO_START:
rc = cxl_stop_context(cfg->mcctx);
BUG_ON(rc);
case UNMAP_THREE: case UNMAP_THREE:
cxl_unmap_afu_irq(cfg->mcctx, 3, afu); cxl_unmap_afu_irq(cfg->mcctx, 3, afu);
case UNMAP_TWO: case UNMAP_TWO:
...@@ -713,9 +708,34 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level) ...@@ -713,9 +708,34 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level)
cxl_unmap_afu_irq(cfg->mcctx, 1, afu); cxl_unmap_afu_irq(cfg->mcctx, 1, afu);
case FREE_IRQ: case FREE_IRQ:
cxl_free_afu_irqs(cfg->mcctx); cxl_free_afu_irqs(cfg->mcctx);
case RELEASE_CONTEXT: /* fall through */
cfg->mcctx = NULL; case UNDO_NOOP:
/* No action required */
break;
}
}
/**
* term_mc() - terminates the master context
* @cfg: Internal structure associated with the host.
* @level: Depth of allocation, where to begin waterfall tear down.
*
* Safe to call with AFU/MC in partially allocated/initialized state.
*/
static void term_mc(struct cxlflash_cfg *cfg)
{
int rc = 0;
struct afu *afu = cfg->afu;
struct device *dev = &cfg->dev->dev;
if (!afu || !cfg->mcctx) {
dev_err(dev, "%s: returning with NULL afu or MC\n", __func__);
return;
} }
rc = cxl_stop_context(cfg->mcctx);
WARN_ON(rc);
cfg->mcctx = NULL;
} }
/** /**
...@@ -726,10 +746,20 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level) ...@@ -726,10 +746,20 @@ static void term_mc(struct cxlflash_cfg *cfg, enum undo_level level)
*/ */
static void term_afu(struct cxlflash_cfg *cfg) static void term_afu(struct cxlflash_cfg *cfg)
{ {
/*
* Tear down is carefully orchestrated to ensure
* no interrupts can come in when the problem state
* area is unmapped.
*
* 1) Disable all AFU interrupts
* 2) Unmap the problem state area
* 3) Stop the master context
*/
term_intr(cfg, UNMAP_THREE);
if (cfg->afu) if (cfg->afu)
stop_afu(cfg); stop_afu(cfg);
term_mc(cfg, UNDO_START); term_mc(cfg);
pr_debug("%s: returning\n", __func__); pr_debug("%s: returning\n", __func__);
} }
...@@ -1597,41 +1627,24 @@ static int start_afu(struct cxlflash_cfg *cfg) ...@@ -1597,41 +1627,24 @@ static int start_afu(struct cxlflash_cfg *cfg)
} }
/** /**
* init_mc() - create and register as the master context * init_intr() - setup interrupt handlers for the master context
* @cfg: Internal structure associated with the host. * @cfg: Internal structure associated with the host.
* *
* Return: 0 on success, -errno on failure * Return: 0 on success, -errno on failure
*/ */
static int init_mc(struct cxlflash_cfg *cfg) static enum undo_level init_intr(struct cxlflash_cfg *cfg,
struct cxl_context *ctx)
{ {
struct cxl_context *ctx;
struct device *dev = &cfg->dev->dev;
struct afu *afu = cfg->afu; struct afu *afu = cfg->afu;
struct device *dev = &cfg->dev->dev;
int rc = 0; int rc = 0;
enum undo_level level; enum undo_level level = UNDO_NOOP;
ctx = cxl_get_context(cfg->dev);
if (unlikely(!ctx))
return -ENOMEM;
cfg->mcctx = ctx;
/* Set it up as a master with the CXL */
cxl_set_master(ctx);
/* During initialization reset the AFU to start from a clean slate */
rc = cxl_afu_reset(cfg->mcctx);
if (unlikely(rc)) {
dev_err(dev, "%s: initial AFU reset failed rc=%d\n",
__func__, rc);
level = RELEASE_CONTEXT;
goto out;
}
rc = cxl_allocate_afu_irqs(ctx, 3); rc = cxl_allocate_afu_irqs(ctx, 3);
if (unlikely(rc)) { if (unlikely(rc)) {
dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n", dev_err(dev, "%s: call to allocate_afu_irqs failed rc=%d!\n",
__func__, rc); __func__, rc);
level = RELEASE_CONTEXT; level = UNDO_NOOP;
goto out; goto out;
} }
...@@ -1661,8 +1674,47 @@ static int init_mc(struct cxlflash_cfg *cfg) ...@@ -1661,8 +1674,47 @@ static int init_mc(struct cxlflash_cfg *cfg)
level = UNMAP_TWO; level = UNMAP_TWO;
goto out; goto out;
} }
out:
return level;
}
rc = 0; /**
* init_mc() - create and register as the master context
* @cfg: Internal structure associated with the host.
*
* Return: 0 on success, -errno on failure
*/
static int init_mc(struct cxlflash_cfg *cfg)
{
struct cxl_context *ctx;
struct device *dev = &cfg->dev->dev;
int rc = 0;
enum undo_level level;
ctx = cxl_get_context(cfg->dev);
if (unlikely(!ctx)) {
rc = -ENOMEM;
goto ret;
}
cfg->mcctx = ctx;
/* Set it up as a master with the CXL */
cxl_set_master(ctx);
/* During initialization reset the AFU to start from a clean slate */
rc = cxl_afu_reset(cfg->mcctx);
if (unlikely(rc)) {
dev_err(dev, "%s: initial AFU reset failed rc=%d\n",
__func__, rc);
goto ret;
}
level = init_intr(cfg, ctx);
if (unlikely(level)) {
dev_err(dev, "%s: setting up interrupts failed rc=%d\n",
__func__, rc);
goto out;
}
/* This performs the equivalent of the CXL_IOCTL_START_WORK. /* This performs the equivalent of the CXL_IOCTL_START_WORK.
* The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process * The CXL_IOCTL_GET_PROCESS_ELEMENT is implicit in the process
...@@ -1678,7 +1730,7 @@ static int init_mc(struct cxlflash_cfg *cfg) ...@@ -1678,7 +1730,7 @@ static int init_mc(struct cxlflash_cfg *cfg)
pr_debug("%s: returning rc=%d\n", __func__, rc); pr_debug("%s: returning rc=%d\n", __func__, rc);
return rc; return rc;
out: out:
term_mc(cfg, level); term_intr(cfg, level);
goto ret; goto ret;
} }
...@@ -1751,7 +1803,8 @@ static int init_afu(struct cxlflash_cfg *cfg) ...@@ -1751,7 +1803,8 @@ static int init_afu(struct cxlflash_cfg *cfg)
err2: err2:
kref_put(&afu->mapcount, afu_unmap); kref_put(&afu->mapcount, afu_unmap);
err1: err1:
term_mc(cfg, UNDO_START); term_intr(cfg, UNMAP_THREE);
term_mc(cfg);
goto out; goto out;
} }
...@@ -2502,8 +2555,7 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, ...@@ -2502,8 +2555,7 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
if (unlikely(rc)) if (unlikely(rc))
dev_err(dev, "%s: Failed to mark user contexts!(%d)\n", dev_err(dev, "%s: Failed to mark user contexts!(%d)\n",
__func__, rc); __func__, rc);
stop_afu(cfg); term_afu(cfg);
term_mc(cfg, UNDO_START);
return PCI_ERS_RESULT_NEED_RESET; return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure: case pci_channel_io_perm_failure:
cfg->state = STATE_FAILTERM; cfg->state = STATE_FAILTERM;
......
...@@ -79,12 +79,11 @@ ...@@ -79,12 +79,11 @@
#define WWPN_BUF_LEN (WWPN_LEN + 1) #define WWPN_BUF_LEN (WWPN_LEN + 1)
enum undo_level { enum undo_level {
RELEASE_CONTEXT = 0, UNDO_NOOP = 0,
FREE_IRQ, FREE_IRQ,
UNMAP_ONE, UNMAP_ONE,
UNMAP_TWO, UNMAP_TWO,
UNMAP_THREE, UNMAP_THREE
UNDO_START
}; };
struct dev_dependent_vals { struct dev_dependent_vals {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment