Commit c95246c3 authored by Philip J Kelleher's avatar Philip J Kelleher Committed by Jens Axboe

Adding in EEH support to the IBM FlashSystem 70/80 device driver

Changes in v2 include:
o Fixed spelling of guarantee.
o Fixed potential memory leak if slot reset fails out.
o Changed list_for_each_entry_safe with list_for_each_entry.
Signed-off-by: default avatarPhilip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 1ebfd109
......@@ -30,6 +30,7 @@
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/genhd.h>
#include <linux/idr.h>
......@@ -52,6 +53,13 @@ static DEFINE_IDA(rsxx_disk_ida);
static DEFINE_SPINLOCK(rsxx_ida_lock);
/*----------------- Interrupt Control & Handling -------------------*/
static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
{
card->isr_mask = 0;
card->ier_mask = 0;
}
static void __enable_intr(unsigned int *mask, unsigned int intr)
{
*mask |= intr;
......@@ -71,7 +79,8 @@ static void __disable_intr(unsigned int *mask, unsigned int intr)
*/
void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
if (unlikely(card->halt))
if (unlikely(card->halt) ||
unlikely(card->eeh_state))
return;
__enable_intr(&card->ier_mask, intr);
......@@ -80,6 +89,9 @@ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
if (unlikely(card->eeh_state))
return;
__disable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
}
......@@ -87,7 +99,8 @@ void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
if (unlikely(card->halt))
if (unlikely(card->halt) ||
unlikely(card->eeh_state))
return;
__enable_intr(&card->isr_mask, intr);
......@@ -97,6 +110,9 @@ void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
if (unlikely(card->eeh_state))
return;
__disable_intr(&card->isr_mask, intr);
__disable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
......@@ -115,6 +131,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
do {
reread_isr = 0;
if (unlikely(card->eeh_state))
break;
isr = ioread32(card->regmap + ISR);
if (isr == 0xffffffff) {
/*
......@@ -304,6 +323,179 @@ static int card_shutdown(struct rsxx_cardinfo *card)
return 0;
}
static void rsxx_eeh_frozen(struct pci_dev *dev)
{
struct rsxx_cardinfo *card = pci_get_drvdata(dev);
int i;
dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
card->eeh_state = 1;
rsxx_mask_interrupts(card);
/*
* We need to guarantee that the write for eeh_state and masking
* interrupts does not become reordered. This will prevent a possible
* race condition with the EEH code.
*/
wmb();
pci_disable_device(dev);
rsxx_eeh_save_issued_dmas(card);
rsxx_eeh_save_issued_creg(card);
for (i = 0; i < card->n_targets; i++) {
if (card->ctrl[i].status.buf)
pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
card->ctrl[i].status.buf,
card->ctrl[i].status.dma_addr);
if (card->ctrl[i].cmd.buf)
pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
card->ctrl[i].cmd.buf,
card->ctrl[i].cmd.dma_addr);
}
}
static void rsxx_eeh_failure(struct pci_dev *dev)
{
struct rsxx_cardinfo *card = pci_get_drvdata(dev);
int i;
dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
card->eeh_state = 1;
for (i = 0; i < card->n_targets; i++)
del_timer_sync(&card->ctrl[i].activity_timer);
rsxx_eeh_cancel_dmas(card);
}
static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
{
unsigned int status;
int iter = 0;
/* We need to wait for the hardware to reset */
while (iter++ < 10) {
status = ioread32(card->regmap + PCI_RECONFIG);
if (status & RSXX_FLUSH_BUSY) {
ssleep(1);
continue;
}
if (status & RSXX_FLUSH_TIMEOUT)
dev_warn(CARD_TO_DEV(card), "HW: flash controller timeout\n");
return 0;
}
/* Hardware failed resetting itself. */
return -1;
}
static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev,
enum pci_channel_state error)
{
if (dev->revision < RSXX_EEH_SUPPORT)
return PCI_ERS_RESULT_NONE;
if (error == pci_channel_io_perm_failure) {
rsxx_eeh_failure(dev);
return PCI_ERS_RESULT_DISCONNECT;
}
rsxx_eeh_frozen(dev);
return PCI_ERS_RESULT_NEED_RESET;
}
static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
{
struct rsxx_cardinfo *card = pci_get_drvdata(dev);
unsigned long flags;
int i;
int st;
dev_warn(&dev->dev,
"IBM FlashSystem PCI: recovering from slot reset.\n");
st = pci_enable_device(dev);
if (st)
goto failed_hw_setup;
pci_set_master(dev);
st = rsxx_eeh_fifo_flush_poll(card);
if (st)
goto failed_hw_setup;
rsxx_dma_queue_reset(card);
for (i = 0; i < card->n_targets; i++) {
st = rsxx_hw_buffers_init(dev, &card->ctrl[i]);
if (st)
goto failed_hw_buffers_init;
}
if (card->config_valid)
rsxx_dma_configure(card);
/* Clears the ISR register from spurious interrupts */
st = ioread32(card->regmap + ISR);
card->eeh_state = 0;
st = rsxx_eeh_remap_dmas(card);
if (st)
goto failed_remap_dmas;
spin_lock_irqsave(&card->irq_lock, flags);
if (card->n_targets & RSXX_MAX_TARGETS)
rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G);
else
rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C);
spin_unlock_irqrestore(&card->irq_lock, flags);
rsxx_kick_creg_queue(card);
for (i = 0; i < card->n_targets; i++) {
spin_lock(&card->ctrl[i].queue_lock);
if (list_empty(&card->ctrl[i].queue)) {
spin_unlock(&card->ctrl[i].queue_lock);
continue;
}
spin_unlock(&card->ctrl[i].queue_lock);
queue_work(card->ctrl[i].issue_wq,
&card->ctrl[i].issue_dma_work);
}
dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
return PCI_ERS_RESULT_RECOVERED;
failed_hw_buffers_init:
failed_remap_dmas:
for (i = 0; i < card->n_targets; i++) {
if (card->ctrl[i].status.buf)
pci_free_consistent(card->dev,
STATUS_BUFFER_SIZE8,
card->ctrl[i].status.buf,
card->ctrl[i].status.dma_addr);
if (card->ctrl[i].cmd.buf)
pci_free_consistent(card->dev,
COMMAND_BUFFER_SIZE8,
card->ctrl[i].cmd.buf,
card->ctrl[i].cmd.dma_addr);
}
failed_hw_setup:
rsxx_eeh_failure(dev);
return PCI_ERS_RESULT_DISCONNECT;
}
/*----------------- Driver Initialization & Setup -------------------*/
/* Returns: 0 if the driver is compatible with the device
-1 if the driver is NOT compatible with the device */
......@@ -383,6 +575,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
spin_lock_init(&card->irq_lock);
card->halt = 0;
card->eeh_state = 0;
spin_lock_irq(&card->irq_lock);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
......@@ -593,6 +786,11 @@ static void rsxx_pci_shutdown(struct pci_dev *dev)
card_shutdown(card);
}
static const struct pci_error_handlers rsxx_err_handler = {
.error_detected = rsxx_error_detected,
.slot_reset = rsxx_slot_reset,
};
static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = {
{PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)},
{PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)},
......@@ -608,6 +806,7 @@ static struct pci_driver rsxx_pci_driver = {
.remove = rsxx_pci_remove,
.suspend = rsxx_pci_suspend,
.shutdown = rsxx_pci_shutdown,
.err_handler = &rsxx_err_handler,
};
static int __init rsxx_core_init(void)
......
......@@ -58,7 +58,7 @@ static struct kmem_cache *creg_cmd_pool;
#error Unknown endianess!!! Aborting...
#endif
static void copy_to_creg_data(struct rsxx_cardinfo *card,
static int copy_to_creg_data(struct rsxx_cardinfo *card,
int cnt8,
void *buf,
unsigned int stream)
......@@ -66,6 +66,9 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card,
int i = 0;
u32 *data = buf;
if (unlikely(card->eeh_state))
return -EIO;
for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
/*
* Firmware implementation makes it necessary to byte swap on
......@@ -76,10 +79,12 @@ static void copy_to_creg_data(struct rsxx_cardinfo *card,
else
iowrite32(data[i], card->regmap + CREG_DATA(i));
}
return 0;
}
static void copy_from_creg_data(struct rsxx_cardinfo *card,
static int copy_from_creg_data(struct rsxx_cardinfo *card,
int cnt8,
void *buf,
unsigned int stream)
......@@ -87,6 +92,9 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card,
int i = 0;
u32 *data = buf;
if (unlikely(card->eeh_state))
return -EIO;
for (i = 0; cnt8 > 0; i++, cnt8 -= 4) {
/*
* Firmware implementation makes it necessary to byte swap on
......@@ -97,19 +105,32 @@ static void copy_from_creg_data(struct rsxx_cardinfo *card,
else
data[i] = ioread32(card->regmap + CREG_DATA(i));
}
return 0;
}
static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd)
{
int st;
if (unlikely(card->eeh_state))
return;
iowrite32(cmd->addr, card->regmap + CREG_ADD);
iowrite32(cmd->cnt8, card->regmap + CREG_CNT);
if (cmd->op == CREG_OP_WRITE) {
if (cmd->buf)
copy_to_creg_data(card, cmd->cnt8,
cmd->buf, cmd->stream);
if (cmd->buf) {
st = copy_to_creg_data(card, cmd->cnt8,
cmd->buf, cmd->stream);
if (st)
return;
}
}
if (unlikely(card->eeh_state))
return;
/* Setting the valid bit will kick off the command. */
iowrite32(cmd->op, card->regmap + CREG_CMD);
}
......@@ -272,7 +293,7 @@ static void creg_cmd_done(struct work_struct *work)
goto creg_done;
}
copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
st = copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream);
}
creg_done:
......@@ -675,6 +696,32 @@ int rsxx_reg_access(struct rsxx_cardinfo *card,
return 0;
}
void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card)
{
struct creg_cmd *cmd = NULL;
cmd = card->creg_ctrl.active_cmd;
card->creg_ctrl.active_cmd = NULL;
if (cmd) {
del_timer_sync(&card->creg_ctrl.cmd_timer);
spin_lock_bh(&card->creg_ctrl.lock);
list_add(&cmd->list, &card->creg_ctrl.queue);
card->creg_ctrl.q_depth++;
card->creg_ctrl.active = 0;
spin_unlock_bh(&card->creg_ctrl.lock);
}
}
void rsxx_kick_creg_queue(struct rsxx_cardinfo *card)
{
spin_lock_bh(&card->creg_ctrl.lock);
if (!list_empty(&card->creg_ctrl.queue))
creg_kick_queue(card);
spin_unlock_bh(&card->creg_ctrl.lock);
}
/*------------ Initialization & Setup --------------*/
int rsxx_creg_setup(struct rsxx_cardinfo *card)
{
......
This diff is collapsed.
......@@ -64,6 +64,9 @@ struct proc_cmd;
#define RSXX_MAX_OUTSTANDING_CMDS 255
#define RSXX_CS_IDX_MASK 0xff
#define STATUS_BUFFER_SIZE8 4096
#define COMMAND_BUFFER_SIZE8 4096
#define RSXX_MAX_TARGETS 8
struct dma_tracker_list;
......@@ -88,6 +91,9 @@ struct rsxx_dma_stats {
u32 discards_failed;
u32 done_rescheduled;
u32 issue_rescheduled;
u32 dma_sw_err;
u32 dma_hw_fault;
u32 dma_cancelled;
u32 sw_q_depth; /* Number of DMAs on the SW queue. */
atomic_t hw_q_depth; /* Number of DMAs queued to HW. */
};
......@@ -113,6 +119,7 @@ struct rsxx_dma_ctrl {
struct rsxx_cardinfo {
struct pci_dev *dev;
unsigned int halt;
unsigned int eeh_state;
void __iomem *regmap;
spinlock_t irq_lock;
......@@ -221,6 +228,7 @@ enum rsxx_pci_regmap {
PERF_RD512_HI = 0xac,
PERF_WR512_LO = 0xb0,
PERF_WR512_HI = 0xb4,
PCI_RECONFIG = 0xb8,
};
enum rsxx_intr {
......@@ -234,6 +242,8 @@ enum rsxx_intr {
CR_INTR_DMA5 = 0x00000080,
CR_INTR_DMA6 = 0x00000100,
CR_INTR_DMA7 = 0x00000200,
CR_INTR_ALL_C = 0x0000003f,
CR_INTR_ALL_G = 0x000003ff,
CR_INTR_DMA_ALL = 0x000003f5,
CR_INTR_ALL = 0xffffffff,
};
......@@ -250,8 +260,14 @@ enum rsxx_pci_reset {
DMA_QUEUE_RESET = 0x00000001,
};
enum rsxx_hw_fifo_flush {
RSXX_FLUSH_BUSY = 0x00000002,
RSXX_FLUSH_TIMEOUT = 0x00000004,
};
enum rsxx_pci_revision {
RSXX_DISCARD_SUPPORT = 2,
RSXX_EEH_SUPPORT = 3,
};
enum rsxx_creg_cmd {
......@@ -357,11 +373,17 @@ int rsxx_dma_setup(struct rsxx_cardinfo *card);
void rsxx_dma_destroy(struct rsxx_cardinfo *card);
int rsxx_dma_init(void);
void rsxx_dma_cleanup(void);
void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
int rsxx_dma_configure(struct rsxx_cardinfo *card);
int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
struct bio *bio,
atomic_t *n_dmas,
rsxx_dma_cb cb,
void *cb_data);
int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
void rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
/***** cregs.c *****/
int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr,
......@@ -386,10 +408,11 @@ int rsxx_creg_setup(struct rsxx_cardinfo *card);
void rsxx_creg_destroy(struct rsxx_cardinfo *card);
int rsxx_creg_init(void);
void rsxx_creg_cleanup(void);
int rsxx_reg_access(struct rsxx_cardinfo *card,
struct rsxx_reg_access __user *ucmd,
int read);
void rsxx_eeh_save_issued_creg(struct rsxx_cardinfo *card);
void rsxx_kick_creg_queue(struct rsxx_cardinfo *card);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment