Commit 97f6ef64 authored by Xu Yu, committed by Christoph Hellwig

nvme-pci: remap BAR0 to cover admin CQ doorbell for large stride

The existing driver initially maps 8192 bytes of BAR0, which is
intended to cover the doorbells of the admin SQ and CQ. However, if a
large stride, e.g. 10, is used, the admin CQ doorbell lies beyond
those 8192 bytes. Consequently, a page fault is raised when the
admin CQ doorbell is accessed in nvme_configure_admin_queue().

This patch fixes the issue by remapping BAR0 before accessing
the admin CQ doorbell if the initial mapping is too small.
Signed-off-by: Xu Yu <yu.a.xu@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent fdf9dfa8
...@@ -95,6 +95,7 @@ struct nvme_dev { ...@@ -95,6 +95,7 @@ struct nvme_dev {
int q_depth; int q_depth;
u32 db_stride; u32 db_stride;
void __iomem *bar; void __iomem *bar;
unsigned long bar_mapped_size;
struct work_struct reset_work; struct work_struct reset_work;
struct work_struct remove_work; struct work_struct remove_work;
struct timer_list watchdog_timer; struct timer_list watchdog_timer;
...@@ -1320,6 +1321,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) ...@@ -1320,6 +1321,32 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
return 0; return 0;
} }
/*
 * Number of bytes of BAR0 that must be mapped to reach the doorbells of
 * @nr_io_queues I/O queues plus one more queue (callers pass 0 to size the
 * mapping for the admin queue alone).
 *
 * The doorbell area starts at NVME_REG_DBS and each queue occupies
 * 8 * dev->db_stride bytes of it.
 *
 * The arithmetic is done in unsigned long: the operands are 32-bit, so
 * multiplying them directly could wrap before the result is widened to the
 * return type, yielding a size that is too small.
 */
static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
	unsigned long nr_queues = (unsigned long)nr_io_queues + 1;

	return NVME_REG_DBS + nr_queues * 8 * dev->db_stride;
}
/*
 * Make sure at least @size bytes of BAR0 are mapped at dev->bar.
 *
 * Nothing is done when the current mapping is already large enough.
 * Otherwise any existing mapping is torn down and BAR0 is mapped again with
 * the requested size, and dev->dbs is pointed at NVME_REG_DBS inside the new
 * mapping. Pointers derived from the previous dev->bar must not be used
 * after a remap.
 *
 * Returns 0 on success, or -ENOMEM when @size exceeds the length of BAR0 or
 * ioremap() fails. After an ioremap() failure dev->bar is NULL and
 * dev->bar_mapped_size is 0.
 */
static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	/* The current mapping already covers the request. */
	if (dev->bar_mapped_size >= size)
		return 0;

	/* Refuse to map past the end of the BAR. */
	if (pci_resource_len(pdev, 0) < size)
		return -ENOMEM;

	if (dev->bar)
		iounmap(dev->bar);

	dev->bar = ioremap(pci_resource_start(pdev, 0), size);
	dev->bar_mapped_size = dev->bar ? size : 0;
	if (!dev->bar)
		return -ENOMEM;

	dev->dbs = dev->bar + NVME_REG_DBS;
	return 0;
}
static int nvme_configure_admin_queue(struct nvme_dev *dev) static int nvme_configure_admin_queue(struct nvme_dev *dev)
{ {
int result; int result;
...@@ -1327,6 +1354,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) ...@@ -1327,6 +1354,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq; struct nvme_queue *nvmeq;
result = nvme_remap_bar(dev, db_bar_size(dev, 0));
if (result < 0)
return result;
dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
NVME_CAP_NSSRC(cap) : 0; NVME_CAP_NSSRC(cap) : 0;
...@@ -1679,16 +1710,12 @@ static void nvme_setup_host_mem(struct nvme_dev *dev) ...@@ -1679,16 +1710,12 @@ static void nvme_setup_host_mem(struct nvme_dev *dev)
nvme_free_host_mem(dev); nvme_free_host_mem(dev);
} }
/*
 * Bytes of BAR0 needed to reach the doorbells of nr_io_queues + 1 queues:
 * a fixed 4096-byte offset plus 8 * dev->db_stride bytes per queue.
 */
static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
}
static int nvme_setup_io_queues(struct nvme_dev *dev) static int nvme_setup_io_queues(struct nvme_dev *dev)
{ {
struct nvme_queue *adminq = dev->queues[0]; struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev); struct pci_dev *pdev = to_pci_dev(dev->dev);
int result, nr_io_queues, size; int result, nr_io_queues;
unsigned long size;
nr_io_queues = num_online_cpus(); nr_io_queues = num_online_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
...@@ -1707,20 +1734,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -1707,20 +1734,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nvme_release_cmb(dev); nvme_release_cmb(dev);
} }
size = db_bar_size(dev, nr_io_queues); do {
if (size > 8192) { size = db_bar_size(dev, nr_io_queues);
iounmap(dev->bar); result = nvme_remap_bar(dev, size);
do { if (!result)
dev->bar = ioremap(pci_resource_start(pdev, 0), size); break;
if (dev->bar) if (!--nr_io_queues)
break; return -ENOMEM;
if (!--nr_io_queues) } while (1);
return -ENOMEM; adminq->q_db = dev->dbs;
size = db_bar_size(dev, nr_io_queues);
} while (1);
dev->dbs = dev->bar + 4096;
adminq->q_db = dev->dbs;
}
/* Deregister the admin queue's interrupt */ /* Deregister the admin queue's interrupt */
pci_free_irq(pdev, 0, adminq); pci_free_irq(pdev, 0, adminq);
...@@ -2240,8 +2262,7 @@ static int nvme_dev_map(struct nvme_dev *dev) ...@@ -2240,8 +2262,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (pci_request_mem_regions(pdev, "nvme")) if (pci_request_mem_regions(pdev, "nvme"))
return -ENODEV; return -ENODEV;
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
if (!dev->bar)
goto release; goto release;
return 0; return 0;
......
...@@ -102,6 +102,7 @@ enum { ...@@ -102,6 +102,7 @@ enum {
NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
}; };
#define NVME_CAP_MQES(cap) ((cap) & 0xffff) #define NVME_CAP_MQES(cap) ((cap) & 0xffff)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment