Commit bf0bf77f authored by Eli Cohen, committed by Roland Dreier

mlx5: Support communicating arbitrary host page size to firmware

Connect-IB firmware requires the pages it is given by the driver to be
in 4K units. This patch breaks larger host pages into 4K units to
enable support for architectures that use a larger page size, such as
PowerPC. It also fixes several places that referred to PAGE_SHIFT
instead of an explicit 12, which is the inherent page shift of
Connect-IB.
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
parent 952f5f6e
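
For context, the page-size arithmetic this patch standardizes on can be checked in isolation. The sketch below is illustrative only and not part of the patch; names such as FW_PAGE_SHIFT are invented for the example. The firmware's inherent page shift is 12 (4K), so a buffer's log_pg_sz is its page shift minus 12, and one host page supplies PAGE_SIZE / 4096 firmware pages:

```c
/* Standalone model of the 4K-unit arithmetic used throughout the patch.
 * FW_PAGE_SHIFT is a stand-in name; the patch itself writes a literal 12. */
#include <stdio.h>

#define FW_PAGE_SHIFT   12                      /* Connect-IB's inherent page shift */
#define FW_PAGE_SIZE    (1UL << FW_PAGE_SHIFT)  /* 4096 bytes */

int main(void)
{
        unsigned int shifts[] = { 12, 16 };     /* x86 4K pages, PowerPC 64K pages */

        for (int i = 0; i < 2; i++) {
                unsigned int page_shift = shifts[i];
                unsigned long page_size = 1UL << page_shift;

                /* What the driver now writes into ctx.log_pg_sz */
                unsigned int log_pg_sz = page_shift - FW_PAGE_SHIFT;
                /* How many 4K units one host page yields (MLX5_NUM_4K_IN_PAGE) */
                unsigned long num_4k = page_size / FW_PAGE_SIZE;

                printf("host page %3luK: log_pg_sz = %u, 4K units per page = %lu\n",
                       page_size >> 10, log_pg_sz, num_4k);
        }
        return 0;
}
```

On a 4K-page host this degenerates to log_pg_sz = 0 and one unit per page, which is why the old PAGE_SHIFT-based code happened to work there and only broke on machines with larger pages.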
@@ -620,7 +620,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	}
 	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
-	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
+	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - 12;
 	*index = dev->mdev.priv.uuari.uars[0].index;
 
 	return 0;
...
@@ -551,7 +551,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	}
 	mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
 	(*in)->ctx.log_pg_sz_remote_qpn =
-		cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
+		cpu_to_be32((page_shift - 12) << 24);
 	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
 	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
@@ -648,7 +648,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 		goto err_buf;
 	}
 	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-	(*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
+	(*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - 12) << 24);
 	/* Set "fast registration enabled" for all kernel QPs */
 	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
 	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
...
@@ -123,7 +123,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 		goto err_in;
 	}
-	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+	(*in)->ctx.log_pg_sz = page_shift - 12;
 	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
 	return 0;
@@ -192,7 +192,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 	}
 	srq->wq_sig = !!srq_signature;
-	(*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
+	(*in)->ctx.log_pg_sz = page_shift - 12;
 
 	return 0;
...
@@ -354,7 +354,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
 	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
 	in->ctx.intr = vecidx;
-	in->ctx.log_page_size = PAGE_SHIFT - 12;
+	in->ctx.log_page_size = eq->buf.page_shift - 12;
 	in->events_mask = cpu_to_be64(mask);
 
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
...
@@ -57,10 +57,13 @@ struct mlx5_pages_req {
 };
 
 struct fw_page {
 	struct rb_node		rb_node;
 	u64			addr;
 	struct page		*page;
 	u16			func_id;
+	unsigned long		bitmask;
+	struct list_head	list;
+	unsigned		free_count;
 };
 
 struct mlx5_query_pages_inbox {
@@ -94,6 +97,11 @@ enum {
 	MAX_RECLAIM_TIME_MSECS	= 5000,
 };
 
+enum {
+	MLX5_MAX_RECLAIM_TIME_MILI	= 5000,
+	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / 4096,
+};
+
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
 {
 	struct rb_root *root = &dev->priv.page_root;
@@ -101,6 +109,7 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
 	struct rb_node *parent = NULL;
 	struct fw_page *nfp;
 	struct fw_page *tfp;
+	int i;
 
 	while (*new) {
 		parent = *new;
@@ -113,25 +122,29 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
 			return -EEXIST;
 	}
 
-	nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
+	nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
 	if (!nfp)
 		return -ENOMEM;
 
 	nfp->addr = addr;
 	nfp->page = page;
 	nfp->func_id = func_id;
+	nfp->free_count = MLX5_NUM_4K_IN_PAGE;
+	for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
+		set_bit(i, &nfp->bitmask);
 
 	rb_link_node(&nfp->rb_node, parent, new);
 	rb_insert_color(&nfp->rb_node, root);
+	list_add(&nfp->list, &dev->priv.free_list);
 
 	return 0;
 }
 
-static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
 {
 	struct rb_root *root = &dev->priv.page_root;
 	struct rb_node *tmp = root->rb_node;
-	struct page *result = NULL;
+	struct fw_page *result = NULL;
 	struct fw_page *tfp;
 
 	while (tmp) {
@@ -141,9 +154,7 @@ static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
 		} else if (tfp->addr > addr) {
 			tmp = tmp->rb_right;
 		} else {
-			rb_erase(&tfp->rb_node, root);
-			result = tfp->page;
-			kfree(tfp);
+			result = tfp;
 			break;
 		}
 	}
@@ -176,13 +187,97 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	return err;
 }
 
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+{
+	struct fw_page *fp;
+	unsigned n;
+
+	if (list_empty(&dev->priv.free_list)) {
+		mlx5_core_warn(dev, "no free pages\n");
+		return -ENOMEM;
+	}
+
+	fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
+	n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
+	if (n >= MLX5_NUM_4K_IN_PAGE) {
+		mlx5_core_warn(dev, "alloc 4k bug\n");
+		return -ENOENT;
+	}
+	clear_bit(n, &fp->bitmask);
+	fp->free_count--;
+	if (!fp->free_count)
+		list_del(&fp->list);
+
+	*addr = fp->addr + n * 4096;
+
+	return 0;
+}
+
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+{
+	struct fw_page *fwp;
+	int n;
+
+	fwp = find_fw_page(dev, addr & PAGE_MASK);
+	if (!fwp) {
+		mlx5_core_warn(dev, "page not found\n");
+		return;
+	}
+
+	n = (addr & ~PAGE_MASK) >> 12;
+	fwp->free_count++;
+	set_bit(n, &fwp->bitmask);
+	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
+		rb_erase(&fwp->rb_node, &dev->priv.page_root);
+		list_del(&fwp->list);
+		dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
+		__free_page(fwp->page);
+		kfree(fwp);
+	} else if (fwp->free_count == 1) {
+		list_add(&fwp->list, &dev->priv.free_list);
+	}
+}
+
+static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
+{
+	struct page *page;
+	u64 addr;
+	int err;
+
+	page = alloc_page(GFP_HIGHUSER);
+	if (!page) {
+		mlx5_core_warn(dev, "failed to allocate page\n");
+		return -ENOMEM;
+	}
+	addr = dma_map_page(&dev->pdev->dev, page, 0,
+			    PAGE_SIZE, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(&dev->pdev->dev, addr)) {
+		mlx5_core_warn(dev, "failed dma mapping page\n");
+		err = -ENOMEM;
+		goto out_alloc;
+	}
+	err = insert_page(dev, addr, page, func_id);
+	if (err) {
+		mlx5_core_err(dev, "failed to track allocated page\n");
+		goto out_mapping;
+	}
+
+	return 0;
+
+out_mapping:
+	dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+out_alloc:
+	__free_page(page);
+
+	return err;
+}
+
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 		      int notify_fail)
 {
 	struct mlx5_manage_pages_inbox *in;
 	struct mlx5_manage_pages_outbox out;
 	struct mlx5_manage_pages_inbox *nin;
-	struct page *page;
 	int inlen;
 	u64 addr;
 	int err;
@@ -197,27 +292,15 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 	memset(&out, 0, sizeof(out));
 
 	for (i = 0; i < npages; i++) {
-		page = alloc_page(GFP_HIGHUSER);
-		if (!page) {
-			err = -ENOMEM;
-			mlx5_core_warn(dev, "failed to allocate page\n");
-			goto out_alloc;
-		}
-		addr = dma_map_page(&dev->pdev->dev, page, 0,
-				    PAGE_SIZE, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&dev->pdev->dev, addr)) {
-			mlx5_core_warn(dev, "failed dma mapping page\n");
-			__free_page(page);
-			err = -ENOMEM;
-			goto out_alloc;
-		}
-		err = insert_page(dev, addr, page, func_id);
+retry:
+		err = alloc_4k(dev, &addr);
 		if (err) {
-			mlx5_core_err(dev, "failed to track allocated page\n");
-			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-			__free_page(page);
-			err = -ENOMEM;
-			goto out_alloc;
+			if (err == -ENOMEM)
+				err = alloc_system_page(dev, func_id);
+			if (err)
+				goto out_4k;
+
+			goto retry;
 		}
 		in->pas[i] = cpu_to_be64(addr);
 	}
@@ -227,7 +310,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 	in->func_id = cpu_to_be16(func_id);
 	in->num_entries = cpu_to_be32(npages);
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
-	mlx5_core_dbg(dev, "err %d\n", err);
 	if (err) {
 		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
 		goto out_alloc;
@@ -251,7 +333,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 		nin = kzalloc(sizeof(*nin), GFP_KERNEL);
 		if (!nin) {
 			mlx5_core_warn(dev, "allocation failed\n");
-			goto unmap;
+			goto out_4k;
 		}
 		memset(&out, 0, sizeof(out));
 		nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
@@ -261,19 +343,9 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 		kfree(nin);
 	}
 
-unmap:
-	for (i--; i >= 0; i--) {
-		addr = be64_to_cpu(in->pas[i]);
-		page = remove_page(dev, addr);
-		if (!page) {
-			mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
-				      addr);
-			continue;
-		}
-		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-		__free_page(page);
-	}
+out_4k:
+	for (i--; i >= 0; i--)
+		free_4k(dev, be64_to_cpu(in->pas[i]));
 out_free:
 	mlx5_vfree(in);
 	return err;
@@ -284,7 +356,6 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 {
 	struct mlx5_manage_pages_inbox in;
 	struct mlx5_manage_pages_outbox *out;
-	struct page *page;
 	int num_claimed;
 	int outlen;
 	u64 addr;
@@ -323,13 +394,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 
 	for (i = 0; i < num_claimed; i++) {
 		addr = be64_to_cpu(out->pas[i]);
-		page = remove_page(dev, addr);
-		if (!page) {
-			mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
-		} else {
-			dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
-			__free_page(page);
-		}
+		free_4k(dev, addr);
 	}
 
 out_free:
...
@@ -435,6 +500,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
 	dev->priv.page_root = RB_ROOT;
+	INIT_LIST_HEAD(&dev->priv.free_list);
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
...
@@ -483,6 +483,7 @@ struct mlx5_priv {
 	struct rb_root		page_root;
 	int			fw_pages;
 	int			reg_pages;
+	struct list_head	free_list;
 
 	struct mlx5_core_health health;
...
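
As a closing aside, the bitmask bookkeeping that the new alloc_4k()/free_4k() pair performs per host page can be modeled in userspace. This is a hedged sketch under stated assumptions: a 64K host page, and invented names such as fw_page_model and model_alloc_4k. The real driver also keeps an rb-tree keyed by DMA address and a free list of partially used pages, both omitted here:

```c
/* Userspace model of the per-page 4K-unit bitmask behind alloc_4k()/free_4k().
 * One fw_page_model stands in for struct fw_page; bit n set => unit n is free.
 * Assumes a 64K host page (16 units); the driver derives this from PAGE_SIZE. */
#include <stdio.h>
#include <stdint.h>
#include <assert.h>

#define HOST_PAGE_SHIFT 16                              /* pretend 64K pages */
#define NUM_4K          (1U << (HOST_PAGE_SHIFT - 12))  /* 16 units per page */

struct fw_page_model {
        uint64_t addr;          /* DMA address of the host page */
        unsigned long bitmask;  /* bit n set => 4K unit n is free */
        unsigned free_count;
};

static int model_alloc_4k(struct fw_page_model *fp, uint64_t *out)
{
        if (!fp->free_count)
                return -1;
        /* userspace analog of the driver's find_first_bit() */
        unsigned n = __builtin_ctzl(fp->bitmask);
        fp->bitmask &= ~(1UL << n);
        fp->free_count--;
        *out = fp->addr + (uint64_t)n * 4096;
        return 0;
}

static void model_free_4k(struct fw_page_model *fp, uint64_t addr)
{
        unsigned n = (addr - fp->addr) >> 12;   /* unit index within the page */
        assert(n < NUM_4K && !(fp->bitmask & (1UL << n)));
        fp->bitmask |= 1UL << n;
        fp->free_count++;
}

int main(void)
{
        struct fw_page_model fp = {
                .addr = 0x10000,
                .bitmask = (1UL << NUM_4K) - 1,         /* all units free */
                .free_count = NUM_4K,
        };
        uint64_t a, b;

        model_alloc_4k(&fp, &a);        /* hands out 0x10000 (unit 0) */
        model_alloc_4k(&fp, &b);        /* hands out 0x11000 (unit 1) */
        model_free_4k(&fp, a);          /* unit 0 becomes free again */
        printf("allocated 0x%llx and 0x%llx, free units left: %u\n",
               (unsigned long long)a, (unsigned long long)b, fp.free_count);
        return 0;
}
```

In the driver, free_4k() additionally recovers the owning struct fw_page from addr & PAGE_MASK via the rb-tree, and once every unit of a page is free again it unmaps the DMA mapping and returns the whole host page to the system.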