Commit ba943fb2 authored by Sagi Grimberg, committed by Doug Ledford

IB/iser: Rewrite bounce buffer code path

In some rare cases, IO operations may not be aligned to page
boundaries. This prevents iser from performing fast memory
registration. To overcome that, iser uses a bounce buffer to carry
the transaction: we allocate a buffer the size of the transaction
and perform a copy.

The buffer allocation using kmalloc is too restrictive, since it
requires higher-order (atomic) allocations for large transactions,
which may exhaust memory fairly quickly under some workloads.
We rewrite the bounce buffer code path to allocate scattered pages
and perform a copy between the transaction sg and the bounce sg.
Reported-by: Alex Lyakas <alex@zadarastorage.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 4fcd1470
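To make the scheme above concrete before diving into the diff, here is a small
stand-alone sketch in plain user-space C. It is an illustration, not the kernel
code: struct seg, CHUNK_SIZE, alloc_bounce() and copy_bounce() are hypothetical
stand-ins for a scatterlist entry, PAGE_SIZE, and the helpers the patch adds
(iser_alloc_bounce_sg() and iser_copy_bounce()). The point is the shape of the
algorithm: allocate the bounce buffer as many small chunks instead of one large
buffer, then walk the original segments and the bounce chunks with independent
offsets while copying.

/*
 * Stand-alone illustration (user-space C, not the kernel patch itself)
 * of the bounce scheme described above.  "struct seg" stands in for a
 * scatterlist entry and CHUNK_SIZE for PAGE_SIZE.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#define CHUNK_SIZE 4096			/* stands in for PAGE_SIZE */

struct seg {				/* stands in for one sg entry */
	void	*addr;
	size_t	 len;
};

/* Allocate the bounce buffer as many small chunks, not one big buffer. */
static struct seg *alloc_bounce(size_t total, int *nchunks)
{
	int i, n = (total + CHUNK_SIZE - 1) / CHUNK_SIZE;
	struct seg *bounce = calloc(n, sizeof(*bounce));

	if (!bounce)
		return NULL;

	for (i = 0; i < n; i++) {
		size_t len = total < CHUNK_SIZE ? total : CHUNK_SIZE;

		bounce[i].addr = malloc(len);
		if (!bounce[i].addr) {
			while (i--)
				free(bounce[i].addr);
			free(bounce);
			return NULL;
		}
		bounce[i].len = len;
		total -= len;
	}
	*nchunks = n;
	return bounce;
}

/*
 * Copy between the original segments and the bounce chunks, walking both
 * lists with independent offsets so segment sizes never have to match
 * chunk sizes.  Assumes the bounce chunks cover the full original length.
 */
static void copy_bounce(struct seg *orig, int norig,
			struct seg *bounce, bool to_bounce)
{
	size_t b_off = 0;
	int i, b = 0;

	for (i = 0; i < norig; i++) {
		size_t o_off = 0;

		while (o_off < orig[i].len) {
			size_t len = orig[i].len - o_off;

			if (len > bounce[b].len - b_off)
				len = bounce[b].len - b_off;

			if (to_bounce)
				memcpy((char *)bounce[b].addr + b_off,
				       (char *)orig[i].addr + o_off, len);
			else
				memcpy((char *)orig[i].addr + o_off,
				       (char *)bounce[b].addr + b_off, len);

			o_off += len;
			b_off += len;
			if (b_off == bounce[b].len) {	/* move to next chunk */
				b++;
				b_off = 0;
			}
		}
	}
}

In the kernel patch itself the chunks are real pages obtained with
alloc_page(GFP_ATOMIC), the walk is driven by for_each_sg(), and the mappings
are set up with kmap_atomic(), as the new iser_memory.c helpers in the diff
below show.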
@@ -222,12 +222,9 @@ enum iser_data_dir {
  * @size:         num entries of this sg
  * @data_len:     total beffer byte len
  * @dma_nents:    returned by dma_map_sg
- * @copy_buf:     allocated copy buf for SGs unaligned
- *                for rdma which are copied
  * @orig_sg:      pointer to the original sg list (in case
  *                we used a copy)
- * @sg_single:    SG-ified clone of a non SG SC or
- *                unaligned SG
+ * @orig_size:    num entris of orig sg list
  */
 struct iser_data_buf {
 	struct scatterlist *sg;
@@ -235,8 +232,7 @@ struct iser_data_buf {
 	unsigned long      data_len;
 	unsigned int       dma_nents;
 	struct scatterlist *orig_sg;
-	char               *copy_buf;
-	struct scatterlist sg_single;
+	unsigned int       orig_size;
 };
 
 /* fwd declarations */
...
@@ -674,28 +674,28 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
 	/* if we were reading, copy back to unaligned sglist,
 	 * anyway dma_unmap and free the copy
 	 */
-	if (iser_task->data[ISER_DIR_IN].copy_buf) {
+	if (iser_task->data[ISER_DIR_IN].orig_sg) {
 		is_rdma_data_aligned = 0;
 		iser_finalize_rdma_unaligned_sg(iser_task,
 						&iser_task->data[ISER_DIR_IN],
 						ISER_DIR_IN);
 	}
 
-	if (iser_task->data[ISER_DIR_OUT].copy_buf) {
+	if (iser_task->data[ISER_DIR_OUT].orig_sg) {
 		is_rdma_data_aligned = 0;
 		iser_finalize_rdma_unaligned_sg(iser_task,
 						&iser_task->data[ISER_DIR_OUT],
 						ISER_DIR_OUT);
 	}
 
-	if (iser_task->prot[ISER_DIR_IN].copy_buf) {
+	if (iser_task->prot[ISER_DIR_IN].orig_sg) {
 		is_rdma_prot_aligned = 0;
 		iser_finalize_rdma_unaligned_sg(iser_task,
 						&iser_task->prot[ISER_DIR_IN],
 						ISER_DIR_IN);
 	}
 
-	if (iser_task->prot[ISER_DIR_OUT].copy_buf) {
+	if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
 		is_rdma_prot_aligned = 0;
 		iser_finalize_rdma_unaligned_sg(iser_task,
 						&iser_task->prot[ISER_DIR_OUT],
...
@@ -39,7 +39,112 @@
 
 #include "iscsi_iser.h"
 
-#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+static void
+iser_free_bounce_sg(struct iser_data_buf *data)
+{
+	struct scatterlist *sg;
+	int count;
+
+	for_each_sg(data->sg, sg, data->size, count)
+		__free_page(sg_page(sg));
+
+	kfree(data->sg);
+
+	data->sg = data->orig_sg;
+	data->size = data->orig_size;
+	data->orig_sg = NULL;
+	data->orig_size = 0;
+}
+
+static int
+iser_alloc_bounce_sg(struct iser_data_buf *data)
+{
+	struct scatterlist *sg;
+	struct page *page;
+	unsigned long length = data->data_len;
+	int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
+
+	sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
+	if (!sg)
+		goto err;
+
+	sg_init_table(sg, nents);
+	while (length) {
+		u32 page_len = min_t(u32, length, PAGE_SIZE);
+
+		page = alloc_page(GFP_ATOMIC);
+		if (!page)
+			goto err;
+
+		sg_set_page(&sg[i], page, page_len, 0);
+		length -= page_len;
+		i++;
+	}
+
+	data->orig_sg = data->sg;
+	data->orig_size = data->size;
+	data->sg = sg;
+	data->size = nents;
+
+	return 0;
+
+err:
+	for (; i > 0; i--)
+		__free_page(sg_page(&sg[i - 1]));
+	kfree(sg);
+
+	return -ENOMEM;
+}
+
+static void
+iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
+{
+	struct scatterlist *osg, *bsg = data->sg;
+	void *oaddr, *baddr;
+	unsigned int left = data->data_len;
+	unsigned int bsg_off = 0;
+	int i;
+
+	for_each_sg(data->orig_sg, osg, data->orig_size, i) {
+		unsigned int copy_len, osg_off = 0;
+
+		oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
+		copy_len = min(left, osg->length);
+		while (copy_len) {
+			unsigned int len = min(copy_len, bsg->length - bsg_off);
+
+			baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
+
+			if (to_buffer)
+				memcpy(baddr + bsg_off, oaddr + osg_off, len);
+			else
+				memcpy(oaddr + osg_off, baddr + bsg_off, len);
+
+			kunmap_atomic(baddr - bsg->offset);
+			osg_off += len;
+			bsg_off += len;
+			copy_len -= len;
+
+			if (bsg_off >= bsg->length) {
+				bsg = sg_next(bsg);
+				bsg_off = 0;
+			}
+		}
+		kunmap_atomic(oaddr - osg->offset);
+		left -= osg_off;
+	}
+}
+
+static inline void
+iser_copy_from_bounce(struct iser_data_buf *data)
+{
+	iser_copy_bounce(data, false);
+}
+
+static inline void
+iser_copy_to_bounce(struct iser_data_buf *data)
+{
+	iser_copy_bounce(data, true);
+}
 
 struct fast_reg_descriptor *
 iser_reg_desc_get(struct ib_conn *ib_conn)
@@ -75,52 +180,32 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 			enum iser_data_dir cmd_dir)
 {
 	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
-	struct scatterlist *sgl = data->sg;
-	struct scatterlist *sg;
-	char *mem = NULL;
-	unsigned long cmd_data_len = data->data_len;
-	int dma_nents, i;
-
-	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-		mem = (void *)__get_free_pages(GFP_ATOMIC,
-		      ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
-	else
-		mem = kmalloc(cmd_data_len, GFP_ATOMIC);
-
-	if (mem == NULL) {
-		iser_err("Failed to allocate mem size %d %d for copying sglist\n",
-			 data->size, (int)cmd_data_len);
-		return -ENOMEM;
-	}
-
-	if (cmd_dir == ISER_DIR_OUT) {
-		/* copy the unaligned sg the buffer which is used for RDMA */
-		char *p, *from;
-		sgl = data->sg;
-		p = mem;
-		for_each_sg(sgl, sg, data->size, i) {
-			from = kmap_atomic(sg_page(sg));
-			memcpy(p,
-			       from + sg->offset,
-			       sg->length);
-			kunmap_atomic(from);
-			p += sg->length;
-		}
-	}
-
-	sg_init_one(&data->sg_single, mem, cmd_data_len);
-	data->orig_sg = data->sg;
-	data->sg = &data->sg_single;
-	data->copy_buf = mem;
-
-	dma_nents = ib_dma_map_sg(dev, data->sg, 1,
-				  (cmd_dir == ISER_DIR_OUT) ?
-				  DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	BUG_ON(dma_nents == 0);
-
-	data->dma_nents = dma_nents;
+	int rc;
+
+	rc = iser_alloc_bounce_sg(data);
+	if (rc) {
+		iser_err("Failed to allocate bounce for data len %lu\n",
+			 data->data_len);
+		return rc;
+	}
+
+	if (cmd_dir == ISER_DIR_OUT)
+		iser_copy_to_bounce(data);
+
+	data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
+					(cmd_dir == ISER_DIR_OUT) ?
+					DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	if (!data->dma_nents) {
+		iser_err("Got dma_nents %d, something went wrong...\n",
+			 data->dma_nents);
+		rc = -ENOMEM;
+		goto err;
+	}
 
 	return 0;
+err:
+	iser_free_bounce_sg(data);
+	return rc;
 }
 
 /**
@@ -131,48 +216,16 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 				     struct iser_data_buf *data,
 				     enum iser_data_dir cmd_dir)
 {
-	struct ib_device *dev;
-	unsigned long cmd_data_len;
-
-	dev = iser_task->iser_conn->ib_conn.device->ib_device;
+	struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
 
-	ib_dma_unmap_sg(dev, data->sg, 1,
+	ib_dma_unmap_sg(dev, data->sg, data->size,
 			(cmd_dir == ISER_DIR_OUT) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
-	if (cmd_dir == ISER_DIR_IN) {
-		char *mem;
-		struct scatterlist *sgl, *sg;
-		unsigned char *p, *to;
-		unsigned int sg_size;
-		int i;
-
-		/* copy back read RDMA to unaligned sg */
-		mem = data->copy_buf;
-		sgl = data->sg;
-		sg_size = data->size;
-		p = mem;
-
-		for_each_sg(sgl, sg, sg_size, i) {
-			to = kmap_atomic(sg_page(sg));
-			memcpy(to + sg->offset,
-			       p,
-			       sg->length);
-			kunmap_atomic(to);
-			p += sg->length;
-		}
-	}
-
-	cmd_data_len = data->data_len;
-
-	if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-		free_pages((unsigned long)data->copy_buf,
-			   ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
-	else
-		kfree(data->copy_buf);
-
-	data->copy_buf = NULL;
+	if (cmd_dir == ISER_DIR_IN)
+		iser_copy_from_bounce(data);
+
+	iser_free_bounce_sg(data);
 }
 
 #define IS_4K_ALIGNED(addr)	((((unsigned long)addr) & ~MASK_4K) == 0)
...