Commit f36e1793 authored by Jack Morgenstein's avatar Jack Morgenstein Committed by Roland Dreier

IB/umad: Add support for large RMPP transfers

Add support for sending and receiving large RMPP transfers.  The old
code supports transfers only as large as a single contiguous kernel
memory allocation.  This patch uses linked list of memory buffers when
sending and receiving data to avoid needing contiguous pages for
larger transfers.

  Receive side: copy the arriving MADs in chunks instead of coalescing
  to one large buffer in kernel space.

  Send side: split a multipacket MAD buffer to a list of segments,
  (multipacket_list) and send these using a gather list of size 2.
  Also, save pointer to last sent segment, and retrieve requested
  segments by walking list starting at last sent segment. Finally,
  save pointer to last-acked segment.  When retrying, retrieve
  segments for resending relative to this pointer.  When updating last
  ack, start at this pointer.
Signed-off-by: default avatarJack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: default avatarSean Hefty <sean.hefty@intel.com>
Signed-off-by: default avatarRoland Dreier <rolandd@cisco.com>
parent 6ecb0c84
......@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $Id: mad.c 2817 2005-07-07 11:29:26Z halr $
* $Id: mad.c 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#include <linux/dma-mapping.h>
......@@ -765,18 +765,67 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
return ret;
}
static int get_buf_length(int hdr_len, int data_len)
static int get_pad_size(int hdr_len, int data_len)
{
int seg_size, pad;
seg_size = sizeof(struct ib_mad) - hdr_len;
if (data_len && seg_size) {
pad = seg_size - data_len % seg_size;
if (pad == seg_size)
pad = 0;
return pad == seg_size ? 0 : pad;
} else
pad = seg_size;
return hdr_len + data_len + pad;
return seg_size;
}
static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
{
struct ib_rmpp_segment *s, *t;
list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
list_del(&s->list);
kfree(s);
}
}
static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
gfp_t gfp_mask)
{
struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
struct ib_rmpp_segment *seg = NULL;
int left, seg_size, pad;
send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
seg_size = send_buf->seg_size;
pad = send_wr->pad;
/* Allocate data segments. */
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
"alloc failed for len %zd, gfp %#x\n",
sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
seg->num = ++send_buf->seg_count;
list_add_tail(&seg->list, &send_wr->rmpp_list);
}
/* Zero any padding */
if (pad)
memset(seg->data + seg_size - pad, 0, pad);
rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
agent.rmpp_version;
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
struct ib_rmpp_segment, list);
send_wr->last_ack_seg = send_wr->cur_seg;
return 0;
}
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
......@@ -787,32 +836,40 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
int buf_size;
int pad, message_size, ret, size;
void *buf;
mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
agent);
buf_size = get_buf_length(hdr_len, data_len);
pad = get_pad_size(hdr_len, data_len);
message_size = hdr_len + data_len + pad;
if ((!mad_agent->rmpp_version &&
(rmpp_active || buf_size > sizeof(struct ib_mad))) ||
(!rmpp_active && buf_size > sizeof(struct ib_mad)))
(rmpp_active || message_size > sizeof(struct ib_mad))) ||
(!rmpp_active && message_size > sizeof(struct ib_mad)))
return ERR_PTR(-EINVAL);
buf = kzalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
if (!buf)
return ERR_PTR(-ENOMEM);
mad_send_wr = buf + buf_size;
mad_send_wr = buf + size;
INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
mad_send_wr->send_buf.mad = buf;
mad_send_wr->send_buf.hdr_len = hdr_len;
mad_send_wr->send_buf.data_len = data_len;
mad_send_wr->pad = pad;
mad_send_wr->mad_agent_priv = mad_agent_priv;
mad_send_wr->sg_list[0].length = buf_size;
mad_send_wr->sg_list[0].length = hdr_len;
mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
mad_send_wr->send_wr.num_sge = 1;
mad_send_wr->send_wr.num_sge = 2;
mad_send_wr->send_wr.opcode = IB_WR_SEND;
mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
......@@ -820,13 +877,11 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
if (rmpp_active) {
struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
IB_MGMT_RMPP_HDR + data_len);
rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
IB_MGMT_RMPP_FLAG_ACTIVE);
ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
if (ret) {
kfree(buf);
return ERR_PTR(ret);
}
}
mad_send_wr->send_buf.mad_agent = mad_agent;
......@@ -835,14 +890,50 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
}
EXPORT_SYMBOL(ib_create_send_mad);
void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
{
struct ib_mad_send_wr_private *mad_send_wr;
struct list_head *list;
mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
send_buf);
list = &mad_send_wr->cur_seg->list;
if (mad_send_wr->cur_seg->num < seg_num) {
list_for_each_entry(mad_send_wr->cur_seg, list, list)
if (mad_send_wr->cur_seg->num == seg_num)
break;
} else if (mad_send_wr->cur_seg->num > seg_num) {
list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
if (mad_send_wr->cur_seg->num == seg_num)
break;
}
return mad_send_wr->cur_seg->data;
}
EXPORT_SYMBOL(ib_get_rmpp_segment);
static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
{
if (mad_send_wr->send_buf.seg_count)
return ib_get_rmpp_segment(&mad_send_wr->send_buf,
mad_send_wr->seg_num);
else
return mad_send_wr->send_buf.mad +
mad_send_wr->send_buf.hdr_len;
}
void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
{
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_wr_private *mad_send_wr;
mad_agent_priv = container_of(send_buf->mad_agent,
struct ib_mad_agent_private, agent);
kfree(send_buf->mad);
mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
send_buf);
free_send_rmpp_list(mad_send_wr);
kfree(send_buf->mad);
if (atomic_dec_and_test(&mad_agent_priv->refcount))
wake_up(&mad_agent_priv->wait);
}
......@@ -865,10 +956,17 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
sge->addr = dma_map_single(mad_agent->device->dma_device,
mad_send_wr->send_buf.mad, sge->length,
sge[0].addr = dma_map_single(mad_agent->device->dma_device,
mad_send_wr->send_buf.mad,
sge[0].length,
DMA_TO_DEVICE);
pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
pci_unmap_addr_set(mad_send_wr, header_mapping, sge[0].addr);
sge[1].addr = dma_map_single(mad_agent->device->dma_device,
ib_get_payload(mad_send_wr),
sge[1].length,
DMA_TO_DEVICE);
pci_unmap_addr_set(mad_send_wr, payload_mapping, sge[1].addr);
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
......@@ -885,11 +983,14 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
list_add_tail(&mad_send_wr->mad_list.list, list);
}
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
if (ret)
if (ret) {
dma_unmap_single(mad_agent->device->dma_device,
pci_unmap_addr(mad_send_wr, mapping),
sge->length, DMA_TO_DEVICE);
pci_unmap_addr(mad_send_wr, header_mapping),
sge[0].length, DMA_TO_DEVICE);
dma_unmap_single(mad_agent->device->dma_device,
pci_unmap_addr(mad_send_wr, payload_mapping),
sge[1].length, DMA_TO_DEVICE);
}
return ret;
}
......@@ -1860,8 +1961,11 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
retry:
dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
pci_unmap_addr(mad_send_wr, mapping),
pci_unmap_addr(mad_send_wr, header_mapping),
mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
pci_unmap_addr(mad_send_wr, payload_mapping),
mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
queued_send_wr = NULL;
spin_lock_irqsave(&send_queue->lock, flags);
list_del(&mad_list->list);
......
......@@ -31,7 +31,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $Id: mad_priv.h 2730 2005-06-28 16:43:03Z sean.hefty $
* $Id: mad_priv.h 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#ifndef __IB_MAD_PRIV_H__
......@@ -85,6 +85,12 @@ struct ib_mad_private {
} mad;
} __attribute__ ((packed));
struct ib_rmpp_segment {
struct list_head list;
u32 num;
u8 data[0];
};
struct ib_mad_agent_private {
struct list_head agent_list;
struct ib_mad_agent agent;
......@@ -119,7 +125,8 @@ struct ib_mad_send_wr_private {
struct list_head agent_list;
struct ib_mad_agent_private *mad_agent_priv;
struct ib_mad_send_buf send_buf;
DECLARE_PCI_UNMAP_ADDR(mapping)
DECLARE_PCI_UNMAP_ADDR(header_mapping)
DECLARE_PCI_UNMAP_ADDR(payload_mapping)
struct ib_send_wr send_wr;
struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
__be64 tid;
......@@ -130,11 +137,12 @@ struct ib_mad_send_wr_private {
enum ib_wc_status status;
/* RMPP control */
struct list_head rmpp_list;
struct ib_rmpp_segment *last_ack_seg;
struct ib_rmpp_segment *cur_seg;
int last_ack;
int seg_num;
int newwin;
int total_seg;
int data_offset;
int pad;
};
......
This diff is collapsed.
This diff is collapsed.
......@@ -33,7 +33,7 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $
* $Id: ib_mad.h 5596 2006-03-03 01:00:07Z sean.hefty $
*/
#if !defined( IB_MAD_H )
......@@ -208,15 +208,23 @@ struct ib_class_port_info
/**
* ib_mad_send_buf - MAD data buffer and work request for sends.
* @next: A pointer used to chain together MADs for posting.
* @mad: References an allocated MAD data buffer.
* @mad: References an allocated MAD data buffer for MADs that do not have
* RMPP active. For MADs using RMPP, references the common and management
* class specific headers.
* @mad_agent: MAD agent that allocated the buffer.
* @ah: The address handle to use when sending the MAD.
* @context: User-controlled context fields.
* @hdr_len: Indicates the size of the data header of the MAD. This length
* includes the common MAD, RMPP, and class specific headers.
* @data_len: Indicates the total size of user-transferred data.
* @seg_count: The number of RMPP segments allocated for this send.
* @seg_size: Size of each RMPP segment.
* @timeout_ms: Time to wait for a response.
* @retries: Number of times to retry a request for a response.
*
* Users are responsible for initializing the MAD buffer itself, with the
* exception of specifying the payload length field in any RMPP MAD.
* exception of any RMPP header. Additional segment buffer space allocated
* beyond data_len is padding.
*/
struct ib_mad_send_buf {
struct ib_mad_send_buf *next;
......@@ -224,6 +232,10 @@ struct ib_mad_send_buf {
struct ib_mad_agent *mad_agent;
struct ib_ah *ah;
void *context[2];
int hdr_len;
int data_len;
int seg_count;
int seg_size;
int timeout_ms;
int retries;
};
......@@ -299,7 +311,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
* @mad_recv_wc: Received work completion information on the received MAD.
*
* MADs received in response to a send request operation will be handed to
* the user after the send operation completes. All data buffers given
* the user before the send operation completes. All data buffers given
* to registered agents through this routine are owned by the receiving
* client, except for snooping agents. Clients snooping MADs should not
* modify the data referenced by @mad_recv_wc.
......@@ -485,17 +497,6 @@ int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
struct ib_mad_send_buf **bad_send_buf);
/**
* ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
* @mad_recv_wc: Work completion information for a received MAD.
* @buf: User-provided data buffer to receive the coalesced buffers. The
* referenced buffer should be at least the size of the mad_len specified
* by @mad_recv_wc.
*
* This call copies a chain of received MAD segments into a single data buffer,
* removing duplicated headers.
*/
void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf);
/**
* ib_free_recv_mad - Returns data buffers used to receive a MAD.
......@@ -590,9 +591,10 @@ int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
* with an initialized work request structure. Users may modify the returned
* MAD data buffer before posting the send.
*
* The returned data buffer will be cleared. Users are responsible for
* initializing the common MAD and any class specific headers. If @rmpp_active
* is set, the RMPP header will be initialized for sending.
* The returned MAD header, class specific headers, and any padding will be
* cleared. Users are responsible for initializing the common MAD header,
* any class specific header, and MAD data area.
* If @rmpp_active is set, the RMPP header will be initialized for sending.
*/
struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
u32 remote_qpn, u16 pkey_index,
......@@ -600,6 +602,16 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
int hdr_len, int data_len,
gfp_t gfp_mask);
/**
* ib_get_rmpp_segment - returns the data buffer for a given RMPP segment.
* @send_buf: Previously allocated send data buffer.
* @seg_num: number of segment to return
*
* This routine returns a pointer to the data buffer of an RMPP MAD.
* Users must provide synchronization to @send_buf around this call.
*/
void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
/**
* ib_free_send_mad - Returns data buffers used to send a MAD.
* @send_buf: Previously allocated send data buffer.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment