Commit d99dc602 authored by Gary Leshner's avatar Gary Leshner Committed by Jason Gunthorpe

IB/hfi1: Add functions to transmit datagram ipoib packets

This patch implements the mechanism to accelerate the transmit side of
a multiple transmit queue RDMA netdev by submitting the packets to
the SDMA engine directly instead of sending through the verbs layer.

This patch also changes the UD/SEND_ONLY op to output the entropy value
in byte 0 of deth[1]. UD/SEND_ONLY_WITH_IMMEDIATE uses the previous
behavior with no entropy value being output.

The code in the ipoib rdma netdev which submits tx requests upon
successful submission will call trace_sdma_output_ibhdr to output
the ibhdr to the trace buffer.

Link: https://lore.kernel.org/r/20200511160548.173205.45616.stgit@awfm-01.aw.intel.comReviewed-by: default avatarMike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarGary Leshner <Gary.S.Leshner@intel.com>
Signed-off-by: default avatarKaike Wan <kaike.wan@intel.com>
Signed-off-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: default avatarJason Gunthorpe <jgg@mellanox.com>
parent fe810b50
...@@ -22,6 +22,7 @@ hfi1-y := \ ...@@ -22,6 +22,7 @@ hfi1-y := \
init.o \ init.o \
intr.o \ intr.o \
iowait.o \ iowait.o \
ipoib_tx.o \
mad.o \ mad.o \
mmu_rb.o \ mmu_rb.o \
msix.o \ msix.o \
......
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2020 Intel Corporation.
*
*/
/*
* This file contains HFI1 support for IPOIB functionality
*/
#ifndef HFI1_IPOIB_H
#define HFI1_IPOIB_H
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/atomic.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/if_infiniband.h>
#include "hfi.h"
#include "iowait.h"
#include <rdma/ib_verbs.h>
#define HFI1_IPOIB_ENTROPY_SHIFT 24
#define HFI1_IPOIB_TXREQ_NAME_LEN 32
#define HFI1_IPOIB_ENCAP_LEN 4
struct hfi1_ipoib_dev_priv;
union hfi1_ipoib_flow {
u16 as_int;
struct {
u8 tx_queue;
u8 sc5;
} __attribute__((__packed__));
};
/**
* struct hfi1_ipoib_circ_buf - List of items to be processed
* @items: ring of items
* @head: ring head
* @tail: ring tail
* @max_items: max items + 1 that the ring can contain
* @producer_lock: producer sync lock
* @consumer_lock: consumer sync lock
*/
struct hfi1_ipoib_circ_buf {
void **items;
unsigned long head;
unsigned long tail;
unsigned long max_items;
spinlock_t producer_lock; /* head sync lock */
spinlock_t consumer_lock; /* tail sync lock */
};
/**
* struct hfi1_ipoib_txq - IPOIB per Tx queue information
* @priv: private pointer
* @sde: sdma engine
* @tx_list: tx request list
* @sent_txreqs: count of txreqs posted to sdma
* @flow: tracks when list needs to be flushed for a flow change
* @q_idx: ipoib Tx queue index
* @pkts_sent: indicator packets have been sent from this queue
* @wait: iowait structure
* @complete_txreqs: count of txreqs completed by sdma
* @napi: pointer to tx napi interface
* @tx_ring: ring of ipoib txreqs to be reaped by napi callback
*/
struct hfi1_ipoib_txq {
struct hfi1_ipoib_dev_priv *priv;
struct sdma_engine *sde;
struct list_head tx_list;
u64 sent_txreqs;
union hfi1_ipoib_flow flow;
u8 q_idx;
bool pkts_sent;
struct iowait wait;
atomic64_t ____cacheline_aligned_in_smp complete_txreqs;
struct napi_struct *napi;
struct hfi1_ipoib_circ_buf tx_ring;
};
struct hfi1_ipoib_dev_priv {
struct hfi1_devdata *dd;
struct net_device *netdev;
struct ib_device *device;
struct hfi1_ipoib_txq *txqs;
struct kmem_cache *txreq_cache;
struct napi_struct *tx_napis;
u16 pkey;
u16 pkey_index;
u32 qkey;
u8 port_num;
const struct net_device_ops *netdev_ops;
struct rvt_qp *qp;
struct pcpu_sw_netstats __percpu *netstats;
};
/* hfi1 ipoib rdma netdev's private data structure */
struct hfi1_ipoib_rdma_netdev {
struct rdma_netdev rn; /* keep this first */
/* followed by device private data */
struct hfi1_ipoib_dev_priv dev_priv;
};
static inline struct hfi1_ipoib_dev_priv *
hfi1_ipoib_priv(const struct net_device *dev)
{
return &((struct hfi1_ipoib_rdma_netdev *)netdev_priv(dev))->dev_priv;
}
static inline void
hfi1_ipoib_update_tx_netstats(struct hfi1_ipoib_dev_priv *priv,
u64 packets,
u64 bytes)
{
struct pcpu_sw_netstats *netstats = this_cpu_ptr(priv->netstats);
u64_stats_update_begin(&netstats->syncp);
netstats->tx_packets += packets;
netstats->tx_bytes += bytes;
u64_stats_update_end(&netstats->syncp);
}
int hfi1_ipoib_send_dma(struct net_device *dev,
struct sk_buff *skb,
struct ib_ah *address,
u32 dqpn);
int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv);
void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv);
void hfi1_ipoib_napi_tx_enable(struct net_device *dev);
void hfi1_ipoib_napi_tx_disable(struct net_device *dev);
#endif /* _IPOIB_H */
This diff is collapsed.
/* /*
* Copyright(c) 2015 - 2018 Intel Corporation. * Copyright(c) 2015 - 2020 Intel Corporation.
* *
* This file is provided under a dual BSD/GPLv2 license. When using or * This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license. * redistributing this file, you may do so under either license.
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "trace.h" #include "trace.h"
#include "exp_rcv.h" #include "exp_rcv.h"
#include "ipoib.h"
static u8 __get_ib_hdr_len(struct ib_header *hdr) static u8 __get_ib_hdr_len(struct ib_header *hdr)
{ {
...@@ -126,6 +127,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2) ...@@ -126,6 +127,7 @@ const char *hfi1_trace_get_packet_l2_str(u8 l2)
#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" #define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" #define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x" #define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
#define DETH_ENTROPY_PRN "deth qkey:0x%.8x sqpn:0x%.6x entropy:0x%.2x"
#define IETH_PRN "ieth rkey:0x%.8x" #define IETH_PRN "ieth rkey:0x%.8x"
#define ATOMICACKETH_PRN "origdata:%llx" #define ATOMICACKETH_PRN "origdata:%llx"
#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx" #define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
...@@ -444,6 +446,12 @@ const char *parse_everbs_hdrs( ...@@ -444,6 +446,12 @@ const char *parse_everbs_hdrs(
break; break;
/* deth */ /* deth */
case OP(UD, SEND_ONLY): case OP(UD, SEND_ONLY):
trace_seq_printf(p, DETH_ENTROPY_PRN,
be32_to_cpu(eh->ud.deth[0]),
be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK,
be32_to_cpu(eh->ud.deth[1]) >>
HFI1_IPOIB_ENTROPY_SHIFT);
break;
case OP(UD, SEND_ONLY_WITH_IMMEDIATE): case OP(UD, SEND_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, DETH_PRN, trace_seq_printf(p, DETH_PRN,
be32_to_cpu(eh->ud.deth[0]), be32_to_cpu(eh->ud.deth[0]),
......
...@@ -2205,6 +2205,7 @@ struct rdma_netdev { ...@@ -2205,6 +2205,7 @@ struct rdma_netdev {
void *clnt_priv; void *clnt_priv;
struct ib_device *hca; struct ib_device *hca;
u8 port_num; u8 port_num;
int mtu;
/* /*
* cleanup function must be specified. * cleanup function must be specified.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment