Commit 32199ec3 authored by Linus Torvalds

Merge tag 'ntb-4.8' of git://github.com/jonmason/ntb

Pull NTB updates from Jon Mason:
 "NTB bug fixes for the ntb_tool and ntb_perf, and improvements to the
  ntb_perf and ntb_pingpong for increased debuggability.

  Also, modification to the ntb_transport layer to increase/decrease
  the number of transport entries depending on the ring size"

* tag 'ntb-4.8' of git://github.com/jonmason/ntb:
  NTB: ntb_hw_intel: use local variable pdev
  NTB: ntb_hw_intel: show BAR size in debugfs info
  ntb_test: Add a selftest script for the NTB subsystem
  ntb_perf: clear link_is_up flag when the link goes down.
  ntb_pingpong: Add a debugfs file to get the ping count
  ntb_tool: Add link status and files to debugfs
  ntb_tool: Postpone memory window initialization for the user
  ntb_perf: Wait for link before running test
  ntb_perf: Return results by reading the run file
  ntb_perf: Improve thread handling to increase robustness
  ntb_perf: Schedule based on time not on performance
  ntb_transport: Check the number of spads the hardware supports
  ntb_tool: Add memory window debug support
  ntb_perf: Allow limiting the size of the memory windows
  NTB: allocate number transport entries depending on size of ring size
  ntb_tool: BUG: Ensure the buffer size is large enough to return all spads
  ntb_tool: Fix infinite loop bug when writing spad/peer_spad file
parents a02040d8 95f1464f
...@@ -8341,6 +8341,7 @@ F: drivers/ntb/ ...@@ -8341,6 +8341,7 @@ F: drivers/ntb/
F: drivers/net/ntb_netdev.c F: drivers/net/ntb_netdev.c
F: include/linux/ntb.h F: include/linux/ntb.h
F: include/linux/ntb_transport.h F: include/linux/ntb_transport.h
F: tools/testing/selftests/ntb/
NTB INTEL DRIVER NTB INTEL DRIVER
M: Jon Mason <jdmason@kudzu.us> M: Jon Mason <jdmason@kudzu.us>
......
...@@ -551,13 +551,15 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, ...@@ -551,13 +551,15 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
size_t count, loff_t *offp) size_t count, loff_t *offp)
{ {
struct intel_ntb_dev *ndev; struct intel_ntb_dev *ndev;
struct pci_dev *pdev;
void __iomem *mmio; void __iomem *mmio;
char *buf; char *buf;
size_t buf_size; size_t buf_size;
ssize_t ret, off; ssize_t ret, off;
union { u64 v64; u32 v32; u16 v16; } u; union { u64 v64; u32 v32; u16 v16; u8 v8; } u;
ndev = filp->private_data; ndev = filp->private_data;
pdev = ndev_pdev(ndev);
mmio = ndev->self_mmio; mmio = ndev->self_mmio;
buf_size = min(count, 0x800ul); buf_size = min(count, 0x800ul);
...@@ -631,6 +633,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, ...@@ -631,6 +633,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"Doorbell Bell -\t\t%#llx\n", u.v64); "Doorbell Bell -\t\t%#llx\n", u.v64);
off += scnprintf(buf + off, buf_size - off,
"\nNTB Window Size:\n");
pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"PBAR23SZ %hhu\n", u.v8);
if (!ndev->bar4_split) {
pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"PBAR45SZ %hhu\n", u.v8);
} else {
pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"PBAR4SZ %hhu\n", u.v8);
pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"PBAR5SZ %hhu\n", u.v8);
}
pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"SBAR23SZ %hhu\n", u.v8);
if (!ndev->bar4_split) {
pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"SBAR45SZ %hhu\n", u.v8);
} else {
pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"SBAR4SZ %hhu\n", u.v8);
pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &u.v8);
off += scnprintf(buf + off, buf_size - off,
"SBAR5SZ %hhu\n", u.v8);
}
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"\nNTB Incoming XLAT:\n"); "\nNTB Incoming XLAT:\n");
...@@ -669,7 +706,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, ...@@ -669,7 +706,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
"LMT45 -\t\t\t%#018llx\n", u.v64); "LMT45 -\t\t\t%#018llx\n", u.v64);
} }
if (pdev_is_xeon(ndev->ntb.pdev)) { if (pdev_is_xeon(pdev)) {
if (ntb_topo_is_b2b(ndev->ntb.topo)) { if (ntb_topo_is_b2b(ndev->ntb.topo)) {
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"\nNTB Outgoing B2B XLAT:\n"); "\nNTB Outgoing B2B XLAT:\n");
...@@ -750,22 +787,22 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, ...@@ -750,22 +787,22 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"\nXEON NTB Hardware Errors:\n"); "\nXEON NTB Hardware Errors:\n");
if (!pci_read_config_word(ndev->ntb.pdev, if (!pci_read_config_word(pdev,
XEON_DEVSTS_OFFSET, &u.v16)) XEON_DEVSTS_OFFSET, &u.v16))
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"DEVSTS -\t\t%#06x\n", u.v16); "DEVSTS -\t\t%#06x\n", u.v16);
if (!pci_read_config_word(ndev->ntb.pdev, if (!pci_read_config_word(pdev,
XEON_LINK_STATUS_OFFSET, &u.v16)) XEON_LINK_STATUS_OFFSET, &u.v16))
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"LNKSTS -\t\t%#06x\n", u.v16); "LNKSTS -\t\t%#06x\n", u.v16);
if (!pci_read_config_dword(ndev->ntb.pdev, if (!pci_read_config_dword(pdev,
XEON_UNCERRSTS_OFFSET, &u.v32)) XEON_UNCERRSTS_OFFSET, &u.v32))
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"UNCERRSTS -\t\t%#06x\n", u.v32); "UNCERRSTS -\t\t%#06x\n", u.v32);
if (!pci_read_config_dword(ndev->ntb.pdev, if (!pci_read_config_dword(pdev,
XEON_CORERRSTS_OFFSET, &u.v32)) XEON_CORERRSTS_OFFSET, &u.v32))
off += scnprintf(buf + off, buf_size - off, off += scnprintf(buf + off, buf_size - off,
"CORERRSTS -\t\t%#06x\n", u.v32); "CORERRSTS -\t\t%#06x\n", u.v32);
......
...@@ -153,6 +153,7 @@ struct ntb_transport_qp { ...@@ -153,6 +153,7 @@ struct ntb_transport_qp {
unsigned int rx_index; unsigned int rx_index;
unsigned int rx_max_entry; unsigned int rx_max_entry;
unsigned int rx_max_frame; unsigned int rx_max_frame;
unsigned int rx_alloc_entry;
dma_cookie_t last_cookie; dma_cookie_t last_cookie;
struct tasklet_struct rxc_db_work; struct tasklet_struct rxc_db_work;
...@@ -480,7 +481,9 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, ...@@ -480,7 +481,9 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
out_offset += snprintf(buf + out_offset, out_count - out_offset, out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_index - \t%u\n", qp->rx_index); "rx_index - \t%u\n", qp->rx_index);
out_offset += snprintf(buf + out_offset, out_count - out_offset, out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_max_entry - \t%u\n\n", qp->rx_max_entry); "rx_max_entry - \t%u\n", qp->rx_max_entry);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry);
out_offset += snprintf(buf + out_offset, out_count - out_offset, out_offset += snprintf(buf + out_offset, out_count - out_offset,
"tx_bytes - \t%llu\n", qp->tx_bytes); "tx_bytes - \t%llu\n", qp->tx_bytes);
...@@ -597,9 +600,12 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, ...@@ -597,9 +600,12 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
{ {
struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
struct ntb_transport_mw *mw; struct ntb_transport_mw *mw;
struct ntb_dev *ndev = nt->ndev;
struct ntb_queue_entry *entry;
unsigned int rx_size, num_qps_mw; unsigned int rx_size, num_qps_mw;
unsigned int mw_num, mw_count, qp_count; unsigned int mw_num, mw_count, qp_count;
unsigned int i; unsigned int i;
int node;
mw_count = nt->mw_count; mw_count = nt->mw_count;
qp_count = nt->qp_count; qp_count = nt->qp_count;
...@@ -626,6 +632,23 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, ...@@ -626,6 +632,23 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
qp->rx_max_entry = rx_size / qp->rx_max_frame; qp->rx_max_entry = rx_size / qp->rx_max_frame;
qp->rx_index = 0; qp->rx_index = 0;
/*
* Checking to see if we have more entries than the default.
* We should add additional entries if that is the case so we
* can be in sync with the transport frames.
*/
node = dev_to_node(&ndev->dev);
for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node);
if (!entry)
return -ENOMEM;
entry->qp = qp;
ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
&qp->rx_free_q);
qp->rx_alloc_entry++;
}
qp->remote_rx_info->entry = qp->rx_max_entry - 1; qp->remote_rx_info->entry = qp->rx_max_entry - 1;
/* setup the hdr offsets with 0's */ /* setup the hdr offsets with 0's */
...@@ -1037,6 +1060,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) ...@@ -1037,6 +1060,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
int node; int node;
int rc, i; int rc, i;
mw_count = ntb_mw_count(ndev);
if (ntb_spad_count(ndev) < (NUM_MWS + 1 + mw_count * 2)) {
dev_err(&ndev->dev, "Not enough scratch pad registers for %s",
NTB_TRANSPORT_NAME);
return -EIO;
}
if (ntb_db_is_unsafe(ndev)) if (ntb_db_is_unsafe(ndev))
dev_dbg(&ndev->dev, dev_dbg(&ndev->dev,
"doorbell is unsafe, proceed anyway...\n"); "doorbell is unsafe, proceed anyway...\n");
...@@ -1052,8 +1082,6 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) ...@@ -1052,8 +1082,6 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
nt->ndev = ndev; nt->ndev = ndev;
mw_count = ntb_mw_count(ndev);
nt->mw_count = mw_count; nt->mw_count = mw_count;
nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec), nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec),
...@@ -1722,8 +1750,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev, ...@@ -1722,8 +1750,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry,
&qp->rx_free_q); &qp->rx_free_q);
} }
qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES;
for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { for (i = 0; i < qp->tx_max_entry; i++) {
entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node);
if (!entry) if (!entry)
goto err2; goto err2;
...@@ -1744,6 +1773,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, ...@@ -1744,6 +1773,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
kfree(entry); kfree(entry);
err1: err1:
qp->rx_alloc_entry = 0;
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry); kfree(entry);
if (qp->tx_dma_chan) if (qp->tx_dma_chan)
......
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/ntb.h> #include <linux/ntb.h>
#include <linux/mutex.h>
#define DRIVER_NAME "ntb_perf" #define DRIVER_NAME "ntb_perf"
#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool"
...@@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); ...@@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
static struct dentry *perf_debugfs_dir; static struct dentry *perf_debugfs_dir;
static unsigned long max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
static unsigned int seg_order = 19; /* 512K */ static unsigned int seg_order = 19; /* 512K */
module_param(seg_order, uint, 0644); module_param(seg_order, uint, 0644);
MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing"); MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
...@@ -117,6 +122,10 @@ struct pthr_ctx { ...@@ -117,6 +122,10 @@ struct pthr_ctx {
int dma_prep_err; int dma_prep_err;
int src_idx; int src_idx;
void *srcs[MAX_SRCS]; void *srcs[MAX_SRCS];
wait_queue_head_t *wq;
int status;
u64 copied;
u64 diff_us;
}; };
struct perf_ctx { struct perf_ctx {
...@@ -124,23 +133,23 @@ struct perf_ctx { ...@@ -124,23 +133,23 @@ struct perf_ctx {
spinlock_t db_lock; spinlock_t db_lock;
struct perf_mw mw; struct perf_mw mw;
bool link_is_up; bool link_is_up;
struct work_struct link_cleanup;
struct delayed_work link_work; struct delayed_work link_work;
wait_queue_head_t link_wq;
struct dentry *debugfs_node_dir; struct dentry *debugfs_node_dir;
struct dentry *debugfs_run; struct dentry *debugfs_run;
struct dentry *debugfs_threads; struct dentry *debugfs_threads;
u8 perf_threads; u8 perf_threads;
bool run; /* mutex ensures only one set of threads run at once */
struct mutex run_mutex;
struct pthr_ctx pthr_ctx[MAX_THREADS]; struct pthr_ctx pthr_ctx[MAX_THREADS];
atomic_t tsync; atomic_t tsync;
atomic_t tdone;
}; };
enum { enum {
VERSION = 0, VERSION = 0,
MW_SZ_HIGH, MW_SZ_HIGH,
MW_SZ_LOW, MW_SZ_LOW,
SPAD_MSG,
SPAD_ACK,
MAX_SPAD MAX_SPAD
}; };
...@@ -148,10 +157,16 @@ static void perf_link_event(void *ctx) ...@@ -148,10 +157,16 @@ static void perf_link_event(void *ctx)
{ {
struct perf_ctx *perf = ctx; struct perf_ctx *perf = ctx;
if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
schedule_delayed_work(&perf->link_work, 2*HZ); schedule_delayed_work(&perf->link_work, 2*HZ);
else } else {
schedule_work(&perf->link_cleanup); dev_dbg(&perf->ntb->pdev->dev, "link down\n");
if (!perf->link_is_up)
cancel_delayed_work_sync(&perf->link_work);
perf->link_is_up = false;
}
} }
static void perf_db_event(void *ctx, int vec) static void perf_db_event(void *ctx, int vec)
...@@ -271,6 +286,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, ...@@ -271,6 +286,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
char __iomem *tmp = dst; char __iomem *tmp = dst;
u64 perf, diff_us; u64 perf, diff_us;
ktime_t kstart, kstop, kdiff; ktime_t kstart, kstop, kdiff;
unsigned long last_sleep = jiffies;
chunks = div64_u64(win_size, buf_size); chunks = div64_u64(win_size, buf_size);
total_chunks = div64_u64(total, buf_size); total_chunks = div64_u64(total, buf_size);
...@@ -286,30 +302,40 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, ...@@ -286,30 +302,40 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
} else } else
tmp += buf_size; tmp += buf_size;
/* Probably should schedule every 4GB to prevent soft hang. */ /* Probably should schedule every 5s to prevent soft hang. */
if (((copied % SZ_4G) == 0) && !use_dma) { if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
last_sleep = jiffies;
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1); schedule_timeout(1);
} }
if (unlikely(kthread_should_stop()))
break;
} }
if (use_dma) { if (use_dma) {
pr_info("%s: All DMA descriptors submitted\n", current->comm); pr_debug("%s: All DMA descriptors submitted\n", current->comm);
while (atomic_read(&pctx->dma_sync) != 0) while (atomic_read(&pctx->dma_sync) != 0) {
if (kthread_should_stop())
break;
msleep(20); msleep(20);
}
} }
kstop = ktime_get(); kstop = ktime_get();
kdiff = ktime_sub(kstop, kstart); kdiff = ktime_sub(kstop, kstart);
diff_us = ktime_to_us(kdiff); diff_us = ktime_to_us(kdiff);
pr_info("%s: copied %llu bytes\n", current->comm, copied); pr_debug("%s: copied %llu bytes\n", current->comm, copied);
pr_info("%s: lasted %llu usecs\n", current->comm, diff_us); pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);
perf = div64_u64(copied, diff_us); perf = div64_u64(copied, diff_us);
pr_info("%s: MBytes/s: %llu\n", current->comm, perf); pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);
pctx->copied = copied;
pctx->diff_us = diff_us;
return 0; return 0;
} }
...@@ -331,7 +357,7 @@ static int ntb_perf_thread(void *data) ...@@ -331,7 +357,7 @@ static int ntb_perf_thread(void *data)
int rc, node, i; int rc, node, i;
struct dma_chan *dma_chan = NULL; struct dma_chan *dma_chan = NULL;
pr_info("kthread %s starting...\n", current->comm); pr_debug("kthread %s starting...\n", current->comm);
node = dev_to_node(&pdev->dev); node = dev_to_node(&pdev->dev);
...@@ -389,7 +415,10 @@ static int ntb_perf_thread(void *data) ...@@ -389,7 +415,10 @@ static int ntb_perf_thread(void *data)
pctx->srcs[i] = NULL; pctx->srcs[i] = NULL;
} }
return 0; atomic_inc(&perf->tdone);
wake_up(pctx->wq);
rc = 0;
goto done;
err: err:
for (i = 0; i < MAX_SRCS; i++) { for (i = 0; i < MAX_SRCS; i++) {
...@@ -402,6 +431,16 @@ static int ntb_perf_thread(void *data) ...@@ -402,6 +431,16 @@ static int ntb_perf_thread(void *data)
pctx->dma_chan = NULL; pctx->dma_chan = NULL;
} }
done:
/* Wait until we are told to stop */
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
break;
schedule();
}
__set_current_state(TASK_RUNNING);
return rc; return rc;
} }
...@@ -472,6 +511,10 @@ static void perf_link_work(struct work_struct *work) ...@@ -472,6 +511,10 @@ static void perf_link_work(struct work_struct *work)
dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
size = perf->mw.phys_size; size = perf->mw.phys_size;
if (max_mw_size && size > max_mw_size)
size = max_mw_size;
ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size)); ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size)); ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
ntb_peer_spad_write(ndev, VERSION, PERF_VERSION); ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
...@@ -496,6 +539,7 @@ static void perf_link_work(struct work_struct *work) ...@@ -496,6 +539,7 @@ static void perf_link_work(struct work_struct *work)
goto out1; goto out1;
perf->link_is_up = true; perf->link_is_up = true;
wake_up(&perf->link_wq);
return; return;
...@@ -508,18 +552,6 @@ static void perf_link_work(struct work_struct *work) ...@@ -508,18 +552,6 @@ static void perf_link_work(struct work_struct *work)
msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
} }
static void perf_link_cleanup(struct work_struct *work)
{
struct perf_ctx *perf = container_of(work,
struct perf_ctx,
link_cleanup);
dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
if (!perf->link_is_up)
cancel_delayed_work_sync(&perf->link_work);
}
static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
{ {
struct perf_mw *mw; struct perf_mw *mw;
...@@ -544,16 +576,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, ...@@ -544,16 +576,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
{ {
struct perf_ctx *perf = filp->private_data; struct perf_ctx *perf = filp->private_data;
char *buf; char *buf;
ssize_t ret, out_offset; ssize_t ret, out_off = 0;
struct pthr_ctx *pctx;
int i;
u64 rate;
if (!perf) if (!perf)
return 0; return 0;
buf = kmalloc(64, GFP_KERNEL); buf = kmalloc(1024, GFP_KERNEL);
if (!buf) if (!buf)
return -ENOMEM; return -ENOMEM;
out_offset = snprintf(buf, 64, "%d\n", perf->run);
ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); if (mutex_is_locked(&perf->run_mutex)) {
out_off = snprintf(buf, 64, "running\n");
goto read_from_buf;
}
for (i = 0; i < MAX_THREADS; i++) {
pctx = &perf->pthr_ctx[i];
if (pctx->status == -ENODATA)
break;
if (pctx->status) {
out_off += snprintf(buf + out_off, 1024 - out_off,
"%d: error %d\n", i,
pctx->status);
continue;
}
rate = div64_u64(pctx->copied, pctx->diff_us);
out_off += snprintf(buf + out_off, 1024 - out_off,
"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
i, pctx->copied, pctx->diff_us, rate);
}
read_from_buf:
ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
kfree(buf); kfree(buf);
return ret; return ret;
...@@ -564,80 +624,90 @@ static void threads_cleanup(struct perf_ctx *perf) ...@@ -564,80 +624,90 @@ static void threads_cleanup(struct perf_ctx *perf)
struct pthr_ctx *pctx; struct pthr_ctx *pctx;
int i; int i;
perf->run = false;
for (i = 0; i < MAX_THREADS; i++) { for (i = 0; i < MAX_THREADS; i++) {
pctx = &perf->pthr_ctx[i]; pctx = &perf->pthr_ctx[i];
if (pctx->thread) { if (pctx->thread) {
kthread_stop(pctx->thread); pctx->status = kthread_stop(pctx->thread);
pctx->thread = NULL; pctx->thread = NULL;
} }
} }
} }
/*
 * Reset the status of every per-thread context to -ENODATA.
 *
 * debugfs_run_read() stops reporting at the first context whose status is
 * -ENODATA, so this marks all MAX_THREADS slots as holding no result.
 */
static void perf_clear_thread_status(struct perf_ctx *perf)
{
	int idx = 0;

	while (idx < MAX_THREADS) {
		perf->pthr_ctx[idx].status = -ENODATA;
		idx++;
	}
}
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *offp) size_t count, loff_t *offp)
{ {
struct perf_ctx *perf = filp->private_data; struct perf_ctx *perf = filp->private_data;
int node, i; int node, i;
DECLARE_WAIT_QUEUE_HEAD(wq);
if (!perf->link_is_up) if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
return 0; return -ENOLINK;
if (perf->perf_threads == 0) if (perf->perf_threads == 0)
return 0; return -EINVAL;
if (atomic_read(&perf->tsync) == 0) if (!mutex_trylock(&perf->run_mutex))
perf->run = false; return -EBUSY;
if (perf->run) perf_clear_thread_status(perf);
threads_cleanup(perf);
else {
perf->run = true;
if (perf->perf_threads > MAX_THREADS) { if (perf->perf_threads > MAX_THREADS) {
perf->perf_threads = MAX_THREADS; perf->perf_threads = MAX_THREADS;
pr_info("Reset total threads to: %u\n", MAX_THREADS); pr_info("Reset total threads to: %u\n", MAX_THREADS);
} }
/* no greater than 1M */ /* no greater than 1M */
if (seg_order > MAX_SEG_ORDER) { if (seg_order > MAX_SEG_ORDER) {
seg_order = MAX_SEG_ORDER; seg_order = MAX_SEG_ORDER;
pr_info("Fix seg_order to %u\n", seg_order); pr_info("Fix seg_order to %u\n", seg_order);
} }
if (run_order < seg_order) { if (run_order < seg_order) {
run_order = seg_order; run_order = seg_order;
pr_info("Fix run_order to %u\n", run_order); pr_info("Fix run_order to %u\n", run_order);
} }
node = dev_to_node(&perf->ntb->pdev->dev); node = dev_to_node(&perf->ntb->pdev->dev);
/* launch kernel thread */ atomic_set(&perf->tdone, 0);
for (i = 0; i < perf->perf_threads; i++) {
struct pthr_ctx *pctx;
pctx = &perf->pthr_ctx[i];
atomic_set(&pctx->dma_sync, 0);
pctx->perf = perf;
pctx->thread =
kthread_create_on_node(ntb_perf_thread,
(void *)pctx,
node, "ntb_perf %d", i);
if (IS_ERR(pctx->thread)) {
pctx->thread = NULL;
goto err;
} else
wake_up_process(pctx->thread);
if (perf->run == false)
return -ENXIO;
}
/* launch kernel thread */
for (i = 0; i < perf->perf_threads; i++) {
struct pthr_ctx *pctx;
pctx = &perf->pthr_ctx[i];
atomic_set(&pctx->dma_sync, 0);
pctx->perf = perf;
pctx->wq = &wq;
pctx->thread =
kthread_create_on_node(ntb_perf_thread,
(void *)pctx,
node, "ntb_perf %d", i);
if (IS_ERR(pctx->thread)) {
pctx->thread = NULL;
goto err;
} else {
wake_up_process(pctx->thread);
}
} }
wait_event_interruptible(wq,
atomic_read(&perf->tdone) == perf->perf_threads);
threads_cleanup(perf);
mutex_unlock(&perf->run_mutex);
return count; return count;
err: err:
threads_cleanup(perf); threads_cleanup(perf);
mutex_unlock(&perf->run_mutex);
return -ENXIO; return -ENXIO;
} }
...@@ -688,6 +758,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) ...@@ -688,6 +758,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
int node; int node;
int rc = 0; int rc = 0;
if (ntb_spad_count(ntb) < MAX_SPAD) {
dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
DRIVER_NAME);
return -EIO;
}
node = dev_to_node(&pdev->dev); node = dev_to_node(&pdev->dev);
perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
...@@ -699,11 +775,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) ...@@ -699,11 +775,11 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
perf->ntb = ntb; perf->ntb = ntb;
perf->perf_threads = 1; perf->perf_threads = 1;
atomic_set(&perf->tsync, 0); atomic_set(&perf->tsync, 0);
perf->run = false; mutex_init(&perf->run_mutex);
spin_lock_init(&perf->db_lock); spin_lock_init(&perf->db_lock);
perf_setup_mw(ntb, perf); perf_setup_mw(ntb, perf);
init_waitqueue_head(&perf->link_wq);
INIT_DELAYED_WORK(&perf->link_work, perf_link_work); INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
rc = ntb_set_ctx(ntb, perf, &perf_ops); rc = ntb_set_ctx(ntb, perf, &perf_ops);
if (rc) if (rc)
...@@ -717,11 +793,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) ...@@ -717,11 +793,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
if (rc) if (rc)
goto err_ctx; goto err_ctx;
perf_clear_thread_status(perf);
return 0; return 0;
err_ctx: err_ctx:
cancel_delayed_work_sync(&perf->link_work); cancel_delayed_work_sync(&perf->link_work);
cancel_work_sync(&perf->link_cleanup);
kfree(perf); kfree(perf);
err_perf: err_perf:
return rc; return rc;
...@@ -734,8 +811,9 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) ...@@ -734,8 +811,9 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
dev_dbg(&perf->ntb->dev, "%s called\n", __func__); dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
mutex_lock(&perf->run_mutex);
cancel_delayed_work_sync(&perf->link_work); cancel_delayed_work_sync(&perf->link_work);
cancel_work_sync(&perf->link_cleanup);
ntb_clear_ctx(ntb); ntb_clear_ctx(ntb);
ntb_link_disable(ntb); ntb_link_disable(ntb);
......
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/ntb.h> #include <linux/ntb.h>
...@@ -96,8 +97,13 @@ struct pp_ctx { ...@@ -96,8 +97,13 @@ struct pp_ctx {
spinlock_t db_lock; spinlock_t db_lock;
struct timer_list db_timer; struct timer_list db_timer;
unsigned long db_delay; unsigned long db_delay;
struct dentry *debugfs_node_dir;
struct dentry *debugfs_count;
atomic_t count;
}; };
static struct dentry *pp_debugfs_dir;
static void pp_ping(unsigned long ctx) static void pp_ping(unsigned long ctx)
{ {
struct pp_ctx *pp = (void *)ctx; struct pp_ctx *pp = (void *)ctx;
...@@ -171,10 +177,32 @@ static void pp_db_event(void *ctx, int vec) ...@@ -171,10 +177,32 @@ static void pp_db_event(void *ctx, int vec)
dev_dbg(&pp->ntb->dev, dev_dbg(&pp->ntb->dev,
"Pong vec %d bits %#llx\n", "Pong vec %d bits %#llx\n",
vec, db_bits); vec, db_bits);
atomic_inc(&pp->count);
} }
spin_unlock_irqrestore(&pp->db_lock, irqflags); spin_unlock_irqrestore(&pp->db_lock, irqflags);
} }
static int pp_debugfs_setup(struct pp_ctx *pp)
{
struct pci_dev *pdev = pp->ntb->pdev;
if (!pp_debugfs_dir)
return -ENODEV;
pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
pp_debugfs_dir);
if (!pp->debugfs_node_dir)
return -ENODEV;
pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR,
pp->debugfs_node_dir,
&pp->count);
if (!pp->debugfs_count)
return -ENODEV;
return 0;
}
static const struct ntb_ctx_ops pp_ops = { static const struct ntb_ctx_ops pp_ops = {
.link_event = pp_link_event, .link_event = pp_link_event,
.db_event = pp_db_event, .db_event = pp_db_event,
...@@ -210,6 +238,7 @@ static int pp_probe(struct ntb_client *client, ...@@ -210,6 +238,7 @@ static int pp_probe(struct ntb_client *client,
pp->ntb = ntb; pp->ntb = ntb;
pp->db_bits = 0; pp->db_bits = 0;
atomic_set(&pp->count, 0);
spin_lock_init(&pp->db_lock); spin_lock_init(&pp->db_lock);
setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp); setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp);
pp->db_delay = msecs_to_jiffies(delay_ms); pp->db_delay = msecs_to_jiffies(delay_ms);
...@@ -218,6 +247,10 @@ static int pp_probe(struct ntb_client *client, ...@@ -218,6 +247,10 @@ static int pp_probe(struct ntb_client *client,
if (rc) if (rc)
goto err_ctx; goto err_ctx;
rc = pp_debugfs_setup(pp);
if (rc)
goto err_ctx;
ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
ntb_link_event(ntb); ntb_link_event(ntb);
...@@ -234,6 +267,8 @@ static void pp_remove(struct ntb_client *client, ...@@ -234,6 +267,8 @@ static void pp_remove(struct ntb_client *client,
{ {
struct pp_ctx *pp = ntb->ctx; struct pp_ctx *pp = ntb->ctx;
debugfs_remove_recursive(pp->debugfs_node_dir);
ntb_clear_ctx(ntb); ntb_clear_ctx(ntb);
del_timer_sync(&pp->db_timer); del_timer_sync(&pp->db_timer);
ntb_link_disable(ntb); ntb_link_disable(ntb);
...@@ -247,4 +282,29 @@ static struct ntb_client pp_client = { ...@@ -247,4 +282,29 @@ static struct ntb_client pp_client = {
.remove = pp_remove, .remove = pp_remove,
}, },
}; };
module_ntb_client(pp_client);
/*
 * Module entry point.
 *
 * Creates the module-wide debugfs directory (only when debugfs reports
 * itself initialized) and registers the ping-pong NTB client.  If the
 * registration fails, the debugfs directory is removed again and the
 * registration error is returned.
 */
static int __init pp_init(void)
{
	int rc;

	if (debugfs_initialized())
		pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);

	rc = ntb_register_client(&pp_client);
	if (!rc)
		return 0;

	/* Registration failed: undo the debugfs setup before bailing out. */
	debugfs_remove_recursive(pp_debugfs_dir);
	return rc;
}
module_init(pp_init);
/*
 * Module exit point: unregister the ping-pong NTB client, then remove the
 * module-wide debugfs directory tree rooted at pp_debugfs_dir.
 *
 * NOTE(review): presumably unregistering the client runs pp_remove() for
 * each bound device, so per-device entries are gone before the parent
 * directory is removed -- confirm against the NTB core.
 */
static void __exit pp_exit(void)
{
ntb_unregister_client(&pp_client);
debugfs_remove_recursive(pp_debugfs_dir);
}
module_exit(pp_exit);
...@@ -59,6 +59,12 @@ ...@@ -59,6 +59,12 @@
* *
* Eg: check if clearing the doorbell mask generates an interrupt. * Eg: check if clearing the doorbell mask generates an interrupt.
* *
* # Check the link status
* root@self# cat $DBG_DIR/link
*
* # Block until the link is up
* root@self# echo Y > $DBG_DIR/link_event
*
* # Set the doorbell mask * # Set the doorbell mask
* root@self# echo 's 1' > $DBG_DIR/mask * root@self# echo 's 1' > $DBG_DIR/mask
* *
...@@ -79,6 +85,13 @@ ...@@ -79,6 +85,13 @@
* root@self# cat $DBG_DIR/spad * root@self# cat $DBG_DIR/spad
* *
* Observe that spad 0 and 1 have the values set by the peer. * Observe that spad 0 and 1 have the values set by the peer.
*
* # Check the memory window translation info
* cat $DBG_DIR/peer_trans0
*
* # Setup a 16k memory window buffer
* echo 16384 > $DBG_DIR/peer_trans0
*
*/ */
#include <linux/init.h> #include <linux/init.h>
...@@ -89,6 +102,7 @@ ...@@ -89,6 +102,7 @@
#include <linux/dma-mapping.h> #include <linux/dma-mapping.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ntb.h> #include <linux/ntb.h>
...@@ -105,11 +119,27 @@ MODULE_VERSION(DRIVER_VERSION); ...@@ -105,11 +119,27 @@ MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESCRIPTION); MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
#define MAX_MWS 16
static struct dentry *tool_dbgfs; static struct dentry *tool_dbgfs;
/*
 * State kept for a single memory window; struct tool_ctx embeds an array
 * of MAX_MWS of these.
 *
 * NOTE(review): the code that fills and uses these fields is outside this
 * view, so the per-field notes below are inferred from names and types
 * only -- confirm before relying on them.
 */
struct tool_mw {
/* presumably this window's index within tool_ctx.mws -- confirm */
int idx;
/* presumably the tool_ctx that embeds this window -- confirm */
struct tool_ctx *tc;
/* presumably the size of the window aperture -- confirm */
resource_size_t win_size;
/* presumably the size of the buffer currently set up -- confirm */
resource_size_t size;
/* I/O-memory pointer (__iomem); presumably the mapped local window -- confirm */
u8 __iomem *local;
/* presumably the CPU address of the buffer exposed to the peer -- confirm */
u8 *peer;
/* presumably the DMA address paired with 'peer' -- confirm */
dma_addr_t peer_dma;
/* debugfs entry; presumably the peer_trans<N> file for this window -- confirm */
struct dentry *peer_dbg_file;
};
struct tool_ctx { struct tool_ctx {
struct ntb_dev *ntb; struct ntb_dev *ntb;
struct dentry *dbgfs; struct dentry *dbgfs;
wait_queue_head_t link_wq;
int mw_count;
struct tool_mw mws[MAX_MWS];
}; };
#define SPAD_FNAME_SIZE 0x10 #define SPAD_FNAME_SIZE 0x10
...@@ -135,6 +165,8 @@ static void tool_link_event(void *ctx) ...@@ -135,6 +165,8 @@ static void tool_link_event(void *ctx)
dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n",
up ? "up" : "down", speed, width); up ? "up" : "down", speed, width);
wake_up(&tc->link_wq);
} }
static void tool_db_event(void *ctx, int vec) static void tool_db_event(void *ctx, int vec)
...@@ -239,7 +271,14 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, ...@@ -239,7 +271,14 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf,
if (!spad_read_fn) if (!spad_read_fn)
return -EINVAL; return -EINVAL;
buf_size = min_t(size_t, size, 0x100); spad_count = ntb_spad_count(tc->ntb);
/*
* We multiply the number of spads by 15 to get the buffer size
* this is from 3 for the %d, 10 for the largest hex value
* (0x00000000) and 2 for the tab and line feed.
*/
buf_size = min_t(size_t, size, spad_count * 15);
buf = kmalloc(buf_size, GFP_KERNEL); buf = kmalloc(buf_size, GFP_KERNEL);
if (!buf) if (!buf)
...@@ -247,7 +286,6 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, ...@@ -247,7 +286,6 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf,
pos = 0; pos = 0;
spad_count = ntb_spad_count(tc->ntb);
for (i = 0; i < spad_count; ++i) { for (i = 0; i < spad_count; ++i) {
pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n",
i, spad_read_fn(tc->ntb, i)); i, spad_read_fn(tc->ntb, i));
...@@ -268,7 +306,7 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, ...@@ -268,7 +306,7 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc,
{ {
int spad_idx; int spad_idx;
u32 spad_val; u32 spad_val;
char *buf; char *buf, *buf_ptr;
int pos, n; int pos, n;
ssize_t rc; ssize_t rc;
...@@ -288,14 +326,15 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, ...@@ -288,14 +326,15 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc,
} }
buf[size] = 0; buf[size] = 0;
buf_ptr = buf;
n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos); n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos);
while (n == 2) { while (n == 2) {
buf_ptr += pos;
rc = spad_write_fn(tc->ntb, spad_idx, spad_val); rc = spad_write_fn(tc->ntb, spad_idx, spad_val);
if (rc) if (rc)
break; break;
n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos); n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos);
} }
if (n < 0) if (n < 0)
...@@ -442,8 +481,384 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops, ...@@ -442,8 +481,384 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops,
tool_peer_spad_read, tool_peer_spad_read,
tool_peer_spad_write); tool_peer_spad_write);
/*
 * Read handler for the "link" file: reports the current link state as a
 * single 'Y' (up) or 'N' (down) followed by a newline.
 */
static ssize_t tool_link_read(struct file *filep, char __user *ubuf,
			      size_t size, loff_t *offp)
{
	struct tool_ctx *tc = filep->private_data;
	char state[3] = { 'N', '\n', '\0' };

	if (ntb_link_is_up(tc->ntb, NULL, NULL))
		state[0] = 'Y';

	/* Only the state character and the newline are handed to the reader. */
	return simple_read_from_buffer(ubuf, size, offp, state, 2);
}
/*
 * Write handler for the "link" file: a true boolean string enables the
 * link with automatic speed/width, a false one disables it.
 *
 * Returns the number of bytes the caller supplied on success, -EFAULT if
 * the user buffer cannot be read, or the error from strtobool() or the
 * link enable/disable call.
 */
static ssize_t tool_link_write(struct file *filep, const char __user *ubuf,
			       size_t size, loff_t *offp)
{
	struct tool_ctx *tc = filep->private_data;
	char kbuf[32];
	size_t len = min(size, (sizeof(kbuf) - 1));
	bool enable;
	int err;

	if (copy_from_user(kbuf, ubuf, len))
		return -EFAULT;
	kbuf[len] = '\0';

	err = strtobool(kbuf, &enable);
	if (err)
		return err;

	err = enable ?
		ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO) :
		ntb_link_disable(tc->ntb);
	if (err)
		return err;

	return size;
}
static TOOL_FOPS_RDWR(tool_link_fops,
tool_link_read,
tool_link_write);
/*
 * Write handler for the "link_event" file: parse a boolean from the user
 * and block until the link state equals it (true = up, false = down).
 *
 * Returns the number of bytes the caller supplied once the link is in the
 * requested state, -EFAULT if the user buffer cannot be read, the error
 * from strtobool() for malformed input, or the (non-zero) result of
 * wait_event_interruptible() when the wait is interrupted by a signal.
 */
static ssize_t tool_link_event_write(struct file *filep,
				     const char __user *ubuf,
				     size_t size, loff_t *offp)
{
	struct tool_ctx *tc = filep->private_data;
	char buf[32];
	size_t buf_size;
	bool val;
	int rc;

	buf_size = min(size, (sizeof(buf) - 1));
	if (copy_from_user(buf, ubuf, buf_size))
		return -EFAULT;

	buf[buf_size] = '\0';

	rc = strtobool(buf, &val);
	if (rc)
		return rc;

	/*
	 * Hand the wait's own error code back unchanged so the signal code
	 * can restart or interrupt the syscall; a hand-picked -ERESTART
	 * would instead surface in userspace as a raw errno value.
	 */
	rc = wait_event_interruptible(tc->link_wq,
			ntb_link_is_up(tc->ntb, NULL, NULL) == val);
	if (rc)
		return rc;

	return size;
}
static TOOL_FOPS_RDWR(tool_link_event_fops,
NULL,
tool_link_event_write);
/*
 * Read handler for the "mw%d" files: copy bytes out of the locally mapped
 * memory window, starting at the file offset.
 *
 * The data is staged through a kernel bounce buffer because the window is
 * I/O memory. Returns the number of bytes copied (0 at or past the end of
 * the window), -EIO if the window is not mapped, -EINVAL for a negative
 * offset, -ENOMEM if the bounce buffer cannot be allocated, or -EFAULT if
 * nothing at all could be copied to the user buffer.
 */
static ssize_t tool_mw_read(struct file *filep, char __user *ubuf,
			    size_t size, loff_t *offp)
{
	struct tool_mw *mw = filep->private_data;
	loff_t pos = *offp;
	void *bounce;
	ssize_t rc;

	if (!mw->local)
		return -EIO;
	if (pos < 0)
		return -EINVAL;
	if (!size || pos >= mw->win_size)
		return 0;

	/* Clamp the request to what is left of the window. */
	if (size > mw->win_size - pos)
		size = mw->win_size - pos;

	bounce = kmalloc(size, GFP_KERNEL);
	if (!bounce)
		return -ENOMEM;

	memcpy_fromio(bounce, mw->local + pos, size);

	/* copy_to_user() reports how many bytes it could NOT copy. */
	rc = copy_to_user(ubuf, bounce, size);
	if (rc == size) {
		rc = -EFAULT;
	} else {
		size -= rc;
		*offp = pos + size;
		rc = size;
	}

	kfree(bounce);

	return rc;
}
/*
 * Write handler for the "mw%d" files: copy user data into the locally
 * mapped memory window, starting at the file offset.
 *
 * The data is staged through a kernel bounce buffer because the window is
 * I/O memory. Returns the number of bytes written (0 at or past the end
 * of the window), -EIO if the window is not mapped, -EINVAL for a negative
 * offset, -ENOMEM if the bounce buffer cannot be allocated, or -EFAULT if
 * nothing at all could be copied from the user buffer.
 */
static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf,
			     size_t size, loff_t *offp)
{
	struct tool_mw *mw = filep->private_data;
	ssize_t rc;
	loff_t pos = *offp;
	void *buf;

	/* Same guard as the read side: never touch an unmapped window. */
	if (mw->local == NULL)
		return -EIO;

	if (pos < 0)
		return -EINVAL;

	if (pos >= mw->win_size || !size)
		return 0;

	/* Clamp the request to what is left of the window. */
	if (size > mw->win_size - pos)
		size = mw->win_size - pos;

	buf = kmalloc(size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* copy_from_user() reports how many bytes it could NOT copy. */
	rc = copy_from_user(buf, ubuf, size);
	if (rc == size) {
		rc = -EFAULT;
		goto err_free;
	}

	size -= rc;
	*offp = pos + size;
	rc = size;

	memcpy_toio(mw->local + pos, buf, size);

err_free:
	kfree(buf);

	return rc;
}
static TOOL_FOPS_RDWR(tool_mw_fops,
tool_mw_read,
tool_mw_write);
/*
 * Read handler for the "peer_mw%d" files: expose the buffer the peer
 * writes into. Fails with -ENXIO while no buffer has been set up.
 */
static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf,
				 size_t size, loff_t *offp)
{
	struct tool_mw *mw = filep->private_data;

	return mw->peer ?
		simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size) :
		-ENXIO;
}
/*
 * Write handler for the "peer_mw%d" files: store user data into the
 * buffer the peer can access. Fails with -ENXIO while no buffer has been
 * set up.
 */
static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf,
				  size_t size, loff_t *offp)
{
	struct tool_mw *mw = filep->private_data;

	return mw->peer ?
		simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size) :
		-ENXIO;
}
static TOOL_FOPS_RDWR(tool_peer_mw_fops,
tool_peer_mw_read,
tool_peer_mw_write);
/*
 * Allocate the peer-visible buffer for memory window @idx and program its
 * translation.
 *
 * The buffer size is @req_size capped at the window size, then rounded up
 * to the alignment values reported by ntb_mw_get_range(). On success a
 * "peer_mw%d" debugfs file exposing the buffer is created.
 *
 * Returns 0 on success or if the window is already set up, the error from
 * ntb_mw_get_range() or ntb_mw_set_trans(), or -ENOMEM if the buffer
 * cannot be allocated. Every failure after the size has been computed
 * leaves the window's peer/peer_dma/size fields cleared.
 */
static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size)
{
	int rc;
	struct tool_mw *mw = &tc->mws[idx];
	phys_addr_t base;
	resource_size_t size, align, align_size;
	char buf[16];

	if (mw->peer)
		return 0;

	rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align,
			      &align_size);
	if (rc)
		return rc;

	mw->size = min_t(resource_size_t, req_size, size);
	mw->size = round_up(mw->size, align);
	mw->size = round_up(mw->size, align_size);
	mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size,
				      &mw->peer_dma, GFP_KERNEL);
	if (!mw->peer) {
		rc = -ENOMEM;
		goto err_reset;
	}

	rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size);
	if (rc)
		goto err_free_dma;

	snprintf(buf, sizeof(buf), "peer_mw%d", idx);
	mw->peer_dbg_file = debugfs_create_file(buf, S_IRUSR | S_IWUSR,
						mw->tc->dbgfs, mw,
						&tool_peer_mw_fops);

	return 0;

err_free_dma:
	dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
			  mw->peer,
			  mw->peer_dma);
err_reset:
	/* Do not leave a stale size or DMA address behind on any failure. */
	mw->peer = NULL;
	mw->peer_dma = 0;
	mw->size = 0;

	return rc;
}
/*
 * Tear down the peer buffer of memory window @idx, if one is set up.
 *
 * The "peer_mw%d" debugfs file is removed before the buffer is released:
 * its handlers read and write through mw->peer, so the file must not stay
 * reachable while the memory behind it is being freed. Afterwards all
 * peer-related fields, including the allocated size, are cleared.
 */
static void tool_free_mw(struct tool_ctx *tc, int idx)
{
	struct tool_mw *mw = &tc->mws[idx];

	debugfs_remove(mw->peer_dbg_file);
	mw->peer_dbg_file = NULL;

	if (mw->peer) {
		ntb_mw_clear_trans(tc->ntb, idx);
		dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
				  mw->peer,
				  mw->peer_dma);
	}

	mw->peer = NULL;
	mw->peer_dma = 0;
	mw->size = 0;
}
/*
 * Read handler for the "peer_trans%d" files: print the window's range
 * information and whether a peer buffer is currently allocated.
 *
 * Returns the number of bytes copied to the user, the error from
 * ntb_mw_get_range() if the range cannot be queried, or -ENOMEM if the
 * text buffer cannot be allocated.
 */
static ssize_t tool_peer_mw_trans_read(struct file *filep,
				       char __user *ubuf,
				       size_t size, loff_t *offp)
{
	struct tool_mw *mw = filep->private_data;

	char *buf;
	/*
	 * Always format into a full-size buffer. Sizing it by the read
	 * length would truncate the text, so a reader using small reads
	 * would see the file end early.
	 */
	size_t buf_size = 512;
	ssize_t ret, off = 0;

	phys_addr_t base;
	resource_size_t mw_size;
	resource_size_t align;
	resource_size_t align_size;

	/* Without a valid range the values below would be uninitialised. */
	ret = ntb_mw_get_range(mw->tc->ntb, mw->idx,
			       &base, &mw_size, &align, &align_size);
	if (ret)
		return ret;

	buf = kmalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	off += scnprintf(buf + off, buf_size - off,
			 "Peer MW %d Information:\n", mw->idx);

	off += scnprintf(buf + off, buf_size - off,
			 "Physical Address \t%pa[p]\n",
			 &base);

	off += scnprintf(buf + off, buf_size - off,
			 "Window Size \t%llu\n",
			 (unsigned long long)mw_size);

	off += scnprintf(buf + off, buf_size - off,
			 "Alignment \t%llu\n",
			 (unsigned long long)align);

	off += scnprintf(buf + off, buf_size - off,
			 "Size Alignment \t%llu\n",
			 (unsigned long long)align_size);

	off += scnprintf(buf + off, buf_size - off,
			 "Ready \t%c\n",
			 (mw->peer) ? 'Y' : 'N');

	off += scnprintf(buf + off, buf_size - off,
			 "Allocated Size \t%zu\n",
			 (mw->peer) ? (size_t)mw->size : 0);

	ret = simple_read_from_buffer(ubuf, size, offp, buf, off);
	kfree(buf);
	return ret;
}
/*
 * Write handler for the "peer_trans%d" files: the written number is the
 * requested peer buffer size. Any existing buffer is released first; a
 * value of zero leaves the window without a buffer.
 *
 * Returns the number of bytes the caller supplied on success, -EFAULT if
 * the user buffer cannot be read, or the error from kstrtoull() or
 * tool_setup_mw().
 */
static ssize_t tool_peer_mw_trans_write(struct file *filep,
					const char __user *ubuf,
					size_t size, loff_t *offp)
{
	struct tool_mw *mw = filep->private_data;
	char kbuf[32];
	size_t len = min(size, (sizeof(kbuf) - 1));
	unsigned long long req;
	int err;

	if (copy_from_user(kbuf, ubuf, len))
		return -EFAULT;
	kbuf[len] = '\0';

	err = kstrtoull(kbuf, 0, &req);
	if (err)
		return err;

	tool_free_mw(mw->tc, mw->idx);

	if (!req)
		return size;

	err = tool_setup_mw(mw->tc, mw->idx, req);
	if (err)
		return err;

	return size;
}
static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops,
tool_peer_mw_trans_read,
tool_peer_mw_trans_write);
/*
 * Initialise the bookkeeping for memory window @idx: record its size,
 * link it back to @tc and map the window write-combined.
 *
 * Returns 0 on success, the error from ntb_mw_get_range(), or -EFAULT if
 * the window cannot be mapped.
 */
static int tool_init_mw(struct tool_ctx *tc, int idx)
{
	struct tool_mw *mw = &tc->mws[idx];
	phys_addr_t base;
	int rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size,
				  NULL, NULL);

	if (rc)
		return rc;

	mw->tc = tc;
	mw->idx = idx;
	mw->local = ioremap_wc(base, mw->win_size);

	return mw->local ? 0 : -EFAULT;
}
/*
 * Release every memory window of @tc: free the peer buffer and undo the
 * local mapping, in ascending window order.
 */
static void tool_free_mws(struct tool_ctx *tc)
{
	int idx = 0;

	while (idx < tc->mw_count) {
		tool_free_mw(tc, idx);

		if (tc->mws[idx].local) {
			iounmap(tc->mws[idx].local);
			tc->mws[idx].local = NULL;
		}

		idx++;
	}
}
static void tool_setup_dbgfs(struct tool_ctx *tc) static void tool_setup_dbgfs(struct tool_ctx *tc)
{ {
int i;
/* This modules is useless without dbgfs... */ /* This modules is useless without dbgfs... */
if (!tool_dbgfs) { if (!tool_dbgfs) {
tc->dbgfs = NULL; tc->dbgfs = NULL;
...@@ -472,12 +887,31 @@ static void tool_setup_dbgfs(struct tool_ctx *tc) ...@@ -472,12 +887,31 @@ static void tool_setup_dbgfs(struct tool_ctx *tc)
debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs,
tc, &tool_peer_spad_fops); tc, &tool_peer_spad_fops);
debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs,
tc, &tool_link_fops);
debugfs_create_file("link_event", S_IWUSR, tc->dbgfs,
tc, &tool_link_event_fops);
for (i = 0; i < tc->mw_count; i++) {
char buf[30];
snprintf(buf, sizeof(buf), "mw%d", i);
debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs,
&tc->mws[i], &tool_mw_fops);
snprintf(buf, sizeof(buf), "peer_trans%d", i);
debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs,
&tc->mws[i], &tool_peer_mw_trans_fops);
}
} }
static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
{ {
struct tool_ctx *tc; struct tool_ctx *tc;
int rc; int rc;
int i;
if (ntb_db_is_unsafe(ntb)) if (ntb_db_is_unsafe(ntb))
dev_dbg(&ntb->dev, "doorbell is unsafe\n"); dev_dbg(&ntb->dev, "doorbell is unsafe\n");
...@@ -485,13 +919,21 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) ...@@ -485,13 +919,21 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
if (ntb_spad_is_unsafe(ntb)) if (ntb_spad_is_unsafe(ntb))
dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
tc = kmalloc(sizeof(*tc), GFP_KERNEL); tc = kzalloc(sizeof(*tc), GFP_KERNEL);
if (!tc) { if (!tc) {
rc = -ENOMEM; rc = -ENOMEM;
goto err_tc; goto err_tc;
} }
tc->ntb = ntb; tc->ntb = ntb;
init_waitqueue_head(&tc->link_wq);
tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS);
for (i = 0; i < tc->mw_count; i++) {
rc = tool_init_mw(tc, i);
if (rc)
goto err_ctx;
}
tool_setup_dbgfs(tc); tool_setup_dbgfs(tc);
...@@ -505,6 +947,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) ...@@ -505,6 +947,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
return 0; return 0;
err_ctx: err_ctx:
tool_free_mws(tc);
debugfs_remove_recursive(tc->dbgfs); debugfs_remove_recursive(tc->dbgfs);
kfree(tc); kfree(tc);
err_tc: err_tc:
...@@ -515,6 +958,8 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) ...@@ -515,6 +958,8 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb)
{ {
struct tool_ctx *tc = ntb->ctx; struct tool_ctx *tc = ntb->ctx;
tool_free_mws(tc);
ntb_clear_ctx(ntb); ntb_clear_ctx(ntb);
ntb_link_disable(ntb); ntb_link_disable(ntb);
......
#!/bin/bash
# Copyright (c) 2016 Microsemi. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it would be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Author: Logan Gunthorpe <logang@deltatee.com>
# Defaults for the command line options handled in parse_args().
# Host reached over ssh for the remote side (-r); empty means same host.
REMOTE_HOST=
# TRUE when -l was given: only list the NTB devices and exit.
LIST_DEVS=FALSE
# Debugfs mount point; can be overridden from the environment.
DEBUGFS=${DEBUGFS-/sys/kernel/debug}
# Passed to ntb_perf as run_order (-p).
PERF_RUN_ORDER=32
# Passed to ntb_perf as max_mw_size (-w).
MAX_MW_SIZE=0
# Non-empty when the DMA perf test should run as well (-d).
RUN_DMA_TESTS=
# Non-empty when the modules should be left loaded on exit (-C).
DONT_CLEANUP=
# Size written to the peer_trans files and compared by mw_test (-m).
MW_SIZE=65536
# Print the usage text, including the current defaults of the options
# that take a value.
function show_help()
{
	echo "Usage: $0 [OPTIONS] LOCAL_DEV REMOTE_DEV"
	echo "Run tests on a pair of NTB endpoints."
	echo
	echo "If the NTB device loops back to the same host then,"
	echo "just specifying the two PCI ids on the command line is"
	echo "sufficient. Otherwise, if the NTB link spans two hosts"
	echo "use the -r option to specify the hostname for the remote"
	echo "device. SSH will then be used to test the remote side."
	echo "An SSH key between the root users of the host would then"
	echo "be highly recommended."
	echo
	echo "Options:"
	echo " -C don't cleanup ntb modules on exit"
	echo " -d run dma tests"
	echo " -h show this help message"
	echo " -l list available local and remote PCI ids"
	echo " -m MW_SIZE memory window size for ntb_tool"
	echo " (default: $MW_SIZE)"
	echo " -r REMOTE_HOST specify the remote's hostname to connect"
	echo " to for the test (using ssh)"
	echo " -p NUM ntb_perf run order (default: $PERF_RUN_ORDER)"
	echo " -w max_mw_size maximum memory window size"
	echo
}
# Parse the leading options of "$@" into the global settings.
#
# Stops at the first non-option argument and leaves OPTIND pointing at it,
# so the caller can shift the consumed options away. Exits with status 1
# on an unknown option or an option that is missing its argument.
function parse_args()
{
	# Restart option parsing from the first argument on every call.
	OPTIND=1

	# The leading ':' selects getopts' silent mode, in which OPTARG holds
	# the offending option character for both error cases below.
	while getopts ":Cdhlm:r:p:w:" opt; do
		case "$opt" in
		C) DONT_CLEANUP=1 ;;
		d) RUN_DMA_TESTS=1 ;;
		h) show_help; exit 0 ;;
		l) LIST_DEVS=TRUE ;;
		m) MW_SIZE=${OPTARG} ;;
		r) REMOTE_HOST=${OPTARG} ;;
		p) PERF_RUN_ORDER=${OPTARG} ;;
		w) MAX_MW_SIZE=${OPTARG} ;;
		:)
			echo "Option -$OPTARG requires an argument" >&2
			exit 1
			;;
		\?)
			echo "Invalid option: -$OPTARG" >&2
			exit 1
			;;
		esac
	done
}
# Options may appear before, between and after the two positional
# arguments: parse a run of options, take the next word as a device name,
# and repeat.
parse_args "$@"
shift $((OPTIND-1))
LOCAL_DEV=$1
shift
parse_args "$@"
shift $((OPTIND-1))
REMOTE_DEV=$1
shift
parse_args "$@"
# From here on any failing command aborts the script.
set -e
# Single wrapper through which the script runs modprobe; all arguments are
# passed through unchanged.
function _modprobe()
{
modprobe "$@"
}
# Split a "[HOST]:/path" specification.
#
# Sets the globals REMOTE (host name, empty for a local path) and VPATH
# (the path itself). Only a colon directly followed by '/' separates the
# host, so colons inside a path are left alone.
function split_remote()
{
	VPATH=$1
	REMOTE=

	case "$VPATH" in
	*":/"*)
		REMOTE=${VPATH%%:*}
		VPATH=${VPATH#*:}
		;;
	esac
}
# Print the contents of a file that may live on the remote host.
# $1: "[HOST]:/path" or a plain local path (see split_remote)
function read_file()
{
	# Quoted so a path containing blanks or glob characters stays one word.
	split_remote "$1"

	if [[ "$REMOTE" != "" ]]; then
		ssh "$REMOTE" cat "$VPATH"
	else
		cat "$VPATH"
	fi
}
# Write a value, followed by a newline, to a file that may live on the
# remote host.
# $1: value to write
# $2: "[HOST]:/path" or a plain local path (see split_remote)
function write_file()
{
	# Quoted so a path containing blanks or glob characters stays one word.
	split_remote "$2"
	VALUE=$1

	if [[ "$REMOTE" != "" ]]; then
		ssh "$REMOTE" "echo \"$VALUE\" > \"$VPATH\""
	else
		echo "$VALUE" > "$VPATH"
	fi
}
# Toggle the link from one side and check that the other side notices.
# $1: ntb_tool debugfs directory of the side whose link is toggled
# $2: ntb_tool debugfs directory of the peer
# Prints " Unsupported" and returns when the link file cannot be written;
# exits the script when the peer does not report the link as down.
function link_test()
{
LOC=$1
REM=$2
EXP=0
echo "Running link tests on: $(basename $LOC) / $(basename $REM)"
# Request link down; a failing write means the link cannot be toggled here.
if ! write_file "N" "$LOC/link" 2> /dev/null; then
echo " Unsupported"
return
fi
# Writing to link_event blocks until the link is in the written state.
write_file "N" "$LOC/link_event"
if [[ $(read_file "$REM/link") != "N" ]]; then
echo "Expected remote link to be down in $REM/link" >&2
exit -1
fi
# Bring the link back up and wait for it before returning.
write_file "Y" "$LOC/link"
write_file "Y" "$LOC/link_event"
echo " Passed"
}
# Set eight doorbell bits one at a time through $LOC/peer_db and check the
# value accumulating in $REM/db.
# $1: ntb_tool debugfs directory of the side ringing the doorbells
# $2: ntb_tool debugfs directory of the side receiving them
# Exits the script on the first mismatch.
function doorbell_test()
{
LOC=$1
REM=$2
EXP=0
echo "Running db tests on: $(basename $LOC) / $(basename $REM)"
# Start from a known state (presumably "c" clears the given bits - confirm
# against the ntb_tool db file syntax).
write_file "c 0xFFFFFFFF" "$REM/db"
for ((i=1; i <= 8; i++)); do
# "let" returns non-zero when the result is 0, which would abort the
# script under "set -e"; hence the "|| true".
let DB=$(read_file "$REM/db") || true
# Each pass verifies the bits set so far, so the bit set on the last
# pass is not read back.
if [[ "$DB" != "$EXP" ]]; then
echo "Doorbell doesn't match expected value $EXP " \
"in $REM/db" >&2
exit -1
fi
let "MASK=1 << ($i-1)" || true
let "EXP=$EXP | $MASK" || true
write_file "s $MASK" "$LOC/peer_db"
done
echo " Passed"
}
# Print the value of one scratchpad register in decimal.
# $1: spad file to read ("[HOST]:/path" or a local path)
# $2: index of the scratchpad
# The file holds one "<index><TAB><value>" row per scratchpad. Prints 0
# when no row matches.
function read_spad()
{
	VPATH=$1
	IDX=$2

	# Require whitespace after the index so that e.g. 1 does not also
	# select rows 10..19.
	ROW=($(read_file "$VPATH" | grep -e "^$IDX[[:space:]]"))
	# "let" converts the hex value; it returns non-zero for a result of 0.
	let VAL=${ROW[1]} || true
	echo $VAL
}
# Write a random value to every scratchpad through $LOC/peer_spad and read
# it back from $REM/spad.
# $1: ntb_tool debugfs directory of the writing side
# $2: ntb_tool debugfs directory of the reading side
# Exits the script on the first mismatch.
function scratchpad_test()
{
LOC=$1
REM=$2
# The spad file has one line per scratchpad.
CNT=$(read_file "$LOC/spad" | wc -l)
echo "Running spad tests on: $(basename $LOC) / $(basename $REM)"
for ((i = 0; i < $CNT; i++)); do
VAL=$RANDOM
write_file "$i $VAL" "$LOC/peer_spad"
RVAL=$(read_spad "$REM/spad" $i)
if [[ "$VAL" != "$RVAL" ]]; then
echo "Scratchpad doesn't match expected value $VAL " \
"in $REM/spad, got $RVAL" >&2
exit -1
fi
done
echo " Passed"
}
# Fill a memory window file with random data.
#
# The file is given as the last argument: either the only one, or the
# second one. dd keeps writing until the window rejects further data, so
# its failure status and diagnostics are deliberately discarded.
function write_mw()
{
	# The caller in this script passes the path as the sole argument;
	# reading only $2 left the target empty and nothing was written.
	split_remote "${2:-$1}"

	if [[ "$REMOTE" != "" ]]; then
		ssh "$REMOTE" \
			dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
	else
		dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
	fi
}
# Fill a memory window from one side and compare it with what the other
# side sees in its peer buffer.
# $1: name of the window file (e.g. mw0)
# $2: ntb_tool debugfs directory of the writing side
# $3: ntb_tool debugfs directory of the receiving side
# Remote files are first copied to temporary files under /tmp. Exits the
# script with status 1 when the first $MW_SIZE bytes differ.
function mw_test()
{
	IDX=$1
	LOC=$2
	REM=$3

	echo "Running $IDX tests on: $(basename $LOC) / $(basename $REM)"

	write_mw "$LOC/$IDX"

	# Remember which operands are temporary copies so that exactly those
	# are removed again, and never a debugfs file.
	TMP_A=
	TMP_B=

	split_remote "$LOC/$IDX"
	if [[ "$REMOTE" == "" ]]; then
		A=$VPATH
	else
		A=/tmp/ntb_test.$$.A
		TMP_A=$A
		ssh "$REMOTE" cat "$VPATH" > "$A"
	fi

	split_remote "$REM/peer_$IDX"
	if [[ "$REMOTE" == "" ]]; then
		B=$VPATH
	else
		B=/tmp/ntb_test.$$.B
		TMP_B=$B
		ssh "$REMOTE" cat "$VPATH" > "$B"
	fi

	# Capture the status instead of letting "set -e" end the script
	# before the result can be reported.
	RC=0
	cmp -n "$MW_SIZE" "$A" "$B" || RC=$?

	if [[ "$TMP_A" != "" ]]; then
		rm -f "$TMP_A"
	fi

	if [[ "$TMP_B" != "" ]]; then
		rm -f "$TMP_B"
	fi

	if [[ $RC != 0 ]]; then
		echo "Memory window $IDX did not match!" >&2
		exit 1
	fi

	echo " Passed"
}
# Check that the ntb_pingpong counters on both sides advance.
# $1: ntb_pingpong debugfs directory of the local side
# $2: ntb_pingpong debugfs directory of the remote side
# Exits the script when either counter is unchanged after the wait.
function pingpong_test()
{
LOC=$1
REM=$2
echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)"
LOC_START=$(read_file $LOC/count)
REM_START=$(read_file $REM/count)
# Give the modules time to exchange pings before sampling again.
sleep 7
LOC_END=$(read_file $LOC/count)
REM_END=$(read_file $REM/count)
if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then
echo "Ping pong counter not incrementing!" >&2
exit 1
fi
echo " Passed"
}
# Load ntb_perf, run the perf test from each side and print both results.
# $1: "1" to run with DMA, anything else to run without
# Uses the LOCAL_PERF and REMOTE_PERF directories set by ntb_perf_tests.
function perf_test()
{
	USE_DMA=$1

	if [[ $USE_DMA == "1" ]]; then
		WITH="with"
	else
		WITH="without"
	fi

	_modprobe ntb_perf run_order=$PERF_RUN_ORDER \
		max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA

	# Writing to the run file starts a test; reading it returns the result.
	echo "Running local perf test $WITH DMA"
	write_file "" $LOCAL_PERF/run
	echo -n " "
	read_file $LOCAL_PERF/run
	echo " Passed"

	echo "Running remote perf test $WITH DMA"
	write_file "" $REMOTE_PERF/run
	echo -n " "
	# Report the remote side's own result, not the local one again.
	read_file $REMOTE_PERF/run
	echo " Passed"

	_modprobe -r ntb_perf
}
# Run all ntb_tool based tests (link, doorbell, scratchpad, memory window)
# in both directions between LOCAL_DEV and REMOTE_DEV.
function ntb_tool_tests()
{
LOCAL_TOOL=$DEBUGFS/ntb_tool/$LOCAL_DEV
# With an empty REMOTE_HOST this starts with ":/", which split_remote
# treats as a local path.
REMOTE_TOOL=$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV
echo "Starting ntb_tool tests..."
_modprobe ntb_tool
# Block until both sides report the link as up.
write_file Y $LOCAL_TOOL/link_event
write_file Y $REMOTE_TOOL/link_event
link_test $LOCAL_TOOL $REMOTE_TOOL
link_test $REMOTE_TOOL $LOCAL_TOOL
# Set up an MW_SIZE byte buffer for every window on both sides; the file
# names are taken from the local side.
for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do
PT=$(basename $PEER_TRANS)
write_file $MW_SIZE $LOCAL_TOOL/$PT
write_file $MW_SIZE $REMOTE_TOOL/$PT
done
doorbell_test $LOCAL_TOOL $REMOTE_TOOL
doorbell_test $REMOTE_TOOL $LOCAL_TOOL
scratchpad_test $LOCAL_TOOL $REMOTE_TOOL
scratchpad_test $REMOTE_TOOL $LOCAL_TOOL
for MW in $(ls $LOCAL_TOOL/mw*); do
MW=$(basename $MW)
mw_test $MW $LOCAL_TOOL $REMOTE_TOOL
mw_test $MW $REMOTE_TOOL $LOCAL_TOOL
done
_modprobe -r ntb_tool
}
# Load ntb_pingpong, run the ping pong counter test and unload the module.
function ntb_pingpong_tests()
{
LOCAL_PP=$DEBUGFS/ntb_pingpong/$LOCAL_DEV
REMOTE_PP=$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV
echo "Starting ntb_pingpong tests..."
_modprobe ntb_pingpong
pingpong_test $LOCAL_PP $REMOTE_PP
_modprobe -r ntb_pingpong
}
# Run the ntb_perf test without DMA and, when RUN_DMA_TESTS is set, once
# more with DMA. perf_test loads and unloads the module itself.
function ntb_perf_tests()
{
LOCAL_PERF=$DEBUGFS/ntb_perf/$LOCAL_DEV
REMOTE_PERF=$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV
echo "Starting ntb_perf tests..."
perf_test 0
if [[ $RUN_DMA_TESTS ]]; then
perf_test 1
fi
}
# Unload every NTB test module. Failures are expected for modules that are
# not loaded, so "set -e" is suspended and the errors are silenced.
function cleanup()
{
set +e
_modprobe -r ntb_tool 2> /dev/null
_modprobe -r ntb_perf 2> /dev/null
_modprobe -r ntb_pingpong 2> /dev/null
_modprobe -r ntb_transport 2> /dev/null
set -e
}
# Start from a clean state: unload any NTB test module left loaded.
cleanup

# Unload the modules again on exit unless -C was given. ("$$" would expand
# to the shell's PID and make the test below always non-empty.)
if ! [[ $DONT_CLEANUP ]]; then
	trap cleanup EXIT
fi
# Loading modules and accessing debugfs both require root.
if [[ "$(id -u)" != "0" ]]; then
	echo "This script must be run as root" 1>&2
	exit 1
fi

# -l: only list the NTB devices on both sides, then stop.
if [[ "$LIST_DEVS" == TRUE ]]; then
	echo "Local Devices:"
	ls -1 /sys/bus/ntb/devices
	echo

	if [[ -n "$REMOTE_HOST" ]]; then
		echo "Remote Devices:"
		ssh $REMOTE_HOST ls -1 /sys/bus/ntb/devices
	fi

	exit 0
fi

# Both device names are mandatory for an actual test run.
if [[ -z "$LOCAL_DEV" ]] || [[ -z "$REMOTE_DEV" ]]; then
	show_help
	exit 1
fi

ntb_tool_tests
echo
ntb_pingpong_tests
echo
ntb_perf_tests
echo
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment