[Feature]add MT2731_MP2_MR2_SVN388 baseline version

Change-Id: Ief04314834b31e27effab435d3ca8ba33b499059
diff --git a/src/kernel/linux/v4.14/drivers/ntb/test/Kconfig b/src/kernel/linux/v4.14/drivers/ntb/test/Kconfig
new file mode 100644
index 0000000..a5d0eda
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/ntb/test/Kconfig
@@ -0,0 +1,27 @@
+config NTB_PINGPONG
+	tristate "NTB Ping Pong Test Client"
+	help
+	 This is a simple ping pong driver that exercises the scratchpads and
+	 doorbells of the ntb hardware.  This driver may be used to test that
+	 your ntb hardware and drivers are functioning at a basic level.
+
+	 If unsure, say N.
+
+config NTB_TOOL
+	tristate "NTB Debugging Tool Test Client"
+	help
+	 This is a simple debugging driver that enables the doorbell and
+	 scratchpad registers to be read and written from the debugfs.  This
+	 enables more complicated debugging to be scripted from user space.
+	 This driver may be used to test that your ntb hardware and drivers are
+	 functioning at a basic level.
+
+	 If unsure, say N.
+
+config NTB_PERF
+	tristate "NTB RAW Perf Measuring Tool"
+	help
+	 This is a tool to measure raw NTB performance by transferring data
+	 to and from the window without additional software interaction.
+
+	 If unsure, say N.
diff --git a/src/kernel/linux/v4.14/drivers/ntb/test/Makefile b/src/kernel/linux/v4.14/drivers/ntb/test/Makefile
new file mode 100644
index 0000000..9e77e0b
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/ntb/test/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
+obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/src/kernel/linux/v4.14/drivers/ntb/test/ntb_perf.c b/src/kernel/linux/v4.14/drivers/ntb/test/ntb_perf.c
new file mode 100644
index 0000000..759f772
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/ntb/test/ntb_perf.c
@@ -0,0 +1,899 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *   PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+#include <linux/mutex.h>
+
+#define DRIVER_NAME		"ntb_perf"
+#define DRIVER_DESCRIPTION	"PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE		"Dual BSD/GPL"
+#define DRIVER_VERSION		"1.0"
+#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"
+
+#define PERF_LINK_DOWN_TIMEOUT	10
+#define PERF_VERSION		0xffff0001
+#define MAX_THREADS		32
+#define MAX_TEST_SIZE		SZ_1M
+#define MAX_SRCS		32
+#define DMA_OUT_RESOURCE_TO	msecs_to_jiffies(50)
+#define DMA_RETRIES		20
+#define SZ_4G			(1ULL << 32)
+#define MAX_SEG_ORDER		20 /* no larger than 1M for kmalloc buffer */
+#define PIDX			NTB_DEF_PEER_IDX
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+static unsigned long max_mw_size;
+module_param(max_mw_size, ulong, 0644);
+MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
+
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
+
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
+
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
+
+static bool on_node = true; /* default to 1 */
+module_param(on_node, bool, 0644);
+MODULE_PARM_DESC(on_node, "Run threads only on NTB device node (default: true)");
+
+struct perf_mw {
+	phys_addr_t	phys_addr;
+	resource_size_t	phys_size;
+	resource_size_t	xlat_align;
+	resource_size_t	xlat_align_size;
+	void __iomem	*vbase;
+	size_t		xlat_size;
+	size_t		buf_size;
+	void		*virt_addr;
+	dma_addr_t	dma_addr;
+};
+
+struct perf_ctx;
+
+struct pthr_ctx {
+	struct task_struct	*thread;
+	struct perf_ctx		*perf;
+	atomic_t		dma_sync;
+	struct dma_chan		*dma_chan;
+	int			dma_prep_err;
+	int			src_idx;
+	void			*srcs[MAX_SRCS];
+	wait_queue_head_t       *wq;
+	int			status;
+	u64			copied;
+	u64			diff_us;
+};
+
+struct perf_ctx {
+	struct ntb_dev		*ntb;
+	spinlock_t		db_lock;
+	struct perf_mw		mw;
+	bool			link_is_up;
+	struct delayed_work	link_work;
+	wait_queue_head_t	link_wq;
+	u8			perf_threads;
+	/* mutex ensures only one set of threads run at once */
+	struct mutex		run_mutex;
+	struct pthr_ctx		pthr_ctx[MAX_THREADS];
+	atomic_t		tsync;
+	atomic_t                tdone;
+};
+
+enum {
+	VERSION = 0,
+	MW_SZ_HIGH,
+	MW_SZ_LOW,
+	MAX_SPAD
+};
+
+static void perf_link_event(void *ctx)
+{
+	struct perf_ctx *perf = ctx;
+
+	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) {
+		schedule_delayed_work(&perf->link_work, 2*HZ);
+	} else {
+		dev_dbg(&perf->ntb->pdev->dev, "link down\n");
+
+		if (!perf->link_is_up)
+			cancel_delayed_work_sync(&perf->link_work);
+
+		perf->link_is_up = false;
+	}
+}
+
+static void perf_db_event(void *ctx, int vec)
+{
+	struct perf_ctx *perf = ctx;
+	u64 db_bits, db_mask;
+
+	db_mask = ntb_db_vector_mask(perf->ntb, vec);
+	db_bits = ntb_db_read(perf->ntb);
+
+	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+		vec, db_mask, db_bits);
+}
+
+static const struct ntb_ctx_ops perf_ops = {
+	.link_event = perf_link_event,
+	.db_event = perf_db_event,
+};
+
+static void perf_copy_callback(void *data)
+{
+	struct pthr_ctx *pctx = data;
+
+	atomic_dec(&pctx->dma_sync);
+}
+
+static ssize_t perf_copy(struct pthr_ctx *pctx, char __iomem *dst,
+			 char *src, size_t size)
+{
+	struct perf_ctx *perf = pctx->perf;
+	struct dma_async_tx_descriptor *txd;
+	struct dma_chan *chan = pctx->dma_chan;
+	struct dma_device *device;
+	struct dmaengine_unmap_data *unmap;
+	dma_cookie_t cookie;
+	size_t src_off, dst_off;
+	struct perf_mw *mw = &perf->mw;
+	void __iomem *vbase;
+	void __iomem *dst_vaddr;
+	dma_addr_t dst_phys;
+	int retries = 0;
+
+	if (!use_dma) {
+		memcpy_toio(dst, src, size);
+		return size;
+	}
+
+	if (!chan) {
+		dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+		return -EINVAL;
+	}
+
+	device = chan->device;
+	src_off = (uintptr_t)src & ~PAGE_MASK;
+	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;
+
+	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+		return -ENODEV;
+
+	vbase = mw->vbase;
+	dst_vaddr = dst;
+	dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+	if (!unmap)
+		return -ENOMEM;
+
+	unmap->len = size;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+				      src_off, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
+
+	unmap->to_cnt = 1;
+
+	do {
+		txd = device->device_prep_dma_memcpy(chan, dst_phys,
+						     unmap->addr[0],
+						     size, DMA_PREP_INTERRUPT);
+		if (!txd) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(DMA_OUT_RESOURCE_TO);
+		}
+	} while (!txd && (++retries < DMA_RETRIES));
+
+	if (!txd) {
+		pctx->dma_prep_err++;
+		goto err_get_unmap;
+	}
+
+	txd->callback = perf_copy_callback;
+	txd->callback_param = pctx;
+	dma_set_unmap(txd, unmap);
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie))
+		goto err_set_unmap;
+
+	dmaengine_unmap_put(unmap);
+
+	atomic_inc(&pctx->dma_sync);
+	dma_async_issue_pending(chan);
+
+	return size;
+
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
+	return 0;
+}
+
+static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
+			  u64 buf_size, u64 win_size, u64 total)
+{
+	int chunks, total_chunks, i;
+	int copied_chunks = 0;
+	u64 copied = 0, result;
+	char __iomem *tmp = dst;
+	u64 perf, diff_us;
+	ktime_t kstart, kstop, kdiff;
+	unsigned long last_sleep = jiffies;
+
+	chunks = div64_u64(win_size, buf_size);
+	total_chunks = div64_u64(total, buf_size);
+	kstart = ktime_get();
+
+	for (i = 0; i < total_chunks; i++) {
+		result = perf_copy(pctx, tmp, src, buf_size);
+		copied += result;
+		copied_chunks++;
+		if (copied_chunks == chunks) {
+			tmp = dst;
+			copied_chunks = 0;
+		} else
+			tmp += buf_size;
+
+		/* Probably should schedule every 5s to prevent soft hang. */
+		if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
+			last_sleep = jiffies;
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(1);
+		}
+
+		if (unlikely(kthread_should_stop()))
+			break;
+	}
+
+	if (use_dma) {
+		pr_debug("%s: All DMA descriptors submitted\n", current->comm);
+		while (atomic_read(&pctx->dma_sync) != 0) {
+			if (kthread_should_stop())
+				break;
+			msleep(20);
+		}
+	}
+
+	kstop = ktime_get();
+	kdiff = ktime_sub(kstop, kstart);
+	diff_us = ktime_to_us(kdiff);
+
+	pr_debug("%s: copied %llu bytes\n", current->comm, copied);
+
+	pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+	perf = div64_u64(copied, diff_us);
+
+	pr_debug("%s: MBytes/s: %llu\n", current->comm, perf);
+
+	pctx->copied = copied;
+	pctx->diff_us = diff_us;
+
+	return 0;
+}
+
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+	/* Is the channel required to be on the same node as the device? */
+	if (!on_node)
+		return true;
+
+	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
+}
+
+static int ntb_perf_thread(void *data)
+{
+	struct pthr_ctx *pctx = data;
+	struct perf_ctx *perf = pctx->perf;
+	struct pci_dev *pdev = perf->ntb->pdev;
+	struct perf_mw *mw = &perf->mw;
+	char __iomem *dst;
+	u64 win_size, buf_size, total;
+	void *src;
+	int rc, node, i;
+	struct dma_chan *dma_chan = NULL;
+
+	pr_debug("kthread %s starting...\n", current->comm);
+
+	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
+
+	if (use_dma && !pctx->dma_chan) {
+		dma_cap_mask_t dma_mask;
+
+		dma_cap_zero(dma_mask);
+		dma_cap_set(DMA_MEMCPY, dma_mask);
+		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+					       (void *)(unsigned long)node);
+		if (!dma_chan) {
+			pr_warn("%s: cannot acquire DMA channel, quitting\n",
+				current->comm);
+			return -ENODEV;
+		}
+		pctx->dma_chan = dma_chan;
+	}
+
+	for (i = 0; i < MAX_SRCS; i++) {
+		pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+		if (!pctx->srcs[i]) {
+			rc = -ENOMEM;
+			goto err;
+		}
+	}
+
+	win_size = mw->phys_size;
+	buf_size = 1ULL << seg_order;
+	total = 1ULL << run_order;
+
+	if (buf_size > MAX_TEST_SIZE)
+		buf_size = MAX_TEST_SIZE;
+
+	dst = (char __iomem *)mw->vbase;
+
+	atomic_inc(&perf->tsync);
+	while (atomic_read(&perf->tsync) != perf->perf_threads)
+		schedule();
+
+	src = pctx->srcs[pctx->src_idx];
+	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+	atomic_dec(&perf->tsync);
+
+	if (rc < 0) {
+		pr_err("%s: failed\n", current->comm);
+		rc = -ENXIO;
+		goto err;
+	}
+
+	for (i = 0; i < MAX_SRCS; i++) {
+		kfree(pctx->srcs[i]);
+		pctx->srcs[i] = NULL;
+	}
+
+	atomic_inc(&perf->tdone);
+	wake_up(pctx->wq);
+	rc = 0;
+	goto done;
+
+err:
+	for (i = 0; i < MAX_SRCS; i++) {
+		kfree(pctx->srcs[i]);
+		pctx->srcs[i] = NULL;
+	}
+
+	if (dma_chan) {
+		dma_release_channel(dma_chan);
+		pctx->dma_chan = NULL;
+	}
+
+done:
+	/* Wait until we are told to stop */
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (kthread_should_stop())
+			break;
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+
+	return rc;
+}
+
+static void perf_free_mw(struct perf_ctx *perf)
+{
+	struct perf_mw *mw = &perf->mw;
+	struct pci_dev *pdev = perf->ntb->pdev;
+
+	if (!mw->virt_addr)
+		return;
+
+	ntb_mw_clear_trans(perf->ntb, PIDX, 0);
+	dma_free_coherent(&pdev->dev, mw->buf_size,
+			  mw->virt_addr, mw->dma_addr);
+	mw->xlat_size = 0;
+	mw->buf_size = 0;
+	mw->virt_addr = NULL;
+}
+
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+	struct perf_mw *mw = &perf->mw;
+	size_t xlat_size, buf_size;
+	int rc;
+
+	if (!size)
+		return -EINVAL;
+
+	xlat_size = round_up(size, mw->xlat_align_size);
+	buf_size = round_up(size, mw->xlat_align);
+
+	if (mw->xlat_size == xlat_size)
+		return 0;
+
+	if (mw->buf_size)
+		perf_free_mw(perf);
+
+	mw->xlat_size = xlat_size;
+	mw->buf_size = buf_size;
+
+	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+					   &mw->dma_addr, GFP_KERNEL);
+	if (!mw->virt_addr) {
+		mw->xlat_size = 0;
+		mw->buf_size = 0;
+	}
+
+	rc = ntb_mw_set_trans(perf->ntb, PIDX, 0, mw->dma_addr, mw->xlat_size);
+	if (rc) {
+		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
+		perf_free_mw(perf);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void perf_link_work(struct work_struct *work)
+{
+	struct perf_ctx *perf =
+		container_of(work, struct perf_ctx, link_work.work);
+	struct ntb_dev *ndev = perf->ntb;
+	struct pci_dev *pdev = ndev->pdev;
+	u32 val;
+	u64 size;
+	int rc;
+
+	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+	size = perf->mw.phys_size;
+
+	if (max_mw_size && size > max_mw_size)
+		size = max_mw_size;
+
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_HIGH, upper_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, MW_SZ_LOW, lower_32_bits(size));
+	ntb_peer_spad_write(ndev, PIDX, VERSION, PERF_VERSION);
+
+	/* now read what peer wrote */
+	val = ntb_spad_read(ndev, VERSION);
+	if (val != PERF_VERSION) {
+		dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
+		goto out;
+	}
+
+	val = ntb_spad_read(ndev, MW_SZ_HIGH);
+	size = (u64)val << 32;
+
+	val = ntb_spad_read(ndev, MW_SZ_LOW);
+	size |= val;
+
+	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
+
+	rc = perf_set_mw(perf, size);
+	if (rc)
+		goto out1;
+
+	perf->link_is_up = true;
+	wake_up(&perf->link_wq);
+
+	return;
+
+out1:
+	perf_free_mw(perf);
+
+out:
+	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+		schedule_delayed_work(&perf->link_work,
+				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+	struct perf_mw *mw;
+	int rc;
+
+	mw = &perf->mw;
+
+	rc = ntb_mw_get_align(ntb, PIDX, 0, &mw->xlat_align,
+			      &mw->xlat_align_size, NULL);
+	if (rc)
+		return rc;
+
+	rc = ntb_peer_mw_get_addr(ntb, 0, &mw->phys_addr, &mw->phys_size);
+	if (rc)
+		return rc;
+
+	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+	if (!mw->vbase)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+				size_t count, loff_t *offp)
+{
+	struct perf_ctx *perf = filp->private_data;
+	char *buf;
+	ssize_t ret, out_off = 0;
+	struct pthr_ctx *pctx;
+	int i;
+	u64 rate;
+
+	if (!perf)
+		return 0;
+
+	buf = kmalloc(1024, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (mutex_is_locked(&perf->run_mutex)) {
+		out_off = scnprintf(buf, 64, "running\n");
+		goto read_from_buf;
+	}
+
+	for (i = 0; i < MAX_THREADS; i++) {
+		pctx = &perf->pthr_ctx[i];
+
+		if (pctx->status == -ENODATA)
+			break;
+
+		if (pctx->status) {
+			out_off += scnprintf(buf + out_off, 1024 - out_off,
+					    "%d: error %d\n", i,
+					    pctx->status);
+			continue;
+		}
+
+		rate = div64_u64(pctx->copied, pctx->diff_us);
+		out_off += scnprintf(buf + out_off, 1024 - out_off,
+			"%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
+			i, pctx->copied, pctx->diff_us, rate);
+	}
+
+read_from_buf:
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off);
+	kfree(buf);
+
+	return ret;
+}
+
+static void threads_cleanup(struct perf_ctx *perf)
+{
+	struct pthr_ctx *pctx;
+	int i;
+
+	for (i = 0; i < MAX_THREADS; i++) {
+		pctx = &perf->pthr_ctx[i];
+		if (pctx->thread) {
+			pctx->status = kthread_stop(pctx->thread);
+			pctx->thread = NULL;
+		}
+	}
+}
+
+static void perf_clear_thread_status(struct perf_ctx *perf)
+{
+	int i;
+
+	for (i = 0; i < MAX_THREADS; i++)
+		perf->pthr_ctx[i].status = -ENODATA;
+}
+
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+				 size_t count, loff_t *offp)
+{
+	struct perf_ctx *perf = filp->private_data;
+	int node, i;
+	DECLARE_WAIT_QUEUE_HEAD(wq);
+
+	if (wait_event_interruptible(perf->link_wq, perf->link_is_up))
+		return -ENOLINK;
+
+	if (perf->perf_threads == 0)
+		return -EINVAL;
+
+	if (!mutex_trylock(&perf->run_mutex))
+		return -EBUSY;
+
+	perf_clear_thread_status(perf);
+
+	if (perf->perf_threads > MAX_THREADS) {
+		perf->perf_threads = MAX_THREADS;
+		pr_info("Reset total threads to: %u\n", MAX_THREADS);
+	}
+
+	/* no greater than 1M */
+	if (seg_order > MAX_SEG_ORDER) {
+		seg_order = MAX_SEG_ORDER;
+		pr_info("Fix seg_order to %u\n", seg_order);
+	}
+
+	if (run_order < seg_order) {
+		run_order = seg_order;
+		pr_info("Fix run_order to %u\n", run_order);
+	}
+
+	node = on_node ? dev_to_node(&perf->ntb->pdev->dev)
+		       : NUMA_NO_NODE;
+	atomic_set(&perf->tdone, 0);
+
+	/* launch kernel thread */
+	for (i = 0; i < perf->perf_threads; i++) {
+		struct pthr_ctx *pctx;
+
+		pctx = &perf->pthr_ctx[i];
+		atomic_set(&pctx->dma_sync, 0);
+		pctx->perf = perf;
+		pctx->wq = &wq;
+		pctx->thread =
+			kthread_create_on_node(ntb_perf_thread,
+					       (void *)pctx,
+					       node, "ntb_perf %d", i);
+		if (IS_ERR(pctx->thread)) {
+			pctx->thread = NULL;
+			goto err;
+		} else {
+			wake_up_process(pctx->thread);
+		}
+	}
+
+	wait_event_interruptible(wq,
+		atomic_read(&perf->tdone) == perf->perf_threads);
+
+	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
+	return count;
+
+err:
+	threads_cleanup(perf);
+	mutex_unlock(&perf->run_mutex);
+	return -ENXIO;
+}
+
+static const struct file_operations ntb_perf_debugfs_run = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = debugfs_run_read,
+	.write = debugfs_run_write,
+};
+
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+	struct pci_dev *pdev = perf->ntb->pdev;
+	struct dentry *debugfs_node_dir;
+	struct dentry *debugfs_run;
+	struct dentry *debugfs_threads;
+	struct dentry *debugfs_seg_order;
+	struct dentry *debugfs_run_order;
+	struct dentry *debugfs_use_dma;
+	struct dentry *debugfs_on_node;
+
+	if (!debugfs_initialized())
+		return -ENODEV;
+
+	/* Assumpion: only one NTB device in the system */
+	if (!perf_debugfs_dir) {
+		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+		if (!perf_debugfs_dir)
+			return -ENODEV;
+	}
+
+	debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+					      perf_debugfs_dir);
+	if (!debugfs_node_dir)
+		goto err;
+
+	debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+					  debugfs_node_dir, perf,
+					  &ntb_perf_debugfs_run);
+	if (!debugfs_run)
+		goto err;
+
+	debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+					    debugfs_node_dir,
+					    &perf->perf_threads);
+	if (!debugfs_threads)
+		goto err;
+
+	debugfs_seg_order = debugfs_create_u32("seg_order", 0600,
+					       debugfs_node_dir,
+					       &seg_order);
+	if (!debugfs_seg_order)
+		goto err;
+
+	debugfs_run_order = debugfs_create_u32("run_order", 0600,
+					       debugfs_node_dir,
+					       &run_order);
+	if (!debugfs_run_order)
+		goto err;
+
+	debugfs_use_dma = debugfs_create_bool("use_dma", 0600,
+					       debugfs_node_dir,
+					       &use_dma);
+	if (!debugfs_use_dma)
+		goto err;
+
+	debugfs_on_node = debugfs_create_bool("on_node", 0600,
+					      debugfs_node_dir,
+					      &on_node);
+	if (!debugfs_on_node)
+		goto err;
+
+	return 0;
+
+err:
+	debugfs_remove_recursive(perf_debugfs_dir);
+	perf_debugfs_dir = NULL;
+	return -ENODEV;
+}
+
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct pci_dev *pdev = ntb->pdev;
+	struct perf_ctx *perf;
+	int node;
+	int rc = 0;
+
+	if (ntb_spad_count(ntb) < MAX_SPAD) {
+		dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
+			DRIVER_NAME);
+		return -EIO;
+	}
+
+	if (!ntb->ops->mw_set_trans) {
+		dev_err(&ntb->dev, "Need inbound MW based NTB API\n");
+		return -EINVAL;
+	}
+
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "Multi-port NTB devices unsupported\n");
+
+	node = on_node ? dev_to_node(&pdev->dev) : NUMA_NO_NODE;
+	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+	if (!perf) {
+		rc = -ENOMEM;
+		goto err_perf;
+	}
+
+	perf->ntb = ntb;
+	perf->perf_threads = 1;
+	atomic_set(&perf->tsync, 0);
+	mutex_init(&perf->run_mutex);
+	spin_lock_init(&perf->db_lock);
+	perf_setup_mw(ntb, perf);
+	init_waitqueue_head(&perf->link_wq);
+	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+
+	rc = ntb_set_ctx(ntb, perf, &perf_ops);
+	if (rc)
+		goto err_ctx;
+
+	perf->link_is_up = false;
+	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	ntb_link_event(ntb);
+
+	rc = perf_debugfs_setup(perf);
+	if (rc)
+		goto err_ctx;
+
+	perf_clear_thread_status(perf);
+
+	return 0;
+
+err_ctx:
+	cancel_delayed_work_sync(&perf->link_work);
+	kfree(perf);
+err_perf:
+	return rc;
+}
+
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct perf_ctx *perf = ntb->ctx;
+	int i;
+
+	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+	mutex_lock(&perf->run_mutex);
+
+	cancel_delayed_work_sync(&perf->link_work);
+
+	ntb_clear_ctx(ntb);
+	ntb_link_disable(ntb);
+
+	debugfs_remove_recursive(perf_debugfs_dir);
+	perf_debugfs_dir = NULL;
+
+	if (use_dma) {
+		for (i = 0; i < MAX_THREADS; i++) {
+			struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+			if (pctx->dma_chan)
+				dma_release_channel(pctx->dma_chan);
+		}
+	}
+
+	kfree(perf);
+}
+
+static struct ntb_client perf_client = {
+	.ops = {
+		.probe = perf_probe,
+		.remove = perf_remove,
+	},
+};
+module_ntb_client(perf_client);
diff --git a/src/kernel/linux/v4.14/drivers/ntb/test/ntb_pingpong.c b/src/kernel/linux/v4.14/drivers/ntb/test/ntb_pingpong.c
new file mode 100644
index 0000000..938a18b
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/ntb/test/ntb_pingpong.c
@@ -0,0 +1,322 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Pingpong Linux driver
+ *
+ * Contact Information:
+ * Allen Hubbe <Allen.Hubbe@emc.com>
+ */
+
+/* Note: load this module with option 'dyndbg=+p' */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+
+#include <linux/ntb.h>
+
+#define DRIVER_NAME			"ntb_pingpong"
+#define DRIVER_DESCRIPTION		"PCIe NTB Simple Pingpong Client"
+
+#define DRIVER_LICENSE			"Dual BSD/GPL"
+#define DRIVER_VERSION			"1.0"
+#define DRIVER_RELDATE			"24 March 2015"
+#define DRIVER_AUTHOR			"Allen Hubbe <Allen.Hubbe@emc.com>"
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static unsigned int unsafe;
+module_param(unsafe, uint, 0644);
+MODULE_PARM_DESC(unsafe, "Run even though ntb operations may be unsafe");
+
+static unsigned int delay_ms = 1000;
+module_param(delay_ms, uint, 0644);
+MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer");
+
+static unsigned long db_init = 0x7;
+module_param(db_init, ulong, 0644);
+MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer");
+
+/* Only two-ports NTB devices are supported */
+#define PIDX		NTB_DEF_PEER_IDX
+
+struct pp_ctx {
+	struct ntb_dev			*ntb;
+	u64				db_bits;
+	/* synchronize access to db_bits by ping and pong */
+	spinlock_t			db_lock;
+	struct timer_list		db_timer;
+	unsigned long			db_delay;
+	struct dentry			*debugfs_node_dir;
+	struct dentry			*debugfs_count;
+	atomic_t			count;
+};
+
+static struct dentry *pp_debugfs_dir;
+
+static void pp_ping(unsigned long ctx)
+{
+	struct pp_ctx *pp = (void *)ctx;
+	unsigned long irqflags;
+	u64 db_bits, db_mask;
+	u32 spad_rd, spad_wr;
+
+	spin_lock_irqsave(&pp->db_lock, irqflags);
+	{
+		db_mask = ntb_db_valid_mask(pp->ntb);
+		db_bits = ntb_db_read(pp->ntb);
+
+		if (db_bits) {
+			dev_dbg(&pp->ntb->dev,
+				"Masked pongs %#llx\n",
+				db_bits);
+			ntb_db_clear(pp->ntb, db_bits);
+		}
+
+		db_bits = ((pp->db_bits | db_bits) << 1) & db_mask;
+
+		if (!db_bits)
+			db_bits = db_init;
+
+		spad_rd = ntb_spad_read(pp->ntb, 0);
+		spad_wr = spad_rd + 1;
+
+		dev_dbg(&pp->ntb->dev,
+			"Ping bits %#llx read %#x write %#x\n",
+			db_bits, spad_rd, spad_wr);
+
+		ntb_peer_spad_write(pp->ntb, PIDX, 0, spad_wr);
+		ntb_peer_db_set(pp->ntb, db_bits);
+		ntb_db_clear_mask(pp->ntb, db_mask);
+
+		pp->db_bits = 0;
+	}
+	spin_unlock_irqrestore(&pp->db_lock, irqflags);
+}
+
+static void pp_link_event(void *ctx)
+{
+	struct pp_ctx *pp = ctx;
+
+	if (ntb_link_is_up(pp->ntb, NULL, NULL) == 1) {
+		dev_dbg(&pp->ntb->dev, "link is up\n");
+		pp_ping((unsigned long)pp);
+	} else {
+		dev_dbg(&pp->ntb->dev, "link is down\n");
+		del_timer(&pp->db_timer);
+	}
+}
+
+static void pp_db_event(void *ctx, int vec)
+{
+	struct pp_ctx *pp = ctx;
+	u64 db_bits, db_mask;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&pp->db_lock, irqflags);
+	{
+		db_mask = ntb_db_vector_mask(pp->ntb, vec);
+		db_bits = db_mask & ntb_db_read(pp->ntb);
+		ntb_db_set_mask(pp->ntb, db_mask);
+		ntb_db_clear(pp->ntb, db_bits);
+
+		pp->db_bits |= db_bits;
+
+		mod_timer(&pp->db_timer, jiffies + pp->db_delay);
+
+		dev_dbg(&pp->ntb->dev,
+			"Pong vec %d bits %#llx\n",
+			vec, db_bits);
+		atomic_inc(&pp->count);
+	}
+	spin_unlock_irqrestore(&pp->db_lock, irqflags);
+}
+
+static int pp_debugfs_setup(struct pp_ctx *pp)
+{
+	struct pci_dev *pdev = pp->ntb->pdev;
+
+	if (!pp_debugfs_dir)
+		return -ENODEV;
+
+	pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+						  pp_debugfs_dir);
+	if (!pp->debugfs_node_dir)
+		return -ENODEV;
+
+	pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR,
+						    pp->debugfs_node_dir,
+						    &pp->count);
+	if (!pp->debugfs_count)
+		return -ENODEV;
+
+	return 0;
+}
+
+static const struct ntb_ctx_ops pp_ops = {
+	.link_event = pp_link_event,
+	.db_event = pp_db_event,
+};
+
+static int pp_probe(struct ntb_client *client,
+		    struct ntb_dev *ntb)
+{
+	struct pp_ctx *pp;
+	int rc;
+
+	if (ntb_db_is_unsafe(ntb)) {
+		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
+		if (!unsafe) {
+			rc = -EINVAL;
+			goto err_pp;
+		}
+	}
+
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_pp;
+	}
+
+	if (ntb_spad_is_unsafe(ntb)) {
+		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
+		if (!unsafe) {
+			rc = -EINVAL;
+			goto err_pp;
+		}
+	}
+
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
+	pp = kmalloc(sizeof(*pp), GFP_KERNEL);
+	if (!pp) {
+		rc = -ENOMEM;
+		goto err_pp;
+	}
+
+	pp->ntb = ntb;
+	pp->db_bits = 0;
+	atomic_set(&pp->count, 0);
+	spin_lock_init(&pp->db_lock);
+	setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp);
+	pp->db_delay = msecs_to_jiffies(delay_ms);
+
+	rc = ntb_set_ctx(ntb, pp, &pp_ops);
+	if (rc)
+		goto err_ctx;
+
+	rc = pp_debugfs_setup(pp);
+	if (rc)
+		goto err_ctx;
+
+	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	ntb_link_event(ntb);
+
+	return 0;
+
+err_ctx:
+	kfree(pp);
+err_pp:
+	return rc;
+}
+
+static void pp_remove(struct ntb_client *client,
+		      struct ntb_dev *ntb)
+{
+	struct pp_ctx *pp = ntb->ctx;
+
+	debugfs_remove_recursive(pp->debugfs_node_dir);
+
+	ntb_clear_ctx(ntb);
+	del_timer_sync(&pp->db_timer);
+	ntb_link_disable(ntb);
+
+	kfree(pp);
+}
+
+static struct ntb_client pp_client = {
+	.ops = {
+		.probe = pp_probe,
+		.remove = pp_remove,
+	},
+};
+
+static int __init pp_init(void)
+{
+	int rc;
+
+	if (debugfs_initialized())
+		pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	rc = ntb_register_client(&pp_client);
+	if (rc)
+		goto err_client;
+
+	return 0;
+
+err_client:
+	debugfs_remove_recursive(pp_debugfs_dir);
+	return rc;
+}
+module_init(pp_init);
+
+static void __exit pp_exit(void)
+{
+	ntb_unregister_client(&pp_client);
+	debugfs_remove_recursive(pp_debugfs_dir);
+}
+module_exit(pp_exit);
diff --git a/src/kernel/linux/v4.14/drivers/ntb/test/ntb_tool.c b/src/kernel/linux/v4.14/drivers/ntb/test/ntb_tool.c
new file mode 100644
index 0000000..a69815c
--- /dev/null
+++ b/src/kernel/linux/v4.14/drivers/ntb/test/ntb_tool.c
@@ -0,0 +1,1032 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *   General Public License for more details.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2015 EMC Corporation. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Debugging Tool Linux driver
+ *
+ * Contact Information:
+ * Allen Hubbe <Allen.Hubbe@emc.com>
+ */
+
+/*
+ * How to use this tool, by example.
+ *
+ * Assuming $DBG_DIR is something like:
+ * '/sys/kernel/debug/ntb_tool/0000:00:03.0'
+ *
+ * Eg: check if clearing the doorbell mask generates an interrupt.
+ *
+ * # Check the link status
+ * root@self# cat $DBG_DIR/link
+ *
+ * # Block until the link is up
+ * root@self# echo Y > $DBG_DIR/link_event
+ *
+ * # Set the doorbell mask
+ * root@self# echo 's 1' > $DBG_DIR/mask
+ *
+ * # Ring the doorbell from the peer
+ * root@peer# echo 's 1' > $DBG_DIR/peer_db
+ *
+ * # Clear the doorbell mask
+ * root@self# echo 'c 1' > $DBG_DIR/mask
+ *
+ * Observe debugging output in dmesg or your console.  You should see a
+ * doorbell event triggered by clearing the mask.  If not, this may indicate an
+ * issue with the hardware that needs to be worked around in the driver.
+ *
+ * Eg: read and write scratchpad registers
+ *
+ * root@peer# echo '0 0x01010101 1 0x7f7f7f7f' > $DBG_DIR/peer_spad
+ *
+ * root@self# cat $DBG_DIR/spad
+ *
+ * Observe that spad 0 and 1 have the values set by the peer.
+ *
+ * # Check the memory window translation info
+ * cat $DBG_DIR/peer_trans0
+ *
+ * # Setup a 16k memory window buffer
+ * echo 16384 > $DBG_DIR/peer_trans0
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <linux/ntb.h>
+
+#define DRIVER_NAME			"ntb_tool"
+#define DRIVER_DESCRIPTION		"PCIe NTB Debugging Tool"
+
+#define DRIVER_LICENSE			"Dual BSD/GPL"
+#define DRIVER_VERSION			"1.0"
+#define DRIVER_RELDATE			"22 April 2015"
+#define DRIVER_AUTHOR			"Allen Hubbe <Allen.Hubbe@emc.com>"
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+/* It is rare to have hadrware with greater than six MWs */
+#define MAX_MWS	6
+/* Only two-ports devices are supported */
+#define PIDX	NTB_DEF_PEER_IDX
+
+static struct dentry *tool_dbgfs;
+
+struct tool_mw {
+	int idx;
+	struct tool_ctx *tc;
+	resource_size_t win_size;
+	resource_size_t size;
+	u8 __iomem *local;
+	u8 *peer;
+	dma_addr_t peer_dma;
+	struct dentry *peer_dbg_file;
+};
+
+struct tool_ctx {
+	struct ntb_dev *ntb;
+	struct dentry *dbgfs;
+	wait_queue_head_t link_wq;
+	int mw_count;
+	struct tool_mw mws[MAX_MWS];
+};
+
+#define SPAD_FNAME_SIZE 0x10
+#define INT_PTR(x) ((void *)(unsigned long)x)
+#define PTR_INT(x) ((int)(unsigned long)x)
+
+#define TOOL_FOPS_RDWR(__name, __read, __write) \
+	const struct file_operations __name = {	\
+		.owner = THIS_MODULE,		\
+		.open = simple_open,		\
+		.read = __read,			\
+		.write = __write,		\
+	}
+
+static void tool_link_event(void *ctx)
+{
+	struct tool_ctx *tc = ctx;
+	enum ntb_speed speed;
+	enum ntb_width width;
+	int up;
+
+	up = ntb_link_is_up(tc->ntb, &speed, &width);
+
+	dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n",
+		up ? "up" : "down", speed, width);
+
+	wake_up(&tc->link_wq);
+}
+
+static void tool_db_event(void *ctx, int vec)
+{
+	struct tool_ctx *tc = ctx;
+	u64 db_bits, db_mask;
+
+	db_mask = ntb_db_vector_mask(tc->ntb, vec);
+	db_bits = ntb_db_read(tc->ntb);
+
+	dev_dbg(&tc->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+		vec, db_mask, db_bits);
+}
+
+static const struct ntb_ctx_ops tool_ops = {
+	.link_event = tool_link_event,
+	.db_event = tool_db_event,
+};
+
+static ssize_t tool_dbfn_read(struct tool_ctx *tc, char __user *ubuf,
+			      size_t size, loff_t *offp,
+			      u64 (*db_read_fn)(struct ntb_dev *))
+{
+	size_t buf_size;
+	char *buf;
+	ssize_t pos, rc;
+
+	if (!db_read_fn)
+		return -EINVAL;
+
+	buf_size = min_t(size_t, size, 0x20);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	pos = scnprintf(buf, buf_size, "%#llx\n",
+			db_read_fn(tc->ntb));
+
+	rc = simple_read_from_buffer(ubuf, size, offp, buf, pos);
+
+	kfree(buf);
+
+	return rc;
+}
+
+static ssize_t tool_dbfn_write(struct tool_ctx *tc,
+			       const char __user *ubuf,
+			       size_t size, loff_t *offp,
+			       int (*db_set_fn)(struct ntb_dev *, u64),
+			       int (*db_clear_fn)(struct ntb_dev *, u64))
+{
+	u64 db_bits;
+	char *buf, cmd;
+	ssize_t rc;
+	int n;
+
+	buf = kmalloc(size + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	rc = simple_write_to_buffer(buf, size, offp, ubuf, size);
+	if (rc < 0) {
+		kfree(buf);
+		return rc;
+	}
+
+	buf[size] = 0;
+
+	n = sscanf(buf, "%c %lli", &cmd, &db_bits);
+
+	kfree(buf);
+
+	if (n != 2) {
+		rc = -EINVAL;
+	} else if (cmd == 's') {
+		if (!db_set_fn)
+			rc = -EINVAL;
+		else
+			rc = db_set_fn(tc->ntb, db_bits);
+	} else if (cmd == 'c') {
+		if (!db_clear_fn)
+			rc = -EINVAL;
+		else
+			rc = db_clear_fn(tc->ntb, db_bits);
+	} else {
+		rc = -EINVAL;
+	}
+
+	return rc ? : size;
+}
+
+static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf,
+				size_t size, loff_t *offp,
+				u32 (*spad_read_fn)(struct ntb_dev *, int))
+{
+	size_t buf_size;
+	char *buf;
+	ssize_t pos, rc;
+	int i, spad_count;
+
+	if (!spad_read_fn)
+		return -EINVAL;
+
+	spad_count = ntb_spad_count(tc->ntb);
+
+	/*
+	 * We multiply the number of spads by 15 to get the buffer size
+	 * this is from 3 for the %d, 10 for the largest hex value
+	 * (0x00000000) and 2 for the tab and line feed.
+	 */
+	buf_size = min_t(size_t, size, spad_count * 15);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	pos = 0;
+
+	for (i = 0; i < spad_count; ++i) {
+		pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n",
+				 i, spad_read_fn(tc->ntb, i));
+	}
+
+	rc = simple_read_from_buffer(ubuf, size, offp, buf, pos);
+
+	kfree(buf);
+
+	return rc;
+}
+
+static ssize_t tool_spadfn_write(struct tool_ctx *tc,
+				 const char __user *ubuf,
+				 size_t size, loff_t *offp,
+				 int (*spad_write_fn)(struct ntb_dev *,
+						      int, u32))
+{
+	int spad_idx;
+	u32 spad_val;
+	char *buf, *buf_ptr;
+	int pos, n;
+	ssize_t rc;
+
+	if (!spad_write_fn) {
+		dev_dbg(&tc->ntb->dev, "no spad write fn\n");
+		return -EINVAL;
+	}
+
+	buf = kmalloc(size + 1, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	rc = simple_write_to_buffer(buf, size, offp, ubuf, size);
+	if (rc < 0) {
+		kfree(buf);
+		return rc;
+	}
+
+	buf[size] = 0;
+	buf_ptr = buf;
+	n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos);
+	while (n == 2) {
+		buf_ptr += pos;
+		rc = spad_write_fn(tc->ntb, spad_idx, spad_val);
+		if (rc)
+			break;
+
+		n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos);
+	}
+
+	if (n < 0)
+		rc = n;
+
+	kfree(buf);
+
+	return rc ? : size;
+}
+
+static ssize_t tool_db_read(struct file *filep, char __user *ubuf,
+			    size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_read(tc, ubuf, size, offp,
+			      tc->ntb->ops->db_read);
+}
+
+static ssize_t tool_db_write(struct file *filep, const char __user *ubuf,
+			     size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_write(tc, ubuf, size, offp,
+			       tc->ntb->ops->db_set,
+			       tc->ntb->ops->db_clear);
+}
+
+static TOOL_FOPS_RDWR(tool_db_fops,
+		      tool_db_read,
+		      tool_db_write);
+
+static ssize_t tool_mask_read(struct file *filep, char __user *ubuf,
+			      size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_read(tc, ubuf, size, offp,
+			      tc->ntb->ops->db_read_mask);
+}
+
+static ssize_t tool_mask_write(struct file *filep, const char __user *ubuf,
+			       size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_write(tc, ubuf, size, offp,
+			       tc->ntb->ops->db_set_mask,
+			       tc->ntb->ops->db_clear_mask);
+}
+
+static TOOL_FOPS_RDWR(tool_mask_fops,
+		      tool_mask_read,
+		      tool_mask_write);
+
+static ssize_t tool_peer_db_read(struct file *filep, char __user *ubuf,
+				 size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_read(tc, ubuf, size, offp,
+			      tc->ntb->ops->peer_db_read);
+}
+
+static ssize_t tool_peer_db_write(struct file *filep, const char __user *ubuf,
+				  size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_write(tc, ubuf, size, offp,
+			       tc->ntb->ops->peer_db_set,
+			       tc->ntb->ops->peer_db_clear);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_db_fops,
+		      tool_peer_db_read,
+		      tool_peer_db_write);
+
+static ssize_t tool_peer_mask_read(struct file *filep, char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_read(tc, ubuf, size, offp,
+			      tc->ntb->ops->peer_db_read_mask);
+}
+
+static ssize_t tool_peer_mask_write(struct file *filep, const char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_dbfn_write(tc, ubuf, size, offp,
+			       tc->ntb->ops->peer_db_set_mask,
+			       tc->ntb->ops->peer_db_clear_mask);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_mask_fops,
+		      tool_peer_mask_read,
+		      tool_peer_mask_write);
+
+static ssize_t tool_spad_read(struct file *filep, char __user *ubuf,
+			      size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_spadfn_read(tc, ubuf, size, offp,
+				tc->ntb->ops->spad_read);
+}
+
+static ssize_t tool_spad_write(struct file *filep, const char __user *ubuf,
+			       size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_spadfn_write(tc, ubuf, size, offp,
+				 tc->ntb->ops->spad_write);
+}
+
+static TOOL_FOPS_RDWR(tool_spad_fops,
+		      tool_spad_read,
+		      tool_spad_write);
+
+static u32 ntb_tool_peer_spad_read(struct ntb_dev *ntb, int sidx)
+{
+	return ntb_peer_spad_read(ntb, PIDX, sidx);
+}
+
+static ssize_t tool_peer_spad_read(struct file *filep, char __user *ubuf,
+				   size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_spadfn_read(tc, ubuf, size, offp, ntb_tool_peer_spad_read);
+}
+
+static int ntb_tool_peer_spad_write(struct ntb_dev *ntb, int sidx, u32 val)
+{
+	return ntb_peer_spad_write(ntb, PIDX, sidx, val);
+}
+
+static ssize_t tool_peer_spad_write(struct file *filep, const char __user *ubuf,
+				    size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+
+	return tool_spadfn_write(tc, ubuf, size, offp,
+				 ntb_tool_peer_spad_write);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_spad_fops,
+		      tool_peer_spad_read,
+		      tool_peer_spad_write);
+
+static ssize_t tool_link_read(struct file *filep, char __user *ubuf,
+			      size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+	char buf[3];
+
+	buf[0] = ntb_link_is_up(tc->ntb, NULL, NULL) ? 'Y' : 'N';
+	buf[1] = '\n';
+	buf[2] = '\0';
+
+	return simple_read_from_buffer(ubuf, size, offp, buf, 2);
+}
+
+static ssize_t tool_link_write(struct file *filep, const char __user *ubuf,
+			       size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+	char buf[32];
+	size_t buf_size;
+	bool val;
+	int rc;
+
+	buf_size = min(size, (sizeof(buf) - 1));
+	if (copy_from_user(buf, ubuf, buf_size))
+		return -EFAULT;
+
+	buf[buf_size] = '\0';
+
+	rc = strtobool(buf, &val);
+	if (rc)
+		return rc;
+
+	if (val)
+		rc = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	else
+		rc = ntb_link_disable(tc->ntb);
+
+	if (rc)
+		return rc;
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_link_fops,
+		      tool_link_read,
+		      tool_link_write);
+
+static ssize_t tool_link_event_write(struct file *filep,
+				     const char __user *ubuf,
+				     size_t size, loff_t *offp)
+{
+	struct tool_ctx *tc = filep->private_data;
+	char buf[32];
+	size_t buf_size;
+	bool val;
+	int rc;
+
+	buf_size = min(size, (sizeof(buf) - 1));
+	if (copy_from_user(buf, ubuf, buf_size))
+		return -EFAULT;
+
+	buf[buf_size] = '\0';
+
+	rc = strtobool(buf, &val);
+	if (rc)
+		return rc;
+
+	if (wait_event_interruptible(tc->link_wq,
+		ntb_link_is_up(tc->ntb, NULL, NULL) == val))
+		return -ERESTART;
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_link_event_fops,
+		      NULL,
+		      tool_link_event_write);
+
+static ssize_t tool_mw_read(struct file *filep, char __user *ubuf,
+			    size_t size, loff_t *offp)
+{
+	struct tool_mw *mw = filep->private_data;
+	ssize_t rc;
+	loff_t pos = *offp;
+	void *buf;
+
+	if (mw->local == NULL)
+		return -EIO;
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= mw->win_size || !size)
+		return 0;
+	if (size > mw->win_size - pos)
+		size = mw->win_size - pos;
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	memcpy_fromio(buf, mw->local + pos, size);
+	rc = copy_to_user(ubuf, buf, size);
+	if (rc == size) {
+		rc = -EFAULT;
+		goto err_free;
+	}
+
+	size -= rc;
+	*offp = pos + size;
+	rc = size;
+
+err_free:
+	kfree(buf);
+
+	return rc;
+}
+
+static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf,
+			     size_t size, loff_t *offp)
+{
+	struct tool_mw *mw = filep->private_data;
+	ssize_t rc;
+	loff_t pos = *offp;
+	void *buf;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= mw->win_size || !size)
+		return 0;
+	if (size > mw->win_size - pos)
+		size = mw->win_size - pos;
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	rc = copy_from_user(buf, ubuf, size);
+	if (rc == size) {
+		rc = -EFAULT;
+		goto err_free;
+	}
+
+	size -= rc;
+	*offp = pos + size;
+	rc = size;
+
+	memcpy_toio(mw->local + pos, buf, size);
+
+err_free:
+	kfree(buf);
+
+	return rc;
+}
+
+static TOOL_FOPS_RDWR(tool_mw_fops,
+		      tool_mw_read,
+		      tool_mw_write);
+
+static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf,
+				 size_t size, loff_t *offp)
+{
+	struct tool_mw *mw = filep->private_data;
+
+	if (!mw->peer)
+		return -ENXIO;
+
+	return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size);
+}
+
+static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf,
+				  size_t size, loff_t *offp)
+{
+	struct tool_mw *mw = filep->private_data;
+
+	if (!mw->peer)
+		return -ENXIO;
+
+	return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size);
+}
+
+static TOOL_FOPS_RDWR(tool_peer_mw_fops,
+		      tool_peer_mw_read,
+		      tool_peer_mw_write);
+
+static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size)
+{
+	int rc;
+	struct tool_mw *mw = &tc->mws[idx];
+	resource_size_t size, align_addr, align_size;
+	char buf[16];
+
+	if (mw->peer)
+		return 0;
+
+	rc = ntb_mw_get_align(tc->ntb, PIDX, idx, &align_addr,
+				&align_size, &size);
+	if (rc)
+		return rc;
+
+	mw->size = min_t(resource_size_t, req_size, size);
+	mw->size = round_up(mw->size, align_addr);
+	mw->size = round_up(mw->size, align_size);
+	mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size,
+				      &mw->peer_dma, GFP_KERNEL);
+
+	if (!mw->peer || !IS_ALIGNED(mw->peer_dma, align_addr))
+		return -ENOMEM;
+
+	rc = ntb_mw_set_trans(tc->ntb, PIDX, idx, mw->peer_dma, mw->size);
+	if (rc)
+		goto err_free_dma;
+
+	snprintf(buf, sizeof(buf), "peer_mw%d", idx);
+	mw->peer_dbg_file = debugfs_create_file(buf, S_IRUSR | S_IWUSR,
+						mw->tc->dbgfs, mw,
+						&tool_peer_mw_fops);
+
+	return 0;
+
+err_free_dma:
+	dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
+			  mw->peer,
+			  mw->peer_dma);
+	mw->peer = NULL;
+	mw->peer_dma = 0;
+	mw->size = 0;
+
+	return rc;
+}
+
+static void tool_free_mw(struct tool_ctx *tc, int idx)
+{
+	struct tool_mw *mw = &tc->mws[idx];
+
+	if (mw->peer) {
+		ntb_mw_clear_trans(tc->ntb, PIDX, idx);
+		dma_free_coherent(&tc->ntb->pdev->dev, mw->size,
+				  mw->peer,
+				  mw->peer_dma);
+	}
+
+	mw->peer = NULL;
+	mw->peer_dma = 0;
+
+	debugfs_remove(mw->peer_dbg_file);
+
+	mw->peer_dbg_file = NULL;
+}
+
+static ssize_t tool_peer_mw_trans_read(struct file *filep,
+				       char __user *ubuf,
+				       size_t size, loff_t *offp)
+{
+	struct tool_mw *mw = filep->private_data;
+
+	char *buf;
+	size_t buf_size;
+	ssize_t ret, off = 0;
+
+	phys_addr_t base;
+	resource_size_t mw_size;
+	resource_size_t align_addr;
+	resource_size_t align_size;
+	resource_size_t max_size;
+
+	buf_size = min_t(size_t, size, 512);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ntb_mw_get_align(mw->tc->ntb, PIDX, mw->idx,
+			 &align_addr, &align_size, &max_size);
+	ntb_peer_mw_get_addr(mw->tc->ntb, mw->idx, &base, &mw_size);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Peer MW %d Information:\n", mw->idx);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Physical Address      \t%pa[p]\n",
+			 &base);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Window Size           \t%lld\n",
+			 (unsigned long long)mw_size);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Alignment             \t%lld\n",
+			 (unsigned long long)align_addr);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Size Alignment        \t%lld\n",
+			 (unsigned long long)align_size);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Size Max              \t%lld\n",
+			 (unsigned long long)max_size);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Ready                 \t%c\n",
+			 (mw->peer) ? 'Y' : 'N');
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Allocated Size       \t%zd\n",
+			 (mw->peer) ? (size_t)mw->size : 0);
+
+	ret = simple_read_from_buffer(ubuf, size, offp, buf, off);
+	kfree(buf);
+	return ret;
+}
+
+static ssize_t tool_peer_mw_trans_write(struct file *filep,
+					const char __user *ubuf,
+					size_t size, loff_t *offp)
+{
+	struct tool_mw *mw = filep->private_data;
+
+	char buf[32];
+	size_t buf_size;
+	unsigned long long val;
+	int rc;
+
+	buf_size = min(size, (sizeof(buf) - 1));
+	if (copy_from_user(buf, ubuf, buf_size))
+		return -EFAULT;
+
+	buf[buf_size] = '\0';
+
+	rc = kstrtoull(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	tool_free_mw(mw->tc, mw->idx);
+	if (val)
+		rc = tool_setup_mw(mw->tc, mw->idx, val);
+
+	if (rc)
+		return rc;
+
+	return size;
+}
+
+static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops,
+		      tool_peer_mw_trans_read,
+		      tool_peer_mw_trans_write);
+
+static int tool_init_mw(struct tool_ctx *tc, int idx)
+{
+	struct tool_mw *mw = &tc->mws[idx];
+	phys_addr_t base;
+	int rc;
+
+	rc = ntb_peer_mw_get_addr(tc->ntb, idx, &base, &mw->win_size);
+	if (rc)
+		return rc;
+
+	mw->tc = tc;
+	mw->idx = idx;
+	mw->local = ioremap_wc(base, mw->win_size);
+	if (!mw->local)
+		return -EFAULT;
+
+	return 0;
+}
+
+static void tool_free_mws(struct tool_ctx *tc)
+{
+	int i;
+
+	for (i = 0; i < tc->mw_count; i++) {
+		tool_free_mw(tc, i);
+
+		if (tc->mws[i].local)
+			iounmap(tc->mws[i].local);
+
+		tc->mws[i].local = NULL;
+	}
+}
+
+static void tool_setup_dbgfs(struct tool_ctx *tc)
+{
+	int i;
+
+	/* This modules is useless without dbgfs... */
+	if (!tool_dbgfs) {
+		tc->dbgfs = NULL;
+		return;
+	}
+
+	tc->dbgfs = debugfs_create_dir(dev_name(&tc->ntb->dev),
+				       tool_dbgfs);
+	if (!tc->dbgfs)
+		return;
+
+	debugfs_create_file("db", S_IRUSR | S_IWUSR, tc->dbgfs,
+			    tc, &tool_db_fops);
+
+	debugfs_create_file("mask", S_IRUSR | S_IWUSR, tc->dbgfs,
+			    tc, &tool_mask_fops);
+
+	debugfs_create_file("peer_db", S_IRUSR | S_IWUSR, tc->dbgfs,
+			    tc, &tool_peer_db_fops);
+
+	debugfs_create_file("peer_mask", S_IRUSR | S_IWUSR, tc->dbgfs,
+			    tc, &tool_peer_mask_fops);
+
+	debugfs_create_file("spad", S_IRUSR | S_IWUSR, tc->dbgfs,
+			    tc, &tool_spad_fops);
+
+	debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs,
+			    tc, &tool_peer_spad_fops);
+
+	debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs,
+			    tc, &tool_link_fops);
+
+	debugfs_create_file("link_event", S_IWUSR, tc->dbgfs,
+			    tc, &tool_link_event_fops);
+
+	for (i = 0; i < tc->mw_count; i++) {
+		char buf[30];
+
+		snprintf(buf, sizeof(buf), "mw%d", i);
+		debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs,
+				    &tc->mws[i], &tool_mw_fops);
+
+		snprintf(buf, sizeof(buf), "peer_trans%d", i);
+		debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs,
+				    &tc->mws[i], &tool_peer_mw_trans_fops);
+	}
+}
+
+static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
+{
+	struct tool_ctx *tc;
+	int rc;
+	int i;
+
+	if (!ntb->ops->mw_set_trans) {
+		dev_dbg(&ntb->dev, "need inbound MW based NTB API\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
+	if (ntb_spad_count(ntb) < 1) {
+		dev_dbg(&ntb->dev, "no enough scratchpads\n");
+		rc = -EINVAL;
+		goto err_tc;
+	}
+
+	if (ntb_db_is_unsafe(ntb))
+		dev_dbg(&ntb->dev, "doorbell is unsafe\n");
+
+	if (ntb_spad_is_unsafe(ntb))
+		dev_dbg(&ntb->dev, "scratchpad is unsafe\n");
+
+	if (ntb_peer_port_count(ntb) != NTB_DEF_PEER_CNT)
+		dev_warn(&ntb->dev, "multi-port NTB is unsupported\n");
+
+	tc = kzalloc(sizeof(*tc), GFP_KERNEL);
+	if (!tc) {
+		rc = -ENOMEM;
+		goto err_tc;
+	}
+
+	tc->ntb = ntb;
+	init_waitqueue_head(&tc->link_wq);
+
+	tc->mw_count = min(ntb_peer_mw_count(tc->ntb), MAX_MWS);
+	for (i = 0; i < tc->mw_count; i++) {
+		rc = tool_init_mw(tc, i);
+		if (rc)
+			goto err_ctx;
+	}
+
+	tool_setup_dbgfs(tc);
+
+	rc = ntb_set_ctx(ntb, tc, &tool_ops);
+	if (rc)
+		goto err_ctx;
+
+	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	ntb_link_event(ntb);
+
+	return 0;
+
+err_ctx:
+	tool_free_mws(tc);
+	debugfs_remove_recursive(tc->dbgfs);
+	kfree(tc);
+err_tc:
+	return rc;
+}
+
+static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb)
+{
+	struct tool_ctx *tc = ntb->ctx;
+
+	tool_free_mws(tc);
+
+	ntb_clear_ctx(ntb);
+	ntb_link_disable(ntb);
+
+	debugfs_remove_recursive(tc->dbgfs);
+	kfree(tc);
+}
+
+static struct ntb_client tool_client = {
+	.ops = {
+		.probe = tool_probe,
+		.remove = tool_remove,
+	},
+};
+
+static int __init tool_init(void)
+{
+	int rc;
+
+	if (debugfs_initialized())
+		tool_dbgfs = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	rc = ntb_register_client(&tool_client);
+	if (rc)
+		goto err_client;
+
+	return 0;
+
+err_client:
+	debugfs_remove_recursive(tool_dbgfs);
+	return rc;
+}
+module_init(tool_init);
+
+static void __exit tool_exit(void)
+{
+	ntb_unregister_client(&tool_client);
+	debugfs_remove_recursive(tool_dbgfs);
+}
+module_exit(tool_exit);