summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--dma/ipe_benchmark.c192
-rwxr-xr-xtests/device_info.sh81
-rwxr-xr-xtests/new_device.sh6
-rwxr-xr-xtests/reload.sh25
4 files changed, 298 insertions, 6 deletions
diff --git a/dma/ipe_benchmark.c b/dma/ipe_benchmark.c
new file mode 100644
index 0000000..3c10715
--- /dev/null
+++ b/dma/ipe_benchmark.c
@@ -0,0 +1,192 @@
+#define _PCILIB_DMA_IPE_C
+#define _BSD_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/time.h>
+#include <arpa/inet.h>
+
+#include "pci.h"
+#include "pcilib.h"
+#include "error.h"
+#include "tools.h"
+#include "debug.h"
+
+#include "ipe.h"
+#include "ipe_private.h"
+
+
+typedef struct {	/* Progress record handed by dma_ipe_skim_dma_custom() to dma_ipe_skim_callback() */
+ size_t size;	/* Number of bytes requested by the caller */
+ size_t pos;	/* Number of bytes reported to the callback so far (starts at 0) */
+ pcilib_dma_flags_t flags;	/* Flags of the request; MULTIPACKET and WAIT are examined at the end of each packet */
+} dma_ipe_skim_callback_context_t;
+
+/**
+ * Streaming callback which only accounts for the delivered data and never copies it.
+ *
+ * @param arg     - pointer to the dma_ipe_skim_callback_context_t describing the request
+ * @param flags   - flags of the delivered buffer; PCILIB_DMA_FLAG_EOP marks the end of a packet
+ * @param bufsize - size of the delivered buffer in bytes, added to the running position
+ * @param buf     - delivered data (ignored)
+ * @return PCILIB_STREAMING_REQ_FRAGMENT while inside a packet. At the end of a packet:
+ *         PCILIB_STREAMING_STOP, unless less than the requested size was seen and the
+ *         request is multi-packet, in which case PCILIB_STREAMING_WAIT (if
+ *         PCILIB_DMA_FLAG_WAIT is set) or PCILIB_STREAMING_CONTINUE.
+ */
+static int dma_ipe_skim_callback(void *arg, pcilib_dma_flags_t flags, size_t bufsize, void *buf) {
+    dma_ipe_skim_callback_context_t *state = (dma_ipe_skim_callback_context_t*)arg;
+    int more_expected;
+
+    state->pos += bufsize;
+
+    // Packet is not complete yet, ask for the next fragment
+    if (!(flags & PCILIB_DMA_FLAG_EOP))
+        return PCILIB_STREAMING_REQ_FRAGMENT;
+
+    more_expected = (state->pos < state->size) && (state->flags & PCILIB_DMA_FLAG_MULTIPACKET);
+    if (!more_expected)
+        return PCILIB_STREAMING_STOP;
+
+    return (state->flags & PCILIB_DMA_FLAG_WAIT) ? PCILIB_STREAMING_WAIT : PCILIB_STREAMING_CONTINUE;
+}
+
+/**
+ * Streams up to @p size bytes from the DMA engine through dma_ipe_skim_callback(),
+ * i.e. the data is counted but not stored anywhere.
+ *
+ * @param ctx        - pcilib context
+ * @param dma        - DMA engine to read from
+ * @param addr       - address passed through to pcilib_stream_dma()
+ * @param size       - number of bytes requested
+ * @param flags      - DMA flags, passed to pcilib_stream_dma() and kept for the callback
+ * @param timeout    - timeout passed through to pcilib_stream_dma()
+ * @param buf        - ignored, nothing is written to it
+ * @param read_bytes - if not NULL, receives the number of bytes reported to the callback
+ * @return the value returned by pcilib_stream_dma()
+ */
+int dma_ipe_skim_dma_custom(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes) {
+    dma_ipe_skim_callback_context_t progress = { .size = size, .pos = 0, .flags = flags };
+    int res = pcilib_stream_dma(ctx, dma, addr, size, flags, timeout, dma_ipe_skim_callback, &progress);
+
+    if (read_bytes != NULL)
+        *read_bytes = progress.pos;
+
+    return res;
+}
+
+
+/**
+ * Measures the read throughput of the IPE DMA engine.
+ *
+ * Only the device-to-host direction is supported. The requested size is rounded up to
+ * a multiple of IPEDMA_PAGE_SIZE. Two environment variables select the mode:
+ *  - PCILIB_BENCHMARK_HARDWARE: data is read with dma_ipe_skim_dma_custom() instead of
+ *    pcilib_read_dma_custom() ("without memcpy" per the informational messages).
+ *  - PCILIB_BENCHMARK_STREAMING: the DMA is started once, the function waits until the
+ *    DMA buffers are filled and then times all iterations together. Otherwise the DMA
+ *    is started and stopped around every iteration and only these intervals are timed.
+ *
+ * @param vctx       - DMA context, an ipe_dma_t
+ * @param dma        - DMA engine; values above 1 are rejected unless PCILIB_DMA_ENGINE_INVALID
+ * @param addr       - address passed through to the read function
+ * @param size       - number of bytes to read per iteration
+ * @param iterations - number of iterations
+ * @param direction  - PCILIB_DMA_TO_DEVICE and PCILIB_DMA_BIDIRECTIONAL are rejected
+ * @return throughput in MiB/s, or -1 on any failure
+ *
+ * NOTE(review): as in the original code, the error paths do not touch IPEDMA_REG_CONTROL,
+ * so the engine is left in whatever state it was when the error occurred.
+ * NOTE(review): a read which succeeds but delivers 0 bytes would keep the inner loops
+ * spinning; whether the read functions can do that is not visible from here.
+ */
+double dma_ipe_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) {
+    int err = 0;
+
+    ipe_dma_t *ctx = (ipe_dma_t*)vctx;
+
+    size_t iter;
+    size_t us = 0;
+    struct timeval start, cur;
+
+    // Byte pointer, so the offset arithmetic below does not rely on the GNU void* extension
+    char *buf = NULL;
+    size_t bytes, rbytes;
+
+    int (*read_dma)(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes);
+
+    if ((direction == PCILIB_DMA_TO_DEVICE)||(direction == PCILIB_DMA_BIDIRECTIONAL)) return -1.;
+
+    if ((dma != PCILIB_DMA_ENGINE_INVALID)&&(dma > 1)) return -1.;
+
+    // Round the size up to the whole number of DMA pages
+    if (size%IPEDMA_PAGE_SIZE) size = (1 + size / IPEDMA_PAGE_SIZE) * IPEDMA_PAGE_SIZE;
+
+    err = dma_ipe_start(vctx, 0, PCILIB_DMA_FLAGS_DEFAULT);
+    // Report the failure as -1 like all other paths; the raw code could be mistaken for a bandwidth
+    if (err) return -1.;
+
+    if (getenv("PCILIB_BENCHMARK_HARDWARE"))
+        read_dma = dma_ipe_skim_dma_custom;
+    else
+        read_dma = pcilib_read_dma_custom;
+
+    // There is no significant difference and we can remove this when testing phase is over.
+    if (getenv("PCILIB_BENCHMARK_STREAMING")) {
+        size_t dma_buffer_space;
+        pcilib_dma_engine_status_t dma_status;
+
+        if (read_dma == pcilib_read_dma_custom)
+            pcilib_info_once("Benchmarking the DMA streaming (with memcpy)");
+        else
+            pcilib_info_once("Benchmarking the DMA streaming (without memcpy)");
+
+        // Starting DMA
+        WR(IPEDMA_REG_CONTROL, 0x1);
+
+        gettimeofday(&start, NULL);
+        pcilib_calc_deadline(&start, IPEDMA_DMA_TIMEOUT * IPEDMA_DMA_PAGES);
+
+#ifdef IPEDMA_BUG_LAST_READ
+        dma_buffer_space = (IPEDMA_DMA_PAGES - 2) * IPEDMA_PAGE_SIZE;
+#else /* IPEDMA_BUG_LAST_READ */
+        dma_buffer_space = (IPEDMA_DMA_PAGES - 1) * IPEDMA_PAGE_SIZE;
+#endif /* IPEDMA_BUG_LAST_READ */
+
+        // The first iteration additionally drains the data already sitting in the DMA buffers
+        buf = malloc(size + dma_buffer_space);
+        if (!buf) goto fail;
+
+        // Wait until all DMA buffers are filled
+        memset(&dma_status, 0, sizeof(dma_status));
+        do {
+            usleep(10 * IPEDMA_NODATA_SLEEP);
+            err = dma_ipe_get_status(vctx, dma, &dma_status, 0, NULL);
+        } while ((!err)&&(dma_status.written_bytes < dma_buffer_space)&&(pcilib_calc_time_to_deadline(&start) > 0));
+
+        if (err) {
+            pcilib_error("Error (%i) getting dma status", err);
+            goto fail;
+        } else if (dma_status.written_bytes < dma_buffer_space) {
+            pcilib_error("Timeout while waiting DMA engine to fill the buffer space completely, only %zu bytes of %zu written", dma_status.written_bytes, dma_buffer_space);
+            goto fail;
+        }
+
+        gettimeofday(&start, NULL);
+        for (iter = 0; iter < iterations; iter++) {
+            for (bytes = 0; bytes < (size + dma_buffer_space); bytes += rbytes) {
+                err = read_dma(ctx->dmactx.pcilib, 0, addr, size + dma_buffer_space - bytes, PCILIB_DMA_FLAG_MULTIPACKET, PCILIB_DMA_TIMEOUT, buf + bytes, &rbytes);
+                if (err) {
+                    pcilib_error("Can't read data from DMA, error %i", err);
+                    goto fail;
+                }
+            }
+            // The buffered backlog is consumed by the first iteration only
+            dma_buffer_space = 0;
+        }
+
+        gettimeofday(&cur, NULL);
+        us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
+
+        // Stopping DMA
+        WR(IPEDMA_REG_CONTROL, 0x0);
+        pcilib_skip_dma(ctx->dmactx.pcilib, 0);
+    } else {
+        if (read_dma == dma_ipe_skim_dma_custom)
+            pcilib_info_once("Benchmarking the DMA hardware (without memcpy)");
+
+        WR(IPEDMA_REG_CONTROL, 0x0);
+
+        // Drop whatever is still pending before the measurement starts
+        err = pcilib_skip_dma(ctx->dmactx.pcilib, 0);
+        if (err) {
+            pcilib_error("Can't start benchmark, devices continuously writes unexpected data using DMA engine");
+            goto fail;
+        }
+
+        // Allocate memory and prepare data
+        buf = malloc(size);
+        if (!buf) goto fail;
+
+        for (iter = 0; iter < iterations; iter++) {
+            gettimeofday(&start, NULL);
+
+            // Starting DMA
+            WR(IPEDMA_REG_CONTROL, 0x1);
+
+            for (bytes = 0; bytes < size; bytes += rbytes) {
+                err = read_dma(ctx->dmactx.pcilib, 0, addr, size - bytes, PCILIB_DMA_FLAG_MULTIPACKET, PCILIB_DMA_TIMEOUT, buf + bytes, &rbytes);
+                if (err) {
+                    pcilib_error("Can't read data from DMA, error %i", err);
+                    goto fail;
+                }
+            }
+
+            // Stopping DMA
+            WR(IPEDMA_REG_CONTROL, 0x0);
+
+            gettimeofday(&cur, NULL);
+            us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
+
+            // The clean-up between iterations is not part of the measured time
+            err = pcilib_skip_dma(ctx->dmactx.pcilib, 0);
+            if (err) {
+                pcilib_error("Can't start iteration, devices continuously writes unexpected data using DMA engine");
+                goto fail;
+            }
+        }
+    }
+
+    free(buf);
+
+    // bytes per microsecond converted to MiB per second
+    return (1. * size * iterations * 1000000) / (1024. * 1024. * us);
+
+fail:
+    // free(NULL) is a no-op, so this is safe for failures before the allocation as well
+    free(buf);
+    return -1.;
+}
diff --git a/tests/device_info.sh b/tests/device_info.sh
new file mode 100755
index 0000000..21e59db
--- /dev/null
+++ b/tests/device_info.sh
@@ -0,0 +1,81 @@
+#! /bin/bash
+
+BAR=0	# PCI BAR passed to "pci -b" when parse_config reads the device information word
+
+function pci {
+ PCILIB_PATH=`pwd`/..
+ LD_LIBRARY_PATH="$PCILIB_PATH/pcilib" $PCILIB_PATH/pcitool/pci $*
+}
+
+
+function read_cfg {
+ pci -a config -r 0x$1 | awk '{ print $2; }' | sed -e 's/\s*//g' -e '/^\s*$/d'
+}
+
+function parse_config {
+ info=0x`pci -b $BAR -r 0 | awk '{ print $2; }' | sed -e 's/\s*//g' -e '/^\s*$/d'`
+ model=`printf "%X" $((info>>24))`
+ if [ $model -eq 14 ]; then
+ model="Xilinx Virtex-6"
+ else
+ model="Xilinx $model"
+ fi
+ version=$(((info >> 8) & 0xFF))
+ data_width=$((16 * (2 ** ((info >> 16) & 0xF))))
+
+ echo "$model, build $version, $data_width bits"
+
+
+ next=`read_cfg 34 | cut -c 7-8`
+# next=`printf "%u" $next`
+
+ while [ $((0x$next)) -ne 0 ]; do
+ cap=`read_cfg $next`
+ capid=`echo $cap | cut -c 7-8`
+ if [ $capid -eq 10 ]; then
+ addr=`printf "%X" $((0x$next + 4))`
+ device_capabilities=`read_cfg $addr`
+
+ addr=`printf "%X" $((0x$next + 8))`
+ device_control=`read_cfg $addr`
+
+ addr=`printf "%X" $((0x$next + 12))`
+ pcie_link1=`read_cfg $addr`
+ addr=`printf "%X" $((0x$next + 16))`
+ pcie_link2=`read_cfg $addr`
+
+ link_speed=$((((0x$pcie_link2 & 0xF0000) >> 16)))
+ link_width=$((((0x$pcie_link2 & 0x3F00000) >> 20)))
+
+ dev_link_speed=$((((0x$pcie_link1 & 0xF))))
+ dev_link_width=$((((0x$pcie_link1 & 0x3F0) >> 4)))
+
+ max_payload=$(((1 << ((0x$device_capabilities & 0x07) + 7))))
+ dev_payload=$(((1 << (((0x$device_capabilities >> 5) & 0x07) + 7))))
+ fi
+ next=`echo $cap | cut -c 5-6`
+ done
+
+ echo "Link: PCIe gen$link_speed x$link_width"
+ if [ $link_speed -ne $dev_link_speed -o $link_width -ne $dev_link_width ]; then
+ echo " * But device capable of gen$dev_link_speed x$dev_link_width"
+ fi
+
+ echo "Payload: $dev_payload"
+ if [ $dev_payload -ne $max_payload ]; then
+ echo " * But device capable of $max_payload"
+ fi
+
+
+ info=0x`read_cfg 40`
+ max_tlp=$((2 ** (5 + ((info & 0xE0) >> 5))))
+ echo "TLP: 32 dwords (transfering 32 TLP per request)"
+ if [ $max_tlp -ne 32 ]; then
+ echo " * But device is able to transfer TLP up to $max_tlp bytes"
+ fi
+
+ # 2500 MT/s, but PCIe gen1 and gen2 uses 10 bit encoding
+ speed=$((link_width * link_speed * 2500 / 10))
+}
+
+parse_config	# script entry point: print the device summary
diff --git a/tests/new_device.sh b/tests/new_device.sh
deleted file mode 100755
index e3f8f8a..0000000
--- a/tests/new_device.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#! /bin/bash
-
-#Add new device to the driver
-#echo "10ee 6028" > /sys/bus/pci/drivers/pciDriver/new_id
-#Enable bus mastering
-#setpci -s 03:00.0 4.w=0x07
diff --git a/tests/reload.sh b/tests/reload.sh
new file mode 100755
index 0000000..62c6e22
--- /dev/null
+++ b/tests/reload.sh
@@ -0,0 +1,25 @@
+#! /bin/bash
+
+device=`lspci -n | grep -m 1 "10ee:" | awk '{print $1}'`
+if [ -z "$device" ]; then
+ echo "Xilinx device doesn't exist, rescanning..."
+ echo 1 > /sys/bus/pci/rescan
+ exit
+else
+ echo "Xilinx is located at: " $device
+fi
+echo "remove driver"
+rmmod pciDriver
+echo "remove devices"
+echo 1 > /sys/bus/pci/devices/0000\:${device:0:2}\:${device:3:4}/remove
+sleep 1
+echo "rescan"
+echo 1 > /sys/bus/pci/rescan
+sleep 1
+echo "instantiate driver"
+modprobe pciDriver
+# for devices with different ID
+#echo "10ee 6028" > /sys/bus/pci/drivers/pciDriver/new_id
+pci -i
+#echo Enabling bus mastering on device $dev
+#setpci -s $device 4.w=0x07