bzr branch
http://suren.me/webbzr/alps/pcitool
266
by Suren A. Chilingaryan
Add forgotten ipe_benchmark.c |
1 |
#define _PCILIB_DMA_IPE_C
|
2 |
#define _BSD_SOURCE
|
|
302
by Suren A. Chilingaryan
Fixes out-of-source builds and minor build issues |
3 |
#define _DEFAULT_SOURCE
|
329
by Suren A. Chilingaryan
IPEDMA Update |
4 |
#define _POSIX_C_SOURCE 200112L
|
266
by Suren A. Chilingaryan
Add forgotten ipe_benchmark.c |
5 |
|
6 |
#include <stdio.h> |
|
7 |
#include <stdlib.h> |
|
8 |
#include <string.h> |
|
9 |
#include <unistd.h> |
|
10 |
#include <sched.h> |
|
11 |
#include <sys/time.h> |
|
12 |
#include <arpa/inet.h> |
|
13 |
||
14 |
#include "pci.h" |
|
15 |
#include "pcilib.h" |
|
16 |
#include "error.h" |
|
17 |
#include "tools.h" |
|
18 |
#include "debug.h" |
|
19 |
||
20 |
#include "ipe.h" |
|
21 |
#include "ipe_private.h" |
|
22 |
||
23 |
||
24 |
typedef struct { |
|
25 |
size_t size; |
|
26 |
size_t pos; |
|
27 |
pcilib_dma_flags_t flags; |
|
28 |
} dma_ipe_skim_callback_context_t; |
|
29 |
||
30 |
/**
 * Streaming callback which only counts the delivered bytes and never touches the data.
 *
 * @param arg     pointer to a dma_ipe_skim_callback_context_t
 * @param flags   flags describing the delivered buffer (PCILIB_DMA_FLAG_EOP is checked)
 * @param bufsize size of the delivered buffer in bytes, added to the running position
 * @param buf     delivered data (unused)
 * @return PCILIB_STREAMING_REQ_FRAGMENT while the buffer is not flagged with EOP;
 *         on EOP either PCILIB_STREAMING_WAIT / PCILIB_STREAMING_CONTINUE (less than the
 *         requested size was seen and multi-packet mode is requested) or
 *         PCILIB_STREAMING_STOP otherwise
 */
static int dma_ipe_skim_callback(void *arg, pcilib_dma_flags_t flags, size_t bufsize, void *buf) {
    dma_ipe_skim_callback_context_t *skim = (dma_ipe_skim_callback_context_t*)arg;
    int more_expected;

    skim->pos += bufsize;

    // Packet is not complete yet, ask for the next fragment
    if (!(flags & PCILIB_DMA_FLAG_EOP))
        return PCILIB_STREAMING_REQ_FRAGMENT;

    // Further packets are only accepted in multi-packet mode and while the request is unsatisfied
    more_expected = (skim->pos < skim->size) && (skim->flags & PCILIB_DMA_FLAG_MULTIPACKET);
    if (!more_expected)
        return PCILIB_STREAMING_STOP;

    return (skim->flags & PCILIB_DMA_FLAG_WAIT) ? PCILIB_STREAMING_WAIT : PCILIB_STREAMING_CONTINUE;
}
|
|
45 |
||
46 |
/**
 * Drop-in counterpart of the regular DMA read routine which streams the data through
 * dma_ipe_skim_callback(), i.e. the data is only counted and nothing is stored in buf.
 *
 * @param ctx        pcilib context
 * @param dma        DMA engine to read from
 * @param addr       address passed through to pcilib_stream_dma()
 * @param size       number of bytes requested
 * @param flags      DMA flags, forwarded to pcilib_stream_dma() and to the callback context
 * @param timeout    timeout passed through to pcilib_stream_dma()
 * @param buf        unused, present for signature compatibility
 * @param read_bytes if not NULL, receives the number of bytes seen by the callback
 * @return the error code returned by pcilib_stream_dma()
 */
int dma_ipe_skim_dma_custom(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes) {
    dma_ipe_skim_callback_context_t skim = {
        .size = size,
        .pos = 0,
        .flags = flags
    };

    int res = pcilib_stream_dma(ctx, dma, addr, size, flags, timeout, dma_ipe_skim_callback, &skim);

    // The byte counter is reported even if streaming has failed
    if (read_bytes != NULL) {
        *read_bytes = skim.pos;
    }

    return res;
}
|
|
57 |
||
58 |
||
59 |
/**
 * Benchmarks reading from the IPE DMA engine.
 *
 * Two environment variables select the mode of operation:
 *  - PCILIB_BENCHMARK_HARDWARE:  read using dma_ipe_skim_dma_custom() instead of
 *    pcilib_read_dma_custom()
 *  - PCILIB_BENCHMARK_STREAMING: start the DMA once, wait until the DMA buffer space is
 *    reported as written, and time all iterations together; otherwise the DMA is started
 *    and stopped for each iteration and the first (warm-up) iteration is not counted
 *
 * @param vctx       DMA context (an ipe_dma_t)
 * @param dma        DMA engine address; values above 1 are rejected unless it is
 *                   PCILIB_DMA_ENGINE_INVALID
 * @param addr       address passed through to the read routine
 * @param size       bytes to read per iteration, rounded up to a multiple of the DMA page size
 * @param iterations number of timed iterations
 * @param direction  only directions other than PCILIB_DMA_TO_DEVICE and
 *                   PCILIB_DMA_BIDIRECTIONAL are supported
 * @return (size * iterations * 1000000) / (1024 * 1024 * elapsed microseconds), or -1 on any failure
 */
double dma_ipe_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) {
    int err = 0;
    double res = -1.;

    ipe_dma_t *ctx = (ipe_dma_t*)vctx;

    size_t iter;
    size_t us = 0;
    struct timeval start, cur;

    void *buf = NULL;
    size_t bytes, rbytes;

    int (*read_dma)(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes);

    if ((direction == PCILIB_DMA_TO_DEVICE)||(direction == PCILIB_DMA_BIDIRECTIONAL)) return -1.;

    if ((dma != PCILIB_DMA_ENGINE_INVALID)&&(dma > 1)) return -1.;

    // The function returns a throughput, so a raw error code must never leak out as a result
    err = dma_ipe_start(vctx, 0, PCILIB_DMA_FLAGS_DEFAULT);
    if (err) return -1.;

    // Round the size up to a whole number of DMA pages
    if (size%ctx->page_size) size = (1 + size / ctx->page_size) * ctx->page_size;

    if (getenv("PCILIB_BENCHMARK_HARDWARE"))
        read_dma = dma_ipe_skim_dma_custom;
    else
        read_dma = pcilib_read_dma_custom;

    // There is no significant difference and we can remove this when testing phase is over.
    // DS: With large number of buffers this is quite slow due to skimming of initially written buffers
    if (getenv("PCILIB_BENCHMARK_STREAMING")) {
        size_t dma_buffer_space;
        pcilib_dma_engine_status_t dma_status;

        if (read_dma == pcilib_read_dma_custom) {
            pcilib_info_once("Benchmarking the DMA streaming (with memcpy)");
        } else {
            pcilib_info_once("Benchmarking the DMA streaming (without memcpy)");
        }

        // Starting DMA
        WR(IPEDMA_REG_CONTROL, 0x1);

        gettimeofday(&start, NULL);
        pcilib_calc_deadline(&start, ctx->dma_timeout * IPEDMA_DMA_PAGES);

#ifdef IPEDMA_BUG_LAST_READ
        dma_buffer_space = (IPEDMA_DMA_PAGES - 2) * ctx->page_size;
#else /* IPEDMA_BUG_LAST_READ */
        dma_buffer_space = (IPEDMA_DMA_PAGES - 1) * ctx->page_size;
#endif /* IPEDMA_BUG_LAST_READ */

        // Allocate memory and prepare data
        err = posix_memalign(&buf, 4096, size + dma_buffer_space);
        if ((err)||(!buf)) return -1.;

        // Wait all DMA buffers are filled
        memset(&dma_status, 0, sizeof(dma_status));
        do {
            usleep(10 * IPEDMA_NODATA_SLEEP);
            err = dma_ipe_get_status(vctx, dma, &dma_status, 0, NULL);
        } while ((!err)&&(dma_status.written_bytes < dma_buffer_space)&&(pcilib_calc_time_to_deadline(&start) > 0));

        // NOTE(review): the error paths below leave the DMA enabled, exactly as before; only the buffer is released now
        if (err) {
            pcilib_error("Error (%i) getting dma status", err);
            goto cleanup;
        } else if (dma_status.written_bytes < dma_buffer_space) {
            pcilib_error("Timeout while waiting DMA engine to feel the buffer space completely, only %zu bytes of %zu written", dma_status.written_bytes, dma_buffer_space);
            goto cleanup;
        }

        gettimeofday(&start, NULL);
        for (iter = 0; iter < iterations; iter++) {
            for (bytes = 0; bytes < (size + dma_buffer_space); bytes += rbytes) {
                err = read_dma(ctx->dmactx.pcilib, 0, addr, size + dma_buffer_space - bytes, PCILIB_DMA_FLAG_MULTIPACKET, ctx->dma_timeout, (char*)buf + bytes, &rbytes);
                if (err) {
                    pcilib_error("Can't read data from DMA, error %i", err);
                    goto cleanup;
                }
            }

            // The pre-filled buffer space is only drained during the first iteration
            dma_buffer_space = 0;
        }

        gettimeofday(&cur, NULL);
        us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));

        // Stopping DMA
        WR(IPEDMA_REG_CONTROL, 0x0);
        usleep(IPEDMA_RESET_DELAY);

        pcilib_skip_dma(ctx->dmactx.pcilib, 0);
    } else {
        if (read_dma == dma_ipe_skim_dma_custom) {
            pcilib_info_once("Benchmarking the DMA hardware (without memcpy)");
        }

        WR(IPEDMA_REG_CONTROL, 0x0);
        usleep(IPEDMA_RESET_DELAY);

        err = pcilib_skip_dma(ctx->dmactx.pcilib, 0);
        if (err) {
            pcilib_error("Can't start benchmark, devices continuously writes unexpected data using DMA engine");
            return -1.;
        }

        // Allocate memory and prepare data
        err = posix_memalign(&buf, 4096, size);
        if ((err)||(!buf)) return -1.;

        // One extra pass: iteration 0 is a warm-up and is excluded from the timing
        for (iter = 0; iter <= iterations; iter++) {
            unsigned long elapsed;

            gettimeofday(&start, NULL);

            // Starting DMA
            WR(IPEDMA_REG_CONTROL, 0x1);

            for (bytes = 0; bytes < size; bytes += rbytes) {
                err = read_dma(ctx->dmactx.pcilib, 0, addr, size - bytes, PCILIB_DMA_FLAG_MULTIPACKET, ctx->dma_timeout, (char*)buf + bytes, &rbytes);
                if (err) {
                    pcilib_error("Can't read data from DMA (iteration: %zu, offset: %zu), error %i", iter, bytes, err);
                    goto cleanup;
                }
            }

            gettimeofday(&cur, NULL);

            // Stopping DMA
            WR(IPEDMA_REG_CONTROL, 0x0);
            usleep(IPEDMA_RESET_DELAY);

            elapsed = (unsigned long)((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));

            // Heating up during the first iteration
            if (iter)
                us += elapsed;

            pcilib_info("Iteration %-4zu latency: %lu", iter, elapsed);

            err = pcilib_skip_dma(ctx->dmactx.pcilib, 0);
            if (err) {
                pcilib_error("Can't start iteration, devices continuously writes unexpected data using DMA engine");
                break;
            }

            usleep(ctx->dma_timeout);
        }
    }

    if (!err) res = (1. * size * iterations * 1000000) / (1024. * 1024. * us);

cleanup:
    free(buf);
    return res;
}
|