18
22
/* Value returned by dictDetectHardware(); dictCreateContext() triggers detection while this is still zero. */
static int device_number = 0;
19
23
/* Integer table sized MAX_DEVICES; the code that fills or reads it is not part of this excerpt. */
static int devices[MAX_DEVICES];
25
#ifdef DICT_HW_MEASURE_TIMINGS
26
/* Duration of the last completed dictDetectHardware() call, in microseconds; dictCreateContext() copies it into time[0] of the new state. */
static int hardware_detection_time = 0;
27
#endif /* DICT_HW_MEASURE_TIMINGS */
21
29
/*
 * Queries the CUDA runtime for the device count.
 *
 * Returns -1 when the count is zero; the other visible exit returns the
 * file-level device_number.
 * With DICT_HW_MEASURE_TIMINGS defined, the wall-clock duration of the call
 * (microseconds, from gettimeofday) is stored in hardware_detection_time.
 *
 * NOTE(review): lines of this function are missing from this excerpt; the
 * declaration of deviceCount and the use of deviceProp are not shown.
 */
int dictDetectHardware() {
23
31
cudaDeviceProp deviceProp;
33
#ifdef DICT_HW_MEASURE_TIMINGS
34
struct timeval tv1, tv2;
35
gettimeofday(&tv1, NULL);
36
#endif /* DICT_HW_MEASURE_TIMINGS */
25
39
/* NOTE(review): the cudaError_t returned here is discarded, so a failing call is only noticed if deviceCount happens to be zero - confirm how deviceCount is initialised. */
cudaGetDeviceCount(&deviceCount);
26
40
/* This early exit happens before tv2 is sampled, so hardware_detection_time keeps its previous value on this path. */
if (!deviceCount) return -1;
49
#ifdef DICT_HW_MEASURE_TIMINGS
50
gettimeofday(&tv2, NULL);
51
/* Elapsed time between tv1 and tv2 expressed in microseconds. */
hardware_detection_time = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
52
#endif /* DICT_HW_MEASURE_TIMINGS */
35
54
return device_number;
38
57
/*
 * Creates a processing state via pstateInit().
 *
 * If device_number is still zero, dictDetectHardware() is called first and
 * NULL is returned when it reports a value <= 0.
 * With DICT_HW_MEASURE_TIMINGS defined, time[0] of the new state receives
 * hardware_detection_time and time[1] receives the duration of the
 * pstateInit() call in microseconds.
 *
 * NOTE(review): the end of this function (including its return statement and
 * the closing brace of the detection branch) is not part of this excerpt.
 */
DICTContext dictCreateContext() {
58
#ifdef DICT_HW_MEASURE_TIMINGS
59
struct timeval tv1, tv2;
60
#endif /* DICT_HW_MEASURE_TIMINGS */
39
62
/* Detection is only attempted while no device number has been recorded. */
if (!device_number) {
40
63
if (dictDetectHardware() <= 0) return NULL;
66
#ifdef DICT_HW_MEASURE_TIMINGS
67
gettimeofday(&tv1, NULL);
68
#endif /* DICT_HW_MEASURE_TIMINGS */
43
70
TProcessingState *pstate = pstateInit();
72
#ifdef DICT_HW_MEASURE_TIMINGS
74
gettimeofday(&tv2, NULL);
75
/* NOTE(review): pstate is dereferenced here with no visible NULL check - confirm whether pstateInit() can fail. */
pstate->time[0] = hardware_detection_time;
76
pstate->time[1] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
78
#endif /* DICT_HW_MEASURE_TIMINGS */
47
83
/*
 * Tears down a context. Only the timing instrumentation is visible in this
 * excerpt; the actual release of resources is not shown.
 *
 * With DICT_HW_MEASURE_TIMINGS defined, a summary of the ctx->time[] slots is
 * printed with printf/print_timing before destruction, and the time between
 * the tv1 and tv2 samples is printed afterwards as "Destruction".
 *
 * NOTE(review): ctx is dereferenced without a visible NULL check.
 */
void dictDestroyContext(DICTContext ctx) {
84
#ifdef DICT_HW_MEASURE_TIMINGS
86
struct timeval tv1, tv2;
88
/* Initialization total: slots 0-4 plus 12 and 13, i.e. the entries printed under "Initialization" below. */
int init_time = ctx->time[0]+ctx->time[1]+ctx->time[2]+ctx->time[3]+ctx->time[4]+ctx->time[12]+ctx->time[13];
89
/* Image loading total: slots 14 ("Load") and 15 ("Reduction"). */
int load_time = ctx->time[14] + ctx->time[15];
90
/* Slot 5 is reported as the "Processing Images" total; slots 6-10 are printed as its breakdown. */
int process_time = ctx->time[5];
92
printf("Library timings\n");
93
printf("---------------\n");
94
/* Overall = initialization + loading + processing + slot 16 ("Waiting Result"). */
print_timing("Overall", init_time + load_time + process_time + ctx->time[16]);
95
print_timing("Initialization", init_time);
96
print_timing(" Hardware Detection", ctx->time[0]);
97
print_timing(" Context Initialization", ctx->time[1]);
98
print_timing(" Setting Template Points", ctx->time[2]);
99
print_timing(" Setting Current Points", ctx->time[3]);
100
print_timing(" Loading Template Image", ctx->time[12]);
101
print_timing(" Reducting Template Image", ctx->time[13]);
102
print_timing(" Processing Template Image", ctx->time[4]);
103
print_timing("Loading Images", load_time);
104
print_timing(" Load", ctx->time[14]);
105
print_timing(" Reduction", ctx->time[15]);
106
print_timing("Processing Images", process_time);
107
print_timing(" Copy Fragment", ctx->time[6]);
108
print_timing(" Load Fragment", ctx->time[7]);
109
print_timing(" Preprocessing", ctx->time[8]);
110
print_timing(" FFT", ctx->time[9]);
111
print_timing(" Postprocessing", ctx->time[10]);
112
print_timing("Waiting Result", ctx->time[16]);
114
/* Start of the interval reported as "Destruction". */
gettimeofday(&tv1, NULL);
115
#endif /* DICT_HW_MEASURE_TIMINGS */
117
#ifdef DICT_HW_MEASURE_TIMINGS
118
gettimeofday(&tv2, NULL);
119
/* NOTE(review): no declaration of `time` is visible in this excerpt - confirm it is declared in the full function. */
time = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
120
print_timing("Destruction", time);
122
#endif /* DICT_HW_MEASURE_TIMINGS */
51
125
int dictSetLogger(DICTLogger error_reporter, DICTLogger message_writer) {
94
174
base_blocks = calc_blocks(4 * ps->corr_size + 1, BLOCK_SIZE_1D);
95
175
ps->base_blocks_power = get_power(base_blocks);
179
#ifdef DICT_HW_MEASURE_TIMINGS
180
gettimeofday(&tv2, NULL);
181
ps->time[1] += (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
182
#endif /* DICT_HW_MEASURE_TIMINGS */
100
187
/*
 * Copies template point coordinates into the context's points buffer.
 *
 * ps->ncp floats are copied from points_x to the start of ps->points and
 * ps->ncp floats from points_y to offset ps->ncp_alloc_size.
 * With DICT_HW_MEASURE_TIMINGS defined, the elapsed microseconds are added
 * to ps->time[2].
 *
 * NOTE(review): the return statement is not part of this excerpt.
 */
int dictSetTemplatePoints(DICTContext ps, const float *points_x, const float *points_y) {
188
#ifdef DICT_HW_MEASURE_TIMINGS
189
struct timeval tv1, tv2;
190
gettimeofday(&tv1, NULL);
191
#endif /* DICT_HW_MEASURE_TIMINGS */
101
193
/* X coordinates occupy the first segment of the buffer. */
memcpy(ps->points, points_x, ps->ncp * sizeof(float));
102
194
/* Y coordinates start one ncp_alloc_size stride later. */
memcpy(ps->points + ps->ncp_alloc_size, points_y, ps->ncp * sizeof(float));
196
#ifdef DICT_HW_MEASURE_TIMINGS
197
gettimeofday(&tv2, NULL);
198
ps->time[2] += (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
199
#endif /* DICT_HW_MEASURE_TIMINGS */
121
218
/*
 * Copies current point coordinates into the context's points buffer.
 *
 * ps->ncp floats are copied from points_x to offset 2 * ps->ncp_alloc_size
 * and ps->ncp floats from points_y to offset 3 * ps->ncp_alloc_size.
 * With DICT_HW_MEASURE_TIMINGS defined, the elapsed microseconds are added
 * to ps->time[3].
 *
 * NOTE(review): the return statement is not part of this excerpt.
 */
int dictSetCurrentPoints(DICTContext ps, const float *points_x, const float *points_y) {
219
#ifdef DICT_HW_MEASURE_TIMINGS
220
struct timeval tv1, tv2;
221
gettimeofday(&tv1, NULL);
222
#endif /* DICT_HW_MEASURE_TIMINGS */
122
224
/* X coordinates go into the third ncp_alloc_size-sized segment. */
memcpy(ps->points + 2 * ps->ncp_alloc_size, points_x, ps->ncp * sizeof(float));
123
225
/* Y coordinates go into the fourth segment. */
memcpy(ps->points + 3 * ps->ncp_alloc_size, points_y, ps->ncp * sizeof(float));
227
#ifdef DICT_HW_MEASURE_TIMINGS
228
gettimeofday(&tv2, NULL);
229
ps->time[3] += (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
230
#endif /* DICT_HW_MEASURE_TIMINGS */
130
238
/*
 * Runs fftGetCurrentPoints() on the context.
 *
 * NOTE(review): this excerpt shows both an immediate
 * `return fftGetCurrentPoints(ps);` and, after it, a timed call that stores
 * the result in err. These look like two revisions of the body interleaved;
 * as written, everything after the first return is unreachable - confirm
 * which form the real file contains.
 */
int dictCompute(DICTContext ps) {
131
return fftGetCurrentPoints(ps);
241
#ifdef DICT_HW_MEASURE_TIMINGS
242
struct timeval tv1, tv2;
243
gettimeofday(&tv1, NULL);
244
#endif /* DICT_HW_MEASURE_TIMINGS */
246
/* NOTE(review): the declaration of err and the statement returning it are not visible here. */
err = fftGetCurrentPoints(ps);
248
#ifdef DICT_HW_MEASURE_TIMINGS
249
gettimeofday(&tv2, NULL);
250
/* NOTE(review): slot 12 is printed by dictDestroyContext() as "Loading Template Image", which does not obviously match this call - confirm the intended slot. */
ps->time[12] += (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
251
#endif /* DICT_HW_MEASURE_TIMINGS */
134
256
/*
 * Computes the current points and copies results to the caller's buffers.
 *
 * fftGetCurrentPoints() is invoked first; a non-zero result is returned to
 * the caller immediately. The visible tail copies ps->ncp floats from data_y
 * into res_y.
 * With DICT_HW_MEASURE_TIMINGS defined, the elapsed microseconds are added
 * to ps->time[16], the slot printed as "Waiting Result".
 *
 * NOTE(review): the lines that declare err, obtain data_y and fill res_x,
 * and the final return, are missing from this excerpt.
 */
int dictGetCurrentPoints(DICTContext ps, float *res_x, float *res_y) {
259
#ifdef DICT_HW_MEASURE_TIMINGS
260
struct timeval tv1, tv2;
261
gettimeofday(&tv1, NULL);
262
#endif /* DICT_HW_MEASURE_TIMINGS */
137
264
err = fftGetCurrentPoints(ps);
138
265
/* The early exit skips the timing accumulation below. */
if (err) return err;
153
280
memcpy(res_y, data_y, ps->ncp * sizeof(float));
283
#ifdef DICT_HW_MEASURE_TIMINGS
284
gettimeofday(&tv2, NULL);
285
ps->time[16] += (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
286
#endif /* DICT_HW_MEASURE_TIMINGS */
160
292
/*
 * Forwards a template fragment to fftLoadBaseFragment().
 *
 * The count handed to fftLoadBaseFragment() is
 * min2(CP_BLOCK, ps->ncp - icp); the ncp parameter is not used in any of
 * the visible calls.
 *
 * NOTE(review): this excerpt shows both an immediate
 * `return fftLoadBaseFragment(...)` and, after it, a timed call that stores
 * the result in err. These look like two revisions of the body interleaved;
 * as written, everything after the first return is unreachable - confirm
 * which form the real file contains.
 */
int dictLoadTemplateFragment(DICTContext ps, int icp, int ncp, const unsigned char *img) {
161
return fftLoadBaseFragment(ps, icp, min2(CP_BLOCK, ps->ncp - icp), img);
295
#ifdef DICT_HW_MEASURE_TIMINGS
296
struct timeval tv1, tv2;
297
gettimeofday(&tv1, NULL);
298
#endif /* DICT_HW_MEASURE_TIMINGS */
300
/* NOTE(review): the declaration of err and the statement returning it are not visible here. */
err = fftLoadBaseFragment(ps, icp, min2(CP_BLOCK, ps->ncp - icp), img);
302
#ifdef DICT_HW_MEASURE_TIMINGS
303
gettimeofday(&tv2, NULL);
304
/* Slot 4 is the one dictDestroyContext() prints as "Processing Template Image". */
ps->time[4] += (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
305
#endif /* DICT_HW_MEASURE_TIMINGS */
164
310
/*
 * Begins loading a template image: the visible part records the image
 * dimensions in the context (ps->width, ps->height).
 * With DICT_HW_MEASURE_TIMINGS defined, a start timestamp is taken first.
 *
 * NOTE(review): the rest of the function (use of img, the matching end
 * timestamp and the return value) is not part of this excerpt.
 */
int dictLoadTemplateImage(DICTContext ps, const unsigned char *img, int width, int height) {
313
#ifdef DICT_HW_MEASURE_TIMINGS
314
struct timeval tv1, tv2;
315
gettimeofday(&tv1, NULL);
316
#endif /* DICT_HW_MEASURE_TIMINGS */
167
318
ps->width = width;
168
319
ps->height = height;
226
382
/*
 * Pushes one image fragment through the visible pipeline:
 * fftCopyFragment -> fftLoadFragment -> fftPreprocessFragment ->
 * fftProcessFragment -> fftPostprocessFragment.
 *
 * The first non-zero error code from any stage is returned immediately.
 * The copy and load stages receive icp/ncp/input as passed in; the three
 * later stages receive min2(CP_BLOCK, ps->ncp - icp) as their count. Every
 * stage except fftCopyFragment is also given `stream`, which is NULL here.
 *
 * With DICT_HW_MEASURE_TIMINGS defined, local time[0..5] are added to
 * ps->time[5..10]; dictDestroyContext() prints those slots as
 * "Processing Images", "Copy Fragment", "Load Fragment", "Preprocessing",
 * "FFT" and "Postprocessing". All samples are taken against the single
 * start timestamp tv1, and any early error return skips the accumulation.
 *
 * NOTE(review): the declarations of err and the local time[] array, and
 * the end of the function, are not part of this excerpt.
 */
int dictLoadFragment(DICTContext ps, int icp, int ncp, const unsigned char *input) {
228
384
cudaStream_t stream = NULL;
386
#ifdef DICT_HW_MEASURE_TIMINGS
388
struct timeval tv1, tv2;
389
gettimeofday(&tv1, NULL);
390
#endif /* DICT_HW_MEASURE_TIMINGS */
230
392
err = fftCopyFragment(ps, icp, ncp, input);
231
393
if (err) return err;
395
#ifdef DICT_HW_MEASURE_TIMINGS
396
gettimeofday(&tv2, NULL);
397
/* Elapsed since tv1, i.e. the copy stage alone. */
time[1] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
398
#endif /* DICT_HW_MEASURE_TIMINGS */
233
400
err = fftLoadFragment(ps, icp, ncp, input, stream);
234
401
if (err) return err;
403
#ifdef DICT_HW_MEASURE_TIMINGS
404
gettimeofday(&tv2, NULL);
405
/* Elapsed since tv1 minus the copy time, i.e. the load stage alone. */
time[2] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec) - time[1];
406
#endif /* DICT_HW_MEASURE_TIMINGS */
236
408
err = fftPreprocessFragment(ps, icp, min2(CP_BLOCK, ps->ncp - icp), stream);
237
409
if (err) return err;
411
#ifdef DICT_HW_MEASURE_TIMINGS
412
gettimeofday(&tv2, NULL);
413
/* NOTE(review): only time[2] is subtracted from the elapsed-since-tv1 total, so this value still contains the copy time (time[1]). If a per-stage duration is intended, the sum of all earlier stages should be subtracted - confirm. */
time[3] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec) - time[2];
414
#endif /* DICT_HW_MEASURE_TIMINGS */
239
416
err = fftProcessFragment(ps, icp, min2(CP_BLOCK, ps->ncp - icp), stream);
240
417
if (err) return err;
419
#ifdef DICT_HW_MEASURE_TIMINGS
420
gettimeofday(&tv2, NULL);
421
/* NOTE(review): same pattern - the total elapsed time minus only the previous slot, not minus all earlier stages. */
time[4] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec) - time[3];
422
#endif /* DICT_HW_MEASURE_TIMINGS */
242
424
err = fftPostprocessFragment(ps, icp, min2(CP_BLOCK, ps->ncp - icp), stream);
243
425
if (err) return err;
427
#ifdef DICT_HW_MEASURE_TIMINGS
428
gettimeofday(&tv2, NULL);
429
/* NOTE(review): same pattern as above for the post-processing slot. */
time[5] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec) - time[4];
430
/* Slot 0 holds the whole tv1-to-now interval. */
time[0] = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec);
432
/* Fold the six local measurements into context slots 5..10. */
for (int i = 0; i < 6; i++) {
433
ps->time[i+5] += time[i];
435
#endif /* DICT_HW_MEASURE_TIMINGS */