273
277
if (yend > maxy) maxy = yend;
277
img + i * alloc_size,
278
size * sizeof(uint8_t),
279
fullimg + (xstart * height + ystart),
280
height * sizeof(uint8_t),
281
size * sizeof(uint8_t),
280
if (ps->matlab_mode) {
282
img + i * alloc_size,
283
size * sizeof(uint8_t),
284
fullimg + (xstart * height + ystart),
285
height * sizeof(uint8_t),
286
size * sizeof(uint8_t),
292
img + i * alloc_size,
293
size * sizeof(uint8_t),
294
fullimg + (ystart * width + xstart),
295
width * sizeof(uint8_t),
296
size * sizeof(uint8_t),
286
302
cudaMemcpy2DAsync(
287
303
cuda_input_buffer + i * side_alloc2, side_alloc * sizeof(uint8_t),
386
img + i * size2,//alloc_size,
387
size * sizeof(uint8_t),
388
fullimg + (xstart * height + ystart),
389
height * sizeof(uint8_t),
390
size * sizeof(uint8_t),
401
if (ps->matlab_mode) {
403
img + i * size2,//alloc_size,
404
size * sizeof(uint8_t),
405
fullimg + (xstart * height + ystart),
406
height * sizeof(uint8_t),
407
size * sizeof(uint8_t),
413
img + i * size2,//alloc_size,
414
size * sizeof(uint8_t),
415
fullimg + (ystart * width + xstart),
416
width * sizeof(uint8_t),
417
size * sizeof(uint8_t),
587
615
int ncp_alloc = ps->ncp_alloc_size;
588
616
int precision = ps->precision;
590
float *move_x = ps->points + 6 * ncp_alloc;
591
float *move_y = move_x + ncp_alloc;
594
move_x, ncp_alloc * sizeof(float),
595
ps->cuda_points, ncp_alloc * sizeof(float),
596
ps->ncp * sizeof(float), 2,
597
cudaMemcpyDeviceToHost
618
float *move_x, *move_y;
620
// We do not do a completely correct thing in non-matlab mode, the data
621
// is copied from image buffer non-transposed as it should be, but
622
// the processing code supports only matlab-mode and handles it as
623
// standard transposed data. Therefore, here we swap back the
624
// X and Y coords. But this adds some extra precision penalty.
625
// Therefore, it is better to use matlab mode until the computation
626
// code is changed (this implementation is just done to accept
627
// images from user apps without transposing)
628
if (ps->matlab_mode) {
629
move_x = ps->points + 6 * ncp_alloc;
630
move_y = move_x + ncp_alloc;
633
move_x, ncp_alloc * sizeof(float),
634
ps->cuda_points, ncp_alloc * sizeof(float),
635
ps->ncp * sizeof(float), 2,
636
cudaMemcpyDeviceToHost
639
move_y = ps->points + 6 * ncp_alloc;
640
// move_x has not been assigned on this (non-matlab) path, so it must be
// derived from move_y: the 2-row copy that follows uses move_y as the
// destination with a pitch of ncp_alloc floats, which places the second
// row at move_y + ncp_alloc.
move_x = move_y + ncp_alloc;
643
move_y, ncp_alloc * sizeof(float),
644
ps->cuda_points, ncp_alloc * sizeof(float),
645
ps->ncp * sizeof(float), 2,
646
cudaMemcpyDeviceToHost
600
650
float *data_x, *data_y;
601
651
if (ps->stored) {