/normxcorr/trunk : revision 14

To get this branch, use:

bzr branch http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/normxcorr_hw.cu

Committer: Suren A. Chilingaryan
Date: 2009-12-10 05:17:54 UTC
Revision ID: csa@dside.dyndns.org-20091210051754-1y0pgbfodgwr3okc

memcpy3D

files modified:
cuda/normxcorr_hw.cu

Show diffs side-by-side

Diff legend: added

Diff legend: removed

cuda/normxcorr_hw.cu

297

}

298

299

cudaMemcpy2D(

300

img,

300

img + i * alloc_size,

301

size * sizeof(uint8_t),

302

fullimg + (xstart * height + ystart),

303

height * sizeof(uint8_t),

308

309

cudaMemcpy2D(

310

cuda_input_buffer + i * side_alloc2, side_alloc * sizeof(uint8_t),

311

img, size * sizeof(uint8_t),

311

img + i * alloc_size, size * sizeof(uint8_t),

312

size * sizeof(uint8_t), size, cudaMemcpyHostToDevice

313

);

314

426

}

427

428

cudaMemcpy2D(

429

img,

429

img + i * size2,//alloc_size,

430

size * sizeof(uint8_t),

431

fullimg + (xstart * height + ystart),

432

height * sizeof(uint8_t),

434

size,

435

cudaMemcpyHostToHost

436

);

437

438

cudaMemcpy2D(

439

cuda_input_buffer + i * side_alloc2, side_alloc * sizeof(uint8_t),

440

img, size * sizeof(uint8_t),

440

img + i * size2, size * sizeof(uint8_t),

441

size * sizeof(uint8_t), size, cudaMemcpyHostToDevice

442

);

443

444

}

444

445

446

cudaMemcpy3DParms copy_params = { 0 };

447

copy_params.dstPtr = make_cudaPitchedPtr(

448

cuda_input_buffer, side_alloc * sizeof(uint8_t), side_alloc, side_alloc

449

);

450

copy_params.srcPtr = make_cudaPitchedPtr(

451

img, size * sizeof(uint8_t), size, size

452

);

453

copy_params.extent = make_cudaExtent(size * sizeof(uint8_t), size, ncp);

454

copy_params.kind = cudaMemcpyHostToDevice;

455

cudaMemcpy3D(&copy_params);

456

457

445

458

dim3 block_2d(BLOCK_SIZE_2D, BLOCK_SIZE_2D, 1);

446

459

dim3 block_side_cp(SIDE_BLOCK_SIZE, CP_BLOCK_SIZE, 1);

447

460

448

//int input_blocks = calc_blocks(size2, BLOCK_SIZE_2D);

449

450

451

461

int cp_blocks = calc_blocks(ncp, CP_BLOCK_SIZE);

452

462

int cp_blocks1 = calc_blocks(ncp, BLOCK_SIZE_1D);

453

463

int side_blocks = calc_blocks(size, SIDE_BLOCK_SIZE);

454

464

int fft_blocks = calc_blocks(fft_size, SIDE_BLOCK_SIZE);

455

465

int input_blocks = side_blocks * side_blocks * SIDE_BLOCK_SIZE;

456

466

457

458

467

// Computing sum and std

459

468

int32_t *stat_buf = (int*)ps->cuda_temp_buffer;

460

469

Older »