/normxcorr/trunk : revision 20

To get this branch, use:

bzr branch
http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to dict_hw/src/normxcorr_hw.cu.h

Committer: Suren A. Chilingaryan
Date: 2009-12-13 02:20:05 UTC
Revision ID: csa@dside.dyndns.org-20091213022005-m932to8hhihwuw5r

Support for TIFF images in C code and stand-alone console application

files added:
dict_hw/src/dict_hw_test.cpp

dict_hw/src/dict_image.cpp

dict_hw/src/dict_image.h

dict_hw/src/normxcorr_hw_msg.cpp

files modified:
.bzrignore

dict_hw/CMakeLists.txt

dict_hw/matlab/normxcorr_hw.cu

dict_hw/src/CMakeLists.txt

dict_hw/src/dict_hw.cu

dict_hw/src/dict_hw.h

dict_hw/src/normxcorr_hw.cu.h

dict_hw/src/normxcorr_hw.h

dict_hw/src/normxcorr_hw_msg.h

Show diffs side-by-side

added added

removed removed

dict_hw/src/normxcorr_hw.cu.h

cufftDestroy(ps->cufft_c2r_plan);

}

if (ps->image_buf) {

dictImageFree(ps);

}

memset(ps, 0, sizeof(TProcessingState));

}

273

277

if (yend > maxy) maxy = yend;

274

278

}

275

279

276

cudaMemcpy2D(

277

img + i * alloc_size,

278

size * sizeof(uint8_t),

279

fullimg + (xstart * height + ystart),

280

height * sizeof(uint8_t),

281

size * sizeof(uint8_t),

282

size,

283

cudaMemcpyHostToHost

284

);

280

if (ps->matlab_mode) {

281

cudaMemcpy2D(

282

img + i * alloc_size,

283

size * sizeof(uint8_t),

284

fullimg + (xstart * height + ystart),

285

height * sizeof(uint8_t),

286

size * sizeof(uint8_t),

287

size,

288

cudaMemcpyHostToHost

289

);

290

} else {

291

cudaMemcpy2D(

292

img + i * alloc_size,

293

size * sizeof(uint8_t),

294

fullimg + (ystart * width + xstart),

295

width * sizeof(uint8_t),

296

size * sizeof(uint8_t),

297

size,

298

cudaMemcpyHostToHost

299

);

300

}

285

301

286

302

cudaMemcpy2DAsync(

287

303

cuda_input_buffer + i * side_alloc2, side_alloc * sizeof(uint8_t),

382

398

continue;

383

399

}

384

400

385

cudaMemcpy2D(

386

img + i * size2,//alloc_size,

387

size * sizeof(uint8_t),

388

fullimg + (xstart * height + ystart),

389

height * sizeof(uint8_t),

390

size * sizeof(uint8_t),

391

size,

392

cudaMemcpyHostToHost

393

);

401

if (ps->matlab_mode) {

402

cudaMemcpy2D(

403

img + i * size2,//alloc_size,

404

size * sizeof(uint8_t),

405

fullimg + (xstart * height + ystart),

406

height * sizeof(uint8_t),

407

size * sizeof(uint8_t),

408

size,

409

cudaMemcpyHostToHost

410

);

411

} else {

412

cudaMemcpy2D(

413

img + i * size2,//alloc_size,

414

size * sizeof(uint8_t),

415

fullimg + (ystart * width + xstart),

416

width * sizeof(uint8_t),

417

size * sizeof(uint8_t),

418

size,

419

cudaMemcpyHostToHost

420

);

421

}

394

422

}

395

423

return 0;

396

424

}

587

615

int ncp_alloc = ps->ncp_alloc_size;

588

616

int precision = ps->precision;

589

617

590

float *move_x = ps->points + 6 * ncp_alloc;

591

float *move_y = move_x + ncp_alloc;

592

593

cudaMemcpy2D(

594

move_x, ncp_alloc * sizeof(float),

595

ps->cuda_points, ncp_alloc * sizeof(float),

596

ps->ncp * sizeof(float), 2,

597

cudaMemcpyDeviceToHost

598

);

618

float *move_x, *move_y;

619

620

// We do not do a completely correct thing in non-matlab mode, the data

621

// is copied from image buffer non-transposed as it should be, but

622

// the processing code supports only matlab-mode and handles it as

623

// standard transposed data. Therefore, here we are swapping back the

624

// X and Y coords. But this adds some extra precision penalty.

625

// Therefore, it is better to use matlab mode until the computation

626

// code is changed (this implementation is just done to accept

627

// images from user apps without transposing)

628

if (ps->matlab_mode) {

629

move_x = ps->points + 6 * ncp_alloc;

630

move_y = move_x + ncp_alloc;

631

632

cudaMemcpy2D(

633

move_x, ncp_alloc * sizeof(float),

634

ps->cuda_points, ncp_alloc * sizeof(float),

635

ps->ncp * sizeof(float), 2,

636

cudaMemcpyDeviceToHost

637

);

638

} else {

639

move_y = ps->points + 6 * ncp_alloc;

640

move_x = move_x + ncp_alloc;

641

642

cudaMemcpy2D(

643

move_y, ncp_alloc * sizeof(float),

644

ps->cuda_points, ncp_alloc * sizeof(float),

645

ps->ncp * sizeof(float), 2,

646

cudaMemcpyDeviceToHost

647

);

648

}

599

649

600

650

float *data_x, *data_y;

601

651

if (ps->stored) {

Older »