/normxcorr/trunk : revision 6

To get this branch, use:

bzr branch
http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/normxcorr_hw.h

Committer: Suren A. Chilingaryan
Date: 2009-12-06 01:52:56 UTC
Revision ID: csa@dside.dyndns.org-20091206015256-evn0sne8d18ovm8o

A few more computations are moved to CUDA

files modified:
automate_image.m

cuda/normxcorr_hw.cu

cuda/normxcorr_hw.h

cuda/normxcorr_hw_kernel.cu

Show diffs side-by-side

added added

removed removed

cuda/normxcorr_hw.h

// Base block sizes (presumably CUDA thread-block edge lengths for 1D and
// 2D kernel launches -- confirm against the kernels that use them).
#define BLOCK_SIZE_1D   64
#define BLOCK_SIZE_2D   16

// Both of these are aliases of the 2D block size.
#define CP_BLOCK_SIZE   BLOCK_SIZE_2D
#define SIDE_BLOCK_SIZE BLOCK_SIZE_2D

// Must be divisible by CP_BLOCK_SIZE and by every BLOCK_SIZE_xD
// (256 is a multiple of both 16 and 64).
#define CP_BLOCK        256

// Compile-time switch, disabled by default (presumably enables checking of
// the lsum computation -- confirm where VALIDATE_LSUM is tested).
//#define VALIDATE_LSUM

// Action codes. Every enumerator carries an explicit numeric value, so the
// numbers stay fixed even if entries are later inserted or reordered.
// NOTE(review): presumably these codes select the operation requested from
// the host-side entry point -- confirm against normxcorr_hw.cu.
typedef enum {
    // Codes 1..4: setup / preparation / control-point transfer (going by the names).
    ACTION_SETUP            = 1,
    ACTION_PREPARE          = 2,
    ACTION_SET_POINTS       = 3,
    ACTION_GET_POINTS       = 4,

    // Codes 10..12: computation requests (going by the names).
    ACTION_COMPUTE_BASE     = 10,
    ACTION_COMPUTE_FRAGMENT = 11,
    ACTION_COMPUTE          = 12
} TAction;

typedef enum {

struct STProcessingState {

cufftComplex *cuda_base_buffer; // Stored FFT's of the template image

cufftComplex *cuda_data_buffer; // Main computational buffer

void *cuda_data_buffer; // Main computational buffer, temporary

cufftReal *cuda_temp_buffer; // Temporary buffer for FFT inputs

cufftReal *cuda_result_buffer; // Temporary buffer for FFT outputs

float *cuda_final_buffer; // Ultimate output

float *cuda_lsum_buffer;

float *cuda_denom_buffer;

float *cuda_cp; // Various information on control points:

// 0: data_x

// 1: data_y

// 2: sum

// 3: denom

float *data_x; // x coordinates of control points

float *data_y; // y coordinates of control points

uint8_t *input_buffer;

int ncp; // Number of control points

int corr_size; // CORR_SIZE

int side_alloc_size; // allocation size for 1 side of fft

int fft_size; // Matrix Size for FFT (base_size + input_size - 1)

int fft_size2; // size * size

Older »