/normxcorr/trunk : revision 4

To get this branch, use:

bzr branch
http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/normxcorr_hw_kernel.cu

Committer: Suren A. Chilingaryan
Date: 2009-12-02 05:08:22 UTC
Revision ID: csa@dside.dyndns.org-20091202050822-n6ouznm1zp2n2i5l

Instead of transferring them, compute local sums and denormals on board

files added:
cuda/INFO

cuda/local_sum.cu

cuda/local_sum.h

cuda/local_sum_kernel.cu

cuda/normxcorr_hw.h

files modified:
automate_image.m

cuda/Makefile

cuda/normxcorr_hw.cu

cuda/normxcorr_hw_kernel.cu

Show diffs side-by-side

added added

removed removed

cuda/normxcorr_hw_kernel.cu

a[threadIdx.x + asize*blockIdx.x] = b[i];

}

/*
 * Converts one byte of `b` per thread to float and stores it in three places:
 * the value itself in `a` and `c`, and its square in `c2`.
 *
 *   b, bsize      source bytes; block k reads starting at b[k * bsize]
 *   a, asize      destination; block k writes starting at a[k * asize]
 *   c, c2         destinations for the value and the squared value
 *   csize,coffset block k, thread t writes at (k + coffset) * csize + t + coffset
 *
 * There is no bounds check: every launched thread reads and writes.
 * NOTE(review): presumably one block per image row and one thread per
 * column, with `coffset` acting as a border in `c`/`c2` -- confirm at caller.
 */
static __global__ void vecPackBase(
    uint8_t *b, int bsize,
    cufftReal *a, int asize,
    float *c, float *c2, int csize, int coffset) {
    // Kept unsigned so the index arithmetic matches the built-in variables' type.
    const unsigned int col = threadIdx.x;
    const unsigned int row = blockIdx.x;

    // Shared destination index for both `c` and `c2`.
    const int dst = col + (row + coffset) * csize + coffset;

    // Implicit uint8_t -> float conversion.
    const float value = b[col + row * bsize];

    a[col + asize * row] = value;
    c[dst] = value;
    c2[dst] = value * value;
}

/*
 * For each thread, looks up a position in `items` and writes
 *
 *   res[pos] = (corr[pos] * corr_scale - lsum[pos] * lsum_scale)
 *              / (denom[pos] * denom_scale)
 *
 *   items   per-thread positions; thread t of block k reads
 *           items[t + k * BLOCK_SIZE_1D]
 *   res     output buffer, written at `pos`
 *   corr, lsum, denom   input buffers, read at `pos`
 *   *_scale scalar factors applied to the matching buffer
 *
 * There is no bounds check on the `items` lookup.
 * NOTE(review): presumably launched with BLOCK_SIZE_1D threads per block and
 * `items` padded to a whole number of blocks -- confirm at caller.
 * NOTE(review): unlike the overload taking `size`, this one does not skip
 * entries where denom[pos] is zero -- confirm that is intended.
 */
static __global__ void vecCompute(
    uint16_t *items, float *res,
    cufftReal *corr, float corr_scale,
    float *lsum, float lsum_scale,
    float *denom, float denom_scale
) {
    // Only the BLOCK_SIZE_1D lookup is kept; the older BLOCK_SIZE variant
    // declared `pos` a second time, which is a redeclaration error.
    int pos = items[threadIdx.x + blockIdx.x*BLOCK_SIZE_1D];//correct (row/column)?

    res[pos] = (corr[pos] * corr_scale - lsum[pos]*lsum_scale) / (denom[pos] * denom_scale);
}

static __global__ void vecCompute(

float *res,

cufftReal *corr, float corr_scale,

float *lsum, float lsum_scale,

float *denom, float denom_scale,

int size

) {

int pos = threadIdx.x + blockIdx.x*size;

if (denom[pos]) {

res[pos] = (corr[pos] * corr_scale - lsum[pos]*lsum_scale) / (denom[pos] * denom_scale);

}

Older »