17
17
a[threadIdx.x + asize*blockIdx.x] = b[i];
20
static __global__ void vecPackBase(
21
uint8_t *b, int bsize,
22
cufftReal *a, int asize,
23
float *c, float *c2, int csize, int coffset) {
26
int i = threadIdx.x + (blockIdx.x+coffset)*csize + coffset;
28
v = b[threadIdx.x + blockIdx.x*bsize];
30
a[threadIdx.x + asize*blockIdx.x] = v;
21
38
static __global__ void vecCompute(
22
39
uint16_t *items, float *res,
23
40
cufftReal *corr, float corr_scale,
24
41
float *lsum, float lsum_scale,
25
42
float *denom, float denom_scale
27
int pos = items[threadIdx.x + blockIdx.x*BLOCK_SIZE];//correct (row/column)?
44
int pos = items[threadIdx.x + blockIdx.x*BLOCK_SIZE_1D];//correct (row/column)?
28
45
res[pos] = (corr[pos] * corr_scale - lsum[pos]*lsum_scale) / (denom[pos] * denom_scale);
49
static __global__ void vecCompute(
51
cufftReal *corr, float corr_scale,
52
float *lsum, float lsum_scale,
53
float *denom, float denom_scale,
56
int pos = threadIdx.x + blockIdx.x*size;
59
res[pos] = (corr[pos] * corr_scale - lsum[pos]*lsum_scale) / (denom[pos] * denom_scale);