1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
/* Cast the expression x to double; both the argument and the result are
 * parenthesized so the macro can be used safely inside larger expressions. */
#define DOUBLE(x) ((double)(x))
/*
 * In-place element-wise complex product: a[k] <- a[k] * b[k].
 *
 * Each thread handles exactly one element, k = threadIdx.x + blockIdx.x*size,
 * so `size` is the distance (in elements) between the first elements handled
 * by consecutive blocks.
 *
 * a    : left operand, overwritten with the product
 * b    : right operand, only read
 * size : per-block element stride used to form the index
 *
 * The kernel performs no bounds check; the launch configuration must keep
 * every computed index inside both arrays.
 * NOTE(review): presumably launched with blockDim.x <= size - confirm at the
 * call site.
 */
static __global__ void vecMul(cuComplex *a, cuComplex *b, int size) {
    const int idx = threadIdx.x + blockIdx.x*size;

    // Snapshot all four components first so the stores below cannot
    // disturb the operands of the second product.
    const float aRe = a[idx].x;
    const float aIm = a[idx].y;
    const float bRe = b[idx].x;
    const float bIm = b[idx].y;

    // (aRe + i*aIm) * (bRe + i*bIm)
    a[idx].y = aRe * bIm + aIm * bRe;
    a[idx].x = aRe * bRe - aIm * bIm;
}
/*
 * Copy 8-bit samples from b into the real-valued buffer a, converting each
 * byte to cufftReal.
 *
 * One thread moves one element. Block blockIdx.x reads starting at
 * blockIdx.x*bsize in b and writes starting at blockIdx.x*asize in a, so the
 * two buffers may use different per-block strides.
 *
 * a     : destination buffer
 * asize : per-block element stride of a
 * b     : source bytes
 * bsize : per-block element stride of b
 *
 * No bounds check is performed.
 * NOTE(review): looks like this widens rows of length bsize into padded rows
 * of length asize - confirm against the caller.
 */
static __global__ void vecPack(cufftReal *a, int asize, uint8_t *b, int bsize) {
    const int srcIdx = threadIdx.x + blockIdx.x*bsize;
    const unsigned int dstIdx = threadIdx.x + asize*blockIdx.x;

    const uint8_t sample = b[srcIdx];
    a[dstIdx] = sample;  // implicit integer -> cufftReal conversion
}
/*
 * Convert 8-bit samples from b to float and store them in three places:
 *   a  - the converted value, at threadIdx.x + blockIdx.x*asize
 *   c  - the converted value, at an offset position (see below)
 *   c2 - the squared value, at the same offset position as c
 *
 * One thread handles one sample, read from b[threadIdx.x + blockIdx.x*bsize].
 * The position in c/c2 is
 *   threadIdx.x + (blockIdx.x + coffset)*csize + coffset
 * i.e. with c viewed as rows of csize elements, the data is shifted by
 * coffset rows and coffset columns.
 *
 * b, bsize : source bytes and their per-block stride
 * a, asize : real-valued destination and its per-block stride
 * c, c2    : destinations for the value and its square
 * csize    : per-block stride of c and c2
 * coffset  : row/column shift applied when writing c and c2
 *
 * No bounds check is performed.
 * NOTE(review): presumably c/c2 carry a border of width coffset used by a
 * later summation step - confirm with the caller.
 */
static __global__ void vecPackBase(
    uint8_t *b, int bsize,
    cufftReal *a, int asize,
    float *c, float *c2, int csize, int coffset) {
    const unsigned int srcIdx = threadIdx.x + blockIdx.x*bsize;
    const unsigned int dstIdx = threadIdx.x + asize*blockIdx.x;
    const int shiftedIdx = threadIdx.x + (blockIdx.x+coffset)*csize + coffset;

    const float sample = b[srcIdx];  // byte widened to float once, reused below

    a[dstIdx] = sample;
    c[shiftedIdx] = sample;
    c2[shiftedIdx] = sample * sample;
}
/*
static __global__ void vecCompute(
uint16_t *items, float *res,
cufftReal *corr, float corr_scale,
float *lsum, float lsum_scale,
float *denom, float denom_scale
) {
int pos = items[threadIdx.x + blockIdx.x*BLOCK_SIZE_1D];//correct (row/column)?
res[pos] = (corr[pos] * corr_scale - lsum[pos]*lsum_scale) / (denom[pos] * denom_scale);
}
*/
/*
 * Per-element normalized difference:
 *
 *   res[k] = (corr[k]*corr_scale - lsum[k]*lsum_scale) / (denom[k]*denom_scale)
 *
 * with k = threadIdx.x + blockIdx.x*size. Elements whose denom[k] is zero are
 * skipped, leaving res[k] exactly as it was before the launch.
 *
 * res         : output buffer
 * corr        : first term of the numerator, scaled by corr_scale
 * lsum        : second term of the numerator, scaled by lsum_scale
 * denom       : denominator, scaled by denom_scale
 * size        : per-block element stride used to form the index
 *
 * No bounds check is performed.
 */
static __global__ void vecCompute(
    float *res,
    cufftReal *corr, float corr_scale,
    float *lsum, float lsum_scale,
    float *denom, float denom_scale,
    int size
) {
    const int idx = threadIdx.x + blockIdx.x*size;

    const float divisor = denom[idx];
    if (divisor == 0.0f) {
        return;  // avoid dividing by zero; res[idx] is left unchanged
    }

    const float numerator = corr[idx] * corr_scale - lsum[idx]*lsum_scale;
    res[idx] = numerator / (divisor * denom_scale);
}
|