65
// vecPackBase: kernel that copies one byte per thread from `b` into the
// cufftReal buffer `a`. Only part of the body is visible in this excerpt, so
// the notes below cover the visible statements only.
//   b, bsize       : byte input; bsize is the per-block stride used to index b
//   a, asize       : cufftReal output; asize is the per-block stride used to index a
//   c, c2          : float pointers; no access to them is visible here
//   csize, coffset : used in the computation of index `i` below
static __global__ void vecPackBase(
66
uint8_t *b, int bsize,
67
cufftReal *a, int asize,
68
float *c, float *c2, int csize, int coffset) {
71
// Index shifted by coffset in both terms: the block index is offset before
// being scaled by csize, and coffset is added again to the thread index.
// presumably used to address c / c2 -- its use is not visible here, TODO confirm
int i = threadIdx.x + (blockIdx.x+coffset)*csize + coffset;
73
// Read this thread's byte: element threadIdx.x of the blockIdx.x-th run of
// bsize bytes. NOTE(review): the declaration of `v` is not visible in this excerpt.
v = b[threadIdx.x + blockIdx.x*bsize];
75
// Store the value at the matching position of `a` (per-block stride asize).
a[threadIdx.x + asize*blockIdx.x] = v;
65
// vecBasePack: kernel that splits the 1D block index into a (x, y) pair and
// reads the byte at that position from `b`. The body is cut off after the load
// in this excerpt, so the notes below cover the visible statements only.
//   b, bsize    : byte input; bsize is the stride applied to y when indexing b
//   a, asize    : cufftReal pointer and int; no use is visible here
//   c, c2, csize: float pointers and int; no use is visible here
//   size        : exclusive upper bound applied to both x and y
//   blocks_size : divisor used to split blockIdx.x into y and a remainder
static __global__ void vecBasePack(
66
uint8_t *b, int bsize,
67
cufftReal *a, int asize,
68
float *c, float *c2, int csize,
69
int size, int blocks_size) {
71
// y = blockIdx.x / blocks_size, computed in float with the fast-division
// intrinsic and then truncated toward zero.
// NOTE(review): __fdividef is an approximate division; if the quotient lands
// just below an exact integer, truncation would yield y one too small --
// confirm this cannot happen for the grid sizes used by the caller.
int y = __float2int_rz(__fdividef(blockIdx.x, blocks_size));
72
// x = (blocks left over after removing y groups of blocks_size) * blockDim.x
// plus this thread's index within its block.
int x = (blockIdx.x - y * blocks_size) * blockDim.x + threadIdx.x ;
74
// Skip threads whose coordinates fall outside the size x size region.
if ((x<size)&&(y<size)) {
75
// Load the byte at (x, y) using stride bsize; the uint8_t is converted to float.
float v = b[x + y*bsize];
85
static __global__ void vecBasePackFast(
86
uint8_t *b, int bsize,
87
cufftReal *a, int asize,
88
float *c, float *c2, int csize,
89
int size, int blocks_shift) {
91
int y = blockIdx.x>>blocks_shift;
92
int x = (blockIdx.x - (y<<blocks_shift)) * blockDim.x + threadIdx.x ;
94
if ((x<size)&&(y<size)) {
95
float v = b[x + y*bsize];