/normxcorr/trunk

To get this branch, use:
bzr branch http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/local_sum.cu

  • Committer: Suren A. Chilingaryan
  • Date: 2009-12-10 15:47:44 UTC
  • Revision ID: csa@dside.dyndns.org-20091210154744-min3x71y3tgrkvpu
Optimize FFT size

Show diffs side-by-side

added added

removed removed

Lines of Context:
26
26
    int short_size = ps->lsum_short_aligned_size;
27
27
 
28
28
    int fft_size = ps->fft_size;
 
29
//    Use real size everywhere
 
30
//    int fft_size = ps->fft_real_size;
29
31
 
30
32
    cudaMemset(tmp1, 0, fft_size * ps->lsum_alloc_size * sizeof(float));
31
33
    cudaMemset(tmp2, 0, fft_size * ps->lsum_alloc_size * sizeof(float));
47
49
    cudppMultiScan(ps->cudpp_plan, tmp2, denom, temp_size, size + lsize - 1);
48
50
 
49
51
    dim3 grid_dim2(short_size / BLOCK_SIZE_2D, short_size / BLOCK_SIZE_2D, 1);
50
 
    transpose2<<<grid_dim2,block_dim,0,stream>>>(lsum, denom, tmp1, tmp2, alloc_size, ps->fft_size, lsize);
 
52
    transpose2<<<grid_dim2,block_dim,0,stream>>>(lsum, denom, tmp1, tmp2, alloc_size, fft_size, lsize);
51
53
 
52
54
    return 0;
53
55
}