/normxcorr/trunk

To get this branch, use:
bzr branch http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/normxcorr_hw.h

  • Committer: Suren A. Chilingaryan
  • Date: 2009-12-10 03:16:21 UTC
  • Revision ID: csa@dside.dyndns.org-20091210031621-2a15m2tdumdz3s39
Block computational kernels

Show diffs side-by-side

added added

removed removed

Lines of Context:
23
23
 
24
24
#include <cudpp.h>
25
25
 
26
 
#define BLOCK_SIZE_1D 64
27
 
#define BLOCK_SIZE_2D 16
 
26
#define BLOCK_SIZE_1D           64
 
27
#define BLOCK_SIZE_2D           16
28
28
 
29
29
#define CP_BLOCK 256            // should be divisible by CP_BLOCK_SIZE, BLOCK_SIZE_xD
30
30
#define CP_BLOCK_SIZE BLOCK_SIZE_2D
77
77
 
78
78
    void *cuda_temp_buffer;             // Main computational buffer, temporary
79
79
 
80
 
    cufftComplex *cuda_fft_buffer;      // Stored FFT's of the template image
81
 
    
82
 
    cufftReal *cuda_result_buffer;      // Temporary buffer for FFT outputs
83
 
    float *cuda_final_buffer;           // Ultimate output
 
80
    cufftComplex *cuda_fft_cache;       // Stored FFT's of the template image
84
81
    
85
82
    float *cuda_lsum_temp;              // Temporary buffer for local sum comp.
86
83