/normxcorr/trunk

To get this branch, use:
bzr branch http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/normxcorr_hw.h

  • Committer: Suren A. Chilingaryan
  • Date: 2009-12-06 12:27:00 UTC
  • Revision ID: csa@dside.dyndns.org-20091206122700-88vy44g7b0bn1fi3
FindPeak optimization

Show diffs side-by-side

added added

removed removed

Lines of Context:
30
30
 
31
31
 
32
32
//#define VALIDATE_LSUM
33
 
 
 
33
//#define VALIDATE_PEAK
34
34
 
35
35
typedef enum {
36
36
    ACTION_SETUP = 1,
39
39
    ACTION_GET_POINTS = 4,
40
40
    ACTION_COMPUTE_BASE = 10,
41
41
    ACTION_COMPUTE_FRAGMENT = 11,
42
 
    ACTION_COMPUTE = 12
 
42
    ACTION_COMPUTE = 12,
43
43
} TAction;
44
44
 
45
45
typedef enum {
52
52
struct STProcessingState {
53
53
    cufftComplex *cuda_base_buffer;     // Stored FFT's of the template image
54
54
    void *cuda_data_buffer;             // Main computational buffer, temporary
55
 
    cufftReal *cuda_temp_buffer;        // Temporary buffer for FFT inputs
 
55
    cufftReal *cuda_temp_buffer;        // Temporary buffer for FFT inputs, pre-zeroed
56
56
    cufftReal *cuda_result_buffer;      // Temporary buffer for FFT outputs
57
57
    float *cuda_final_buffer;           // Ultimate output
58
58
    uint8_t *cuda_input_buffer;         // Input buffer
73
73
    uint8_t *input_buffer;
74
74
 
75
75
    int ncp;                    // Number of control points
 
76
    int ncp_alloc_size;
 
77
 
76
78
    int corr_size;              // CORR_SIZE 
77
79
    
78
80
    int side_alloc_size;        // allocation size for 1 side of fft
79
81
 
80
82
    int fft_size;               // Matrix Size for FFT (base_size + input_size - 1)
81
 
    int fft_size2;              // size * size
82
83
    int fft_alloc_size;         // cuda optimized size2
83
 
    int fft_inner_size;         // size * (size/2 + 1), R2C/C2R
84
84
    
85
85
    int subimage_size;          // Size of neighborhood (4*corr_size + 1)
86
86
    int lsum_size;              // Dimensions of local sums (2*corr_size + 1)