/normxcorr/trunk

To get this branch, use:
bzr branch http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/normxcorr_hw.h

  • Committer: Suren A. Chilingaryan
  • Date: 2009-12-09 13:00:50 UTC
  • Revision ID: csa@dside.dyndns.org-20091209130050-z27djqs8ed68fqnk
Complete elimination of cpcorr

Show diffs side-by-side

added added

removed removed

Lines of Context:
13
13
# include <stdint.h>
14
14
#endif
15
15
 
 
16
#include <mex.h>
 
17
 
16
18
#include <cuda.h>
17
19
#include <cuda_runtime.h>
18
20
 
31
33
 
32
34
//#define VALIDATE_LSUM
33
35
//#define VALIDATE_PEAK
 
36
#define USE_UNDOCUMENTED
34
37
 
35
38
typedef enum {
36
39
    ACTION_SETUP = 1,
37
 
    ACTION_PREPARE = 2,
38
 
    ACTION_SET_POINTS = 3,
39
 
    ACTION_GET_POINTS = 4,
40
 
    ACTION_COMPUTE_BASE = 10,
 
40
#ifdef VALIDATE_LSUM
 
41
    ACTION_COMPUTE_BASE_FRAGMENT = 2,
 
42
#endif /* VALIDATE_LSUM */
 
43
    ACTION_SET_BASE_POINTS = 3,
 
44
    ACTION_COMPUTE_BASE = 4,
 
45
    ACTION_PREPARE = 5,
 
46
#ifdef VALIDATE_PEAK
41
47
    ACTION_COMPUTE_FRAGMENT = 11,
42
 
    ACTION_COMPUTE = 12,
 
48
    ACTION_GET_CORRECTIONS = 15,
 
49
#endif /* VALIDATE_PEAK */
 
50
    ACTION_SET_POINTS = 12,
 
51
    ACTION_COMPUTE = 13,
 
52
    ACTION_GET_POINTS = 14,
43
53
} TAction;
44
54
 
45
55
typedef enum {
50
60
} TError;
51
61
 
52
62
struct STProcessingState {
53
 
    cufftComplex *cuda_base_buffer;     // Stored FFT's of the template image
54
 
    void *cuda_data_buffer;             // Main computational buffer, temporary
55
 
    cufftReal *cuda_temp_buffer;        // Temporary buffer for FFT inputs, pre-zeroed
 
63
    int stored;                         // flag indicating whether coordinates are already stored in coords
 
64
    mxArray *coords;                    // Matlab array with current coordinates
 
65
    float *points;                      // various information on control points
 
66
                                        //      base_x, base_y, data_x, data_y
 
67
                                        //      base_frac_x, base_frac_y
 
68
                                        //      move_x, move_y
 
69
 
 
70
    uint8_t *banlist;                   // control points banned from computations for various reasons
 
71
 
 
72
    uint8_t *input_buffer;              // Input Image buffer / Host
 
73
    uint8_t *cuda_input_buffer;         // Input Image buffer / Device
 
74
 
 
75
    cufftReal *cuda_base_buffer;        // Temporary buffer for FFT inputs, pre-zeroed
 
76
    cufftReal *cuda_data_buffer;        // Temporary buffer for FFT inputs, pre-zeroed
 
77
 
 
78
    void *cuda_temp_buffer;             // Main computational buffer, temporary
 
79
 
 
80
    cufftComplex *cuda_fft_buffer;      // Stored FFT's of the template image
 
81
    
56
82
    cufftReal *cuda_result_buffer;      // Temporary buffer for FFT outputs
57
83
    float *cuda_final_buffer;           // Ultimate output
58
 
    uint8_t *cuda_input_buffer;         // Input buffer
59
84
    
60
85
    float *cuda_lsum_temp;              // Temporary buffer for local sum comp.
61
86
    
67
92
                                        // 1: data_y
68
93
                                        // 2: sum
69
94
                                        // 3: denom
70
 
    
71
 
    float *data_x;                      // x coordinates of control points
72
 
    float *data_y;                      // y coordinates of control points
73
 
    uint8_t *input_buffer;
 
95
 
 
96
    int fft_initialized;                // Flag indicating if CUFFT plan is initialized
 
97
    cufftHandle cufft_plan;
 
98
    cufftHandle cufft_r2c_plan;
 
99
    cufftHandle cufft_c2r_plan;
 
100
    
 
101
    int cudpp_initialized;              // Flag indicating if CUDPP plan is initialized
 
102
    CUDPPHandle cudpp_plan;
 
103
    
 
104
    int mode;                   // 1 - image mode, 0 - fragment mode
 
105
    int base_mode;              // 1 - image mode, 0 - fragment mode
 
106
    
 
107
    float minx,miny,maxx,maxy;  // Coordinates of the actually used rectangle of the image
74
108
 
75
109
    int ncp;                    // Number of control points
76
110
    int ncp_alloc_size;
77
111
 
78
112
    int corr_size;              // CORR_SIZE 
 
113
    int precision;              // PRECISION
79
114
    
80
115
    int side_alloc_size;        // allocation size for 1 side of fft
81
116
 
89
124
    int lsum_aligned_size;      // CUDA optimized lsum_temp_size
90
125
    int lsum_short_aligned_size;// CUDA optimized lsum_temp_size - lsum_size - 1
91
126
 
92
 
    int fft_initialized;        // Flag indicating if CUFFT plan is initialized
93
 
    cufftHandle cufft_plan;
94
 
    cufftHandle cufft_r2c_plan;
95
 
    cufftHandle cufft_c2r_plan;
96
 
    
97
 
    int cudpp_initialized;      // Flag indicating if CUDPP plan is initialized
98
 
    CUDPPHandle cudpp_plan;
99
127
};
100
128
 
101
129
typedef struct STProcessingState TProcessingState;
102
130
 
 
131
#ifndef EXTERN_C
 
132
# ifdef __cplusplus
 
133
   #define EXTERN_C extern "C"
 
134
# else
 
135
   #define EXTERN_C extern
 
136
# endif
 
137
#endif
 
138
 
 
139
#ifdef USE_UNDOCUMENTED
 
140
EXTERN_C mxArray *mxCreateSharedDataCopy(const mxArray *pr);
 
141
#endif /* USE_UNDOCUMENTED */
 
142
 
103
143
#endif /* NORMXCORR_HW_H */