/normxcorr/trunk

To get this branch, use:
bzr branch http://suren.me/webbzr/normxcorr/trunk

« back to all changes in this revision

Viewing changes to cuda/normxcorr_hw.h

  • Committer: Suren A. Chilingaryan
  • Date: 2009-12-06 01:52:56 UTC
  • Revision ID: csa@dside.dyndns.org-20091206015256-evn0sne8d18ovm8o
A few more computations have been moved to CUDA

Show diffs side-by-side

added added

removed removed

Lines of Context:
24
24
#define BLOCK_SIZE_1D 64
25
25
#define BLOCK_SIZE_2D 16
26
26
 
 
27
#define CP_BLOCK 256            // should be divisible by CP_BLOCK_SIZE, BLOCK_SIZE_xD
 
28
#define CP_BLOCK_SIZE BLOCK_SIZE_2D
 
29
#define SIDE_BLOCK_SIZE BLOCK_SIZE_2D
 
30
 
 
31
 
27
32
//#define VALIDATE_LSUM
28
33
 
29
34
 
30
35
typedef enum {
31
36
    ACTION_SETUP = 1,
32
37
    ACTION_PREPARE = 2,
 
38
    ACTION_SET_POINTS = 3,
 
39
    ACTION_GET_POINTS = 4,
33
40
    ACTION_COMPUTE_BASE = 10,
34
41
    ACTION_COMPUTE_FRAGMENT = 11,
 
42
    ACTION_COMPUTE = 12
35
43
} TAction;
36
44
 
37
45
typedef enum {
43
51
 
44
52
struct STProcessingState {
45
53
    cufftComplex *cuda_base_buffer;     // Stored FFT's of the template image
46
 
    cufftComplex *cuda_data_buffer;     // Main computational buffer
 
54
    void *cuda_data_buffer;             // Main computational buffer, temporary
47
55
    cufftReal *cuda_temp_buffer;        // Temporary buffer for FFT inputs
48
56
    cufftReal *cuda_result_buffer;      // Temporary buffer for FFT outputs
49
57
    float *cuda_final_buffer;           // Ultimate output
53
61
    
54
62
    float *cuda_lsum_buffer;
55
63
    float *cuda_denom_buffer;
 
64
    
 
65
    float *cuda_cp;                     // Various information on control points:
 
66
                                        // 0: data_x
 
67
                                        // 1: data_y
 
68
                                        // 2: sum
 
69
                                        // 3: denom
 
70
    
 
71
    float *data_x;                      // x coordinates of control points
 
72
    float *data_y;                      // y coordinates of control points
 
73
    uint8_t *input_buffer;
56
74
 
57
75
    int ncp;                    // Number of control points
58
76
    int corr_size;              // CORR_SIZE 
 
77
    
 
78
    int side_alloc_size;        // allocation size for 1 side of fft
59
79
 
60
80
    int fft_size;               // Matrix Size for FFT (base_size + input_size - 1)
61
81
    int fft_size2;              // size * size