/*
 * /normxcorr/trunk
 *
 * To get this branch, use:
 *   bzr branch http://suren.me/webbzr/normxcorr/trunk
 */
/*
 * Fixed-width integer names.
 *
 * When _WIN32 or _WIN64 is defined, the 8/16/32-bit names are typedef'ed
 * from the UINT8..INT32 types (expected to be provided by <windows.h>);
 * otherwise <stdint.h> is included instead.
 * Note that no 64-bit names (int64_t/uint64_t) are defined on the Windows
 * branch.
 */
#if defined(_WIN32) || defined(_WIN64)
# include <windows.h>
    typedef UINT8 uint8_t;
    typedef UINT16 uint16_t;
    typedef UINT32 uint32_t;
    typedef INT8 int8_t;
    typedef INT16 int16_t;
    typedef INT32 int32_t;
#else
# include <stdint.h>
#endif

/*
 * 16-byte alignment qualifiers.
 *
 * Exactly one of the two macros is non-empty in any build:
 *   - MSVCPP non-zero: ALGNW expands to __declspec(align(16)), ALGNL is empty.
 *   - otherwise (including MSVCPP undefined, which #if treats as 0):
 *     ALGNW is empty, ALGNL expands to __attribute__((aligned(16))).
 *
 * NOTE(review): presumably both are written on the same declaration, ALGNW
 * in the leading position and ALGNL in the trailing one, so the declaration
 * compiles with either compiler family -- confirm against the users of
 * these macros. MSVCPP itself is not defined in this file.
 */
#if MSVCPP
# define ALGNW __declspec(align(16))
# define ALGNL
#else
# define ALGNW
# define ALGNL __attribute__((aligned(16)))
#endif


/*
 * Minimum / maximum helpers.
 *
 * These are plain macros: every argument may be evaluated more than once,
 * so do not pass expressions with side effects (i++, function calls that
 * mutate state, ...). When both operands compare equal the second one is
 * the result.
 */
#define max2(a,b) ((a) > (b) ? (a) : (b))
#define min2(a,b) ((a) < (b) ? (a) : (b))

/* Three- and four-way maximum, built from pairwise max2() comparisons. */
#define max3(a,b,c) max2(max2(a,b), c)
#define max4(a,b,c,d) max2(max2(a,b), max2(c,d))

/*
 * calc_blocks: number of `rounding`-sized blocks needed to hold `size`
 *              elements, i.e. size / rounding rounded up.
 * calc_alloc:  `size` rounded up to the next multiple of `rounding`.
 *
 * Both are macros and evaluate their arguments more than once. The
 * quotient/remainder form is used instead of (size + rounding - 1) so that
 * no intermediate sum is formed.
 */
#define calc_blocks(size,rounding) (((size)/(rounding)) + (((size)%(rounding))?1:0))
#define calc_alloc(size,rounding) (calc_blocks(size,rounding)*(rounding))

/*
 * Bit-index lookup table used by get_power().
 *
 * For a value with a single bit set, the top five bits of
 * (uint32_t)value * 0x077CB531 select the entry that holds the position
 * of that bit (0..31).
 */
static const char debruijn[32] = {
     0,  1, 28,  2, 29, 14, 24,  3,
    30, 22, 20, 15, 25, 17,  4,  8,
    31, 27, 13, 23, 21, 19, 16,  7,
    26, 12, 18,  6, 11,  5, 10,  9
};

/*
 * Round n up to the nearest power of two.
 *
 * Returns n itself when it already is a power of two, otherwise the
 * smallest power of two above it. For n <= 0 the result is 0.
 *
 * The work is done on an unsigned copy: decrementing, shifting and
 * incrementing a signed int overflows (undefined behaviour) for
 * n > 2^30 and for INT_MIN, and right-shifting a negative int is
 * implementation-defined. For n > 2^30 the mathematical result (2^31)
 * does not fit in an int; the unsigned value 0x80000000 is converted
 * back, which yields INT_MIN on the usual two's-complement compilers.
 *
 * Assumes int is at most 32 bits wide (shifts stop at 16).
 */
static inline int next_power(int n) {
    unsigned int v = (unsigned int)n;

    v--;            /* so that exact powers of two map onto themselves */
    v |= v >> 1;    /* smear the highest set bit into every lower bit ... */
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    v++;            /* ... then 0..01..1 + 1 is the next power of two */

    return (int)v;
}

/*
 * Return the exponent k such that n == 2^k, or -1 when n is not a
 * positive power of two.
 *
 * Zero and negative values are rejected up front: 0 passes the
 * n & (n - 1) test but is not a power of two, and evaluating n - 1 for
 * INT_MIN would overflow.
 *
 * For a single-bit value the product with 0x077CB531 places a distinct
 * 5-bit pattern in the top bits, which indexes the debruijn[] table to
 * recover the bit position.
 */
static inline int get_power(int n) {
    if (n <= 0 || (n & (n - 1))) return -1;
    return debruijn[((uint32_t)n * 0x077CB531U) >> 27];
}

#ifdef DICT_HW_MEASURE_TIMINGS
#include <string.h>
#include "normxcorr_hw_msg.h"
/*
 * Choose the display unit for a duration and scale the value to match.
 *
 * Values above 10000000 are divided by 1000000 and labelled "s ", values
 * above 10000 are divided by 1000 and labelled "ms", anything else is left
 * untouched and labelled "us". Returns the two-character unit label.
 */
static inline const char *print_timing_units(int *value) {
    if (*value > 10000000) {
        *value /= 1000000;
        return "s ";
    }
    if (*value > 10000) {
        *value /= 1000;
        return "ms";
    }
    return "us";
}

/*
 * Format one timing line and hand it to reportMessage().
 *
 * msg     label; truncated or space-padded to exactly 32 characters
 * time    measured duration (shown as "us" when it is not scaled up)
 * images  when positive, the image count and the per-image duration
 *         (time / images, computed before any unit scaling) are appended
 *
 * Layout: "<label, 32 chars>: <time> <unit>[, <images> images, <t> <unit> per image]"
 */
static inline void print_timing(const char *msg, int time, int images = 0) {
    char line[128];
    int used;

    /* Per-image share must be taken from the unscaled total. */
    const int per_image = (images > 0) ? time / images : 0;
    const char *unit_name = print_timing_units(&time);

    /* Fixed-width label column: cut at 32 characters, pad shorter labels. */
    strncpy(line, msg, 32);
    line[32] = 0;
    used = strlen(line);
    if (used < 32) {
        sprintf(line + used, "%*s", 32 - used, "");
    }

    sprintf(line + 32, ": %4i %s", time, unit_name);

    if (images > 0) {
        int scaled = per_image;

        unit_name = print_timing_units(&scaled);
        used = 32 + strlen(line + 32);
        sprintf(line + used, ", %5i images, %4i %s per image", images, scaled, unit_name);
    }

    reportMessage(line);
}
#endif /* DICT_HW_MEASURE_TIMINGS */