/* hw_sched.c — thread-pool scheduler (normxcorr project)
 * bzr branch: http://suren.me/webbzr/normxcorr/trunk
 * rev 32 by Suren A. Chilingaryan: Multi-GPU support
 */
1 |
#include <stdio.h> |
2 |
#include <stdlib.h> |
|
3 |
#include <string.h> |
|
4 |
||
5 |
#include "normxcorr_hw_msg.h" |
|
6 |
#include "hw_sched.h" |
|
7 |
||
8 |
//#include "mrses_ppu.h"
|
|
9 |
||
10 |
||
11 |
/*
 * Allocates ctx->name_mutex with GLib.  Requires an `int err` variable in
 * the calling scope: the macro is a no-op when err is already set, and on
 * allocation failure it sets err = DICT_ERROR_GLIB.
 * NOTE(review): expands to a bare if-statement, NOT do{}while(0) — COND_INIT
 * composes it with a following `if (!err)` and relies on this exact shape;
 * do not wrap it without auditing every call site.
 */
#define MUTEX_INIT(ctx, name) \
    if (!err) { \
	ctx->name##_mutex = g_mutex_new(); \
	if (!ctx->name##_mutex) err = DICT_ERROR_GLIB; \
    }
17 |
/*
 * Frees ctx->name_mutex if (and only if) it was allocated.  The NULL check
 * makes it safe on partially initialized contexts (hw_sched_create error
 * path); g_mutex_free must not be called with NULL.
 * NOTE(review): bare if-statement, not do{}while(0) — COND_INIT invokes it
 * without a trailing semicolon, so the expansion shape is load-bearing.
 */
#define MUTEX_FREE(ctx, name) \
    if (ctx->name##_mutex) g_mutex_free(ctx->name##_mutex);
20 |
/*
 * Allocates the pair ctx->name_cond / ctx->name_cond_mutex (a condition
 * variable always travels with its companion mutex here).  Requires `int err`
 * in scope, like MUTEX_INIT.  If the condition allocation fails, the mutex
 * created in the first step is rolled back so the context stays consistent.
 */
#define COND_INIT(ctx, name) \
    MUTEX_INIT(ctx, name##_cond) \
    if (!err) { \
	ctx->name##_cond = g_cond_new(); \
	if (!ctx->name##_cond) { \
	    err = DICT_ERROR_GLIB; \
	    MUTEX_FREE(ctx, name##_cond) \
	} \
    }
30 |
/*
 * Releases the condition/mutex pair created by COND_INIT.  Both frees are
 * NULL-guarded, so this is safe on partially initialized contexts.
 */
#define COND_FREE(ctx, name) \
    if (ctx->name##_cond) g_cond_free(ctx->name##_cond); \
    MUTEX_FREE(ctx, name##_cond)
34 |
/*
 * Run-handler table handed to every worker via hw_thread_create.
 * NOTE(review): deliberately empty (NULL terminator only) — the actual entry
 * point appears to be supplied per task through HWEntry in
 * hw_sched_schedule_task; confirm against hw_thread implementation.
 */
HWRunFunction ppu_run[] = {
    (HWRunFunction)NULL
};
38 |
static int hw_sched_initialized = 0; |
|
39 |
||
40 |
int hw_sched_init() { |
|
41 |
if (!hw_sched_initialized) { |
|
42 |
g_thread_init(NULL); |
|
43 |
hw_sched_initialized = 1; |
|
44 |
}
|
|
45 |
||
46 |
return 0; |
|
47 |
}
|
|
48 |
||
49 |
||
50 |
||
51 |
HWSched hw_sched_create(int ppu_count) { |
|
52 |
int i; |
|
53 |
int err = 0; |
|
54 |
||
55 |
HWSched ctx; |
|
56 |
||
57 |
hw_sched_init(); |
|
58 |
||
59 |
ctx = (HWSched)malloc(sizeof(HWSchedS)); |
|
60 |
if (!ctx) return NULL; |
|
61 |
||
62 |
memset(ctx, 0, sizeof(HWSchedS)); |
|
63 |
||
64 |
ctx->status = 1; |
|
65 |
||
66 |
MUTEX_INIT(ctx, data); |
|
67 |
COND_INIT(ctx, compl); |
|
68 |
COND_INIT(ctx, job); |
|
69 |
||
70 |
if (err) { |
|
71 |
reportError("Error initializing conditions and mutexes"); |
|
72 |
hw_sched_destroy(ctx); |
|
73 |
return NULL; |
|
74 |
}
|
|
75 |
||
76 |
ctx->n_threads = 0; |
|
77 |
for (i = 0; i < ppu_count; i++) { |
|
78 |
ctx->thread[ctx->n_threads] = hw_thread_create(ctx, ctx->n_threads, NULL, ppu_run, NULL); |
|
79 |
if (ctx->thread[ctx->n_threads]) ++ctx->n_threads; |
|
80 |
}
|
|
81 |
||
82 |
return ctx; |
|
83 |
}
|
|
84 |
||
85 |
/*
 * Blocks until every worker thread has left its INIT state, then marks the
 * scheduler as started.  Runs under the compl condition mutex; each wakeup
 * on the compl condition re-checks the current thread before advancing.
 * Always returns 0.
 */
static int hw_sched_wait_threads(HWSched ctx) {
    int pos = 0;

    hw_sched_lock(ctx, compl_cond);
    while (pos < ctx->n_threads) {
	if (ctx->thread[pos]->status == HW_THREAD_STATUS_INIT) {
	    /* Still starting up: sleep until a worker signals completion,
	       then re-check this same thread. */
	    hw_sched_wait(ctx, compl);
	} else {
	    pos++;
	}
    }
    hw_sched_unlock(ctx, compl_cond);

    ctx->started = 1;

    return 0;
}
105 |
/*
 * Tears down a scheduler: wakes all worker threads so they observe the
 * shutdown flag, destroys them, then releases the synchronization
 * primitives and the context itself.
 *
 * ctx may be NULL (no-op, free(NULL) semantics).  Also safe on a partially
 * constructed context from the hw_sched_create error path: COND_FREE and
 * MUTEX_FREE NULL-check every primitive before freeing.
 */
void hw_sched_destroy(HWSched ctx) {
    int i;

    if (!ctx) return;

    if (ctx->n_threads > 0) {
	/* Workers must have finished their INIT phase before the shutdown
	   broadcast, otherwise they could miss it while still starting up. */
	if (!ctx->started) {
	    hw_sched_wait_threads(ctx);
	}

	ctx->status = 0;		/* signals workers to exit */
	hw_sched_lock(ctx, job_cond);
	hw_sched_broadcast(ctx, job);	/* wake everyone waiting for a job */
	hw_sched_unlock(ctx, job_cond);

	for (i = 0; i < ctx->n_threads; i++) {
	    hw_thread_destroy(ctx->thread[i]);
	}
    }

    COND_FREE(ctx, job);
    COND_FREE(ctx, compl);
    MUTEX_FREE(ctx, data);

    free(ctx);
}
/* rev 37 by Suren A. Chilingaryan: fixes */
130 |
/*
 * Restricts scheduling to the first `count` worker threads: threads with
 * id >= count get no chunks from hw_sched_get_chunk.  Always returns 0.
 * NOTE(review): a count of 0 disables the limit (the guard in
 * hw_sched_get_chunk only applies when n_threads_actual is non-zero);
 * negative values would starve all threads — confirm callers never pass one.
 */
int hw_sched_limit_num_threads(HWSched ctx, int count) {
    ctx->n_threads_actual = count;

    return 0;
}
/* rev 32 by Suren A. Chilingaryan: Multi-GPU support */
136 |
/*
 * Switches the scheduler to sequential chunk distribution: workers pull
 * block indices [*cur_block, *n_blocks) from the shared counters, protected
 * by the data mutex in hw_sched_get_chunk.  The pointers are stored, not
 * copied — the caller owns the counters and must keep them alive while
 * tasks run.  Always returns 0.
 */
int hw_sched_set_sequential_mode(HWSched ctx, int *n_blocks, int *cur_block) {
    ctx->mode = HW_SCHED_MODE_SEQUENTIAL;
    ctx->n_blocks = n_blocks;
    ctx->cur_block = cur_block;

    return 0;
}
144 |
/*
 * Hands the next work chunk to worker `thread_id`.
 *
 * PREALLOCATED mode: each thread owns exactly one chunk — its own id —
 * which it receives once per task (while its status is IDLE).
 * SEQUENTIAL mode: threads compete for the shared block counter under the
 * data mutex and receive consecutive block indices.
 *
 * Returns the chunk number, or -1 when there is no work for this thread
 * (limit exceeded, chunk already taken, blocks exhausted, unknown mode).
 */
int hw_sched_get_chunk(HWSched ctx, int thread_id) {
    int chunk;

    /* rev 37: threads above the configured limit never receive work. */
    if ((ctx->n_threads_actual)&&(thread_id >= ctx->n_threads_actual)) return -1;

    switch (ctx->mode) {
      case HW_SCHED_MODE_PREALLOCATED:
	if (ctx->thread[thread_id]->status != HW_THREAD_STATUS_IDLE) return -1;
	return thread_id;
      case HW_SCHED_MODE_SEQUENTIAL:
	hw_sched_lock(ctx, data);
	chunk = *ctx->cur_block;
	if (chunk < *ctx->n_blocks) {
	    *ctx->cur_block = chunk + 1;
	} else {
	    chunk = -1;
	}
	hw_sched_unlock(ctx, data);
	return chunk;
      default:
	return -1;
    }
}
174 |
/*
 * Publishes a task (application context + entry point) and wakes all
 * workers to run it.  Always returns 0.
 *
 * Locking protocol: compl_cond is locked here and INTENTIONALLY left locked;
 * the matching unlock happens at the end of hw_sched_wait_task.  Taking it
 * before the job broadcast guarantees no completion signal can be missed
 * between scheduling and waiting.  Callers must pair every schedule with a
 * hw_sched_wait_task.
 */
int hw_sched_schedule_task(HWSched ctx, void *appctx, HWEntry entry) {
    /* Lazily finish startup: make sure all workers left INIT first. */
    if (!ctx->started) {
	hw_sched_wait_threads(ctx);
    }

    /* Task parameters read by the workers once they wake up. */
    ctx->ctx = appctx;
    ctx->entry = entry;

    /* Held across the task; released in hw_sched_wait_task. */
    hw_sched_lock(ctx, compl_cond);

    hw_sched_lock(ctx, job_cond);
    hw_sched_broadcast(ctx, job);
    hw_sched_unlock(ctx, job_cond);

    return 0;
}
191 |
/*
 * Blocks until every worker has finished the current task, resetting each
 * DONE worker back to IDLE.  Always returns 0.
 *
 * Runs with compl_cond already held — it was locked by the matching
 * hw_sched_schedule_task — and releases it on exit, completing the
 * cross-function lock handoff.  On each wakeup the scan resumes from the
 * first thread not yet confirmed DONE.
 */
int hw_sched_wait_task(HWSched ctx) {
    int i = 0;

    while (i < ctx->n_threads) {
	for (; i < ctx->n_threads; i++) {
	    if (ctx->thread[i]->status == HW_THREAD_STATUS_DONE) {
		/* Acknowledge completion and make the thread schedulable
		   for the next task. */
		ctx->thread[i]->status = HW_THREAD_STATUS_IDLE;
	    } else {
		/* Thread i still busy: sleep until some worker signals
		   completion, then re-check from thread i. */
		hw_sched_wait(ctx, compl);
		break;
	    }
	}

    }

    /* Releases the lock acquired in hw_sched_schedule_task. */
    hw_sched_unlock(ctx, compl_cond);

    return 0;
}