4 #include <nrnpthread.h>
41 #define CACHELINE_ALLOC(name, type, size) \
42 name = (type*) nrn_cacheline_alloc((void**) &name, size * sizeof(type))
43 #define CACHELINE_CALLOC(name, type, size) \
44 name = (type*) nrn_cacheline_calloc((void**) &name, size, sizeof(type))
65 #define BENCHMARKING 0
69 #define BENCHDECLARE unsigned long t1;
70 #define BENCHBEGIN(arg) \
71 if (t_[arg] < t1_[arg] + BSIZE) { \
75 #define BENCHADD(arg) BENCHBEGIN(arg)
76 #define WAIT wait_for_workers_timeit
80 static unsigned long bcnt_, bcnt1_;
81 static unsigned long t1_[
BS][BSIZE], *t_[
BS];
84 #define BENCHBEGIN(arg)
86 #define WAIT wait_for_workers
101 #define use_malloc_hook 0
105 static int nrn_malloc_protected_;
106 static void my_init_hook();
107 static void* (*old_malloc_hook)(size_t,
const void*);
108 static void* (*old_memalign_hook)(size_t, size_t,
const void*);
109 static void* (*old_realloc_hook)(
void*, size_t,
const void*);
110 static void (*old_free_hook)(
void*,
const void*);
111 static void* my_malloc_hook(
size_t,
const void*);
112 static void* my_memalign_hook(
size_t,
size_t,
const void*);
113 static void* my_realloc_hook(
void*,
size_t,
const void*);
114 static void my_free_hook(
void*,
const void*);
115 void (*__malloc_initialize_hook)(
void) = my_init_hook;
117 static void* my_malloc_hook(
size_t size,
const void* caller) {
122 __malloc_hook = old_malloc_hook;
123 __memalign_hook = old_memalign_hook;
124 __realloc_hook = old_realloc_hook;
125 __free_hook = old_free_hook;
127 old_malloc_hook = __malloc_hook;
128 old_memalign_hook = __memalign_hook;
129 old_realloc_hook = __realloc_hook;
130 old_free_hook = __free_hook;
131 __malloc_hook = my_malloc_hook;
132 __memalign_hook = my_memalign_hook;
133 __realloc_hook = my_realloc_hook;
134 __free_hook = my_free_hook;
137 static void* my_memalign_hook(
size_t alignment,
size_t size,
const void* caller) {
142 __malloc_hook = old_malloc_hook;
143 __memalign_hook = old_memalign_hook;
144 __realloc_hook = old_realloc_hook;
145 __free_hook = old_free_hook;
146 result = memalign(alignment, size);
147 old_malloc_hook = __malloc_hook;
148 old_memalign_hook = __memalign_hook;
149 old_realloc_hook = __realloc_hook;
150 old_free_hook = __free_hook;
151 __malloc_hook = my_malloc_hook;
152 __memalign_hook = my_memalign_hook;
153 __realloc_hook = my_realloc_hook;
154 __free_hook = my_free_hook;
157 static void* my_realloc_hook(
void* ptr,
size_t size,
const void* caller) {
162 __malloc_hook = old_malloc_hook;
163 __memalign_hook = old_memalign_hook;
164 __realloc_hook = old_realloc_hook;
165 __free_hook = old_free_hook;
166 result = realloc(ptr, size);
167 old_malloc_hook = __malloc_hook;
168 old_memalign_hook = __memalign_hook;
169 old_realloc_hook = __realloc_hook;
170 old_free_hook = __free_hook;
171 __malloc_hook = my_malloc_hook;
172 __memalign_hook = my_memalign_hook;
173 __realloc_hook = my_realloc_hook;
174 __free_hook = my_free_hook;
177 static void my_free_hook(
void* ptr,
const void* caller) {
181 __malloc_hook = old_malloc_hook;
182 __memalign_hook = old_memalign_hook;
183 __realloc_hook = old_realloc_hook;
184 __free_hook = old_free_hook;
186 old_malloc_hook = __malloc_hook;
187 old_memalign_hook = __memalign_hook;
188 old_realloc_hook = __realloc_hook;
189 old_free_hook = __free_hook;
190 __malloc_hook = my_malloc_hook;
191 __memalign_hook = my_memalign_hook;
192 __realloc_hook = my_realloc_hook;
193 __free_hook = my_free_hook;
195 static void my_init_hook() {
196 static int installed = 0;
201 old_malloc_hook = __malloc_hook;
202 __malloc_hook = my_malloc_hook;
203 old_memalign_hook = __memalign_hook;
204 __memalign_hook = my_memalign_hook;
205 old_realloc_hook = __realloc_hook;
206 __realloc_hook = my_realloc_hook;
207 old_free_hook = __free_hook;
208 __free_hook = my_free_hook;
212 static int interpreter_locked;
213 static pthread_mutex_t interpreter_lock_;
214 static pthread_mutex_t* _interpreter_lock;
216 static pthread_mutex_t nmodlmutex_;
217 pthread_mutex_t* _nmodlmutex;
219 static pthread_mutex_t nrn_malloc_mutex_;
220 static pthread_mutex_t* _nrn_malloc_mutex;
223 if (_nrn_malloc_mutex) {
224 pthread_mutex_lock(_nrn_malloc_mutex);
226 nrn_malloc_protected_ = 1;
232 if (_nrn_malloc_mutex) {
234 nrn_malloc_protected_ = 0;
236 pthread_mutex_unlock(_nrn_malloc_mutex);
246 typedef volatile struct {
253 static pthread_cond_t*
cond;
254 static pthread_mutex_t* mut;
255 static pthread_t* slave_threads;
256 static slave_conf_t* wc;
258 static void wait_for_workers() {
263 while (wc[
i].flag != 0) {
267 pthread_mutex_lock(mut +
i);
268 while (wc[
i].flag != 0) {
269 pthread_cond_wait(
cond +
i, mut +
i);
271 pthread_mutex_unlock(mut +
i);
274 pthread_join(slave_threads[
i],
nullptr);
279 static void wait_for_workers_timeit() {
286 static
void send_job_to_slave(
int i,
void* (*job)(
NrnThread*) ) {
288 pthread_mutex_lock(mut +
i);
291 pthread_cond_signal(
cond +
i);
292 pthread_mutex_unlock(mut +
i);
294 pthread_create(slave_threads +
i,
nullptr, (
void* (*) (
void*) ) job, (
void*) (
nrn_threads +
i));
298 void setaffinity(
int i) {
306 sched_setaffinity(0, 4, &mask);
310 static void* slave_main(
void*
arg) {
311 slave_conf_t* my_wc = (slave_conf_t*)
arg;
312 pthread_mutex_t* my_mut = mut + my_wc->thread_id;
313 pthread_cond_t* my_cond =
cond + my_wc->thread_id;
316 unsigned long* t_[
BS];
318 a1 = my_wc->thread_id;
323 setaffinity(my_wc->thread_id);
327 while (my_wc->flag == 0) {
330 if (my_wc->flag == 1) {
338 pthread_cond_signal(my_cond);
340 pthread_mutex_lock(my_mut);
341 while (my_wc->flag == 0) {
342 pthread_cond_wait(my_cond, my_mut);
344 pthread_mutex_unlock(my_mut);
345 pthread_mutex_lock(my_mut);
346 if (my_wc->flag == 1) {
347 pthread_mutex_unlock(my_mut);
352 pthread_mutex_unlock(my_mut);
355 pthread_mutex_lock(my_mut);
357 pthread_cond_signal(my_cond);
358 pthread_mutex_unlock(my_mut);
368 printf(
"This MPI is not threadsafe so pthreads are disabled.\n");
385 pthread_cond_init(
cond +
i,
nullptr);
386 pthread_mutex_init(mut +
i,
nullptr);
387 pthread_create(slave_threads +
i,
nullptr, slave_main, (
void*) (wc +
i));
392 if (!_interpreter_lock) {
393 interpreter_locked = 0;
394 _interpreter_lock = &interpreter_lock_;
395 pthread_mutex_init(_interpreter_lock,
nullptr);
398 _nmodlmutex = &nmodlmutex_;
399 pthread_mutex_init(_nmodlmutex,
nullptr);
401 if (!_nrn_malloc_mutex) {
402 _nrn_malloc_mutex = &nrn_malloc_mutex_;
403 pthread_mutex_init(_nrn_malloc_mutex,
nullptr);
417 pthread_mutex_lock(mut +
i);
419 pthread_cond_signal(
cond +
i);
420 pthread_mutex_unlock(mut +
i);
421 pthread_join(slave_threads[
i],
nullptr);
422 pthread_cond_destroy(
cond +
i);
423 pthread_mutex_destroy(mut +
i);
425 free((
char*) slave_threads);
429 slave_threads = (pthread_t*) 0;
430 cond = (pthread_cond_t*) 0;
431 mut = (pthread_mutex_t*) 0;
432 wc = (slave_conf_t*) 0;
434 free((
char*) slave_threads);
435 slave_threads = (pthread_t*) 0;
438 if (_interpreter_lock) {
439 pthread_mutex_destroy(_interpreter_lock);
440 _interpreter_lock = (pthread_mutex_t*) 0;
441 interpreter_locked = 0;
444 pthread_mutex_destroy(_nmodlmutex);
445 _nmodlmutex = (pthread_mutex_t*) 0;
447 if (_nrn_malloc_mutex) {
448 pthread_mutex_destroy(_nrn_malloc_mutex);
449 _nrn_malloc_mutex = (pthread_mutex_t*) 0;
481 n = (t_[0] - t1_[0]);
488 for (
i = 0;
i <
BS; ++
i) {
493 for (
j = 0;
j <
BS; ++
j) {
495 for (
i = 0;
i <
n; ++
i) {
526 for (
i = 0;
i <
BS; ++
i) {
530 for (
i = 0;
i <
n; ++
i) {
646 for (tml = nt->
tml; tml; tml = tml2) {
650 free((
char*) ml->nodeindices);
652 free((
char*) ml->
prop);
654 free((
char*) ml->
data);
655 free((
char*) ml->
pdata);
672 for (tbl = nt->
tbl[
i]; tbl; tbl = tbl2) {
744 printf(
"thread_memblist_setup %lx v_node_count=%d ncell=%d end=%d\n", (
long)nth, v_node_count, nth->ncell, nth->end);
751 for (
i = 0;
i < _nt->
end; ++
i) {
753 for (
p = nd->
prop;
p;
p =
p->next) {
775 if (
i == EXTRACELL) {
798 for (tml = _nt->
tml; tml; tml = tml->
next) {
803 for (
i = 0;
i < _nt->
end; ++
i) {
805 for (
p = nd->
prop;
p;
p =
p->next) {
827 for (
i = 0;
i < _nt->
end; ++
i) {
840 for (
i = 0;
i < _nt->
end; ++
i) {
853 for (tml = _nt->
tml; tml; tml = tml->
next) {
869 if (!bamap[bam->
type]) {
870 bamap[bam->
type] = bam;
875 for (tml = _nt->
tml; tml; tml = tml->
next) {
876 if (bamap[tml->
index]) {
877 int mtype = tml->
index;
879 for (bam = bamap[mtype]; bam && bam->
type == mtype; bam = bam->
next) {
892 for (tml = _nt->
tml; tml; tml = tml->
next)
896 pnt->
_vnt = (
void*) _nt;
914 (*nrn_mk_transfer_thread_data_)();
947 nd =
sec->parentnode;
955 sec->prop->dparam[9]._pvoid = (
void*) _nt;
956 for (
j = 0;
j <
sec->nnode; ++
j) {
995 nd =
sec->parentnode;
1006 sec->prop->dparam[9]._pvoid = (
void*) _nt;
1007 for (
j = 0;
j <
sec->nnode; ++
j) {
1019 for (ch =
sec->child; ch; ch = ch->
sibling) {
1046 (*nrn_multisplit_setup_)();
1050 for (
j = 0;
j < _nt->
end; ++
j) {
1080 for (tml = nt->
tml; tml; tml = tml->
next) {
1095 for (tml = nt->
tml; tml; tml = tml->
next) {
1122 pthread_mutex_lock(_interpreter_lock);
1123 interpreter_locked = 1;
1129 if (interpreter_locked) {
1130 interpreter_locked = 0;
1131 pthread_mutex_unlock(_interpreter_lock);
1143 send_job_to_slave(
i, job);
1172 send_job_to_slave(
i, job);
1228 hoc_execerror(
"some threads have a user defined partition",
"and some do not");
1253 sec->volatile_mark = 0;
1265 if (
sec->parentsec) {
1266 sprintf(
buf,
"in thread partition %d is not a root section", it);
1269 if (
sec->volatile_mark) {
1270 sprintf(
buf,
"appeared again in partition %d", it);
1273 sec->volatile_mark = 1;
1278 "The total number of cells, %d, is different than the number of user partition "
1318 static void* waste(
void*
v) {
1322 for (
i = 0;
i <
n; ++
i) {
1331 static double trial(
int ip) {
1335 th = (pthread_t*)
ecalloc(ip,
sizeof(pthread_t));
1337 for (
i = 0;
i < ip; ++
i) {
1338 pthread_create(th +
i,
nullptr, waste, (
void*) 100000000);
1340 for (
i = 0;
i < ip; ++
i) {
1341 pthread_join(th[
i],
nullptr);
1353 printf(
"nthread walltime (count to 1e8 on each thread)\n");
1355 printf(
"%4d\t %g\n", 1, t1);
1356 for (ip = 2; ip <= _nt_; ip *= 2) {
1358 printf(
"%4d\t %g\n", ip, t2);
1359 if (t2 > 1.3 * t1) {
const char * secname(Section *sec)
void(* nrn_multisplit_setup_)()
static double order(void *v)
sprintf(buf, " if (secondorder) {\n" " int _i;\n" " for (_i = 0; _i < %d; ++_i) {\n" " _p[_slist%d[_i]] += dt*_p[_dlist%d[_i]];\n" " }}\n", numeqn, listnum, listnum)
void hoc_execerror(const char *, const char *)
void hoc_obj_ref(Object *obj)
void hoc_obj_unref(Object *obj)
void * ecalloc(size_t n, size_t size)
void hoc_l_freelist(hoc_List **)
#define BEFORE_AFTER_SIZE
#define ITERATE(itm, lst)
char * emalloc(unsigned n)
static int allow_busywait_
void nrn_thread_error(const char *)
void nrn_onethread_job(int i, void *(*job)(NrnThread *))
static int table_check_cnt_
void nrn_threads_create(int n, int parallel)
static int fast_imem_nthread_
static void fast_imem_free()
int nrn_allow_busywait(int b)
static void thread_memblist_setup(NrnThread *_nt, int *mlcnt, void **vmap)
#define CACHELINE_ALLOC(name, type, size)
void nrn_multithread_job(void *(*job)(NrnThread *))
void nrn_use_busywait(int b)
static int busywait_main_
static void threads_create_pthread()
static int * fast_imem_size_
int nrn_how_many_processors()
static void nrn_thread_memblist_setup()
static void reorder_secorder()
static Datum * table_check_
void nrn_thread_partition(int it, Object *sl)
#define CACHELINE_CALLOC(name, type, size)
void nrn_mk_table_check()
void nrn_thread_table_check()
static void * nulljob(NrnThread *nt)
void nrn_fast_imem_alloc()
static _nrn_Fast_Imem * fast_imem_
void nrn_old_thread_save()
static void fast_imem_alloc()
void nrn_wait_for_threads()
static void threads_free_pthread()
static int nrn_thread_parallel_
void(* nrn_mk_transfer_thread_data_)()
void nrn_multisplit_ptr_update()
int const size_t const size_t n
static double cond(void *v)
void(* thread_table_check_)(double *, Datum *, Datum *, NrnThread *, int)
void(* thread_mem_init_)(Datum *)
void(* thread_cleanup_)(Datum *)
struct Node * _classical_parent
struct NrnThreadBAList * next
Represent main neuron object computed by single thread.
_nrn_Fast_Imem * _nrn_fast_imem
NrnThreadBAList * tbl[BEFORE_AFTER_SIZE]
Memb_list * _ecell_memb_list
struct NrnThreadMembList * next