4 #include <nrnpthread.h> 41 #define CACHELINE_ALLOC(name,type,size) name = (type*)nrn_cacheline_alloc((void**)&name, size*sizeof(type)) 42 #define CACHELINE_CALLOC(name,type,size) name = (type*)nrn_cacheline_calloc((void**)&name, size, sizeof(type)) 63 #define BENCHMARKING 0 67 #define BENCHDECLARE unsigned long t1; 68 #define BENCHBEGIN(arg) if (t_[arg] < t1_[arg] + BSIZE) {rdtscl(t1); *(t_[arg]++) = t1;} 69 #define BENCHADD(arg) BENCHBEGIN(arg) 70 #define WAIT wait_for_workers_timeit 74 static unsigned long bcnt_, bcnt1_;
75 static unsigned long t1_[
BS][BSIZE], *t_[
BS];
78 #define BENCHBEGIN(arg) 80 #define WAIT wait_for_workers 95 #define use_malloc_hook 0 99 static int nrn_malloc_protected_;
100 static void my_init_hook();
101 static void *(*old_malloc_hook) (size_t,
const void*);
102 static void *(*old_memalign_hook) (size_t, size_t,
const void*);
103 static void *(*old_realloc_hook) (
void*, size_t,
const void*);
104 static void (*old_free_hook) (
void*,
const void*);
105 static void *my_malloc_hook (
size_t,
const void*);
106 static void *my_memalign_hook (
size_t,
size_t,
const void*);
107 static void *my_realloc_hook (
void*,
size_t,
const void*);
108 static void my_free_hook (
void*,
const void*);
109 void (*__malloc_initialize_hook)(
void) = my_init_hook;
111 static void* my_malloc_hook(
size_t size,
const void* caller) {
116 __malloc_hook = old_malloc_hook;
117 __memalign_hook = old_memalign_hook;
118 __realloc_hook = old_realloc_hook;
119 __free_hook = old_free_hook;
120 result = malloc(size);
121 old_malloc_hook = __malloc_hook;
122 old_memalign_hook = __memalign_hook;
123 old_realloc_hook = __realloc_hook;
124 old_free_hook = __free_hook;
125 __malloc_hook = my_malloc_hook;
126 __memalign_hook = my_memalign_hook;
127 __realloc_hook = my_realloc_hook;
128 __free_hook = my_free_hook;
131 static void* my_memalign_hook(
size_t alignment,
size_t size,
const void* caller) {
136 __malloc_hook = old_malloc_hook;
137 __memalign_hook = old_memalign_hook;
138 __realloc_hook = old_realloc_hook;
139 __free_hook = old_free_hook;
140 result = memalign(alignment, size);
141 old_malloc_hook = __malloc_hook;
142 old_memalign_hook = __memalign_hook;
143 old_realloc_hook = __realloc_hook;
144 old_free_hook = __free_hook;
145 __malloc_hook = my_malloc_hook;
146 __memalign_hook = my_memalign_hook;
147 __realloc_hook = my_realloc_hook;
148 __free_hook = my_free_hook;
151 static void* my_realloc_hook(
void* ptr,
size_t size,
const void* caller) {
156 __malloc_hook = old_malloc_hook;
157 __memalign_hook = old_memalign_hook;
158 __realloc_hook = old_realloc_hook;
159 __free_hook = old_free_hook;
160 result = realloc(ptr, size);
161 old_malloc_hook = __malloc_hook;
162 old_memalign_hook = __memalign_hook;
163 old_realloc_hook = __realloc_hook;
164 old_free_hook = __free_hook;
165 __malloc_hook = my_malloc_hook;
166 __memalign_hook = my_memalign_hook;
167 __realloc_hook = my_realloc_hook;
168 __free_hook = my_free_hook;
171 static void my_free_hook(
void* ptr,
const void* caller) {
175 __malloc_hook = old_malloc_hook;
176 __memalign_hook = old_memalign_hook;
177 __realloc_hook = old_realloc_hook;
178 __free_hook = old_free_hook;
180 old_malloc_hook = __malloc_hook;
181 old_memalign_hook = __memalign_hook;
182 old_realloc_hook = __realloc_hook;
183 old_free_hook = __free_hook;
184 __malloc_hook = my_malloc_hook;
185 __memalign_hook = my_memalign_hook;
186 __realloc_hook = my_realloc_hook;
187 __free_hook = my_free_hook;
189 static void my_init_hook() {
190 static int installed = 0;
191 if (installed) {
return; }
193 old_malloc_hook = __malloc_hook;
194 __malloc_hook = my_malloc_hook;
195 old_memalign_hook = __memalign_hook;
196 __memalign_hook = my_memalign_hook;
197 old_realloc_hook = __realloc_hook;
198 __realloc_hook = my_realloc_hook;
199 old_free_hook = __free_hook;
200 __free_hook = my_free_hook;
204 static int interpreter_locked;
205 static pthread_mutex_t interpreter_lock_;
206 static pthread_mutex_t* _interpreter_lock;
208 static pthread_mutex_t nmodlmutex_;
209 pthread_mutex_t* _nmodlmutex;
211 static pthread_mutex_t nrn_malloc_mutex_;
212 static pthread_mutex_t* _nrn_malloc_mutex;
215 if (_nrn_malloc_mutex) {
216 pthread_mutex_lock(_nrn_malloc_mutex);
218 nrn_malloc_protected_ = 1;
224 if (_nrn_malloc_mutex) {
226 nrn_malloc_protected_ = 0;
228 pthread_mutex_unlock(_nrn_malloc_mutex);
238 typedef volatile struct {
245 static pthread_cond_t*
cond;
246 static pthread_mutex_t* mut;
247 static pthread_t* slave_threads;
248 static slave_conf_t* wc;
250 static void wait_for_workers() {
255 while (wc[i].flag != 0){;}
257 pthread_mutex_lock(mut + i);
258 while (wc[i].flag != 0) {
259 pthread_cond_wait(cond + i, mut + i);
261 pthread_mutex_unlock(mut + i);
264 pthread_join(slave_threads[i],
nullptr);
269 static void wait_for_workers_timeit() {
276 static
void send_job_to_slave(
int i,
void* (*job)(
NrnThread*)) {
278 pthread_mutex_lock(mut + i);
281 pthread_cond_signal(cond + i);
282 pthread_mutex_unlock(mut + i);
284 pthread_create(slave_threads + i,
nullptr, (
void*(*)(
void*))job, (
void*)(nrn_threads + i));
288 void setaffinity(
int i) {
296 sched_setaffinity(0, 4, &mask);
300 static void* slave_main(
void*
arg) {
301 slave_conf_t* my_wc = (slave_conf_t*)arg;
302 pthread_mutex_t *my_mut = mut + my_wc->thread_id;
303 pthread_cond_t *my_cond = cond + my_wc->thread_id;
306 unsigned long* t_[
BS];
308 a1 = my_wc->thread_id;
313 setaffinity(my_wc->thread_id);
317 while(my_wc->flag == 0) {;}
318 if (my_wc->flag == 1) {
320 (*my_wc->job)(nrn_threads + my_wc->thread_id);
326 pthread_cond_signal(my_cond);
328 pthread_mutex_lock(my_mut);
329 while (my_wc->flag == 0) {
330 pthread_cond_wait(my_cond, my_mut);
332 pthread_mutex_unlock(my_mut);
333 pthread_mutex_lock(my_mut);
334 if (my_wc->flag == 1) {
335 pthread_mutex_unlock(my_mut);
337 (*my_wc->job)(nrn_threads + my_wc->thread_id);
340 pthread_mutex_unlock(my_mut);
343 pthread_mutex_lock(my_mut);
345 pthread_cond_signal(my_cond);
346 pthread_mutex_unlock(my_mut);
354 if (nrn_nthread > 1 &&
nrnmpi_numprocs > 1 && nrn_cannot_use_threads_and_mpi == 1) {
355 if (
nrnmpi_myid == 0) {
printf(
"This MPI is not threadsafe so pthreads are disabled.\n"); }
361 if (nrn_nthread > 1) {
371 pthread_cond_init(cond + i,
nullptr);
372 pthread_mutex_init(mut + i,
nullptr);
373 pthread_create(slave_threads + i,
nullptr, slave_main, (
void*)(wc+i));
378 if (!_interpreter_lock) {
379 interpreter_locked = 0;
380 _interpreter_lock = &interpreter_lock_;
381 pthread_mutex_init(_interpreter_lock,
nullptr);
384 _nmodlmutex = &nmodlmutex_;
385 pthread_mutex_init(_nmodlmutex,
nullptr);
387 if (!_nrn_malloc_mutex) {
388 _nrn_malloc_mutex = &nrn_malloc_mutex_;
389 pthread_mutex_init(_nrn_malloc_mutex,
nullptr);
403 pthread_mutex_lock(mut + i);
405 pthread_cond_signal(cond + i);
406 pthread_mutex_unlock(mut + i);
407 pthread_join(slave_threads[i],
nullptr);
408 pthread_cond_destroy(cond + i);
409 pthread_mutex_destroy(mut + i);
411 free((
char*)slave_threads);
415 slave_threads = (pthread_t*)0;
416 cond = (pthread_cond_t*)0;
417 mut = (pthread_mutex_t*)0;
418 wc = (slave_conf_t*)0;
420 free((
char*)slave_threads);
421 slave_threads = (pthread_t*)0;
424 if (_interpreter_lock) {
425 pthread_mutex_destroy(_interpreter_lock);
426 _interpreter_lock = (pthread_mutex_t*)0;
427 interpreter_locked = 0;
430 pthread_mutex_destroy(_nmodlmutex);
431 _nmodlmutex = (pthread_mutex_t*)0;
433 if (_nrn_malloc_mutex) {
434 pthread_mutex_destroy(_nrn_malloc_mutex);
435 _nrn_malloc_mutex = (pthread_mutex_t*)0;
454 if (nrn_nthread != 1) {
467 n = (t_[0] - t1_[0]);
474 for (i=0; i <
BS; ++
i) {
479 for (j=0; j <
BS; ++
j) {
481 for (i=0; i <
n; ++
i) {
493 if (nrn_nthread != n) {
498 nt = nrn_threads +
i;
503 free((
char *) nrn_threads);
506 nrn_threads = (NrnThread *) 0;
511 for (i=0; i <
BS; ++
i) {
515 for (i = 0; i <
n; ++
i) {
516 nt = nrn_threads +
i;
574 if (fast_imem_size_[i] > 0) {
575 free(fast_imem_[i]._nrn_sav_rhs);
576 free(fast_imem_[i]._nrn_sav_d);
579 if (fast_imem_nthread_) {
580 free(fast_imem_size_);
582 fast_imem_nthread_ = 0;
583 fast_imem_size_ =
NULL;
590 if (fast_imem_nthread_ != nrn_nthread) {
593 fast_imem_size_ =
static_cast<int*
>(
ecalloc(nrn_nthread,
sizeof(
int)));
597 NrnThread* nt = nrn_threads +
i;
600 if (n != fast_imem_size_[i]) {
601 if (fast_imem_size_[i] > 0) {
609 fast_imem_size_[
i] =
n;
629 NrnThread* nt = nrn_threads + it;
631 for (tml = nt->
tml; tml; tml = tml2) {
635 free((
char*)ml->nodeindices);
637 free((
char*)ml->
prop);
639 free((
char*)ml->
data);
640 free((
char*)ml->
pdata);
654 for (tbl = nt->
tbl[i]; tbl; tbl = tbl2) {
705 printf(
"thread_memblist_setup %lx v_node_count=%d ncell=%d end=%d\n", (
long)nth, v_node_count, nth->ncell, nth->end);
712 for (i = 0; i < _nt->
end; ++
i) {
714 for (p = nd->
prop; p; p = p->
next) {
759 for (tml = _nt->
tml; tml; tml = tml->
next) {
764 for (i = 0; i < _nt->
end; ++
i) {
766 for (p = nd->
prop; p; p = p->
next) {
790 for (i = 0; i < _nt->
end; ++
i) {
803 for (i = 0; i < _nt->
end; ++
i) {
816 for (tml = _nt->
tml; tml; tml = tml->
next) {
829 bamap[bam->
type] = bam;
833 for (tml = _nt->
tml; tml; tml = tml->
next) {
834 if (bamap[tml->
index]) {
850 pnt->
_vnt = (
void*)_nt;
897 nd =
sec->parentnode;
902 for (isec =
order - _nt->ncell; isec <
order; ++isec) {
905 sec->prop->dparam[9]._pvoid = (
void*)_nt;
906 for (
j = 0;
j <
sec->nnode; ++
j) {
911 for (ch =
sec->child; ch; ch = ch->sibling) {
943 nd =
sec->parentnode;
951 for (isec =
order - _nt->ncell; isec <
order; ++isec) {
954 sec->prop->dparam[9]._pvoid = (
void*)_nt;
955 for (
j = 0;
j <
sec->nnode; ++
j) {
960 _nt->_v_parent[
inode] =
sec->pnode[
j - 1];
962 _nt->_v_parent[
inode] =
sec->parentnode;
967 for (ch =
sec->child; ch; ch = ch->sibling) {
986 _nt->_v_node[
inode]->_classical_parent = _nt->_v_parent[
inode];
990 (*nrn_multisplit_setup_)();
994 for (
j = 0;
j < _nt->end; ++
j) {
996 nd->
_d = _nt->_actual_d +
j;
997 nd->
_rhs = _nt->_actual_rhs +
j;
1013 free((
void*)table_check_);
1014 table_check_ = (
Datum*)0;
1022 NrnThread* nt = nrn_threads +
id;
1024 for (tml = nt->
tml; tml; tml = tml->
next) {
1026 if (
memb_func[index].thread_table_check_ && ix[index] == -1) {
1037 NrnThread* nt = nrn_threads +
id;
1039 for (tml = nt->
tml; tml; tml = tml->
next) {
1041 if (
memb_func[index].thread_table_check_ && ix[index] ==
id) {
1042 table_check_[i++].
i =
id;
1043 table_check_[i++].
_pvoid = (
void*)tml;
1053 NrnThread* nt = nrn_threads + table_check_[
i].
i;
1067 pthread_mutex_lock(_interpreter_lock);
1068 interpreter_locked = 1;
1074 if (interpreter_locked) {
1075 interpreter_locked = 0;
1076 pthread_mutex_unlock(_interpreter_lock);
1088 send_job_to_slave(i, job);
1101 (*job)(nrn_threads +
i);
1113 assert(i >= 0 && i < nrn_nthread);
1117 send_job_to_slave(i, job);
1128 (*job)(nrn_threads +
i);
1142 assert(it >= 0 && it < nrn_nthread);
1143 nt = nrn_threads + it;
1170 b = (nrn_threads[0].
userpart !=
nullptr);
1172 if ((nrn_threads[it].userpart !=
nullptr) != b) {
1173 hoc_execerror(
"some threads have a user defined partition",
"and some do not");
1176 if (!b) {
return 0; }
1199 nt = nrn_threads + it;
1206 if (
sec->parentsec) {
1207 sprintf(
buf,
"in thread partition %d is not a root section", it);
1210 if (
sec->volatile_mark) {
1211 sprintf(
buf,
"appeared again in partition %d", it);
1214 sec->volatile_mark = 1;
1255 static void* waste(
void*
v) {
1259 for (i=0; i <
n; ++
i) {
1268 static double trial(
int ip) {
1272 th = (pthread_t*)
ecalloc(ip,
sizeof(pthread_t));
1274 for (i=0; i < ip; ++
i) {
1275 pthread_create(th + i,
nullptr, waste, (
void*)100000000);
1277 for (i=0; i < ip; ++
i) {
1278 pthread_join(th[i],
nullptr);
1290 printf(
"nthread walltime (count to 1e8 on each thread)\n");
1292 printf(
"%4d\t %g\n", 1, t1);
1293 for (ip = 2; ip <= _nt_; ip *= 2) {
1295 printf(
"%4d\t %g\n", ip, t2);
void * ecalloc(size_t n, size_t size)
void nrn_multithread_job(void *(*job)(NrnThread *))
void(* nrn_multisplit_setup_)()
struct NrnThreadMembList * next
void nrn_mk_table_check()
int nrn_how_many_processors()
void nrn_thread_table_check()
#define ITERATE(itm, lst)
Represent main neuron object computed by single thread.
static double cond(void *v)
static void * nulljob(NrnThread *nt)
void(* nrn_mk_transfer_thread_data_)()
NrnThreadBAList * tbl[BEFORE_AFTER_SIZE]
ForAllSections(sec) sec -> order=-1
void nrn_thread_error(const char *)
void(* thread_cleanup_)(Datum *)
void nrn_old_thread_save()
static void nrn_thread_memblist_setup()
void hoc_l_freelist(hoc_List **)
sprintf(buf," if (secondorder) {\ " int _i;\" " for(_i=0;_i< %d;++_i) {\" " _p[_slist%d[_i]]+=dt *_p[_dlist%d[_i]];\" " }}\", numeqn, listnum, listnum)
static int * fast_imem_size_
#define BEFORE_AFTER_SIZE
_nrn_Fast_Imem * _nrn_fast_imem
static int nrn_thread_parallel_
Memb_list * _ecell_memb_list
void hoc_obj_unref(Object *obj)
static void threads_create_pthread()
void(* thread_mem_init_)(Datum *)
const char * secname(Section *sec)
static void threads_free_pthread()
void hoc_execerror(const char *, const char *)
void nrn_fast_imem_alloc()
void nrn_threads_create(int n, int parallel)
fprintf(stderr, "Don't know the location of params at %p\, pp)
static void fast_imem_alloc()
char * emalloc(unsigned n)
static Datum * table_check_
void hoc_obj_ref(Object *obj)
static int allow_busywait_
static void thread_memblist_setup(NrnThread *_nt, int *mlcnt, void **vmap)
void nrn_onethread_job(int i, void *(*job)(NrnThread *))
static int fast_imem_nthread_
int nrn_allow_busywait(int b)
assert(order==section_count)
void(* thread_table_check_)(double *, Datum *, Datum *, NrnThread *, int)
static int busywait_main_
void nrn_thread_partition(int it, Object *sl)
static _nrn_Fast_Imem * fast_imem_
#define CACHELINE_ALLOC(name, type, size)
void nrn_use_busywait(int b)
static int table_check_cnt_
static void fast_imem_free()
#define CACHELINE_CALLOC(name, type, size)
void nrn_wait_for_threads()
static void reorder_secorder()
void nrn_multisplit_ptr_update()
struct NrnThreadBAList * next