NEURON
netpar.cpp
Go to the documentation of this file.
2 #include <../../nrnconf.h>
3 #include <InterViews/resource.h>
4 #include <math.h>
5 #include <nrnmpi.h>
6 #include <nrnoc2iv.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <unordered_map>
10 #include <bbs.h>
11 
12 #undef MD
13 #define MD 2147483648.
14 
15 class PreSyn;
16 
17 // hash table where buckets are binary search maps
18 using Gid2PreSyn = std::unordered_map<int, PreSyn*>;
19 
20 #include <errno.h>
21 #include <netcon.h>
22 #include <cvodeobj.h>
23 #include <netcvode.h>
24 #include <vector>
25 #include "ivocvect.h"
26 
27 #define BGP_INTERVAL 2
28 #if BGP_INTERVAL == 2
29 static int n_bgp_interval;
30 #endif
31 
37 static double t_exchange_;
38 static double dt1_; // 1/dt
39 static void alloc_space();
40 
42 extern double t, dt;
43 extern int cvode_active_;
44 extern "C" Point_process* ob2pntproc(Object*);
45 extern int nrn_use_selfqueue_;
46 extern void nrn_pending_selfqueue(double, NrnThread*);
47 extern int vector_capacity(IvocVect*); //ivocvect.h conflicts with STL
48 extern double* vector_vec(IvocVect*);
49 extern Object* nrn_sec2cell(Section*);
50 extern void ncs2nrn_integrate(double tstop);
51 extern "C" {
52 extern void nrn_fake_fire(int gid, double firetime, int fake_out);
53 } // extern "C"
54 int nrnmpi_spike_compress(int nspike, bool gid_compress, int xchng_meth);
56 int nrn_set_timeout(int);
57 void nrnmpi_gid_clear(int);
58 extern void nrn_partrans_clear();
60 extern double nrn_bgp_receive_time(int);
61 typedef void (*PFIO)(int, Object*);
62 extern void nrn_gidout_iter(PFIO);
63 extern Object* nrn_gid2obj(int);
64 extern PreSyn* nrn_gid2presyn(int);
65 extern int nrn_gid_exists(int);
66 extern double nrnmpi_step_wait_; // barrier at beginning of spike exchange.
67 
68 /**
69  * @brief NEURON callback used from CORENEURON to transfer all spike vectors after simulation
70  *
71  * @param spiketvec - vector of spikes
72  * @param spikegidvec - vector of gids
73  * @return 1 if CORENEURON shall drop writing `out.dat`; 0 otherwise
74  */
75 extern "C" int nrnthread_all_spike_vectors_return(std::vector<double>& spiketvec, std::vector<int>& spikegidvec);
76 
77 // BGPDMA can be 0 or 1
78 // (BGPDMA & 1) > 0 means multisend ISend allowed
79 #if !defined(BGPDMA)
80 #define BGPDMA 0
81 #endif
82 
83 #if BGPDMA == 0
84 double nrn_bgp_receive_time(int) { return 0.; }
85 #endif
86 
87 #if PARANEURON
88 extern void nrnmpi_split_clear();
89 #endif
90 extern void nrnmpi_multisplit_clear();
91 
92 static double set_mindelay(double maxdelay);
93 
94 #if NRNMPI
95 
96 #include "../nrnmpi/mpispike.h"
97 
98 void nrn_timeout(int);
100 extern int nrnmpi_int_allmax(int);
101 extern void nrnmpi_int_allgather(int*, int*, int);
102 void nrn2ncs_outputevent(int netcon_output_index, double firetime);
103 bool nrn_use_compress_; // global due to bbsavestate
104 #define use_compress_ nrn_use_compress_
105 
106 #ifdef USENCS
107 extern int ncs_bgp_sending_info( int ** );
108 extern int ncs_bgp_target_hosts( int, int** );
109 extern int ncs_bgp_target_info( int ** );
110 extern int ncs_bgp_mindelays( int **, double ** );
111 
112 //get minimum delays for all presyn objects in gid2in_
113 int ncs_netcon_mindelays( int**hosts, double **delays )
114 {
115  return ncs_bgp_mindelays(hosts, delays);
116 }
117 
118 double ncs_netcon_localmindelay( int srcgid )
119 {
120  PreSyn *ps = gid2out_.at(srcgid);
121  return ps->mindelay();
122 }
123 
124 //get the number of netcons for an object, if it sends here
125 int ncs_netcon_count( int srcgid, bool localNetCons )
126 {
127  const auto& map = localNetCons ? gid2out_ : gid2in_;
128  const auto& iter = map.find(srcgid);
129  PreSyn* ps{iter != map.end() ? iter->second : nullptr};
130 
131  if( !ps ) { //no cells on this cpu receive from the given gid
132  fprintf( stderr, "should never happen!\n" );
133  return 0;
134  }
135 
136  return ps->dil_.count();
137 }
138 
139 //inject a spike into the appropriate netcon
140 void ncs_netcon_inject( int srcgid, int netconIndex, double spikeTime, bool localNetCons )
141 {
143  const auto& map = localNetCons ? gid2out_ : gid2in_;
144  const auto& iter = map.find(srcgid);
145  PreSyn* ps{iter != map.end() ? iter->second : nullptr};
146  if( !ps ) { //no cells on this cpu receive from the given gid
147  return;
148  }
149 
150  //fprintf( stderr, "gid %d index %d!\n", srcgid, netconIndex );
151  NetCon* d = ps->dil_.item(netconIndex);
152  NrnThread* nt = nrn_threads;
153  if (d->active_ && d->target_) {
154 #if BBTQ == 5
155  ns->bin_event(spikeTime + d->delay_, d, nt);
156 #else
157  ns->event(spikeTime + d->delay_, d, nt);
158 #endif
159  }
160 }
161 
162 int ncs_gid_receiving_info( int **presyngids ) {
163  return ncs_bgp_target_info( presyngids );
164 }
165 
166 //given the gid of a cell, retrieve its target count
167 int ncs_gid_sending_count( int **sendlist2build ) {
168  return ncs_bgp_sending_info( sendlist2build );
169 }
170 
171 int ncs_target_hosts( int gid, int** targetnodes ) {
172  return ncs_bgp_target_hosts( gid, targetnodes );
173 }
174 
175 #endif
176 
177 // for compressed gid info during spike exchange
178 bool nrn_use_localgid_;
179 void nrn_outputevent(unsigned char localgid, double firetime);
180 static std::vector<std::unique_ptr<Gid2PreSyn>> localmaps_;
181 
182 #define NRNSTAT 1
183 static int nsend_, nsendmax_, nrecv_, nrecv_useful_;
184 #if NRNSTAT
185 static IvocVect* max_histogram_;
186 #endif
187 
188 static int ocapacity_; // for spikeout_
189 // require it to be smaller than min_interprocessor_delay.
190 static double wt_; // wait time for nrnmpi_spike_exchange
191 static double wt1_; // time to find the PreSyns and send the spikes.
192 static int spfixout_capacity_;
193 static int idxout_;
194 static void nrn_spike_exchange_compressed(NrnThread*);
195 #endif // NRNMPI
196 
197 #if BGPDMA
198 bool use_bgpdma_; // false: allgather, true: multisend (ISend, bgpdma)
199 static void bgp_dma_setup();
200 static void bgp_dma_init();
201 static void bgp_dma_receive(NrnThread*);
202 extern void bgp_dma_send(PreSyn*, double t);
203 static void bgpdma_cleanup_presyn(PreSyn*);
204 #endif
205 
206 static int active_;
207 static double usable_mindelay_;
209 static double mindelay_; // the one actually used. Some of our optional algorithms
210 static double last_maxstep_arg_;
211 static NetParEvent* npe_; // nrn_nthread of them
212 static int n_npe_; // just to compare with nrn_nthread
213 
214 #if NRNMPI
215 // for combination of threads and mpi.
216 #if USE_PTHREAD
217 static MUTDEC
218 #endif
219 static int seqcnt_;
220 static NrnThread* last_nt_;
221 #endif
222 
223 #if NRN_MUSIC
224 #include "nrnmusic.cpp"
225 #endif
226 
228  wx_ = ws_ = 0.;
229  ithread_ = -1;
230 }
232 }
233 void NetParEvent::send(double tt, NetCvode* nc, NrnThread* nt){
234  nc->event(tt + usable_mindelay_, this, nt);
235 }
236 void NetParEvent::deliver(double tt, NetCvode* nc, NrnThread* nt){
237  int seq;
238  if (nrn_use_selfqueue_) { //first handle pending flag=1 self events
239  nrn_pending_selfqueue(tt, nt);
240  }
241  // has to be the last event at this time in order to avoid a race
242  // condition with HocEvent that may call things such as pc.barrier
243  // actually allthread HocEvent (cvode.event(tev) and cvode.event(tev,stmt)
244  // will be executed last after a thread join when nrn_allthread_handle
245  // is called.
246  net_cvode_instance->deliver_events(tt, nt);
247  nt->_stop_stepping = 1;
248  nt->_t = tt;
249 #if NRNMPI
250  if (nrnmpi_numprocs > 0) {
251  MUTLOCK
252  seq = ++seqcnt_;
253  MUTUNLOCK
254  if (seq == nrn_nthread) {
255  last_nt_ = nt;
256 #if BGPDMA
257  if (use_bgpdma_) {
258  bgp_dma_receive(nt);
259  }else{
260  nrn_spike_exchange(nt);
261  }
262 #else
263  nrn_spike_exchange(nt);
264 #endif
265  wx_ += wt_;
266  ws_ += wt1_;
267  seqcnt_ = 0;
268  }
269  }
270 #endif
271  send(tt, nc, nt);
272 }
274  assert(nrn_nthread == 1);
275  deliver(tt, nc, nrn_threads);
276 }
277 
278 void NetParEvent::pr(const char* m, double tt, NetCvode* nc){
279  Printf("%s NetParEvent %d t=%.15g tt-t=%g\n", m, ithread_, tt, tt - nrn_threads[ithread_]._t);
280 }
281 
283  //pr("savestate_save", 0, net_cvode_instance);
284  NetParEvent* npe = new NetParEvent();
285  npe->ithread_ = ithread_;
286  return npe;
287 }
288 
290  int i;
291  char buf[100];
292  nrn_assert(fgets(buf, 100, f));
293  nrn_assert(sscanf(buf, "%d\n", &i) == 1);
294  //printf("NetParEvent::savestate_read %d\n", i);
295  NetParEvent* npe = new NetParEvent();
296  npe->ithread_ = i;
297  return npe;
298 }
299 
301  //pr("savestate_write", 0, net_cvode_instance);
302  fprintf(f, "%d\n%d\n", NetParEventType, ithread_);
303 }
304 
306 #if NRNMPI
307  if (use_compress_) {
308  t_exchange_ = t;
309  }
310 #endif
311  if (ithread_ == 0) {
312  //npe_->pr("savestate_restore", tt, nc);
313  for (int i=0; i < nrn_nthread; ++i) if (npe_+i) {
314  nc->event(tt, npe_+i, nrn_threads + i);
315  }
316  }
317 }
318 
319 #if NRNMPI
320 inline static void sppk(unsigned char* c, int gid) {
321  for (int i = localgid_size_-1; i >= 0; --i) {
322  c[i] = gid & 255;
323  gid >>= 8;
324  }
325 }
326 inline static int spupk(unsigned char* c) {
327  int gid = *c++;
328  for (int i = 1; i < localgid_size_; ++i) {
329  gid <<= 8;
330  gid += *c++;
331  }
332  return gid;
333 }
334 
335 void nrn_outputevent(unsigned char localgid, double firetime) {
336  if (!active_) { return; }
337  MUTLOCK
338  nout_++;
339  int i = idxout_;
340  idxout_ += 2;
341  if (idxout_ >= spfixout_capacity_) {
342  spfixout_capacity_ *= 2;
343  spfixout_ = (unsigned char*)hoc_Erealloc(spfixout_, spfixout_capacity_*sizeof(unsigned char)); hoc_malchk();
344  }
345  spfixout_[i++] = (unsigned char)((firetime - t_exchange_)*dt1_ + .5);
346  spfixout_[i] = localgid;
347 //printf("%d idx=%d lgid=%d firetime=%g t_exchange_=%g [0]=%d [1]=%d\n", nrnmpi_myid, i, (int)localgid, firetime, t_exchange_, (int)spfixout_[i-1], (int)spfixout_[i]);
348  MUTUNLOCK
349 }
350 
351 #ifndef USENCS
352 void nrn2ncs_outputevent(int gid, double firetime) {
353  if (!active_) { return; }
354  MUTLOCK
355  if (use_compress_) {
356  nout_++;
357  int i = idxout_;
358  idxout_ += 1 + localgid_size_;
359  if (idxout_ >= spfixout_capacity_) {
360  spfixout_capacity_ *= 2;
361  spfixout_ = (unsigned char*)hoc_Erealloc(spfixout_, spfixout_capacity_*sizeof(unsigned char)); hoc_malchk();
362  }
363 //printf("%d nrnncs_outputevent %d %.20g %.20g %d\n", nrnmpi_myid, gid, firetime, t_exchange_,
364 //(int)((unsigned char)((firetime - t_exchange_)*dt1_ + .5)));
365  spfixout_[i++] = (unsigned char)((firetime - t_exchange_)*dt1_ + .5);
366 //printf("%d idx=%d firetime=%g t_exchange_=%g spfixout=%d\n", nrnmpi_myid, i, firetime, t_exchange_, (int)spfixout_[i-1]);
367  sppk(spfixout_+i, gid);
368 //printf("%d idx=%d gid=%d spupk=%d\n", nrnmpi_myid, i, gid, spupk(spfixout_+i));
369  }else{
370 #if nrn_spikebuf_size == 0
371  int i = nout_++;
372  if (i >= ocapacity_) {
373  ocapacity_ *= 2;
375  }
376 //printf("%d cell %d in slot %d fired at %g\n", nrnmpi_myid, gid, i, firetime);
377  spikeout_[i].gid = gid;
378  spikeout_[i].spiketime = firetime;
379 #else
380  int i = nout_++;
381  if (i >= nrn_spikebuf_size) {
382  i -= nrn_spikebuf_size;
383  if (i >= ocapacity_) {
384  ocapacity_ *= 2;
386  }
387  spikeout_[i].gid = gid;
388  spikeout_[i].spiketime = firetime;
389  }else{
390  spbufout_->gid[i] = gid;
391  spbufout_->spiketime[i] = firetime;
392  }
393 #endif
394  }
395  MUTUNLOCK
396 //printf("%d cell %d in slot %d fired at %g\n", nrnmpi_myid, gid, i, firetime);
397 }
398 #endif //USENCS
399 #endif // NRNMPI
400 
401 static int nrn_need_npe() {
402 
403  int b = 0;
404  if (active_) { b = 1; }
405  if (nrn_use_selfqueue_) { b = 1; }
406  if (nrn_nthread > 1) { b = 1; }
407  if (b) {
408  if (last_maxstep_arg_ == 0) {
409  last_maxstep_arg_ = 100.;
410  }
412  }else{
413  if (npe_) {
414  delete [] npe_;
415  npe_ = NULL;
416  n_npe_ = 0;
417  }
418  }
419  return b;
420 }
421 
422 static void calc_actual_mindelay() {
423  //reasons why mindelay_ can be smaller than min_interprocessor_delay
424  // are use_bgpdma_ when BGP_INTERVAL == 2
425 #if BGPDMA && (BGP_INTERVAL == 2)
426  if (use_bgpdma_ && n_bgp_interval == 2) {
428  }else{
430  }
431 #endif
432 }
433 
434 #if BGPDMA
435 #include "bgpdma.cpp"
436 #else
437 #define TBUFSIZE 0
438 #define TBUF /**/
439 #endif
440 
442  if (nrnmpi_step_wait_ >= 0.0) {
443  nrnmpi_step_wait_ = 0.0;
444  }
445 #ifdef USENCS
446  bgp_dma_setup();
447  return;
448 #endif
449 //printf("nrn_spike_exchange_init\n");
450  if (!nrn_need_npe()) { return; }
451 // if (!active_ && !nrn_use_selfqueue_) { return; }
452  alloc_space();
453 //printf("nrnmpi_use=%d active=%d\n", nrnmpi_use, active_);
456  if (cvode_active_ == 0 && nrn_nthread > 1) {
457  usable_mindelay_ -= dt;
458  }
459  if ((usable_mindelay_ < 1e-9) || (cvode_active_ == 0 && usable_mindelay_ < dt)) {
460  if (nrnmpi_myid == 0) {
461  hoc_execerror("usable mindelay is 0", "(or less than dt for fixed step method)");
462  }else{
463  return;
464  }
465  }
466 
467 #if NRN_MUSIC
468  if (nrnmusic) {
470  }
471 #endif
472 
473 #if TBUFSIZE
474  itbuf_ = 0;
475 #endif
476 
477 #if BGPDMA
478  if (use_bgpdma_) {
479  bgp_dma_init();
480  }
481 #endif
482 
483  if (n_npe_ != nrn_nthread) {
484  if (npe_) { delete [] npe_; }
485  npe_ = new NetParEvent[nrn_nthread];
487  }
488  for (int i = 0; i < nrn_nthread; ++i) {
489  npe_[i].ithread_ = i;
490  npe_[i].wx_ = 0.;
491  npe_[i].ws_ = 0.;
492  npe_[i].send(t, net_cvode_instance, nrn_threads + i);
493  }
494 #if NRNMPI
495  if (use_compress_) {
496  idxout_ = 2;
497  t_exchange_ = t;
498  dt1_ = 1./dt;
499  usable_mindelay_ = floor(mindelay_ * dt1_ + 1e-9) * dt;
500  assert (usable_mindelay_ >= dt && (usable_mindelay_ * dt1_) < 255);
501  }else{
502 #if nrn_spikebuf_size > 0
503  if (spbufout_) {
504  spbufout_->nspike = 0;
505  }
506 #endif
507  }
508  nout_ = 0;
509  nsend_ = nsendmax_ = nrecv_ = nrecv_useful_ = 0;
510  if (nrnmpi_numprocs > 0) {
511  if (nrn_nthread > 0) {
512 #if USE_PTHREAD
513  if (!mut_) {
514  MUTCONSTRUCT(1)
515  }
516 #endif
517  }else{
519  }
520  }
521 #endif // NRNMPI
522  //if (nrnmpi_myid == 0){printf("usable_mindelay_ = %g\n", usable_mindelay_);}
523 }
524 
525 #if NRNMPI
526 void nrn_spike_exchange(NrnThread* nt) {
527  nrn::Instrumentor::phase p_spike_exchange("spike-exchange");
528  if (!active_) { return; }
529 #if BGPDMA
530  if (use_bgpdma_) {
531  bgp_dma_receive(nt);
532  return;
533  }
534 #endif
535  if (use_compress_) { nrn_spike_exchange_compressed(nt); return; }
536  TBUF
537 #if TBUFSIZE
538  nrnmpi_barrier();
539 #endif
540  TBUF
541  double wt;
542  int i, n;
543 #if NRNSTAT
544  nsend_ += nout_;
545  if (nsendmax_ < nout_) { nsendmax_ = nout_; }
546 #endif
547 #if nrn_spikebuf_size > 0
548  spbufout_->nspike = nout_;
549 #endif
550  wt = nrnmpi_wtime();
551  if (nrnmpi_step_wait_ >= 0.) {
552  nrnmpi_barrier();
554  }
555  n = nrnmpi_spike_exchange();
556  wt_ = nrnmpi_wtime() - wt;
557  wt = nrnmpi_wtime();
558  TBUF
559 #if TBUFSIZE
560  tbuf_[itbuf_++] = (unsigned long)nout_;
561  tbuf_[itbuf_++] = (unsigned long)n;
562 #endif
563 
564  errno = 0;
565 //if (n > 0) {
566 //printf("%d nrn_spike_exchange sent %d received %d\n", nrnmpi_myid, nout_, n);
567 //}
568  nout_ = 0;
569  if (n == 0) {
570 #if NRNSTAT
571  if (max_histogram_) { vector_vec(max_histogram_)[0] += 1.; }
572 #endif
573  TBUF
574  return;
575  }
576 #if NRNSTAT
577  nrecv_ += n;
578  if (max_histogram_) {
579  int mx = 0;
580  if (n > 0) {
581  for (i=nrnmpi_numprocs-1 ; i >= 0; --i) {
582 #if nrn_spikebuf_size == 0
583  if (mx < nin_[i]) {
584  mx = nin_[i];
585  }
586 #else
587  if (mx < spbufin_[i].nspike) {
588  mx = spbufin_[i].nspike;
589  }
590 #endif
591  }
592  }
593  int ms = vector_capacity(max_histogram_)-1;
594  mx = (mx < ms) ? mx : ms;
595  vector_vec(max_histogram_)[mx] += 1.;
596  }
597 #endif // NRNSTAT
598 #if nrn_spikebuf_size > 0
599  for (i = 0; i < nrnmpi_numprocs; ++i) {
600  int j;
601  int nn = spbufin_[i].nspike;
602  if (nn > nrn_spikebuf_size) { nn = nrn_spikebuf_size; }
603  for (j=0; j < nn; ++j) {
604  auto iter = gid2in_.find(spbufin_[i].gid[j]);
605  if (iter != gid2in_.end()) {
606  PreSyn* ps = iter->second;
607  ps->send(spbufin_[i].spiketime[j], net_cvode_instance, nt);
608 #if NRNSTAT
609  ++nrecv_useful_;
610 #endif
611  }
612  }
613  }
614  n = ovfl_;
615 #endif // nrn_spikebuf_size > 0
616  for (i = 0; i < n; ++i) {
617  auto iter = gid2in_.find(spikein_[i].gid);
618  if (iter != gid2in_.end()) {
619  PreSyn* ps = iter->second;
620  ps->send(spikein_[i].spiketime, net_cvode_instance, nt);
621 #if NRNSTAT
622  ++nrecv_useful_;
623 #endif
624  }
625  }
626  wt1_ = nrnmpi_wtime() - wt;
627  TBUF
628 }
629 
630 void nrn_spike_exchange_compressed(NrnThread* nt) {
631  if (!active_) { return; }
632  TBUF
633 #if TBUFSIZE
634  nrnmpi_barrier();
635 #endif
636  TBUF
638  double wt;
639  int i, n, idx;
640 #if NRNSTAT
641  nsend_ += nout_;
642  if (nsendmax_ < nout_) { nsendmax_ = nout_; }
643 #endif
644  assert(nout_ < 0x10000);
645  spfixout_[1] = (unsigned char)(nout_ & 0xff);
646  spfixout_[0] = (unsigned char)(nout_>>8);
647 
648  wt = nrnmpi_wtime();
649  if (nrnmpi_step_wait_ >= 0.) {
650  nrnmpi_barrier();
652  }
653  n = nrnmpi_spike_exchange_compressed();
654  wt_ = nrnmpi_wtime() - wt;
655  wt = nrnmpi_wtime();
656  TBUF
657 #if TBUFSIZE
658  tbuf_[itbuf_++] = (unsigned long)nout_;
659  tbuf_[itbuf_++] = (unsigned long)n;
660 #endif
661  errno = 0;
662 //if (n > 0) {
663 //printf("%d nrn_spike_exchange sent %d received %d\n", nrnmpi_myid, nout_, n);
664 //}
665  nout_ = 0;
666  idxout_ = 2;
667  if (n == 0) {
668 #if NRNSTAT
669  if (max_histogram_) { vector_vec(max_histogram_)[0] += 1.; }
670 #endif
672  TBUF
673  return;
674  }
675 #if NRNSTAT
676  nrecv_ += n;
677  if (max_histogram_) {
678  int mx = 0;
679  if (n > 0) {
680  for (i=nrnmpi_numprocs-1 ; i >= 0; --i) {
681  if (mx < nin_[i]) {
682  mx = nin_[i];
683  }
684  }
685  }
686  int ms = vector_capacity(max_histogram_)-1;
687  mx = (mx < ms) ? mx : ms;
688  vector_vec(max_histogram_)[mx] += 1.;
689  }
690 #endif // NRNSTAT
691  if (nrn_use_localgid_) {
692  int idxov = 0;
693  for (i = 0; i < nrnmpi_numprocs; ++i) {
694  int j, nnn;
695  int nn = nin_[i];
696  if (nn) {
697  if (i == nrnmpi_myid) { // skip but may need to increment idxov.
698  if (nn > ag_send_nspike_) {
699  idxov += (nn - ag_send_nspike_)*(1 + localgid_size_);
700  }
701  continue;
702  }
703  Gid2PreSyn* gps = localmaps_[i].get();
704  if (nn > ag_send_nspike_) {
705  nnn = ag_send_nspike_;
706  }else{
707  nnn = nn;
708  }
709  idx = 2 + i*ag_send_size_;
710  for (j=0; j < nnn; ++j) {
711  // order is (firetime,gid) pairs.
712  double firetime = spfixin_[idx++]*dt + t_exchange_;
713  int lgid = (int)spfixin_[idx];
714  idx += localgid_size_;
715  auto iter = gps->find(lgid);
716  if (iter != gps->end()) {
717  PreSyn* ps = iter->second;
718  ps->send(firetime + 1e-10, net_cvode_instance, nt);
719 #if NRNSTAT
720  ++nrecv_useful_;
721 #endif
722  }
723  }
724  for ( ; j < nn; ++j) {
725  double firetime = spfixin_ovfl_[idxov++]*dt + t_exchange_;
726  int lgid = (int)spfixin_ovfl_[idxov];
727  idxov += localgid_size_;
728  auto iter = gps->find(lgid);
729  if (iter != gps->end()) {
730  PreSyn* ps = iter->second;
731  ps->send(firetime+1e-10, net_cvode_instance, nt);
732 #if NRNSTAT
733  ++nrecv_useful_;
734 #endif
735  }
736  }
737  }
738  }
739  }else{
740  for (i = 0; i < nrnmpi_numprocs; ++i) {
741  int j;
742  int nn = nin_[i];
743  if (nn > ag_send_nspike_) { nn = ag_send_nspike_; }
744  idx = 2 + i*ag_send_size_;
745  for (j=0; j < nn; ++j) {
746  // order is (firetime,gid) pairs.
747  double firetime = spfixin_[idx++]*dt + t_exchange_;
748  int gid = spupk(spfixin_ + idx);
749  idx += localgid_size_;
750  auto iter = gid2in_.find(gid);
751  if (iter != gid2in_.end()) {
752  PreSyn* ps = iter->second;
753  ps->send(firetime+1e-10, net_cvode_instance, nt);
754 #if NRNSTAT
755  ++nrecv_useful_;
756 #endif
757  }
758  }
759  }
760  n = ovfl_;
761  idx = 0;
762  for (i = 0; i < n; ++i) {
763  double firetime = spfixin_ovfl_[idx++]*dt + t_exchange_;
764  int gid = spupk(spfixin_ovfl_ + idx);
765  idx += localgid_size_;
766  auto iter = gid2in_.find(gid);
767  if (iter != gid2in_.end()) {
768  PreSyn* ps = iter->second;
769  ps->send(firetime+1e-10, net_cvode_instance, nt);
770 #if NRNSTAT
771  ++nrecv_useful_;
772 #endif
773  }
774  }
775  }
777  wt1_ = nrnmpi_wtime() - wt;
778  TBUF
779 }
780 
781 static void mk_localgid_rep() {
782  // how many gids are there on this machine
783  // and can they be compressed into one byte
784  int ngid = 0;
785  for (const auto& iter: gid2out_) {
786  PreSyn* ps = iter.second;
787  if (ps->output_index_ >= 0) {
788  ++ngid;
789  }
790  }
791  int ngidmax = nrnmpi_int_allmax(ngid);
792  if (ngidmax > 256) {
793  //do not compress
794  return;
795  }
796  localgid_size_ = sizeof(unsigned char);
797  nrn_use_localgid_ = true;
798 
799  // allocate Allgather receive buffer (send is the nrnmpi_myid one)
800  int* rbuf = new int[nrnmpi_numprocs*(ngidmax + 1)];
801  int* sbuf = new int[ngidmax + 1];
802 
803  sbuf[0] = ngid;
804  ++sbuf;
805  ngid = 0;
806  // define the local gid and fill with the gids on this machine
807  for (const auto& iter: gid2out_) {
808  PreSyn* ps = iter.second;
809  if (ps->output_index_ >= 0) {
810  ps->localgid_ = (unsigned char)ngid;
811  sbuf[ngid] = ps->output_index_;
812  ++ngid;
813  }
814  }
815  --sbuf;
816 
817  // exchange everything
818  nrnmpi_int_allgather(sbuf, rbuf, ngidmax+1);
819  delete [] sbuf;
820  errno = 0;
821 
822  // create the maps
823  // there is a lot of potential for efficiency here. i.e. use of
824  // perfect hash functions, or even simple Vectors.
825  localmaps_.clear();
826  localmaps_.resize(nrnmpi_numprocs);
827 
828  for (int i = 0; i < nrnmpi_numprocs; ++i) if (i != nrnmpi_myid) {
829  localmaps_[i].reset(new Gid2PreSyn());
830  }
831 
832  // fill in the maps
833  for (int i = 0; i < nrnmpi_numprocs; ++i) if (i != nrnmpi_myid) {
834  sbuf = rbuf + i*(ngidmax + 1);
835  ngid = *(sbuf++);
836  for (int k=0; k < ngid; ++k) {
837  auto iter = gid2in_.find(int(sbuf[k]));
838  if (iter != gid2in_.end()) {
839  PreSyn* ps = iter->second;
840  (*localmaps_[i])[k] = ps;
841  }
842  }
843  }
844 
845  // cleanup
846  delete [] rbuf;
847 }
848 
849 #endif // NRNMPI
850 
851 // may stimulate a gid for a cell not owned by this cpu. This allows
852 // us to run single cells or subnets and stimulate exactly according to
853 // their input in a full parallel net simulation.
854 // For some purposes, it may be useful to simulate a spike from a
855 // cell that does exist and would normally send its own spike, eg.
856 // recurrent stimulation. This can be useful in debugging where the
857 // spike raster comes from another implementation and one wants to
858 // get complete control of all input spikes without the confounding
859 // effects of output spikes from the simulated cells. In this case
860 // set the third arg to 1 and set the output cell thresholds very
861 // high so that they do not themselves generate spikes.
862 // The remaining possibility is fake_out=2 which only does a send for
863 // the gids owned by this cpu. This, followed by a nrn_spike_exchange(),
864 // ensures that all the target cells, regardless of what rank they are on
865 // will get the spike delivered and nobody gets it twice.
866 
867 extern "C" void nrn_fake_fire(int gid, double spiketime, int fake_out) {
868  PreSyn* ps{nullptr};
869  if (fake_out < 2) {
870  auto iter = gid2in_.find(gid);
871  if (iter != gid2in_.end()) {
872  ps = iter->second;
873 //printf("nrn_fake_fire %d %g\n", gid, spiketime);
874  ps->send(spiketime, net_cvode_instance, nrn_threads);
875 #if NRNSTAT
876  ++nrecv_useful_;
877 #endif
878  }
879  }else if (fake_out && !ps) {
880  auto iter = gid2out_.find(gid);
881  if (iter != gid2out_.end()) {
882  ps = iter->second;
883 //printf("nrn_fake_fire fake_out %d %g\n", gid, spiketime);
884  ps->send(spiketime, net_cvode_instance, nrn_threads);
885 #if NRNSTAT
886  ++nrecv_useful_;
887 #endif
888  }
889  }
890 }
891 
892 static void alloc_space() {
893  if (!netcon_sym_) {
894  netcon_sym_ = hoc_lookup("NetCon");
895 #if NRNMPI
896  ocapacity_ = 100;
897  spikeout_ = (NRNMPI_Spike*)hoc_Emalloc(ocapacity_*sizeof(NRNMPI_Spike)); hoc_malchk();
898  icapacity_ = 100;
900  nin_ = (int*)hoc_Emalloc(nrnmpi_numprocs*sizeof(int)); hoc_malchk();
901 #if nrn_spikebuf_size > 0
902 spbufout_ = (NRNMPI_Spikebuf*)hoc_Emalloc(sizeof(NRNMPI_Spikebuf)); hoc_malchk();
903 spbufin_ = (NRNMPI_Spikebuf*)hoc_Emalloc(nrnmpi_numprocs*sizeof(NRNMPI_Spikebuf)); hoc_malchk();
904 #endif
905 #endif
906  }
907 }
908 
909 void BBS::set_gid2node(int gid, int nid) {
910  alloc_space();
911 #if NRNMPI
912  if (nid == nrnmpi_myid) {
913 #else
914  {
915 #endif
916 //printf("gid %d defined on %d\n", gid, nrnmpi_myid);
917  char m[200];
918  if (gid2in_.find(gid) != gid2in_.end()) {
919  sprintf(m, "gid=%d already exists as an input port", gid);
920  hoc_execerror(m, "Setup all the output ports on this process before using them as input ports.");
921  }
922  if (gid2out_.find(gid) != gid2out_.end()) {
923  sprintf(m, "gid=%d already exists on this process as an output port", gid);
924  hoc_execerror(m, 0);
925  }
926  gid2out_[gid] = nullptr;
927  }
928 }
929 
930 static int gid_donot_remove = 0; // avoid gid2in_, gid2out removal when iterating
931 
933 #if BGPDMA
935 #endif
936  if (gid_donot_remove) {
937  return;
938  }
939  if (ps->output_index_ >= 0) {
940  gid2out_.erase(ps->output_index_);
941  ps->output_index_ = -1;
942  ps->gid_ = -1;
943  }
944  if (ps->gid_ >= 0) {
945  gid2in_.erase(ps->gid_);
946  ps->gid_ = -1;
947  }
948 }
949 
951  if (arg == 0 || arg == 3 || arg == 4) {
953 #if PARANEURON
954  nrnmpi_split_clear();
955 #endif
956  }
957  if (arg == 0 || arg == 2 || arg == 4) { nrnmpi_multisplit_clear(); }
958  if (arg == 2 || arg == 3) { return; }
959  gid_donot_remove = 1;
960  for (const auto& iter: gid2out_) {
961  PreSyn* ps = iter.second;
962  if (ps && gid2in_.find(ps->gid_) == gid2in_.end()) {
963  if (arg == 4) {
964  delete ps;
965  }else{
966 #if BGPDMA
968 #endif
969  ps->gid_ = -1;
970  ps->output_index_ = -1;
971  if (ps->dil_.empty()) {
972  delete ps;
973  }
974  }
975  }
976  }
977  for (const auto& iter: gid2in_) {
978  PreSyn* ps = iter.second;
979  if (arg == 4) {
980  delete ps;
981  }else{
982 #if BGPDMA
984 #endif
985  ps->gid_ = -1;
986  ps->output_index_ = -1;
987  if (ps->dil_.empty()) {
988  delete ps;
989  }
990  }
991  }
992  gid_donot_remove = 0;
993  gid2in_.clear();
994  gid2out_.clear();
995 }
996 
997 int nrn_gid_exists(int gid) {
998  alloc_space();
999  auto iter = gid2out_.find(gid);
1000  if (iter != gid2out_.end()) {
1001  PreSyn* ps = iter->second;
1002 //printf("%d gid %d exists\n", nrnmpi_myid, gid);
1003  if (ps) {
1004  return (ps->output_index_ >= 0 ? 3 : 2);
1005  }else{
1006  return 1;
1007  }
1008  }
1009  return 0;
1010 }
1011 int BBS::gid_exists(int gid) {return nrn_gid_exists(gid);}
1012 
1013 double BBS::threshold() {
1014  int gid = int(chkarg(1, 0., MD));
1015  auto iter = gid2out_.find(gid);
1016  if (iter == gid2out_.end() || iter->second == NULL) {
1017  hoc_execerror("gid not associated with spike generation location", 0);
1018  }
1019  PreSyn* ps = iter->second;
1020  if (ifarg(2)) {
1021  ps->threshold_ = *getarg(2);
1022  }
1023  return ps->threshold_;
1024 }
1025 
1026 void BBS::cell() {
1027  int gid = int(chkarg(1, 0., MD));
1028  alloc_space();
1029  if (gid2in_.find(gid) != gid2in_.end()) {
1030  char buf[100];
1031  sprintf(buf, "gid=%d is in the input list. Must register prior to connecting", gid);
1032  hoc_execerror(buf, 0);
1033  }
1034  if (gid2out_.find(gid) == gid2out_.end()) {
1035  char buf[100];
1036  sprintf(buf, "gid=%d has not been set on rank %d", gid, nrnmpi_myid);
1037  hoc_execerror(buf, 0);
1038  }
1039  Object* ob = *hoc_objgetarg(2);
1040  if (!ob || ob->ctemplate != netcon_sym_->u.ctemplate) {
1041  check_obj_type(ob, "NetCon");
1042  }
1043  NetCon* nc = (NetCon*)ob->u.this_pointer;
1044  PreSyn* ps = nc->src_;
1045 //printf("%d cell %d %s\n", nrnmpi_myid, gid, hoc_object_name(ps->ssrc_ ? nrn_sec2cell(ps->ssrc_) : ps->osrc_));
1046  gid2out_[gid] = ps;
1047  ps->gid_ = gid;
1048  if (ifarg(3) && !chkarg(3, 0., 1.)) {
1049  ps->output_index_ = -2; //prevents destruction of PreSyn
1050  }else{
1051  ps->output_index_ = gid;
1052  }
1053 }
1054 
1055 void BBS::outputcell(int gid) {
1056  auto iter = gid2out_.find(gid);
1057  nrn_assert(iter != gid2out_.end());
1058  PreSyn* ps = iter->second;
1059  assert(ps);
1060  ps->output_index_ = gid;
1061  ps->gid_ = gid;
1062 }
1063 
1064 void BBS::spike_record(int gid, IvocVect* spikevec, IvocVect* gidvec) {
1065  if (gid >= 0) {
1066  all_spiketvec = NULL, all_spikegidvec = NULL; // invalidate global spike vectors
1067 
1068  auto iter = gid2out_.find(gid);
1069  nrn_assert(iter != gid2out_.end());
1070  PreSyn* ps = iter->second;
1071  assert(ps);
1072  ps->record(spikevec, gidvec, gid);
1073  }else{ // record all output spikes.
1074  all_spiketvec = spikevec, all_spikegidvec = gidvec; // track global spike vectors (optimisation)
1075  for (const auto& iter: gid2out_) {
1076  PreSyn* ps = iter.second;
1077  if (ps->output_index_ >= 0) {
1078  ps->record(all_spiketvec, all_spikegidvec, ps->output_index_);
1079  }
1080  }
1081  }
1082 }
1083 
1084 void BBS::spike_record(IvocVect* gids, IvocVect* spikevec, IvocVect* gidvec) {
1085  int sz = vector_capacity(gids);
1086  // invalidate global spike vectors
1087  all_spiketvec = nullptr;
1088  all_spikegidvec = nullptr;
1089  double* pd = vector_vec(gids);
1090  for (int i = 0; i < sz; ++i) {
1091  int gid = int(pd[i]);
1092  auto iter = gid2out_.find(gid);
1093  nrn_assert(iter != gid2out_.end());
1094  PreSyn* ps = iter->second;
1095  assert(ps);
1096  ps->record(spikevec, gidvec, gid);
1097  }
1098 }
1099 
1100 static Object* gid2obj_(int gid) {
1101  Object* cell = 0;
1102 //printf("%d gid2obj gid=%d\n", nrnmpi_myid, gid);
1103  auto iter = gid2out_.find(gid);
1104  nrn_assert(iter != gid2out_.end());
1105  PreSyn* ps = iter->second;
1106 //printf(" found\n");
1107  assert(ps);
1108  cell = ps->ssrc_ ? nrn_sec2cell(ps->ssrc_) : ps->osrc_;
1109 //printf(" return %s\n", hoc_object_name(cell));
1110  return cell;
1111 }
1112 
1113 Object** BBS::gid2obj(int gid) {
1114  return hoc_temp_objptr(gid2obj_(gid));
1115 }
1116 
1117 Object** BBS::gid2cell(int gid) {
1118  Object* cell = 0;
1119 //printf("%d gid2obj gid=%d\n", nrnmpi_myid, gid);
1120  auto iter = gid2out_.find(gid);
1121  nrn_assert(iter != gid2out_.end());
1122  PreSyn* ps = iter->second;
1123 //printf(" found\n");
1124  assert(ps);
1125  if (ps->ssrc_) {
1126  cell = nrn_sec2cell(ps->ssrc_);
1127  }else{
1128  cell = ps->osrc_;
1129  // but if it is a POINT_PROCESS in a section
1130  // that is inside an object ... (probably has a WATCH statement)
1131  Section* sec = ob2pntproc(cell)->sec;
1132  Object* c2;
1133  if (sec && (c2 = nrn_sec2cell(sec))) {
1134  if (c2) {
1135  cell = c2;
1136  }
1137  }
1138  }
1139 //printf(" return %s\n", hoc_object_name(cell));
1140  return hoc_temp_objptr(cell);
1141 }
1142 
1144  Object* target = *hoc_objgetarg(2);
1145  if (!is_point_process(target)) {
1146  hoc_execerror("arg 2 must be a point process", 0);
1147  }
1148  alloc_space();
1149  PreSyn* ps;
1150  auto iter_out = gid2out_.find(gid);
1151  if (iter_out != gid2out_.end()) {
1152  // the gid is owned by this machine so connect directly
1153  ps = iter_out->second;
1154  if (!ps) {
1155  char buf[100];
1156  sprintf(buf, "gid %d owned by %d but no associated cell", gid, nrnmpi_myid);
1157  hoc_execerror(buf, 0);
1158  }
1159  }else{
1160  auto iter_in = gid2in_.find(gid);
1161  if (iter_in != gid2in_.end()) {
1162  // the gid stub already exists
1163  ps = iter_in->second;
1164 //printf("%d connect %s from already existing %d\n", nrnmpi_myid, hoc_object_name(target), gid);
1165  }else{
1166 //printf("%d connect %s from new PreSyn for %d\n", nrnmpi_myid, hoc_object_name(target), gid);
1167  ps = new PreSyn(NULL, NULL, NULL);
1168  net_cvode_instance->psl_append(ps);
1169  gid2in_[gid] = ps;
1170  ps->gid_ = gid;
1171  }
1172  }
1173  NetCon* nc;
1174  Object** po;
1175  if (ifarg(3)) {
1176  po = hoc_objgetarg(3);
1177  if (!*po || (*po)->ctemplate != netcon_sym_->u.ctemplate) {
1178  check_obj_type(*po, "NetCon");
1179  }
1180  nc = (NetCon*)((*po)->u.this_pointer);
1181  if (nc->target_ != ob2pntproc(target)) {
1182  hoc_execerror("target is different from 3rd arg NetCon target", 0);
1183  }
1184  nc->replace_src(ps);
1185  }else{
1186  nc = new NetCon(ps, target);
1187  po = hoc_temp_objvar(netcon_sym_, nc);
1188  nc->obj_ = *po;
1189  }
1190  return po;
1191 }
1192 
1193 static int timeout_ = 20;
1194 int nrn_set_timeout(int timeout) {
1195  int tt;
1196  tt = timeout_;
1197  timeout_ = timeout;
1198  return tt;
1199 }
1200 
1201 void BBS::netpar_solve(double tstop) {
1202  // temporary check to be eventually replaced by verify_structure()
1204  if (tree_changed) {
1205  setup_topology();
1206  }
1207  if (v_structure_change) {
1208  v_setup_vectors();
1209  }
1210  if (diam_changed) {
1211  recalc_diam();
1212  }
1213  // if cvode_active, and anything at all has changed, should call re_init
1214 
1215 #if NRNMPI
1216  double mt, md;
1217  tstopunset;
1218  if (cvode_active_) {
1219  mt = 1e-9 ; md = mindelay_;
1220  }else{
1221  mt = dt ; md = mindelay_ - 1e-10;
1222  }
1223  if (md < mt) {
1224  if (nrnmpi_myid == 0) {
1225  hoc_execerror("mindelay is 0", "(or less than dt for fixed step method)");
1226  }else{
1227  return;
1228  }
1229  }
1230  double wt;
1231 
1232  nrnmpi_barrier(); // make sure all integrations start about the same
1233  nrn_timeout(timeout_); //time to avoid spurious timeouts while waiting
1234  // at the next MPI_collective.
1235  wt = nrnmpi_wtime();
1236  if (cvode_active_) {
1237  ncs2nrn_integrate(tstop);
1238  }else{
1239  ncs2nrn_integrate(tstop*(1.+1e-11));
1240  }
1241  impl_->integ_time_ += nrnmpi_wtime() - wt;
1242  impl_->integ_time_ -= (npe_ ? (npe_[0].wx_ + npe_[0].ws_) : 0.);
1243 #if BGPDMA
1244  if (use_bgpdma_) {
1245 #if BGP_INTERVAL == 2
1246  for (int i=0; i < n_bgp_interval; ++i) {
1248  }
1249 #else
1251 #endif
1252  }else{
1254  }
1255 #else
1257 #endif
1258  nrn_timeout(0);
1259  impl_->wait_time_ += wt_;
1260  impl_->send_time_ += wt1_;
1261  if (npe_) {
1262  impl_->wait_time_ += npe_[0].wx_;
1263  impl_->send_time_ += npe_[0].ws_;
1264  npe_[0].wx_ = npe_[0].ws_ = 0.;
1265  };
1266 //printf("%d netpar_solve exit t=%g tstop=%g mindelay_=%g\n",nrnmpi_myid, t, tstop, mindelay_);
1267 #else // not NRNMPI
1268  ncs2nrn_integrate(tstop);
1269 #endif
1270  tstopunset;
1271 }
1272 
1273 int nrnthread_all_spike_vectors_return(std::vector<double>& spiketvec, std::vector<int>& spikegidvec){
1274  assert(spiketvec.size() == spikegidvec.size());
1275  if( spiketvec.size() ) {
1276  /* Optimisation:
1277  * if all_spiketvec and all_spikegidvec are set (pc.spike_record with gid=-1 was called)
1278  * and they are still reference counted (obj_->refcount), then copy over incoming vectors.
1279  */
1280  if (all_spiketvec != NULL && all_spiketvec->obj_ != NULL && all_spiketvec->obj_->refcount > 0 &&
1281  all_spikegidvec != NULL && all_spikegidvec->obj_ != NULL && all_spikegidvec->obj_->refcount > 0) {
1282 
1283  all_spiketvec->buffer_size(spiketvec.size() + all_spiketvec->size());
1284  all_spikegidvec->buffer_size(spikegidvec.size() + all_spikegidvec->size());
1285  all_spiketvec->vec().insert(all_spiketvec->end(), spiketvec.begin(), spiketvec.end());
1286  all_spikegidvec->vec().insert(all_spikegidvec->end(), spikegidvec.begin(), spikegidvec.end());
1287 
1288  }else{ // different underlying vectors for PreSyns
1289  for (size_t i = 0; i < spikegidvec.size(); ++i ) {
1290  auto iter = gid2out_.find(spikegidvec[i]);
1291  if (iter != gid2out_.end()) {
1292  PreSyn* ps = iter->second;
1293  ps->record(spiketvec[i]);
1294  }
1295  }
1296  }
1297  }
1298  return 1;
1299 }
1300 
1301 
1302 static double set_mindelay(double maxdelay) {
1303  double mindelay = maxdelay;
1304  last_maxstep_arg_ = maxdelay;
1305  if (nrn_use_selfqueue_ || net_cvode_instance->localstep() || nrn_nthread > 1 ) {
1306  hoc_Item* q;
1307  if (net_cvode_instance->psl_) ITERATE(q, net_cvode_instance->psl_) {
1308  PreSyn* ps = (PreSyn*)VOIDITM(q);
1309  double md = ps->mindelay();
1310  if (mindelay > md) {
1311  mindelay = md;
1312  }
1313  }
1314  }
1315 #if NRNMPI
1316  else{
1317  for (const auto& iter: gid2in_) {
1318  PreSyn* ps = iter.second;
1319  double md = ps->mindelay();
1320  if (mindelay > md) {
1321  mindelay = md;
1322  }
1323  }
1324  }
1325  if (nrnmpi_use) {active_ = 1;}
1326  if (use_compress_) {
1327  if (mindelay/dt > 255) {
1328  mindelay = 255*dt;
1329  }
1330  }
1331 
1332 //printf("%d netpar_mindelay local %g now calling nrnmpi_mindelay\n", nrnmpi_myid, mindelay);
1333 // double st = time();
1334  mindelay_ = nrnmpi_mindelay(mindelay);
1336 // add_wait_time(st);
1337 //printf("%d local min=%g global min=%g\n", nrnmpi_myid, mindelay, mindelay_);
1338  if (mindelay_ < 1e-9 && nrn_use_selfqueue_) {
1339  nrn_use_selfqueue_ = 0;
1340  double od = mindelay_;
1341  mindelay = set_mindelay(maxdelay);
1342  if (nrnmpi_myid == 0) {
1343 Printf("Notice: The global minimum NetCon delay is %g, so turned off the cvode.queue_mode\n", od);
1344 Printf(" use_self_queue option. The interprocessor minimum NetCon delay is %g\n", mindelay);
1345  }
1346  }
1347  errno = 0;
1348  return mindelay;
1349 #else
1350  mindelay_ = mindelay;
1352  return mindelay;
1353 #endif //NRNMPI
1354 }
1355 
1356 double BBS::netpar_mindelay(double maxdelay) {
1357 #if BGPDMA
1358  bgp_dma_setup();
1359 #endif
1360  double tt = set_mindelay(maxdelay);
1361  return tt;
1362 }
1363 
1364 void BBS::netpar_spanning_statistics(int* nsend, int* nsendmax, int* nrecv, int* nrecv_useful) {
1365 #if NRNMPI
1366  *nsend = nsend_;
1367  *nsendmax = nsendmax_;
1368  *nrecv = nrecv_;
1369  *nrecv_useful = nrecv_useful_;
1370 #endif
1371 }
1372 
1373 // unfortunately, ivocvect.h conflicts with STL
1375 #if NRNMPI && NRNSTAT
1376  IvocVect* h = max_histogram_;
1377  if (max_histogram_) {
1378  max_histogram_ = NULL;
1379  }
1380  if (mh) {
1381  max_histogram_ = mh;
1382  }
1383  return h;
1384 #else
1385  return NULL;
1386 #endif
1387 }
1388 
1389 /* 08-Nov-2010
1390 The workhorse for spike exchange on up to 10K machines is MPI_Allgather
1391 but as the number of machines becomes far greater than the fanout per
1392 cell we have been exploring a class of exchange methods called multisend
1393 where the spikes only go to those machines that need them and there is
1394 overlap between communication and computation. The numer of variants of
1395 multisend has grown so that some method selection function is needed
1396 that makes sense.
1397 
1398 The situation that needs to be captured by xchng_meth is
1399 
1400 0: Allgather
1401 1: multisend implemented as MPI_ISend
1402 
1403 n_bgp_interval 1 or 2 per minimum interprocessor NetCon delay
1404  that concept valid for all methods
1405 
1406 Note that Allgather allows spike compression and an allgather spike buffer
1407  with size chosen at setup time. All methods allow bin queueing.
1408 
1409 The multisend methods should allow two phase multisend.
1410 
1411 Note that, in principle, MPI_ISend allows the source to send the index
1412  of the target PreSyn to avoid a hash table lookup (even with a two phase
1413  variant)
1414 
1415 Not all variation are useful. e.g. it is pointless to combine Allgather and
1416 n_bgp_interval=2.
1417 The whole point is to make the
1418 spike transfer initiation as lowcost as possible since that is what causes
1419 most load imbalance. I.e. since 10K more spikes arrive than are sent, spikes
1420 received per processor per interval are much more statistically
1421 balanced than spikes sent per processor per interval. And presently
1422 DCMF multisend injects 10000 messages per spike into the network which
1423 is quite expensive.
1424 
1425 See case 8 of nrn_bgp_receive_time for the xchng_meth properties
1426 */
1427 
1428 int nrnmpi_spike_compress(int nspike, bool gid_compress, int xchng_meth) {
1429 #if NRNMPI
1430  if (nrnmpi_numprocs < 2) { return 0; }
1431  if (nspike >= 0) { // otherwise don't set any multisend properties
1432 #if BGP_INTERVAL == 2
1433  n_bgp_interval = (xchng_meth & 4) ? 2 : 1;
1434 #endif
1435 #if BGPDMA
1436  use_bgpdma_ = (xchng_meth & 1) == 1;
1437  use_phase2_ = (xchng_meth & 8) ? 1 : 0;
1438  if (use_bgpdma_) { assert(BGPDMA); }
1439  bgpdma_cleanup();
1440 #else // BGPDMA == 0
1441  assert(xchng_meth == 0);
1442 #endif
1443  }
1444  if (nspike >= 0) {
1445  ag_send_nspike_ = 0;
1446  if (spfixout_) { free(spfixout_); spfixout_ = 0; }
1447  if (spfixin_) { free(spfixin_); spfixin_ = 0; }
1448  if (spfixin_ovfl_) { free(spfixin_ovfl_); spfixin_ovfl_ = 0; }
1449  localmaps_.clear();
1450  }
1451  if (nspike == 0) { // turn off
1452  use_compress_ = false;
1453  nrn_use_localgid_ = false;
1454  }else if (nspike > 0) { // turn on
1455  if (cvode_active_) {
1456 if (nrnmpi_myid == 0) {hoc_warning("ParallelContext.spike_compress cannot be used with cvode active", 0);}
1457  use_compress_ = false;
1458  nrn_use_localgid_ = false;
1459  return 0;
1460  }
1461  use_compress_ = true;
1462  ag_send_nspike_ = nspike;
1463  nrn_use_localgid_ = false;
1464  if (gid_compress) {
1465  // we can only do this after everything is set up
1466  mk_localgid_rep();
1467  if (!nrn_use_localgid_ && nrnmpi_myid == 0) {
1468 Printf("Notice: gid compression did not succeed. Probably more than 255 cells on one cpu.\n");
1469  }
1470  }
1471  if (!nrn_use_localgid_) {
1472  localgid_size_ = sizeof(unsigned int);
1473  }
1475  spfixout_capacity_ = ag_send_size_ + 50*(1 + localgid_size_);
1476  spfixout_ = (unsigned char*)hoc_Emalloc(spfixout_capacity_); hoc_malchk();
1478  ovfl_capacity_ = 100;
1480  }
1481  return ag_send_nspike_;
1482 #else
1483  return 0;
1484 #endif
1485 }
1486 
1487 PreSyn* nrn_gid2outputpresyn(int gid) { // output PreSyn
1488  auto iter = gid2out_.find(gid);
1489  if (iter != gid2out_.end()) {
1490  return iter->second;
1491  }
1492  return NULL;
1493 }
1494 
1495 Object* nrn_gid2obj(int gid) {
1496  return gid2obj_(gid);
1497 }
1498 
1499 PreSyn* nrn_gid2presyn(int gid) { // output PreSyn
1500  auto iter = gid2out_.find(gid);
1501  nrn_assert(iter != gid2out_.end());
1502  return iter->second;
1503 }
1504 
1505 void nrn_gidout_iter(PFIO callback) {
1506  for (const auto& iter: gid2out_) {
1507  PreSyn* ps = iter.second;
1508  if (ps) {
1509  int gid = ps->gid_;
1510  Object* c = gid2obj_(gid);
1511  (*callback)(gid, c);
1512  }
1513  }
1514 }
1515 
1516 #include "nrncore_write.h"
1517 extern int* nrn_prop_param_size_;
1518 extern int* pnt_receive_size;
1519 extern short* nrn_is_artificial_;
1520 static int weightcnt(NetCon* nc) {
1521  return nc->cnt_;
1522 // return nc->target_ ? pnt_receive_size[nc->target_->prop->type]: 1;
1523 }
1524 
1526  size_t ntot, nin, nout, nnet, nweight;
1527  ntot = nin = nout = nnet = nweight = 0;
1528 size_t npnt = 0;
1529  if (0 && nrnmpi_myid == 0) {
1530  printf("size Presyn=%ld NetCon=%ld Point_process=%ld Prop=%ld\n",
1531  sizeof(PreSyn), sizeof(NetCon), sizeof(Point_process), sizeof(Prop));
1532  }
1533  for (const auto& iter: gid2out_) {
1534  PreSyn* ps = iter.second;
1535  if (ps) {
1536  nout += 1;
1537  int n = ps->dil_.size();
1538  nnet += n;
1539  for (int i=0; i < n; ++i) {
1540  nweight += weightcnt(ps->dil_[i]);
1541  NetCon* nc = ps->dil_[i];
1542  if (nc->target_) { npnt += 1; }
1543  }
1544  }
1545  }
1546 //printf("output Presyn npnt = %ld nweight = %ld\n", npnt, nweight);
1547  for (const auto& iter: gid2in_) {
1548  PreSyn* ps = iter.second;
1549  if (ps) {
1550  nin += 1;
1551  int n = ps->dil_.size();
1552  nnet += n;
1553  for (int i=0; i < n; ++i) {
1554  nweight += weightcnt(ps->dil_[i]);
1555  NetCon* nc = ps->dil_[i];
1556  if (nc->target_) { npnt += 1; }
1557  }
1558  }
1559  }
1560 //printf("after input Presyn total npnt = %ld nweight = %ld\n", npnt, nweight);
1561  ntot = (nin + nout)*sizeof(PreSyn) + nnet*sizeof(NetCon) + nweight*sizeof(double);
1562 // printf("%d rank output Presyn %ld input Presyn %ld NetCon %ld bytes %ld\n",
1563 // nrnmpi_myid, nout, nin, nnet, ntot);
1564  return ntot;
1565 }
1566 
1568  //printf("nrncore_netpar_cellgroups_helper\n");
1569 
1570  // for the real cells in each thread (all have output gid and voltage spike
1571  // detector) fill the cgs output_ps, output_gid, and output_vindex
1572  // All the other (acell) gids have already been processed.
1573  int* gidcnt = new int[nrn_nthread]; // real only
1574  for (int i=0; i < nrn_nthread; ++ i) {
1575  gidcnt[i] = 0;
1576  }
1577 
1578  for (const auto& iter: gid2out_) {
1579  PreSyn* ps = iter.second;
1580  if (ps && ps->thvar_) {
1581  int ith = ps->nt_->id;
1582  assert(ith >= 0 && ith < nrn_nthread);
1583  int i = gidcnt[ith];
1584  cgs[ith].output_ps[i] = ps;
1585  cgs[ith].output_gid[i] = ps->output_index_;
1586  assert(ps->thvar_ >= ps->nt_->_actual_v);
1587  int inode = ps->thvar_ - ps->nt_->_actual_v;
1588  assert(inode <= ps->nt_->end);
1589  cgs[ith].output_vindex[i] = inode;
1590  ++gidcnt[ith];
1591  }
1592  }
1593 #if 0 // allow real cells to NOT have a direct voltage threshold.
1594  for (int i=0; i < nrn_nthread; ++ i) {
1595  assert(nrn_threads[i].ncell == gidcnt[i]);
1596  }
1597 #endif
1598  delete [] gidcnt;
1599 }
1600 
void cell()
Definition: netpar.cpp:1026
static int nrnmpi_int_allmax(int x)
virtual void send(double, NetCvode *, NrnThread *)
Definition: netpar.cpp:233
ms
Definition: extargs.h:1
int cnt_
Definition: netcon.h:109
void outputcell(int)
Definition: netpar.cpp:1055
#define Printf
Definition: model.h:252
#define nrn_assert(ex)
Definition: nrnassrt.h:35
void deliver_events(double til, NrnThread *)
Definition: netcvode.cpp:2978
int _stop_stepping
Definition: multicore.h:67
#define assert(ex)
Definition: hocassrt.h:26
void netpar_solve(double)
Definition: netpar.cpp:1201
void nrnmpi_multisplit_clear()
Definition: multisplit.cpp:500
static double min_interprocessor_delay_
Definition: netpar.cpp:208
double threshold()
Definition: netpar.cpp:1013
int tree_changed
Definition: cabcode.cpp:19
void bgpdma_cleanup_presyn(PreSyn *ps)
Definition: bgpdma.cpp:635
TQItem * event(double tdeliver, DiscreteEvent *, NrnThread *)
Definition: netcvode.cpp:2623
static Gid2PreSyn gid2out_
Definition: netpar.cpp:33
Object ** gid2cell(int)
Definition: netpar.cpp:1117
TQItem * bin_event(double tdeliver, DiscreteEvent *, NrnThread *)
Definition: netcvode.cpp:2600
double netpar_mindelay(double maxdelay)
Definition: netpar.cpp:1356
static double mindelay_
Definition: netpar.cpp:209
Definition: netcon.h:232
void nrn_gidout_iter(PFIO)
Definition: netpar.cpp:1505
if(status)
static int use_phase2_
Definition: bgpdma.cpp:151
int nrnmpi_spike_compress(int nspike, bool gid_compress, int xchng_meth)
Definition: netpar.cpp:1428
static void calc_actual_mindelay()
Definition: netpar.cpp:422
bool active_
Definition: netcon.h:110
int cvode_active_
Definition: fadvance.cpp:158
NRNMPI_Spike * spikeout_
#define ITERATE(itm, lst)
Definition: model.h:25
size_t size() const
Definition: ivocvect.h:43
static void bgp_dma_init()
Definition: bgpdma.cpp:413
NrnThread * nt_
Definition: netcon.h:270
int nrn_use_selfqueue_
Definition: netcvode.cpp:92
Symbol * hoc_lookup(const char *)
#define MUTLOCK
Definition: nrnmutdec.h:32
void
Object * nrn_gid2obj(int)
Definition: netpar.cpp:1495
virtual void savestate_write(FILE *)
Definition: netpar.cpp:300
virtual void send(double sendtime, NetCvode *, NrnThread *)
Definition: netcvode.cpp:3165
void * this_pointer
Definition: hocdec.h:231
void ncs2nrn_integrate(double tstop)
Definition: netcvode.cpp:3748
Represent main neuron object computed by single thread.
Definition: multicore.h:58
int gid_exists(int)
Definition: netpar.cpp:1011
void nrn_cleanup_presyn(PreSyn *)
Definition: netpar.cpp:932
int output_index_
Definition: netcon.h:276
void bgp_dma_receive(NrnThread *nt)
Definition: bgpdma.cpp:543
void setup_topology()
Object * obj_
Definition: netcon.h:108
short * nrn_is_artificial_
Definition: init.cpp:231
void psl_append(PreSyn *)
Definition: netcvode.cpp:4592
int diam_changed
Definition: cabcode.cpp:23
check_obj_type(o, "SectionList")
#define MUTUNLOCK
Definition: nrnmutdec.h:33
static int nrnmpi_use
Definition: multisplit.cpp:48
Point_process * target_
Definition: netcon.h:106
int nout_
int ovfl_capacity_
int ithread_
Definition: netcon.h:379
auto end() -> std::vector< double >::iterator
Definition: ivocvect.h:69
Object * obj_
Definition: ivocvect.h:86
static philox4x32_key_t k
Definition: nrnran123.cpp:11
void nrn_spike_exchange_init()
Definition: netpar.cpp:441
NRNMPI_Spike * spikein_
sprintf(buf," if (secondorder) {\ " int _i;\" " for(_i=0;_i< %d;++_i) {\" " _p[_slist%d[_i]]+=dt *_p[_dlist%d[_i]];\" " }}\", numeqn, listnum, listnum)
inode
Definition: multicore.cpp:985
double delay_
Definition: netcon.h:104
virtual void pgvts_deliver(double t, NetCvode *)
Definition: netpar.cpp:273
unsigned char * spfixin_ovfl_
static double set_mindelay(double maxdelay)
Definition: netpar.cpp:1302
double nrn_bgp_receive_time(int)
Definition: netpar.cpp:84
static double dt1_
Definition: netpar.cpp:38
#define e
Definition: passive0.cpp:24
static void bgpdma_cleanup()
Definition: bgpdma.cpp:648
PreSyn * src_
Definition: netcon.h:105
int * nin_
int ag_send_size_
static int timeout_
Definition: netpar.cpp:1193
void bgp_dma_send(PreSyn *ps, double t)
Definition: bgpdma.cpp:624
virtual void savestate_restore(double deliverytime, NetCvode *)
Definition: netpar.cpp:305
int id
Definition: multicore.h:66
static double map(void *v)
Definition: mlinedit.cpp:46
int * output_vindex
Definition: cell_group.h:36
long
Definition: netcvode.cpp:4792
virtual void deliver(double, NetCvode *, NrnThread *)
Definition: netpar.cpp:236
int refcount
Definition: hocdec.h:227
Point_process * ob2pntproc(Object *)
Definition: hocmech.cpp:88
Object ** hoc_temp_objvar(Symbol *symtemp, void *v)
Definition: hoc_oop.cpp:503
void localstep(bool)
Definition: netcvode.cpp:1282
void record(IvocVect *, IvocVect *idvec=nil, int rec_id=0)
Definition: netcvode.cpp:5091
double ws_
Definition: netcon.h:378
int nrn_nthread
Definition: multicore.cpp:44
std::vector< double > & vec()
Definition: ivocvect.h:35
static double cell(void *v)
Definition: ocbbs.cpp:573
int const size_t const size_t n
Definition: nrngsl.h:12
unsigned char * spfixin_
int gid
Definition: nrnmpi.h:18
double threshold_
Definition: netcon.h:262
int nrnmpi_numprocs
static int gid_donot_remove
Definition: netpar.cpp:930
NrnThread * nrn_threads
Definition: multicore.cpp:45
void bgp_dma_setup()
Definition: bgpdma.cpp:716
int nrn_gid_exists(int)
Definition: netpar.cpp:997
#define MUTDEC
Definition: nrnmutdec.h:28
static void nrnmpi_barrier()
hoc_Item * psl_
Definition: netcvode.h:208
#define printf
Definition: mwprefix.h:26
void nrncore_netpar_cellgroups_helper(CellGroup *cgs)
Definition: netpar.cpp:1567
#define VOIDITM(q)
Definition: hoclist.h:68
int
Definition: nrnmusic.cpp:71
double spiketime
Definition: nrnmpi.h:19
void hoc_warning(const char *, const char *)
Object ** gid_connect(int)
Definition: netpar.cpp:1143
virtual DiscreteEvent * savestate_save()
Definition: netpar.cpp:282
void netpar_spanning_statistics(int *, int *, int *, int *)
Definition: netpar.cpp:1364
void nrn_pending_selfqueue(double, NrnThread *)
Definition: netcvode.cpp:3802
static double usable_mindelay_
Definition: netpar.cpp:207
void hoc_execerror(const char *, const char *)
Definition: hoc.cpp:741
int nrnmusic
void(* PFIO)(int, Object *)
Definition: netpar.cpp:61
static Gid2PreSyn gid2in_
Definition: netpar.cpp:34
void v_setup_vectors()
Definition: treeset.cpp:1623
#define nrn_spikebuf_size
Definition: mpispike.h:5
double * thvar_
Definition: netcon.h:264
errno
Definition: system.cpp:98
#define tstopunset
Definition: section.h:329
int is_point_process(Object *)
Definition: point.cpp:405
size_t j
virtual void pr(const char *, double t, NetCvode *)
Definition: netpar.cpp:278
Section * sec
Definition: section.h:262
fprintf(stderr, "Don't know the location of params at %p\, pp)
Definition: model.h:57
void set_gid2node(int, int)
Definition: netpar.cpp:909
int * output_gid
Definition: cell_group.h:35
int v_structure_change
Definition: cvodestb.cpp:99
Definition: section.h:213
#define BGPDMA
Definition: netpar.cpp:80
PreSyn ** output_ps
Definition: cell_group.h:34
static IvocVect * all_spikegidvec
Definition: netpar.cpp:36
Definition: netcon.h:82
static int n_bgp_interval
Definition: netpar.cpp:29
int * nrn_prop_param_size_
Definition: init.cpp:178
void nrn_fake_fire(int gid, double firetime, int fake_out)
Definition: netpar.cpp:867
int gid_
Definition: netcon.h:277
size_t nrncore_netpar_bytes()
Definition: netpar.cpp:1525
Object * nrn_sec2cell(Section *)
Definition: cabcode.cpp:224
double nrnmpi_step_wait_
Definition: ocbbs.cpp:40
int ifarg(int)
Definition: code.cpp:1562
PreSyn * nrn_gid2presyn(int)
Definition: netpar.cpp:1499
Object ** gid2obj(int)
Definition: netpar.cpp:1113
int end
Definition: multicore.h:65
double dt
Definition: init.cpp:123
void nrn_partrans_clear()
Definition: partrans.cpp:845
VEC * cgs(MTX_FN A, void *A_params, VEC *b, VEC *r0, double tol, VEC *x)
Definition: conjgrad.c:179
PreSyn(double *src, Object *osrc, Section *ssrc=nil)
Definition: netcvode.cpp:4912
#define MUTCONSTRUCT(mkmut)
Definition: nrnmutdec.h:30
static int weightcnt(NetCon *nc)
Definition: netpar.cpp:1520
unsigned char * spfixout_
static void nrnmpi_int_allgather(int *s, int *r, int n)
static Object * gid2obj_(int gid)
Definition: netpar.cpp:1100
void * hoc_Erealloc(void *buf, size_t size)
Definition: symbol.cpp:253
static DiscreteEvent * savestate_read(FILE *)
Definition: netpar.cpp:289
double t
Definition: init.cpp:123
int ag_send_nspike_
double _t
Definition: multicore.h:59
static int active_
Definition: netpar.cpp:206
IvocVect * netpar_max_histogram(IvocVect *)
Definition: netpar.cpp:1374
void hoc_malchk()
Definition: symbol.cpp:187
int nrnmpi_myid
void nrn_spike_exchange(NrnThread *nt)
Definition: hocdec.h:226
HocStruct cTemplate * ctemplate
Definition: hocdec.h:151
void replace_src(PreSyn *)
Definition: netcvode.cpp:4740
std::unordered_map< int, PreSyn * > Gid2PreSyn
Definition: netpar.cpp:18
static void nrnmusic_runtime_phase()
Definition: nrnmusic.cpp:219
#define getarg
Definition: hocdec.h:15
NetConPList dil_
Definition: netcon.h:261
static void alloc_space()
Definition: netpar.cpp:892
#define i
Definition: md1redef.h:12
#define MUTDESTRUCT
Definition: nrnmutdec.h:31
#define c
static int nrn_need_npe()
Definition: netpar.cpp:401
void spike_record(int, IvocVect *, IvocVect *)
Definition: netpar.cpp:1064
void recalc_diam()
Definition: treeset.cpp:940
#define TBUF
Definition: netpar.cpp:438
static double nrnmpi_wtime()
Definition: multisplit.cpp:55
int * pnt_receive_size
Definition: init.cpp:173
int vector_capacity(IvocVect *)
sec
Definition: solve.cpp:885
#define arg
Definition: redef.h:28
floor
Definition: extdef.h:3
int nrn_set_timeout(int)
Definition: netpar.cpp:1194
char buf[512]
Definition: init.cpp:13
PreSyn * nrn_gid2outputpresyn(int gid)
Definition: netpar.cpp:1487
static Symbol * netcon_sym_
Definition: netpar.cpp:32
int ovfl_
static IvocVect * all_spiketvec
Definition: netpar.cpp:35
void nrnmpi_gid_clear(int)
Definition: netpar.cpp:950
int localgid_size_
int nrnthread_all_spike_vectors_return(std::vector< double > &spiketvec, std::vector< int > &spikegidvec)
NEURON callback used from CORENEURON to transfer all spike vectors after simulation.
Definition: netpar.cpp:1273
virtual ~NetParEvent()
Definition: netpar.cpp:231
#define MD
Definition: netpar.cpp:13
union Symbol::@18 u
union Object::@54 u
Object ** hoc_temp_objptr(Object *)
Definition: code.cpp:209
double mindelay()
Definition: netcvode.cpp:2967
static double last_maxstep_arg_
Definition: netpar.cpp:210
#define NetParEventType
Definition: netcon.h:51
int buffer_size()
Definition: ivocvect.cpp:1289
size_t q
Object ** hoc_objgetarg(int)
Definition: code.cpp:1568
static double t_exchange_
Definition: netpar.cpp:37
NetCvode * net_cvode_instance
Definition: cvodestb.cpp:27
return NULL
Definition: cabcode.cpp:461
int icapacity_
double chkarg(int, double low, double high)
Definition: code2.cpp:608
static NetParEvent * npe_
Definition: netpar.cpp:211
double * vector_vec(IvocVect *)
static int n_npe_
Definition: netpar.cpp:212
double * _actual_v
Definition: multicore.h:74
double wx_
Definition: netcon.h:378
void * hoc_Emalloc(size_t size)
Definition: symbol.cpp:194