NEURON
netpar.cpp
Go to the documentation of this file.
2 #include <../../nrnconf.h>
3 #include <InterViews/resource.h>
4 #include <math.h>
5 #include <nrnmpi.h>
6 #include <nrnoc2iv.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <unordered_map>
10 #include <bbs.h>
11 
12 #undef MD
13 #define MD 2147483648.
14 
15 class PreSyn;
16 
17 // hash table where buckets are binary search maps
18 using Gid2PreSyn = std::unordered_map<int, PreSyn*>;
19 
20 #include <errno.h>
21 #include <netcon.h>
22 #include <cvodeobj.h>
23 #include <netcvode.h>
24 #include <vector>
25 #include "ivocvect.h"
26 
27 #define BGP_INTERVAL 2
28 #if BGP_INTERVAL == 2
29 static int n_bgp_interval;
30 #endif
31 
37 static double t_exchange_;
38 static double dt1_; // 1/dt
39 static void alloc_space();
40 
42 extern double t, dt;
43 extern int cvode_active_;
44 extern "C" Point_process* ob2pntproc(Object*);
45 extern int nrn_use_selfqueue_;
46 extern void nrn_pending_selfqueue(double, NrnThread*);
47 extern int vector_capacity(IvocVect*); // ivocvect.h conflicts with STL
48 extern double* vector_vec(IvocVect*);
49 extern Object* nrn_sec2cell(Section*);
50 extern void ncs2nrn_integrate(double tstop);
51 extern "C" {
52 extern void nrn_fake_fire(int gid, double firetime, int fake_out);
53 } // extern "C"
54 int nrnmpi_spike_compress(int nspike, bool gid_compress, int xchng_meth);
56 int nrn_set_timeout(int);
57 void nrnmpi_gid_clear(int);
58 extern void nrn_partrans_clear();
60 extern double nrn_bgp_receive_time(int);
61 typedef void (*PFIO)(int, Object*);
62 extern void nrn_gidout_iter(PFIO);
63 extern Object* nrn_gid2obj(int);
64 extern PreSyn* nrn_gid2presyn(int);
65 extern int nrn_gid_exists(int);
66 extern double nrnmpi_step_wait_; // barrier at beginning of spike exchange.
67 
68 /**
69  * @brief NEURON callback used from CORENEURON to transfer all spike vectors after simulation
70  *
71  * @param spiketvec - vector of spikes
72  * @param spikegidvec - vector of gids
73  * @return 1 if CORENEURON shall drop writing `out.dat`; 0 otherwise
74  */
75 extern "C" int nrnthread_all_spike_vectors_return(std::vector<double>& spiketvec,
76  std::vector<int>& spikegidvec);
77 
78 // BGPDMA can be 0 or 1
79 // (BGPDMA & 1) > 0 means multisend ISend allowed
80 #if !defined(BGPDMA)
81 #define BGPDMA 0
82 #endif
83 
84 #if BGPDMA == 0
85 double nrn_bgp_receive_time(int) {
86  return 0.;
87 }
88 #endif
89 
90 #if PARANEURON
91 extern void nrnmpi_split_clear();
92 #endif
93 extern void nrnmpi_multisplit_clear();
94 
95 static double set_mindelay(double maxdelay);
96 
97 #if NRNMPI
98 
99 #include "../nrnmpi/mpispike.h"
100 
101 void nrn_timeout(int);
103 extern int nrnmpi_int_allmax(int);
104 extern void nrnmpi_int_allgather(int*, int*, int);
105 void nrn2ncs_outputevent(int netcon_output_index, double firetime);
106 bool nrn_use_compress_; // global due to bbsavestate
107 #define use_compress_ nrn_use_compress_
108 
109 #ifdef USENCS
110 extern int ncs_bgp_sending_info(int**);
111 extern int ncs_bgp_target_hosts(int, int**);
112 extern int ncs_bgp_target_info(int**);
113 extern int ncs_bgp_mindelays(int**, double**);
114 
115 // get minimum delays for all presyn objects in gid2in_
116 int ncs_netcon_mindelays(int** hosts, double** delays) {
117  return ncs_bgp_mindelays(hosts, delays);
118 }
119 
120 double ncs_netcon_localmindelay(int srcgid) {
121  PreSyn* ps = gid2out_.at(srcgid);
122  return ps->mindelay();
123 }
124 
125 // get the number of netcons for an object, if it sends here
126 int ncs_netcon_count(int srcgid, bool localNetCons) {
127  const auto& map = localNetCons ? gid2out_ : gid2in_;
128  const auto& iter = map.find(srcgid);
129  PreSyn* ps{iter != map.end() ? iter->second : nullptr};
130 
131  if (!ps) { // no cells on this cpu receive from the given gid
132  fprintf(stderr, "should never happen!\n");
133  return 0;
134  }
135 
136  return ps->dil_.count();
137 }
138 
139 // inject a spike into the appropriate netcon
140 void ncs_netcon_inject(int srcgid, int netconIndex, double spikeTime, bool localNetCons) {
142  const auto& map = localNetCons ? gid2out_ : gid2in_;
143  const auto& iter = map.find(srcgid);
144  PreSyn* ps{iter != map.end() ? iter->second : nullptr};
145  if (!ps) { // no cells on this cpu receive from the given gid
146  return;
147  }
148 
149  // fprintf( stderr, "gid %d index %d!\n", srcgid, netconIndex );
150  NetCon* d = ps->dil_.item(netconIndex);
151  NrnThread* nt = nrn_threads;
152  if (d->active_ && d->target_) {
153 #if BBTQ == 5
154  ns->bin_event(spikeTime + d->delay_, d, nt);
155 #else
156  ns->event(spikeTime + d->delay_, d, nt);
157 #endif
158  }
159 }
160 
161 int ncs_gid_receiving_info(int** presyngids) {
162  return ncs_bgp_target_info(presyngids);
163 }
164 
165 // given the gid of a cell, retrieve its target count
166 int ncs_gid_sending_count(int** sendlist2build) {
167  return ncs_bgp_sending_info(sendlist2build);
168 }
169 
170 int ncs_target_hosts(int gid, int** targetnodes) {
171  return ncs_bgp_target_hosts(gid, targetnodes);
172 }
173 
174 #endif
175 
176 // for compressed gid info during spike exchange
177 bool nrn_use_localgid_;
178 void nrn_outputevent(unsigned char localgid, double firetime);
179 static std::vector<std::unique_ptr<Gid2PreSyn>> localmaps_;
180 
181 #define NRNSTAT 1
182 static int nsend_, nsendmax_, nrecv_, nrecv_useful_;
183 #if NRNSTAT
184 static IvocVect* max_histogram_;
185 #endif
186 
187 static int ocapacity_; // for spikeout_
188 // require it to be smaller than min_interprocessor_delay.
189 static double wt_; // wait time for nrnmpi_spike_exchange
190 static double wt1_; // time to find the PreSyns and send the spikes.
191 static int spfixout_capacity_;
192 static int idxout_;
193 static void nrn_spike_exchange_compressed(NrnThread*);
194 #endif // NRNMPI
195 
196 #if BGPDMA
197 bool use_bgpdma_; // false: allgather, true: multisend (ISend, bgpdma)
198 static void bgp_dma_setup();
199 static void bgp_dma_init();
200 static void bgp_dma_receive(NrnThread*);
201 extern void bgp_dma_send(PreSyn*, double t);
202 static void bgpdma_cleanup_presyn(PreSyn*);
203 #endif
204 
205 static int active_;
206 static double usable_mindelay_;
208 static double mindelay_; // the one actually used. Some of our optional algorithms
209 static double last_maxstep_arg_;
210 static NetParEvent* npe_; // nrn_nthread of them
211 static int n_npe_; // just to compare with nrn_nthread
212 
213 #if NRNMPI
214 // for combination of threads and mpi.
215 #if USE_PTHREAD
216 static MUTDEC
217 #endif
218  static int seqcnt_;
219 static NrnThread* last_nt_;
220 #endif
221 
222 #if NRN_MUSIC
223 #include "nrnmusic.cpp"
224 #endif
225 
227  wx_ = ws_ = 0.;
228  ithread_ = -1;
229 }
231 void NetParEvent::send(double tt, NetCvode* nc, NrnThread* nt) {
232  nc->event(tt + usable_mindelay_, this, nt);
233 }
234 void NetParEvent::deliver(double tt, NetCvode* nc, NrnThread* nt) {
235  int seq;
236  if (nrn_use_selfqueue_) { // first handle pending flag=1 self events
237  nrn_pending_selfqueue(tt, nt);
238  }
239  // has to be the last event at this time in order to avoid a race
240  // condition with HocEvent that may call things such as pc.barrier
241  // actually allthread HocEvent (cvode.event(tev) and cvode.event(tev,stmt)
242  // will be executed last after a thread join when nrn_allthread_handle
243  // is called.
245  nt->_stop_stepping = 1;
246  nt->_t = tt;
247 #if NRNMPI
248  if (nrnmpi_numprocs > 0) {
249  MUTLOCK
250  seq = ++seqcnt_;
251  MUTUNLOCK
252  if (seq == nrn_nthread) {
253  last_nt_ = nt;
254 #if BGPDMA
255  if (use_bgpdma_) {
256  bgp_dma_receive(nt);
257  } else {
258  nrn_spike_exchange(nt);
259  }
260 #else
261  nrn_spike_exchange(nt);
262 #endif
263  wx_ += wt_;
264  ws_ += wt1_;
265  seqcnt_ = 0;
266  }
267  }
268 #endif
269  send(tt, nc, nt);
270 }
271 void NetParEvent::pgvts_deliver(double tt, NetCvode* nc) {
272  assert(nrn_nthread == 1);
273  deliver(tt, nc, nrn_threads);
274 }
275 
276 void NetParEvent::pr(const char* m, double tt, NetCvode* nc) {
277  Printf("%s NetParEvent %d t=%.15g tt-t=%g\n", m, ithread_, tt, tt - nrn_threads[ithread_]._t);
278 }
279 
281  // pr("savestate_save", 0, net_cvode_instance);
282  NetParEvent* npe = new NetParEvent();
283  npe->ithread_ = ithread_;
284  return npe;
285 }
286 
288  int i;
289  char buf[100];
290  nrn_assert(fgets(buf, 100, f));
291  nrn_assert(sscanf(buf, "%d\n", &i) == 1);
292  // printf("NetParEvent::savestate_read %d\n", i);
293  NetParEvent* npe = new NetParEvent();
294  npe->ithread_ = i;
295  return npe;
296 }
297 
299  // pr("savestate_write", 0, net_cvode_instance);
300  fprintf(f, "%d\n%d\n", NetParEventType, ithread_);
301 }
302 
304 #if NRNMPI
305  if (use_compress_) {
306  t_exchange_ = t;
307  }
308 #endif
309  if (ithread_ == 0) {
310  // npe_->pr("savestate_restore", tt, nc);
311  for (int i = 0; i < nrn_nthread; ++i)
312  if (npe_ + i) {
313  nc->event(tt, npe_ + i, nrn_threads + i);
314  }
315  }
316 }
317 
318 #if NRNMPI
319 inline static void sppk(unsigned char* c, int gid) {
320  for (int i = localgid_size_ - 1; i >= 0; --i) {
321  c[i] = gid & 255;
322  gid >>= 8;
323  }
324 }
325 inline static int spupk(unsigned char* c) {
326  int gid = *c++;
327  for (int i = 1; i < localgid_size_; ++i) {
328  gid <<= 8;
329  gid += *c++;
330  }
331  return gid;
332 }
333 
334 void nrn_outputevent(unsigned char localgid, double firetime) {
335  if (!active_) {
336  return;
337  }
338  MUTLOCK
339  nout_++;
340  int i = idxout_;
341  idxout_ += 2;
342  if (idxout_ >= spfixout_capacity_) {
343  spfixout_capacity_ *= 2;
344  spfixout_ = (unsigned char*) hoc_Erealloc(spfixout_,
345  spfixout_capacity_ * sizeof(unsigned char));
346  hoc_malchk();
347  }
348  spfixout_[i++] = (unsigned char) ((firetime - t_exchange_) * dt1_ + .5);
349  spfixout_[i] = localgid;
350  // printf("%d idx=%d lgid=%d firetime=%g t_exchange_=%g [0]=%d [1]=%d\n", nrnmpi_myid, i,
351  // (int)localgid, firetime, t_exchange_, (int)spfixout_[i-1], (int)spfixout_[i]);
352  MUTUNLOCK
353 }
354 
355 #ifndef USENCS
356 void nrn2ncs_outputevent(int gid, double firetime) {
357  if (!active_) {
358  return;
359  }
360  MUTLOCK
361  if (use_compress_) {
362  nout_++;
363  int i = idxout_;
364  idxout_ += 1 + localgid_size_;
365  if (idxout_ >= spfixout_capacity_) {
366  spfixout_capacity_ *= 2;
367  spfixout_ = (unsigned char*) hoc_Erealloc(spfixout_,
368  spfixout_capacity_ * sizeof(unsigned char));
369  hoc_malchk();
370  }
371  // printf("%d nrnncs_outputevent %d %.20g %.20g %d\n", nrnmpi_myid, gid, firetime,
372  // t_exchange_, (int)((unsigned char)((firetime - t_exchange_)*dt1_ + .5)));
373  spfixout_[i++] = (unsigned char) ((firetime - t_exchange_) * dt1_ + .5);
374  // printf("%d idx=%d firetime=%g t_exchange_=%g spfixout=%d\n", nrnmpi_myid, i, firetime,
375  // t_exchange_, (int)spfixout_[i-1]);
376  sppk(spfixout_ + i, gid);
377  // printf("%d idx=%d gid=%d spupk=%d\n", nrnmpi_myid, i, gid, spupk(spfixout_+i));
378  } else {
379 #if nrn_spikebuf_size == 0
380  int i = nout_++;
381  if (i >= ocapacity_) {
382  ocapacity_ *= 2;
383  spikeout_ = (NRNMPI_Spike*) hoc_Erealloc(spikeout_, ocapacity_ * sizeof(NRNMPI_Spike));
384  hoc_malchk();
385  }
386  // printf("%d cell %d in slot %d fired at %g\n", nrnmpi_myid, gid, i, firetime);
387  spikeout_[i].gid = gid;
388  spikeout_[i].spiketime = firetime;
389 #else
390  int i = nout_++;
391  if (i >= nrn_spikebuf_size) {
392  i -= nrn_spikebuf_size;
393  if (i >= ocapacity_) {
394  ocapacity_ *= 2;
396  ocapacity_ * sizeof(NRNMPI_Spike));
397  hoc_malchk();
398  }
399  spikeout_[i].gid = gid;
400  spikeout_[i].spiketime = firetime;
401  } else {
402  spbufout_->gid[i] = gid;
403  spbufout_->spiketime[i] = firetime;
404  }
405 #endif
406  }
407  MUTUNLOCK
408  // printf("%d cell %d in slot %d fired at %g\n", nrnmpi_myid, gid, i, firetime);
409 }
410 #endif // USENCS
411 #endif // NRNMPI
412 
413 static int nrn_need_npe() {
414  int b = 0;
415  if (active_) {
416  b = 1;
417  }
418  if (nrn_use_selfqueue_) {
419  b = 1;
420  }
421  if (nrn_nthread > 1) {
422  b = 1;
423  }
424  if (b) {
425  if (last_maxstep_arg_ == 0) {
426  last_maxstep_arg_ = 100.;
427  }
429  } else {
430  if (npe_) {
431  delete[] npe_;
432  npe_ = NULL;
433  n_npe_ = 0;
434  }
435  }
436  return b;
437 }
438 
439 static void calc_actual_mindelay() {
440  // reasons why mindelay_ can be smaller than min_interprocessor_delay
441  // are use_bgpdma_ when BGP_INTERVAL == 2
442 #if BGPDMA && (BGP_INTERVAL == 2)
443  if (use_bgpdma_ && n_bgp_interval == 2) {
445  } else {
447  }
448 #endif
449 }
450 
451 #if BGPDMA
452 #include "bgpdma.cpp"
453 #else
454 #define TBUFSIZE 0
455 #define TBUF /**/
456 #endif
457 
459  if (nrnmpi_step_wait_ >= 0.0) {
460  nrnmpi_step_wait_ = 0.0;
461  }
462 #ifdef USENCS
463  bgp_dma_setup();
464  return;
465 #endif
466  // printf("nrn_spike_exchange_init\n");
467  if (!nrn_need_npe()) {
468  return;
469  }
470  // if (!active_ && !nrn_use_selfqueue_) { return; }
471  alloc_space();
472  // printf("nrnmpi_use=%d active=%d\n", nrnmpi_use, active_);
475  if (cvode_active_ == 0 && nrn_nthread > 1) {
476  usable_mindelay_ -= dt;
477  }
478  if ((usable_mindelay_ < 1e-9) || (cvode_active_ == 0 && usable_mindelay_ < dt)) {
479  if (nrnmpi_myid == 0) {
480  hoc_execerror("usable mindelay is 0", "(or less than dt for fixed step method)");
481  } else {
482  return;
483  }
484  }
485 
486 #if NRN_MUSIC
487  if (nrnmusic) {
489  }
490 #endif
491 
492 #if TBUFSIZE
493  itbuf_ = 0;
494 #endif
495 
496 #if BGPDMA
497  if (use_bgpdma_) {
498  bgp_dma_init();
499  }
500 #endif
501 
502  if (n_npe_ != nrn_nthread) {
503  if (npe_) {
504  delete[] npe_;
505  }
508  }
509  for (int i = 0; i < nrn_nthread; ++i) {
510  npe_[i].ithread_ = i;
511  npe_[i].wx_ = 0.;
512  npe_[i].ws_ = 0.;
514  }
515 #if NRNMPI
516  if (use_compress_) {
517  idxout_ = 2;
518  t_exchange_ = t;
519  dt1_ = 1. / dt;
520  usable_mindelay_ = floor(mindelay_ * dt1_ + 1e-9) * dt;
522  } else {
523 #if nrn_spikebuf_size > 0
524  if (spbufout_) {
525  spbufout_->nspike = 0;
526  }
527 #endif
528  }
529  nout_ = 0;
530  nsend_ = nsendmax_ = nrecv_ = nrecv_useful_ = 0;
531  if (nrnmpi_numprocs > 0) {
532  if (nrn_nthread > 0) {
533 #if USE_PTHREAD
534  if (!mut_) {
535  MUTCONSTRUCT(1)
536  }
537 #endif
538  } else {
540  }
541  }
542 #endif // NRNMPI
543  // if (nrnmpi_myid == 0){printf("usable_mindelay_ = %g\n", usable_mindelay_);}
544 }
545 
546 #if NRNMPI
547 void nrn_spike_exchange(NrnThread* nt) {
548  nrn::Instrumentor::phase p_spike_exchange("spike-exchange");
549  if (!active_) {
550  return;
551  }
552 #if BGPDMA
553  if (use_bgpdma_) {
554  bgp_dma_receive(nt);
555  return;
556  }
557 #endif
558  if (use_compress_) {
559  nrn_spike_exchange_compressed(nt);
560  return;
561  }
562  TBUF
563 #if TBUFSIZE
564  nrnmpi_barrier();
565 #endif
566  TBUF double wt;
567  int i, n;
568 #if NRNSTAT
569  nsend_ += nout_;
570  if (nsendmax_ < nout_) {
571  nsendmax_ = nout_;
572  }
573 #endif
574 #if nrn_spikebuf_size > 0
575  spbufout_->nspike = nout_;
576 #endif
577  wt = nrnmpi_wtime();
578  if (nrnmpi_step_wait_ >= 0.) {
579  nrnmpi_barrier();
581  }
582  n = nrnmpi_spike_exchange();
583  wt_ = nrnmpi_wtime() - wt;
584  wt = nrnmpi_wtime();
585  TBUF
586 #if TBUFSIZE
587  tbuf_[itbuf_++] = (unsigned long) nout_;
588  tbuf_[itbuf_++] = (unsigned long) n;
589 #endif
590 
591  errno = 0;
592  // if (n > 0) {
593  // printf("%d nrn_spike_exchange sent %d received %d\n", nrnmpi_myid, nout_, n);
594  //}
595  nout_ = 0;
596  if (n == 0) {
597 #if NRNSTAT
598  if (max_histogram_) {
599  vector_vec(max_histogram_)[0] += 1.;
600  }
601 #endif
602  TBUF return;
603  }
604 #if NRNSTAT
605  nrecv_ += n;
606  if (max_histogram_) {
607  int mx = 0;
608  if (n > 0) {
609  for (i = nrnmpi_numprocs - 1; i >= 0; --i) {
610 #if nrn_spikebuf_size == 0
611  if (mx < nin_[i]) {
612  mx = nin_[i];
613  }
614 #else
615  if (mx < spbufin_[i].nspike) {
616  mx = spbufin_[i].nspike;
617  }
618 #endif
619  }
620  }
621  int ms = vector_capacity(max_histogram_) - 1;
622  mx = (mx < ms) ? mx : ms;
623  vector_vec(max_histogram_)[mx] += 1.;
624  }
625 #endif // NRNSTAT
626 #if nrn_spikebuf_size > 0
627  for (i = 0; i < nrnmpi_numprocs; ++i) {
628  int j;
629  int nn = spbufin_[i].nspike;
630  if (nn > nrn_spikebuf_size) {
631  nn = nrn_spikebuf_size;
632  }
633  for (j = 0; j < nn; ++j) {
634  auto iter = gid2in_.find(spbufin_[i].gid[j]);
635  if (iter != gid2in_.end()) {
636  PreSyn* ps = iter->second;
637  ps->send(spbufin_[i].spiketime[j], net_cvode_instance, nt);
638 #if NRNSTAT
639  ++nrecv_useful_;
640 #endif
641  }
642  }
643  }
644  n = ovfl_;
645 #endif // nrn_spikebuf_size > 0
646  for (i = 0; i < n; ++i) {
647  auto iter = gid2in_.find(spikein_[i].gid);
648  if (iter != gid2in_.end()) {
649  PreSyn* ps = iter->second;
650  ps->send(spikein_[i].spiketime, net_cvode_instance, nt);
651 #if NRNSTAT
652  ++nrecv_useful_;
653 #endif
654  }
655  }
656  wt1_ = nrnmpi_wtime() - wt;
657  TBUF
658 }
659 
660 void nrn_spike_exchange_compressed(NrnThread* nt) {
661  if (!active_) {
662  return;
663  }
664  TBUF
665 #if TBUFSIZE
666  nrnmpi_barrier();
667 #endif
669  double wt;
670  int i, n, idx;
671 #if NRNSTAT
672  nsend_ += nout_;
673  if (nsendmax_ < nout_) {
674  nsendmax_ = nout_;
675  }
676 #endif
677  assert(nout_ < 0x10000);
678  spfixout_[1] = (unsigned char) (nout_ & 0xff);
679  spfixout_[0] = (unsigned char) (nout_ >> 8);
680 
681  wt = nrnmpi_wtime();
682  if (nrnmpi_step_wait_ >= 0.) {
683  nrnmpi_barrier();
685  }
686  n = nrnmpi_spike_exchange_compressed();
687  wt_ = nrnmpi_wtime() - wt;
688  wt = nrnmpi_wtime();
689  TBUF
690 #if TBUFSIZE
691  tbuf_[itbuf_++] = (unsigned long) nout_;
692  tbuf_[itbuf_++] = (unsigned long) n;
693 #endif
694  errno = 0;
695  // if (n > 0) {
696  // printf("%d nrn_spike_exchange sent %d received %d\n", nrnmpi_myid, nout_, n);
697  //}
698  nout_ = 0;
699  idxout_ = 2;
700  if (n == 0) {
701 #if NRNSTAT
702  if (max_histogram_) {
703  vector_vec(max_histogram_)[0] += 1.;
704  }
705 #endif
707  TBUF return;
708  }
709 #if NRNSTAT
710  nrecv_ += n;
711  if (max_histogram_) {
712  int mx = 0;
713  if (n > 0) {
714  for (i = nrnmpi_numprocs - 1; i >= 0; --i) {
715  if (mx < nin_[i]) {
716  mx = nin_[i];
717  }
718  }
719  }
720  int ms = vector_capacity(max_histogram_) - 1;
721  mx = (mx < ms) ? mx : ms;
722  vector_vec(max_histogram_)[mx] += 1.;
723  }
724 #endif // NRNSTAT
725  if (nrn_use_localgid_) {
726  int idxov = 0;
727  for (i = 0; i < nrnmpi_numprocs; ++i) {
728  int j, nnn;
729  int nn = nin_[i];
730  if (nn) {
731  if (i == nrnmpi_myid) { // skip but may need to increment idxov.
732  if (nn > ag_send_nspike_) {
733  idxov += (nn - ag_send_nspike_) * (1 + localgid_size_);
734  }
735  continue;
736  }
737  Gid2PreSyn* gps = localmaps_[i].get();
738  if (nn > ag_send_nspike_) {
739  nnn = ag_send_nspike_;
740  } else {
741  nnn = nn;
742  }
743  idx = 2 + i * ag_send_size_;
744  for (j = 0; j < nnn; ++j) {
745  // order is (firetime,gid) pairs.
746  double firetime = spfixin_[idx++] * dt + t_exchange_;
747  int lgid = (int) spfixin_[idx];
748  idx += localgid_size_;
749  auto iter = gps->find(lgid);
750  if (iter != gps->end()) {
751  PreSyn* ps = iter->second;
752  ps->send(firetime + 1e-10, net_cvode_instance, nt);
753 #if NRNSTAT
754  ++nrecv_useful_;
755 #endif
756  }
757  }
758  for (; j < nn; ++j) {
759  double firetime = spfixin_ovfl_[idxov++] * dt + t_exchange_;
760  int lgid = (int) spfixin_ovfl_[idxov];
761  idxov += localgid_size_;
762  auto iter = gps->find(lgid);
763  if (iter != gps->end()) {
764  PreSyn* ps = iter->second;
765  ps->send(firetime + 1e-10, net_cvode_instance, nt);
766 #if NRNSTAT
767  ++nrecv_useful_;
768 #endif
769  }
770  }
771  }
772  }
773  } else {
774  for (i = 0; i < nrnmpi_numprocs; ++i) {
775  int j;
776  int nn = nin_[i];
777  if (nn > ag_send_nspike_) {
778  nn = ag_send_nspike_;
779  }
780  idx = 2 + i * ag_send_size_;
781  for (j = 0; j < nn; ++j) {
782  // order is (firetime,gid) pairs.
783  double firetime = spfixin_[idx++] * dt + t_exchange_;
784  int gid = spupk(spfixin_ + idx);
785  idx += localgid_size_;
786  auto iter = gid2in_.find(gid);
787  if (iter != gid2in_.end()) {
788  PreSyn* ps = iter->second;
789  ps->send(firetime + 1e-10, net_cvode_instance, nt);
790 #if NRNSTAT
791  ++nrecv_useful_;
792 #endif
793  }
794  }
795  }
796  n = ovfl_;
797  idx = 0;
798  for (i = 0; i < n; ++i) {
799  double firetime = spfixin_ovfl_[idx++] * dt + t_exchange_;
800  int gid = spupk(spfixin_ovfl_ + idx);
801  idx += localgid_size_;
802  auto iter = gid2in_.find(gid);
803  if (iter != gid2in_.end()) {
804  PreSyn* ps = iter->second;
805  ps->send(firetime + 1e-10, net_cvode_instance, nt);
806 #if NRNSTAT
807  ++nrecv_useful_;
808 #endif
809  }
810  }
811  }
813  wt1_ = nrnmpi_wtime() - wt;
814  TBUF
815 }
816 
817 static void mk_localgid_rep() {
818  // how many gids are there on this machine
819  // and can they be compressed into one byte
820  int ngid = 0;
821  for (const auto& iter: gid2out_) {
822  PreSyn* ps = iter.second;
823  if (ps->output_index_ >= 0) {
824  ++ngid;
825  }
826  }
827  int ngidmax = nrnmpi_int_allmax(ngid);
828  if (ngidmax > 256) {
829  // do not compress
830  return;
831  }
832  localgid_size_ = sizeof(unsigned char);
833  nrn_use_localgid_ = true;
834 
835  // allocate Allgather receive buffer (send is the nrnmpi_myid one)
836  int* rbuf = new int[nrnmpi_numprocs * (ngidmax + 1)];
837  int* sbuf = new int[ngidmax + 1];
838 
839  sbuf[0] = ngid;
840  ++sbuf;
841  ngid = 0;
842  // define the local gid and fill with the gids on this machine
843  for (const auto& iter: gid2out_) {
844  PreSyn* ps = iter.second;
845  if (ps->output_index_ >= 0) {
846  ps->localgid_ = (unsigned char) ngid;
847  sbuf[ngid] = ps->output_index_;
848  ++ngid;
849  }
850  }
851  --sbuf;
852 
853  // exchange everything
854  nrnmpi_int_allgather(sbuf, rbuf, ngidmax + 1);
855  delete[] sbuf;
856  errno = 0;
857 
858  // create the maps
859  // there is a lot of potential for efficiency here. i.e. use of
860  // perfect hash functions, or even simple Vectors.
861  localmaps_.clear();
862  localmaps_.resize(nrnmpi_numprocs);
863 
864  for (int i = 0; i < nrnmpi_numprocs; ++i)
865  if (i != nrnmpi_myid) {
866  localmaps_[i].reset(new Gid2PreSyn());
867  }
868 
869  // fill in the maps
870  for (int i = 0; i < nrnmpi_numprocs; ++i)
871  if (i != nrnmpi_myid) {
872  sbuf = rbuf + i * (ngidmax + 1);
873  ngid = *(sbuf++);
874  for (int k = 0; k < ngid; ++k) {
875  auto iter = gid2in_.find(int(sbuf[k]));
876  if (iter != gid2in_.end()) {
877  PreSyn* ps = iter->second;
878  (*localmaps_[i])[k] = ps;
879  }
880  }
881  }
882 
883  // cleanup
884  delete[] rbuf;
885 }
886 
887 #endif // NRNMPI
888 
889 // may stimulate a gid for a cell not owned by this cpu. This allows
890 // us to run single cells or subnets and stimulate exactly according to
891 // their input in a full parallel net simulation.
892 // For some purposes, it may be useful to simulate a spike from a
893 // cell that does exist and would normally send its own spike, eg.
894 // recurrent stimulation. This can be useful in debugging where the
895 // spike raster comes from another implementation and one wants to
896 // get complete control of all input spikes without the confounding
897 // effects of output spikes from the simulated cells. In this case
898 // set the third arg to 1 and set the output cell thresholds very
899 // high so that they do not themselves generate spikes.
900 // The remaining possibility is fake_out=2 which only does a send for
901 // the gids owned by this cpu. This, followed by a nrn_spike_exchange(),
902 // ensures that all the target cells, regardless of what rank they are on
903 // will get the spike delivered and nobody gets it twice.
904 
905 extern "C" void nrn_fake_fire(int gid, double spiketime, int fake_out) {
906  PreSyn* ps{nullptr};
907  if (fake_out < 2) {
908  auto iter = gid2in_.find(gid);
909  if (iter != gid2in_.end()) {
910  ps = iter->second;
911  // printf("nrn_fake_fire %d %g\n", gid, spiketime);
912  ps->send(spiketime, net_cvode_instance, nrn_threads);
913 #if NRNSTAT
914  ++nrecv_useful_;
915 #endif
916  }
917  } else if (fake_out && !ps) {
918  auto iter = gid2out_.find(gid);
919  if (iter != gid2out_.end()) {
920  ps = iter->second;
921  // printf("nrn_fake_fire fake_out %d %g\n", gid, spiketime);
922  ps->send(spiketime, net_cvode_instance, nrn_threads);
923 #if NRNSTAT
924  ++nrecv_useful_;
925 #endif
926  }
927  }
928 }
929 
930 static void alloc_space() {
931  if (!netcon_sym_) {
932  netcon_sym_ = hoc_lookup("NetCon");
933 #if NRNMPI
934  ocapacity_ = 100;
935  spikeout_ = (NRNMPI_Spike*) hoc_Emalloc(ocapacity_ * sizeof(NRNMPI_Spike));
936  hoc_malchk();
937  icapacity_ = 100;
939  hoc_malchk();
940  nin_ = (int*) hoc_Emalloc(nrnmpi_numprocs * sizeof(int));
941  hoc_malchk();
942 #if nrn_spikebuf_size > 0
943  spbufout_ = (NRNMPI_Spikebuf*) hoc_Emalloc(sizeof(NRNMPI_Spikebuf));
944  hoc_malchk();
945  spbufin_ = (NRNMPI_Spikebuf*) hoc_Emalloc(nrnmpi_numprocs * sizeof(NRNMPI_Spikebuf));
946  hoc_malchk();
947 #endif
948 #endif
949  }
950 }
951 
952 void BBS::set_gid2node(int gid, int nid) {
953  alloc_space();
954 #if NRNMPI
955  if (nid == nrnmpi_myid) {
956 #else
957  {
958 #endif
959  // printf("gid %d defined on %d\n", gid, nrnmpi_myid);
960  char m[200];
961  if (gid2in_.find(gid) != gid2in_.end()) {
962  sprintf(m, "gid=%d already exists as an input port", gid);
964  m, "Setup all the output ports on this process before using them as input ports.");
965  }
966  if (gid2out_.find(gid) != gid2out_.end()) {
967  sprintf(m, "gid=%d already exists on this process as an output port", gid);
968  hoc_execerror(m, 0);
969  }
970  gid2out_[gid] = nullptr;
971  }
972 }
973 
974 static int gid_donot_remove = 0; // avoid gid2in_, gid2out removal when iterating
975 
977 #if BGPDMA
979 #endif
980  if (gid_donot_remove) {
981  return;
982  }
983  if (ps->output_index_ >= 0) {
984  gid2out_.erase(ps->output_index_);
985  ps->output_index_ = -1;
986  ps->gid_ = -1;
987  }
988  if (ps->gid_ >= 0) {
989  gid2in_.erase(ps->gid_);
990  ps->gid_ = -1;
991  }
992 }
993 
995  if (arg == 0 || arg == 3 || arg == 4) {
997 #if PARANEURON
998  nrnmpi_split_clear();
999 #endif
1000  }
1001  if (arg == 0 || arg == 2 || arg == 4) {
1003  }
1004  if (arg == 2 || arg == 3) {
1005  return;
1006  }
1007  gid_donot_remove = 1;
1008  for (const auto& iter: gid2out_) {
1009  PreSyn* ps = iter.second;
1010  if (ps && gid2in_.find(ps->gid_) == gid2in_.end()) {
1011  if (arg == 4) {
1012  delete ps;
1013  } else {
1014 #if BGPDMA
1016 #endif
1017  ps->gid_ = -1;
1018  ps->output_index_ = -1;
1019  if (ps->dil_.empty()) {
1020  delete ps;
1021  }
1022  }
1023  }
1024  }
1025  for (const auto& iter: gid2in_) {
1026  PreSyn* ps = iter.second;
1027  if (arg == 4) {
1028  delete ps;
1029  } else {
1030 #if BGPDMA
1032 #endif
1033  ps->gid_ = -1;
1034  ps->output_index_ = -1;
1035  if (ps->dil_.empty()) {
1036  delete ps;
1037  }
1038  }
1039  }
1040  gid_donot_remove = 0;
1041  gid2in_.clear();
1042  gid2out_.clear();
1043 }
1044 
1045 int nrn_gid_exists(int gid) {
1046  alloc_space();
1047  auto iter = gid2out_.find(gid);
1048  if (iter != gid2out_.end()) {
1049  PreSyn* ps = iter->second;
1050  // printf("%d gid %d exists\n", nrnmpi_myid, gid);
1051  if (ps) {
1052  return (ps->output_index_ >= 0 ? 3 : 2);
1053  } else {
1054  return 1;
1055  }
1056  }
1057  return 0;
1058 }
1059 int BBS::gid_exists(int gid) {
1060  return nrn_gid_exists(gid);
1061 }
1062 
1063 double BBS::threshold() {
1064  int gid = int(chkarg(1, 0., MD));
1065  auto iter = gid2out_.find(gid);
1066  if (iter == gid2out_.end() || iter->second == NULL) {
1067  hoc_execerror("gid not associated with spike generation location", 0);
1068  }
1069  PreSyn* ps = iter->second;
1070  if (ifarg(2)) {
1071  ps->threshold_ = *getarg(2);
1072  }
1073  return ps->threshold_;
1074 }
1075 
1076 void BBS::cell() {
1077  int gid = int(chkarg(1, 0., MD));
1078  alloc_space();
1079  if (gid2in_.find(gid) != gid2in_.end()) {
1080  char buf[100];
1081  sprintf(buf, "gid=%d is in the input list. Must register prior to connecting", gid);
1082  hoc_execerror(buf, 0);
1083  }
1084  if (gid2out_.find(gid) == gid2out_.end()) {
1085  char buf[100];
1086  sprintf(buf, "gid=%d has not been set on rank %d", gid, nrnmpi_myid);
1087  hoc_execerror(buf, 0);
1088  }
1089  Object* ob = *hoc_objgetarg(2);
1090  if (!ob || ob->ctemplate != netcon_sym_->u.ctemplate) {
1091  check_obj_type(ob, "NetCon");
1092  }
1093  NetCon* nc = (NetCon*) ob->u.this_pointer;
1094  PreSyn* ps = nc->src_;
1095  // printf("%d cell %d %s\n", nrnmpi_myid, gid, hoc_object_name(ps->ssrc_ ?
1096  // nrn_sec2cell(ps->ssrc_) : ps->osrc_));
1097  gid2out_[gid] = ps;
1098  ps->gid_ = gid;
1099  if (ifarg(3) && !chkarg(3, 0., 1.)) {
1100  ps->output_index_ = -2; // prevents destruction of PreSyn
1101  } else {
1102  ps->output_index_ = gid;
1103  }
1104 }
1105 
1106 void BBS::outputcell(int gid) {
1107  auto iter = gid2out_.find(gid);
1108  nrn_assert(iter != gid2out_.end());
1109  PreSyn* ps = iter->second;
1110  assert(ps);
1111  ps->output_index_ = gid;
1112  ps->gid_ = gid;
1113 }
1114 
1115 void BBS::spike_record(int gid, IvocVect* spikevec, IvocVect* gidvec) {
1116  if (gid >= 0) {
1117  all_spiketvec = NULL, all_spikegidvec = NULL; // invalidate global spike vectors
1118 
1119  auto iter = gid2out_.find(gid);
1120  nrn_assert(iter != gid2out_.end());
1121  PreSyn* ps = iter->second;
1122  assert(ps);
1123  ps->record(spikevec, gidvec, gid);
1124  } else { // record all output spikes.
1125  all_spiketvec = spikevec,
1126  all_spikegidvec = gidvec; // track global spike vectors (optimisation)
1127  for (const auto& iter: gid2out_) {
1128  PreSyn* ps = iter.second;
1129  if (ps->output_index_ >= 0) {
1131  }
1132  }
1133  }
1134 }
1135 
1136 void BBS::spike_record(IvocVect* gids, IvocVect* spikevec, IvocVect* gidvec) {
1137  int sz = vector_capacity(gids);
1138  // invalidate global spike vectors
1139  all_spiketvec = nullptr;
1140  all_spikegidvec = nullptr;
1141  double* pd = vector_vec(gids);
1142  for (int i = 0; i < sz; ++i) {
1143  int gid = int(pd[i]);
1144  auto iter = gid2out_.find(gid);
1145  nrn_assert(iter != gid2out_.end());
1146  PreSyn* ps = iter->second;
1147  assert(ps);
1148  ps->record(spikevec, gidvec, gid);
1149  }
1150 }
1151 
1152 static Object* gid2obj_(int gid) {
1153  Object* cell = 0;
1154  // printf("%d gid2obj gid=%d\n", nrnmpi_myid, gid);
1155  auto iter = gid2out_.find(gid);
1156  nrn_assert(iter != gid2out_.end());
1157  PreSyn* ps = iter->second;
1158  // printf(" found\n");
1159  assert(ps);
1160  cell = ps->ssrc_ ? nrn_sec2cell(ps->ssrc_) : ps->osrc_;
1161  // printf(" return %s\n", hoc_object_name(cell));
1162  return cell;
1163 }
1164 
1165 Object** BBS::gid2obj(int gid) {
1166  return hoc_temp_objptr(gid2obj_(gid));
1167 }
1168 
1169 Object** BBS::gid2cell(int gid) {
1170  Object* cell = 0;
1171  // printf("%d gid2obj gid=%d\n", nrnmpi_myid, gid);
1172  auto iter = gid2out_.find(gid);
1173  nrn_assert(iter != gid2out_.end());
1174  PreSyn* ps = iter->second;
1175  // printf(" found\n");
1176  assert(ps);
1177  if (ps->ssrc_) {
1178  cell = nrn_sec2cell(ps->ssrc_);
1179  } else {
1180  cell = ps->osrc_;
1181  // but if it is a POINT_PROCESS in a section
1182  // that is inside an object ... (probably has a WATCH statement)
1183  Section* sec = ob2pntproc(cell)->sec;
1184  Object* c2;
1185  if (sec && (c2 = nrn_sec2cell(sec))) {
1186  if (c2) {
1187  cell = c2;
1188  }
1189  }
1190  }
1191  // printf(" return %s\n", hoc_object_name(cell));
1192  return hoc_temp_objptr(cell);
1193 }
1194 
1196  Object* target = *hoc_objgetarg(2);
1197  if (!is_point_process(target)) {
1198  hoc_execerror("arg 2 must be a point process", 0);
1199  }
1200  alloc_space();
1201  PreSyn* ps;
1202  auto iter_out = gid2out_.find(gid);
1203  if (iter_out != gid2out_.end()) {
1204  // the gid is owned by this machine so connect directly
1205  ps = iter_out->second;
1206  if (!ps) {
1207  char buf[100];
1208  sprintf(buf, "gid %d owned by %d but no associated cell", gid, nrnmpi_myid);
1209  hoc_execerror(buf, 0);
1210  }
1211  } else {
1212  auto iter_in = gid2in_.find(gid);
1213  if (iter_in != gid2in_.end()) {
1214  // the gid stub already exists
1215  ps = iter_in->second;
1216  // printf("%d connect %s from already existing %d\n", nrnmpi_myid,
1217  // hoc_object_name(target), gid);
1218  } else {
1219  // printf("%d connect %s from new PreSyn for %d\n", nrnmpi_myid,
1220  // hoc_object_name(target), gid);
1221  ps = new PreSyn(NULL, NULL, NULL);
1223  gid2in_[gid] = ps;
1224  ps->gid_ = gid;
1225  }
1226  }
1227  NetCon* nc;
1228  Object** po;
1229  if (ifarg(3)) {
1230  po = hoc_objgetarg(3);
1231  if (!*po || (*po)->ctemplate != netcon_sym_->u.ctemplate) {
1232  check_obj_type(*po, "NetCon");
1233  }
1234  nc = (NetCon*) ((*po)->u.this_pointer);
1235  if (nc->target_ != ob2pntproc(target)) {
1236  hoc_execerror("target is different from 3rd arg NetCon target", 0);
1237  }
1238  nc->replace_src(ps);
1239  } else {
1240  nc = new NetCon(ps, target);
1241  po = hoc_temp_objvar(netcon_sym_, nc);
1242  nc->obj_ = *po;
1243  }
1244  return po;
1245 }
1246 
1247 static int timeout_ = 20;
1248 int nrn_set_timeout(int timeout) {
1249  int tt;
1250  tt = timeout_;
1251  timeout_ = timeout;
1252  return tt;
1253 }
1254 
1255 void BBS::netpar_solve(double tstop) {
1256  // temporary check to be eventually replaced by verify_structure()
1258  if (tree_changed) {
1259  setup_topology();
1260  }
1261  if (v_structure_change) {
1262  v_setup_vectors();
1263  }
1264  if (diam_changed) {
1265  recalc_diam();
1266  }
1267  // if cvode_active, and anything at all has changed, should call re_init
1268 
1269 #if NRNMPI
1270  double mt, md;
1271  tstopunset;
1272  if (cvode_active_) {
1273  mt = 1e-9;
1274  md = mindelay_;
1275  } else {
1276  mt = dt;
1277  md = mindelay_ - 1e-10;
1278  }
1279  if (md < mt) {
1280  if (nrnmpi_myid == 0) {
1281  hoc_execerror("mindelay is 0", "(or less than dt for fixed step method)");
1282  } else {
1283  return;
1284  }
1285  }
1286  double wt;
1287 
1288  nrnmpi_barrier(); // make sure all integrations start about the same
1289  nrn_timeout(timeout_); // time to avoid spurious timeouts while waiting
1290  // at the next MPI_collective.
1291  wt = nrnmpi_wtime();
1292  if (cvode_active_) {
1293  ncs2nrn_integrate(tstop);
1294  } else {
1295  ncs2nrn_integrate(tstop * (1. + 1e-11));
1296  }
1297  impl_->integ_time_ += nrnmpi_wtime() - wt;
1298  impl_->integ_time_ -= (npe_ ? (npe_[0].wx_ + npe_[0].ws_) : 0.);
1299 #if BGPDMA
1300  if (use_bgpdma_) {
1301 #if BGP_INTERVAL == 2
1302  for (int i = 0; i < n_bgp_interval; ++i) {
1304  }
1305 #else
1307 #endif
1308  } else {
1310  }
1311 #else
1313 #endif
1314  nrn_timeout(0);
1315  impl_->wait_time_ += wt_;
1316  impl_->send_time_ += wt1_;
1317  if (npe_) {
1318  impl_->wait_time_ += npe_[0].wx_;
1319  impl_->send_time_ += npe_[0].ws_;
1320  npe_[0].wx_ = npe_[0].ws_ = 0.;
1321  };
1322 // printf("%d netpar_solve exit t=%g tstop=%g mindelay_=%g\n",nrnmpi_myid, t, tstop, mindelay_);
1323 #else // not NRNMPI
1324  ncs2nrn_integrate(tstop);
1325 #endif
1326  tstopunset;
1327 }
1328 
1329 int nrnthread_all_spike_vectors_return(std::vector<double>& spiketvec,
1330  std::vector<int>& spikegidvec) {
1331  assert(spiketvec.size() == spikegidvec.size());
1332  if (spiketvec.size()) {
1333  /* Optimisation:
1334  * if all_spiketvec and all_spikegidvec are set (pc.spike_record with gid=-1 was called)
1335  * and they are still reference counted (obj_->refcount), then copy over incoming vectors.
1336  */
1337  if (all_spiketvec != NULL && all_spiketvec->obj_ != NULL &&
1340  all_spiketvec->buffer_size(spiketvec.size() + all_spiketvec->size());
1341  all_spikegidvec->buffer_size(spikegidvec.size() + all_spikegidvec->size());
1342  all_spiketvec->vec().insert(all_spiketvec->end(), spiketvec.begin(), spiketvec.end());
1343  all_spikegidvec->vec().insert(all_spikegidvec->end(),
1344  spikegidvec.begin(),
1345  spikegidvec.end());
1346 
1347  } else { // different underlying vectors for PreSyns
1348  for (size_t i = 0; i < spikegidvec.size(); ++i) {
1349  auto iter = gid2out_.find(spikegidvec[i]);
1350  if (iter != gid2out_.end()) {
1351  PreSyn* ps = iter->second;
1352  ps->record(spiketvec[i]);
1353  }
1354  }
1355  }
1356  }
1357  return 1;
1358 }
1359 
1360 
1361 static double set_mindelay(double maxdelay) {
1362  double mindelay = maxdelay;
1363  last_maxstep_arg_ = maxdelay;
1365  hoc_Item* q;
1366  if (net_cvode_instance->psl_)
1368  PreSyn* ps = (PreSyn*) VOIDITM(q);
1369  double md = ps->mindelay();
1370  if (mindelay > md) {
1371  mindelay = md;
1372  }
1373  }
1374  }
1375 #if NRNMPI
1376  else {
1377  for (const auto& iter: gid2in_) {
1378  PreSyn* ps = iter.second;
1379  double md = ps->mindelay();
1380  if (mindelay > md) {
1381  mindelay = md;
1382  }
1383  }
1384  }
1385  if (nrnmpi_use) {
1386  active_ = 1;
1387  }
1388  if (use_compress_) {
1389  if (mindelay / dt > 255) {
1390  mindelay = 255 * dt;
1391  }
1392  }
1393 
1394  // printf("%d netpar_mindelay local %g now calling nrnmpi_mindelay\n", nrnmpi_myid, mindelay);
1395  // double st = time();
1396  mindelay_ = nrnmpi_mindelay(mindelay);
1398  // add_wait_time(st);
1399  // printf("%d local min=%g global min=%g\n", nrnmpi_myid, mindelay, mindelay_);
1400  if (mindelay_ < 1e-9 && nrn_use_selfqueue_) {
1401  nrn_use_selfqueue_ = 0;
1402  double od = mindelay_;
1403  mindelay = set_mindelay(maxdelay);
1404  if (nrnmpi_myid == 0) {
1405  Printf(
1406  "Notice: The global minimum NetCon delay is %g, so turned off the "
1407  "cvode.queue_mode\n",
1408  od);
1409  Printf(" use_self_queue option. The interprocessor minimum NetCon delay is %g\n",
1410  mindelay);
1411  }
1412  }
1413  errno = 0;
1414  return mindelay;
1415 #else
1416  mindelay_ = mindelay;
1418  return mindelay;
1419 #endif // NRNMPI
1420 }
1421 
1422 double BBS::netpar_mindelay(double maxdelay) {
1423 #if BGPDMA
1424  bgp_dma_setup();
1425 #endif
1426  double tt = set_mindelay(maxdelay);
1427  return tt;
1428 }
1429 
1430 void BBS::netpar_spanning_statistics(int* nsend, int* nsendmax, int* nrecv, int* nrecv_useful) {
1431 #if NRNMPI
1432  *nsend = nsend_;
1433  *nsendmax = nsendmax_;
1434  *nrecv = nrecv_;
1435  *nrecv_useful = nrecv_useful_;
1436 #endif
1437 }
1438 
1439 // unfortunately, ivocvect.h conflicts with STL
1441 #if NRNMPI && NRNSTAT
1442  IvocVect* h = max_histogram_;
1443  if (max_histogram_) {
1444  max_histogram_ = NULL;
1445  }
1446  if (mh) {
1447  max_histogram_ = mh;
1448  }
1449  return h;
1450 #else
1451  return NULL;
1452 #endif
1453 }
1454 
1455 /* 08-Nov-2010
1456 The workhorse for spike exchange on up to 10K machines is MPI_Allgather
1457 but as the number of machines becomes far greater than the fanout per
1458 cell we have been exploring a class of exchange methods called multisend
1459 where the spikes only go to those machines that need them and there is
1460 overlap between communication and computation. The numer of variants of
1461 multisend has grown so that some method selection function is needed
1462 that makes sense.
1463 
1464 The situation that needs to be captured by xchng_meth is
1465 
1466 0: Allgather
1467 1: multisend implemented as MPI_ISend
1468 
1469 n_bgp_interval 1 or 2 per minimum interprocessor NetCon delay
1470  that concept valid for all methods
1471 
1472 Note that Allgather allows spike compression and an allgather spike buffer
1473  with size chosen at setup time. All methods allow bin queueing.
1474 
1475 The multisend methods should allow two phase multisend.
1476 
1477 Note that, in principle, MPI_ISend allows the source to send the index
1478  of the target PreSyn to avoid a hash table lookup (even with a two phase
1479  variant)
1480 
1481 Not all variation are useful. e.g. it is pointless to combine Allgather and
1482 n_bgp_interval=2.
1483 The whole point is to make the
1484 spike transfer initiation as lowcost as possible since that is what causes
1485 most load imbalance. I.e. since 10K more spikes arrive than are sent, spikes
1486 received per processor per interval are much more statistically
1487 balanced than spikes sent per processor per interval. And presently
1488 DCMF multisend injects 10000 messages per spike into the network which
1489 is quite expensive.
1490 
1491 See case 8 of nrn_bgp_receive_time for the xchng_meth properties
1492 */
1493 
1494 int nrnmpi_spike_compress(int nspike, bool gid_compress, int xchng_meth) {
1495 #if NRNMPI
1496  if (nrnmpi_numprocs < 2) {
1497  return 0;
1498  }
1499  if (nspike >= 0) { // otherwise don't set any multisend properties
1500 #if BGP_INTERVAL == 2
1501  n_bgp_interval = (xchng_meth & 4) ? 2 : 1;
1502 #endif
1503 #if BGPDMA
1504  use_bgpdma_ = (xchng_meth & 1) == 1;
1505  use_phase2_ = (xchng_meth & 8) ? 1 : 0;
1506  if (use_bgpdma_) {
1507  assert(BGPDMA);
1508  }
1509  bgpdma_cleanup();
1510 #else // BGPDMA == 0
1511  assert(xchng_meth == 0);
1512 #endif
1513  }
1514  if (nspike >= 0) {
1515  ag_send_nspike_ = 0;
1516  if (spfixout_) {
1517  free(spfixout_);
1518  spfixout_ = 0;
1519  }
1520  if (spfixin_) {
1521  free(spfixin_);
1522  spfixin_ = 0;
1523  }
1524  if (spfixin_ovfl_) {
1525  free(spfixin_ovfl_);
1526  spfixin_ovfl_ = 0;
1527  }
1528  localmaps_.clear();
1529  }
1530  if (nspike == 0) { // turn off
1531  use_compress_ = false;
1532  nrn_use_localgid_ = false;
1533  } else if (nspike > 0) { // turn on
1534  if (cvode_active_) {
1535  if (nrnmpi_myid == 0) {
1536  hoc_warning("ParallelContext.spike_compress cannot be used with cvode active", 0);
1537  }
1538  use_compress_ = false;
1539  nrn_use_localgid_ = false;
1540  return 0;
1541  }
1542  use_compress_ = true;
1543  ag_send_nspike_ = nspike;
1544  nrn_use_localgid_ = false;
1545  if (gid_compress) {
1546  // we can only do this after everything is set up
1547  mk_localgid_rep();
1548  if (!nrn_use_localgid_ && nrnmpi_myid == 0) {
1549  Printf(
1550  "Notice: gid compression did not succeed. Probably more than 255 cells on one "
1551  "cpu.\n");
1552  }
1553  }
1554  if (!nrn_use_localgid_) {
1555  localgid_size_ = sizeof(unsigned int);
1556  }
1558  spfixout_capacity_ = ag_send_size_ + 50 * (1 + localgid_size_);
1559  spfixout_ = (unsigned char*) hoc_Emalloc(spfixout_capacity_);
1560  hoc_malchk();
1561  spfixin_ = (unsigned char*) hoc_Emalloc(nrnmpi_numprocs * ag_send_size_);
1562  hoc_malchk();
1563  ovfl_capacity_ = 100;
1564  spfixin_ovfl_ = (unsigned char*) hoc_Emalloc(ovfl_capacity_ * (1 + localgid_size_));
1565  hoc_malchk();
1566  }
1567  return ag_send_nspike_;
1568 #else
1569  return 0;
1570 #endif
1571 }
1572 
1573 PreSyn* nrn_gid2outputpresyn(int gid) { // output PreSyn
1574  auto iter = gid2out_.find(gid);
1575  if (iter != gid2out_.end()) {
1576  return iter->second;
1577  }
1578  return NULL;
1579 }
1580 
1581 Object* nrn_gid2obj(int gid) {
1582  return gid2obj_(gid);
1583 }
1584 
1585 PreSyn* nrn_gid2presyn(int gid) { // output PreSyn
1586  auto iter = gid2out_.find(gid);
1587  nrn_assert(iter != gid2out_.end());
1588  return iter->second;
1589 }
1590 
1591 void nrn_gidout_iter(PFIO callback) {
1592  for (const auto& iter: gid2out_) {
1593  PreSyn* ps = iter.second;
1594  if (ps) {
1595  int gid = ps->gid_;
1596  Object* c = gid2obj_(gid);
1597  (*callback)(gid, c);
1598  }
1599  }
1600 }
1601 
1602 #include "nrncore_write.h"
1603 extern int* nrn_prop_param_size_;
1604 extern int* pnt_receive_size;
1605 extern short* nrn_is_artificial_;
1606 static int weightcnt(NetCon* nc) {
1607  return nc->cnt_;
1608  // return nc->target_ ? pnt_receive_size[nc->target_->prop->type]: 1;
1609 }
1610 
1612  size_t ntot, nin, nout, nnet, nweight;
1613  ntot = nin = nout = nnet = nweight = 0;
1614  size_t npnt = 0;
1615  if (0 && nrnmpi_myid == 0) {
1616  printf("size Presyn=%ld NetCon=%ld Point_process=%ld Prop=%ld\n",
1617  sizeof(PreSyn),
1618  sizeof(NetCon),
1619  sizeof(Point_process),
1620  sizeof(Prop));
1621  }
1622  for (const auto& iter: gid2out_) {
1623  PreSyn* ps = iter.second;
1624  if (ps) {
1625  nout += 1;
1626  int n = ps->dil_.size();
1627  nnet += n;
1628  for (auto nc: ps->dil_) {
1629  nweight += weightcnt(nc);
1630  if (nc->target_) {
1631  npnt += 1;
1632  }
1633  }
1634  }
1635  }
1636  // printf("output Presyn npnt = %ld nweight = %ld\n", npnt, nweight);
1637  for (const auto& iter: gid2in_) {
1638  PreSyn* ps = iter.second;
1639  if (ps) {
1640  nin += 1;
1641  int n = ps->dil_.size();
1642  nnet += n;
1643  for (auto nc: ps->dil_) {
1644  nweight += weightcnt(nc);
1645  if (nc->target_) {
1646  npnt += 1;
1647  }
1648  }
1649  }
1650  }
1651  // printf("after input Presyn total npnt = %ld nweight = %ld\n", npnt, nweight);
1652  ntot = (nin + nout) * sizeof(PreSyn) + nnet * sizeof(NetCon) + nweight * sizeof(double);
1653  // printf("%d rank output Presyn %ld input Presyn %ld NetCon %ld bytes %ld\n",
1654  // nrnmpi_myid, nout, nin, nnet, ntot);
1655  return ntot;
1656 }
1657 
1659  // printf("nrncore_netpar_cellgroups_helper\n");
1660 
1661  // for the real cells in each thread (all have output gid and voltage spike
1662  // detector) fill the cgs output_ps, output_gid, and output_vindex
1663  // All the other (acell) gids have already been processed.
1664  int* gidcnt = new int[nrn_nthread]; // real only
1665  for (int i = 0; i < nrn_nthread; ++i) {
1666  gidcnt[i] = 0;
1667  }
1668 
1669  for (const auto& iter: gid2out_) {
1670  PreSyn* ps = iter.second;
1671  if (ps && ps->thvar_) {
1672  int ith = ps->nt_->id;
1673  assert(ith >= 0 && ith < nrn_nthread);
1674  int i = gidcnt[ith];
1675  cgs[ith].output_ps[i] = ps;
1676  cgs[ith].output_gid[i] = ps->output_index_;
1677  assert(ps->thvar_ >= ps->nt_->_actual_v);
1678  int inode = ps->thvar_ - ps->nt_->_actual_v;
1679  assert(inode <= ps->nt_->end);
1680  cgs[ith].output_vindex[i] = inode;
1681  ++gidcnt[ith];
1682  }
1683  }
1684 #if 0 // allow real cells to NOT have a direct voltage threshold.
1685  for (int i=0; i < nrn_nthread; ++ i) {
1686  assert(nrn_threads[i].ncell == gidcnt[i]);
1687  }
1688 #endif
1689  delete[] gidcnt;
1690 }
static void nrnmpi_int_allgather(int *s, int *r, int n)
static void nrnmpi_barrier()
static int nrnmpi_int_allmax(int x)
void bgp_dma_receive(NrnThread *nt)
Definition: bgpdma.cpp:554
void bgp_dma_setup()
Definition: bgpdma.cpp:730
static void bgpdma_cleanup()
Definition: bgpdma.cpp:659
void bgpdma_cleanup_presyn(PreSyn *ps)
Definition: bgpdma.cpp:646
static void bgp_dma_init()
Definition: bgpdma.cpp:422
void bgp_dma_send(PreSyn *ps, double t)
Definition: bgpdma.cpp:634
static int use_phase2_
Definition: bgpdma.cpp:154
Object ** gid2cell(int)
Definition: netpar.cpp:1169
void spike_record(int, IvocVect *, IvocVect *)
Definition: netpar.cpp:1115
Object ** gid_connect(int)
Definition: netpar.cpp:1195
void set_gid2node(int, int)
Definition: netpar.cpp:952
void netpar_solve(double)
Definition: netpar.cpp:1255
Object ** gid2obj(int)
Definition: netpar.cpp:1165
IvocVect * netpar_max_histogram(IvocVect *)
Definition: netpar.cpp:1440
int gid_exists(int)
Definition: netpar.cpp:1059
void cell()
Definition: netpar.cpp:1076
void netpar_spanning_statistics(int *, int *, int *, int *)
Definition: netpar.cpp:1430
BBSImpl * impl_
Definition: bbs.h:74
void outputcell(int)
Definition: netpar.cpp:1106
double netpar_mindelay(double maxdelay)
Definition: netpar.cpp:1422
double threshold()
Definition: netpar.cpp:1063
double send_time_
Definition: bbsimpl.h:57
double integ_time_
Definition: bbsimpl.h:56
double wait_time_
Definition: bbsimpl.h:55
auto end() -> std::vector< double >::iterator
Definition: ivocvect.h:69
size_t size() const
Definition: ivocvect.h:43
int buffer_size()
Definition: ivocvect.cpp:1375
std::vector< double > & vec()
Definition: ivocvect.h:35
Object * obj_
Definition: ivocvect.h:94
Definition: netcon.h:83
void replace_src(PreSyn *)
Definition: netcvode.cpp:4898
bool active_
Definition: netcon.h:115
int cnt_
Definition: netcon.h:114
double delay_
Definition: netcon.h:109
Point_process * target_
Definition: netcon.h:111
Object * obj_
Definition: netcon.h:113
PreSyn * src_
Definition: netcon.h:110
TQItem * bin_event(double tdeliver, DiscreteEvent *, NrnThread *)
Definition: netcvode.cpp:2685
void deliver_events(double til, NrnThread *)
Definition: netcvode.cpp:3061
hoc_Item * psl_
Definition: netcvode.h:249
void localstep(bool)
Definition: netcvode.cpp:1310
void psl_append(PreSyn *)
Definition: netcvode.cpp:4756
TQItem * event(double tdeliver, DiscreteEvent *, NrnThread *)
Definition: netcvode.cpp:2712
virtual ~NetParEvent()
Definition: netpar.cpp:230
virtual void savestate_write(FILE *)
Definition: netpar.cpp:298
virtual void pgvts_deliver(double t, NetCvode *)
Definition: netpar.cpp:271
virtual DiscreteEvent * savestate_save()
Definition: netpar.cpp:280
static DiscreteEvent * savestate_read(FILE *)
Definition: netpar.cpp:287
int ithread_
Definition: netcon.h:411
double wx_
Definition: netcon.h:410
virtual void send(double, NetCvode *, NrnThread *)
Definition: netpar.cpp:231
virtual void savestate_restore(double deliverytime, NetCvode *)
Definition: netpar.cpp:303
virtual void deliver(double, NetCvode *, NrnThread *)
Definition: netpar.cpp:234
double ws_
Definition: netcon.h:410
virtual void pr(const char *, double t, NetCvode *)
Definition: netpar.cpp:276
Definition: netcon.h:255
double * thvar_
Definition: netcon.h:294
int output_index_
Definition: netcon.h:306
virtual void send(double sendtime, NetCvode *, NrnThread *)
Definition: netcvode.cpp:3252
PreSyn(double *src, Object *osrc, Section *ssrc=nil)
Definition: netcvode.cpp:5072
double mindelay()
Definition: netcvode.cpp:3051
Section * ssrc_
Definition: netcon.h:296
NrnThread * nt_
Definition: netcon.h:300
int gid_
Definition: netcon.h:307
void record(IvocVect *, IvocVect *idvec=nil, int rec_id=0)
Definition: netcvode.cpp:5251
Object * osrc_
Definition: netcon.h:295
double threshold_
Definition: netcon.h:292
NetConPList dil_
Definition: netcon.h:291
VEC * cgs(MTX_FN A, void *A_params, VEC *b, VEC *r0, double tol, VEC *x)
Definition: conjgrad.c:179
int diam_changed
Definition: cabcode.cpp:23
int v_structure_change
Definition: cvodestb.cpp:99
int tree_changed
sprintf(buf, " if (secondorder) {\n" " int _i;\n" " for (_i = 0; _i < %d; ++_i) {\n" " _p[_slist%d[_i]] += dt*_p[_dlist%d[_i]];\n" " }}\n", numeqn, listnum, listnum)
double chkarg(int, double low, double high)
Definition: code2.cpp:638
#define c
ms
Definition: extargs.h:1
void hoc_execerror(const char *, const char *)
Definition: hoc.cpp:754
char buf[512]
Definition: init.cpp:13
Object ** hoc_temp_objvar(Symbol *symtemp, void *v)
Definition: hoc_oop.cpp:491
void hoc_warning(const char *, const char *)
Symbol * hoc_lookup(const char *)
#define assert(ex)
Definition: hocassrt.h:32
#define getarg
Definition: hocdec.h:15
#define VOIDITM(q)
Definition: hoclist.h:68
Object ** hoc_objgetarg(int)
Definition: code.cpp:1587
void
int ifarg(int)
Definition: code.cpp:1581
Object ** hoc_temp_objptr(Object *)
Definition: code.cpp:216
#define sec
Definition: md1redef.h:13
#define i
Definition: md1redef.h:12
static double map(void *v)
Definition: mlinedit.cpp:47
#define ITERATE(itm, lst)
Definition: model.h:25
#define Printf
Definition: model.h:237
floor
Definition: extdef.h:4
int * nin_
NRNMPI_Spike * spikein_
int icapacity_
int localgid_size_
NRNMPI_Spike * spikeout_
int ovfl_capacity_
int ovfl_
unsigned char * spfixout_
int nout_
unsigned char * spfixin_
int ag_send_nspike_
unsigned char * spfixin_ovfl_
#define nrn_spikebuf_size
Definition: mpispike.h:5
int ag_send_size_
int nrn_nthread
Definition: multicore.cpp:46
NrnThread * nrn_threads
Definition: multicore.cpp:47
static double nrnmpi_wtime()
Definition: multisplit.cpp:55
static int nrnmpi_use
Definition: multisplit.cpp:48
#define printf
Definition: mwprefix.h:26
#define fprintf
Definition: mwprefix.h:30
#define NetParEventType
Definition: netcon.h:47
void setup_topology()
void v_setup_vectors()
Definition: treeset.cpp:1631
void nrn_partrans_clear()
Definition: partrans.cpp:897
static IvocVect * all_spikegidvec
Definition: netpar.cpp:36
int vector_capacity(IvocVect *)
double dt
Definition: netpar.cpp:42
static int n_npe_
Definition: netpar.cpp:211
short * nrn_is_artificial_
Definition: init.cpp:193
static int active_
Definition: netpar.cpp:205
Point_process * ob2pntproc(Object *)
Definition: hocmech.cpp:88
void nrn_gidout_iter(PFIO)
Definition: netpar.cpp:1591
int * pnt_receive_size
Definition: init.cpp:135
static double t_exchange_
Definition: netpar.cpp:37
static int timeout_
Definition: netpar.cpp:1247
static Gid2PreSyn gid2out_
Definition: netpar.cpp:33
static Symbol * netcon_sym_
Definition: netpar.cpp:32
static int weightcnt(NetCon *nc)
Definition: netpar.cpp:1606
static Object * gid2obj_(int gid)
Definition: netpar.cpp:1152
double nrnmpi_step_wait_
Definition: ocbbs.cpp:40
int nrnthread_all_spike_vectors_return(std::vector< double > &spiketvec, std::vector< int > &spikegidvec)
NEURON callback used from CORENEURON to transfer all spike vectors after simulation.
Definition: netpar.cpp:1329
void ncs2nrn_integrate(double tstop)
Definition: netcvode.cpp:3832
void nrn_cleanup_presyn(PreSyn *)
Definition: netpar.cpp:976
static Gid2PreSyn gid2in_
Definition: netpar.cpp:34
#define BGPDMA
Definition: netpar.cpp:81
double nrn_bgp_receive_time(int)
Definition: netpar.cpp:85
void nrn_spike_exchange_init()
Definition: netpar.cpp:458
static double min_interprocessor_delay_
Definition: netpar.cpp:207
PreSyn * nrn_gid2outputpresyn(int gid)
Definition: netpar.cpp:1573
double * vector_vec(IvocVect *)
int cvode_active_
Definition: fadvance.cpp:163
static double mindelay_
Definition: netpar.cpp:208
int nrn_set_timeout(int)
Definition: netpar.cpp:1248
PreSyn * nrn_gid2presyn(int)
Definition: netpar.cpp:1585
void nrnmpi_multisplit_clear()
Definition: multisplit.cpp:509
std::unordered_map< int, PreSyn * > Gid2PreSyn
Definition: netpar.cpp:18
int * nrn_prop_param_size_
Definition: init.cpp:140
double t
Definition: cvodeobj.cpp:59
static int nrn_need_npe()
Definition: netpar.cpp:413
static double set_mindelay(double maxdelay)
Definition: netpar.cpp:1361
static NetParEvent * npe_
Definition: netpar.cpp:210
int nrn_gid_exists(int)
Definition: netpar.cpp:1045
static double dt1_
Definition: netpar.cpp:38
#define TBUF
Definition: netpar.cpp:455
static int gid_donot_remove
Definition: netpar.cpp:974
static int n_bgp_interval
Definition: netpar.cpp:29
void(* PFIO)(int, Object *)
Definition: netpar.cpp:61
static double last_maxstep_arg_
Definition: netpar.cpp:209
static double usable_mindelay_
Definition: netpar.cpp:206
void nrnmpi_gid_clear(int)
Definition: netpar.cpp:994
#define MD
Definition: netpar.cpp:13
int nrnmpi_spike_compress(int nspike, bool gid_compress, int xchng_meth)
Definition: netpar.cpp:1494
static void alloc_space()
Definition: netpar.cpp:930
Object * nrn_sec2cell(Section *)
Definition: cabcode.cpp:233
void nrn_fake_fire(int gid, double firetime, int fake_out)
Definition: netpar.cpp:905
NetCvode * net_cvode_instance
Definition: cvodestb.cpp:27
static IvocVect * all_spiketvec
Definition: netpar.cpp:35
static void calc_actual_mindelay()
Definition: netpar.cpp:439
void nrncore_netpar_cellgroups_helper(CellGroup *cgs)
Definition: netpar.cpp:1658
int nrn_use_selfqueue_
Definition: netcvode.cpp:93
size_t nrncore_netpar_bytes()
Definition: netpar.cpp:1611
Object * nrn_gid2obj(int)
Definition: netpar.cpp:1581
void nrn_pending_selfqueue(double, NrnThread *)
Definition: netcvode.cpp:3890
int is_point_process(Object *)
Definition: point.cpp:396
#define nrn_assert(ex)
Definition: nrnassrt.h:53
void nrn_spike_exchange(struct NrnThread *nt)
int const size_t const size_t n
Definition: nrngsl.h:11
size_t q
if(status)
size_t j
int nrnmpi_myid
int nrnmpi_numprocs
void * hoc_Erealloc(void *buf, size_t size)
Definition: symbol.cpp:251
void * hoc_Emalloc(size_t size)
Definition: symbol.cpp:190
void hoc_malchk()
Definition: symbol.cpp:183
int nrnmusic
static void nrnmusic_runtime_phase()
Definition: nrnmusic.cpp:219
#define MUTCONSTRUCT(mkmut)
Definition: nrnmutdec.h:70
#define MUTDESTRUCT
Definition: nrnmutdec.h:71
#define MUTDEC
Definition: nrnmutdec.h:68
#define MUTLOCK
Definition: nrnmutdec.h:72
#define MUTUNLOCK
Definition: nrnmutdec.h:73
static philox4x32_key_t k
Definition: nrnran123.cpp:11
void recalc_diam()
Definition: treeset.cpp:953
#define e
Definition: passive0.cpp:22
#define arg
Definition: redef.h:28
check_obj_type(o, "SectionList")
#define tstopunset
Definition: section.h:327
#define NULL
Definition: sptree.h:16
double spiketime
Definition: nrnmpi.h:19
int gid
Definition: nrnmpi.h:18
Represent main neuron object computed by single thread.
Definition: multicore.h:58
int id
Definition: multicore.h:66
int _stop_stepping
Definition: multicore.h:67
int end
Definition: multicore.h:65
double * _actual_v
Definition: multicore.h:74
double _t
Definition: multicore.h:59
Definition: hocdec.h:227
void * this_pointer
Definition: hocdec.h:232
int refcount
Definition: hocdec.h:228
union Object::@39 u
Section * sec
Definition: section.h:263
Definition: section.h:214
Definition: model.h:57
union Symbol::@18 u
HocStruct cTemplate * ctemplate
Definition: hocdec.h:152