NEURON
multisplit.cpp
Go to the documentation of this file.
1 #include <../../nrnconf.h>
2 
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <errno.h>
6 #include <InterViews/resource.h>
7 #include <vector>
8 #include <nrnoc2iv.h>
9 #include <nrnmpi.h>
10 #include <multisplit.h>
11 #include <unordered_map>
12 #include <memory>
13 
14 void nrnmpi_multisplit(Section*, double x, int sid, int backbone_style);
16 
17 extern int tree_changed;
18 extern int diam_changed;
19 extern void setup_topology();
20 extern void nrn_cachevec(int);
21 extern void nrn_matrix_node_free();
22 extern void (*nrn_multisplit_setup_)();
23 extern void* nrn_multisplit_triang(NrnThread*);
25 extern void* nrn_multisplit_bksub(NrnThread*);
26 extern double t;
27 
35 extern void (*nrn_multisplit_solve_)();
36 static void multisplit_v_setup();
37 static void multisplit_solve();
38 
39 extern double nrnmpi_rtcomp_time_;
40 #if PARANEURON
41 extern double nrnmpi_splitcell_wait_;
42 #else
43 static double nrnmpi_splitcell_wait_;
44 #endif
45 
46 #if NRNMPI
47 #else
48 static int nrnmpi_use;
49 static void nrnmpi_int_allgather(int*, int*, int) {}
50 static void nrnmpi_int_allgatherv(int*, int*, int*, int*) {}
51 static void nrnmpi_postrecv_doubles(double*, int, int, int, void**) {}
52 static void nrnmpi_send_doubles(double*, int, int, int) {}
53 static void nrnmpi_wait(void**) {}
54 static void nrnmpi_barrier() {}
55 static double nrnmpi_wtime() {
56  return 0.0;
57 }
58 #endif
59 
60 class MultiSplit;
61 class MultiSplitControl;
62 
63 #define A(i) VEC_A(i)
64 #define B(i) VEC_B(i)
65 #define D(i) VEC_D(i)
66 #define RHS(i) VEC_RHS(i)
67 #define S1A(i) sid1A[i]
68 #define S1B(i) sid1B[i]
69 
70 // any number of nodes can have the same sid (generally those nodes
71 // will be on different machines).
72 // A node cannot have more than one sid.
73 // A tree cannot have more than two nodes with an sid and those nodes
74 // must be distinct.
75 
76 // NOTE:
77 // For cache efficiency, it would be nice to keep individual cell nodes
78 // together (i.e v_node from 0 to rootnodecount-1 are NOT the root nodes.)
79 // This would allow keeping the backbones contiguous and would simplify the
80 // solution of the sid0,1 2x2 at the end of triangularization that forms
81 // an N shaped matrix. Unfortunately, in several places, e.g. matrix setup,
82 // the first rootnodecount v_node are assumed to be the individual cell roots
83 // and at this time it would be confusing to work on that tangential issue.
84 // However, it does mean that we need some special structure since
85 // sid0 ...backbone interior nodes... sid1 are not contiguous in the v_node
86 // for a single cell.
87 // Presently the node order is
88 // 1) all the roots of cells with 0 or 1 sid (no backbone involved)
89 // 2) all the sid0 when there are two sids in a cell. 1) + 2) = rootnodecount
90 // 3) the interior backbone nodes (does not include sid0 or sid1 nodes)
91 // 4) all the sid1 nodes
92 // 5) all remaining nodes
93 // backbone_begin is the index for 2).
94 // backbone_interior_begin is one more than the last index for 2)
95 // backbone_sid1_begin is the index for 4)
96 // backbone_end is the index for 5.
97 
98 // Modification to handle short backbones
99 // Numerical accuracy/stability suffers when backbones are electrically
100 // short. Unfortunately, it is impossible to divide many 3-d reconstructed
101 // cells with any kind of load balance without using them. Also,
102 // unfortunately, it is probably not possible to solve the problem
103 // in general. However we can extend, without introducing
104 // too much parallel inefficiency, the implementation to allow an
105 // arbitrarily short backbone treated as a single node, as long as it
106 // does not connect to a short backbone on another machine. e.g we can
107 // have alternating short and long sausages but not a sequence of
108 // contiguous short sausages. The idea is to exchange backbone info
109 // in two stages. First receive the info that short backbones need
110 // (by definition only from long backbones), compute the exact D,RHS
111 // backbone end points (only a 2x2 matrix solve), and then send the
112 // exact short backbone info. matrix_exchange carries out the
113 // augmented communication. But we need to distinguish short from
114 // long backbones since for short ones we need to factor out the
115 // first part of bksub_backbone and do it in the middle of
116 // matrix_exchange (as well as avoid doing that twice in bksub_backbone).
117 // We can accomplish that by introducing the index
118 // backbone_long_begin between the backbone_begin and backbone_interior_begin
119 // indices. Then in matrix exchange we do backbone_begin to
120 // backbone_long_begin - 1 , and in bksub_backbone, the first part is
121 // from backbone_long_begin to backbone_interior_begin - 1.
122 
123 // Modification to handle any number of backbones, short or long, exactly.
124 // In this case, it is not necessary to consider numerical stability/accuracy.
125 // However a tree matrix with rank equal to the number of distinct sid for
126 // an entire parallelized cell must be solved between the send and receive
127 // stages for that cell. The main issue to be resolved is: which machine receives
128 // the matrix information. ie. a single sid subtree sends d and rhs to the
129 // (rthost)machine that gaussian eliminates the reduced tree
130 // matrix and a two sid backbone subtree sends the two d, a, b, and two rhs
131 // to rthost. After rthost solves the matrix (triangualize and back substitute)
132 // the answer for each sid is sent back to the machines needing it.
133 // Clearly, some savings results if rthost happens to be one of the machines
134 // with a 2 sid backbone. Also, it is probably best if a machine serves
135 // as rthost to as few cells as possible.
136 
137 // Modification to handle multiple subtrees with same sid on same machine.
138 // This is very valuable for load balancing. It demands a backbone style
139 // of 2 (using reduced tree matrices) and we allow user specified host
140 // for the reduced tree. The exact solution using reduced trees has
141 // proved so much more useful than the long/short backbone methods
142 // that we should consider removing the code
143 // for those others and implement only the reduced tree method.
144 
145 // Area handling prior to ReducedTree extension was and mostly still is:
146 // 1) sending long to short, long, reduced on the send side multiplies
147 // the send buffer (D and RHS components) by area (if non-zero area node)
148 // from 0 to iarea_short_long using area_node_indices and buf_area_indices.
149 // 2) receive what is needed by short backbone (and reduced tree) and the
150 // receive buffer for short backbong divides by area from iarea_short_long
151 // to narea using aread_node_indices and buf_area_indices. (not needed by
152 // reduced tree but the local D and RHS will have to be multiplied by the
153 // node area when filling the ReducedTree. Also the sid1A and sid1B terms
154 // will have to be area multiplied both on the ReducedTree host
155 // and when backbones are sent to the ReducedTree host
156 // 3) The short backbone is solved consistent with its original node area
157 // (and the ReducedTree is solved with all its equation having been multiplied
158 // by area.
159 // 4) Note that solving the short and reduced tree gives the answer which
160 // does not have to be scaled but we multiply the D and RHS by 1e30 so
161 // those equation are v = RHS/D regardless of area or coupling. So no need
162 // to divide by area on the short or reduced tree machines and it would not
163 // matter if we did so or not.
164 // 5) all remaining receives take place and what is received gets divided
165 // by the node area (0 to iarea_short_long). As mentioned above, this does
166 // not affect the result that was sent from the short or reduced tree
167 // host (since D and RHS were scaled by 1e30).
168 // So... Anything sent to reduced trees from another machine needs to get its
169 // info multiplied by the node area. This is sid0 D and RHS and if there is
170 // a sid1 then that D and RHS as well as sid1A and sid1B.
171 // Also... Anything rmapped into a reduced tree from the reduced tree machine
172 // needs to get its rmap info multiplied by the node area ( in fact we can
173 // do that in place before calling fillrmap since those values get the result
174 // *1e30 returned into them).
175 // We do the old methods (backbone style 0 and 1) completely segregated from
176 // the new reducedtree method by introducing a vector parallel to
177 // nodeindex_buffer_ called nodeindex_rthost_ that contains the hostid for the
178 // reduced tree host associated with that sid. Then -1 means old style and
179 // >= 0 means reducedtree style.
180 
181 // the implementation so far has turned out to be too brittle with regard
182 // to unimportant topology changes in the sense of new point processes or
183 // anything that ends up calling multisplit_v_setup. The experienced
184 // cause of this is because MultiSplitTransferInfo holds pointers into
185 // the sid1A and sid1B off diag elements and those arrays are freed
186 // and reallocated when multisplit_v_setup is called. Although
187 // exchange_setup cries out for a simplified and
188 // more efficient implementation, for now, opportunistically reset the
189 // pointers in the MultiSplitTransferInfo at the end multisplit_v_setup
190 
191 
192 struct Area2Buf {
193  int inode;
194  int n; // using only for transfer to ReducedTree on another
195  int ibuf[3]; // machine so n is 2 or 3
196  double adjust_rhs_; // for cvode
197  MultiSplit* ms; // for recalculating ibuf pointers after v_setup
198 };
199 struct Area2RT {
200  int inode;
201  int n; // 2 or 3
202  double* pd[3];
203  double adjust_rhs_; // for cvode
204  MultiSplit* ms; // for recalculating pd pointers after v_setup
205 };
206 
207 using Int2IntTable = std::unordered_map<int, int>;
208 
209 // d and rhs keep getting reorderd according to thread cache efficiency
210 // so we need to retain some logical info in order to update the
211 // pointer lists in the Reduced tree. Prior to this comment, this was
212 // done only for sid1A and sid1B near the end of v_setup. That is ok
213 // since sid1A and sid1B are allocated by v_setup. But d and rhs are
214 // filled into the Node* after return from v_setup.
215 
216 class ReducedTree {
217  public:
218  ReducedTree(MultiSplitControl*, int rank, int mapsize);
219  virtual ~ReducedTree();
220  void solve();
221  void nocap();
222  // private:
223  public:
224  void gather();
225  void scatter();
226 
228  int n;
229  int* ip;
230  double* rhs;
231  double* d;
232  double* a;
233  double* b;
234  int n2, n4, nmap;
235 
236  public:
237  // next implementation, exploit the fact that send buffer can be
238  // smaller than the receive buffer since we only need to send back
239  // the answer in RHS. Now we are sending back RHS, D,
240  // and even the offdiags.
241  double **smap, **rmap;
242  int *ismap, *irmap;
243  int nsmap, irfill;
244 
245  int* rmap2smap_index; // needed for nocap when cvode used
246  int* nzindex; // needed for nocap when cvode used
247  double* v; // needed for nocap when cvode used
248 
249  void reorder(int j, int nt, int* mark, int* all_bb_relation, int* allsid);
250  std::unique_ptr<Int2IntTable> s2rt{new Int2IntTable()}; // sid2rank table
251  void fillrmap(int sid1, int sid2, double* pd);
252  void fillsmap(int sid, double* prhs, double* pdiag);
253  void pr_map(int, double*);
254 };
255 
256 class MultiSplit {
257  public:
258  Node* nd[2];
259  int sid[2];
260  int backbone_style; // 0 = no, 1 = yes, 2 = reducedtree (exact)
261  int rthost; // nrnmpi_myid where reducedtree will be solved
262  // the problem with using backsid_ to find the index now is that
263  // the value may not be unique since several backbones can be
264  // connected at the same sid on this machine. So store the index into
265  // the backsid) array.
267  // and need the thread id as well
268  int ithread;
269  // reduced tree pointers to d and rhs can become invalid and
270  // need to be restored. According to the exchange_setup that calls
271  // ReducedTree::fillrmap, the order is nd[0]->rhs, nd[0]->d, nd[1]->rhs
272  // nd[1]->d.
276 };
277 
278 // note: the tag_ below was added in case this machine interacts with
279 // another via more than one message type. i.e combinations of short<->long,
280 // reduced<->long, and long<->long backbone interactions.
281 // The user should avoid this
282 // eventuality and in fact we might not allow it.
283 
284 // mpi transfer information
286  int host_; // host id for send-receive
287  int nnode_; // number of nodes involved
288  int* nodeindex_; // v_node indices of those nodes. Pointer into nodeindex_buffer_
289  int* nodeindex_th_; // thread for above
290  int nnode_rt_; // number of off diag elements involved
291  int* nd_rt_index_; // v_node indices of offdiag. Not pointer into nodeindex_buffer. Needed for
292  // area2buf
293  int* nd_rt_index_th_; // thread for above
294  double** offdiag_; // pointers to sid1A or sid1B off diag elements
295  int* ioffdiag_; // indices of above, to recalculate offdiag when freed
296  int size_; // 2*nnode_ + nnode_rt_ doubles needed in buffer
297  int displ_; // displacement into trecvbuf_
298  void* request_; // MPI_Request
299  int tag_; // short<->long, long<->long, subtree->rthost, rthost->subtree
300  int rthost_; // host id where the reduced tree is located (normally -1)
301 };
302 
303 using MultiSplitTable = std::unordered_map<Node*, MultiSplit*>;
304 using MultiSplitList = std::vector<MultiSplit*>;
305 
306 #include <multisplitcontrol.h>
308 
309 void nrnmpi_multisplit(Section* sec, double x, int sid, int backbone_style) {
310  if (!msc_) {
311  msc_ = new MultiSplitControl();
312  }
313  msc_->multisplit(sec, x, sid, backbone_style);
314 }
315 
317  narea2buf_ = narea2rt_ = 0;
318  area2buf_ = 0;
319  area2rt_ = 0;
320  nthost_ = 0;
322  msti_ = 0;
323  tbsize = 0;
324  ndbsize = 0;
325  trecvbuf_ = 0;
326  tsendbuf_ = 0;
327  nodeindex_buffer_ = 0;
329  nodeindex_rthost_ = 0;
330  narea_ = 0;
331  iarea_short_long_ = 0;
332  buf_area_indices_ = 0;
333  area_node_indices_ = 0;
334 
335  nrtree_ = 0;
336  rtree_ = 0;
337 
338 
339  multisplit_list_ = 0;
340  nth_ = 0;
341  mth_ = 0;
342 }
343 
345 
349  sid1A = sid1B = 0;
350  sid0i = 0;
351 
352  nbackrt_ = 0;
353  backsid_ = 0;
354  backAindex_ = 0;
355  backBindex_ = 0;
356  i1 = i2 = i3 = 0;
357 }
358 
360  del_sidA();
361 }
362 
363 void MultiSplitControl::multisplit(Section* sec, double x, int sid, int backbone_style) {
364 #if 0
365  if (sid > 1000) { pexch(); return; }
366  if (sid >= 1000) { pmat(sid>1000); return; }
367 #endif
368  if (sid < 0) {
369  nrn_cachevec(1);
374  }
375  exchange_setup();
376  return;
377  }
379  if (backbone_style != 2) {
380  hoc_execerror("only backbone_style 2 is now supported", 0);
381  }
384  classical_root_to_multisplit_->reserve(97);
386  }
387  Node* nd = node_exact(sec, x);
388  if (tree_changed) {
389  setup_topology();
390  }
391  // printf("root of %s(%g) ", secname(sec), x);
392  Node* root;
393  for (sec = nd->sec; sec; sec = sec->parentsec) {
394  root = sec->parentnode;
395  }
396  assert(root);
397  // printf("is %s\n", secname(root->sec));
398  MultiSplit* ms;
399  const auto& msiter = classical_root_to_multisplit_->find(root);
400  if (msiter != classical_root_to_multisplit_->end()) {
401  ms = msiter->second;
402  if (backbone_style == 2) {
403  if (ms->backbone_style != 2) {
404  hoc_execerror("earlier call for this cell did not have a backbone style = 2", 0);
405  }
406  } else if (backbone_style == 1) {
407  ms->backbone_style = 1;
408  }
409  ms->nd[1] = nd;
410  ms->sid[1] = sid;
411  if (ms->sid[1] == ms->sid[0]) {
412  char s[100];
413  sprintf(s, "two sid = %d at same point on tree rooted at", sid);
414  hoc_execerror(s, secname(root->sec));
415  }
416  } else {
417  ms = new MultiSplit();
418  ms->backbone_style = backbone_style;
419  ms->rthost = -1;
420  ms->nd[0] = nd;
421  ms->nd[1] = 0;
422  ms->sid[0] = sid;
423  ms->sid[1] = -1;
424  ms->back_index = -1;
425  ms->ithread = -1;
426  ms->rt_ = 0;
427  ms->rmap_index_ = -1;
428  ms->smap_index_ = -1;
429  (*classical_root_to_multisplit_)[root] = ms;
430  multisplit_list_->push_back(ms);
431  }
432 }
433 
435  if (sid1A) {
436  delete[] sid1A;
437  delete[] sid1B;
438  delete[] sid0i;
439  sid1A = 0;
440  sid1B = 0;
441  sid0i = 0;
442  }
443  if (nbackrt_) {
444  delete[] backsid_;
445  delete[] backAindex_;
446  delete[] backBindex_;
447  nbackrt_ = 0;
448  }
449 }
450 
452  int i;
453  if (nrtree_) {
454  for (i = 0; i < nrtree_; ++i) {
455  delete rtree_[i];
456  }
457  delete[] rtree_;
458  nrtree_ = 0;
459  }
460  if (msti_) {
461  for (i = 0; i < nthost_; ++i) {
462  if (msti_[i].nnode_rt_) {
463  delete[] msti_[i].nd_rt_index_;
464  delete[] msti_[i].nd_rt_index_th_;
465  delete[] msti_[i].offdiag_;
466  delete[] msti_[i].ioffdiag_;
467  }
468  }
469  delete[] msti_;
470  msti_ = 0;
471  if (nodeindex_buffer_) {
472  delete[] nodeindex_buffer_;
473  delete[] nodeindex_buffer_th_;
474  delete[] nodeindex_rthost_;
475  }
476  nodeindex_buffer_ = 0;
478  nodeindex_rthost_ = 0;
479  if (trecvbuf_) {
480  delete[] trecvbuf_;
481  delete[] tsendbuf_;
482  }
483  trecvbuf_ = 0;
484  tsendbuf_ = 0;
485  if (narea_) {
486  delete[] buf_area_indices_;
487  delete[] area_node_indices_;
488  buf_area_indices_ = 0;
489  area_node_indices_ = 0;
490  narea_ = 0;
491  }
492  if (narea2buf_) {
493  // at present, statically 3 since only used for ReducedTree
494  // for (i=0; i < narea2buf_; ++i) {
495  // delete [] area2buf_->ibuf;
496  // }
497  delete[] area2buf_;
498  area2buf_ = 0;
499  narea2buf_ = 0;
500  }
501  if (narea2rt_) {
502  delete[] area2rt_;
503  area2rt_ = 0;
504  narea2rt_ = 0;
505  }
506  }
507 }
508 
510  if (msc_) {
513  }
514 }
515 
517  // printf("nrnmpi_multisplit_clear()\n");
518  int i;
521  for (i = 0; i < nth_; ++i) {
522  mth_[i].del_sidA();
523  }
524  if (mth_) {
525  delete[] mth_;
526  mth_ = 0;
527  }
528  nth_ = 0;
529  del_msti();
531  for (const auto& mspair: *classical_root_to_multisplit_) {
532  delete mspair.second;
533  }
535  delete multisplit_list_;
536  multisplit_list_ = nullptr;
537  }
538 }
539 
540 // in the incremental development of exchange_setup from only long to short
541 // to reduced tree we end up with many iterations over all pc.multisplit
542 // calls. ie. every machines sid info. If the performance becomes
543 // visible relative to the total setup time then one can replace all those
544 // iterations by preserving only the sid graph info that is needed by each
545 // machine. A sid graph, tree, would contain a list of sid's and a list of
546 // machines. Then there would be a map of sid2graph. This list and map
547 // could be constructed with a single iteration. ie. when an edge is seen
548 // the graphs of the two sids would merge. Then one could throw away all
549 // graphs that have no sid on this machine. I would expect an order of
550 // magnitude performance improvement in exchange_setup.
551 
553  int i, j, k;
554  del_msti(); // above recalc_diam so multisplit_v_setup does not
555  // attempt to fill offdiag_
556  if (diam_changed) {
557  recalc_diam();
558  }
559 
560  // how many nodes are involved on this machine
561  // all nodes are distinct. For now we independently assert that
562  // all the sid on this machine are distinct. So when we get an
563  // sid from another machine it does not have to be directed to
564  // more than one place. So, just like splitcell, we cannot test
565  // a multisplit on a single machine.
566  // When sids are not distinct, in the context of reduced trees,
567  // there is no problem with respect to sending sid info to
568  // multiple places since each sid info gets added to the reduced
569  // tree and the reduced tree answer gets sent back to each sid.
570  // So there is not a lot of special code involved with multiple
571  // pieces on the same host, since each piece <-> reduced tree
572  // independently of where the piece and reduced tree are located.
573  int n = 0;
574  int nwc = 0; // number of backbonestyle=2 nodes.
576  for (MultiSplit* ms: *multisplit_list_) {
577  ++n;
578  if (ms->nd[1]) {
579  ++n;
580  }
581  if (ms->backbone_style == 2) {
582  ++nwc;
583  if (ms->nd[1]) {
584  ++nwc;
585  }
586  } else if (ms->backbone_style == 1) {
587  if (!ms->nd[1]) {
588  ms->backbone_style = 0;
589  }
590  }
591  }
592  }
593  // printf("%d n=%d nsbb=%d nwc=%d\n", nrnmpi_myid, n, nsbb, nwc);
594  if (nrnmpi_numprocs == 1 && n == 0) {
595  return;
596  }
597  // how many nodes are involved on each of the other machines
598  int* nn = new int[nrnmpi_numprocs];
599  if (nrnmpi_use) {
600  nrnmpi_int_allgather(&n, nn, 1);
601  } else {
602  nn[0] = n;
603  }
604  // if (nrnmpi_myid==0) for (i=0; i < nrnmpi_numprocs;++i) printf("%d %d nn=%d\n", nrnmpi_myid,
605  // i, nn[i]);
606  // what are the sid's on this machine
607  int* sid = 0;
608  int* inode = 0;
609  int* threadid = 0;
610  // an parallel array back to MultiSplit
611  MultiSplit** vec2ms = new MultiSplit*[n];
612  // following used to be is_ssb (is sid for short backbone)
613  // now we code that as well as enough information for the
614  // backbone_style==2 case to figure out the reduced tree matrix
615  // so 0 means backbone_style == 0, 1 means backbone_style == 1
616  // and > 1 means backbone_style == 2 and the value is
617  // other backbone sid+3 (so a value of 2 means that
618  // it is a single sid subtree and a value of 3 means that the
619  // other backbone sid is 0.
620  int* bb_relation = 0;
621  if (n > 0) {
622  sid = new int[n];
623  inode = new int[n];
624  bb_relation = new int[n];
625  threadid = new int[n];
626  }
628  i = 0;
629  for (MultiSplit* ms: *multisplit_list_) {
630  sid[i] = ms->sid[0];
631  inode[i] = ms->nd[0]->v_node_index;
632  threadid[i] = ms->ithread;
633  bb_relation[i] = ms->backbone_style;
634  vec2ms[i] = ms;
635  ++i;
636  if (ms->nd[1]) {
637  sid[i] = ms->sid[1];
638  inode[i] = ms->nd[1]->v_node_index;
639  threadid[i] = ms->ithread;
640  bb_relation[i] = ms->backbone_style;
641  if (ms->backbone_style == 2) {
642  bb_relation[i - 1] += 1 + sid[i];
643  bb_relation[i] += 1 + sid[i - 1];
644  }
645  vec2ms[i] = ms;
646  ++i;
647  }
648  }
649  }
650  // for (i=0; i < n; ++i) { printf("%d %d sid=%d inode=%d %s %d\n", nrnmpi_myid, i, sid[i],
651  // inode[i], secname(v_node[inode[i]]->sec), v_node[inode[i]]->sec_node_index_);}
652  // what are the sid's and backbone status on each of the other machines
653  // first need a buffer for them and their offsets
654  int* displ = new int[nrnmpi_numprocs + 1];
655  displ[0] = 0;
656  for (i = 0; i < nrnmpi_numprocs; ++i) {
657  displ[i + 1] = displ[i] + nn[i];
658  }
659  int nt = displ[nrnmpi_numprocs]; // total number of (distinct) nodes
660  int* allsid = new int[nt];
661  int* all_bb_relation = new int[nt];
662  if (nrnmpi_use) {
663  nrnmpi_int_allgatherv(sid, allsid, nn, displ);
664  nrnmpi_int_allgatherv(bb_relation, all_bb_relation, nn, displ);
665  } else {
666  for (i = 0; i < n; ++i) {
667  allsid[i] = sid[i];
668  all_bb_relation[i] = bb_relation[i];
669  }
670  }
671  if (!n) {
672  delete[] allsid;
673  delete[] all_bb_relation;
674  delete[] displ;
675  delete[] nn;
676  delete[] vec2ms;
677  errno = 0;
678  return;
679  }
680 
681  // now we need to analyze
682  // how many machines do we need to communicate with
683 
684  // note that when long sid is connected to 1 or more long sid
685  // and also a short backbone sid that there is no need to send
686  // or receive info from the long sid because the short sid sends
687  // back the answer.
688 
689  // A similar communication pattern holds for the backbone_style==2
690  // case. i.e. a star instead of all2all. The only problem is
691  // which host is the center of the star. The following
692  // code for backbone_style < 2 is almost unchanged but
693  // backbone_style >=2 issue handling is inserted as needed.
694 
695  // there should not be that many sid on one machine. so just use
696  // linear search
697 
698  // for an index into the all... vectors, mark[index] is the index
699  // into this hosts vectors of the corresponding sid. It gets slowly
700  // transformed into a communication pattern in the sense that
701  // mark[index] = -1 means no communication will take place between
702  // this host and the host associated with the index.
703  // For reduced tree purposes, we overload the mark semantics so
704  // that mark[index] points to one (of the possibly two) sid on the
705  // same cell. ie. all the sid on other cpus that are part of the
706  // whole cell point to that sid.
707 
708  // For backbone style 2,
709  // when there are several pieces on the same host with the same
710  // sid, then the mark points to the principle pieces sid. i.e all
711  // the sid on this cpu that are part of the whole cell also point
712  // to that sid.
713 
714  int* mark = new int[nt];
715  int* connects2short = new int[n];
716  for (i = 0; i < n; ++i) { // so we know if we will be sending to
717  connects2short[i] = 0; // a short backbone.
718  }
719  for (i = 0; i < nt; ++i) {
720  mark[i] = -1;
721  for (j = 0; j < n; ++j) {
722  if (allsid[i] == sid[j]) {
723  // here we can errcheck the case of 2 on one
724  // machine and a different style for the same
725  // sid on another machine. If this passes on
726  // all machines then a single cell is necessarily
727  // consistent with regard to all sids for it
728  // being backbone_style == 2
729  if ((bb_relation[j] >= 2) != (all_bb_relation[i] >= 2)) {
730  hoc_execerror("backbone_style==2 inconsistent between hosts for same sid", 0);
731  }
732 
733  if (all_bb_relation[i] < 2) {
734  mark[i] = j;
735  if (all_bb_relation[i] == 1) {
736  connects2short[j] = 1;
737  }
738  }
739  }
740  }
741  }
742 
743  // but we do not want to mark allsid on this machine
744  for (i = displ[nrnmpi_myid]; i < displ[nrnmpi_myid] + n; ++i) {
745  mark[i] = -1;
746  }
747  // but note that later when we mark the reduced tree items we
748  // do leave the marks for allsid on this machine (but the marks
749  // have a slightly different meaning
750 
751  // undo all the marks for long to long if there is a long to short
752  // but be careful not to undo the short to long
753  for (i = 0; i < nt; ++i) {
754  if (mark[i] >= 0 && connects2short[mark[i]] && all_bb_relation[i] == 0 &&
755  bb_relation[mark[i]] == 0) {
756  mark[i] = -1;
757  }
758  }
759  // make sure no short to short connections
760  for (i = 0; i < nt; ++i) {
761  if (mark[i] >= 0) {
762  if (bb_relation[mark[i]] == 1 && all_bb_relation[i] == 1) {
763  hoc_execerror("a short to short backbone interprocessor connection exists", 0);
764  }
765  }
766  }
767 
768  // right now, the communication pattern represented by mark is
769  // bogus for backbone_style == 2 and they are all -1.
770  // We want to adjust mark so all the sid on
771  // a single cell point to one of the sid on this cell
772  // The following uses an excessive number of iterations
773  // over the all... vectors (proportional to number of distinct sids
774  // on a cell and should be made more efficient
775  for (j = 0; j < n; ++j) {
776  if (bb_relation[j] >= 2) {
777  reduced_mark(j, sid[j], nt, mark, allsid, all_bb_relation);
778  // note the above will also mark both this host sids
779  // so when the other one comes around here it will
780  // mark nothing.
781  }
782  }
783 
784 #if 0
785 for (i=0; i < nt; ++i) { printf("%d %d allsid=%d all_bb_relation=%d mark=%d\n",
786 nrnmpi_myid, i, allsid[i], all_bb_relation[i], mark[i]);}
787 #endif
788 
789  // it is an error if there are two pieces of the same cell on the
790  // same host. All we need to do is make sure the mark for this
791  // cpu are unique. i.e belong to different cells
792  // This is no longer an error. For backbonestyle 2, allowing
793  // multiple pieces of same cell on same cpu.
794  if (0) {
795  int* mcnt = new int[n];
796  for (j = 0; j < n; ++j) {
797  mcnt[j] = 0;
798  }
799  for (i = displ[nrnmpi_myid]; i < displ[nrnmpi_myid] + n; ++i) {
800  if (all_bb_relation[i] >= 2 && mark[i] >= 0) {
801  mcnt[mark[i]] += 1;
802  if (all_bb_relation[i] > 2) {
803  ++i;
804  }
805  }
806  }
807  for (j = 0; j < n; ++j) {
808  assert(mcnt[j] < 2);
809  }
810  delete[] mcnt;
811  }
812 
813  // for reduced trees, we now need to settle on which host will solve
814  // a reduced tree. If rthost is this one then this receives from
815  // and sends to every host that has a sid on the cell. If the rthost
816  // is another, then this sends and receives to rthost.
817 
818  // For now let it be the first host with two sid, and if there is
819  // no such host (then why are we using style 2?) then the first
820  // host with 1.
821  nrtree_ = 0;
822  int* rthost = new int[n];
823  ReducedTree** rt = new ReducedTree*[n];
824  for (j = 0; j < n; ++j) {
825  rthost[j] = -1;
826  rt[j] = 0;
827  }
828  if (nwc) {
829  for (MultiSplit* ms: *multisplit_list_) {
830  if (ms->backbone_style == 2) {
831  for (j = 0; j < n; ++j) { // find the mark value
832  if (sid[j] == ms->sid[0]) {
833  break;
834  }
835  }
836  ms->rthost = -1;
837  k = -1;
838  for (int ih = 0; ih < nrnmpi_numprocs; ++ih) {
839  for (i = displ[ih]; i < displ[ih + 1]; ++i) {
840  if (mark[i] == j) {
841  if (all_bb_relation[i] > 2) {
842  ms->rthost = ih;
843  break;
844  }
845  if (all_bb_relation[i] == 2) {
846  if (k == -1) {
847  k = ih;
848  }
849  }
850  }
851  }
852  if (ms->rthost != -1) {
853  break;
854  }
855  }
856  if (ms->rthost == -1) {
857  ms->rthost = k;
858  }
859  // only want rthost[j] for sid[0], not sid[1]
860  rthost[j] = ms->rthost;
861  // printf("%d sid0=%d sid1=%d rthost=%d\n", nrnmpi_myid, ms->sid[0], ms->sid[1],
862  // ms->rthost);
863  }
864  }
865  // it is important that only the principal sid has rthost != 0
866  // since we count them below, nrtree_, to allocate rtree_.
867  }
868 #if 0
869 for (j=0; j < n; ++j) {
870 printf("%d j=%d sid=%d bb_relation=%d rthost=%d\n", nrnmpi_myid,j, sid[j],
871 bb_relation[j], rthost[j]);
872 }
873 #endif
874  // there can be a problem when one host sends parts of two cells
875  // to another host due to cells not being in the same order with
876  // respect to NrnHashIterate. So demand that they are in the
877  // same order. Note this can also be used to assert that there
878  // are not two pieces on a host from the same cell.
879  // But we no longer use NrnHashIterate and the order is consistent
880  // because ordering is always with respect to the allgathered
881  // arrays of size nt.
882  if (0) {
883  for (i = 0; i < nrnmpi_numprocs; ++i) {
884  int ix = -1;
885  int nj = displ[i + 1];
886  for (j = displ[i]; j < nj; ++j) {
887  if (all_bb_relation[j] >= 2 && mark[j] >= 0 && rthost[mark[j]] == nrnmpi_myid) {
888  assert(mark[j] > ix);
889  ix = mark[j];
890  if (all_bb_relation[j] > 2) {
891  ++j;
892  }
893  }
894  }
895  }
896  }
897 
898  // allocate rtree_
899  for (j = 0; j < n; ++j) {
900  if (rthost[j] == nrnmpi_myid) {
901  ++nrtree_;
902  }
903  }
904  if (nrtree_) {
905  i = 0;
906  rtree_ = new ReducedTree*[nrtree_];
907  for (j = 0; j < n; ++j) {
908  if (rthost[j] == nrnmpi_myid) {
909  // sid to rank table
910  std::unique_ptr<Int2IntTable> s2rt{new Int2IntTable()};
911  s2rt->reserve(20);
912  int rank = 0;
913  int mapsize = 0;
914  for (k = 0; k < nt; ++k) {
915  if (mark[k] == j && all_bb_relation[k] >= 2) {
916  const auto& result = s2rt->insert({allsid[k], rank});
917  if (result.second) {
918  rank++;
919  }
920  if (all_bb_relation[k] == 2) {
921  mapsize += 2;
922  } else {
923  mapsize += 3;
924  }
925  }
926  }
927  rtree_[i] = new ReducedTree(this, rank, mapsize);
928  rt[j] = rtree_[i]; // needed for third pass below
929  // printf("%d new ReducedTree(%d, %d)\n", nrnmpi_myid, rank, mapsize);
930  // at this point the ReducedTree.s2rt is not in tree order
931  // (in fact we do not even know if it is a tree)
932  // so reorder. For a tree, we know there must be ReducedTree.n - 1
933  // edges.
934  rtree_[i]->s2rt = std::move(s2rt);
935  rtree_[i]->reorder(j, nt, mark, all_bb_relation, allsid);
936  ++i;
937  }
938  }
939  }
940 
941  // fill in the rest of the rthost[], mt->rthost, and rt
942  j = 0;
943  for (MultiSplit* ms: *multisplit_list_) {
944  if (ms->backbone_style == 2) {
945  int jj = mark[displ[nrnmpi_myid] + j];
946  ms->rthost = rthost[jj];
947  rthost[j] = ms->rthost;
948  rt[j] = rt[jj];
949  if (ms->nd[1]) {
950  ++j;
951  }
952  }
953  ++j;
954  }
955 #if 0
956  for (int ii=0; ii < multisplit_list_->count(); ++ii) {
957  MultiSplit* ms = multisplit_list_->item(ii);
958  if (ms->backbone_style == 2) {
959 printf("%d sid0=%d sid1=%d rthost=%d\n", nrnmpi_myid, ms->sid[0], ms->sid[1], ms->rthost);
960  }
961  }
962 for (j=0; j < n; ++j) {
963 printf("%d j=%d sid=%d bb_relation=%d rthost=%d\n", nrnmpi_myid,j, sid[j],
964 bb_relation[j], rthost[j]);
965 }
966 #endif
967 
968  // count the reduced tree related nodes that have non-zero area
969  for (i = 0; i < n; ++i) {
970  // remember rthost >= 0 only for sid[0]
971  if (rthost[i] >= 0)
972  for (j = 0; j < 2; ++j) {
973  NrnThread* _nt = nrn_threads + threadid[i];
974  Node* nd = _nt->_v_node[inode[i + j]];
975  if (nd->_classical_parent && nd->sec_node_index_ < nd->sec->nnode - 1) {
976  if (rthost[i] == nrnmpi_myid) {
977  ++narea2rt_;
978  } else {
979  ++narea2buf_;
980  }
981  }
982  if (bb_relation[i] == 2) { // sid[0] only
983  break;
984  }
985  if (j == 1) {
986  ++i;
987  }
988  }
989  }
990  if (narea2rt_) {
991  area2rt_ = new Area2RT[narea2rt_];
992  }
993  if (narea2buf_) {
995  }
996  // can fill in all of the info for area2rt_ but only some for area2buf_
997  // actually, area2rt is the one used in practice since only the soma ever
998  // has a sid at a non-zero area node and the reduced tree will probably
999  // be on the machine with the soma.
1000  narea2rt_ = narea2buf_ = 0;
1001  for (i = 0; i < n; ++i) {
1002  // remember rthost >= 0 only for sid[0]
1003  if (rthost[i] >= 0)
1004  for (j = 0; j < 2; ++j) {
1005  NrnThread* _nt = nrn_threads + threadid[i];
1006  Node* nd = _nt->_v_node[inode[i + j]];
1007  if (nd->_classical_parent && nd->sec_node_index_ < nd->sec->nnode - 1) {
1008  if (rthost[i] == nrnmpi_myid) {
1009  Area2RT& art = area2rt_[narea2rt_];
1010  art.ms = vec2ms[i];
1011  art.inode = inode[i + j];
1012  art.n = 2;
1013  art.pd[0] = &D(art.inode);
1014  art.pd[1] = &RHS(art.inode);
1015  if (bb_relation[i] > 2) {
1016  art.n = 3;
1017  k = vec2ms[i]->back_index;
1018  MultiSplitThread& t = mth_[vec2ms[i]->ithread];
1019  if (j == 0) {
1020  art.pd[2] = t.sid1A + t.backAindex_[k];
1021  } else {
1022  art.pd[2] = t.sid1B + t.backBindex_[k];
1023  }
1024  }
1025  ++narea2rt_;
1026  } else {
1027  Area2Buf& ab = area2buf_[narea2buf_];
1028  ab.ms = vec2ms[i];
1029  ab.inode = inode[i + j];
1030  // can figure out how many but not
1031  // the indices into the send buffer.
1032  ab.n = 2;
1033  if (bb_relation[i] > 2) {
1034  ab.n = 3;
1035  }
1036  ++narea2buf_;
1037  }
1038  }
1039  if (bb_relation[i] == 2) { // sid[0] only
1040  break;
1041  }
1042  }
1043  }
1044 #if 0
1045 printf("%d narea2rt=%d narea2buf=%d\n", nrnmpi_myid, narea2rt_, narea2buf_);
1046 for (i = 0; i < narea2rt_; ++i) {
1047  Area2RT& art = area2rt_[i];
1048  printf("%d area2rt[%d] inode=%d n=%d thread=%d\n", nrnmpi_myid, i, art.inode, art.n, art.ms->ithread);
1049 }
1050 for (i = 0; i < narea2buf_; ++i) {
1051  Area2Buf& ab = area2buf_[i];
1052  printf("%d area2buf[%d] inode=%d n=%d\n", nrnmpi_myid, i, ab.inode, ab.n);
1053 }
1054 #endif
1055 #if 0
1056 if (nrnmpi_myid == 0) {
1057 for (i=0; i < n; ++i) {
1058  printf("%d %d sid=%d bbrelation=%d rthost=%d rt=%p\n", nrnmpi_myid, i, sid[i], bb_relation[i], rthost[i], rt[i]);
1059 }
1060 for (i=0; i < nrnmpi_numprocs+1; ++i) {
1061  printf("%d displ[%d]=%d\n", nrnmpi_myid, i, displ[i]);
1062 }
1063 for (i=0; i < nt; ++i) {
1064  printf("%d %d allsid=%d mark=%d all_bb_relation=%d\n", nrnmpi_myid, i, allsid[i], mark[i], all_bb_relation[i]);
1065 }}
1066 #endif
1067  // it is best if the user arranges things so that machine -> machine
1068  // has only one type of connection, i.e long->short, long->long,
1069  // or short->long since that means only one message required.
1070  // But for generality we program for the possibility of needing
1071  // two message types (long->short and short->long are the same
1072  // type and distinct from long->long)
1073  // Actually we do need 3 because one cannot mix long->short
1074  // and long->long in the same message because they need
1075  // different tags.
1076  // we also calculate the indices for the types
1077  // we need to have separate tags for long <-> reduced trees
1078  nthost_ = 0;
1079  // int ndbsize, tbsize;
1080  ndbsize = 0;
1081  tbsize = 0;
1082 
1083  for (i = 0; i < nrnmpi_numprocs; ++i) {
1084  int nh = 0;
1085  int type = 0;
1086  int nh1 = 0;
1087  for (j = displ[i]; j < displ[i + 1]; ++j) {
1088  if (mark[j] >= 0) {
1089  // how many distinct all_bb_relation<2: 1,2,3?
1090  if (bb_relation[mark[j]] == 1) { // short<->long
1091  assert(all_bb_relation[j] == 0);
1092  type |= 04;
1093  ++ndbsize; // space for node index
1094  tbsize += 2; // d and rhs
1095  } else if (all_bb_relation[j] == 1) { // long<->short
1096  type |= 01;
1097  ++ndbsize;
1098  tbsize += 2;
1099  } else if (all_bb_relation[j] < 2) { // long<->long
1100  type |= 02;
1101  ++ndbsize;
1102  tbsize += 2;
1103  } else if (all_bb_relation[j] >= 2) {
1104  int rth = rthost[mark[j]];
1105  if (rth == nrnmpi_myid) {
1106  if (i != nrnmpi_myid) {
1107  // this is the rthost
1108  // reducedtree->long
1109  type |= 010;
1110  // no node buffer since all
1111  // info <-> ReducedTree
1112  // ++ndbsize;
1113  tbsize += 2;
1114  if (all_bb_relation[j] > 2) {
1115  tbsize += 1;
1116  }
1117  }
1118  } else if (i == nrnmpi_myid) {
1119  // only this to the rthost
1120  // long->reducedtree
1121  nh1 += 1;
1122  // may not be distinct hosts
1123  // be careful not to count twice
1124  for (int jj = displ[i]; jj < j; ++jj) {
1125  if (rth == rthost[mark[jj]]) {
1126  nh1 -= 1;
1127  break;
1128  }
1129  }
1130  ++ndbsize;
1131  tbsize += 2; // off diag element
1132  if (all_bb_relation[j] > 2) {
1133  tbsize += 1;
1134  }
1135  }
1136  }
1137  }
1138  }
1139  nh = (type & 1) + ((type / 2) & 1) + ((type / 4) & 1) + ((type / 010) & 1);
1140  nthost_ += nh + nh1;
1141  // printf("%d type=%d %d %d %d %d %d nh=%d nthost_=%d\n", nrnmpi_myid, type, type&1,
1142  // (type/2)&1, (type/4)&1, (type/010)&1, (type/020)&1, nh, nthost_);
1143  }
1144  // printf("%d x nthost_=%d\n", nrnmpi_myid, nthost_);
1146  for (i = 0; i < nthost_; ++i) { // two of these needed before rest of fill
1147  msti_[i].nnode_rt_ = 0;
1148  msti_[i].nd_rt_index_ = 0;
1149  msti_[i].nd_rt_index_th_ = 0;
1150  msti_[i].offdiag_ = 0;
1151  msti_[i].ioffdiag_ = 0;
1152  msti_[i].rthost_ = -1;
1153  }
1154  if (ndbsize) { // can be 0 if rthost
1155  nodeindex_buffer_ = new int[ndbsize];
1156  nodeindex_buffer_th_ = new int[ndbsize];
1157  nodeindex_rthost_ = new int[ndbsize];
1158  }
1159  for (i = 0; i < ndbsize; ++i) {
1160  nodeindex_rthost_[i] = -1;
1161  }
1162  // printf("%d nthost_=%d ndbsize=%d tbsize=%d nrtree_=%d\n", nrnmpi_myid, nthost_, ndbsize,
1163  // tbsize, nrtree_); tbsize_=tbsize;
1164  if (tbsize) {
1165  trecvbuf_ = new double[tbsize];
1166  tsendbuf_ = new double[tbsize];
1167  }
1168  // We need to fill the msti_ array
1169  // in the order long<->short long<->reduced
1170  // followed by long<->long followed by reduced<->long
1171  // followed short<->long. // three distinct message tags
1172  nthost_ = 0;
1173  int mdisp = 0;
1174  k = 0;
1175 
1176  // first pass for the long<->short backbones
1177  for (i = 0; i < nrnmpi_numprocs; ++i) {
1178  int b = 0;
1179  for (j = displ[i]; j < displ[i + 1]; ++j) {
1180  if (mark[j] >= 0 && bb_relation[mark[j]] == 0 && all_bb_relation[j] == 1) {
1181  nodeindex_buffer_th_[k] = threadid[mark[j]];
1182  nodeindex_buffer_[k++] = inode[mark[j]];
1183  // printf("%d i=%d j=%d k=%d nthost=%d mark=%d inode=%d\n", nrnmpi_myid, i, j, k,
1184  // nthost_, mark[j], inode[mark[j]]);
1185  ++b;
1186  }
1187  }
1188  if (b) {
1189  msti_[nthost_].displ_ = mdisp;
1190  msti_[nthost_].size_ = 2 * b;
1191  mdisp += 2 * b;
1192  msti_[nthost_].host_ = i;
1193  msti_[nthost_].nnode_ = b;
1194  msti_[nthost_].tag_ = 1;
1195  ++nthost_;
1196  }
1197  }
1198 
1199  // second pass for the long<->reducedtree
1200  // includes the communication of sid <-> reduced tree for the
1201  // non rthost cpus
1202  // first make a list of the rthost values, the size of that
1203  // list will be how many msti we need for this pass.
1204  int* tmphost = new int[n]; // cannot be more than this.
1205  int ntmphost = 0;
1206  i = nrnmpi_myid;
1207  for (j = displ[i]; j < displ[i + 1]; ++j) {
1208  int j1 = mark[j];
1209  if (j1 >= 0 && bb_relation[j1] >= 2 && rthost[j1] != i) {
1210  tmphost[ntmphost++] = rthost[j1];
1211  for (int itmp = 1; itmp < ntmphost; ++itmp) {
1212  if (tmphost[itmp - 1] == rthost[j1]) {
1213  ntmphost--;
1214  break;
1215  }
1216  }
1217  }
1218  }
1219  // printf("%d second pass ntmphost = %d\n", nrnmpi_myid, ntmphost);
1220  // now we can loop over that list and fill the msti
1221  for (int itmp = 0; itmp < ntmphost; ++itmp) {
1222  i = nrnmpi_myid;
1223  int b = 0;
1224  int br = 0;
1225  // only the ones on this machine and rthost not this machine
1226  // Note: rthost generally has different sid value
1227  // and number of sid than this machine
1228  for (j = displ[i]; j < displ[i + 1]; ++j) {
1229  int jj = j - displ[i];
1230  int j1 = mark[j]; // point to sid[0] on this machine
1231  // skip if sid[1] of the rthost
1232  // printf("%d i=%d j=%d j1=%d bb_relation[j1]=%d rthost[j1]=%d\n",
1233  // nrnmpi_myid, i, j, j1, bb_relation[j1], rthost[j1]);
1234  if (j1 >= 0 && bb_relation[j1] >= 2 && rthost[j1] == tmphost[itmp]) { // only the first
1235  // has rthost
1236  // printf("%d sid=%d bb_relation=%d send to rthost=%d k=%d\n", nrnmpi_myid,
1237  // sid[j1], bb_relation[j1], rthost[j1], k);
1238  // mark[j] points to the index for sid0
1239  nodeindex_rthost_[k] = rthost[j1];
1240  nodeindex_buffer_th_[k] = threadid[j - displ[i]];
1241  nodeindex_buffer_[k++] = inode[j - displ[i]];
1242  ++b;
1243  // one or two?
1244  if (bb_relation[jj] > 2) {
1245  // int iii = i;
1246  {
1247  // except that msti_[nthost_].offdiag_ has not been allocated, this is a
1248  // good time to figure out the pointers to sid1A and sid1B and which is
1249  // which
1251  int i;
1252  int* ix;
1253  int* ixth;
1254  double** od;
1255  int* iod;
1256  // start by assuming 2 and then increment by 2
1257  if (m.offdiag_) {
1258  ix = m.nd_rt_index_;
1259  ixth = m.nd_rt_index_th_;
1260  od = m.offdiag_;
1261  iod = m.ioffdiag_;
1262  m.nd_rt_index_ = new int[m.nnode_rt_ + 2];
1263  m.nd_rt_index_th_ = new int[m.nnode_rt_ + 2];
1264  m.offdiag_ = new double*[m.nnode_rt_ + 2];
1265  m.ioffdiag_ = new int[m.nnode_rt_ + 2];
1266  for (i = 0; i < m.nnode_rt_; ++i) {
1267  m.nd_rt_index_[i] = ix[i];
1268  m.nd_rt_index_th_[i] = ixth[i];
1269  m.offdiag_[i] = od[i];
1270  m.ioffdiag_[i] = iod[i];
1271  }
1272  delete[] ix;
1273  delete[] ixth;
1274  delete[] od;
1275  delete[] iod;
1276  ix = m.nd_rt_index_ + m.nnode_rt_;
1277  ixth = m.nd_rt_index_th_;
1278  od = m.offdiag_ + m.nnode_rt_;
1279  iod = m.ioffdiag_ + m.nnode_rt_;
1280  } else {
1281  m.nd_rt_index_ = new int[2];
1282  m.nd_rt_index_th_ = new int[2];
1283  m.offdiag_ = new double*[2];
1284  m.ioffdiag_ = new int[2];
1285  ix = m.nd_rt_index_;
1286  ixth = m.nd_rt_index_th_;
1287  od = m.offdiag_;
1288  iod = m.ioffdiag_;
1289  }
1290  // now fill in
1291  // sid0 is for sid1A[0] , sid1 is for sid1B[???]
1292  // sid0: nodeindex = inode[j1]
1293  // in multisplit_v_setup, we created two parallel vectors of size
1294  // nbackrt_ (number of backbones sending to reduced tree).
1295  // backsid_ is sid0, and
1296  // backAindex_ corresponding sid1A index for sid0
1297  // backBindex_ sid1B index for sid1
1298  // Should not be too many backbones on this machine so linear
1299  // search should be ok. BUG again. i is MultiSplit.back_index and
1300  // can determine MultiSplit from j1.
1301  i = vec2ms[jj]->back_index;
1302  MultiSplitThread& t = mth_[vec2ms[jj]->ithread];
1303  assert(sid[jj] == t.backsid_[i]);
1304  ix[0] = inode[jj];
1305  ix[1] = inode[jj + 1];
1306  ixth[0] = vec2ms[jj]->ithread;
1307  ixth[1] = vec2ms[jj]->ithread;
1308  od[0] = t.sid1A + t.backAindex_[i];
1309  od[1] = t.sid1B + t.backBindex_[i];
1310  iod[0] = t.backAindex_[i];
1311  iod[1] = t.backBindex_[i];
1312 #if 0
1313 printf("%d offdiag nbrt=%d iii=%d i=%d j1=%d jj=%d sid=%d ix = %d %d back = %d %d\n",
1314 nrnmpi_myid, nbackrt_, iii, i, j1, jj, sid[j1], ix[0], ix[1], t.backAindex_[i], t.backBindex_[i]);
1315 #endif
1316  m.nnode_rt_ += 2;
1317  }
1318  nodeindex_rthost_[k] = rthost[j1];
1319  ++j;
1320  ++jj;
1321  nodeindex_buffer_th_[k] = threadid[jj];
1322  nodeindex_buffer_[k++] = inode[jj];
1323  ++b;
1324  br += 2;
1325  }
1326  }
1327  }
1328  if (b || br) {
1329  msti_[nthost_].displ_ = mdisp;
1330  msti_[nthost_].size_ = 2 * b + br;
1331  mdisp += 2 * b + br;
1332  msti_[nthost_].host_ = tmphost[itmp];
1333  msti_[nthost_].rthost_ = tmphost[itmp];
1334  msti_[nthost_].nnode_ = b;
1335  msti_[nthost_].tag_ = 3;
1336  ++nthost_;
1337  }
1338  }
1339  delete[] tmphost;
1340 
1341  // third pass for the long->long backbones
1342  for (i = 0; i < nrnmpi_numprocs; ++i) {
1343  int b = 0;
1344  for (j = displ[i]; j < displ[i + 1]; ++j) {
1345  if (mark[j] >= 0 && bb_relation[mark[j]] == 0 && all_bb_relation[j] == 0) {
1346  nodeindex_buffer_th_[k] = threadid[mark[j]];
1347  nodeindex_buffer_[k++] = inode[mark[j]];
1348  // printf("%d i=%d j=%d k=%d nthost=%d mark=%d inode=%d\n", nrnmpi_myid, i, j, k,
1349  // nthost_, mark[j], inode[mark[j]]);
1350  ++b;
1351  }
1352  }
1353  if (b) {
1354  msti_[nthost_].displ_ = mdisp;
1355  msti_[nthost_].size_ = 2 * b;
1356  mdisp += 2 * b;
1357  msti_[nthost_].host_ = i;
1358  msti_[nthost_].nnode_ = b;
1359  msti_[nthost_].tag_ = 2;
1360  ++nthost_;
1361  }
1362  }
1363 
1364  // fourth pass for the reducedtree->long backbones
1365  // the reduced tree <-> sids not on this host
1367  for (i = 0; i < nrnmpi_numprocs; ++i) {
1368  int b = 0;
1369  int br = 0;
1370  // not the ones on this machine and rthost this machine
1371  for (j = displ[i]; j < displ[i + 1]; ++j) {
1372  int j1 = mark[j];
1373  if (j1 >= 0 && all_bb_relation[j] >= 2 && rthost[j1] == nrnmpi_myid &&
1374  rthost[j1] != i) {
1375  // printf("%d i=%d sid=%d bb_relation=%d send to rthost=%d rt=%p\n", nrnmpi_myid, i,
1376  // allsid[j], all_bb_relation[j], rthost[j1], rt[j1]);
1377  // don't care about the nodeindex, only
1378  // the ReducedTree.
1379  // fill in the ReducedTree map with RHS and D pointers to the sid[0] related receive
1380  // buffer and next iteration the sid[1]
1381  int ib = mdisp + 2 * b;
1382  // printf("%d fill buf %d\n", nrnmpi_myid, ib);
1383  rt[j1]->fillrmap(allsid[j], -1, trecvbuf_ + ib + 1); // exchange order is d, rhs
1384  rt[j1]->fillrmap(allsid[j], allsid[j], trecvbuf_ + ib);
1385  rt[j1]->fillsmap(allsid[j], tsendbuf_ + ib + 1, tsendbuf_ + ib);
1386  ++b; // at least D and RHS received and sent back
1387  if (all_bb_relation[j] > 2) { // two of these
1388  // need to fill in off diag after this iteration is done and then re-iterate
1389  // because they are at the end of the buffer section for machine i.
1390  ++br; // an offdiag received
1391  // though I suppose it does not
1392  // need to be sent back
1393  }
1394  }
1395  }
1396  // reiterate and fill in off diag
1397  br = 0;
1398  for (j = displ[i]; j < displ[i + 1]; ++j) {
1399  int j1 = mark[j];
1400  if (j1 >= 0 && all_bb_relation[j] >= 2 && rthost[j1] == nrnmpi_myid &&
1401  rthost[j1] != i) {
1402  if (all_bb_relation[j] > 2) { // two of these
1403  int ib = mdisp + 2 * b + br;
1404  // printf("%d offdiag fill buf %d j1=%d rt=%p\n", nrnmpi_myid, ib, j1, rt[j1]);
1405  rt[j1]->fillrmap(allsid[j + 1], allsid[j], trecvbuf_ + ib);
1406  rt[j1]->fillrmap(allsid[j], allsid[j + 1], trecvbuf_ + ib + 1);
1407  br += 2;
1408  j += 1;
1409  }
1410  }
1411  }
1412 
1413  if (b || br) {
1414  msti_[nthost_].displ_ = mdisp;
1415  msti_[nthost_].size_ = 2 * b + br;
1416  mdisp += 2 * b + br;
1417  msti_[nthost_].host_ = i;
1419  msti_[nthost_].nnode_ = b;
1420  // no nnode_rt_ for this phase
1421  // msti_[nthost_].nnode_rt_ = br;
1422  // msti_[nthost_].offdiag_ = new double*[br];
1423  msti_[nthost_].tag_ = 3;
1424  ++nthost_;
1425  }
1426  }
1427 
1428  // fifth pass for the short->long backbones
1429  int tmp_index = k;
1431  for (i = 0; i < nrnmpi_numprocs; ++i) {
1432  int b = 0;
1433  for (j = displ[i]; j < displ[i + 1]; ++j) {
1434  if (mark[j] >= 0 && bb_relation[mark[j]] == 1 && all_bb_relation[j] == 0) {
1435  nodeindex_buffer_th_[k] = threadid[mark[j]];
1436  nodeindex_buffer_[k++] = inode[mark[j]];
1437  // printf("%d i=%d j=%d k=%d nthost=%d mark=%d inode=%d\n", nrnmpi_myid, i, j, k,
1438  // nthost_, mark[j], inode[mark[j]]);
1439  ++b;
1440  }
1441  }
1442  if (b) {
1443  msti_[nthost_].displ_ = mdisp;
1444  msti_[nthost_].size_ = 2 * b;
1445  mdisp += 2 * b;
1446  msti_[nthost_].host_ = i;
1447  msti_[nthost_].nnode_ = b;
1448  msti_[nthost_].tag_ = 1;
1449  ++nthost_;
1450  }
1451  }
1452 
1453 #if 0
1454 printf("%d nthost_ = %d\n", nrnmpi_myid, nthost_);
1455 for (i=0; i < nthost_; ++i) {
1457 printf("%d i=%d displ_=%d host_=%d nnode_=%d tag_=%d\n", nrnmpi_myid, i, m.displ_,
1458 m.host_, m.nnode_, m.tag_);
1459 }
1460 #endif
1461 
1462 #if 0
1463 for (i=0; i < k; ++i) {
1465 printf("%d %d nodeindex_buffer %d %s %d\n", nrnmpi_myid, i, j,
1466 secname(v_node[j]->sec), v_node[j]->sec_node_index_);
1467 }
1468 #endif
1469 
1470  // the sids on this machine with rthost on this machine
1471  // go into the ReducedTree directly
1472  if (nrtree_) {
1473  i = 0;
1474  int ib = 0;
1475  for (MultiSplit* ms: *multisplit_list_) {
1476  NrnThread* _nt = nrn_threads + ms->ithread;
1477  MultiSplitThread& t = mth_[ms->ithread];
1478  if (ms->rthost == nrnmpi_myid) {
1479  // printf("%d nrtree_=%d i=%d rt=%p\n", nrnmpi_myid, nrtree_, i, rt[i]);
1480  int j = ms->nd[0]->v_node_index;
1481  // printf("%d call fillrmap sid %d,%d %d node=%d\n", nrnmpi_myid, ms->sid[0],
1482  // ms->sid[0], j);
1483  ms->rt_ = rt[i];
1484  ms->rmap_index_ = rt[i]->irfill;
1485  ms->smap_index_ = rt[i]->nsmap;
1486  rt[i]->fillrmap(ms->sid[0], -1, &RHS(j));
1487  rt[i]->fillrmap(ms->sid[0], ms->sid[0], &D(j));
1488  rt[i]->fillsmap(ms->sid[0], &RHS(j), &D(j));
1489  if (ms->nd[1]) {
1490  ib = ms->back_index;
1491  assert(t.backsid_[ib] == ms->sid[0]);
1492  // fill sid1 row
1493  // note: for cvode need to keep fillrmap as pairs so following
1494  // moved from between sid1A,sid1B fillrmap.
1495  j = ms->nd[1]->v_node_index;
1496  // printf("%d call fillrmap sid1 %d %d node=%d\n", nrnmpi_myid, ms->sid[1],
1497  // ms->sid[1], j);
1498  rt[i]->fillrmap(ms->sid[1], -1, &RHS(j));
1499  rt[i]->fillrmap(ms->sid[1], ms->sid[1], &D(j));
1500  rt[i]->fillsmap(ms->sid[1], &RHS(j), &D(j));
1501 
1502  // fill sid1A for sid0 row
1503  // printf("%d call fillrmap sid %d,%d %d ib=%d t.backsid=%d t.backAindex=%d\n",
1504  // nrnmpi_myid, ms->sid[1], ms->sid[0], ib, t.backsid_[ib], t.backAindex_[ib]);
1505  rt[i]->fillrmap(ms->sid[1], ms->sid[0], t.sid1A + t.backAindex_[ib]);
1506 
1507  // fill sid1B
1508  // printf("%d call fillrmap sid %d,%d ib=%d backBindex=%d\n", nrnmpi_myid,
1509  // ms->sid[0], ms->sid[1], ib, backBindex_[ib]);
1510  rt[i]->fillrmap(ms->sid[0], ms->sid[1], t.sid1B + t.backBindex_[ib]);
1511  }
1512  }
1513  ++i;
1514  if (ms->nd[1]) {
1515  ++i;
1516  }
1517  }
1518  }
1519 
1520  if (nthost_) {
1523  for (i = 1; i < nthost_; ++i) {
1525  msti_[i].nodeindex_ = msti_[i - 1].nodeindex_ + msti_[i - 1].nnode_;
1526  }
1527  }
1528  // count how many nodeindex_buffer are non-zero area nodes
1529  // iarea_short_long_ and narea_ only for ones
1530  // not related to the ReducedTree method
1531  // printf("%d ndbsize=%d\n", nrnmpi_myid, ndbsize);
1532  for (i = 0; i < ndbsize; ++i) {
1534  Node* nd = _nt->_v_node[nodeindex_buffer_[i]];
1535  if (i == tmp_index) {
1537  }
1538  if (nd->_classical_parent && nd->sec_node_index_ < nd->sec->nnode - 1) {
1539  if (nodeindex_rthost_[i] < 0) {
1540  ++narea_;
1541  }
1542  }
1543  }
1544  // printf("%d narea=%d iarea_short_long=%d\n", nrnmpi_myid, narea_, iarea_short_long_);
1545  if (narea_) {
1546  buf_area_indices_ = new int[narea_];
1547  area_node_indices_ = new int[narea_];
1548  k = 0;
1549  for (i = 0; i < ndbsize; ++i) {
1551  Node* nd = _nt->_v_node[nodeindex_buffer_[i]];
1552  if (nd->_classical_parent && nd->sec_node_index_ < nd->sec->nnode - 1) {
1553  buf_area_indices_[k] = 2 * i;
1555  ++k;
1556  }
1557  }
1558  }
1559 
1560  // look through the nthost list and fill in the area2buf receive buffer
1561  // indices for info sent to reducetrees (by definition,not on this machine)
1562  for (i = 0; i < nthost_; ++i) {
1563  MultiSplitTransferInfo& msti = msti_[i];
1564  if (msti.tag_ == 3) { // reduced tree related
1565  // but we only want to, not from, the reduced tree machine
1566  // any nodes nonzero area?
1567  // printf("%d i=%d nnode=%d nodeindex=%p host=%d rthost=%d\n", nrnmpi_myid, i,
1568  // msti.nnode_, msti.nodeindex_, msti.host_, msti.rthost_);
1569  if (msti.rthost_ != nrnmpi_myid)
1570  for (j = 0; j < msti.nnode_; ++j) {
1571  NrnThread* _nt = nrn_threads + msti.nodeindex_th_[j];
1572  int in = msti.nodeindex_[j];
1573  Node* nd = _nt->_v_node[in];
1574  if (nd->_classical_parent && nd->sec_node_index_ < nd->sec->nnode - 1) {
1575  // non-zero area
1576  // search the area2buf list
1577  for (k = 0; k < narea2buf_; ++k) {
1578  if (area2buf_[k].inode == in && area2buf_[k].ms->ithread == _nt->id) {
1579  break;
1580  }
1581  }
1582  assert(k < narea2buf_);
1583  Area2Buf& ab = area2buf_[k];
1584  // ok. fill in the ibuf info
1585  ab.ibuf[0] = msti.displ_ + 2 * j;
1586  ab.ibuf[1] = ab.ibuf[0] + 1;
1587  if (ab.n == 3) {
1588  // which offdiag item?
1589  int ioff;
1590  for (ioff = 0; ioff < msti.nnode_rt_; ++ioff) {
1591  if (msti.nd_rt_index_[ioff] == in &&
1592  msti.nd_rt_index_th_[ioff] == _nt->id) {
1593  break;
1594  }
1595  }
1596  assert(ioff < msti.nnode_rt_);
1597  ab.ibuf[2] = msti.displ_ + 2 * msti.nnode_ + ioff;
1598  }
1599 #if 0
1600 printf("%d in=%d n=%d ibuf %d %d %d\n", nrnmpi_myid, in, ab.n,
1601 ab.ibuf[0], ab.ibuf[1], (ab.n == 3) ? ab.ibuf[2] : -1);
1602 #endif
1603  }
1604  }
1605  }
1606  }
1607 
1608  // printf("%d leave exchange_setup\n", nrnmpi_myid);
1609  // nrnmpi_int_allgather(&n, nn, 1);
1610 
1611  delete[] nn;
1612  delete[] sid;
1613  delete[] inode;
1614  delete[] threadid;
1615  delete[] displ;
1616  delete[] allsid;
1617  delete[] vec2ms;
1618  delete[] mark;
1619  delete[] bb_relation;
1620  delete[] all_bb_relation;
1621  delete[] connects2short;
1622  delete[] rthost;
1623  delete[] rt;
1624 
1625  errno = 0;
1626 }
1627 
1628 // when d and rhs pointers are updated in the Node, they must be updated in
1629 // the reducedtree rmap and smap as well
1631  msc_->rt_map_update();
1632 }
1633 
1635  for (MultiSplit* ms: *multisplit_list_) {
1636  if (ms->rthost == nrnmpi_myid) { // reduced tree on this host
1637  assert(ms->rt_);
1638  assert(ms->rmap_index_ >= 0);
1639  assert(ms->smap_index_ >= 0);
1640  MultiSplitThread& t = mth_[ms->ithread];
1641  double** r = ms->rt_->rmap + ms->rmap_index_;
1642  double** s = ms->rt_->smap + ms->smap_index_;
1643  for (int j = 0; j < 2; ++j)
1644  if (ms->nd[j]) {
1645  *s++ = *r++ = &NODERHS(ms->nd[j]);
1646  *s++ = *r++ = &NODED(ms->nd[j]);
1647  }
1648  if (ms->nd[1]) { // do sid1a and sid1b as well
1649  assert(ms->back_index >= 0);
1650  *r++ = t.sid1A + t.backAindex_[ms->back_index];
1651  *r++ = t.sid1B + t.backBindex_[ms->back_index];
1652  }
1653  }
1654  }
1655  // also need to do the Area2RT.pd[3] where the
1656  // order is d, rhs, and possibly sid1A or sid1B
1657  for (int i = 0; i < narea2rt_; ++i) {
1658  Area2RT& art = area2rt_[i];
1659  MultiSplit& ms = *art.ms;
1660  NrnThread* _nt = nrn_threads + ms.ithread;
1661  art.pd[0] = &D(art.inode);
1662  art.pd[1] = &RHS(art.inode);
1663  if (art.n == 3) {
1664  MultiSplitThread& t = mth_[ms.ithread];
1665  if (art.inode == ms.nd[0]->v_node_index) {
1666  art.pd[2] = t.sid1A + t.backAindex_[ms.back_index];
1667  } else if (art.inode == ms.nd[1]->v_node_index) {
1668  art.pd[2] = t.sid1B + t.backBindex_[ms.back_index];
1669  } else {
1670  assert(0);
1671  }
1672  }
1673  }
1674 }
1675 
1677  int i, j, k, id;
1678  id = nrnmpi_myid;
1679  // assume only one thread when there is transfer info
1680  NrnThread* nt = nrn_threads;
1681  Printf("%d nthost_=%d\n", id, nthost_);
1682  for (i = 0; i < nthost_; ++i) {
1684  Printf("%d %d host=%d nnode=%d displ=%d\n", id, i, ms.host_, ms.nnode_, ms.displ_);
1685  for (j = 0; j < ms.nnode_; ++j) {
1686  k = ms.nodeindex_[j];
1687  Printf("%d %d %d %d %s %d\n",
1688  id,
1689  i,
1690  j,
1691  k,
1692  secname(nt->_v_node[k]->sec),
1693  nt->_v_node[k]->sec_node_index_);
1694  }
1695  }
1696 }
1697 
1699  int sid,
1700  int nt,
1701  int* mark,
1702  int* allsid,
1703  int* all_bb_relation) {
1704  int i;
1705  for (i = 0; i < nt; ++i) {
1706  if (mark[i] == -1 && allsid[i] == sid) {
1707  mark[i] = m;
1708  if (all_bb_relation[i] > 2) {
1709  int sid2 = all_bb_relation[i] - 3;
1710  reduced_mark(m, sid2, nt, mark, allsid, all_bb_relation);
1711  }
1712  }
1713  }
1714 }
1715 
1716 /*
1717 section ParallelContext.multisplit_connect(x, sid) is a generalization of
1718 root ParallelContext.splitcell_connect(adjacent_host).
1719 sid refers to a point and all points with the same sid are connected
1720 together with 0 resistance wires.
1721 If a connected set of sections has only 1 sid then that point will be the
1722 root and triangularization can proceed normally.
1723 If a connected set of sections has 2 sid then triangularization proceeds
1724 from the leaves to the backbone between the 2 sid which forms a cable.
1725 Then special triangularization with fill-in of the sid1A column
1726 proceeds from the backbone element adjacent to sid1 and then with fill-in
1727 of the sid1B column (which modifies the sid1A column) from the backbone end
1728 adjacent to sid0
1729 in the hope that the fill-in elements at the sid are small enough
1730 to ignore in the interprocessor jacobian element matrix exchange. Note
1731 that gaussian elimination in the backbone is more complex as normal
1732 tree elimination with the same number of nodes.
1733 We defer the implementation of more than 2 sid in a subtree.
1734 */
1735 
1736 /* Gaussian elimination strategy for the backbone.
1737 There are two different strategies that optimize either minimum
1738 number of matrix operations that is best for long backbones, or
1739 numerical stability by minimizing the far off diagonal just before
1740 exchange which would be best for short (tightly coupled) backbones.
1741 
1742 For long backbones, it is simplest to begin triangularization
1743 using the equations adjacent to the center as pivot equations.
1744 This fills in a single column. But in the trivial case of the center
1745 being the only interior point, there are no triangularization operations
1746 before exchange.
1747 1 xx
1748 2 xxx
1749 3 xxx
1750 4 xxx
1751 5 xxx
1752 6 xxx
1753 7 xx
1754 
1755 
1756 1 x x
1757 2 xx x
1758 3 xxx
1759 4 xxx
1760 5 xxx
1761 6 x xx
1762 7 x x
1763 
1764 exchange occurs
1765 
1766 1 x x
1767 2 x x
1768 3 xx
1769 4 x
1770 5 xx
1771 6 x x
1772 7 x x
1773 
1774 For short backbones and greatest stability/accuracy and still simple but
1775 many more operations, one begins adjacent to the sids. Now,
1776 in the trivial case of the center being the only interior point, it is
1777 used to reduce the off diagonal element. If there is no interior point
1778 nothing happens.
1779 
1780 1 xx
1781 2 xxx triangularize starting with equation 6 (cannot start with
1782 3 xxx equation 1 or 7 because we do not know the correct value for d)
1783 4 xxx this eliminates the a elements but at the cost of introducing
1784 5 xxx A elements in column 7
1785 6 xxx
1786 7 xx
1787 
1788 1 x x cannot use 1, instead do the same starting at 2
1789 2 xx x this eliminates b elements but at the cost of introducing
1790 3 xx x B elements in column 1. Also A elements are modified.
1791 4 xx x
1792 5 xx x
1793 6 xxx
1794 7 xx
1795 
1796 1 x x
1797 2 xx x
1798 3 x x x
1799 4 x x x
1800 5 x x x
1801 6 x xx
1802 7 x x
1803 
1804 exchange occurs (modified d for 1 and 7
1805 solve the 1 and 7 2x2 equations then back substitute
1806 
1807 */
1808 
1810  int id, i, it;
1811  for (id = 0; id < nrnmpi_numprocs; ++id) {
1812  nrnmpi_barrier();
1813  if (id == nrnmpi_myid) {
1814  Printf("myid=%d\n", id);
1815  Printf(" MultiSplit %ld\n", multisplit_list_->size());
1816  for (i = 0; i < multisplit_list_->size(); ++i) {
1817  MultiSplit* ms = (*multisplit_list_)[i];
1818  Printf(" %2d bbs=%d bi=%-2d rthost=%-4d %-4d %s{%d}",
1819  i,
1820  ms->backbone_style,
1821  ms->back_index,
1822  ms->rthost,
1823  ms->sid[0],
1824  secname(ms->nd[0]->sec),
1825  ms->nd[0]->sec_node_index_);
1826  if (ms->nd[1]) {
1827  Printf(" %-4d %s{%d}",
1828  ms->sid[1],
1829  secname(ms->nd[1]->sec),
1830  ms->nd[1]->sec_node_index_);
1831  }
1832  Printf("\n");
1833  }
1834  for (it = 0; it < nrn_nthread; ++it) {
1835  NrnThread* _nt = nrn_threads + it;
1836  MultiSplitThread& t = mth_[it];
1837  Printf(" backbone_begin=%d backbone_long_begin=%d backbone_interior_begin=%d\n",
1838  t.backbone_begin,
1839  t.backbone_long_begin,
1840  t.backbone_interior_begin);
1841  Printf(" backbone_sid1_begin=%d backbone_long_sid1_begin=%d backbone_end=%d\n",
1842  t.backbone_sid1_begin,
1843  t.backbone_long_sid1_begin,
1844  t.backbone_end);
1845  Printf(" nbackrt_=%d i, backsid_[i], backAindex_[i], backBindex_[i]\n",
1846  t.nbackrt_);
1847  if (t.nbackrt_) {
1848  for (int i = 0; i < t.nbackrt_; ++i) {
1849  Printf(" %2d %2d %5d %5d",
1850  i,
1851  t.backsid_[i],
1852  t.backAindex_[i],
1853  t.backBindex_[i]);
1854  Node* nd = _nt->_v_node[t.backbone_begin + t.backAindex_[i]];
1855  Printf(" %s{%d}", secname(nd->sec), nd->sec_node_index_);
1856  nd = _nt->_v_node[t.backbone_begin + t.backBindex_[i]];
1857  Printf(" %s{%d}", secname(nd->sec), nd->sec_node_index_);
1858  Printf("\n");
1859  }
1860  }
1861  }
1862  Printf(" ReducedTree %d\n", nrtree_);
1863  for (i = 0; i < nrtree_; ++i) {
1864  ReducedTree* rt = rtree_[i];
1865  Printf(" %d n=%d nmap=%d\n", i, rt->n, rt->nmap);
1866  rt->pr_map(tbsize, trecvbuf_);
1867  }
1868  Printf(" MultiSplitTransferInfo %d\n", nthost_);
1869  for (i = 0; i < nthost_; ++i) {
1871  Printf(" %d host=%d rthost=%d nnode=%d nnode_rt=%d size=%d tag=%d\n",
1872  i,
1873  m.host_,
1874  m.rthost_,
1875  m.nnode_,
1876  m.nnode_rt_,
1877  m.size_,
1878  m.tag_);
1879  if (m.nnode_) {
1880  Printf(" nodeindex=%p nodeindex_buffer = %p\n",
1881  m.nodeindex_,
1883  }
1884  }
1885  Printf(" ndbsize=%d i nodeindex_buffer_=%p nodeindex_rthost_=%p\n",
1886  ndbsize,
1889  if (ndbsize) {
1890  for (int i = 0; i < ndbsize; ++i) {
1891  Printf(" %d %d %d\n", i, nodeindex_buffer_[i], nodeindex_rthost_[i]);
1892  }
1893  }
1894  Printf(" tbsize=%d trecvbuf_=%p tsendbuf_=%p\n", tbsize, trecvbuf_, tsendbuf_);
1895  Printf("\n");
1896  }
1897  }
1898  nrnmpi_barrier();
1899 }
1900 
1901 // following uses the short backbone method (N form of the matrix)
1902 
1903 
1904 // What is a good order for the above? We already have a tree order structure
1905 // with a root. If there is a single sid involved then that should be the
1906 // root and we are conceptually the same as splitcell.cpp
1907 // For two sid, then the tree is ordered as (see above comment at the NOTE)
1908 
1910  msc_->solve();
1911 }
1912 
1914  msc_->mth_[nt->id].triang(nt);
1915  return nullptr;
1916 }
1918  if (nt->id == 0) {
1919  msc_->reduce_solve();
1920  }
1921  return nullptr;
1922 }
1924  msc_->mth_[nt->id].bksub(nt);
1925  return nullptr;
1926 }
1927 
1929  // if (t < .025) prstruct();
1930  // if (nrnmpi_myid == 0) pmat(true);
1931  NrnThread* nt = nrn_threads;
1932  MultiSplitThread& t = mth_[0];
1933  t.triang_subtree2backbone(nt);
1934  t.triang_backbone(nt);
1935  // if (nrnmpi_myid == 4) pmat(true);
1936  // pmat1("t");
1937  // printf("%d enter matrix exchange\n", nrnmpi_myid);
1938  matrix_exchange();
1939  // printf("%d leave matrix exchange\n", nrnmpi_myid);
1940  // pmat1("e");
1941  t.bksub_backbone(nt);
1942  t.bksub_subtrees(nt);
1943  // pmat(true);
1944  // nrnmpi_barrier(); // only possible if everyone is actually multisplit
1945  // abort();
1946 }
1947 
1950  triang_backbone(_nt);
1951 }
1953  matrix_exchange();
1954 }
1956  bksub_backbone(_nt);
1957  bksub_subtrees(_nt);
1958 }
1959 
1960 // In the typical case, all nodes connected to the same sids have 0 area
1961 // and thus the weighted average voltage is just sum RHS / sum D.
1962 // However, nrn_multisplit_nocap_v is somewhat of a misnomer because one or
1963 // more compartments (but not connected together with the same sid) may have
1964 // non-zero area and thus that voltage is correct and must be transferred
1965 // to all the connecting (with same sid) zero-area nodes. Furthermore
1966 // the sum RHS (of the zero area nodes) must be added to the RHS of non-zero
1967 // area node (after that non-zero RHS has been computed back in the caller).
1968 // Lastly, that sum of RHS has to be adjusted using the sum of D of the
1969 // zero area nodes so that the RHS is proper with respect to the correct
1970 // value of V.
1971 // So. When there is no non-zero area node for a sid, the voltage is
1972 // sum rhs / sum d. For a non-zero area node the V must be sent and the
1973 // sum rhs and sum d from the others must be received and the true rhs
1974 // put into adjust_rhs_ using rhs - d*v. For a zero-area
1975 // node (with same sid as non-zero area node), rhs and d must be sent, and
1976 // voltage can be computed after receiving the changed rhs and d by
1977 // rhs/d.
1978 
1983 }
1986 }
1989 }
1992 }
1993 
1995  int i;
1996  // scale the non-zero area node elements since those already
1997  // have the answer.
1998 
1999  // the problem here is that non-zero area v is correct,
2000  // but rhs ends up wrong for
2001  // non-zero area nodes (because current from zero area not added)
2002  // so encode v into D and sum of zero-area rhs will end up in
2003  // rhs.
2004  if (_nt->id == 0)
2005  for (i = 0; i < narea2buf_; ++i) {
2006  Area2Buf& ab = area2buf_[i];
2007  VEC_D(ab.inode) = 1e50; // sentinal
2008  VEC_RHS(ab.inode) = VEC_V(ab.inode) * 1e50;
2009  }
2010  // also scale the non-zero area elements on this host
2011  for (i = 0; i < narea2rt_; ++i) {
2012  Area2RT& ar = area2rt_[i];
2013  if (_nt->id == ar.ms->ithread) {
2014  VEC_D(ar.inode) = 1e50;
2015  VEC_RHS(ar.inode) = VEC_V(ar.inode) * 1e50;
2016  }
2017  }
2018 }
2020  if (_nt->id == 0) {
2022  }
2023 }
2025  // at this point, for zero area nodes, D is
2026  // 1.0, and the voltage is RHS/D).
2027  // So zero-area node information is fine.
2028  // But for non-zero area nodes, D is the sum of all zero-area
2029  // node d, and RHS is the sum of all zero-area node rhs.
2030  int i;
2031 
2032  if (_nt->id == 0)
2033  for (i = 0; i < narea2buf_; ++i) {
2034  Area2Buf& ab = area2buf_[i];
2035  int j = ab.inode;
2036  double afac = 100. / VEC_AREA(j);
2037  ab.adjust_rhs_ = (VEC_RHS(j) - VEC_D(j) * VEC_V(j)) * afac;
2038  // printf("%d nz1 %d D=%g RHS=%g V=%g afac=%g adjust=%g\n",
2039  // nrnmpi_myid, i, D(i), RHS(i), VEC_V(j), afac, ab.adjust_rhs_);
2040  }
2041  for (i = 0; i < narea2rt_; ++i) {
2042  Area2RT& ar = area2rt_[i];
2043  if (_nt->id == ar.ms->ithread) {
2044  int j = ar.inode;
2045  double afac = 100. / VEC_AREA(j);
2046  ar.adjust_rhs_ = (VEC_RHS(j) - VEC_D(j) * VEC_V(j)) * afac;
2047  // printf("%d nz2 %d D=%g RHS=%g V=%g afac=%g adjust=%g\n",
2048  // nrnmpi_myid, i, D(i), RHS(i), VEC_V(j), afac, ar.adjust_rhs_);
2049  }
2050  }
2051 }
2052 
2055 }
2056 
2058  int i;
2059  if (_nt->id == 0)
2060  for (i = 0; i < narea2buf_; ++i) {
2061  Area2Buf& ab = area2buf_[i];
2062  VEC_RHS(ab.inode) += ab.adjust_rhs_;
2063  }
2064  // also scale the non-zero area elements on this host
2065  for (i = 0; i < narea2rt_; ++i) {
2066  Area2RT& ar = area2rt_[i];
2067  if (_nt->id == ar.ms->ithread) {
2068  // printf("%d adjust %d %g %g\n",
2069  // nrnmpi_myid, ar.inode, ar.adjust_rhs_, VEC_RHS(ar.inode));
2070  VEC_RHS(ar.inode) += ar.adjust_rhs_;
2071  }
2072  }
2073 }
2074 
2075 // Two phases: Send all the info from long backbones and receive
2076 // all the short backbone info for this machine.
2077 // Then process the 2x2 short backbone and send remaining (short)
2078 // backbone and receive the rest (long) of the backbone info.
2079 
2080 // The ReducedTree solve fits into this strategy receiving from long at the
2081 // same time as receiving from short and
2082 // sending to long at the same time as sending from short
2083 
2084 // The MultiSplitTransferInfo order is
2085 // this -> other
2086 // long -> short 0, ihost_long_long-1
2087 // long -> long ihost_long_long, ihost_long_reduced-1
2088 // long -> reduced ihost_long_reduced, ihost_reduced_long-1
2089 // reduced -> long ihost_reduced_long, ihost_short_long-1
2090 // short -> long ihost_short_long, nthost_
2091 
2093  int i, j, jj, k;
2094  double* tbuf;
2095  double rttime;
2096  double wt = nrnmpi_wtime();
2097  NrnThread* _nt;
2098  // the mpi strategy is copied from the
2099  // cvode/examples_par/pvkxb.cpp exchange strategy
2100 
2101 #define EXCHANGE_ON 1
2102 #if EXCHANGE_ON
2103  // post all the receives
2104  for (i = 0; i < nthost_; ++i) {
2105  int tag;
2107  tag = mt.tag_;
2108  // receiving the result from a reduced tree is tag 4.
2109  if (tag == 3 && nrnmpi_myid != mt.rthost_) {
2110  tag = 4;
2111  }
2112  nrnmpi_postrecv_doubles(trecvbuf_ + mt.displ_, mt.size_, mt.host_, tag, &mt.request_);
2113 #if 0
2114 printf("%d post receive %d displ=%d size=%d host=%d tag=%d\n",
2115 nrnmpi_myid, i, mt.displ_, mt.size_, mt.host_, tag);
2116 #endif
2117  }
2118 
2119  // Note that we are assuming only one thread (_nt) when
2120  // mpi is used. (In order for D and RHS to make sense)
2121  // fill the send buffer with the long backbone info
2122  // i.e. long->short, long -> long, long -> reduced
2123  for (i = 0; i < ihost_reduced_long_; ++i) {
2125  j = 0;
2126  tbuf = tsendbuf_ + mt.displ_;
2127  for (jj = 0; jj < mt.nnode_; ++jj) {
2128  k = mt.nodeindex_[jj];
2129  _nt = nrn_threads + mt.nodeindex_th_[jj];
2130  tbuf[j++] = D(k);
2131  tbuf[j++] = RHS(k);
2132  }
2133  // each sent backbone will have added 2 to mt.nnode_rt_
2134  for (jj = 0; jj < mt.nnode_rt_; ++jj) {
2135  tbuf[j++] = *mt.offdiag_[jj];
2136  }
2137 #if 0
2138 //if (nrnmpi_myid == 4) {
2139 printf("%d send to %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2140 nrnmpi_myid, mt.host_, mt.nnode_, mt.nnode_rt_, mt.size_, mt.tag_);
2141 //}
2142 #endif
2143 #if 0
2144 //if (nrnmpi_myid == 4) {
2145  for (j = 0; j < mt.nnode_; ++j) {
2146 printf("%d send to %d tbuf[%d] = %g tbuf[%d] = %g from node %d\n",
2147 nrnmpi_myid, mt.host_, 2*j, tbuf[2*j], 2*j+1, tbuf[2*j+1], mt.nodeindex_[j]);
2148  }
2149  for (j = 0; j < mt.nnode_rt_; ++j) {
2150  int jj = 2*mt.nnode_ + j;
2151 printf("%d send to %d offdiag tbuf[%d] = %g\n",
2152 nrnmpi_myid, mt.host_, jj, tbuf[jj]);
2153  }
2154 //)
2155 #endif
2156  }
2157 
2158  // adjust area for any D, RHS, sid1A, sid1B going to a ReducedTree host
2159  for (i = 0; i < narea2buf_; ++i) {
2160  Area2Buf& ab = area2buf_[i];
2161  _nt = nrn_threads + ab.ms->ithread;
2162  double afac = 0.01 * VEC_AREA(ab.inode);
2163  tbuf = tsendbuf_;
2164  for (j = 0; j < ab.n; ++j) {
2165  tbuf[ab.ibuf[j]] *= afac;
2166 #if 0
2167 printf("%d area2buf * afac=%g i=%d j=%d node=%d ibuf=%d buf=%g\n", nrnmpi_myid, afac,
2168 i, j, ab.inode, ab.ibuf[j], tbuf[ab.ibuf[j]]);
2169 #endif
2170  }
2171  }
2172 
2173  // send long backbone info (long->short, long->long)
2174  for (i = 0; i < ihost_reduced_long_; ++i) {
2176  tbuf = tsendbuf_ + mt.displ_;
2177  nrnmpi_send_doubles(tbuf, mt.size_, mt.host_, mt.tag_);
2178 #if 0
2179 printf("%d post send %d displ=%d size=%d host=%d tag=%d\n",
2180 nrnmpi_myid, i, mt.displ_, mt.size_, mt.host_, mt.tag_);
2181 #endif
2182  }
2183 
2184 #if 0
2185  for (i=ihost_reduced_long_; i < nthost_; ++i) {
2187 printf("%d will send result to %d size=%d tag=%d\n", nrnmpi_myid, mt.host_, mt.size_, mt.tag_);
2188  }
2189 #endif
2190  // handle the reduced trees and short backbones
2191 
2192  // wait for receives from the long backbones needed by reduced tree
2193  // and short backbones to complete (reduced <- long, short <- long)
2194  for (i = ihost_reduced_long_; i < nthost_; ++i) {
2196 #if 0
2197 printf("%d wait one %d\n", nrnmpi_myid, mt.host_);
2198 #endif
2199  nrnmpi_wait(&mt.request_);
2200 #if 0
2201 printf("%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d displ=%d\n",
2202 nrnmpi_myid, mt.host_, mt.nnode_, mt.nnode_rt_, mt.size_, mt.tag_, mt.displ_);
2203 //for (j=0; j < mt.size_; ++j) { printf("%d receive tbuf[%d]=%g\n", nrnmpi_myid, j, trecvbuf_[mt.displ_ + j]);}
2204 #endif
2205  }
2206 #if 0
2207 for (i=0; i < tbsize_; ++i) { printf("%d trecvbuf[%d] = %g\n", nrnmpi_myid, i, trecvbuf_[i]);}
2208 #endif
2209 
2210  // measure reducedtree,short backbone computation time
2211  rttime = nrnmpi_wtime();
2212 
2213  // adjust area in place for any D, RHS, sid1A, sid1B on this host
2214  // going to ReducedTree on this host
2215  // if (narea2rt_) printf("%d adjust area in place\n", nrnmpi_myid);
2216  for (i = 0; i < narea2rt_; ++i) {
2217  Area2RT& ar = area2rt_[i];
2218  NrnThread* _nt = nrn_threads + ar.ms->ithread;
2219  double afac = 0.01 * VEC_AREA(ar.inode);
2220  for (j = 0; j < ar.n; ++j) {
2221  *ar.pd[j] *= afac;
2222  }
2223  }
2224 #endif // EXCHANGE_ON
2225 
2226  for (i = 0; i < nrtree_; ++i) {
2227  rtree_[i]->solve();
2228  }
2229 
2230 #if EXCHANGE_ON
2231  // measure reducedtree,short backbone computation time
2232  nrnmpi_rtcomp_time_ += nrnmpi_wtime() - rttime;
2233 
2234  // send reduced and short backbone info (reduced -> long, short -> long)
2235  for (i = ihost_reduced_long_; i < nthost_; ++i) {
2236  int tag;
2238  tbuf = tsendbuf_ + mt.displ_;
2239  // printf("%d send result to %d size=%d tag=%d\n", nrnmpi_myid, mt.host_, mt.size_,
2240  // mt.tag_);
2241  tag = mt.tag_;
2242  // sending result from reduced tree means tag is 4
2243  if (tag == 3) {
2244  tag = 4;
2245  }
2246  nrnmpi_send_doubles(tbuf, mt.size_, mt.host_, tag);
2247  // only moderately wasteful (50%) in sending back irrelevant
2248  // off diag info. Required is only 2*mt.nnode_. But these
2249  // messages are pretty short, anyway.
2250  }
2251 
2252  // handle the long backbones
2253 
2254  // wait for all remaining receives to complete
2255  // long <- short, long <- long, long <- reduced
2256  for (i = 0; i < ihost_reduced_long_; ++i) {
2258 #if 0
2259 printf("%d wait two %d\n", nrnmpi_myid, mt.host_);
2260 #endif
2261  nrnmpi_wait(&mt.request_);
2262 #if 0
2263 printf("%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2264 nrnmpi_myid, mt.host_, mt.nnode_, mt.nnode_rt_, mt.size_, mt.tag_);
2265 #endif
2266  }
2267 
2268  // add to matrix (long <- long)
2269  // and also (long <- short) even though in principle should
2270  // not add those since already solved on short backbone machine
2271  // we can add since the ones sent were scaled by 1e30 which
2272  // will eliminate the effect of existing D, RHS, and S1A or S1B
2273  // as well as making the possible area scaling irrelevant
2274  // being able to handle them together means we do not need
2275  // 0 to ihost_long_long - 1 to refer to long <-> short
2276  for (i = 0; i < ihost_reduced_long_; ++i) {
2278  j = 0;
2279  tbuf = trecvbuf_ + mt.displ_;
2280  for (jj = 0; jj < mt.nnode_; ++jj) {
2281  k = mt.nodeindex_[jj];
2282  _nt = nrn_threads + mt.nodeindex_th_[jj];
2283  D(k) += tbuf[j++];
2284  RHS(k) += tbuf[j++];
2285  }
2286 #if 0
2287 if (nrnmpi_myid == 4) {
2288  for (j = 0; j < mt.nnode_; ++j) {
2289 printf("%d received from %d tbuf[%d] = %g tbuf[%d] = %g added to node %d\n",
2290 nrnmpi_myid, mt.host_, 2*j, tbuf[2*j], 2*j+1, tbuf[2*j+1], mt.nodeindex_[j]);
2291  }
2292 }
2293 #endif
2294  }
2295 #endif // EXCHANGE_ON
2296 
2297 #if PARANEURON
2299 #endif
2300  errno = 0;
2301 }
2302 
2303 void MultiSplitControl::matrix_exchange_nocap() { // copy of matrix_exchange() and modified
2304  int i, j, jj, k;
2305  double* tbuf;
2306  double rttime;
2307  double wt = nrnmpi_wtime();
2308  NrnThread* _nt;
2309  // the mpi strategy is copied from the
2310  // cvode/examples_par/pvkxb.cpp exchange strategy
2311 
2312 #define EXCHANGE_ON 1
2313 #if EXCHANGE_ON
2314  // post all the receives
2315  for (i = 0; i < nthost_; ++i) {
2316  int tag;
2318  tag = mt.tag_;
2319  // receiving the result from a reduced tree is tag 4.
2320  if (tag == 3 && nrnmpi_myid != mt.rthost_) {
2321  tag = 4;
2322  }
2323  nrnmpi_postrecv_doubles(trecvbuf_ + mt.displ_, mt.size_, mt.host_, tag, &mt.request_);
2324 #if 0
2325 printf("%d post receive %d displ=%d size=%d host=%d tag=%d\n",
2326 nrnmpi_myid, i, mt.displ_, mt.size_, mt.host_, tag);
2327 #endif
2328  }
2329 
2330  // fill the send buffer with the long backbone info
2331  // i.e. long->short, long -> long, long -> reduced
2332  for (i = 0; i < ihost_reduced_long_; ++i) {
2334  j = 0;
2335  tbuf = tsendbuf_ + mt.displ_;
2336  for (jj = 0; jj < mt.nnode_; ++jj) {
2337  k = mt.nodeindex_[jj];
2338  _nt = nrn_threads + mt.nodeindex_th_[jj];
2339  tbuf[j++] = D(k);
2340  tbuf[j++] = RHS(k);
2341  }
2342  // each sent backbone will have added 2 to mt.nnode_rt_
2343  for (jj = 0; jj < mt.nnode_rt_; ++jj) {
2344  tbuf[j++] = *mt.offdiag_[jj];
2345  }
2346 #if 0
2347 //if (nrnmpi_myid == 4) {
2348 printf("%d send to %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2349 nrnmpi_myid, mt.host_, mt.nnode_, mt.nnode_rt_, mt.size_, mt.tag_);
2350 //}
2351 #endif
2352 #if 0
2353 //if (nrnmpi_myid == 4) {
2354  for (j = 0; j < mt.nnode_; ++j) {
2355 printf("%d send to %d tbuf[%d] = %g tbuf[%d] = %g from node %d\n",
2356 nrnmpi_myid, mt.host_, 2*j, tbuf[2*j], 2*j+1, tbuf[2*j+1], mt.nodeindex_[j]);
2357  }
2358  for (j = 0; j < mt.nnode_rt_; ++j) {
2359  int jj = 2*mt.nnode_ + j;
2360 printf("%d send to %d offdiag tbuf[%d] = %g\n",
2361 nrnmpi_myid, mt.host_, jj, tbuf[jj]);
2362  }
2363 //}
2364 #endif
2365  }
2366 
2367  // send long backbone info (long->short, long->long)
2368  for (i = 0; i < ihost_reduced_long_; ++i) {
2370  tbuf = tsendbuf_ + mt.displ_;
2371  nrnmpi_send_doubles(tbuf, mt.size_, mt.host_, mt.tag_);
2372 #if 0
2373 printf("%d post send %d displ=%d size=%d host=%d tag=%d\n",
2374 nrnmpi_myid, i, mt.displ_, mt.size_, mt.host_, mt.tag_);
2375 #endif
2376  }
2377 
2378 #if 0
2379  for (i=ihost_reduced_long_; i < nthost_; ++i) {
2381 printf("%d will send result to %d size=%d tag=%d\n", nrnmpi_myid, mt.host_, mt.size_, mt.tag_);
2382  }
2383 #endif
2384  // handle the reduced trees and short backbones
2385 
2386  // wait for receives from the long backbones needed by reduced tree
2387  // and short backbones to complete (reduced <- long, short <- long)
2388  for (i = ihost_reduced_long_; i < nthost_; ++i) {
2390 #if 0
2391 printf("%d wait one %d\n", nrnmpi_myid, mt.host_);
2392 #endif
2393  nrnmpi_wait(&mt.request_);
2394 #if 0
2395 printf("%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d displ=%d\n",
2396 nrnmpi_myid, mt.host_, mt.nnode_, mt.nnode_rt_, mt.size_, mt.tag_, mt.displ_);
2397 //for (j=0; j < mt.size_; ++j) { printf("%d receive tbuf[%d]=%g\n", nrnmpi_myid, j, trecvbuf_[mt.displ_ + j]);}
2398 #endif
2399  }
2400 #if 0
2401 for (i=0; i < tbsize_; ++i) { printf("%d trecvbuf[%d] = %g\n", nrnmpi_myid, i, trecvbuf_[i]);}
2402 #endif
2403 
2404  // measure reducedtree,short backbone computation time
2405  rttime = nrnmpi_wtime();
2406 
2407 #endif // EXCHANGE_ON
2408 
2409  // remember V may be encoded in D
2410  for (i = 0; i < nrtree_; ++i) {
2411  rtree_[i]->nocap();
2412  }
2413 
2414 #if EXCHANGE_ON
2415  // replace in matrix
2416  // for zero area nodes, D is 1.0 and RHS is V from zero area node.
2417  // for non-zero area nodes, they are sum from zero area.
2418  for (i = ihost_short_long_; i < nthost_; ++i) {
2420  j = 0;
2421  tbuf = trecvbuf_ + mt.displ_;
2422  for (jj = 0; jj < mt.nnode_; ++jj) {
2423  k = mt.nodeindex_[jj];
2424  _nt = nrn_threads + mt.nodeindex_th_[jj];
2425  D(k) = tbuf[j++];
2426  RHS(k) = tbuf[j++];
2427  }
2428 #if 0
2429  for (j = 0; j < mt.nnode_; ++j) {
2430 printf("%d received from %d tbuf[%d] = %g tbuf[%d] = %g added to node %d\n",
2431 nrnmpi_myid, mt.host_, 2*j, tbuf[2*j], 2*j+1, tbuf[2*j+1], mt.nodeindex_[j]);
2432  }
2433 #endif
2434  }
2435  // measure reducedtree,short backbone computation time
2436  nrnmpi_rtcomp_time_ += nrnmpi_wtime() - rttime;
2437 
2438  // send reduced and short backbone info (reduced -> long, short -> long)
2439  for (i = ihost_reduced_long_; i < nthost_; ++i) {
2440  int tag;
2442  tbuf = tsendbuf_ + mt.displ_;
2443  // printf("%d send result to %d size=%d tag=%d\n", nrnmpi_myid, mt.host_, mt.size_,
2444  // mt.tag_);
2445  tag = mt.tag_;
2446  // sending result from reduced tree means tag is 4
2447  if (tag == 3) {
2448  tag = 4;
2449  }
2450  nrnmpi_send_doubles(tbuf, mt.size_, mt.host_, tag);
2451  // only moderately wasteful (50%) in sending back irrelevant
2452  // off diag info. Required is only 2*mt.nnode_. But these
2453  // messages are pretty short, anyway.
2454  }
2455 
2456  // handle the long backbones
2457 
2458  // wait for all remaining receives to complete
2459  // long <- short, long <- long, long <- reduced
2460  for (i = 0; i < ihost_reduced_long_; ++i) {
2462 #if 0
2463 printf("%d wait two %d\n", nrnmpi_myid, mt.host_);
2464 #endif
2465  nrnmpi_wait(&mt.request_);
2466 #if 0
2467 printf("%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2468 nrnmpi_myid, mt.host_, mt.nnode_, mt.nnode_rt_, mt.size_, mt.tag_);
2469 #endif
2470  }
2471 
2472  // replace in matrix
2473  // for zero area nodes, D is 1.0 and RHS is V from zero area node.
2474  // for non-zero area nodes, they are sum from zero area.
2475  for (i = 0; i < ihost_reduced_long_; ++i) {
2477  j = 0;
2478  tbuf = trecvbuf_ + mt.displ_;
2479  for (jj = 0; jj < mt.nnode_; ++jj) {
2480  k = mt.nodeindex_[jj];
2481  _nt = nrn_threads + mt.nodeindex_th_[jj];
2482  D(k) = tbuf[j++];
2483  RHS(k) = tbuf[j++];
2484  }
2485 #if 0
2486 if (nrnmpi_myid == 4) {
2487  for (j = 0; j < mt.nnode_; ++j) {
2488 printf("%d received from %d tbuf[%d] = %g tbuf[%d] = %g added to node %d\n",
2489 nrnmpi_myid, mt.host_, 2*j, tbuf[2*j], 2*j+1, tbuf[2*j+1], mt.nodeindex_[j]);
2490  }
2491 }
2492 #endif
2493  }
2494 #endif // EXCHANGE_ON
2495 
2496 #if PARANEURON
2498 #endif
2499  errno = 0;
2500 }
2501 
2503  // printf("ReducedTree::ReducedTree(%d, %d)\n", rank, mapsize);
2504  int i;
2505  msc = ms;
2506  n = rank;
2507  assert(n > 0);
2508  assert(mapsize > 0);
2509  ip = new int[n];
2510  rhs = new double[4 * n];
2511  d = rhs + n;
2512  a = d + n;
2513  b = a + n;
2514 
2515  n2 = 2 * n;
2516  n4 = 4 * n;
2517  nmap = mapsize;
2518  smap = new double*[nmap];
2519  rmap = new double*[nmap];
2520  ismap = new int[nmap];
2521  irmap = new int[nmap];
2522  nzindex = new int[n];
2523  rmap2smap_index = new int[nmap];
2524  v = new double[n];
2525  nsmap = 0;
2526  irfill = 0;
2527  for (i = 0; i < nmap; ++i) {
2528  smap[i] = 0;
2529  ismap[i] = -1;
2530  rmap[i] = 0;
2531  irmap[i] = -1;
2532  rmap2smap_index[i] = -1;
2533  }
2534 }
2535 
2537  delete[] ip;
2538  delete[] rhs;
2539  delete[] smap;
2540  delete[] ismap;
2541  delete[] rmap;
2542  delete[] irmap;
2543  delete[] nzindex;
2544  delete[] v;
2545  delete[] rmap2smap_index;
2546 }
2547 
2549  // remember that info from zero area nodes is D=d and RHS=rhs
2550  // and from non-zero area is D=1e50 and RHS=v*1e50
2551  // on return, the info for zero area if no nonzero should be
2552  // D=sum d and RHS=sum rhs
2553  // the info for zero area if nonzero should be D=1.0 and RHS=v
2554  // and the info for nonzero should be D=sum d and RHS=sum rhs.
2555  int i;
2556  for (i = 0; i < n; ++i) {
2557  rhs[i] = 0.0;
2558  d[i] = 0.0;
2559  nzindex[i] = -1;
2560  }
2561  // not supposed to add when D=1e50. Note that order in rmap is
2562  // rhs followed by d
2563  for (i = 0; i < nmap; i += 2) {
2564  int j = irmap[i]; // reduced matrix row index for rhs
2565  if (*rmap[i + 1] == 1e50) {
2566  v[j] = *rmap[i] * 1e-50;
2567  nzindex[j] = rmap2smap_index[i];
2568  } else {
2569  rhs[j] += *rmap[i];
2570  d[j] += *rmap[i + 1];
2571  }
2572  }
2573 #if 0
2574  for (i=0; i < n; ++i) {
2575  printf("%d ReducedTree %2d %12.5g %12.5g %d %12.5g\n",
2576  nrnmpi_myid, i, d[i], rhs[i], nzindex[i], v[i]);
2577  }
2578 #endif
2579  for (i = 0; i < nsmap; i += 2) {
2580  int j = ismap[i]; // reduced matrix matrix row index for rhs
2581  if (nzindex[j] == -1 || i == nzindex[j]) {
2582  // for the non-zero area nodes and zero area nodes
2583  // when no non-zero area node involved
2584  *smap[i] = rhs[j];
2585  *smap[i + 1] = d[j];
2586  } else {
2587  // for the zero area nodes
2588  *smap[i] = v[j];
2589  *smap[i + 1] = 1.0;
2590  }
2591  }
2592 }
2593 
2595  int i;
2596  double p;
2597  gather();
2598 #if 0
2599  for (i=0; i < n; ++i) {
2600  printf("%d ReducedTree %2d %12.5g %12.5g %2d %12.5g %12.5g\n",
2601  nrnmpi_myid, i, b[i], d[i], ip[i], ip[i] >= 0?a[i]:0., rhs[i]);
2602  }
2603 #endif
2604  // triangularization
2605  for (i = n - 1; i > 0; --i) {
2606  p = a[i] / d[i];
2607  d[ip[i]] -= p * b[i];
2608  rhs[ip[i]] -= p * rhs[i];
2609  }
2610  // back substitution
2611  rhs[0] /= d[0];
2612  for (i = 1; i < n; ++i) {
2613  rhs[i] -= b[i] * rhs[ip[i]];
2614  rhs[i] /= d[i];
2615  }
2616 #if 0
2617  for (i=0; i < n; ++i) {
2618  printf("%d Result %2d %12.5g\n", nrnmpi_myid, i, rhs[i]);
2619  }
2620 #endif
2621  scatter();
2622 }
2623 
2625  int i;
2626  for (i = 0; i < n4; ++i) {
2627  rhs[i] = 0.0;
2628  }
2629  for (i = 0; i < nmap; ++i) {
2630  rhs[irmap[i]] += *rmap[i];
2631  }
2632 #if 0
2633 for (i=0; i < nmap; ++i){
2634 printf("%d %p gather i=%d ie=%d %g\n", nrnmpi_myid, this, i, irmap[i], *rmap[i]);
2635 }
2636 #endif
2637 }
2638 
2640  int i;
2641  // scatter the result for this host
2642  // fill the send buffer for other hosts
2643 #if 0
2644 for (i=0; i < nsmap; i += 2) {
2645 printf("%d enter scatter %d %p %g %p %g\n", nrnmpi_myid, i,
2646 smap[i], *smap[i], smap[i+1], *smap[i+1]);
2647 }
2648 #endif
2649  // printf("nsmap=%d\n", nsmap);
2650  for (i = 0; i < nsmap; i += 2) {
2651  // printf("i=%d ismap[i]=%d\n", i, ismap[i]);
2652  *smap[i] = 1e30 * rhs[ismap[i]];
2653  *smap[i + 1] = 1e30;
2654  }
2655 #if 0
2656 for (i=0; i < nsmap; i += 2) if (i > 10){
2657 printf("%d leave scatter %d %p %g %p %g\n", nrnmpi_myid, i,
2658 smap[i], *smap[i], smap[i+1], *smap[i+1]);
2659 }
2660 #endif
2661 }
2662 
2663 void ReducedTree::pr_map(int tsize, double* trbuf) {
2664  int i;
2665  Printf(" rmap\n");
2666  for (i = 0; i < nmap; ++i) {
2667  for (int it = 0; it < nrn_nthread; ++it) {
2668  NrnThread* nt = nrn_threads + it;
2669  MultiSplitThread& t = msc_->mth_[it];
2670  int nb = t.backbone_end - t.backbone_begin;
2671  if (rmap[i] >= trbuf && rmap[i] < (trbuf + tsize)) {
2672  Printf(" %2d rhs[%2d] += tbuf[%ld]\n", i, irmap[i], rmap[i] - trbuf);
2673  }
2674  if (rmap[i] >= nt->_actual_rhs && rmap[i] < (nt->_actual_rhs + nt->end)) {
2675  Node* nd = nt->_v_node[rmap[i] - nt->_actual_rhs];
2676  Printf(" %2d rhs[%2d] rhs[%d] += rhs[%ld] \t%s{%d}\n",
2677  i,
2678  irmap[i],
2679  irmap[i],
2680  rmap[i] - nt->_actual_rhs,
2681  secname(nd->sec),
2682  nd->sec_node_index_);
2683  }
2684  if (rmap[i] >= nt->_actual_d && rmap[i] < (nt->_actual_d + nt->end)) {
2685  Printf(" %2d rhs[%2d] d[%d] += d[%ld]\n",
2686  i,
2687  irmap[i],
2688  irmap[i] - n,
2689  rmap[i] - nt->_actual_d);
2690  }
2691  if (rmap[i] >= t.sid1A && rmap[i] < (t.sid1A + nb)) {
2692  Printf(" %2d rhs[%2d] a[%d] += sid1A[%ld]",
2693  i,
2694  irmap[i],
2695  irmap[i] - 2 * n,
2696  rmap[i] - t.sid1A);
2697  int j = (rmap[i] - t.sid1A) + t.backbone_begin;
2698  Node* nd = nt->_v_node[j];
2699  Printf(" \tA(%d) %s{%d}", j, secname(nd->sec), nd->sec_node_index_);
2700  Printf("\n");
2701  }
2702  if (rmap[i] >= t.sid1B && rmap[i] < (t.sid1B + nb)) {
2703  Printf(" %2d rhs[%2d] b[%d] += sid1B[%ld]",
2704  i,
2705  irmap[i],
2706  irmap[i] - 3 * n,
2707  rmap[i] - t.sid1B);
2708  int j = (rmap[i] - t.sid1B) + t.backbone_begin;
2709  Node* nd = nt->_v_node[j];
2710  Printf("\tB(%d) %s{%d}", j, secname(nd->sec), nd->sec_node_index_);
2711  Printf("\n");
2712  }
2713  }
2714  }
2715 }
2716 
2717 void ReducedTree::reorder(int j, int nt, int* mark, int* allbbr, int* allsid) {
2718  // specify ip and modify s2rt so they have tree order consistency
2719  // there should be n-1 edges, so
2720  if (n == 1) {
2721  ip[0] = -1;
2722  return;
2723  }
2724  int* e1 = new int[n - 1];
2725  int* e2 = new int[n - 1];
2726  int* order = new int[n];
2727  int* sid = new int[n];
2728  int singlesid = -1;
2729  int i;
2730  for (i = 0; i < n; ++i) {
2731  order[i] = -1;
2732  }
2733  // fill in the edges.
2734  int ne = n - 1;
2735  int ie = 0;
2736  for (i = 0; i < nt; ++i) {
2737  if (mark[i] == j && allbbr[i] == 2) {
2738  singlesid = allsid[i];
2739  }
2740  if (mark[i] == j && allbbr[i] > 2 && allsid[i] < allbbr[i] - 3) {
2741  // printf("i=%d ie=%d ne=%d mark=%d allsid=%d allbbr=%d\n", i, ie, ne, mark[i],
2742  // allsid[i], allbbr[i]-3);
2743  assert(ie < ne);
2744  const auto& e1ieiter = s2rt->find(allsid[i]);
2745  nrn_assert(e1ieiter != s2rt->end());
2746  e1[ie] = e1ieiter->second;
2747  sid[e1[ie]] = allsid[i];
2748  const auto& e2ieiter = s2rt->find(allbbr[i] - 3);
2749  nrn_assert(e2ieiter != s2rt->end());
2750  e2[ie] = e2ieiter->second;
2751  sid[e2[ie]] = allbbr[i] - 3;
2752  ++ie;
2753  }
2754  }
2755  assert(ie == ne);
2756  if (ne == 0) {
2757  assert(singlesid >= 0);
2758  sid[0] = singlesid;
2759  }
2760  // order the elements starting at 0 // also fill in the ip vector
2761  // should replace by a O(n) or at worst a O(nlogn) algorithm
2762  ip[0] = -1;
2763  order[0] = 0;
2764  int ordered = 1;
2765  while (ordered < n) {
2766  int old = ordered;
2767  for (i = 0; i < ne; ++i) {
2768  if (e1[i] >= 0) {
2769  if (order[e1[i]] >= 0) {
2770  assert(order[e2[i]] == -1);
2771  ip[ordered] = order[e1[i]];
2772  order[e2[i]] = ordered++;
2773  e1[i] = -1; // edge handled
2774  e2[i] = -1;
2775  } else if (order[e2[i]] >= 0) {
2776  assert(order[e1[i]] == -1);
2777  ip[ordered] = order[e2[i]];
2778  order[e1[i]] = ordered++;
2779  e1[i] = -1; // edge handled
2780  e2[i] = -1;
2781  }
2782  }
2783  }
2784  assert(ordered > old); // if tree then accomplished something
2785  }
2786 
2787  // reset the s2rt ranks for the sids
2788  for (i = 0; i < n; ++i) {
2789  (*s2rt)[sid[i]] = order[i];
2790  }
2791 
2792 #if 0
2793  printf("%d ReducedTree n=%d nmap=%d\n", nrnmpi_myid, n, nmap);
2794  for (i=0; i < n; ++i) {
2795  printf("%d i=%d sid=%d sid2i=%d\n", nrnmpi_myid, i, sid[i], (*s2rt)[sid[i]]);
2796  }
2797  for (i=0; i < n; ++i) {
2798  printf("%d i=%d ip=%d\n", nrnmpi_myid, i, ip[i]);
2799  }
2800 #endif
2801 
2802  delete[] e1;
2803  delete[] e2;
2804  delete[] order;
2805  delete[] sid;
2806 }
2807 
2808 void ReducedTree::fillrmap(int sid1, int sid2, double* pd) {
2809  const auto& sid1_iter = s2rt->find(sid1);
2810  nrn_assert(sid1_iter != s2rt->end());
2811  const int i = sid1_iter->second;
2812  int j;
2813 
2814  // type order is RHS, D, A, B
2815  if (sid2 < 0) { // RHS
2816  j = i;
2817  } else if (sid2 == sid1) { // D
2818  j = i + n;
2819  } else { // A or B?
2820  const auto& sid2_iter = s2rt->find(sid2);
2821  nrn_assert(sid2_iter != s2rt->end());
2822  j = sid2_iter->second;
2823  if (ip[i] == j) { // A
2824  j = i + 2 * n;
2825  } else if (ip[j] == i) { // B
2826  j = j + 3 * n;
2827  } else {
2828  assert(0);
2829  }
2830  }
2831  // in most cases we could have done RHS and D together since they
2832  // are adjacent in the receive buffer. However they are not adjacent
2833  // for the sids on this machine.
2834  // printf("%d fillrmap sid1=%d sid2=%d i=%d j=%d irfill=%d\n", nrnmpi_myid, sid1, sid2, i, j,
2835  // irfill);
2836  irmap[irfill] = j;
2837  rmap[irfill] = pd;
2839  irfill += 1;
2840 }
2841 
2842 void ReducedTree::fillsmap(int sid, double* prhs, double* pd) {
2843  const auto& sid_iter = s2rt->find(sid);
2844  nrn_assert(sid_iter != s2rt->end());
2845  const int i = sid_iter->second;
2846 
2847  // printf("%d fillsmap sid=%d i=%d nsmap=%d\n", nrnmpi_myid, sid, i, nsmap);
2848  ismap[nsmap] = i;
2849  smap[nsmap] = prhs;
2850  ismap[nsmap + 1] = i;
2851  smap[nsmap + 1] = pd;
2852  nsmap += 2;
2853 }
2854 
2855 // triangularize all subtrees, results in a backbone tri-diagonal
2857  int i, ip;
2858  double p;
2859  // eliminate a of the subtrees
2860  for (i = i3 - 1; i >= backbone_end; --i) {
2861  ip = _nt->_v_parent_index[i];
2862  p = A(i) / D(i);
2863  D(ip) -= p * B(i);
2864  RHS(ip) -= p * RHS(i);
2865  }
2866 #if 0
2867 printf("end of triang_subtree2backbone\n");
2868 for (i=i1; i < backbone_end; ++i) {
2869  printf("i=%d D=%g RHS=%g\n", i, D(i), RHS(i));
2870 }
2871 #endif
2872 }
2873 
2874 // backbone starts as tridiagonal, ends up in form of N.
2876  int i, j, ip, jp;
2877  double p;
2878  // begin the backbone triangularization. This eliminates a and fills in
2879  // sid1A column. Begin with pivot equation adjacent to sid1.
2880  for (i = backbone_sid1_begin; i < backbone_end; ++i) {
2881  // what is the equation index for A(i)
2882  j = _nt->_v_parent_index[i] - backbone_begin;
2883  S1A(j) = A(i);
2884  }
2885  for (i = backbone_sid1_begin - 1; i >= backbone_interior_begin; --i) {
2886  ip = _nt->_v_parent_index[i];
2887  j = i - backbone_begin;
2888  jp = ip - backbone_begin;
2889  p = A(i) / D(i);
2890  D(ip) -= p * B(i);
2891  RHS(ip) -= p * RHS(i);
2892  S1A(jp) = -p * S1A(j);
2893  // printf("iter i=%d ip=%d j=%d jp=%d D(ip)=%g RHS(ip)=%g S1A(ip)=%g\n",
2894  // i, ip, j, jp, D(ip), RHS(ip), S1A(jp));
2895  }
2896 
2897  // complete the backbone triangularization to form the N shaped matrix
2898  // This eliminates b and fills in the sid1B column. Begin with
2899  // pivot equation adjacent to sid0 (the root)
2901  ip = _nt->_v_parent_index[i];
2902  j = i - backbone_begin;
2903  if (ip < backbone_interior_begin) {
2904  S1B(j) = B(i);
2905  continue;
2906  }
2907  jp = ip - backbone_begin;
2908  p = B(i) / D(ip);
2909  RHS(i) -= p * RHS(ip);
2910  S1A(j) -= p * S1A(jp);
2911  S1B(j) = -p * S1B(jp);
2912  // printf("iter2 i=%d j=%d D(i)=%g RHS(i)=%g S1A(j)=%g S1B(j)=%g\n",
2913  // i, j, D(i), RHS(i), S1A(j), S1B(j));
2914  }
2915  // exactly like above but S1A happens to be D
2916  for (i = backbone_sid1_begin; i < backbone_end; ++i) {
2917  ip = _nt->_v_parent_index[i];
2918  j = i - backbone_begin;
2919  if (ip < backbone_interior_begin) {
2920  S1B(j) = B(i);
2921  continue;
2922  }
2923  jp = ip - backbone_begin;
2924  p = B(i) / D(ip);
2925  RHS(i) -= p * RHS(ip);
2926  D(i) -= p * S1A(jp);
2927  S1B(j) = -p * S1B(jp);
2928  // printf("iter3 i=%d j=%d D(i)=%g RHS(i)=%g S1B(j)=%g\n",
2929  // i, j, D(i), RHS(i), S1B(j));
2930  }
2931 #if 0
2932 printf("%d end of triang_backbone\n", nrnmpi_myid);
2933 for (i=i1; i < backbone_end; ++i) {
2934  printf("%d i=%d D=%g RHS=%g\n", nrnmpi_myid, i, D(i), RHS(i));
2935 }
2936 #endif
2937 }
2938 
2939 // exchange of d and rhs of sids has taken place and we can solve for the
2940 // backbone nodes
2942  int i, j;
2943  double a, b, p, vsid1;
2944  // need to solve the 2x2 consisting of sid0 and sid1 points
2945  // this part has already been done for short backbones
2947  // printf("%d, backbone %d %d %d %d %d %d\n", nrnmpi_myid,
2948  // backbone_begin, backbone_long_begin, backbone_interior_begin,
2949  // backbone_sid1_begin, backbone_long_sid1_begin, backbone_end);
2951  a = S1A(i - backbone_begin);
2952  b = S1B(j - backbone_begin);
2953  p = b / D(i);
2954  D(j) -= p * a;
2955  RHS(j) -= p * RHS(i);
2956  RHS(j) /= D(j);
2957  RHS(i) -= a * RHS(j);
2958  RHS(i) /= D(i);
2959  ++j;
2960  }
2961  // now sid0 and sid1 values give us the column values
2962  // do in two sweeps. Wish we could keep a single cell backbone
2963  // contiguous.
2964  // Do the S1A sweep on a per cell basis.
2965  for (i = backbone_sid1_begin; i < backbone_end; ++i) {
2966  vsid1 = RHS(i);
2967  for (j = _nt->_v_parent_index[i]; j >= backbone_interior_begin;
2968  j = _nt->_v_parent_index[j]) {
2969  RHS(j) -= S1A(j - backbone_begin) * vsid1;
2970  }
2971  }
2972  // For the S1B sweep we use sid0 root info for each interior node
2974  j = i - backbone_begin;
2975  RHS(i) -= S1B(j) * RHS(sid0i[j]);
2976  RHS(i) /= D(i);
2977  }
2978 #if 0
2979 printf("%d end of bksub_backbone\n", nrnmpi_myid);
2980 for (i=i1; i < backbone_end; ++i) {
2981  printf("%d i=%d RHS=%g\n", nrnmpi_myid, i, RHS(i));
2982 }
2983 #endif
2984 }
2985 
2987  int i, j;
2988  double a, b, p;
2989  // solve the 2x2 consisting of sid0 and sid1 points.
2991  for (i = backbone_begin; i < backbone_long_begin; ++i) {
2992  a = S1A(i - backbone_begin);
2993  b = S1B(j - backbone_begin);
2994 #if 0
2995 printf("%d part1 i=%d j=%d\n",
2996 nrnmpi_myid, i, j);
2997 printf("%d part1 d=%12.5g a=%12.5g rhs=%12.5g\n",
2998 nrnmpi_myid,D(i), a, RHS(i));
2999 printf("%d part1 b=%12.5g d=%12.5g rhs=%12.5g\n",
3000 nrnmpi_myid, b, D(j), RHS(j));
3001 #endif
3002  p = b / D(i);
3003  D(j) -= p * a;
3004  RHS(j) -= p * RHS(i);
3005  RHS(j) /= D(j);
3006  RHS(i) -= a * RHS(j);
3007  RHS(i) /= D(i);
3008 #if 0
3009 printf("%d part1 result %12.5g %12.5g\n",
3010 nrnmpi_myid, RHS(i), RHS(j));
3011 #endif
3012  ++j;
3013  }
3014  // Note that it no longer makes sense to add D and RHS
3015  // to the other machines since RHS is the solution itself
3016  // at the other machine corresponding points. In fact
3017  // we don't need to send D at all. Just replace the
3018  // RHS, skip that equation, and go on. But for
3019  // code uniformity and since skipping is more complex
3020  // it seems simpler to just setup the right equation
3021  // on the other machine. i.e. 1 * v = rhs
3022 }
3023 
3024 // solve the subtrees, rhs on the backbone is already solved
3026  int i, ip;
3027  // solve all rootnodes not part of a backbone
3028  for (i = i1; i < backbone_begin; ++i) {
3029  RHS(i) /= D(i);
3030  }
3031  // solve the subtrees
3032  for (i = backbone_end; i < i3; ++i) {
3033  ip = _nt->_v_parent_index[i];
3034  RHS(i) -= B(i) * RHS(ip);
3035  RHS(i) /= D(i);
3036  }
3037 #if 0
3038 printf("%d end of bksub_subtrees (just the backbone)\n", nrnmpi_myid);
3039 for (i=i1; i < backbone_end; ++i) {
3040  printf("%d i=%d RHS=%g\n", nrnmpi_myid, i, RHS(i));
3041 }
3042 #endif
3043 }
3044 
3045 // fill the v_node, v_parent node vectors in the proper order and
3046 // determine backbone index values. See the NOTE above. The relevant order
3047 // is:
3048 // 1) all the roots of cells with 0 or 1 sid (no backbone involved)
3049 // 2) all the sid0 when there are two sids in a cell. 1) + 2) = rootnodecount
3050 // 3) the interior backbone nodes (does not include sid0 or sid1 nodes)
3051 // 4) all the sid1 nodes
3052 // 5) all remaining nodes
3053 // backbone_begin is the index for 2).
3054 // backbone_long_begin ; earlier are short backbones, later are long
3055 // backbone_interior_begin is one more than the last index for 2)
3056 // backbone_sid1_begin is the index for 4)
3057 // backbone_long_sid1_begin ; ealier are short, later are long
3058 // backbone_end is the index for 5.
3059 // We allow the single or pair sid nodes to be different from a rootnode.
3060 // Note: generally speaking, the first sid will probably always be
3061 // a classical root node. However, we do want to handle the case where
3062 // many branches connect to soma(0.5), so do the whole tree ordering
3063 // as the general case.
3064 
3066  msc_->v_setup();
3067 }
3068 
3071  return;
3072  }
3073  // the typical case is that this comes from the beginning of exchange_setup
3074  // through recalc_diam since exhange_setup needs the proper
3075  // thread nt->_v_node and ms->ithread. Hence anything that
3076  // changes the overall structure
3077  // requires a complete start over from the point prior to splitting.
3078 
3080  assert(!use_sparse13);
3081  int i;
3082  // first time through, nth_ = 0
3083  if (nth_ == 0) {
3084  // printf("MultiSplitControl::v_setup due to multisplit()\n");
3085  assert(mth_ == 0);
3086  nth_ = nrn_nthread;
3087  mth_ = new MultiSplitThread[nth_];
3088  for (i = 0; i < nrn_nthread; ++i) {
3089  mth_[i].v_setup(nrn_threads + i);
3090  }
3091  } else {
3092  if (nth_ != nrn_nthread) {
3093  hoc_execerror(
3094  "ParallelContext.nthread() was changed after ParallelContext.multisplit()", 0);
3095  }
3096  // pointers have changed and need to reorder and update reduced tree maps
3097  // printf("MultiSplitControl::v_setup due to pointer change\n");
3098  for (i = 0; i < nrn_nthread; ++i) {
3099  mth_[i].v_setup(nrn_threads + i);
3100  }
3101  }
3102 }
3103 
3105  // Precondition:
3106  // v_node and v_parent node vectors are in their classical tree order
3107  // and Node.v_node_index also is consistent with that.
3108  // calls to pc.multisplit() define the implicit connections
3109  // between splitcell
3110  // Postcondition: v_node and v_parent node vectors are in their multisplit
3111  // tree order. That info can be subsequently used to re-allocate
3112  // the actual v
3113  i1 = 0;
3114  i2 = i1 + nt->ncell;
3115  i3 = nt->end;
3116  int nnode = i3 - i1;
3117 
3118  MultiSplitTable* classical_root_to_multisplit_ = msc_->classical_root_to_multisplit_.get();
3119  MultiSplitList* multisplit_list_ = msc_->multisplit_list_;
3120 
3121  // node vs v_node relation is always with an i1 offset
3122  Node** node = new Node*[nnode];
3123  Node** parent = new Node*[nnode];
3124  Node* nd;
3125  int i, j, i0, ii, in, ip, nback, ib, ibl, ibs;
3126  int is0, is1, k0, k1, iss0, iss1, isl0, isl1;
3127 
3128 #if 0
3129 printf("multisplit_v_setup %d\n", nnode);
3130  for (i=i1; i < i3; ++i) {
3131  assert(nt->_v_node[i]->v_node_index == i);
3132  assert(nt->_v_parent[i] == 0 || nt->_v_parent[i]->v_node_index < i);
3133  }
3134 #endif
3135 #if 0
3136 printf("multisplit_v_setup %d\n", nnode);
3137 printf("\nclassical order\n");
3138 for (i=i1; i < i3; ++i) {
3139  printf("%d %d %d %s %d\n", i, nt->_v_node[i]->v_node_index,
3140  nt->_v_parent[i] ? nt->_v_parent[i]->v_node_index : -1, secname(nt->v_node[i]->sec),
3141  nt->_v_node[i]->sec_node_index_);
3142 }
3143 #endif
3144  del_sidA();
3145 
3146  // the first phase is to transform from the classical tree order
3147  // to a tree order where the sid0 are roots. In splitcell, this
3148  // was done at the hoc level (rerooting a tree involves reversing
3149  // the parent child relationship on the line between the old and
3150  // new roots). However at the hoc level it is only possible to
3151  // have split points at section boundaries and
3152  // that meant that soma(0.5) could not
3153  // be a root. We want to allow that. It is also much simpler
3154  // to deal with the backbone between sid0 and sid1
3155  // if sid0 is already a root.
3156 
3157  // how many sid0 backbone (long and short) roots are there
3158  backbone_begin = i2;
3160  if (classical_root_to_multisplit_) {
3161  for (MultiSplit* ms: *multisplit_list_) {
3162  if (ms->nd[1]) {
3163  if (ms->nd[1]->_nt != nt) {
3164  continue;
3165  }
3166  --backbone_begin;
3167  if (ms->backbone_style != 1) {
3169  }
3170  if (ms->backbone_style == 2) {
3171  ++nbackrt_;
3172  }
3173  }
3174  }
3175  }
3177  if (nbackrt_) {
3178  backsid_ = new int[nbackrt_];
3179  backAindex_ = new int[nbackrt_];
3180  backBindex_ = new int[nbackrt_];
3181  }
3182 
3183  // reorder the trees so sid0 is the root, at least with respect to v_parent
3184  for (i = i1; i < i2; ++i) {
3185  Node* oldroot = nt->_v_node[i];
3186  if (classical_root_to_multisplit_) {
3187  const auto& msiter = classical_root_to_multisplit_->find(oldroot);
3188  if (msiter != classical_root_to_multisplit_->end()) {
3189  nd = msiter->second->nd[0];
3190  if (nd == oldroot) { // the cell tree is fine
3191  // the way it is (the usual case)
3192  nt->_v_parent[nd->v_node_index] = 0;
3193  } else {
3194  // need to reverse the parent/child relationship
3195  // on the path from nd to oldroot
3196  in = nd->v_node_index;
3197  nd = 0;
3198  while (in > i) {
3199  ip = nt->_v_parent[in]->v_node_index;
3200  nt->_v_parent[in] = nd;
3201  nd = nt->_v_node[in];
3202  in = ip;
3203  }
3204  nt->_v_parent[in] = nd;
3205  }
3206  }
3207  }
3208  }
3209 
3210  // Now the only problem is that assert(v_parent[i] < v_node[i])
3211  // is false. But we can take care of that at the end.
3212  // the sid0 are now root nodes with respect to v_parent
3213 
3214 #if 0
3215 printf("\nsid0 is a root\n");
3216 for (i=i1; i < i3; ++i) {
3217  printf("%d %d %d %s %d\n", i, nt->_v_node[i]->v_node_index,
3218  nt->_v_parent[i] ? nt->_v_parent[i]->v_node_index : -1,
3219  secname(nt->_v_node[i]->sec), nt->_v_node[i]->sec_node_index_);
3220 }
3221 #endif
3222  // Index all the classical rootnodes and sid rootnodes
3223  // If the classical rootnode has only 1 sid, it
3224  // replaces it as a rootnode. If there are 2 the first sid node
3225  // gets shifted to the backbone_begin indices in the proper
3226  // short or long section.
3227  backbone_end = i2;
3228  ii = i1;
3229  ibl = backbone_long_begin;
3230  ibs = backbone_begin;
3231  for (i = i1; i < i2; ++i) {
3232  nd = nt->_v_node[i];
3233  if (classical_root_to_multisplit_ &&
3234  classical_root_to_multisplit_->find(nd) != classical_root_to_multisplit_->end()) {
3235  MultiSplit* ms = classical_root_to_multisplit_->operator[](nd);
3236  if (ms->nd[1]) {
3237  ib = ms->backbone_style >= 1 ? ibs : ibl;
3238  node[ib - i1] = ms->nd[0];
3239  parent[ib - i1] = 0;
3240  if (ms->backbone_style >= 1) {
3241  ++ibs;
3242  } else {
3243  ++ibl;
3244  }
3245  // here we can get a bit greedy and
3246  // start counting how many indices are
3247  // needed for the backbones
3248  // how many nodes between nd[0] and nd[1]
3249  // note that nd[0] IS a root.
3250  i0 = ms->nd[0]->v_node_index;
3251  int iii = ms->nd[1]->v_node_index;
3252  while (i0 != iii) {
3253  iii = nt->_v_parent[iii]->v_node_index;
3254  ++backbone_end;
3255  }
3256  } else {
3257  node[ii - i1] = ms->nd[0];
3258  parent[ii - i1] = 0;
3259  ++ii;
3260  }
3261  } else {
3262  node[ii - i1] = nd;
3263  parent[ii - i1] = 0;
3264  ++ii;
3265  }
3266  }
3269 // printf("backbone begin=%d interior=%d sid1=%d end=%d\n",
3270 // backbone_begin, backbone_interior_begin, backbone_sid1_begin, backbone_end);
3271 #if 0
3272  for (i=i1; i < backbone_interior_begin; ++i) {
3273  assert(parent[i-i1] == 0);
3274  }
3275 #endif
3276  nback = backbone_end - backbone_begin;
3277  if (nback) {
3278  sid0i = new int[nback];
3279  sid1A = new double[nback];
3280  sid1B = new double[nback];
3281  for (i = 0; i < nback; ++i) {
3282  sid1A[i] = sid1B[i] = 0.;
3283  sid0i[i] = -1;
3284  }
3285  }
3286  iss0 = backbone_begin;
3287  iss1 = backbone_sid1_begin;
3288  isl0 = backbone_long_begin;
3289  isl1 = backbone_long_sid1_begin;
3290  ib = backbone_sid1_begin;
3291  // order backbone_sid1_begin to backbone_begin (same order as
3292  // backbone_begin to backbone_interior)
3293  // again, short are before long
3294  // the backbone interior can be any order
3295  int ibrt = 0;
3296  for (i = i1; i < i2; ++i) {
3297  nd = nt->_v_node[i];
3298  if (classical_root_to_multisplit_) {
3299  MultiSplit* ms;
3300  const auto& msiter = classical_root_to_multisplit_->find(nd);
3301  if (msiter != classical_root_to_multisplit_->end()) {
3302  ms = msiter->second;
3303  ms->ithread = nt->id;
3304  if (ms->nd[1]) {
3305  if (ms->backbone_style >= 1) {
3306  is0 = iss0++;
3307  is1 = iss1++;
3308  } else {
3309  is0 = isl0++;
3310  is1 = isl1++;
3311  }
3312  i0 = ms->nd[0]->v_node_index;
3313  ii = ms->nd[1]->v_node_index;
3314  node[is1 - i1] = ms->nd[1];
3315  parent[is1 - i1] = nt->_v_parent[ii];
3316  ii = nt->_v_parent[ii]->v_node_index;
3317  while (i0 != ii) {
3318  --ib;
3319  node[ib - i1] = nt->_v_node[ii];
3320  parent[ib - i1] = nt->_v_parent[ii];
3321  // printf("sid0i[%d] = %d\n", ib-backbone_begin, is0);
3322  sid0i[ib - backbone_begin] = is0;
3323  ii = nt->_v_parent[ii]->v_node_index;
3324  }
3325  if (ms->backbone_style == 2) {
3326  backsid_[ibrt] = ms->sid[0];
3327  ms->back_index = ibrt;
3328  backAindex_[ibrt] = is0 - backbone_begin;
3329  backBindex_[ibrt] = is1 - backbone_begin;
3330  // printf("backAindex[%d] = %d sid=%d Bindex=%d\n",
3331  // ibrt, backAindex_[ibrt], backsid_[ibrt], backBindex_[ibrt]);
3332  ++ibrt;
3333  }
3334  }
3335  }
3336  }
3337  }
3338  // printf("backbone order complete\n");
3339 
3340  // most of the rest are already in tree order and have the correct
3341  // parents. They just need to be copied into node and parent
3342  // so that the parent index is alway < the node index
3343  // However, sid0 becoming root can cause many places where
3344  // parent_index > node_index and these need to be relocated
3345  for (i = i1; i < i3; ++i) {
3346  // can exploit this variable because it will be set later
3347  nt->_v_node[i]->eqn_index_ = -1;
3348  }
3349  for (i = i1; i < backbone_end; ++i) {
3350 #if 0
3351 printf("backbone i=%d %d %s %d", i, node[i]->v_node_index, secname(node[i]->sec), node[i]->sec_node_index_);
3352 printf(" -> %s %d\n", parent[i]?secname(parent[i]->sec):"root",
3353 parent[i]?parent[i]->sec_node_index_:-1);
3354 #endif
3355  node[i - i1]->eqn_index_ = i;
3356  }
3357  // the rest are in order
3358  j = backbone_end;
3359  for (i = i1; i < i3; ++i) {
3360  k0 = 0;
3361  k1 = i;
3362  // printf("i=%d\n", i);
3363  while (nt->_v_node[k1]->eqn_index_ < 0) {
3364  // printf("counting i=%d k1=%d k0=%d\n", i, k1, k0);
3365  ++k0;
3366  if (!nt->_v_parent[k1]) {
3367  break;
3368  }
3369  k1 = nt->_v_parent[k1]->v_node_index;
3370  }
3371  // printf("i=%d k0=%d\n", i, k0);
3372  k1 = i;
3373  j += k0;
3374  k0 = j - 1;
3375  while (nt->_v_node[k1]->eqn_index_ < 0) {
3376  // printf("ordering i=%d k1=%d k0=%d parent=%p\n", i, k1, k0, nt->_v_parent[k1]);
3377  node[k0 - i1] = nt->_v_node[k1];
3378  parent[k0 - i1] = nt->_v_parent[k1];
3379  node[k0 - i1]->eqn_index_ = k0;
3380  --k0;
3381  if (!nt->_v_parent[k1]) {
3382  break;
3383  }
3384  k1 = nt->_v_parent[k1]->v_node_index;
3385  }
3386  }
3387  for (i = i1; i < i3; ++i) {
3388  assert(i == node[i - i1]->eqn_index_);
3389  nt->_v_node[i] = node[i - i1];
3390  nt->_v_parent[i] = parent[i - i1];
3391  nt->_v_node[i]->v_node_index = i;
3392  }
3393  delete[] node;
3394  delete[] parent;
3395 
3396 #if 0
3397 printf("\nmultisplit reordering\n");
3398 printf("backbone begin=%d long=%d interior=%d sid1=%d long=%d end=%d\n",
3401 for (i=i1; i < i3; ++i) {
3402  printf("%d %d %s %d ", nt->_v_node[i]->v_node_index,
3403  nt->_v_parent[i] ? nt->_v_parent[i]->v_node_index : -1,
3404  secname(nt->_v_node[i]->sec), nt->_v_node[i]->sec_node_index_);
3405  if (nt->_v_parent[i]) {
3406  printf(" -> %s %d\n", secname(nt->_v_parent[i]->sec), nt->_v_parent[i]->sec_node_index_);
3407  }else{
3408  printf(" root\n");
3409  }
3410 }
3411 #endif
3412  for (i = i1; i < i3; ++i)
3413  if (nt->_v_parent[i] && nt->_v_parent[i]->v_node_index >= i) {
3414  printf("i=%d parent=%d\n", i, nt->_v_parent[i]->v_node_index);
3415  }
3416  for (i = i1; i < i3; ++i) {
3417  assert(nt->_v_node[i]->v_node_index == i);
3418  assert(nt->_v_parent[i] == 0 || nt->_v_parent[i]->v_node_index < i);
3419  }
3420 
3421  // freeing sid1A... invalidated any offdiag_ pointers.
3422  for (i = 0; i < msc_->nthost_; ++i) {
3423  MultiSplitTransferInfo& msti = msc_->msti_[i];
3424  for (j = 0; j < msti.nnode_rt_; j += 2) {
3425  msti.offdiag_[j] = sid1A + msti.ioffdiag_[j];
3426  msti.offdiag_[j + 1] = sid1B + msti.ioffdiag_[j + 1];
3427  }
3428  }
3429  // sid1A, sid1B pointers in reducedtree map will be updated by
3430  // rt_map_update along with d and rhs
3431 }
3432 
3433 void MultiSplitControl::pmat(bool full) {
3434  int it, i, is;
3435  Printf("\n");
3436  for (it = 0; it < nrn_nthread; ++it) {
3437  NrnThread* _nt = nrn_threads + it;
3438  MultiSplitThread& t = mth_[it];
3439  for (i = 0; i < _nt->end; ++i) {
3440  is = _nt->_v_node[i]->_classical_parent ? _nt->_v_node[i]->sec_node_index_ : -1;
3441  Printf("%d %d %s %d",
3442  _nt->_v_node[i]->v_node_index,
3443  _nt->_v_parent[i] ? _nt->_v_parent[i]->v_node_index : -1,
3444  secname(_nt->_v_node[i]->sec),
3445  is);
3446  if (_nt->_v_parent[i]) {
3447  is = _nt->_v_parent[i]->_classical_parent ? _nt->_v_parent[i]->sec_node_index_ : -1;
3448  Printf(" -> %s %d", secname(_nt->_v_parent[i]->sec), is);
3449  Printf("\t %10.5g %10.5g", NODEB(_nt->_v_node[i]), NODEA(_nt->_v_node[i]));
3450  } else {
3451  Printf(" root\t\t %10.5g %10.5g", 0., 0.);
3452  }
3453 
3454  if (full) {
3455  Printf(" %10.5g %10.5g", NODED(_nt->_v_node[i]), NODERHS(_nt->_v_node[i]));
3456  if (t.sid0i && i >= t.backbone_begin && i < t.backbone_end) {
3457  Printf(" %10.5g %10.5g",
3458  t.S1B(i - t.backbone_begin),
3459  t.S1A(i - t.backbone_begin));
3460  }
3461  }
3462  Printf("\n");
3463  }
3464  }
3465 }
3466 
3467 void MultiSplitControl::pmatf(bool full) {
3468  int it, i, is;
3469  FILE* f;
3470  char fname[100];
3471 
3472  sprintf(fname, "pmat.%04d", nrnmpi_myid);
3473  f = fopen(fname, "w");
3474  for (it = 0; it < nrn_nthread; ++it) {
3475  NrnThread* _nt = nrn_threads + it;
3476  MultiSplitThread& t = mth_[it];
3477  fprintf(f, "%d %d\n", it, _nt->end);
3478  for (i = 0; i < _nt->end; ++i) {
3479  is = _nt->_v_node[i]->_classical_parent ? _nt->_v_node[i]->sec_node_index_ : -1;
3480  fprintf(f,
3481  "%d %d %s %d",
3482  _nt->_v_node[i]->v_node_index,
3483  _nt->_v_parent[i] ? _nt->_v_parent[i]->v_node_index : -1,
3484  secname(_nt->_v_node[i]->sec),
3485  is);
3486  if (_nt->_v_parent[i]) {
3487  is = _nt->_v_parent[i]->_classical_parent ? _nt->_v_parent[i]->sec_node_index_ : -1;
3488  fprintf(f, " -> %s %d", secname(_nt->_v_parent[i]->sec), is);
3489  fprintf(f, "\t %10.5g %10.5g", NODEB(_nt->_v_node[i]), NODEA(_nt->_v_node[i]));
3490  } else {
3491  fprintf(f, " root\t\t %10.5g %10.5g", 0., 0.);
3492  }
3493 
3494  if (full) {
3495  fprintf(f, " %10.5g %10.5g", NODED(_nt->_v_node[i]), NODERHS(_nt->_v_node[i]));
3496  if (t.sid0i && i >= t.backbone_begin && i < t.backbone_end) {
3497  fprintf(f,
3498  " %10.5g %10.5g",
3499  t.S1B(i - t.backbone_begin),
3500  t.S1A(i - t.backbone_begin));
3501  }
3502  }
3503  fprintf(f, "\n");
3504  }
3505  }
3506  fclose(f);
3507 }
3508 
3509 void MultiSplitControl::pmat1(const char* s) {
3510  int it;
3511  double a, b, d, rhs;
3512  for (it = 0; it < nrn_nthread; ++it) {
3513  NrnThread* _nt = nrn_threads + it;
3514  MultiSplitThread& t = mth_[it];
3515  int i1 = 0;
3516  int i3 = _nt->end;
3517  for (MultiSplit* ms: *multisplit_list_) {
3518  int i = ms->nd[0]->v_node_index;
3519  if (i < i1 || i >= i3) {
3520  continue;
3521  }
3522  d = D(i);
3523  rhs = RHS(i);
3524  a = b = 0.;
3525  if (ms->nd[1]) {
3526  a = mth_[it].S1A(0);
3527  }
3528  Printf("%2d %s sid=%d %12.5g %12.5g %12.5g %12.5g\n",
3529  nrnmpi_myid,
3530  s,
3531  ms->sid[0],
3532  b,
3533  d,
3534  a,
3535  rhs);
3536  if (ms->nd[1]) {
3537  d = D(ms->nd[1]->v_node_index);
3538  rhs = RHS(ms->nd[1]->v_node_index);
3539  a = 0.;
3540  b = t.S1B(t.backbone_sid1_begin - t.backbone_begin);
3541  Printf("%2d %s sid=%d %12.5g %12.5g %12.5g %12.5g\n",
3542  nrnmpi_myid,
3543  s,
3544  ms->sid[1],
3545  b,
3546  d,
3547  a,
3548  rhs);
3549  }
3550  }
3551  }
3552 }
3553 
3554 double* nrn_classicalNodeA(Node* nd) {
3555  int i = nd->v_node_index;
3556  Node* pnd = nd->_classical_parent;
3557  NrnThread* _nt = nd->_nt;
3558  if (_nt->_v_parent[i] == pnd) {
3559  return &NODEA(nd);
3560  } else if (pnd == 0) {
3561  return 0;
3562  } else if (_nt->_v_parent[pnd->v_node_index] == nd) {
3563  return &NODEB(pnd);
3564  } else {
3565  assert(0);
3566  }
3567  return 0;
3568 }
3569 
3570 double* nrn_classicalNodeB(Node* nd) {
3571  int i = nd->v_node_index;
3572  Node* pnd = nd->_classical_parent;
3573  NrnThread* _nt = nd->_nt;
3574  if (_nt->_v_parent[i] == pnd) {
3575  return &NODEB(nd);
3576  } else if (pnd == 0) {
3577  return 0;
3578  } else if (_nt->_v_parent[pnd->v_node_index] == nd) {
3579  return &NODEA(pnd);
3580  } else {
3581  assert(0);
3582  }
3583  return 0;
3584 }
const char * secname(Section *sec)
Definition: cabcode.cpp:1776
Node * node_exact(Section *sec, double x)
Definition: cabcode.cpp:1940
short type
Definition: cabvars.h:9
static Node * pnd
Definition: clamp.cpp:33
MultiSplitList * multisplit_list_
MultiSplitThread * mth_
void multisplit(Section *, double, int, int)
Definition: multisplit.cpp:363
void multisplit_nocap_v_part1(NrnThread *)
void multisplit_adjust_rhs(NrnThread *)
void pmatf(bool full=false)
void pmat1(const char *)
void matrix_exchange_nocap()
virtual ~MultiSplitControl()
Definition: multisplit.cpp:344
MultiSplitTransferInfo * msti_
std::unique_ptr< MultiSplitTable > classical_root_to_multisplit_
void multisplit_nocap_v_part3(NrnThread *)
void pmat(bool full=false)
ReducedTree ** rtree_
void reduced_mark(int, int, int, int *, int *, int *)
void multisplit_nocap_v_part2(NrnThread *)
ReducedTree * rt_
Definition: multisplit.cpp:273
int backbone_style
Definition: multisplit.cpp:260
Node * nd[2]
Definition: multisplit.cpp:258
int sid[2]
Definition: multisplit.cpp:259
void triang_backbone(NrnThread *)
void triang(NrnThread *)
void bksub_short_backbone_part1(NrnThread *)
void bksub(NrnThread *)
virtual ~MultiSplitThread()
Definition: multisplit.cpp:359
void bksub_backbone(NrnThread *)
void v_setup(NrnThread *)
void bksub_subtrees(NrnThread *)
void triang_subtree2backbone(NrnThread *)
double ** rmap
Definition: multisplit.cpp:241
int * rmap2smap_index
Definition: multisplit.cpp:245
double ** smap
Definition: multisplit.cpp:241
void fillrmap(int sid1, int sid2, double *pd)
virtual ~ReducedTree()
double * d
Definition: multisplit.cpp:231
double * b
Definition: multisplit.cpp:233
void fillsmap(int sid, double *prhs, double *pdiag)
void scatter()
double * v
Definition: multisplit.cpp:247
MultiSplitControl * msc
Definition: multisplit.cpp:227
void pr_map(int, double *)
ReducedTree(MultiSplitControl *, int rank, int mapsize)
std::unique_ptr< Int2IntTable > s2rt
Definition: multisplit.cpp:250
void reorder(int j, int nt, int *mark, int *all_bb_relation, int *allsid)
double * a
Definition: multisplit.cpp:232
double * rhs
Definition: multisplit.cpp:230
static double order(void *v)
Definition: cvodeobj.cpp:239
int use_cachevec
Definition: treeset.cpp:63
sprintf(buf, " if (secondorder) {\n" " int _i;\n" " for (_i = 0; _i < %d; ++_i) {\n" " _p[_slist%d[_i]] += dt*_p[_dlist%d[_i]];\n" " }}\n", numeqn, listnum, listnum)
ms
Definition: extargs.h:1
void hoc_execerror(const char *, const char *)
Definition: hoc.cpp:754
#define assert(ex)
Definition: hocassrt.h:32
void
#define rhs
Definition: lineq.h:6
#define sec
Definition: md1redef.h:13
#define id
Definition: md1redef.h:33
#define i
Definition: md1redef.h:12
#define Printf
Definition: model.h:237
char * nh
Definition: mos2nrn.cpp:11
int nrn_nthread
Definition: multicore.cpp:46
NrnThread * nrn_threads
Definition: multicore.cpp:47
int diam_changed
Definition: cabcode.cpp:23
void(* nrn_multisplit_setup_)()
Definition: treeset.cpp:46
std::vector< MultiSplit * > MultiSplitList
Definition: multisplit.cpp:304
static void nrnmpi_postrecv_doubles(double *, int, int, int, void **)
Definition: multisplit.cpp:51
#define D(i)
Definition: multisplit.cpp:65
void nrn_cachevec(int)
Definition: treeset.cpp:2127
void nrn_multisplit_nocap_v_part1(NrnThread *)
void * nrn_multisplit_reduce_solve(NrnThread *)
#define S1A(i)
Definition: multisplit.cpp:67
double * nrn_classicalNodeA(Node *nd)
static void nrnmpi_int_allgatherv(int *, int *, int *, int *)
Definition: multisplit.cpp:50
int nrn_multisplit_active_
Definition: multisplit.cpp:15
double * nrn_classicalNodeB(Node *nd)
static double nrnmpi_splitcell_wait_
Definition: multisplit.cpp:43
std::unordered_map< Node *, MultiSplit * > MultiSplitTable
Definition: multisplit.cpp:303
void nrn_multisplit_adjust_rhs(NrnThread *)
#define RHS(i)
Definition: multisplit.cpp:66
void setup_topology()
Definition: cabcode.cpp:1724
void(* nrn_multisplit_solve_)()
Definition: solve.cpp:71
void nrn_multisplit_ptr_update()
static MultiSplitControl * msc_
Definition: multisplit.cpp:307
void nrn_multisplit_nocap_v()
static void multisplit_v_setup()
#define A(i)
Definition: multisplit.cpp:63
void nrnmpi_multisplit_clear()
Definition: multisplit.cpp:509
double t
Definition: cvodeobj.cpp:59
static void nrnmpi_wait(void **)
Definition: multisplit.cpp:53
void nrn_multisplit_nocap_v_part2(NrnThread *)
static double nrnmpi_wtime()
Definition: multisplit.cpp:55
void * nrn_multisplit_triang(NrnThread *)
double nrnmpi_rtcomp_time_
Definition: ocbbs.cpp:33
void * nrn_multisplit_bksub(NrnThread *)
static void nrnmpi_send_doubles(double *, int, int, int)
Definition: multisplit.cpp:52
int tree_changed
void nrnmpi_multisplit(Section *, double x, int sid, int backbone_style)
Definition: multisplit.cpp:309
static void nrnmpi_int_allgather(int *, int *, int)
Definition: multisplit.cpp:49
#define S1B(i)
Definition: multisplit.cpp:68
static void multisplit_solve()
static int nrnmpi_use
Definition: multisplit.cpp:48
void nrn_matrix_node_free()
Definition: treeset.cpp:1916
static void nrnmpi_barrier()
Definition: multisplit.cpp:54
#define B(i)
Definition: multisplit.cpp:64
std::unordered_map< int, int > Int2IntTable
Definition: multisplit.cpp:207
void nrn_multisplit_nocap_v_part3(NrnThread *)
#define printf
Definition: mwprefix.h:26
#define fprintf
Definition: mwprefix.h:30
static Node * node(Object *)
Definition: netcvode.cpp:340
#define nrn_assert(ex)
Definition: nrnassrt.h:53
int const size_t const size_t n
Definition: nrngsl.h:11
size_t p
size_t j
int nrnmpi_myid
int nrnmpi_numprocs
static philox4x32_key_t k
Definition: nrnran123.cpp:11
void recalc_diam()
Definition: treeset.cpp:953
virtual void move(const Event &e)
Definition: ocinput.h:26
#define e
Definition: passive0.cpp:22
#define parent
Definition: rbtqueue.cpp:47
#define root
Definition: rbtqueue.cpp:53
#define ne
Definition: redef.h:94
#define NODEA(n)
Definition: section.h:124
#define NODEB(n)
Definition: section.h:125
#define VEC_D(i)
Definition: section.h:120
int use_sparse13
Definition: treeset.cpp:71
#define VEC_RHS(i)
Definition: section.h:121
#define VEC_AREA(i)
Definition: section.h:123
#define NODERHS(n)
Definition: section.h:105
#define NODED(n)
Definition: section.h:104
#define VEC_V(i)
Definition: section.h:122
FILE * fopen()
int ibuf[3]
Definition: multisplit.cpp:195
MultiSplit * ms
Definition: multisplit.cpp:197
double adjust_rhs_
Definition: multisplit.cpp:196
MultiSplit * ms
Definition: multisplit.cpp:204
double * pd[3]
Definition: multisplit.cpp:202
double adjust_rhs_
Definition: multisplit.cpp:203
Definition: section.h:133
struct NrnThread * _nt
Definition: section.h:158
struct Node * _classical_parent
Definition: section.h:157
int eqn_index_
Definition: section.h:149
Section * sec
Definition: section.h:155
int v_node_index
Definition: section.h:175
int sec_node_index_
Definition: section.h:177
Represent main neuron object computed by single thread.
Definition: multicore.h:58
int * _v_parent_index
Definition: multicore.h:76
int ncell
Definition: multicore.h:64
int id
Definition: multicore.h:66
int end
Definition: multicore.h:65
double * _actual_d
Definition: multicore.h:71
Node ** _v_parent
Definition: multicore.h:78
double * _actual_rhs
Definition: multicore.h:70
Node ** _v_node
Definition: multicore.h:77
short nnode
Definition: section.h:41
static const char * fname(const char *name)
Definition: nrnbbs.cpp:113