1 #include <../../nrnconf.h> 11 #include <unordered_map> 64 #define RHS(i) VEC_RHS(i) 65 #define S1A(i) sid1A[i] 66 #define S1B(i) sid1B[i] 247 void reorder(
int j,
int nt,
int* mark,
int* all_bb_relation,
int* allsid);
249 void fillrmap(
int sid1,
int sid2,
double* pd);
250 void fillsmap(
int sid,
double* prhs,
double* pdiag);
251 void pr_map(
int,
double*);
304 #include <multisplitcontrol.h> 311 msc_->
multisplit(sec, x, sid, backbone_style);
315 narea2buf_ = narea2rt_ = 0;
319 ihost_reduced_long_ = ihost_short_long_ = 0;
325 nodeindex_buffer_ = 0;
326 nodeindex_buffer_th_ = 0;
327 nodeindex_rthost_ = 0;
329 iarea_short_long_ = 0;
330 buf_area_indices_ = 0;
331 area_node_indices_ = 0;
337 multisplit_list_ = 0;
346 backbone_begin = backbone_long_begin = backbone_interior_begin = 0;
347 backbone_sid1_begin = backbone_long_sid1_begin = backbone_end = 0;
364 if (sid > 1000) { pexch();
return; }
365 if (sid >= 1000) { pmat(sid>1000);
return; }
369 if (classical_root_to_multisplit_) {
374 exchange_setup();
return;
377 if (backbone_style != 2) {
380 if (!classical_root_to_multisplit_) {
382 classical_root_to_multisplit_->reserve(97);
383 multisplit_list_ =
new MultiSplitList();
395 const auto& msiter = classical_root_to_multisplit_->find(root);
396 if (msiter != classical_root_to_multisplit_->end()) {
398 if (backbone_style == 2) {
400 hoc_execerror(
"earlier call for this cell did not have a backbone style = 2", 0);
402 }
else if (backbone_style == 1) {
407 if (ms->
sid[1] == ms->
sid[0]) {
409 sprintf(s,
"two sid = %d at same point on tree rooted at", sid);
425 (*classical_root_to_multisplit_)[
root] =
ms;
426 multisplit_list_->append(ms);
435 sid1A = 0; sid1B = 0; sid0i = 0;
439 delete [] backAindex_;
440 delete [] backBindex_;
448 for (i=0; i < nrtree_; ++
i) {
455 for (i=0; i < nthost_; ++
i) {
456 if (msti_[i].nnode_rt_) {
457 delete [] msti_[
i].nd_rt_index_;
458 delete [] msti_[
i].nd_rt_index_th_;
459 delete [] msti_[
i].offdiag_;
460 delete [] msti_[
i].ioffdiag_;
465 if (nodeindex_buffer_) {
466 delete [] nodeindex_buffer_;
467 delete [] nodeindex_buffer_th_;
468 delete [] nodeindex_rthost_;
470 nodeindex_buffer_ = 0;
471 nodeindex_buffer_th_ = 0;
472 nodeindex_rthost_ = 0;
477 trecvbuf_ = 0; tsendbuf_ = 0;
479 delete [] buf_area_indices_;
480 delete [] area_node_indices_;
481 buf_area_indices_ = 0;
482 area_node_indices_ = 0;
491 area2buf_ = 0; narea2buf_ = 0;
495 area2rt_ = 0; narea2rt_ = 0;
512 for (i=0; i < nth_; ++
i) {
521 if (classical_root_to_multisplit_) {
523 for(
const auto& mspair : *classical_root_to_multisplit_) {
524 delete mspair.second;
526 classical_root_to_multisplit_.reset();
527 delete multisplit_list_;
528 multisplit_list_ = 0;
568 if (classical_root_to_multisplit_) {
570 for (i=0; i < multisplit_list_->count(); ++
i) {
571 ms = multisplit_list_->item(i);
573 if (ms->
nd[1]) { ++
n; }
576 if (ms->
nd[1]) { ++nwc; }
610 int* bb_relation = 0;
614 bb_relation =
new int[
n];
615 threadid =
new int[
n];
617 if (classical_root_to_multisplit_) {
620 for (
int ii=0; ii < multisplit_list_->count(); ++ii) {
621 ms = multisplit_list_->item(ii);
634 bb_relation[i-1] += 1 + sid[
i];
635 bb_relation[
i] += 1 + sid[i-1];
648 displ[i+1] = displ[
i] + nn[
i];
651 int* allsid =
new int[nt];
652 int* all_bb_relation =
new int[nt];
657 for (i=0; i <
n; ++
i) {
659 all_bb_relation[
i] = bb_relation[
i];
664 delete [] all_bb_relation;
705 int* mark =
new int[nt];
706 int* connects2short =
new int[
n];
707 for (i = 0; i <
n; ++
i) {
708 connects2short[
i] = 0;
710 for (i=0; i < nt; ++
i) {
712 for (j=0; j <
n; ++
j) {
713 if (allsid[i] == sid[j]) {
720 if ((bb_relation[j] >= 2) != (all_bb_relation[i] >= 2)) {
721 hoc_execerror(
"backbone_style==2 inconsistent between hosts for same sid", 0);
724 if (all_bb_relation[i] < 2) {
726 if (all_bb_relation[i] == 1) {
727 connects2short[
j] = 1;
744 for (i=0; i < nt; ++
i) {
745 if (mark[i] >= 0 && connects2short[mark[i]]
746 && all_bb_relation[i] == 0 && bb_relation[mark[i]] == 0) {
751 for (i = 0; i < nt; ++
i) {
753 if (bb_relation[mark[i]] == 1 && all_bb_relation[i] == 1) {
754 hoc_execerror(
"a short to short backbone interprocessor connection exists", 0);
766 for (j = 0; j <
n; ++
j) {
767 if (bb_relation[j] >= 2) {
768 reduced_mark(j, sid[j], nt, mark, allsid, all_bb_relation);
776 for (i=0; i < nt; ++
i) {
printf(
"%d %d allsid=%d all_bb_relation=%d mark=%d\n",
777 nrnmpi_myid, i, allsid[i], all_bb_relation[i], mark[i]);}
786 int* mcnt =
new int[
n];
787 for (j=0; j <
n; ++
j) { mcnt[
j] = 0; }
789 if (all_bb_relation[i] >= 2 && mark[i] >= 0) {
791 if (all_bb_relation[i] > 2) { ++
i; }
794 for (j=0; j <
n; ++
j) {
809 int* rthost =
new int[
n];
811 for (j=0; j <
n; ++
j) { rthost[
j] = -1; rt[
j] = 0; }
814 for (
int ii=0; ii < multisplit_list_->count(); ++ii) {
815 ms = multisplit_list_->item(ii);
817 for (j = 0; j <
n; ++
j) {
818 if (sid[j] == ms->
sid[0]) {
825 for (i=displ[ih]; i < displ[ih+1]; ++
i) {
827 if (all_bb_relation[i] > 2) {
831 if (all_bb_relation[i] == 2) {
838 if (ms->
rthost != -1) {
break; }
850 for (j=0; j <
n; ++
j) {
852 bb_relation[j], rthost[j]);
867 for (j = displ[i]; j < nj; ++
j) {
868 if (all_bb_relation[j] >= 2 && mark[j] >= 0 && rthost[mark[j]] ==
nrnmpi_myid) {
871 if (all_bb_relation[j] > 2) { ++
j; }
878 for (j=0; j <
n; ++
j) {
886 for (j=0; j <
n; ++
j) {
891 int rank = 0;
int mapsize = 0;
892 for (k=0; k < nt; ++
k) {
893 if (mark[k] == j && all_bb_relation[k] >= 2) {
894 const auto&
result = s2rt->insert({allsid[
k], rank});
898 if (all_bb_relation[k] == 2) {
913 rtree_[
i]->reorder(j, nt, mark, all_bb_relation, allsid);
920 for (
int ii=0, j=0; ii < multisplit_list_->count(); ++ii, ++
j) {
927 if (ms->
nd[1]) { ++
j; }
931 for (
int ii=0; ii < multisplit_list_->count(); ++ii) {
937 for (j=0; j <
n; ++
j) {
939 bb_relation[j], rthost[j]);
944 for (i=0; i <
n; ++
i) {
946 if (rthost[i] >= 0)
for (j=0; j < 2; ++
j) {
957 if (bb_relation[i] == 2) {
963 if (narea2rt_) { area2rt_ =
new Area2RT[narea2rt_]; }
964 if (narea2buf_) { area2buf_ =
new Area2Buf[narea2buf_]; }
969 narea2rt_ = narea2buf_ = 0;
970 for (i=0; i <
n; ++
i) {
972 if (rthost[i] >= 0)
for (j=0; j < 2; ++
j) {
978 Area2RT& art = area2rt_[narea2rt_];
984 if (bb_relation[i] > 2) {
996 Area2Buf& ab = area2buf_[narea2buf_];
1002 if (bb_relation[i] > 2) {
1008 if (bb_relation[i] == 2) {
1015 for (i = 0; i < narea2rt_; ++
i) {
1019 for (i = 0; i < narea2buf_; ++
i) {
1026 for (i=0; i <
n; ++
i) {
1027 printf(
"%d %d sid=%d bbrelation=%d rthost=%d rt=%p\n",
nrnmpi_myid, i, sid[i], bb_relation[i], rthost[i], rt[i]);
1029 for (i=0; i < nrnmpi_numprocs+1; ++
i) {
1032 for (i=0; i < nt; ++
i) {
1033 printf(
"%d %d allsid=%d mark=%d all_bb_relation=%d\n",
nrnmpi_myid, i, allsid[i], mark[i], all_bb_relation[i]);
1053 int nh = 0;
int type = 0;
int nh1 = 0;
1054 for (j=displ[i]; j < displ[i+1]; ++
j) {
1057 if (bb_relation[mark[j]] == 1) {
1058 assert(all_bb_relation[j] == 0);
1062 }
else if (all_bb_relation[j] == 1) {
1066 }
else if (all_bb_relation[j] < 2) {
1070 }
else if (all_bb_relation[j] >= 2) {
1071 int rth = rthost[mark[
j]];
1081 if (all_bb_relation[j] > 2) {
1091 for (
int jj = displ[i]; jj <
j; ++jj) {
1092 if (rth == rthost[mark[jj]]) {
1099 if (all_bb_relation[j] > 2) {
1106 nh = (type&1) + ((type/2)&1) + ((type/4)&1) + ((type/010)&1);
1107 nthost_ += nh + nh1;
1112 for (i=0; i < nthost_; ++
i) {
1114 msti_[
i].nd_rt_index_ = 0;
1115 msti_[
i].nd_rt_index_th_ = 0;
1116 msti_[
i].offdiag_ = 0;
1117 msti_[
i].ioffdiag_ = 0;
1118 msti_[
i].rthost_ = -1;
1121 nodeindex_buffer_ =
new int[ndbsize];
1122 nodeindex_buffer_th_ =
new int[ndbsize];
1123 nodeindex_rthost_ =
new int[ndbsize];
1125 for (i=0; i < ndbsize; ++
i) {
1126 nodeindex_rthost_[
i] = -1;
1131 trecvbuf_ =
new double[tbsize];
1132 tsendbuf_ =
new double[tbsize];
1145 for (j=displ[i]; j < displ[i+1]; ++
j) {
1146 if (mark[j] >= 0 && bb_relation[mark[j]] == 0 && all_bb_relation[j] == 1) {
1147 nodeindex_buffer_th_[
k] = threadid[mark[
j]];
1148 nodeindex_buffer_[k++] = inode[mark[
j]];
1154 msti_[nthost_].displ_ = mdisp;
1155 msti_[nthost_].size_ = 2*b;
1157 msti_[nthost_].host_ =
i;
1158 msti_[nthost_].nnode_ = b;
1159 msti_[nthost_].tag_ = 1;
1169 int* tmphost =
new int[
n];
1172 for (j=displ[i]; j < displ[i+1]; ++
j) {
1174 if (j1 >= 0 && bb_relation[j1] >= 2 && rthost[j1] != i) {
1175 tmphost[ntmphost++] = rthost[j1];
1176 for (
int itmp = 1; itmp < ntmphost; ++itmp) {
1177 if (tmphost[itmp-1] == rthost[j1]) {
1186 for (
int itmp = 0; itmp < ntmphost; ++itmp) {
1193 for (j=displ[i]; j < displ[i+1]; ++
j) {
1194 int jj = j - displ[
i];
1199 if (j1 >= 0 && bb_relation[j1] >= 2
1200 && rthost[j1] == tmphost[itmp]) {
1204 nodeindex_rthost_[
k] = rthost[j1];
1205 nodeindex_buffer_th_[
k] = threadid[j - displ[
i]];
1206 nodeindex_buffer_[k++] = inode[j - displ[
i]];
1209 if (bb_relation[jj] > 2) {
1269 ix[1] = inode[jj + 1];
1270 ixth[0] = vec2ms[jj]->ithread;
1271 ixth[1] = vec2ms[jj]->ithread;
1277 printf(
"%d offdiag nbrt=%d iii=%d i=%d j1=%d jj=%d sid=%d ix = %d %d back = %d %d\n",
1278 nrnmpi_myid, nbackrt_, iii, i, j1, jj, sid[j1], ix[0], ix[1], t.
backAindex_[i], t.
backBindex_[i]);
1282 nodeindex_rthost_[
k] = rthost[j1];
1284 nodeindex_buffer_th_[
k] = threadid[jj];
1285 nodeindex_buffer_[k++] = inode[jj];
1292 msti_[nthost_].displ_ = mdisp;
1293 msti_[nthost_].size_ = 2*b + br;
1295 msti_[nthost_].host_ = tmphost[itmp];
1296 msti_[nthost_].rthost_ = tmphost[itmp];
1297 msti_[nthost_].nnode_ = b;
1298 msti_[nthost_].tag_ = 3;
1307 for (j=displ[i]; j < displ[i+1]; ++
j) {
1308 if (mark[j] >= 0 && bb_relation[mark[j]] == 0 && all_bb_relation[j] == 0) {
1309 nodeindex_buffer_th_[
k] = threadid[mark[
j]];
1310 nodeindex_buffer_[k++] = inode[mark[
j]];
1316 msti_[nthost_].displ_ = mdisp;
1317 msti_[nthost_].size_ = 2*b;
1319 msti_[nthost_].host_ =
i;
1320 msti_[nthost_].nnode_ = b;
1321 msti_[nthost_].tag_ = 2;
1328 ihost_reduced_long_ = nthost_;
1333 for (j=displ[i]; j < displ[i+1]; ++
j) {
1335 if (j1 >= 0 && all_bb_relation[j] >= 2
1337 && rthost[j1] != i) {
1344 int ib = mdisp + 2*b;
1346 rt[j1]->
fillrmap(allsid[j], -1, trecvbuf_ + ib+1);
1347 rt[j1]->
fillrmap(allsid[j], allsid[j], trecvbuf_ + ib);
1348 rt[j1]->
fillsmap(allsid[j], tsendbuf_ + ib+1, tsendbuf_ + ib);
1350 if (all_bb_relation[j] > 2) {
1362 for (j=displ[i]; j < displ[i+1]; ++
j) {
1364 if (j1 >= 0 && all_bb_relation[j] >= 2
1366 && rthost[j1] != i) {
1367 if (all_bb_relation[j] > 2) {
1368 int ib = mdisp + 2*b + br;
1370 rt[j1]->
fillrmap(allsid[j+1], allsid[j], trecvbuf_ + ib);
1371 rt[j1]->
fillrmap(allsid[j], allsid[j+1], trecvbuf_ + ib + 1);
1379 msti_[nthost_].displ_ = mdisp;
1380 msti_[nthost_].size_ = 2*b + br;
1382 msti_[nthost_].host_ =
i;
1384 msti_[nthost_].nnode_ = b;
1388 msti_[nthost_].tag_ = 3;
1395 ihost_short_long_ = nthost_;
1398 for (j=displ[i]; j < displ[i+1]; ++
j) {
1399 if (mark[j] >= 0 && bb_relation[mark[j]] == 1 && all_bb_relation[j] == 0) {
1400 nodeindex_buffer_th_[
k] = threadid[mark[
j]];
1401 nodeindex_buffer_[k++] = inode[mark[
j]];
1407 msti_[nthost_].displ_ = mdisp;
1408 msti_[nthost_].size_ = 2*b;
1410 msti_[nthost_].host_ =
i;
1411 msti_[nthost_].nnode_ = b;
1412 msti_[nthost_].tag_ = 1;
1419 for (i=0; i < nthost_; ++
i) {
1427 for (i=0; i <
k; ++
i) {
1428 j=nodeindex_buffer_[
i];
1430 secname(v_node[j]->
sec), v_node[j]->sec_node_index_);
1440 for (
int ii=0; ii < multisplit_list_->count(); ++ii) {
1441 ms = multisplit_list_->item(ii);
1476 ++
i;
if (ms->
nd[1]) { ++
i; }
1481 msti_[0].nodeindex_th_ = nodeindex_buffer_th_;
1482 msti_[0].nodeindex_ = nodeindex_buffer_;
1483 for (i=1; i < nthost_; ++
i) {
1484 msti_[
i].nodeindex_th_ = msti_[i-1].nodeindex_th_
1485 + msti_[i-1].nnode_;
1486 msti_[
i].nodeindex_ = msti_[i-1].nodeindex_
1487 + msti_[i-1].nnode_;
1494 for (i=0; i < ndbsize; ++
i) {
1497 if (i == tmp_index) {
1498 iarea_short_long_ = narea_;
1502 if (nodeindex_rthost_[i] < 0) {
1509 buf_area_indices_ =
new int[narea_];
1510 area_node_indices_ =
new int[narea_];
1512 for (i=0; i < ndbsize; ++
i) {
1517 buf_area_indices_[
k] = 2*
i;
1518 area_node_indices_[
k] = nodeindex_buffer_[
i];
1526 for (i=0; i < nthost_; ++
i) {
1528 if (msti.
tag_ == 3) {
1540 for (k = 0; k < narea2buf_; ++
k) {
1541 if (area2buf_[k].inode == in
1554 for (ioff = 0; ioff < msti.
nnode_rt_; ++ioff) {
1583 delete [] bb_relation;
1584 delete [] all_bb_relation;
1585 delete [] connects2short;
1599 for (
int i=0;
i < multisplit_list_->count(); ++
i) {
1608 for (
int j=0;
j < 2; ++
j)
if (ms.
nd[
j]) {
1621 for (
int i = 0;
i < narea2rt_; ++
i) {
1645 Printf(
"%d nthost_=%d\n",
id, nthost_);
1646 for (i=0; i < nthost_; ++
i) {
1649 for (j=0; j < ms.
nnode_; ++
j) {
1651 Printf(
"%d %d %d %d %s %d\n",
1658 int* all_bb_relation) {
1660 for (i = 0; i < nt; ++
i) {
1661 if (mark[i] == -1 && allsid[i] == sid) {
1663 if (all_bb_relation[i] > 2) {
1664 int sid2 = all_bb_relation[
i] - 3;
1665 reduced_mark(m, sid2, nt, mark, allsid, all_bb_relation);
1770 Printf(
" MultiSplit %ld\n", multisplit_list_->count());
1771 for (i=0; i < multisplit_list_->count(); ++
i) {
1788 Printf(
" nbackrt_=%d i, backsid_[i], backAindex_[i], backBindex_[i]\n", t.
nbackrt_);
1800 Printf(
" ReducedTree %d\n", nrtree_);
1801 for (i=0; i < nrtree_; ++
i) {
1804 rt->
pr_map(tbsize, trecvbuf_);
1806 Printf(
" MultiSplitTransferInfo %d\n", nthost_);
1807 for (i=0; i < nthost_; ++
i) {
1809 Printf(
" %d host=%d rthost=%d nnode=%d nnode_rt=%d size=%d tag=%d\n",
1812 Printf(
" nodeindex=%p nodeindex_buffer = %p\n", m.
nodeindex_,nodeindex_buffer_);
1815 Printf(
" ndbsize=%d i nodeindex_buffer_=%p nodeindex_rthost_=%p\n",
1816 ndbsize, nodeindex_buffer_, nodeindex_rthost_);
1818 for (
int i=0; i < ndbsize; ++
i) {
1819 Printf(
" %d %d %d\n", i, nodeindex_buffer_[i], nodeindex_rthost_[i]);
1822 Printf(
" tbsize=%d trecvbuf_=%p tsendbuf_=%p\n", tbsize,
1823 trecvbuf_, tsendbuf_);
1878 triang_subtree2backbone(_nt);
1879 triang_backbone(_nt);
1885 bksub_backbone(_nt);
1886 bksub_subtrees(_nt);
1933 if (_nt->
id == 0)
for (i=0; i < narea2buf_; ++
i) {
1939 for (i=0; i < narea2rt_; ++
i) {
1948 if (_nt->
id == 0) {matrix_exchange_nocap();}
1958 if (_nt->
id == 0)
for (i=0; i < narea2buf_; ++
i) {
1966 for (i=0; i < narea2rt_; ++
i) {
1984 if (_nt->
id == 0)
for (i=0; i < narea2buf_; ++
i) {
1989 for (i=0; i < narea2rt_; ++
i) {
2025 #define EXCHANGE_ON 1 2028 for (i=0; i < nthost_; ++
i) {
2037 printf(
"%d post receive %d displ=%d size=%d host=%d tag=%d\n",
2046 for (i=0; i < ihost_reduced_long_; ++
i) {
2049 tbuf = tsendbuf_ + mt.
displ_;
2050 for (jj = 0; jj < mt.
nnode_; ++jj) {
2062 printf(
"%d send to %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2068 for (j = 0; j < mt.
nnode_; ++
j) {
2069 printf(
"%d send to %d tbuf[%d] = %g tbuf[%d] = %g from node %d\n",
2074 printf(
"%d send to %d offdiag tbuf[%d] = %g\n",
2082 for (i=0; i < narea2buf_; ++
i) {
2087 for (j = 0; j < ab.
n; ++
j) {
2088 tbuf[ab.
ibuf[
j]] *= afac;
2090 printf(
"%d area2buf * afac=%g i=%d j=%d node=%d ibuf=%d buf=%g\n",
nrnmpi_myid, afac,
2097 for (i=0; i < ihost_reduced_long_; ++
i) {
2099 tbuf = tsendbuf_ + mt.
displ_;
2102 printf(
"%d post send %d displ=%d size=%d host=%d tag=%d\n",
2108 for (i=ihost_reduced_long_; i < nthost_; ++
i) {
2117 for (i=ihost_reduced_long_; i < nthost_; ++
i) {
2124 printf(
"%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d displ=%d\n",
2130 for (i=0; i < tbsize_; ++
i) {
printf(
"%d trecvbuf[%d] = %g\n",
nrnmpi_myid, i, trecvbuf_[i]);}
2139 for (i=0; i < narea2rt_; ++
i) {
2143 for (j = 0; j < ar.
n; ++
j) {
2147 #endif //EXCHANGE_ON 2149 for (i = 0; i < nrtree_; ++
i) {
2158 for (i=ihost_reduced_long_; i < nthost_; ++
i) {
2161 tbuf = tsendbuf_ + mt.
displ_;
2165 if (tag == 3) { tag = 4; }
2176 for (i=0; i < ihost_reduced_long_; ++
i) {
2183 printf(
"%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2196 for (i = 0; i < ihost_reduced_long_; ++
i) {
2199 tbuf = trecvbuf_ + mt.
displ_;
2200 for (jj = 0; jj < mt.
nnode_; ++jj) {
2204 RHS(k) += tbuf[j++];
2208 for (j = 0; j < mt.
nnode_; ++
j) {
2209 printf(
"%d received from %d tbuf[%d] = %g tbuf[%d] = %g added to node %d\n",
2215 #endif //EXCHANGE_ON 2232 #define EXCHANGE_ON 1 2235 for (i=0; i < nthost_; ++
i) {
2244 printf(
"%d post receive %d displ=%d size=%d host=%d tag=%d\n",
2251 for (i=0; i < ihost_reduced_long_; ++
i) {
2254 tbuf = tsendbuf_ + mt.
displ_;
2255 for (jj = 0; jj < mt.
nnode_; ++jj) {
2267 printf(
"%d send to %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2273 for (j = 0; j < mt.
nnode_; ++
j) {
2274 printf(
"%d send to %d tbuf[%d] = %g tbuf[%d] = %g from node %d\n",
2279 printf(
"%d send to %d offdiag tbuf[%d] = %g\n",
2287 for (i=0; i < ihost_reduced_long_; ++
i) {
2289 tbuf = tsendbuf_ + mt.
displ_;
2292 printf(
"%d post send %d displ=%d size=%d host=%d tag=%d\n",
2298 for (i=ihost_reduced_long_; i < nthost_; ++
i) {
2307 for (i=ihost_reduced_long_; i < nthost_; ++
i) {
2314 printf(
"%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d displ=%d\n",
2320 for (i=0; i < tbsize_; ++
i) {
printf(
"%d trecvbuf[%d] = %g\n",
nrnmpi_myid, i, trecvbuf_[i]);}
2326 #endif //EXCHANGE_ON 2329 for (i = 0; i < nrtree_; ++
i) {
2337 for (i=ihost_short_long_; i < nthost_; ++
i) {
2340 tbuf = trecvbuf_ + mt.
displ_;
2341 for (jj = 0; jj < mt.
nnode_; ++jj) {
2348 for (j = 0; j < mt.
nnode_; ++
j) {
2349 printf(
"%d received from %d tbuf[%d] = %g tbuf[%d] = %g added to node %d\n",
2358 for (i=ihost_reduced_long_; i < nthost_; ++
i) {
2361 tbuf = tsendbuf_ + mt.
displ_;
2365 if (tag == 3) { tag = 4; }
2376 for (i=0; i < ihost_reduced_long_; ++
i) {
2383 printf(
"%d receive from %d nnode=%d nnode_rt=%d size=%d tag=%d\n",
2391 for (i = 0; i < ihost_reduced_long_; ++
i) {
2394 tbuf = trecvbuf_ + mt.
displ_;
2395 for (jj = 0; jj < mt.
nnode_; ++jj) {
2403 for (j = 0; j < mt.
nnode_; ++
j) {
2404 printf(
"%d received from %d tbuf[%d] = %g tbuf[%d] = %g added to node %d\n",
2410 #endif //EXCHANGE_ON 2426 rhs =
new double[4*
n];
2434 smap =
new double*[nmap];
2435 rmap =
new double*[nmap];
2436 ismap =
new int[nmap];
2437 irmap =
new int[nmap];
2438 nzindex =
new int[
n];
2439 rmap2smap_index =
new int[nmap];
2443 for (i = 0; i < nmap; ++
i) {
2448 rmap2smap_index[
i] = -1;
2462 delete [] rmap2smap_index;
2473 for (i=0; i <
n; ++
i) {
2480 for (i=0; i < nmap; i+=2) {
2482 if (*rmap[i+1] == 1e50) {
2483 v[
j] = *rmap[
i] * 1
e-50;
2484 nzindex[
j] = rmap2smap_index[
i];
2491 for (i=0; i <
n; ++
i) {
2492 printf(
"%d ReducedTree %2d %12.5g %12.5g %d %12.5g\n",
2496 for (i=0; i < nsmap; i += 2) {
2498 if (nzindex[j] == -1 || i == nzindex[j]) {
2516 for (i=0; i <
n; ++
i) {
2517 printf(
"%d ReducedTree %2d %12.5g %12.5g %2d %12.5g %12.5g\n",
2522 for (i = n - 1; i > 0; --
i) {
2529 for (i = 1; i <
n; ++
i) {
2534 for (i=0; i <
n; ++
i) {
2543 for (i=0; i < n4; ++
i) {
rhs[
i] = 0.0; }
2544 for (i=0; i < nmap; ++
i) {
rhs[irmap[
i]] += *rmap[
i]; }
2546 for (i=0; i < nmap; ++
i){
2547 printf(
"%d %p gather i=%d ie=%d %g\n",
nrnmpi_myid,
this, i, irmap[i], *rmap[i]);
2557 for (i=0; i < nsmap; i += 2) {
2559 smap[i], *smap[i], smap[i+1], *smap[i+1]);
2563 for (i=0; i < nsmap; i += 2) {
2565 *smap[
i] = 1e30*
rhs[ismap[
i]];
2569 for (i=0; i < nsmap; i += 2) if (i > 10){
2571 smap[i], *smap[i], smap[i+1], *smap[i+1]);
2579 for (i=0; i < nmap; ++
i) {
2584 if (rmap[i] >= trbuf && rmap[i] < (trbuf + tsize)) {
2585 Printf(
" %2d rhs[%2d] += tbuf[%ld]\n", i, irmap[i], rmap[i]-trbuf);
2592 Printf(
" %2d rhs[%2d] d[%d] += d[%ld]\n", i, irmap[i], irmap[i] -
n, rmap[i] - nt->
_actual_d);
2594 if (rmap[i] >= t.
sid1A && rmap[i] < (t.
sid1A + nb)) {
2595 Printf(
" %2d rhs[%2d] a[%d] += sid1A[%ld]", i, irmap[i], irmap[i] - 2*
n, rmap[i]-t.
sid1A);
2601 if (rmap[i] >= t.
sid1B && rmap[i] < (t.
sid1B + nb)) {
2602 Printf(
" %2d rhs[%2d] b[%d] += sid1B[%ld]", i, irmap[i], irmap[i] - 3*
n, rmap[i]-t.
sid1B);
2619 int* e1 =
new int[
n-1];
2620 int* e2 =
new int[
n-1];
2622 int* sid =
new int[
n];
2625 for (i=0; i <
n; ++
i) { order[
i] = -1; }
2629 for (i = 0; i < nt; ++
i) {
2630 if (mark[i] == j && allbbr[i] == 2) { singlesid = allsid[
i]; }
2631 if (mark[i] == j && allbbr[i] > 2 && allsid[i] < allbbr[i]-3) {
2634 const auto& e1ieiter = s2rt->find(allsid[i]);
2636 e1[ie] = e1ieiter->second;
2637 sid[e1[ie]]= allsid[
i];
2638 const auto& e2ieiter = s2rt->find(allbbr[i]-3);
2640 e2[ie] = e2ieiter->second;
2641 sid[e2[ie]] = allbbr[
i]-3;
2646 if (ne == 0) {
assert(singlesid >= 0); sid[0] = singlesid; }
2652 while (ordered < n) {
2654 for (i=0; i <
ne; ++
i) {
2656 if (order[e1[i]] >= 0) {
2657 assert(order[e2[i]] == -1);
2658 ip[ordered] = order[e1[
i]];
2659 order[e2[
i]] = ordered++;
2662 }
else if (order[e2[i]] >= 0) {
2663 assert(order[e1[i]] == -1);
2664 ip[ordered] = order[e2[
i]];
2665 order[e1[
i]] = ordered++;
2675 for (i=0; i <
n; ++
i) {
2676 (*s2rt)[sid[
i]] = order[
i];
2681 for (i=0; i <
n; ++
i) {
2684 for (i=0; i <
n; ++
i) {
2696 const auto& sid1_iter = s2rt->find(sid1);
2698 const int i = sid1_iter->second;
2704 }
else if (sid2 == sid1) {
2707 const auto& sid2_iter = s2rt->find(sid2);
2709 j = sid2_iter->second;
2712 }
else if (ip[j] == i) {
2724 rmap2smap_index[irfill] = nsmap;
2729 const auto& sid_iter = s2rt->find(sid);
2731 const int i = sid_iter->second;
2746 for (i=i3 - 1; i >= backbone_end; --
i) {
2753 printf(
"end of triang_subtree2backbone\n");
2754 for (i=i1; i < backbone_end; ++
i) {
2755 printf(
"i=%d D=%g RHS=%g\n", i,
D(i),
RHS(i));
2766 for (i=backbone_sid1_begin; i < backbone_end; ++
i) {
2771 for (i = backbone_sid1_begin-1; i >= backbone_interior_begin; --
i) {
2773 j = i - backbone_begin;
2774 jp = ip - backbone_begin;
2786 for (i = backbone_interior_begin; i < backbone_sid1_begin; ++
i) {
2788 j = i - backbone_begin;
2789 if (ip < backbone_interior_begin) {
2793 jp = ip - backbone_begin;
2802 for (i = backbone_sid1_begin; i < backbone_end; ++
i) {
2804 j = i - backbone_begin;
2805 if (ip < backbone_interior_begin) {
2809 jp = ip - backbone_begin;
2812 D(i) -= p *
S1A(jp);
2819 for (i=i1; i < backbone_end; ++
i) {
2828 int i,
j, ip, ip1, ip2;
2829 double a, b,
p, vsid1;
2832 j = backbone_long_sid1_begin;
2836 for (i=backbone_long_begin; i < backbone_interior_begin; ++
i) {
2837 a =
S1A(i - backbone_begin);
2838 b =
S1B(j - backbone_begin);
2851 for (i = backbone_sid1_begin; i < backbone_end; ++
i) {
2854 RHS(j) -=
S1A(j - backbone_begin) * vsid1;
2858 for (i = backbone_interior_begin; i < backbone_sid1_begin; ++
i) {
2859 j = i - backbone_begin;
2865 for (i=i1; i < backbone_end; ++
i) {
2875 j = backbone_sid1_begin;
2876 for (i=backbone_begin; i < backbone_long_begin; ++
i) {
2877 a =
S1A(i - backbone_begin);
2878 b =
S1B(j - backbone_begin);
2880 printf(
"%d part1 i=%d j=%d\n",
2882 printf(
"%d part1 d=%12.5g a=%12.5g rhs=%12.5g\n",
2884 printf(
"%d part1 b=%12.5g d=%12.5g rhs=%12.5g\n",
2894 printf(
"%d part1 result %12.5g %12.5g\n",
2913 for (i=i1; i < backbone_begin; ++
i) {
2917 for (i=backbone_end; i < i3; ++
i) {
2924 for (i=i1; i < backbone_end; ++
i) {
2955 if (!classical_root_to_multisplit_) {
return; }
2976 hoc_execerror(
"ParallelContext.nthread() was changed after ParallelContext.multisplit()",0);
2996 i2 = i1 + nt->
ncell;
2998 int nnode = i3 - i1;
3007 int i,
j, i0, ii, in, ip, nback, ib, ibl, ibs;
3008 int is0, is1, k0, k1, iss0, iss1, isl0, isl1;
3011 printf(
"multisplit_v_setup %d\n", nnode);
3012 for (i=i1; i < i3; ++
i) {
3018 printf(
"multisplit_v_setup %d\n", nnode);
3019 printf(
"\nclassical order\n");
3020 for (i=i1; i < i3; ++
i) {
3040 backbone_begin = i2;
3041 backbone_long_begin = backbone_begin;
3042 if (classical_root_to_multisplit_) {
3044 for (
int ii=0; ii < multisplit_list_->count(); ++ii) {
3045 ms = multisplit_list_->item(ii);
3047 if (ms->
nd[1]->
_nt != nt) {
continue; }
3050 --backbone_long_begin;
3058 backbone_interior_begin = i2;
3060 backsid_ =
new int[nbackrt_];
3061 backAindex_ =
new int[nbackrt_];
3062 backBindex_ =
new int[nbackrt_];
3066 for (i = i1; i < i2; ++
i) {
3068 if (classical_root_to_multisplit_) {
3069 const auto& msiter = classical_root_to_multisplit_->find(oldroot);
3070 if (msiter != classical_root_to_multisplit_->end()) {
3071 nd = msiter->second->nd[0];
3072 if (nd == oldroot) {
3097 printf(
"\nsid0 is a root\n");
3098 for (i=i1; i < i3; ++
i) {
3111 ibl = backbone_long_begin;
3112 ibs = backbone_begin;
3113 for (i = i1; i < i2; ++
i) {
3115 if(classical_root_to_multisplit_ && classical_root_to_multisplit_->find(nd) != classical_root_to_multisplit_->end()) {
3119 node[ib-i1] = ms->
nd[0];
3134 node[ii-i1] = ms->
nd[0];
3144 backbone_sid1_begin = backbone_end - (backbone_interior_begin - backbone_begin);
3145 backbone_long_sid1_begin = backbone_sid1_begin + (backbone_long_begin - backbone_begin);
3149 for (i=i1; i < backbone_interior_begin; ++
i) {
3150 assert(parent[i-i1] == 0);
3153 nback = backbone_end - backbone_begin;
3155 sid0i =
new int[nback];
3156 sid1A =
new double[nback];
3157 sid1B =
new double[nback];
3158 for (i=0; i < nback; ++
i) {
3159 sid1A[
i] = sid1B[
i] = 0.;
3163 iss0 = backbone_begin;
3164 iss1 = backbone_sid1_begin;
3165 isl0 = backbone_long_begin;
3166 isl1 = backbone_long_sid1_begin;
3167 ib = backbone_sid1_begin;
3173 for (i = i1; i < i2; ++
i) {
3175 if(classical_root_to_multisplit_) {
3177 const auto &msiter = classical_root_to_multisplit_->find(nd);
3178 if (msiter != classical_root_to_multisplit_->end()) {
3179 ms = msiter->second;
3191 node[is1 - i1] = ms->
nd[1];
3196 node[ib - i1] = nt->
_v_node[ii];
3199 sid0i[ib - backbone_begin] = is0;
3203 backsid_[ibrt] = ms->
sid[0];
3205 backAindex_[ibrt] = is0 - backbone_begin;
3206 backBindex_[ibrt] = is1 - backbone_begin;
3222 for (i=i1; i < i3; ++
i) {
3226 for (i=i1; i < backbone_end; ++
i) {
3228 printf(
"backbone i=%d %d %s %d", i, node[i]->v_node_index,
secname(node[i]->
sec), node[i]->sec_node_index_);
3230 parent[i]?parent[i]->sec_node_index_:-1);
3236 for (i = i1; i < i3; ++
i) {
3252 node[k0-i1] = nt->
_v_node[k1];
3260 for (i = i1; i < i3; ++
i) {
3261 assert(i == node[i-i1]->eqn_index_);
3270 printf(
"\nmultisplit reordering\n");
3271 printf(
"backbone begin=%d long=%d interior=%d sid1=%d long=%d end=%d\n",
3272 backbone_begin, backbone_long_begin, backbone_interior_begin,
3273 backbone_sid1_begin, backbone_long_sid1_begin, backbone_end);
3274 for (i=i1; i < i3; ++
i) {
3288 for (i=i1; i < i3; ++
i) {
3294 for (i=0; i < msc_->
nthost_; ++
i) {
3296 for (j = 0; j < msti.
nnode_rt_; j += 2) {
3311 for (i=0; i < _nt->
end; ++
i) {
3321 Printf(
" root\t\t %10.5g %10.5g", 0., 0.);
3337 FILE* f;
char fname[100];
3340 f =
fopen(fname,
"w");
3345 for (i=0; i < _nt->
end; ++
i) {
3355 fprintf(f,
" root\t\t %10.5g %10.5g", 0., 0.);
3372 double a, b, d,
rhs;
3379 for (
int ii=0; ii < multisplit_list_->count(); ++ii) {
3380 ms = multisplit_list_->item(ii);
3382 if (i < i1 || i >= i3) {
continue; }
3387 a = mth_[it].S1A(0);
3390 s, ms->
sid[0], b, d, a, rhs);
3397 s, ms->
sid[1], b, d, a, rhs);
3409 }
else if (pnd == 0) {
3425 }
else if (pnd == 0) {
ReducedTree(MultiSplitControl *, int rank, int mapsize)
void bksub_backbone(NrnThread *)
void nrn_multisplit_adjust_rhs(NrnThread *)
void fillsmap(int sid, double *prhs, double *pdiag)
static void nrnmpi_int_allgather(int *, int *, int)
void * nrn_multisplit_triang(NrnThread *)
static double nrnmpi_splitcell_wait_
struct Section * parentsec
std::unique_ptr< MultiSplitTable > classical_root_to_multisplit_
double * nrn_classicalNodeA(Node *nd)
void * nrn_multisplit_bksub(NrnThread *)
void reduced_mark(int, int, int, int *, int *, int *)
void nrn_multisplit_nocap_v()
void pmatf(bool full=false)
Represent main neuron object computed by single thread.
static void nrnmpi_barrier()
struct Node * _classical_parent
MultiSplitTransferInfo * msti_
void nrn_matrix_node_free()
std::unique_ptr< Int2IntTable > s2rt
static philox4x32_key_t k
sprintf(buf," if (secondorder) {\ " int _i;\" " for(_i=0;_i< %d;++_i) {\" " _p[_slist%d[_i]]+=dt *_p[_dlist%d[_i]];\" " }}\", numeqn, listnum, listnum)
void nrn_multisplit_nocap_v_part1(NrnThread *)
std::unordered_map< Node *, MultiSplit * > MultiSplitTable
void pmat(bool full=false)
static void nrnmpi_postrecv_doubles(double *, int, int, int, void **)
static void multisplit_v_setup()
#define implementPtrList(PtrList, T)
void bksub_subtrees(NrnThread *)
void nrn_multisplit_nocap_v_part3(NrnThread *)
void nrn_multisplit_nocap_v_part2(NrnThread *)
void triang_subtree2backbone(NrnThread *)
int backbone_interior_begin
static MultiSplit MultiSplitControl * msc_
const char * secname(Section *sec)
void nrnmpi_multisplit(Section *, double x, int sid, int backbone_style)
static const char * fname(const char *name)
double * nrn_classicalNodeB(Node *nd)
void reorder(int j, int nt, int *mark, int *all_bb_relation, int *allsid)
void hoc_execerror(const char *, const char *)
std::unordered_map< int, int > Int2IntTable
void triang_backbone(NrnThread *)
fprintf(stderr, "Don't know the location of params at %p\, pp)
void matrix_exchange_nocap()
void(* nrn_multisplit_setup_)()
int backbone_long_sid1_begin
static double gather(void *v)
void * nrn_multisplit_reduce_solve(NrnThread *)
virtual ~MultiSplitThread()
static void nrnmpi_int_allgatherv(int *, int *, int *, int *)
static void nrnmpi_send_doubles(double *, int, int, int)
void pr_map(int, double *)
double nrnmpi_rtcomp_time_
void bksub_short_backbone_part1(NrnThread *)
void nrnmpi_multisplit_clear()
static double scatter(void *v)
void multisplit_nocap_v_part1(NrnThread *)
void multisplit_nocap_v_part3(NrnThread *)
static void multisplit_solve()
void v_setup(NrnThread *)
static double nrnmpi_wtime()
void multisplit_nocap_v_part2(NrnThread *)
void multisplit(Section *, double, int, int)
int nrn_multisplit_active_
static Node * node(Object *)
declarePtrList(MultiSplitList, MultiSplit) implementPtrList(MultiSplitList
MultiSplitList * multisplit_list_
static void nrnmpi_wait(void **)
void fillrmap(int sid1, int sid2, double *pd)
Node * node_exact(Section *sec, double x)
void nrn_multisplit_ptr_update()
void(* nrn_multisplit_solve_)()
virtual ~MultiSplitControl()
void multisplit_adjust_rhs(NrnThread *)