diff --git a/src/nrncvode/netcvode.cpp b/src/nrncvode/netcvode.cpp
index 6e4522c852..b9be01a69c 100644
--- a/src/nrncvode/netcvode.cpp
+++ b/src/nrncvode/netcvode.cpp
@@ -1620,12 +1620,11 @@ bool NetCvode::init_global() {
                 // The sum of the ml[i].nodecount must equal the mechanism
                 // nodecount for the cell and each ml[i] data must be contiguous.
                 // Ideally the node permutation would be such that each cell
-                // is contiguous. So only needing a ml[0]. That is sadly not
-                // the case with the default permutation. The cell root nodes are
-                // all at the beginning, and thereafter only Section nodes are
-                // contiguous. It would be easy to permute nodes so that each cell
-                // is contiguous (except root node). This would result in a
-                // CvMembList.ml.size() == 1 almost always with an exception of
+                // is contiguous. So only needing a ml[0]. This is now mostly
+                // the case with the default permutation. The root nodes are
+                // all at the beginning, and thereafter all the cell nodes are
+                // contiguous. This results in a
+                // CvMembList.ml.size() == 1 almost always, with an exception of
                 // size() == 2 only for extracellular and for POINT_PROCESSes
                 // located both in the root node and other cell nodes.
@@ -1674,6 +1673,9 @@ bool NetCvode::init_global() {
                     // instance to cml->ml
                     cml->ml.emplace_back(cml->index);
                     assert(cml->ml.back().nodecount == 0);
+                    // For the default node permutation, cell
+                    // nodes are contiguous except for rootnode.
+                    assert(cml->ml.size() < 3);
                 }
             }
diff --git a/src/nrnoc/multicore.cpp b/src/nrnoc/multicore.cpp
index 7827bce1b3..10286a6bc0 100644
--- a/src/nrnoc/multicore.cpp
+++ b/src/nrnoc/multicore.cpp
@@ -47,6 +47,7 @@ the handling of v_structure_change as long as possible.
 #include
 #include
 #include
+#include <queue>  // for breadth first cell traversal without recursion

 #define CACHELINE_ALLOC(name, type, size) \
     name = (type*) nrn_cacheline_alloc((void**) &name, size * sizeof(type))
@@ -652,49 +653,45 @@ void nrn_thread_memblist_setup() {
 /* at the beginning of each thread region */
 /* this differs from original secorder where all roots are at the beginning */
 /* in passing, also set start and end indices. */
+// Until 2025-01-03 I was under the misapprehension that (except for roots)
+// this ordering kept cells contiguous. Lvardt sizes for CvMembList.ml disabused
+// me of that. reorder_secorder now, in fact, keeps cells contiguous except
+// for roots.
 void reorder_secorder() {
     Section *sec, *ch;
     Node* nd;
     hoc_Item* qsec;
     hoc_List* sl;
-    int order, isec, j, inode;
+    int order, inode;
+
     /* count and allocate */
-    // ForAllSections(sec)
+    // First pass: count nodes per thread to determine _nt->end for allocation
+    // of _v_node, _v_parent, and _v_parent_index.
     ITERATE(qsec, section_list) {
         Section* sec = hocSEC(qsec);
         sec->order = -1;
     }
-    order = 0;
     for (NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
-        /* roots of this thread */
         sl = _nt->roots;
         inode = 0;
+        // Count root nodes
         ITERATE(qsec, sl) {
             sec = hocSEC(qsec);
             assert(sec->order == -1);
-            secorder[order] = sec;
-            sec->order = order;
-            ++order;
-            nd = sec->parentnode;
-            nd->_nt = _nt;
             inode += 1;
         }
-        /* all children of what is already in secorder */
-        for (isec = order - _nt->ncell; isec < order; ++isec) {
-            sec = secorder[isec];
-            /* to make it easy to fill in PreSyn.nt_*/
-            sec->prop->dparam[9] = {neuron::container::do_not_search, _nt};
-            for (j = 0; j < sec->nnode; ++j) {
-                nd = sec->pnode[j];
-                nd->_nt = _nt;
-                inode += 1;
-            }
-            for (ch = sec->child; ch; ch = ch->sibling) {
-                assert(ch->order == -1);
-                secorder[order] = ch;
-                ch->order = order;
-                ++order;
+        // Count nodes in all sections of each cell via breadth-first traversal
+        ITERATE(qsec, sl) {
+            std::queue<Section*> que;
+            que.push(hocSEC(qsec));
+            while (!que.empty()) {
+                sec = que.front();
+                que.pop();
+                for (ch = sec->child; ch; ch = ch->sibling) {
+                    que.push(ch);
+                }
+                inode += sec->nnode;
             }
         }
         _nt->end = inode;
@@ -702,61 +699,59 @@ void reorder_secorder() {
         CACHELINE_CALLOC(_nt->_v_parent, Node*, inode);
         CACHELINE_CALLOC(_nt->_v_parent_index, int, inode);
     }
-    /* do it again and fill _v_node and _v_parent */
-    /* index each cell section in relative order. Do offset later */
-    // ForAllSections(sec)
-    ITERATE(qsec, section_list) {
-        Section* sec = hocSEC(qsec);
-        sec->order = -1;
-    }
+
+    /* traverse and fill _v_node, _v_parent, and secorder */
     order = 0;
     for (NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
-        /* roots of this thread */
         sl = _nt->roots;
         inode = 0;
+        // Process root nodes
         ITERATE(qsec, sl) {
             sec = hocSEC(qsec);
             assert(sec->order == -1);
-            secorder[order] = sec;
-            sec->order = order;
-            ++order;
             nd = sec->parentnode;
             nd->_nt = _nt;
             _nt->_v_node[inode] = nd;
-            _nt->_v_parent[inode] = nullptr;  // because this is a root node
+            _nt->_v_parent[inode] = nullptr;  // Root nodes have no parent
             _nt->_v_node[inode]->v_node_index = inode;
             ++inode;
         }
-        /* all children of what is already in secorder */
-        for (isec = order - _nt->ncell; isec < order; ++isec) {
-            sec = secorder[isec];
-            /* to make it easy to fill in PreSyn.nt_*/
-            sec->prop->dparam[9] = {neuron::container::do_not_search, _nt};
-            for (j = 0; j < sec->nnode; ++j) {
-                nd = sec->pnode[j];
-                nd->_nt = _nt;
-                _nt->_v_node[inode] = nd;
-                if (j) {
-                    _nt->_v_parent[inode] = sec->pnode[j - 1];
-                } else {
-                    _nt->_v_parent[inode] = sec->parentnode;
+        // Breadth-first traversal to fill arrays and set section order
+        ITERATE(qsec, sl) {
+            std::queue<Section*> que;
+            que.push(hocSEC(qsec));
+            while (!que.empty()) {
+                sec = que.front();
+                que.pop();
+                for (ch = sec->child; ch; ch = ch->sibling) {
+                    que.push(ch);
+                }
+                // Assign section order and thread
+                assert(sec->order == -1);
+                sec->prop->dparam[9] = {neuron::container::do_not_search, _nt};
+                secorder[order] = sec;
+                sec->order = order++;
+                // Process nodes
+                for (int j = 0; j < sec->nnode; ++j) {
+                    nd = sec->pnode[j];
+                    nd->_nt = _nt;
+                    _nt->_v_node[inode] = nd;
+                    if (j) {
+                        _nt->_v_parent[inode] = sec->pnode[j - 1];
+                    } else {
+                        _nt->_v_parent[inode] = sec->parentnode;
+                    }
+                    _nt->_v_node[inode]->v_node_index = inode;
+                    ++inode;
                 }
-                _nt->_v_node[inode]->v_node_index = inode;
-                inode += 1;
-            }
-            for (ch = sec->child; ch; ch = ch->sibling) {
-                assert(ch->order == -1);
-                secorder[order] = ch;
-                ch->order = order;
-                ++order;
             }
         }
-        _nt->end = inode;
+        assert(_nt->end == inode);
     }
     assert(order == section_count);
     /*assert(inode == v_node_count);*/
+
     /* not missing any */
-    // ForAllSections(sec)
     ITERATE(qsec, section_list) {
         Section* sec = hocSEC(qsec);
         assert(sec->order != -1);
@@ -766,23 +761,23 @@ in either case, we can then point to v, d, rhs in proper node order */
-    for (const NrnThread* _nt: for_threads(nrn_threads, nrn_nthread))
+    for (const NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
         for (inode = 0; inode < _nt->end; ++inode) {
             _nt->_v_node[inode]->_classical_parent = _nt->_v_parent[inode];
         }
+    }
     if (nrn_multisplit_setup_) {
         /* classical order abandoned */
         (*nrn_multisplit_setup_)();
     }
     /* because the d,rhs changed, if multisplit is used we need to update
-       the reduced tree gather/scatter pointers
+       the reduced tree gather/scatter pointers
     */
     if (nrn_multisplit_setup_) {
         nrn_multisplit_ptr_update();
     }
 }
-
 void nrn_mk_table_check() {
     std::size_t table_check_cnt_{};
     std::vector<int> ix(n_memb_func, -1);
diff --git a/test/external/CMakeLists.txt b/test/external/CMakeLists.txt
index 243a3ab904..4256f2e7f3 100644
--- a/test/external/CMakeLists.txt
+++ b/test/external/CMakeLists.txt
@@ -7,7 +7,7 @@ include(FetchContent)
 FetchContent_Declare(
   ringtest
   GIT_REPOSITORY https://github.com/neuronsimulator/ringtest
-  GIT_TAG 7d1aee72939f6dfcb2a14db4c7b00725d9e485fa
+  GIT_TAG b096d530e2636b34b79c812356979a0fa4e3323b
   SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/tests/ringtest)

 FetchContent_Declare(
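
Illustration (not part of the patch): the minimal standalone sketch below uses an invented Sec type and a toy two-cell layout, not NEURON's actual Section/Node structures, to show the property the new reorder_secorder relies on. Because each root's queue is drained completely before the next root is visited, every node of a cell lands in one contiguous index range once the per-cell root nodes are placed first; that contiguity is what lets CvMembList.ml.size() stay at 1, or 2 when the root node also carries an instance.

// Standalone sketch; Sec, nnode, and the cell layout are invented for
// illustration only and do not come from the NEURON sources.
#include <cassert>
#include <cstdio>
#include <queue>
#include <vector>

struct Sec {
    int nnode;                // nodes owned by this section
    std::vector<Sec*> child;  // child sections
};

int main() {
    // Two toy cells, each a root section with a few children.
    Sec a2{3, {}}, a1{4, {}}, a0{5, {&a1, &a2}};  // cell A
    Sec b1{2, {}}, b0{6, {&b1}};                  // cell B
    std::vector<Sec*> roots{&a0, &b0};

    // Root nodes come first, one per cell, as in the patch.
    int inode = static_cast<int>(roots.size());

    // Breadth-first traversal of each cell, mirroring the std::queue loop
    // added to reorder_secorder.
    for (Sec* root: roots) {
        const int begin = inode;
        std::queue<Sec*> que;
        que.push(root);
        while (!que.empty()) {
            Sec* sec = que.front();
            que.pop();
            for (Sec* ch: sec->child) {
                que.push(ch);
            }
            inode += sec->nnode;  // this section's nodes get consecutive indices
        }
        // All non-root nodes of this cell occupy [begin, inode); no other
        // cell's sections can interleave because the queue only ever holds
        // sections of the current cell.
        std::printf("cell occupies node indices [%d, %d)\n", begin, inode);
        assert(begin < inode);
    }
    return 0;
}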