Skip to content

Default node order is cell contiguous (except for thread rootnodes). #3300

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
May 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions src/nrncvode/netcvode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1620,12 +1620,11 @@ bool NetCvode::init_global() {
// The sum of the ml[i].nodecount must equal the mechanism
// nodecount for the cell and each ml[i] data must be contiguous.
// Ideally the node permutation would be such that each cell
// is contiguous. So only needing a ml[0]. That is sadly not
// the case with the default permutation. The cell root nodes are
// all at the beginning, and thereafter only Section nodes are
// contiguous. It would be easy to permute nodes so that each cell
// is contiguous (except root node). This would result in a
// CvMembList.ml.size() == 1 almost always with an exception of
// is contiguous. So only needing a ml[0]. This is now mostly
// the case with the default permutation. The root nodes are
// all at the beginning, and thereafter all the cell nodes are
// contiguous. This results in a
// CvMembList.ml.size() == 1 almost always, with an exception of
// size() == 2 only for extracellular and for POINT_PROCESSes
// located both in the root node and other cell nodes.

Expand Down Expand Up @@ -1674,6 +1673,9 @@ bool NetCvode::init_global() {
// instance to cml->ml
cml->ml.emplace_back(cml->index);
assert(cml->ml.back().nodecount == 0);
// For the default node permutation, cell
// nodes are contiguous except for rootnode.
assert(cml->ml.size() < 3);
}
}

Expand Down
119 changes: 57 additions & 62 deletions src/nrnoc/multicore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ the handling of v_structure_change as long as possible.
#include <variant>
#include <vector>
#include <iostream>
#include <queue> // for breadth first cell traversal without recursion

#define CACHELINE_ALLOC(name, type, size) \
name = (type*) nrn_cacheline_alloc((void**) &name, size * sizeof(type))
Expand Down Expand Up @@ -652,111 +653,105 @@ void nrn_thread_memblist_setup() {
/* at the beginning of each thread region */
/* this differs from original secorder where all roots are at the beginning */
/* in passing, also set start and end indices. */
// Until 2025-01-03 I was under the misapprehension that (except for roots)
// this ordering kept cells contiguous. Lvardt sizes for CvMembList.ml disabused
// me of that. reorder_secorder now, in fact, keeps cells contiguous except
// for roots.

void reorder_secorder() {
Section *sec, *ch;
Node* nd;
hoc_Item* qsec;
hoc_List* sl;
int order, isec, j, inode;
int order, inode;

/* count and allocate */
// ForAllSections(sec)
// First pass: count nodes per thread to determine _nt->end for allocation
// of _v_node, _v_parent, and _v_parent_index.
ITERATE(qsec, section_list) {
Section* sec = hocSEC(qsec);
sec->order = -1;
}
order = 0;
for (NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
/* roots of this thread */
sl = _nt->roots;
inode = 0;
// Count root nodes
ITERATE(qsec, sl) {
sec = hocSEC(qsec);
assert(sec->order == -1);
secorder[order] = sec;
sec->order = order;
++order;
nd = sec->parentnode;
nd->_nt = _nt;
inode += 1;
}
/* all children of what is already in secorder */
for (isec = order - _nt->ncell; isec < order; ++isec) {
sec = secorder[isec];
/* to make it easy to fill in PreSyn.nt_*/
sec->prop->dparam[9] = {neuron::container::do_not_search, _nt};
for (j = 0; j < sec->nnode; ++j) {
nd = sec->pnode[j];
nd->_nt = _nt;
inode += 1;
}
for (ch = sec->child; ch; ch = ch->sibling) {
assert(ch->order == -1);
secorder[order] = ch;
ch->order = order;
++order;
// Count nodes in all sections of each cell via breadth-first traversal
ITERATE(qsec, sl) {
std::queue<Section*> que;
que.push(hocSEC(qsec));
while (!que.empty()) {
sec = que.front();
que.pop();
for (ch = sec->child; ch; ch = ch->sibling) {
que.push(ch);
}
inode += sec->nnode;
}
}
_nt->end = inode;
CACHELINE_CALLOC(_nt->_v_node, Node*, inode);
CACHELINE_CALLOC(_nt->_v_parent, Node*, inode);
CACHELINE_CALLOC(_nt->_v_parent_index, int, inode);
}
/* do it again and fill _v_node and _v_parent */
/* index each cell section in relative order. Do offset later */
// ForAllSections(sec)
ITERATE(qsec, section_list) {
Section* sec = hocSEC(qsec);
sec->order = -1;
}

/* traverse and fill _v_node, _v_parent, and secorder */
order = 0;
for (NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
/* roots of this thread */
sl = _nt->roots;
inode = 0;
// Process root nodes
ITERATE(qsec, sl) {
sec = hocSEC(qsec);
assert(sec->order == -1);
secorder[order] = sec;
sec->order = order;
++order;
nd = sec->parentnode;
nd->_nt = _nt;
_nt->_v_node[inode] = nd;
_nt->_v_parent[inode] = nullptr; // because this is a root node
_nt->_v_parent[inode] = nullptr; // Root nodes have no parent
_nt->_v_node[inode]->v_node_index = inode;
++inode;
}
/* all children of what is already in secorder */
for (isec = order - _nt->ncell; isec < order; ++isec) {
sec = secorder[isec];
/* to make it easy to fill in PreSyn.nt_*/
sec->prop->dparam[9] = {neuron::container::do_not_search, _nt};
for (j = 0; j < sec->nnode; ++j) {
nd = sec->pnode[j];
nd->_nt = _nt;
_nt->_v_node[inode] = nd;
if (j) {
_nt->_v_parent[inode] = sec->pnode[j - 1];
} else {
_nt->_v_parent[inode] = sec->parentnode;
// Breadth-first traversal to fill arrays and set section order
ITERATE(qsec, sl) {
std::queue<Section*> que;
que.push(hocSEC(qsec));
while (!que.empty()) {
sec = que.front();
que.pop();
for (ch = sec->child; ch; ch = ch->sibling) {
que.push(ch);
}
// Assign section order and thread
assert(sec->order == -1);
sec->prop->dparam[9] = {neuron::container::do_not_search, _nt};
secorder[order] = sec;
sec->order = order++;
// Process nodes
for (int j = 0; j < sec->nnode; ++j) {
nd = sec->pnode[j];
nd->_nt = _nt;
_nt->_v_node[inode] = nd;
if (j) {
_nt->_v_parent[inode] = sec->pnode[j - 1];
} else {
_nt->_v_parent[inode] = sec->parentnode;
}
_nt->_v_node[inode]->v_node_index = inode;
++inode;
}
_nt->_v_node[inode]->v_node_index = inode;
inode += 1;
}
for (ch = sec->child; ch; ch = ch->sibling) {
assert(ch->order == -1);
secorder[order] = ch;
ch->order = order;
++order;
}
}
_nt->end = inode;
assert(_nt->end == inode);
}
assert(order == section_count);
/*assert(inode == v_node_count);*/

/* not missing any */
// ForAllSections(sec)
ITERATE(qsec, section_list) {
Section* sec = hocSEC(qsec);
assert(sec->order != -1);
Expand All @@ -766,23 +761,23 @@ void reorder_secorder() {
in either case, we can then point to v, d, rhs in proper
node order
*/
for (const NrnThread* _nt: for_threads(nrn_threads, nrn_nthread))
for (const NrnThread* _nt: for_threads(nrn_threads, nrn_nthread)) {
for (inode = 0; inode < _nt->end; ++inode) {
_nt->_v_node[inode]->_classical_parent = _nt->_v_parent[inode];
}
}
if (nrn_multisplit_setup_) {
/* classical order abandoned */
(*nrn_multisplit_setup_)();
}
/* because the d,rhs changed, if multisplit is used we need to update
the reduced tree gather/scatter pointers
the reduced tree gather/scatter pointers
*/
if (nrn_multisplit_setup_) {
nrn_multisplit_ptr_update();
}
}


void nrn_mk_table_check() {
std::size_t table_check_cnt_{};
std::vector<int> ix(n_memb_func, -1);
Expand Down
2 changes: 1 addition & 1 deletion test/external/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include(FetchContent)
FetchContent_Declare(
ringtest
GIT_REPOSITORY https://github.com/neuronsimulator/ringtest
GIT_TAG 7d1aee72939f6dfcb2a14db4c7b00725d9e485fa
GIT_TAG b096d530e2636b34b79c812356979a0fa4e3323b
SOURCE_DIR ${PROJECT_SOURCE_DIR}/external/tests/ringtest)

FetchContent_Declare(
Expand Down
Loading