
Commit a10c794

FEAT: pass latitudes from one grid to the next
1 parent e0ad5fb commit a10c794

1 file changed: +159 −9 lines changed


src/grid_match.cpp

Lines changed: 159 additions & 9 deletions
@@ -3,6 +3,65 @@
 
 #include "aether.h"
 
+// -----------------------------------------------------------------------------
+// Send arrays of variables to other processors on the given grid.
+// -----------------------------------------------------------------------------
+
+bool exchange_information(int64_t *nPointsToPass,
+                          std::vector<precision_t *> varToSend,
+                          int64_t *nPointsToReceive,
+                          std::vector<precision_t *> varToReceive) {
+
+  int64_t jNode, iPt, iTag, iProcTo, iProcFrom;
+  std::vector<MPI_Request> requests(nGrids);
+
+  // Here we send the message into the wind:
+  //  - if it is the same processor, just copy the information
+  //  - if it is a different processor, send the data
+  for (jNode = 0; jNode < nGrids ; jNode++) {
+    if (jNode == iGrid) {
+      for (iPt = 0; iPt < nPointsToPass[jNode]; iPt ++) {
+        varToReceive[jNode][iPt] = varToSend[jNode][iPt];
+      }
+    } else {
+      iProcTo = iMember * nGrids + jNode;
+      // iTag is a unique id allowing all processors to
+      // communicate asynchronously
+      iTag = iProc * 10000 + iProcTo;
+      MPI_Isend(varToSend[jNode],
+                nPointsToPass[jNode] * sizeof(precision_t),
+                MPI_BYTE,
+                iProcTo,
+                iTag,
+                aether_comm,
+                &requests[jNode]);
+    }
+  }
+
+  // Wait for everyone to get the information that was sent:
+  for (jNode = 0; jNode < nGrids ; jNode++)
+    if (jNode != iGrid)
+      MPI_Wait(&requests[jNode], MPI_STATUS_IGNORE);
+
+  // Receive it into the receiving array:
+  for (jNode = 0; jNode < nGrids ; jNode++)
+    if (jNode != iGrid) {
+      iProcFrom = iMember * nGrids + jNode;
+      // Rebuild the unique id:
+      iTag = iProcFrom * 10000 + iProc;
+      MPI_Recv(varToReceive[jNode],
+               nPointsToReceive[jNode] * sizeof(precision_t),
+               MPI_BYTE,
+               iProcFrom,
+               iTag,
+               aether_comm,
+               MPI_STATUS_IGNORE);
+    }
+
+  MPI_Barrier(aether_comm);
+  return true;
+}
+
 bool grid_match(Grid gGrid,
                 Grid mGrid,
                 Quadtree gQuadtree,
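
The new exchange_information() above pairs a nonblocking MPI_Isend on the sending side with a blocking MPI_Recv on the receiving side, matched by a tag built as sender_rank * 10000 + receiver_rank. The sketch below is not part of the commit; it is a self-contained illustration of the same send/receive pattern over MPI_COMM_WORLD, with toSend, toRecv, and nPts standing in for varToSend, varToReceive, and nPointsToPass. It posts the receives before waiting on the sends, so it does not rely on MPI buffering the outgoing messages.

// Minimal sketch (not part of the commit): every rank exchanges a small
// array with every other rank, using the same sender*10000 + receiver tag
// convention as exchange_information().
// Compile with: mpicxx -o exchange_sketch exchange_sketch.cpp

#include <mpi.h>
#include <vector>
#include <iostream>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);

  int iProc, nProcs;
  MPI_Comm_rank(MPI_COMM_WORLD, &iProc);
  MPI_Comm_size(MPI_COMM_WORLD, &nProcs);

  const int nPts = 4;
  std::vector<std::vector<double>> toSend(nProcs, std::vector<double>(nPts));
  std::vector<std::vector<double>> toRecv(nProcs, std::vector<double>(nPts));
  std::vector<MPI_Request> requests(nProcs, MPI_REQUEST_NULL);

  // Fill the send buffers with something recognizable (sender rank + index):
  for (int jProc = 0; jProc < nProcs; jProc++)
    for (int iPt = 0; iPt < nPts; iPt++)
      toSend[jProc][iPt] = iProc + 0.01 * iPt;

  // Post all sends first; copy locally when the destination is this rank:
  for (int jProc = 0; jProc < nProcs; jProc++) {
    if (jProc == iProc) {
      toRecv[jProc] = toSend[jProc];
    } else {
      // The 10000 multiplier mirrors the commit; it assumes the tag stays
      // below MPI's tag upper bound (i.e. a modest number of ranks).
      int iTag = iProc * 10000 + jProc;
      MPI_Isend(toSend[jProc].data(), nPts, MPI_DOUBLE,
                jProc, iTag, MPI_COMM_WORLD, &requests[jProc]);
    }
  }

  // Receive from everyone else, rebuilding the tag from the sender's side:
  for (int jProc = 0; jProc < nProcs; jProc++) {
    if (jProc != iProc) {
      int iTag = jProc * 10000 + iProc;
      MPI_Recv(toRecv[jProc].data(), nPts, MPI_DOUBLE,
               jProc, iTag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
  }

  // Make sure our own sends have completed before reusing the buffers:
  MPI_Waitall(nProcs, requests.data(), MPI_STATUSES_IGNORE);

  std::cout << "rank " << iProc << " received "
            << toRecv[(iProc + 1) % nProcs][0]
            << " from rank " << (iProc + 1) % nProcs << "\n";

  MPI_Finalize();
  return 0;
}
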
@@ -17,8 +76,16 @@ bool grid_match(Grid gGrid,
   precision_t lon, lat;
   precision_t normX, normY, normZ;
   arma_vec norms(3);
-  int64_t iNode;
+  int64_t jNode, kNode;
+  int64_t *nPointsToPass = static_cast<int64_t*>(malloc(nGrids * sizeof(int64_t)));
+  int64_t *nPointsToReceive = static_cast<int64_t*>(malloc(nGrids * sizeof(int64_t)));
+  int64_t *nPointsDummy = static_cast<int64_t*>(malloc(nGrids * sizeof(int64_t)));
+
+  for (jNode = 0; jNode < nGrids ; jNode++)
+    nPointsToPass[jNode] = 0;
 
+  // This is not the most efficient way to do this, but on the first pass, let's
+  // just count how many points we need to send to the other processors:
   for (iX = mGCs; iX < mnX - mGCs; iX++) {
     for (iY = mGCs; iY < mnY - mGCs; iY++) {
       for (iZ = mGCs; iZ < mnZ - mGCs; iZ++) {
@@ -28,19 +95,102 @@ bool grid_match(Grid gGrid,
           norms(0) = lon / cPI;
           norms(1) = lat / cPI;
           norms(2) = 0.0;
-          iNode = gQuadtree.find_point(norms);
+          jNode = gQuadtree.find_point(norms);
         } else {
           norms = sphere_to_cube(lon, lat);
-          iNode = gQuadtree.find_point(norms);
+          jNode = gQuadtree.find_point(norms);
+        }
+        if (jNode < 0 || jNode >= nGrids) {
+          std::cout << "out of bounds!!! " << jNode << "\n";
         }
-        std::cout << "lon, lat, node: " << lon*cRtoD << " "
-                  << lat*cRtoD << " "
-                  << norms(0) << " "
-                  << norms(1) << " "
-                  << norms(2) << " "
-                  << iNode << "\n";
+        nPointsToPass[jNode] = nPointsToPass[jNode]+1;
+        /* std::cout << "lon, lat, node: " << lon*cRtoD << " "
+                     << lat*cRtoD << " "
+                     << norms(0) << " "
+                     << norms(1) << " "
+                     << norms(2) << " "
+                     << jNode << " "
+                     << iProc << " "
+                     << nPoints[jNode] << "\n"; */
       }
     }
   }
+  std::cout << "made it here: " << iProc << "\n";
+  MPI_Barrier(aether_comm);
+
+  for (jNode = 0; jNode < nGrids ; jNode++)
+    std::cout << "nPtsToPass : " << iProc << " " << nPointsToPass[jNode] << "\n";
+
+  std::cout << "sending number of points :\n";
+
+  // This section sends the number of points that need to be transferred to each processor.
+  // Each processor then saves the number of points, so that both the
+  // sender and the receiver will have the information.
+  for (jNode = 0; jNode < nGrids ; jNode++) {
+    if (jNode == iGrid) {
+      for (kNode = 0; kNode < nGrids ; kNode++)
+        nPointsDummy[kNode] = nPointsToPass[kNode];
+    }
+    MPI_Bcast(nPointsDummy, nGrids, MPI_INT64_T, jNode, aether_comm);
+    nPointsToReceive[jNode] = nPointsDummy[iGrid];
+  }
+
+  MPI_Barrier(aether_comm);
+
+  for (jNode = 0; jNode < nGrids ; jNode++) {
+    std::cout << "nPtsToReceive : " << iProc << " " << jNode << " " << nPointsToReceive[jNode] << "\n";
+    MPI_Barrier(aether_comm);
+  }
+
+  // Now we need to create an array of send points and an array of receive points.
+  std::vector<precision_t *> latsToPass(nGrids);
+  std::vector<precision_t *> lonsToPass(nGrids);
+  std::vector<precision_t *> altsToPass(nGrids);
+  for (jNode = 0; jNode < nGrids ; jNode++) {
+    latsToPass[jNode] = static_cast<precision_t*>(malloc(nPointsToPass[jNode] * sizeof(precision_t)));
+    lonsToPass[jNode] = static_cast<precision_t*>(malloc(nPointsToPass[jNode] * sizeof(precision_t)));
+    altsToPass[jNode] = static_cast<precision_t*>(malloc(nPointsToPass[jNode] * sizeof(precision_t)));
+  }
+
+  std::vector<precision_t *> latsToInterTo(nGrids);
+  std::vector<precision_t *> lonsToInterTo(nGrids);
+  std::vector<precision_t *> altsToInterTo(nGrids);
+  for (jNode = 0; jNode < nGrids ; jNode++) {
+    latsToInterTo[jNode] = static_cast<precision_t*>(malloc(nPointsToReceive[jNode] * sizeof(precision_t)));
+    lonsToInterTo[jNode] = static_cast<precision_t*>(malloc(nPointsToReceive[jNode] * sizeof(precision_t)));
+    altsToInterTo[jNode] = static_cast<precision_t*>(malloc(nPointsToReceive[jNode] * sizeof(precision_t)));
+  }
+
+  // now, the second pass, let's store the information so we can pass it:
+  for (jNode = 0; jNode < nGrids ; jNode++)
+    nPointsToPass[jNode] = 0;
+  for (iX = mGCs; iX < mnX - mGCs; iX++) {
+    for (iY = mGCs; iY < mnY - mGCs; iY++) {
+      for (iZ = mGCs; iZ < mnZ - mGCs; iZ++) {
+        lon = mGrid.geoLon_scgc(iX, iY, iZ);
+        lat = mGrid.geoLat_scgc(iX, iY, iZ);
+        if (gGrid.iGridShape_ == gGrid.iSphere_) {
+          norms(0) = lon / cPI;
+          norms(1) = lat / cPI;
+          norms(2) = 0.0;
+          jNode = gQuadtree.find_point(norms);
+        } else {
+          norms = sphere_to_cube(lon, lat);
+          jNode = gQuadtree.find_point(norms);
+        }
+        latsToPass[jNode][nPointsToPass[jNode]] = lat;
+        lonsToPass[jNode][nPointsToPass[jNode]] = lon;
+        altsToPass[jNode][nPointsToPass[jNode]] = mGrid.geoAlt_scgc(iX, iY, iZ);
+        nPointsToPass[jNode] = nPointsToPass[jNode]+1;
+      }
+    }
+  }
+  bool didWork;
+  didWork = exchange_information(nPointsToPass,
+                                 latsToPass,
+                                 nPointsToReceive,
+                                 latsToInterTo);
+
+
   return true;
 }
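
Before exchange_information() can be called, every processor has to learn how many points it will receive from each of the others. The commit does that with one MPI_Bcast per grid: each grid broadcasts its send counts in turn, and every rank keeps the entry that names itself. The sketch below is not part of the commit; it reproduces that counting handshake over MPI_COMM_WORLD with illustrative names (nToSend, nToReceive, nDummy in place of nPointsToPass, nPointsToReceive, nPointsDummy), and then shows the equivalent single MPI_Alltoall call for comparison.

// Minimal sketch (not part of the commit): sharing the per-destination point
// counts before the data exchange.  Each rank broadcasts its "how many points
// I will send to each of you" array in turn; every rank keeps its own entry.
// Compile with: mpicxx -o counts_sketch counts_sketch.cpp

#include <mpi.h>
#include <vector>
#include <iostream>
#include <cstdint>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);

  int iProc, nProcs;
  MPI_Comm_rank(MPI_COMM_WORLD, &iProc);
  MPI_Comm_size(MPI_COMM_WORLD, &nProcs);

  // Pretend this rank found (iProc + jProc) points that belong to rank jProc:
  std::vector<int64_t> nToSend(nProcs), nToReceive(nProcs), nDummy(nProcs);
  for (int jProc = 0; jProc < nProcs; jProc++)
    nToSend[jProc] = iProc + jProc;

  // One broadcast per root: root jProc shares its send counts, and every
  // rank keeps the entry that applies to itself.
  for (int jProc = 0; jProc < nProcs; jProc++) {
    if (jProc == iProc)
      nDummy = nToSend;
    MPI_Bcast(nDummy.data(), nProcs, MPI_INT64_T, jProc, MPI_COMM_WORLD);
    nToReceive[jProc] = nDummy[iProc];
  }

  // The same exchange can be done with a single collective, MPI_Alltoall:
  std::vector<int64_t> nToReceiveAll(nProcs);
  MPI_Alltoall(nToSend.data(), 1, MPI_INT64_T,
               nToReceiveAll.data(), 1, MPI_INT64_T, MPI_COMM_WORLD);

  // Both approaches should agree:
  for (int jProc = 0; jProc < nProcs; jProc++)
    if (nToReceive[jProc] != nToReceiveAll[jProc])
      std::cout << "rank " << iProc << ": mismatch at " << jProc << "\n";

  std::cout << "rank " << iProc << " expects " << nToReceive[0]
            << " points from rank 0\n";

  MPI_Finalize();
  return 0;
}
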

0 commit comments
