#include "aether.h"

+ // -----------------------------------------------------------------------------
+ // Send arrays of variables to other processors on the given grid.
+ // -----------------------------------------------------------------------------
+
+ bool exchange_information(int64_t *nPointsToPass,
+                           std::vector<precision_t *> varToSend,
+                           int64_t *nPointsToReceive,
+                           std::vector<precision_t *> varToReceive) {
+
+   int64_t jNode, iPt, iTag, iProcTo, iProcFrom;
+   std::vector<MPI_Request> requests(nGrids);
+
+   // Here we send the message into the wind:
+   // - if it is the same processor, just copy the information
+   // - if it is a different processor, post a non-blocking send
+   for (jNode = 0; jNode < nGrids; jNode++) {
+     if (jNode == iGrid) {
+       for (iPt = 0; iPt < nPointsToPass[jNode]; iPt++) {
+         varToReceive[jNode][iPt] = varToSend[jNode][iPt];
+       }
+     } else {
+       iProcTo = iMember * nGrids + jNode;
+       // iTag is a unique id allowing all processors to
+       // communicate asynchronously:
+       iTag = iProc * 10000 + iProcTo;
+       MPI_Isend(varToSend[jNode],
+                 nPointsToPass[jNode] * sizeof(precision_t),
+                 MPI_BYTE,
+                 iProcTo,
+                 iTag,
+                 aether_comm,
+                 &requests[jNode]);
+     }
+   }
+
+   // Receive into the receiving array. The receives are posted before waiting
+   // on the sends, so that every non-blocking send can be matched and complete:
+   for (jNode = 0; jNode < nGrids; jNode++)
+     if (jNode != iGrid) {
+       iProcFrom = iMember * nGrids + jNode;
+       // Rebuild the unique id from the sender's point of view:
+       iTag = iProcFrom * 10000 + iProc;
+       MPI_Recv(varToReceive[jNode],
+                nPointsToReceive[jNode] * sizeof(precision_t),
+                MPI_BYTE,
+                iProcFrom,
+                iTag,
+                aether_comm,
+                MPI_STATUS_IGNORE);
+     }
+
+   // Wait for all of the sends to complete:
+   for (jNode = 0; jNode < nGrids; jNode++)
+     if (jNode != iGrid)
+       MPI_Wait(&requests[jNode], MPI_STATUS_IGNORE);
+
+   MPI_Barrier(aether_comm);
+   return true;
+ }
+
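For reference, a minimal calling sketch (hypothetical buffer names; the real caller is grid_match below): each rank supplies one send buffer per grid sized by nPointsToPass and one receive buffer per grid sized by nPointsToReceive.

    // Minimal calling sketch with hypothetical sendBufs/recvBufs; the counts are
    // assumed to have been exchanged already (as grid_match does below).
    std::vector<precision_t *> sendBufs(nGrids), recvBufs(nGrids);
    for (int64_t jNode = 0; jNode < nGrids; jNode++) {
      sendBufs[jNode] = static_cast<precision_t *>(malloc(nPointsToPass[jNode] * sizeof(precision_t)));
      recvBufs[jNode] = static_cast<precision_t *>(malloc(nPointsToReceive[jNode] * sizeof(precision_t)));
    }
    bool ok = exchange_information(nPointsToPass, sendBufs, nPointsToReceive, recvBufs);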
bool grid_match(Grid gGrid,
                Grid mGrid,
                Quadtree gQuadtree,
@@ -17,8 +76,16 @@ bool grid_match(Grid gGrid,
  precision_t lon, lat;
  precision_t normX, normY, normZ;
  arma_vec norms(3);
-   int64_t iNode;
+   int64_t jNode, kNode;
+   int64_t *nPointsToPass = static_cast<int64_t *>(malloc(nGrids * sizeof(int64_t)));
+   int64_t *nPointsToReceive = static_cast<int64_t *>(malloc(nGrids * sizeof(int64_t)));
+   int64_t *nPointsDummy = static_cast<int64_t *>(malloc(nGrids * sizeof(int64_t)));
+
+   for (jNode = 0; jNode < nGrids; jNode++)
+     nPointsToPass[jNode] = 0;

+   // This is not the most efficient way to do this, but on the first pass let's
+   // just count how many points we need to send to the other processors:
  for (iX = mGCs; iX < mnX - mGCs; iX++) {
    for (iY = mGCs; iY < mnY - mGCs; iY++) {
      for (iZ = mGCs; iZ < mnZ - mGCs; iZ++) {
@@ -28,19 +95,102 @@ bool grid_match(Grid gGrid,
          norms(0) = lon / cPI;
          norms(1) = lat / cPI;
          norms(2) = 0.0;
-           iNode = gQuadtree.find_point(norms);
+           jNode = gQuadtree.find_point(norms);
        } else {
          norms = sphere_to_cube(lon, lat);
-           iNode = gQuadtree.find_point(norms);
+           jNode = gQuadtree.find_point(norms);
+         }
+         if (jNode < 0 || jNode >= nGrids) {
+           std::cout << "out of bounds!!! " << jNode << "\n";
        }
-         std::cout << "lon, lat, node: " << lon*cRtoD << " "
-                   << lat*cRtoD << " "
-                   << norms(0) << " "
-                   << norms(1) << " "
-                   << norms(2) << " "
-                   << iNode << "\n";
+         nPointsToPass[jNode] = nPointsToPass[jNode] + 1;
+         /* std::cout << "lon, lat, node: " << lon*cRtoD << " "
+                      << lat*cRtoD << " "
+                      << norms(0) << " "
+                      << norms(1) << " "
+                      << norms(2) << " "
+                      << jNode << " "
+                      << iProc << " "
+                      << nPoints[jNode] << "\n"; */
      }
    }
  }
+   std::cout << "made it here: " << iProc << "\n";
+   MPI_Barrier(aether_comm);
+
+   for (jNode = 0; jNode < nGrids; jNode++)
+     std::cout << "nPtsToPass : " << iProc << " " << nPointsToPass[jNode] << "\n";
+
+   std::cout << "sending number of points:\n";
+
+   // This section sends the number of points that need to be transferred to each
+   // processor, and each processor saves those counts, so that both the sender
+   // and the receiver know how many points will be exchanged.
+   for (jNode = 0; jNode < nGrids; jNode++) {
+     if (jNode == iGrid) {
+       for (kNode = 0; kNode < nGrids; kNode++)
+         nPointsDummy[kNode] = nPointsToPass[kNode];
+     }
+     MPI_Bcast(nPointsDummy, nGrids, MPI_INT64_T, jNode, aether_comm);
+     nPointsToReceive[jNode] = nPointsDummy[iGrid];
+   }
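The broadcast loop above is an all-to-all exchange of the per-grid counts. As a sketch only, assuming aether_comm holds exactly nGrids ranks ordered by grid (a single ensemble member), the same exchange could be done with one collective call:

    // Hypothetical equivalent of the count-exchange loop above: each rank ends up
    // with nPointsToReceive[j] equal to rank j's nPointsToPass[iGrid].
    MPI_Alltoall(nPointsToPass, 1, MPI_INT64_T,
                 nPointsToReceive, 1, MPI_INT64_T, aether_comm);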
+
+   MPI_Barrier(aether_comm);
+
+   for (jNode = 0; jNode < nGrids; jNode++) {
+     std::cout << "nPtsToReceive : " << iProc << " " << jNode << " " << nPointsToReceive[jNode] << "\n";
+     MPI_Barrier(aether_comm);
+   }
+
+   // Now we need to create an array of send points and an array of receive points:
+   std::vector<precision_t *> latsToPass(nGrids);
+   std::vector<precision_t *> lonsToPass(nGrids);
+   std::vector<precision_t *> altsToPass(nGrids);
+   for (jNode = 0; jNode < nGrids; jNode++) {
+     latsToPass[jNode] = static_cast<precision_t *>(malloc(nPointsToPass[jNode] * sizeof(precision_t)));
+     lonsToPass[jNode] = static_cast<precision_t *>(malloc(nPointsToPass[jNode] * sizeof(precision_t)));
+     altsToPass[jNode] = static_cast<precision_t *>(malloc(nPointsToPass[jNode] * sizeof(precision_t)));
+   }
+
+   std::vector<precision_t *> latsToInterTo(nGrids);
+   std::vector<precision_t *> lonsToInterTo(nGrids);
+   std::vector<precision_t *> altsToInterTo(nGrids);
+   for (jNode = 0; jNode < nGrids; jNode++) {
+     latsToInterTo[jNode] = static_cast<precision_t *>(malloc(nPointsToReceive[jNode] * sizeof(precision_t)));
+     lonsToInterTo[jNode] = static_cast<precision_t *>(malloc(nPointsToReceive[jNode] * sizeof(precision_t)));
+     altsToInterTo[jNode] = static_cast<precision_t *>(malloc(nPointsToReceive[jNode] * sizeof(precision_t)));
+   }
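A possible alternative to the raw malloc calls above (a sketch, not part of this change): std::vector<std::vector<precision_t>> would size and release the per-grid buffers automatically, and .data() still gives the contiguous pointer the MPI calls need.

    // Hypothetical vector-based allocation for one of the buffers:
    std::vector<std::vector<precision_t>> latsToPassV(nGrids);
    for (int64_t jNode = 0; jNode < nGrids; jNode++)
      latsToPassV[jNode].resize(nPointsToPass[jNode]);
    // A send would then use latsToPassV[jNode].data() in place of latsToPass[jNode].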
+
+   // Now, on the second pass, store the information so we can pass it:
+   for (jNode = 0; jNode < nGrids; jNode++)
+     nPointsToPass[jNode] = 0;
+   for (iX = mGCs; iX < mnX - mGCs; iX++) {
+     for (iY = mGCs; iY < mnY - mGCs; iY++) {
+       for (iZ = mGCs; iZ < mnZ - mGCs; iZ++) {
+         lon = mGrid.geoLon_scgc(iX, iY, iZ);
+         lat = mGrid.geoLat_scgc(iX, iY, iZ);
+         if (gGrid.iGridShape_ == gGrid.iSphere_) {
+           norms(0) = lon / cPI;
+           norms(1) = lat / cPI;
+           norms(2) = 0.0;
+           jNode = gQuadtree.find_point(norms);
+         } else {
+           norms = sphere_to_cube(lon, lat);
+           jNode = gQuadtree.find_point(norms);
+         }
+         latsToPass[jNode][nPointsToPass[jNode]] = lat;
+         lonsToPass[jNode][nPointsToPass[jNode]] = lon;
+         altsToPass[jNode][nPointsToPass[jNode]] = mGrid.geoAlt_scgc(iX, iY, iZ);
+         nPointsToPass[jNode] = nPointsToPass[jNode] + 1;
+       }
+     }
+   }
+   bool didWork = exchange_information(nPointsToPass,
+                                       latsToPass,
+                                       nPointsToReceive,
+                                       latsToInterTo);
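The call above exchanges only the latitude arrays; presumably the same pattern is then repeated for the longitude and altitude buffers that were filled above (a sketch, not part of this diff):

    // Hypothetical follow-up calls reusing the same counts:
    didWork = didWork && exchange_information(nPointsToPass, lonsToPass,
                                              nPointsToReceive, lonsToInterTo);
    didWork = didWork && exchange_information(nPointsToPass, altsToPass,
                                              nPointsToReceive, altsToInterTo);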
+
  return true;
}