C $Header: /u/gcmpack/models/MITgcmUV/eesupp/src/exch.F,v 1.4.2.2 1998/06/22 02:11:15 cnh Exp $

#include "CPP_EEOPTIONS.h"

C--   File exch.F: Routines that perform atomic communication operations,
C     i.e. communication operations that are completed by the time
C     the routines return.
C     Note
C     ====
C     The code in here, although intricate, is fairly
C     straightforward. There are four routines, one
C     for each of the data patterns we wish to do overlap
C     updates on - as listed below. Each routine has two
C     parts. The first part does overlap updates to and from
C     remote "processes", that is, processes that do not truly
C     share the address space of this process. This part
C     requires a facility like MPI or CRAY shmem get and put
C     operations. It is possible to switch this part on or
C     off using compiler directives. In the case of a simple
C     serial execution nothing happens in this part. This part
C     is also switched off when communication strategies
C     that allow the overlapping of communication and computation
C     are employed.
C     The second part of each routine does the true shared-memory
C     overlap copying, i.e. copying from one part of array phi
C     to another part. This part is always active; in the case
C     of a single-threaded messaging code, however, this part
C     will not do any copies, as all edges will be flagged as
C     using, for example, MPI or SHMPUT/SHMGET.
C     Contents
C     o exch_xy_r4  - Does overlap updates for REAL*4 2d XY  fields
C     o exch_xy_r8  - Does overlap updates for REAL*8 2d XY  fields
C     o exch_xyz_r4 - Does overlap updates for REAL*4 3d XYZ fields
C     o exch_xyz_r8 - Does overlap updates for REAL*8 3d XYZ fields
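C     Example (a sketch - the field name below is illustrative and
C     not part of this file): an application with an array declared
C     with overlap regions in a COMMON block exchanges its overlaps
C     by calling
C        Real*8 theta(1-OLx:sNx+OLx,1-OLy:sNy+OLy,1:Nz,nSx,nSy)
C        CALL EXCH_XYZ_R8( theta, myThid )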

CStartOfInterface
      SUBROUTINE EXCH_XY_R4(
     U           phi,
     I           myThid )
C     /==========================================================\
C     | SUBROUTINE EXCH_XY_R4                                    |
C     | o Handle exchanges for real*4, two-dimensional arrays.   |
C     |==========================================================|
C     | Do true shared-memory data transfers and "messaging"     |
C     | transfers for the blocking case of data transfers.       |
C     | Applications call this routine using                     |
C     |   CALL EXCH..( x, myThid )                               |
C     | where x is a two-dimensional array with overlaps.        |
C     | This routine does true-shared-memory copies for blocks   |
C     | within a thread. It will also do MPI messaging between   |
C     | different processes.                                     |
C     | Note:                                                    |
C     | =====                                                    |
C     | If it is used, "asynchronous" messaging, in which        |
C     | communication overlaps computation, is handled elsewhere |
C     | - see recv.F and send.F.                                 |
C     | In practice MPI implementations may not be completely    |
C     | thread safe. In principle this code is correct even for  |
C     | mixed MPI and multi-threaded execution.                  |
C     | In multi-thread execution data managed by other threads  |
C     | is only automatically visible to another thread if it    |
C     | existed before the thread was created. This means that   |
C     | for Fortran we declare arrays with overlap regions in    |
C     | COMMON blocks.                                           |
C     \==========================================================/

C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"

C     === Routine arguments ===
C     phi    - Array whose overlap regions are to be exchanged.
C     myThid - My thread id.
      Real*4 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      INTEGER myThid
CEndOfInterface

C     === Local variables ===
C     bi,bj      - Outer loop counters
C     I,J        - Inner loop counters
      INTEGER bi, bj
      INTEGER I, J
#ifdef ALLOW_USE_MPI
C     tagSend    - Tags used to mark and select messages
C     tagRecv
C     nReqs      - MPI request counter.
C     reqIds     - Outstanding requests to wait on.
C     mpiRC      - MPI return code.
C     mpiStatArr - Multi-request status reporting array.
C     toPid      - Proc. id sending to.
C     fromPid    - Proc. id receiving from.
C     elCount    - Number of items to send or receive.
C     elType     - Data type of elements.
      INTEGER tagSend
      INTEGER tagRecv
      INTEGER nReqs
      INTEGER reqIds(4)
      INTEGER mpiRC
      INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
      INTEGER toPid
      INTEGER fromPid
      INTEGER elCount
      INTEGER elType
#endif /* ALLOW_USE_MPI */
C     bE, bW     - Block index to east, west, north and
C     bN, bS       south.
      INTEGER bW, bE, bS, bN

C--   Do "message-passing" data exchanges
#ifdef ALLOW_SYNC_COMMUNICATION
#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      IF ( usingSyncMessages ) THEN
#endif

C--   MPI based data exchanges
#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif

C     Here we use the approach that all the X shifts are completed
C     before the Y shifts start. Then we can expand our Y shifts
C     to include X overlap regions. This means we don't have to
C     transfer corners separately.
C     Notes:
C     ======
C     1. If we are only using messages in Y and using "true
C     shared-memory" in X, then we rely on the shared-memory code
C     being implemented so that corner values will be set correctly.
C     The true shared-memory code is in the block below - so be
C     careful when altering it!
C     2. In order to ensure we grab the right message we need to
C     tag messages. We tag the messages with the direction in which
C     they were sent out. Thus, to receive from a processor to our
C     west we request the next message from that processor that was
C     sent out with a tag indicating an easterly send (a worked
C     example follows these notes).
C     3. We could implement a "safe" code here by having everyone
C     sync and then getting a particular thread ( say thread 1 ) to
C     do the MPI_Isend calls sequentially. In this mode the
C     "communication" thread would loop over nThreads and do an
C     MPI_Sendrecv for each one. This could be more efficient on
C     some platforms, as the messages might be sent as one large
C     unit rather than by separate threads. It would also be
C     "thread-safe" for MPI implementations that have
C     multi-threading problems with things like request ids.
C     4. We include overlap regions in both X and Y sends.
C     This means we can interchange the Y block with the X block.
C     Not sure whether this will ever be useful!
C     5. The generation of a request handle by MPI_Isend and
C     MPI_Irecv ought to involve some global state within MPI.
C     If this process is not thread safe it may be necessary to
C     enable a critical section around the MPI_Isend and
C     MPI_Irecv calls.
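C     Example of the tag scheme in note 2 (the thread count is
C     illustrative): with nThreads = 4, thread 2 posting an easterly
C     send uses tag mpiTagE*4+2; its eastern neighbour posts the
C     matching receive with tag mpiTagE*nThreads+myThrW(myThid),
C     which evaluates to the same value because myThrW returns the
C     id of the thread sending from its west.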
153 |
|
|
|
154 |
|
|
C We need a barrier here to synchronise threads otherwise |
155 |
|
|
C one thread might declare it has is ready to send and receive |
156 |
|
|
C even though another thread is going to write to the first |
157 |
|
|
C threads send or receive region. This won't happen, however, |
158 |
|
|
C if we are careful about which data a thread updates - but at |
159 |
|
|
C the moment we aren't careful! |
160 |
|
|
_BARRIER |
161 |
|
|
|
162 |
|
|
C-- East-west messaging communication |
163 |
|
|
nReqs = 0 |
164 |
|
|
bE = myBxLo(myThid) |
165 |
|
|
IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN |
166 |
|
|
C My west face uses MPI. Get ready to receive data from |
167 |
|
|
C west and post that we are ready to send data to the west. |
168 |
|
|
C Data we receive has to come with tag of thread we know is |
169 |
|
|
C sending from our west, data we send is tagged with our thread id |
170 |
|
|
C and the send direction. |
171 |
|
|
nReqs = nReqs+1 |
172 |
|
|
tagSend = mpiTagW*nThreads+myThid |
173 |
|
|
toPid = mpiPidW |
174 |
|
|
elCount = 1 |
175 |
|
|
elType = mpiTypeXFaceThread_xy_r4(myThid) |
176 |
|
|
C I tagSend,MPI_COMM_WORLD, |
177 |
|
|
CALL MPI_Isend( |
178 |
|
|
I phi(1,1-OLy,bE,myByLo(myThid)), |
179 |
|
|
I elCount, elType, toPid, |
180 |
|
|
I tagSend,MPI_COMM_WORLD, |
181 |
|
|
I reqIds(nReqs),mpiRC) |
182 |
|
|
nReqs = nReqs+1 |
183 |
|
|
tagRecv = mpiTagE*nThreads+myThrW(myThid) |
184 |
|
|
fromPid = mpiPidW |
185 |
|
|
elCount = 1 |
186 |
|
|
elType = mpiTypeXFaceThread_xy_r4(myThid) |
187 |
|
|
CALL MPI_Irecv( |
188 |
|
|
U phi(1-OLx,1-OLy,bE,myByLo(myThid)), |
189 |
|
|
I elCount, elType, fromPid, |
190 |
|
|
I tagRecv,MPI_COMM_WORLD, |
191 |
|
|
I reqIds(nReqs),mpiRC) |
192 |
|
|
ENDIF |
193 |
|
|
bW = myBxHi(myThid) |
194 |
|
|
IF ( bW .EQ. nSx .AND. commE(MyThid) .EQ. COMM_MPI ) THEN |
195 |
|
|
C My east face uses MPI. Get ready to receive data from |
196 |
|
|
C east and post that we are ready to send data to the east. |
197 |
|
|
C Data we receive has to come with tag of thread we know is |
198 |
|
|
C sending from our west, data we send is tagged with our thread id. |
199 |
|
|
nReqs = nReqs+1 |
200 |
|
|
tagSend = mpiTagE*nThreads+myThid |
201 |
|
|
toPid = mpiPidE |
202 |
|
|
elCount = 1 |
203 |
|
|
elType = mpiTypeXFaceThread_xy_r4(myThid) |
204 |
|
|
CALL MPI_Isend( |
205 |
|
|
I phi(sNx-OLx+1,1-OLy,bW,myByLo(myThid)), |
206 |
|
|
I elCount, elType, toPid, |
207 |
|
|
I tagSend,MPI_COMM_WORLD, |
208 |
|
|
I reqIds(nReqs),mpiRC) |
209 |
|
|
nReqs = nReqs+1 |
210 |
|
|
tagRecv = mpiTagW*nThreads+myThrE(myThid) |
211 |
|
|
fromPid = mpiPidE |
212 |
|
|
elCount = 1 |
213 |
|
|
elType = mpiTypeXFaceThread_xy_r4(myThid) |
214 |
|
|
CALL MPI_Irecv( |
215 |
|
|
U phi(sNx+1,1-OLy,bW,myByLo(myThid)), |
216 |
|
|
I elCount, elType, fromPid, |
217 |
|
|
I tagRecv,MPI_COMM_WORLD, |
218 |
|
|
I reqIds(nReqs),mpiRC) |
219 |
|
|
ENDIF |
220 |
|
|
C Wait for this threads east-west transactions to finish before |
221 |
|
|
C posting north-south transactions. We have to do this so that |
222 |
|
|
C the north-south transactions will send out the correct overlap |
223 |
|
|
C region values into the corner sections of our neighbors. |
224 |
|
|
CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC ) |
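C     ( MPI_Waitall blocks until all nReqs requests posted above -
C     the sends and the receives - have completed, so the east and
C     west overlap regions of phi are valid from this point on. )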

C--   North-south messaging communication
      nReqs = 0
      bN = myByLo(myThid)
      IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
C      My south face uses MPI. Get ready to receive data from the
C      south and post that I am ready to send data to the south.
       nReqs   = nReqs + 1
       tagSend = mpiTagS*nThreads+myThid
       toPid   = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_r4(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,1,myBxLo(myThid),bN),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs + 1
       tagRecv = mpiTagN*nThreads+myThrS(myThid)
       fromPid = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_r4(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,1-OLy,myBxLo(myThid),bN),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C
      bS = myByHi(myThid)
      IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
C      My north face uses MPI. Get ready to receive data from the
C      north and post that I am ready to send data to the north.
       nReqs   = nReqs + 1
       tagSend = mpiTagN*nThreads+myThid
       toPid   = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_r4(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,sNy-OLy+1,myBxLo(myThid),bS),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs + 1
       tagRecv = mpiTagS*nThreads+myThrN(myThid)
       fromPid = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_r4(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,sNy+1,myBxLo(myThid),bS),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C     Wait for this thread's north-south transactions to finish.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      ENDIF
#endif
#endif /* ALLOW_SYNC_COMMUNICATION */

C--   Do true shared-memory data exchanges
C     Note: this is also doing the edge copies for a regular
C     serial code.
C--   First make sure all threads have reached here
      _BARRIER

C--   Now do copies
C     Notes:
C     ======
C     Here we copy from up to and including the overlap
C     regions for both the X shift and the Y shift. This
C     catches the "corners" of the data copied using messages
C     in the "synchronous communication" section above.
C     As coded here we also write to a remote region in
C     one direction. In some situations it may be better only
C     to write to our own overlaps - that could be the case with
C     Wildfire replication. This should be changed once everything
C     is working OK.
C
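C     For example, in the x-axis loops below with OLx = 2, the I = 1
C     pass copies interior column 1 of block bE into overlap column
C     sNx+1 of block bW (bW lies to the west of bE) and interior
C     column sNx of bW into overlap column 0 of bE; I = 2 then
C     handles the next column further out.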
C--   x-axis exchanges
      bE = myBxLo(myThid)
      IF ( bE .NE. 1 .OR.
     &     commW(myThid) .EQ. COMM_SHARED ) THEN
       bW = bE-1
       IF ( bW .LT. 1 ) bW = nSx
       DO bj=myByLo(myThid),myByHi(myThid)
        DO J=1-OLy,sNy+OLy
         DO I=1,OLx
          phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
          phi(1-I  ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bE=myBxLo(myThid)+1,myBxHi(myThid)
       bW = bE-1
       DO bj=myByLo(myThid),myByHi(myThid)
        DO J=1-OLy,sNy+OLy
         DO I=1,OLx
          phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
          phi(1-I  ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C     Need to ensure all threads have completed x-axis transfers
C     before we can do y-axis exchanges.
      _BARRIER

C--   y-axis exchanges
      bN = myByLo(myThid)
      IF ( bN .NE. 1 .OR.
     &     commS(myThid) .EQ. COMM_SHARED ) THEN
       bS = bN - 1
       IF ( bS .LT. 1 ) bS = nSy
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,OLy
         DO I=1-OLx,sNx+OLx
          phi(I,1-J  ,bi,bN)=phi(I,sNy-J+1,bi,bS)
          phi(I,sNy+J,bi,bS)=phi(I,1+J-1 ,bi,bN)
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bN=myByLo(myThid)+1,myByHi(myThid)
       bS = bN - 1
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,OLy
         DO I=1-OLx,sNx+OLx
          phi(I,1-J  ,bi,bN)=phi(I,sNy-J+1,bi,bS)
          phi(I,sNy+J,bi,bS)=phi(I,1+J-1 ,bi,bN)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      _BARRIER

      RETURN
      END

CStartOfInterface
      SUBROUTINE EXCH_XYZ_R4(
     U           phi,
     I           myThid )
C     /==========================================================\
C     | SUBROUTINE EXCH_XYZ_R4                                   |
C     | o Handle exchanges for real*4, three-dimensional arrays. |
C     |==========================================================|
C     | Do true shared-memory data transfers and "messaging"     |
C     | transfers for the blocking case of data transfers.       |
C     | Applications call this routine using                     |
C     |   CALL EXCH..( x, myThid )                               |
C     | where x is a three-dimensional array with overlaps.      |
C     | This routine does true-shared-memory copies for blocks   |
C     | within a thread. It will also do MPI messaging between   |
C     | different processes.                                     |
C     | Note:                                                    |
C     | =====                                                    |
C     | If it is used, "asynchronous" messaging, in which        |
C     | communication overlaps computation, is handled elsewhere |
C     | - see recv.F and send.F.                                 |
C     | In practice MPI implementations may not be completely    |
C     | thread safe. In principle this code is correct even for  |
C     | mixed MPI and multi-threaded execution.                  |
C     | In multi-thread execution data managed by other threads  |
C     | is only automatically visible to another thread if it    |
C     | existed before the thread was created. This means that   |
C     | for Fortran we declare arrays with overlap regions in    |
C     | COMMON blocks.                                           |
C     \==========================================================/

C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"

C     === Routine arguments ===
C     phi    - Array whose overlap regions are to be exchanged.
C     myThid - My thread id.
      Real*4 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,1:Nz,nSx,nSy)
      INTEGER myThid
CEndOfInterface

C     === Local variables ===
C     bi,bj      - Outer loop counters
C     I,J,K      - Inner loop counters
      INTEGER bi, bj
      INTEGER I, J, K
#ifdef ALLOW_USE_MPI
C     tagSend    - Tags used to mark and select messages
C     tagRecv
C     nReqs      - MPI request counter.
C     reqIds     - Outstanding requests to wait on.
C     mpiRC      - MPI return code.
C     mpiStatArr - Multi-request status reporting array.
C     toPid      - Proc. id sending to.
C     fromPid    - Proc. id receiving from.
C     elCount    - Number of items to send or receive.
C     elType     - Data type of elements.
      INTEGER tagSend
      INTEGER tagRecv
      INTEGER nReqs
      INTEGER reqIds(4)
      INTEGER mpiRC
      INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
      INTEGER toPid
      INTEGER fromPid
      INTEGER elCount
      INTEGER elType
#endif /* ALLOW_USE_MPI */
C     bE, bW     - Block index to east, west, north and
C     bN, bS       south.
      INTEGER bW, bE, bS, bN

C--   Do "message-passing" data exchanges
#ifdef ALLOW_SYNC_COMMUNICATION
#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      IF ( usingSyncMessages ) THEN
#endif

C--   MPI based data exchanges
#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif

C     Here we use the approach that all the X shifts are completed
C     before the Y shifts start. Then we can expand our Y shifts
C     to include X overlap regions. This means we don't have to
C     transfer corners separately.
C     Notes:
C     ======
C     1. If we are only using messages in Y and using "true
C     shared-memory" in X, then we rely on the shared-memory code
C     being implemented so that corner values will be set correctly.
C     The true shared-memory code is in the block below - so be
C     careful when altering it!
C     2. In order to ensure we grab the right message we need to
C     tag messages. We tag the messages with the direction in which
C     they were sent out. Thus, to receive from a processor to our
C     west we request the next message from that processor that was
C     sent out with a tag indicating an easterly send.
C     3. We could implement a "safe" code here by having everyone
C     sync and then getting a particular thread ( say thread 1 ) to
C     do the MPI_Isend calls sequentially. In this mode the
C     "communication" thread would loop over nThreads and do an
C     MPI_Sendrecv for each one. This could be more efficient on
C     some platforms, as the messages might be sent as one large
C     unit rather than by separate threads. It would also be
C     "thread-safe" for MPI implementations that have
C     multi-threading problems with things like request ids.
C     4. We include overlap regions in both X and Y sends.
C     This means we can interchange the Y block with the X block.
C     Not sure whether this will ever be useful!
C     5. The generation of a request handle by MPI_Isend and
C     MPI_Irecv ought to involve some global state within MPI.
C     If this process is not thread safe it may be necessary to
C     enable a critical section around the MPI_Isend and
C     MPI_Irecv calls.

C     We need a barrier here to synchronise threads; otherwise
C     one thread might declare it is ready to send and receive
C     even though another thread is going to write to the first
C     thread's send or receive region. This won't happen, however,
C     if we are careful about which data a thread updates - but at
C     the moment we aren't careful!
      _BARRIER

C--   East-west messaging communication
      nReqs = 0
      bE = myBxLo(myThid)
      IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
C      My west face uses MPI. Get ready to receive data from the
C      west and post that we are ready to send data to the west.
C      Data we receive has to come with the tag of the thread we
C      know is sending from our west; data we send is tagged with
C      our thread id and the send direction.
       nReqs   = nReqs+1
       tagSend = mpiTagW*nThreads+myThid
       toPid   = mpiPidW
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_r4(myThid)
       CALL MPI_Isend(
     I      phi(1,1-OLy,1,bE,myByLo(myThid)),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs+1
       tagRecv = mpiTagE*nThreads+myThrW(myThid)
       fromPid = mpiPidW
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_r4(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,1-OLy,1,bE,myByLo(myThid)),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
      bW = myBxHi(myThid)
      IF ( bW .EQ. nSx .AND. commE(myThid) .EQ. COMM_MPI ) THEN
C      My east face uses MPI. Get ready to receive data from the
C      east and post that we are ready to send data to the east.
C      Data we receive has to come with the tag of the thread we
C      know is sending from our east; data we send is tagged with
C      our thread id.
       nReqs   = nReqs+1
       tagSend = mpiTagE*nThreads+myThid
       toPid   = mpiPidE
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_r4(myThid)
       CALL MPI_Isend(
     I      phi(sNx-OLx+1,1-OLy,1,bW,myByLo(myThid)),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs+1
       tagRecv = mpiTagW*nThreads+myThrE(myThid)
       fromPid = mpiPidE
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_r4(myThid)
       CALL MPI_Irecv(
     U      phi(sNx+1,1-OLy,1,bW,myByLo(myThid)),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C     Wait for this thread's east-west transactions to finish before
C     posting north-south transactions. We have to do this so that
C     the north-south transactions will send out the correct overlap
C     region values into the corner sections of our neighbors.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

C--   North-south messaging communication
      nReqs = 0
      bN = myByLo(myThid)
      IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
C      My south face uses MPI. Get ready to receive data from the
C      south and post that I am ready to send data to the south.
       nReqs   = nReqs + 1
       tagSend = mpiTagS*nThreads+myThid
       toPid   = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_r4(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,1,1,myBxLo(myThid),bN),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs + 1
       tagRecv = mpiTagN*nThreads+myThrS(myThid)
       fromPid = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_r4(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,1-OLy,1,myBxLo(myThid),bN),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C
      bS = myByHi(myThid)
      IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
C      My north face uses MPI. Get ready to receive data from the
C      north and post that I am ready to send data to the north.
       nReqs   = nReqs + 1
       tagSend = mpiTagN*nThreads+myThid
       toPid   = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_r4(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,sNy-OLy+1,1,myBxLo(myThid),bS),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs + 1
       tagRecv = mpiTagS*nThreads+myThrN(myThid)
       fromPid = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_r4(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,sNy+1,1,myBxLo(myThid),bS),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C     Wait for this thread's north-south transactions to finish.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      ENDIF
#endif
#endif /* ALLOW_SYNC_COMMUNICATION */

C--   Do true shared-memory data exchanges
C     Note: this is also doing the overlap copies
C     for a single-threaded code.
C--   First make sure all threads have reached here
      _BARRIER

C--   Now do copies
C     Notes:
C     ======
C     Here we copy from up to and including the overlap
C     regions for both the X shift and the Y shift. This
C     catches the "corners" of the data copied using messages
C     in the "synchronous communication" section above.
C--   x-axis exchanges
      bE = myBxLo(myThid)
      IF ( bE .NE. 1 .OR.
     &     commW(myThid) .EQ. COMM_SHARED ) THEN
       bW = bE-1
       IF ( bW .LT. 1 ) bW = nSx
       DO bj=myByLo(myThid),myByHi(myThid)
        DO K=1,Nz
         DO J=1-OLy,sNy+OLy
          DO I=1,OLx
           phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
           phi(1-I  ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bE=myBxLo(myThid)+1,myBxHi(myThid)
       bW = bE-1
       DO bj=myByLo(myThid),myByHi(myThid)
        DO K=1,Nz
         DO J=1-OLy,sNy+OLy
          DO I=1,OLx
           phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
           phi(1-I  ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C     Need to ensure all threads have completed x-axis transfers
C     before we can do y-axis exchanges.
      _BARRIER

C--   y-axis exchanges
      bN = myByLo(myThid)
      IF ( bN .NE. 1 .OR.
     &     commS(myThid) .EQ. COMM_SHARED ) THEN
       bS = bN - 1
       IF ( bS .LT. 1 ) bS = nSy
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nz
         DO J=1,OLy
          DO I=1-OLx,sNx+OLx
           phi(I,1-J  ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
           phi(I,sNy+J,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bN=myByLo(myThid)+1,myByHi(myThid)
       bS = bN - 1
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nz
         DO J=1,OLy
          DO I=1-OLx,sNx+OLx
           phi(I,1-J  ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
           phi(I,sNy+J,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      _BARRIER

      RETURN
      END

CStartOfInterface
      SUBROUTINE EXCH_XY_R8(
     U           phi,
     I           myThid )
C     /==========================================================\
C     | SUBROUTINE EXCH_XY_R8                                    |
C     | o Handle exchanges for real*8, two-dimensional arrays.   |
C     |==========================================================|
C     | Do true shared-memory data transfers and "messaging"     |
C     | transfers for the blocking case of data transfers.       |
C     | Applications call this routine using                     |
C     |   CALL EXCH..( x, myThid )                               |
C     | where x is a two-dimensional array with overlaps.        |
C     | This routine does true-shared-memory copies for blocks   |
C     | within a thread. It will also do MPI messaging between   |
C     | different processes.                                     |
C     | Note:                                                    |
C     | =====                                                    |
C     | If it is used, "asynchronous" messaging, in which        |
C     | communication overlaps computation, is handled elsewhere |
C     | - see recv.F and send.F.                                 |
C     | In practice MPI implementations may not be completely    |
C     | thread safe. In principle this code is correct even for  |
C     | mixed MPI and multi-threaded execution.                  |
C     | In multi-thread execution data managed by other threads  |
C     | is only automatically visible to another thread if it    |
C     | existed before the thread was created. This means that   |
C     | for Fortran we declare arrays with overlap regions in    |
C     | COMMON blocks.                                           |
C     \==========================================================/

C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"

C     === Routine arguments ===
C     phi    - Array whose overlap regions are to be exchanged.
C     myThid - My thread id.
      Real*8 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      INTEGER myThid
CEndOfInterface

C     === Local variables ===
C     bi,bj      - Outer loop counters
C     I,J        - Inner loop counters
      INTEGER bi, bj
      INTEGER I, J
#ifdef ALLOW_USE_MPI
C     tagSend    - Tags used to mark and select messages
C     tagRecv
C     nReqs      - MPI request counter.
C     reqIds     - Outstanding requests to wait on.
C     mpiRC      - MPI return code.
C     mpiStatArr - Multi-request status reporting array.
C     toPid      - Proc. id sending to.
C     fromPid    - Proc. id receiving from.
C     elCount    - Number of items to send or receive.
C     elType     - Data type of elements.
      INTEGER tagSend
      INTEGER tagRecv
      INTEGER nReqs
      INTEGER reqIds(4)
      INTEGER mpiRC
      INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
      INTEGER toPid
      INTEGER fromPid
      INTEGER elCount
      INTEGER elType
#endif /* ALLOW_USE_MPI */
C     bE, bW     - Block index to east, west, north and
C     bN, bS       south.
      INTEGER bW, bE, bS, bN

C--   Do "message-passing" data exchanges
#ifdef ALLOW_SYNC_COMMUNICATION
#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      IF ( usingSyncMessages ) THEN
#endif

C--   MPI based data exchanges
#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif

C     Here we use the approach that all the X shifts are completed
C     before the Y shifts start. Then we can expand our Y shifts
C     to include X overlap regions. This means we don't have to
C     transfer corners separately.
C     Notes:
C     ======
C     1. If we are only using messages in Y and using "true
C     shared-memory" in X, then we rely on the shared-memory code
C     being implemented so that corner values will be set correctly.
C     The true shared-memory code is in the block below - so be
C     careful when altering it!
C     2. In order to ensure we grab the right message we need to
C     tag messages. We tag the messages with the direction in which
C     they were sent out. Thus, to receive from a processor to our
C     west we request the next message from that processor that was
C     sent out with a tag indicating an easterly send.
C     3. We could implement a "safe" code here by having everyone
C     sync and then getting a particular thread ( say thread 1 ) to
C     do the MPI_Isend calls sequentially. In this mode the
C     "communication" thread would loop over nThreads and do an
C     MPI_Sendrecv for each one. This could be more efficient on
C     some platforms, as the messages might be sent as one large
C     unit rather than by separate threads. It would also be
C     "thread-safe" for MPI implementations that have
C     multi-threading problems with things like request ids.
C     4. We include overlap regions in both X and Y sends.
C     This means we can interchange the Y block with the X block.
C     Not sure whether this will ever be useful!
C     5. The generation of a request handle by MPI_Isend and
C     MPI_Irecv ought to involve some global state within MPI.
C     If this process is not thread safe it may be necessary to
C     enable a critical section around the MPI_Isend and
C     MPI_Irecv calls.

C     We need a barrier here to synchronise threads; otherwise
C     one thread might declare it is ready to send and receive
C     even though another thread is going to write to the first
C     thread's send or receive region. This won't happen, however,
C     if we are careful about which data a thread updates - but at
C     the moment we aren't careful!
      _BARRIER

C--   East-west messaging communication
      nReqs = 0
      bE = myBxLo(myThid)
      IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
C      My west face uses MPI. Get ready to receive data from the
C      west and post that we are ready to send data to the west.
C      Data we receive has to come with the tag of the thread we
C      know is sending from our west; data we send is tagged with
C      our thread id and the send direction.
       nReqs   = nReqs+1
       tagSend = mpiTagW*nThreads+myThid
       toPid   = mpiPidW
       elCount = 1
       elType  = mpiTypeXFaceThread_xy_R8(myThid)
       CALL MPI_Isend(
     I      phi(1,1-OLy,bE,myByLo(myThid)),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs+1
       tagRecv = mpiTagE*nThreads+myThrW(myThid)
       fromPid = mpiPidW
       elCount = 1
       elType  = mpiTypeXFaceThread_xy_R8(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,1-OLy,bE,myByLo(myThid)),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
      bW = myBxHi(myThid)
      IF ( bW .EQ. nSx .AND. commE(myThid) .EQ. COMM_MPI ) THEN
C      My east face uses MPI. Get ready to receive data from the
C      east and post that we are ready to send data to the east.
C      Data we receive has to come with the tag of the thread we
C      know is sending from our east; data we send is tagged with
C      our thread id.
       nReqs   = nReqs+1
       tagSend = mpiTagE*nThreads+myThid
       toPid   = mpiPidE
       elCount = 1
       elType  = mpiTypeXFaceThread_xy_R8(myThid)
       CALL MPI_Isend(
     I      phi(sNx-OLx+1,1-OLy,bW,myByLo(myThid)),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs+1
       tagRecv = mpiTagW*nThreads+myThrE(myThid)
       fromPid = mpiPidE
       elCount = 1
       elType  = mpiTypeXFaceThread_xy_R8(myThid)
       CALL MPI_Irecv(
     U      phi(sNx+1,1-OLy,bW,myByLo(myThid)),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C     Wait for this thread's east-west transactions to finish before
C     posting north-south transactions. We have to do this so that
C     the north-south transactions will send out the correct overlap
C     region values into the corner sections of our neighbors.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

C--   North-south messaging communication
      nReqs = 0
      bN = myByLo(myThid)
      IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
C      My south face uses MPI. Get ready to receive data from the
C      south and post that I am ready to send data to the south.
       nReqs   = nReqs + 1
       tagSend = mpiTagS*nThreads+myThid
       toPid   = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_R8(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,1,myBxLo(myThid),bN),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs + 1
       tagRecv = mpiTagN*nThreads+myThrS(myThid)
       fromPid = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_R8(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,1-OLy,myBxLo(myThid),bN),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C
      bS = myByHi(myThid)
      IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
C      My north face uses MPI. Get ready to receive data from the
C      north and post that I am ready to send data to the north.
       nReqs   = nReqs + 1
       tagSend = mpiTagN*nThreads+myThid
       toPid   = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_R8(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,sNy-OLy+1,myBxLo(myThid),bS),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
       nReqs   = nReqs + 1
       tagRecv = mpiTagS*nThreads+myThrN(myThid)
       fromPid = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xy_R8(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,sNy+1,myBxLo(myThid),bS),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C     Wait for this thread's north-south transactions to finish.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      ENDIF
#endif
#endif /* ALLOW_SYNC_COMMUNICATION */

C--   Do true shared-memory data exchanges
C     Note: this section also does the overlap copies for a
C     serial code.
C--   First make sure all threads have reached here
      _BARRIER

C--   Now do copies
C     Notes:
C     ======
C     Here we copy from up to and including the overlap
C     regions for both the X shift and the Y shift. This
C     catches the "corners" of the data copied using messages
C     in the "synchronous communication" section above.
C--   x-axis exchanges
      bE = myBxLo(myThid)
      IF ( bE .NE. 1 .OR.
     &     commW(myThid) .EQ. COMM_SHARED ) THEN
       bW = bE-1
       IF ( bW .LT. 1 ) bW = nSx
       DO bj=myByLo(myThid),myByHi(myThid)
        DO J=1-OLy,sNy+OLy
         DO I=1,OLx
          phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
          phi(1-I  ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bE=myBxLo(myThid)+1,myBxHi(myThid)
       bW = bE-1
       DO bj=myByLo(myThid),myByHi(myThid)
        DO J=1-OLy,sNy+OLy
         DO I=1,OLx
          phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
          phi(1-I  ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C     Need to ensure all threads have completed x-axis transfers
C     before we can do y-axis exchanges.
      _BARRIER

C--   y-axis exchanges
      bN = myByLo(myThid)
      IF ( bN .NE. 1 .OR.
     &     commS(myThid) .EQ. COMM_SHARED ) THEN
       bS = bN - 1
       IF ( bS .LT. 1 ) bS = nSy
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,OLy
         DO I=1-OLx,sNx+OLx
          phi(I,1-J  ,bi,bN)=phi(I,sNy-J+1,bi,bS)
          phi(I,sNy+J,bi,bS)=phi(I,1+J-1 ,bi,bN)
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bN=myByLo(myThid)+1,myByHi(myThid)
       bS = bN - 1
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,OLy
         DO I=1-OLx,sNx+OLx
          phi(I,1-J  ,bi,bN)=phi(I,sNy-J+1,bi,bS)
          phi(I,sNy+J,bi,bS)=phi(I,1+J-1 ,bi,bN)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      _BARRIER

      RETURN
      END

CStartOfInterface
      SUBROUTINE EXCH_XYZ_R8(
     U           phi,
     I           myThid )
C     /==========================================================\
C     | SUBROUTINE EXCH_XYZ_R8                                   |
C     | o Handle exchanges for real*8, three-dimensional arrays. |
C     |==========================================================|
C     | Do true shared-memory data transfers and "messaging"     |
C     | transfers for the blocking case of data transfers.       |
C     | Applications call this routine using                     |
C     |   CALL EXCH..( x, myThid )                               |
C     | where x is a three-dimensional array with overlaps.      |
C     | This routine does true-shared-memory copies for blocks   |
C     | within a thread. It will also do MPI messaging between   |
C     | different processes.                                     |
C     | Note:                                                    |
C     | =====                                                    |
C     | If it is used, "asynchronous" messaging, in which        |
C     | communication overlaps computation, is handled elsewhere |
C     | - see recv.F and send.F.                                 |
C     | In practice MPI implementations may not be completely    |
C     | thread safe. In principle this code is correct even for  |
C     | mixed MPI and multi-threaded execution.                  |
C     | In multi-thread execution data managed by other threads  |
C     | is only automatically visible to another thread if it    |
C     | existed before the thread was created. This means that   |
C     | for Fortran we declare arrays with overlap regions in    |
C     | COMMON blocks.                                           |
C     \==========================================================/

C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"

C     === Routine arguments ===
C     phi    - Array whose overlap regions are to be exchanged.
C     myThid - My thread id.
      Real*8 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,1:Nz,nSx,nSy)
      INTEGER myThid
CEndOfInterface

C     === Local variables ===
C     bi,bj      - Outer loop counters
C     I,J,K      - Inner loop counters
      INTEGER bi, bj
      INTEGER I, J, K
#ifdef ALLOW_USE_MPI
C     tagSend    - Tags used to mark and select messages
C     tagRecv
C     nReqs      - MPI request counter.
C     reqIds     - Outstanding requests to wait on.
C     mpiRC      - MPI return code.
C     mpiStatArr - Multi-request status reporting array.
C     toPid      - Proc. id sending to.
C     fromPid    - Proc. id receiving from.
C     elCount    - Number of items to send or receive.
C     elType     - Data type of elements.
      INTEGER tagSend
      INTEGER tagRecv
      INTEGER nReqs
      INTEGER reqIds(4)
      INTEGER mpiRC
      INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
      INTEGER toPid
      INTEGER fromPid
      INTEGER elCount
      INTEGER elType
#endif /* ALLOW_USE_MPI */
C     bE, bW     - Block index to east, west, north and
C     bN, bS       south.
      INTEGER bW, bE, bS, bN

C--   Do "message-passing" data exchanges
#ifdef ALLOW_SYNC_COMMUNICATION
#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      IF ( usingSyncMessages ) THEN
#endif

C--   MPI based data exchanges
#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif

C     Here we use the approach that all the X shifts are completed
C     before the Y shifts start. Then we can expand our Y shifts
C     to include X overlap regions. This means we don't have to
C     transfer corners separately.
C     Notes:
C     ======
C     1. If we are only using messages in Y and using "true
C     shared-memory" in X, then we rely on the shared-memory code
C     being implemented so that corner values will be set correctly.
C     The true shared-memory code is in the block below - so be
C     careful when altering it!
C     2. In order to ensure we grab the right message we need to
C     tag messages. We tag the messages with the direction in which
C     they were sent out. Thus, to receive from a processor to our
C     west we request the next message from that processor that was
C     sent out with a tag indicating an easterly send.
C     3. We could implement a "safe" code here by having everyone
C     sync and then getting a particular thread ( say thread 1 ) to
C     do the MPI_Isend calls sequentially. In this mode the
C     "communication" thread would loop over nThreads and do an
C     MPI_Sendrecv for each one. This could be more efficient on
C     some platforms, as the messages might be sent as one large
C     unit rather than by separate threads. It would also be
C     "thread-safe" for MPI implementations that have
C     multi-threading problems with things like request ids.
C     4. We include overlap regions in both X and Y sends.
C     This means we can interchange the Y block with the X block.
C     Not sure whether this will ever be useful!
C     5. The generation of a request handle by MPI_Isend and
C     MPI_Irecv ought to involve some global state within MPI.
C     If this process is not thread safe it may be necessary to
C     enable a critical section around the MPI_Isend and
C     MPI_Irecv calls.

C     We need a barrier here to synchronise threads; otherwise
C     one thread might declare it is ready to send and receive
C     even though another thread is going to write to the first
C     thread's send or receive region. This won't happen, however,
C     if we are careful about which data a thread updates - but at
C     the moment we aren't careful!
      _BARRIER

C--   East-west messaging communication
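C     ( Note: unlike the routines above, this exchange is split into
C     two phases per axis - each MPI_Isend is paired with the
C     matching MPI_Irecv for the opposite face, with a wait between
C     the phases. )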
      bE = myBxLo(myThid)
      bW = myBxHi(myThid)
      nReqs = 0
      IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
C      My west face uses MPI. Post that we are ready to send data
C      to the west. Data we send is tagged with our thread id and
C      the send direction.
       nReqs   = nReqs+1
       tagSend = mpiTagW*nThreads+myThid
       toPid   = mpiPidW
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_R8(myThid)
       CALL MPI_Isend(
     I      phi(1,1-OLy,1,bE,myByLo(myThid)),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
      IF ( bW .EQ. nSx .AND. commE(myThid) .EQ. COMM_MPI ) THEN
       nReqs   = nReqs+1
       tagRecv = mpiTagW*nThreads+myThrE(myThid)
       fromPid = mpiPidE
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_R8(myThid)
       CALL MPI_Irecv(
     U      phi(sNx+1,1-OLy,1,bW,myByLo(myThid)),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

      nReqs = 0
      IF ( bW .EQ. nSx .AND. commE(myThid) .EQ. COMM_MPI ) THEN
C      My east face uses MPI. Post that we are ready to send data
C      to the east. Data we receive has to come with the tag of the
C      thread we know is sending from our east; data we send is
C      tagged with our thread id.
       nReqs   = nReqs+1
       tagSend = mpiTagE*nThreads+myThid
       toPid   = mpiPidE
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_R8(myThid)
       CALL MPI_Isend(
     I      phi(sNx-OLx+1,1-OLy,1,bW,myByLo(myThid)),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
      IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
       nReqs   = nReqs+1
       tagRecv = mpiTagE*nThreads+myThrW(myThid)
       fromPid = mpiPidW
       elCount = 1
       elType  = mpiTypeXFaceThread_xyz_R8(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,1-OLy,1,bE,myByLo(myThid)),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF

C     Wait for this thread's east-west transactions to finish before
C     posting north-south transactions. We have to do this so that
C     the north-south transactions will send out the correct overlap
C     region values into the corner sections of our neighbors.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

C--   North-south messaging communication
      nReqs = 0
      bN = myByLo(myThid)
      bS = myByHi(myThid)

      IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
C      My south face uses MPI. Post that I am ready to send data
C      to the south.
       nReqs   = nReqs + 1
       tagSend = mpiTagS*nThreads+myThid
       toPid   = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_R8(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,1,1,myBxLo(myThid),bN),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
      IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
       nReqs   = nReqs + 1
       tagRecv = mpiTagS*nThreads+myThrN(myThid)
       fromPid = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_R8(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,sNy+1,1,myBxLo(myThid),bS),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C     Wait for the first batch of this thread's north-south
C     transactions to finish.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
C
      nReqs = 0
      IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
C      My north face uses MPI. Post that I am ready to send data
C      to the north.
       nReqs   = nReqs + 1
       tagSend = mpiTagN*nThreads+myThid
       toPid   = mpiPidN
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_R8(myThid)
       CALL MPI_Isend(
     I      phi(1-OLx,sNy-OLy+1,1,myBxLo(myThid),bS),
     I      elCount, elType, toPid,
     I      tagSend, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
      IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
       nReqs   = nReqs + 1
       tagRecv = mpiTagN*nThreads+myThrS(myThid)
       fromPid = mpiPidS
       elCount = 1
       elType  = mpiTypeYFaceThread_xyz_R8(myThid)
       CALL MPI_Irecv(
     U      phi(1-OLx,1-OLy,1,myBxLo(myThid),bN),
     I      elCount, elType, fromPid,
     I      tagRecv, MPI_COMM_WORLD,
     I      reqIds(nReqs), mpiRC )
      ENDIF
C     Wait for this thread's north-south transactions to finish.
      CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )

#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

#ifndef ALWAYS_USE_SYNC_COMMUNICATION
      ENDIF
#endif
#endif /* ALLOW_SYNC_COMMUNICATION */

C--   Do true shared-memory data exchanges
C     Note: this section also does the overlap copies for a
C     serial code.
C--   First make sure all threads have reached here
      _BARRIER

C--   Now do copies
C     Notes:
C     ======
C     Here we copy from up to and including the overlap
C     regions for both the X shift and the Y shift. This
C     catches the "corners" of the data copied using messages
C     in the "synchronous communication" section above.
C--   x-axis exchanges
      bE = myBxLo(myThid)
      IF ( bE .NE. 1 .OR.
     &     commW(myThid) .EQ. COMM_SHARED ) THEN
       bW = bE-1
       IF ( bW .LT. 1 ) bW = nSx
       DO bj=myByLo(myThid),myByHi(myThid)
        DO K=1,Nz
         DO J=1-OLy,sNy+OLy
          DO I=1,OLx
           phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
           phi(1-I  ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bE=myBxLo(myThid)+1,myBxHi(myThid)
       bW = bE-1
       DO bj=myByLo(myThid),myByHi(myThid)
        DO K=1,Nz
         DO J=1-OLy,sNy+OLy
          DO I=1,OLx
           phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
           phi(1-I  ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C     Need to ensure all threads have completed x-axis transfers
C     before we can do y-axis exchanges.
      _BARRIER

C--   y-axis exchanges
      bN = myByLo(myThid)
      IF ( bN .NE. 1 .OR.
     &     commS(myThid) .EQ. COMM_SHARED ) THEN
       bS = bN - 1
       IF ( bS .LT. 1 ) bS = nSy
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nz
         DO J=1,OLy
          DO I=1-OLx,sNx+OLx
           phi(I,1-J  ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
           phi(I,sNy+J,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDIF
      DO bN=myByLo(myThid)+1,myByHi(myThid)
       bS = bN - 1
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nz
         DO J=1,OLy
          DO I=1-OLx,sNx+OLx
           phi(I,1-J  ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
           phi(I,sNy+J,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      _BARRIER

      RETURN
      END