
Annotation of /MITgcm/eesupp/src/exch.F



Revision 1.5
Mon Jun 22 16:24:51 1998 UTC by adcroft
Branch: MAIN
CVS Tags: checkpoint9, checkpoint11, checkpoint10, checkpoint13, checkpoint12, branch-point-rdot
Branch point for: branch-rdot
Changes since 1.4: +6 -6 lines
o General tidy-up.
o MPI fix. Filename changes (meta/data). salbin*y stuff.
o SST.bin SSS.bin added to verification/exp2

1 adcroft 1.5 C $Header: /u/gcmpack/models/MITgcmUV/eesupp/src/exch.F,v 1.4.2.2 1998/06/22 02:11:15 cnh Exp $
2 cnh 1.1
3     #include "CPP_EEOPTIONS.h"
4    
5     C-- File exch.F: Routines that perform atomic communication operations
6     C i.e. communication operations that are completed by
7     C the time the routines return.
8     C Note
9     C ====
10     C The code in here, although intricate, is fairly
11     C straightforward. There are four routines, one
12     C for each of the data patterns we wish to do overlap
13     C updates on - as listed below. Each routine has two
14     C parts. The first part does overlap updates to and from
15     C remote "processes", that is processes that do not truly
16     C share the address space of this process. This part
17     C requires a facility like MPI or CRAY shmem get and put
18     C operations. It is possible to switch this part on or
19     C off using compiler directives. In the case of a simple
20     C serial execution nothing happens in this part. This part
21     C is also switched off when communication strategies
22     C that allow the overlapping of communication and computation
23     C are employed.
24     C The second part of each routine does the true shared memory
25     C overlap copying i.e. copying from one part of array phi
26     C to another part. This part is always active; in the case
27     C of a single-threaded messaging code, however, this part
28     C will not do any copies, as all edges will be flagged as using,
29     C for example, MPI or SHMPUT, SHMGET.
30     C Contents
31     C o exch_xy_r4 - Does overlap updates for REAL*4 2d XY fields
32     C o exch_xy_r8 - Does overlap updates for REAL*8 2d XY fields
33     C o exch_xyz_r4 - Does overlap updates for REAL*4 3d XYZ fields
34     C o exch_xyz_r8 - Does overlap updates for REAL*8 3d XYZ fields
35    
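C Usage sketch (illustrative only; the field name "eta" and the
C COMMON block name below are hypothetical and not part of this
C file): a caller keeps the field, including its overlap regions,
C in a COMMON block and passes it, together with its thread id,
C to the exchange routine of matching type and rank, e.g.
C
C       Real*8 eta(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
C       COMMON / EXAMPLE_FIELDS / eta
C       ...
C       CALL EXCH_XY_R8( eta, myThid )
C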
36     CStartOfInterface
37     SUBROUTINE EXCH_XY_R4(
38     U phi,
39     I myThid )
40     C /==========================================================\
41     C | SUBROUTINE EXCH_XY_R4 |
42     C | o Handle exchanges for real*4, two-dimensional arrays. |
43     C |==========================================================|
44     C | Do true shared-memory data transfers and "messaging" |
45     C | transfers for the blocking case of data transfers. |
46     C | Applications call this routine using |
47     C | CALL EXCH..( x, myThid ) |
48     C | where x is a two-dimensional array with overlaps. |
49     C | This routine does true-shared-memory copies for blocks |
50     C | within a thread. It will also do MPI messaging between |
51     C | different processes. |
52     C | Note: |
53     C | ===== |
54     C | If it is used, "asynchronous" messaging in which |
55     C | communication overlaps computation is handled elsewhere |
56     C | - see recv.F and send.F. |
57     C | In practice MPI implementations may not be completely |
58     C | thread safe. In principle this code is correct even for |
59     C | mixed MPI and multi-threaded execution. |
60     C | In multi-thread execution data managed by other threads |
61     C | is only automatically visible to another thread if it |
62     C | existed before the thread was created. This means that |
63     C | for Fortran we declare arrays with overlap regions in |
64     C | COMMON blocks. |
65     C \==========================================================/
66    
67     C === Global data ===
68     #include "SIZE.h"
69     #include "EEPARAMS.h"
70     #include "EESUPPORT.h"
71    
72     C === Routine arguments ===
73     C phi - Array whose overlap regions are to be exchanged
74     C myThid - My thread id.
75     Real*4 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
76     INTEGER myThid
77     CEndOfInterface
78    
79     C === Local variables ===
80     C bi,bj - Outer loop counters
81     C I,J,K - Inner loop counters
82     INTEGER bi, bj
83     INTEGER I, J
84     #ifdef ALLOW_USE_MPI
85     C tagSend - Tags used to mark and select messages
86     C tagRecv
87     C nReqs - MPI request counter.
88     C reqIds - Outstanding requests to wait on.
89     C mpiRC - MPI return code
90     C mpiStatArr - Multi-request status reporting array.
91     C toPid - Proc. id sending to
92     C fromPid - Proc. id receiving from
93     C elCount - Number of items to send or receive
94     C elType - Data type of elements
95     INTEGER tagSend
96     INTEGER tagRecv
97     INTEGER nReqs
98     INTEGER reqIds(4)
99     INTEGER mpiRC
100     INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
101     INTEGER toPid
102     INTEGER fromPid
103     INTEGER elCount
104     INTEGER elType
105     #endif /* ALLOW_USE_MPI */
106     C bE, bW - Block index to east, west, north and
107     C bN, bS south.
108     INTEGER bW, bE, bS, bN
109    
110     C-- Do "message-passing" data exchanges
111     #ifdef ALLOW_SYNC_COMMUNICATION
112     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
113     IF ( usingSyncMessages ) THEN
114     #endif
115    
116     C-- MPI-based data exchanges
117     #ifdef ALLOW_USE_MPI
118     #ifndef ALWAYS_USE_MPI
119     IF ( usingMPI ) THEN
120     #endif
121    
122     C Here we use the approach that all the X shifts are completed
123     C before the Y shifts start. Then we can expand our Y shifts
124     C to include X overlap regions. This means we don't have to
125     C transfer corners separately.
126     C Notes:
127     C ======
128     C 1. If we are only using messages in Y and using "true shared-memory"
129     C in X then we rely on the shared-memory code to be implemented so that
130     C corner values will be set correctly. The true shared-memory code
131     C is in the block below - so be careful when altering it!
132     C 2. In order to ensure we grab the right message we need to tag
133     C messages. We tag the messages with the direction in which they
134     C were sent out. Thus to receive from a processor to our west we
135     C request the next message from that processor that was sent out
136     C with a tag indicating an easterly send.
137     C 3. We could implement a "safe" code here by having everyone
138     C sync and then getting a particular thread ( say thread 1 ) to
139     C do the MPI_Isend sequentially. In this mode the "communication"
140     C thread would loop over nThreads and do MPI_Sendrecv for each
141     C one. This could be more efficient on some platforms as the messages
142     C might be sent as one large unit rather than by separate threads.
143     C It would also be "thread-safe" for MPI implementations that have
144     C multi-threading problems with things like request ids.
145     C 4. We include overlap regions in both X and Y sends.
146     C This means we can interchange the Y block with the X block.
147     C Not sure whether this will ever be useful!
148     C 5. The generation of a request handle by MPI_Isend and
149     C MPI_Irecv ought to involve some global state within MPI.
150     C If this process is not thread safe it may be necessary to
151     C enable a critical section around the MPI_Isend and
152     C MPI_Irecv calls.
153    
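C (Worked example for note 2, with illustrative values: if
C  mpiTagW = 1, nThreads = 4 and myThid = 2, our westward send is
C  tagged 1*4+2 = 6; our western neighbour selects that message by
C  requesting tag mpiTagW*nThreads plus the id of the thread to its
C  east, which is also 6. The tag constants themselves come from
C  the included headers.)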
154     C We need a barrier here to synchronise threads, otherwise
155     C one thread might declare it is ready to send and receive
156     C even though another thread is going to write to the first
157     C thread's send or receive region. This won't happen, however,
158     C if we are careful about which data a thread updates - but at
159     C the moment we aren't careful!
160     _BARRIER
161    
162     C-- East-west messaging communication
163     nReqs = 0
164     bE = myBxLo(myThid)
165     IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
166     C My west face uses MPI. Get ready to receive data from
167     C west and post that we are ready to send data to the west.
168     C Data we receive has to come with tag of thread we know is
169     C sending from our west, data we send is tagged with our thread id
170     C and the send direction.
171     nReqs = nReqs+1
172     tagSend = mpiTagW*nThreads+myThid
173     toPid = mpiPidW
174     elCount = 1
175     elType = mpiTypeXFaceThread_xy_r4(myThid)
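C (elCount is 1 because elType is an MPI derived datatype that is
C  assumed to describe the whole, generally strided, X-face region
C  for this thread; it is not defined in this file and is
C  presumably built during communication initialisation.)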
176     C I tagSend,MPI_COMM_WORLD,
177     CALL MPI_Isend(
178     I phi(1,1-OLy,bE,myByLo(myThid)),
179     I elCount, elType, toPid,
180     I tagSend,MPI_COMM_WORLD,
181     I reqIds(nReqs),mpiRC)
182     nReqs = nReqs+1
183     tagRecv = mpiTagE*nThreads+myThrW(myThid)
184     fromPid = mpiPidW
185     elCount = 1
186     elType = mpiTypeXFaceThread_xy_r4(myThid)
187     CALL MPI_Irecv(
188     U phi(1-OLx,1-OLy,bE,myByLo(myThid)),
189     I elCount, elType, fromPid,
190     I tagRecv,MPI_COMM_WORLD,
191     I reqIds(nReqs),mpiRC)
192     ENDIF
193     bW = myBxHi(myThid)
194     IF ( bW .EQ. nSx .AND. commE(MyThid) .EQ. COMM_MPI ) THEN
195     C My east face uses MPI. Get ready to receive data from
196     C east and post that we are ready to send data to the east.
197     C Data we receive has to come with tag of thread we know is
198     C sending from our east, data we send is tagged with our thread id.
199     nReqs = nReqs+1
200     tagSend = mpiTagE*nThreads+myThid
201     toPid = mpiPidE
202     elCount = 1
203     elType = mpiTypeXFaceThread_xy_r4(myThid)
204     CALL MPI_Isend(
205     I phi(sNx-OLx+1,1-OLy,bW,myByLo(myThid)),
206     I elCount, elType, toPid,
207     I tagSend,MPI_COMM_WORLD,
208     I reqIds(nReqs),mpiRC)
209     nReqs = nReqs+1
210     tagRecv = mpiTagW*nThreads+myThrE(myThid)
211     fromPid = mpiPidE
212     elCount = 1
213     elType = mpiTypeXFaceThread_xy_r4(myThid)
214     CALL MPI_Irecv(
215     U phi(sNx+1,1-OLy,bW,myByLo(myThid)),
216     I elCount, elType, fromPid,
217     I tagRecv,MPI_COMM_WORLD,
218     I reqIds(nReqs),mpiRC)
219     ENDIF
220     C Wait for this thread's east-west transactions to finish before
221     C posting north-south transactions. We have to do this so that
222     C the north-south transactions will send out the correct overlap
223     C region values into the corner sections of our neighbors.
224     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
225    
226 cnh 1.4 CcnhDebugStarts
227 adcroft 1.5 C RETURN
228 cnh 1.4 CcnhDebugEnds
229    
230 cnh 1.1 C-- North-south messaging communication
231     nReqs = 0
232     bN = myByLo(myTHid)
233     IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
234     C My south face uses MPI. Get ready to receive data from
235     C south and post that I am ready to send data to the south.
236     nReqs = nReqs + 1
237     tagSend = mpiTagS*nThreads+myThid
238     toPid = mpiPidS
239     elCount = 1
240     elType = mpiTypeYFaceThread_xy_r4(myThid)
241     CALL MPI_Isend(
242     I phi(1-OLx,1,myBxLo(myThid),bN),
243     I elCount, elType, toPid,
244     I tagSend,MPI_COMM_WORLD,
245     I reqIds(nReqs), mpiRC )
246     nReqs = nReqs + 1
247     tagRecv = mpiTagN*nThreads+myThrS(myThid)
248     fromPid = mpiPidS
249     elCount = 1
250     elType = mpiTypeYFaceThread_xy_r4(myThid)
251     CALL MPI_Irecv(
252     I phi(1-OLx,1-OLy,myBxLo(myThid),bN),
253     I elCount, elType, fromPid,
254     I tagRecv,MPI_COMM_WORLD,
255     I reqIds(nReqs), mpiRC )
256     ENDIF
257     C
258     bS = myByHi(myTHid)
259 adcroft 1.5 IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
260 cnh 1.1 C My north face uses MPI. Get ready to receive data from
261     C north and post that I am ready to send data to the north.
262     nReqs = nReqs + 1
263     tagSend = mpiTagN*nThreads+myThid
264     toPid = mpiPidN
265     elCount = 1
266     elType = mpiTypeYFaceThread_xy_r4(myThid)
267     CALL MPI_Isend(
268     I phi(1-OLx,sNy-OLy+1,myBxLo(myThid),bS),
269     I elCount, elType, toPid,
270     I tagSend,MPI_COMM_WORLD,
271     I reqIds(nReqs), mpiRC )
272     nReqs = nReqs + 1
273     tagRecv = mpiTagS*nThreads+myThrN(myThid)
274     fromPid = mpiPidN
275     elCount = 1
276     elType = mpiTypeYFaceThread_xy_r4(myThid)
277     CALL MPI_Irecv(
278     I phi(1-OLx,sNy+1,myBxLo(myThid),bS),
279     I elCount, elType, fromPid,
280     I tagRecv,MPI_COMM_WORLD,
281     I reqIds(nReqs), mpiRC )
282     ENDIF
283     C Wait for this thread's north-south transactions to finish.
284     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
285    
286     #ifndef ALWAYS_USE_MPI
287     ENDIF
288     #endif
289     #endif /* ALLOW_USE_MPI */
290    
291     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
292     ENDIF
293     #endif
294     #endif /* ALLOW_SYNC_COMMUNICATION */
295    
296     C-- Do true shared-memory data exchanges
297     C Note this is also doing the edge copies for a regular serial
298     C code.
299     C-- First make sure all threads have reached here
300     _BARRIER
301    
302     C-- Now do copies
303     C Notes:
304     C ======
305     C Here we copy from up to and including the overlap
306     C regions for both the X shift and the Y shift. This
307     C catches the "corners" of the data copied using messages
308     C in the "synchronous communication" section above.
309     C As coded here we also write to a remote region in
310     C one direction. In some situations it may be better only
311     C to write to our own overlaps - could be the case with
312     C Wildfire replication. This should be changed once everything
313     C is working OK.
314     C
315     C-- x-axis exchanges
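C (In the loops below, for each I=1..OLx the eastern overlap
C  column sNx+I of block bW is filled from interior column I of
C  block bE, and the western overlap column 1-I of block bE is
C  filled from interior column sNx-I+1 of block bW.)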
316     bE = myBxLo(myThid)
317     IF ( bE .NE. 1 .OR.
318     & commW(myThid) .EQ. COMM_SHARED ) THEN
319     bW = bE-1
320     IF ( bW .LT. 1 ) bW = nSx
321     DO bj=myByLo(myThid),myByHi(myThid)
322     DO J=1-OLy,sNy+OLy
323     DO I=1,OLx
324     phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
325     phi(1-I ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
326     ENDDO
327     ENDDO
328     ENDDO
329     ENDIF
330     DO bE=myBxLo(myThid)+1,myBxHi(myThid)
331     bW = bE-1
332     DO bj=myByLo(myThid),myByHi(myThid)
333     DO J=1-OLy,sNy+OLy
334     DO I=1,OLx
335     phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
336     phi(1-I ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
337     ENDDO
338     ENDDO
339     ENDDO
340     ENDDO
341    
342     C Need to ensure all threads have completed x-axis transfers before
343     C we can do y-axis exchanges.
344     _BARRIER
345    
346     C-- y-axis exchanges
347     bN = myByLo(myThid)
348     IF ( bN .NE. 1 .OR.
349     & commS(myThid) .EQ. COMM_SHARED ) THEN
350     bS = bN - 1
351     IF ( bS .LT. 1 ) bS = nSy
352     DO bi=myBxLo(myThid),myBxHi(myThid)
353     DO J=1,OLy
354     DO I=1-OLx,sNx+OLx
355     phi(I,1-J ,bi,bN)=phi(I,sNy-J+1,bi,bS)
356     phi(I,sNy+J ,bi,bS)=phi(I,1+J-1 ,bi,bN)
357     ENDDO
358     ENDDO
359     ENDDO
360     ENDIF
361     DO bN=myByLo(myThid)+1,myByHi(myThid)
362     bS = bN - 1
363     DO bi=myBxLo(myThid),myBxHi(myThid)
364     DO J=1,OLy
365     DO I=1-OLx,sNx+OLx
366     phi(I,1-J ,bi,bN)=phi(I,sNy-J+1,bi,bS)
367     phi(I,sNy+J ,bi,bS)=phi(I,1+J-1 ,bi,bN)
368     ENDDO
369     ENDDO
370     ENDDO
371     ENDDO
372    
373     _BARRIER
374    
375     RETURN
376     END
377    
378     CStartOfInterface
379     SUBROUTINE EXCH_XYZ_R4(
380     U phi,
381     I myThid )
382     C /==========================================================\
383     C | SUBROUTINE EXCH_XYZ_R4 |
384     C | o Handle exchanges for real*4, three-dimensional arrays. |
385     C |==========================================================|
386     C | Do true shared-memory data transfers and "messaging" |
387     C | transfers for the blocking case of data transfers. |
388     C | Applications call this routine using |
389     C | CALL EXCH..( x, myThid ) |
390     C | where x is a three-dimensional array with overlaps. |
391     C | This routine does true-shared-memory copies for blocks |
392     C | within a thread. It will also do MPI messaging between |
393     C | different processes. |
394     C | Note: |
395     C | ===== |
396     C | If it is used, "asynchronous" messaging in which |
397     C | communication overlaps computation is handled elsewhere |
398     C | - see recv.F and send.F. |
399     C | In practice MPI implementations may not be completely |
400     C | thread safe. In principle this code is correct even for |
401     C | mixed MPI and multi-threaded execution. |
402     C | In multi-thread execution data managed by other threads |
403     C | is only automatically visible to another thread if it |
404     C | existed before the thread was created. This means that |
405     C | for Fortran we declare arrays with overlap regions in |
406     C | COMMON blocks. |
407     C \==========================================================/
408    
409     C === Global data ===
410     #include "SIZE.h"
411     #include "EEPARAMS.h"
412     #include "EESUPPORT.h"
413    
414     C === Routine arguments ===
415     C phi - Array whose overlap regions are to be exchanged
416     C myThid - My thread id.
417     Real*4 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,1:Nz,nSx,nSy)
418     INTEGER myThid
419     CEndOfInterface
420    
421     C === Local variables ===
422     C bi,bj - Outer loop counters
423     C I,J,K - Inner loop counters
424     INTEGER bi, bj
425     INTEGER I, J, K
426     #ifdef ALLOW_USE_MPI
427     C tagSend - Tags used to mark and select messages
428     C tagRecv
429     C nReqs - MPI request counter.
430     C reqIds - Outstanding requests to wait on.
431     C mpiRC - MPI return code
432     C mpiStatArr - Multi-request status reporting array.
433     C toPid - Proc. id sending to
434     C fromPid - Proc. id receiving from
435     C elCount - Number of items to send or receive
436     C elType - Data type of elements
437     INTEGER tagSend
438     INTEGER tagRecv
439     INTEGER nReqs
440     INTEGER reqIds(4)
441     INTEGER mpiRC
442     INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
443     INTEGER toPid
444     INTEGER fromPid
445     INTEGER elCount
446     INTEGER elType
447     #endif /* ALLOW_USE_MPI */
448     C bE, bW - Block index to east, west, north and
449     C bN, bS south.
450     INTEGER bW, bE, bS, bN
451    
452     C-- Do "message-passing" data exchanges
453     #ifdef ALLOW_SYNC_COMMUNICATION
454     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
455     IF ( usingSyncMessages ) THEN
456     #endif
457    
458     C-- MPI-based data exchanges
459     #ifdef ALLOW_USE_MPI
460     #ifndef ALWAYS_USE_MPI
461     IF ( usingMPI ) THEN
462     #endif
463    
464     C Here we use the approach that all the X shifts are completed
465     C before the Y shifts start. Then we can expand our Y shifts
466     C to include X overlap regions. This means we don't have to
467     C transfer corners separately.
468     C Notes:
469     C ======
470     C 1. If we are only using messages in Y and using "true shared-memory"
471     C in X then we rely on the shared-memory code to be implemented so that
472     C corner values will be set correctly. The true shared-memory code
473     C is in the block below - so be careful when altering it!
474     C 2. In order to ensure we grab the right message we need to tag
475     C messages. We tag the messages with the direction in which they
476     C were sent out. Thus to receive from a processor to our west we
477     C request the next message from that processor that was sent out
478     C with a tag indicating an easterly send.
479     C 3. We could implement a "safe" code here by having everyone
480     C sync and then getting a particular thread ( say thread 1 ) to
481     C do the MPI_Isend sequentially. In this mode the "communication"
482     C thread would loop over nThreads and do MPI_Sendrecv for each
483     C one. This could be more efficient on some platforms as the messages
484     C might be sent as one large unit rather than by separate threads.
485     C It would also be "thread-safe" for MPI implementations that have
486     C multi-threading problems with things like request ids.
487     C 4. We include overlap regions in both X and Y sends.
488     C This means we can interchange the Y block with the X block.
489     C Not sure whether this will ever be useful!
490     C 5. The generation of a request handle by MPI_Isend and
491     C MPI_Irecv ought to involve some global state within MPI.
492     C If this process is not thread safe it may be necessary to
493     C enable a critical section around the MPI_Isend and
494     C MPI_Irecv calls.
495    
496     C We need a barrier here to synchronise threads, otherwise
497     C one thread might declare it is ready to send and receive
498     C even though another thread is going to write to the first
499     C thread's send or receive region. This won't happen, however,
500     C if we are careful about which data a thread updates - but at
501     C the moment we aren't careful!
502     _BARRIER
503    
504     C-- East-west messaging communication
505     nReqs = 0
506     bE = myBxLo(myThid)
507     IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
508     C My west face uses MPI. Get ready to receive data from
509     C west and post that we are ready to send data to the west.
510     C Data we receive has to come with tag of thread we know is
511     C sending from our west, data we send is tagged with our thread id
512     C and the send direction.
513     nReqs = nReqs+1
514     tagSend = mpiTagW*nThreads+myThid
515     toPid = mpiPidW
516     elCount = 1
517     elType = mpiTypeXFaceThread_xyz_r4(myThid)
518     CALL MPI_Isend(
519     I phi(1,1-OLy,1,bE,myByLo(myThid)),
520     I elCount, elType, toPid,
521     I tagSend,MPI_COMM_WORLD,
522     I reqIds(nReqs),mpiRC)
523     nReqs = nReqs+1
524     tagRecv = mpiTagE*nThreads+myThrW(myThid)
525     fromPid = mpiPidW
526     elCount = 1
527     elType = mpiTypeXFaceThread_xyz_r4(myThid)
528     CALL MPI_Irecv(
529     U phi(1-OLx,1-OLy,1,bE,myByLo(myThid)),
530     I elCount, elType, fromPid,
531     I tagRecv,MPI_COMM_WORLD,
532     I reqIds(nReqs),mpiRC)
533     ENDIF
534     bW = myBxHi(myThid)
535     IF ( bW .EQ. nSx .AND. commE(MyThid) .EQ. COMM_MPI ) THEN
536     C My east face uses MPI. Get ready to receive data from
537     C east and post that we are ready to send data to the east.
538     C Data we receive has to come with tag of thread we know is
539     C sending from our east, data we send is tagged with our thread id.
540     nReqs = nReqs+1
541     tagSend = mpiTagE*nThreads+myThid
542     toPid = mpiPidE
543     elCount = 1
544     elType = mpiTypeXFaceThread_xyz_r4(myThid)
545     CALL MPI_Isend(
546     I phi(sNx-OLx+1,1-OLy,1,bW,myByLo(myThid)),
547     I elCount, elType, toPid,
548     I tagSend,MPI_COMM_WORLD,
549     I reqIds(nReqs),mpiRC)
550     nReqs = nReqs+1
551     tagRecv = mpiTagW*nThreads+myThrE(myThid)
552     fromPid = mpiPidE
553     elCount = 1
554     elType = mpiTypeXFaceThread_xyz_r4(myThid)
555     CALL MPI_Irecv(
556     U phi(sNx+1,1-OLy,1,bW,myByLo(myThid)),
557     I elCount, elType, fromPid,
558     I tagRecv,MPI_COMM_WORLD,
559     I reqIds(nReqs),mpiRC)
560     ENDIF
561     C Wait for this thread's east-west transactions to finish before
562     C posting north-south transactions. We have to do this so that
563     C the north-south transactions will send out the correct overlap
564     C region values into the corner sections of our neighbors.
565     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
566 cnh 1.4 CcnhDebugStarts
567 adcroft 1.5 C RETURN
568 cnh 1.4 CcnhDebugEnds
569    
570 cnh 1.1
571     C-- North-south messaging communication
572     nReqs = 0
573     bN = myByLo(myTHid)
574     IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
575     C My south face uses MPI. Get ready to receive data from
576     C south and post that I am ready to send data to the south.
577     nReqs = nReqs + 1
578     tagSend = mpiTagS*nThreads+myThid
579     toPid = mpiPidS
580     elCount = 1
581     elType = mpiTypeYFaceThread_xyz_r4(myThid)
582     CALL MPI_Isend(
583     I phi(1-OLx,1,1,myBxLo(myThid),bN),
584     I elCount, elType, toPid,
585     I tagSend,MPI_COMM_WORLD,
586     I reqIds(nReqs), mpiRC )
587     nReqs = nReqs + 1
588     tagRecv = mpiTagN*nThreads+myThrS(myThid)
589     fromPid = mpiPidS
590     elCount = 1
591     elType = mpiTypeYFaceThread_xyz_r4(myThid)
592     CALL MPI_Irecv(
593     I phi(1-OLx,1-OLy,1,myBxLo(myThid),bN),
594     I elCount, elType, fromPid,
595     I tagRecv,MPI_COMM_WORLD,
596     I reqIds(nReqs), mpiRC )
597     ENDIF
598     C
599     bS = myByHi(myTHid)
600     IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
601     C My north face uses MPI. Get ready to receive data from
602     C north and post that I am ready to send data to the north.
603     nReqs = nReqs + 1
604     tagSend = mpiTagN*nThreads+myThid
605     toPid = mpiPidN
606     elCount = 1
607     elType = mpiTypeYFaceThread_xyz_r4(myThid)
608     CALL MPI_Isend(
609     I phi(1-OLx,sNy-OLy+1,1,myBxLo(myThid),bS),
610     I elCount, elType, toPid,
611     I tagSend,MPI_COMM_WORLD,
612     I reqIds(nReqs), mpiRC )
613     nReqs = nReqs + 1
614     tagRecv = mpiTagS*nThreads+myThrN(myThid)
615     fromPid = mpiPidN
616     elCount = 1
617     elType = mpiTypeYFaceThread_xyz_r4(myThid)
618     CALL MPI_Irecv(
619     I phi(1-OLx,sNy+1,1,myBxLo(myThid),bS),
620     I elCount, elType, fromPid,
621     I tagRecv,MPI_COMM_WORLD,
622     I reqIds(nReqs), mpiRC )
623     ENDIF
624     C Wait for this thread's north-south transactions to finish.
625     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
626    
627     #ifndef ALWAYS_USE_MPI
628     ENDIF
629     #endif
630     #endif /* ALLOW_USE_MPI */
631    
632     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
633     ENDIF
634     #endif
635     #endif /* ALLOW_SYNC_COMMUNICATION */
636    
637     C-- Do true shared-memory data exchanges
638     C Note: This is also doing the overlap copies
639     C for a single threaded code.
640     C-- First make sure all threads have reached here
641     _BARRIER
642    
643     C-- Now do copies
644     C Notes:
645     C ======
646     C Here we copy from up to and including the overlap
647     C regions for both the X shift and the Y shift. This
648     C catches the "corners" of the data copied using messages
649     C in the "synchronous communication" section above.
650     C-- x-axis exchanges
651     bE = myBxLo(myThid)
652     IF ( bE .NE. 1 .OR.
653     & commW(myThid) .EQ. COMM_SHARED ) THEN
654     bW = bE-1
655     IF ( bW .LT. 1 ) bW = nSx
656     DO bj=myByLo(myThid),myByHi(myThid)
657     DO K=1,Nz
658     DO J=1-OLy,sNy+OLy
659     DO I=1,OLx
660     phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
661     phi(1-I ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
662     ENDDO
663     ENDDO
664     ENDDO
665     ENDDO
666     ENDIF
667     DO bE=myBxLo(myThid)+1,myBxHi(myThid)
668     bW = bE-1
669     DO bj=myByLo(myThid),myByHi(myThid)
670     DO K=1,Nz
671     DO J=1-OLy,sNy+OLy
672     DO I=1,OLx
673     phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
674     phi(1-I ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
675     ENDDO
676     ENDDO
677     ENDDO
678     ENDDO
679     ENDDO
680    
681     C Need to ensure all threads have completed x-axis transfers before
682     C we can do y-axis exchanges.
683     _BARRIER
684    
685     C-- y-axis exchanges
686     bN = myByLo(myThid)
687     IF ( bN .NE. 1 .OR.
688     & commS(myThid) .EQ. COMM_SHARED ) THEN
689     bS = bN - 1
690     IF ( bS .LT. 1 ) bS = nSy
691     DO bi=myBxLo(myThid),myBxHi(myThid)
692     DO K=1,Nz
693     DO J=1,OLy
694     DO I=1-OLx,sNx+OLx
695     phi(I,1-J ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
696     phi(I,sNy+J ,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
697     ENDDO
698     ENDDO
699     ENDDO
700     ENDDO
701     ENDIF
702     DO bN=myByLo(myThid)+1,myByHi(myThid)
703     bS = bN - 1
704     DO bi=myBxLo(myThid),myBxHi(myThid)
705     DO K=1,Nz
706     DO J=1,OLy
707     DO I=1-OLx,sNx+OLx
708     phi(I,1-J ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
709     phi(I,sNy+J ,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
710     ENDDO
711     ENDDO
712     ENDDO
713     ENDDO
714     ENDDO
715    
716     _BARRIER
717    
718     RETURN
719     END
720    
721     CStartOfInterface
722     SUBROUTINE EXCH_XY_R8(
723     U phi,
724     I myThid )
725     C /==========================================================\
726     C | SUBROUTINE EXCH_XY_R8 |
727     C | o Handle exchanges for real*8, two-dimensional arrays. |
728     C |==========================================================|
729     C | Do true shared-memory data transfers and "messaging" |
730     C | transfers for the blocking case of data transfers. |
731     C | Applications call this routine using |
732     C | CALL EXCH..( x, myThid ) |
733     C | where x is a two-dimensional array with overlaps. |
734     C | This routine does true-shared-memory copies for blocks |
735     C | within a thread. It will also do MPI messaging between |
736     C | different processes. |
737     C | Note: |
738     C | ===== |
739     C | If it is used, "asynchronous" messaging in which |
740     C | communication overlaps computation is handled elsewhere |
741     C | - see recv.F and send.F. |
742     C | In practice MPI implementations may not be completely |
743     C | thread safe. In principle this code is correct even for |
744     C | mixed MPI and multi-threaded execution. |
745     C | In multi-thread execution data managed by other threads |
746     C | is only automatically visible to another thread if it |
747     C | existed before the thread was created. This means that |
748     C | for Fortran we declare arrays with overlap regions in |
749     C | COMMON blocks. |
750     C \==========================================================/
751    
752     C === Global data ===
753     #include "SIZE.h"
754     #include "EEPARAMS.h"
755     #include "EESUPPORT.h"
756    
757     C === Routine arguments ===
758     C phi - Array whose overlap regions are to be exchanged
759     C myThid - My thread id.
760     Real*8 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
761     INTEGER myThid
762     CEndOfInterface
763    
764     C === Local variables ===
765     C bi,bj - Outer loop counters
766     C I,J,K - Inner loop counters
767     INTEGER bi, bj
768     INTEGER I, J
769     #ifdef ALLOW_USE_MPI
770     C tagSend - Tags used to mark and select messages
771     C tagRecv
772     C nReqs - MPI request counter.
773     C reqIds - Outstanding requests to wait on.
774     C mpiRC - MPI return code
775     C mpiStatArr - Multi-request status reporting array.
776     C toPid - Proc. id sending to
777     C fromPid - Proc. id receiving from
778     C elCount - Number of items to send or receive
779     C elType - Data type of elements
780     INTEGER tagSend
781     INTEGER tagRecv
782     INTEGER nReqs
783     INTEGER reqIds(4)
784     INTEGER mpiRC
785     INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
786     INTEGER toPid
787     INTEGER fromPid
788     INTEGER elCount
789     INTEGER elType
790     #endif /* ALLOW_USE_MPI */
791     C bE, bW - Block index to east, west, north and
792     C bN, bS south.
793     INTEGER bW, bE, bS, bN
794    
795     C-- Do "message-passing" data exchanges
796     #ifdef ALLOW_SYNC_COMMUNICATION
797     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
798     IF ( usingSyncMessages ) THEN
799     #endif
800    
801     C-- MPI-based data exchanges
802     #ifdef ALLOW_USE_MPI
803     #ifndef ALWAYS_USE_MPI
804     IF ( usingMPI ) THEN
805     #endif
806    
807     C Here we use the approach that all the X shifts are completed
808     C before the Y shifts start. Then we can expand our Y shifts
809     C to include X overlap regions. This means we don't have to
810     C transfer corners separately.
811     C Notes:
812     C ======
813     C 1. If we are only using messages in Y and using "true shared-memory"
814     C in X then we rely on the shared-memory code to be implemented so that
815     C corner values will be set correctly. The true shared-memory code
816     C is in the block below - so be careful when altering it!
817     C 2. In order to ensure we grab the right message we need to tag
818     C messages. We tag the messages with the direction in which they
819     C were sent out. Thus to receive from a processor to our west we
820     C request the next message from that processor that was sent out
821     C with a tag indicating an easterly send.
822     C 3. We could implement a "safe" code here by having everyone
823     C sync and then getting a particular thread ( say thread 1 ) to
824     C do the MPI_Isend sequentially. In this mode the "communication"
825     C thread would loop over nThreads and do MPI_Sendrecv for each
826     C one. This could be more efficient on some platforms as the messages
827     C might be sent as one large unit rather than by separate threads.
828     C It would also be "thread-safe" for MPI implementations that have
829     C multi-threading problems with things like request ids.
830     C 4. We include overlap regions in both X and Y sends.
831     C This means we can interchange the Y block with the X block.
832     C Not sure whether this will ever be useful!
833     C 5. The generation of a request handle by MPI_Isend and
834     C MPI_Irecv ought to involve some global state within MPI.
835     C If this process is not thread safe it may be necessary to
836     C enable a critical section around the MPI_Isend and
837     C MPI_Irecv calls.
838    
839     C We need a barrier here to synchronise threads, otherwise
840     C one thread might declare it is ready to send and receive
841     C even though another thread is going to write to the first
842     C thread's send or receive region. This won't happen, however,
843     C if we are careful about which data a thread updates - but at
844     C the moment we aren't careful!
845     _BARRIER
846    
847     C-- East-west messaging communication
848     nReqs = 0
849     bE = myBxLo(myThid)
850     IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
851     C My west face uses MPI. Get ready to receive data from
852     C west and post that we are ready to send data to the west.
853     C Data we receive has to come with tag of thread we know is
854     C sending from our west, data we send is tagged with our thread id
855     C and the send direction.
856     nReqs = nReqs+1
857     tagSend = mpiTagW*nThreads+myThid
858     toPid = mpiPidW
859     elCount = 1
860     elType = mpiTypeXFaceThread_xy_R8(myThid)
861     C I tagSend,MPI_COMM_WORLD,
862     CALL MPI_Isend(
863     I phi(1,1-OLy,bE,myByLo(myThid)),
864     I elCount, elType, toPid,
865     I tagSend,MPI_COMM_WORLD,
866     I reqIds(nReqs),mpiRC)
867     nReqs = nReqs+1
868     tagRecv = mpiTagE*nThreads+myThrW(myThid)
869     fromPid = mpiPidW
870     elCount = 1
871     elType = mpiTypeXFaceThread_xy_R8(myThid)
872     CALL MPI_Irecv(
873     U phi(1-OLx,1-OLy,bE,myByLo(myThid)),
874     I elCount, elType, fromPid,
875     I tagRecv,MPI_COMM_WORLD,
876     I reqIds(nReqs),mpiRC)
877     ENDIF
878     bW = myBxHi(myThid)
879     IF ( bW .EQ. nSx .AND. commE(MyThid) .EQ. COMM_MPI ) THEN
880     C My east face uses MPI. Get ready to receive data from
881     C east and post that we are ready to send data to the east.
882     C Data we receive has to come with tag of thread we know is
883     C sending from our east, data we send is tagged with our thread id.
884     nReqs = nReqs+1
885     tagSend = mpiTagE*nThreads+myThid
886     toPid = mpiPidE
887     elCount = 1
888     elType = mpiTypeXFaceThread_xy_R8(myThid)
889     CALL MPI_Isend(
890     I phi(sNx-OLx+1,1-OLy,bW,myByLo(myThid)),
891     I elCount, elType, toPid,
892     I tagSend,MPI_COMM_WORLD,
893     I reqIds(nReqs),mpiRC)
894     nReqs = nReqs+1
895     tagRecv = mpiTagW*nThreads+myThrE(myThid)
896     fromPid = mpiPidE
897     elCount = 1
898     elType = mpiTypeXFaceThread_xy_R8(myThid)
899     CALL MPI_Irecv(
900     U phi(sNx+1,1-OLy,bW,myByLo(myThid)),
901     I elCount, elType, fromPid,
902     I tagRecv,MPI_COMM_WORLD,
903     I reqIds(nReqs),mpiRC)
904     ENDIF
905     C Wait for this thread's east-west transactions to finish before
906     C posting north-south transactions. We have to do this so that
907     C the north-south transactions will send out the correct overlap
908     C region values into the corner sections of our neighbors.
909     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
910    
911 cnh 1.4 CcnhDebugStarts
912 adcroft 1.5 C RETURN
913 cnh 1.4 CcnhDebugEnds
914    
915    
916 cnh 1.1 C-- North-south messaging communication
917     nReqs = 0
918     bN = myByLo(myTHid)
919     IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
920     C My south face uses MPI. Get ready to receive data from
921     C south and post that I am ready to send data to the south.
922     nReqs = nReqs + 1
923     tagSend = mpiTagS*nThreads+myThid
924     toPid = mpiPidS
925     elCount = 1
926     elType = mpiTypeYFaceThread_xy_R8(myThid)
927     CALL MPI_Isend(
928     I phi(1-OLx,1,myBxLo(myThid),bN),
929     I elCount, elType, toPid,
930     I tagSend,MPI_COMM_WORLD,
931     I reqIds(nReqs), mpiRC )
932     nReqs = nReqs + 1
933     tagRecv = mpiTagN*nThreads+myThrS(myThid)
934     fromPid = mpiPidS
935     elCount = 1
936     elType = mpiTypeYFaceThread_xy_R8(myThid)
937     CALL MPI_Irecv(
938     I phi(1-OLx,1-OLy,myBxLo(myThid),bN),
939     I elCount, elType, fromPid,
940     I tagRecv,MPI_COMM_WORLD,
941     I reqIds(nReqs), mpiRC )
942     ENDIF
943     C
944     bS = myByHi(myTHid)
945 adcroft 1.5 IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
946 cnh 1.1 C My north face uses MPI. Get ready to receive data from
947     C north and post that I am ready to send data to the north.
948     nReqs = nReqs + 1
949     tagSend = mpiTagN*nThreads+myThid
950     toPid = mpiPidN
951     elCount = 1
952     elType = mpiTypeYFaceThread_xy_R8(myThid)
953     CALL MPI_Isend(
954     I phi(1-OLx,sNy-OLy+1,myBxLo(myThid),bS),
955     I elCount, elType, toPid,
956     I tagSend,MPI_COMM_WORLD,
957     I reqIds(nReqs), mpiRC )
958     nReqs = nReqs + 1
959     tagRecv = mpiTagS*nThreads+myThrN(myThid)
960     fromPid = mpiPidN
961     elCount = 1
962     elType = mpiTypeYFaceThread_xy_R8(myThid)
963     CALL MPI_Irecv(
964     I phi(1-OLx,sNy+1,myBxLo(myThid),bS),
965     I elCount, elType, fromPid,
966     I tagRecv,MPI_COMM_WORLD,
967     I reqIds(nReqs), mpiRC )
968     ENDIF
969     C Wait for this thread's north-south transactions to finish.
970     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
971    
972     #ifndef ALWAYS_USE_MPI
973     ENDIF
974     #endif
975     #endif /* ALLOW_USE_MPI */
976    
977     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
978     ENDIF
979     #endif
980     #endif /* ALLOW_SYNC_COMMUNICATION */
981    
982     C-- Do true shared-memory data exchanges
983     C Note: This section also does the overlap copies for a
984     C serial code.
985     C-- First make sure all threads have reached here
986     _BARRIER
987    
988     C-- Now do copies
989     C Notes:
990     C ======
991     C Here we copy from up to and including the overlap
992     C regions for both the X shift and the Y shift. This
993     C catches the "corners" of the data copied using messages
994     C in the "synchronous communication" section above.
995     C-- x-axis exchanges
996     bE = myBxLo(myThid)
997     IF ( bE .NE. 1 .OR.
998     & commW(myThid) .EQ. COMM_SHARED ) THEN
999     bW = bE-1
1000     IF ( bW .LT. 1 ) bW = nSx
1001     DO bj=myByLo(myThid),myByHi(myThid)
1002     DO J=1-OLy,sNy+OLy
1003     DO I=1,OLx
1004     phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
1005     phi(1-I ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
1006     ENDDO
1007     ENDDO
1008     ENDDO
1009     ENDIF
1010     DO bE=myBxLo(myThid)+1,myBxHi(myThid)
1011     bW = bE-1
1012     DO bj=myByLo(myThid),myByHi(myThid)
1013     DO J=1-OLy,sNy+OLy
1014     DO I=1,OLx
1015     phi(sNx+I,J,bW,bj)=phi( 1+I-1,J,bE,bj)
1016     phi(1-I ,J,bE,bj)=phi(sNx-I+1,J,bW,bj)
1017     ENDDO
1018     ENDDO
1019     ENDDO
1020     ENDDO
1021    
1022     C Need to ensure all threads have completed x-axis transfers before
1023     C we can do y-axis exchanges.
1024     _BARRIER
1025    
1026     C-- y-axis exchanges
1027     bN = myByLo(myThid)
1028     IF ( bN .NE. 1 .OR.
1029     & commS(myThid) .EQ. COMM_SHARED ) THEN
1030     bS = bN - 1
1031     IF ( bS .LT. 1 ) bS = nSy
1032     DO bi=myBxLo(myThid),myBxHi(myThid)
1033     DO J=1,OLy
1034     DO I=1-OLx,sNx+OLx
1035     phi(I,1-J ,bi,bN)=phi(I,sNy-J+1,bi,bS)
1036     phi(I,sNy+J ,bi,bS)=phi(I,1+J-1 ,bi,bN)
1037     ENDDO
1038     ENDDO
1039     ENDDO
1040     ENDIF
1041     DO bN=myByLo(myThid)+1,myByHi(myThid)
1042     bS = bN - 1
1043     DO bi=myBxLo(myThid),myBxHi(myThid)
1044     DO J=1,OLy
1045     DO I=1-OLx,sNx+OLx
1046     phi(I,1-J ,bi,bN)=phi(I,sNy-J+1,bi,bS)
1047     phi(I,sNy+J ,bi,bS)=phi(I,1+J-1 ,bi,bN)
1048     ENDDO
1049     ENDDO
1050     ENDDO
1051     ENDDO
1052    
1053     _BARRIER
1054    
1055     RETURN
1056     END
1057    
1058     CStartOfInterface
1059     SUBROUTINE EXCH_XYZ_R8(
1060     U phi,
1061     I myThid )
1062     C /==========================================================\
1063     C | SUBROUTINE EXCH_XYZ_R8 |
1064     C | o Handle exchanges for real*8, three-dimensional arrays. |
1065     C |==========================================================|
1066     C | Do true shared-memory data transfers and "messaging" |
1067     C | transfers for the blocking case of data transfers. |
1068     C | Applications call this routine using |
1069     C | CALL EXCH..( x, myThid ) |
1070     C | where x is a three-dimensional array with overlaps. |
1071     C | This routine does true-shared-memory copies for blocks |
1072     C | within a thread. It will also do MPI messaging between |
1073     C | different processes. |
1074     C | Note: |
1075     C | ===== |
1076     C | If it is used, "asynchronous" messaging in which |
1077     C | communication overlaps computation is handled elsewhere |
1078     C | - see recv.F and send.F. |
1079     C | In practice MPI implementations may not be completely |
1080     C | thread safe. In principle this code is correct even for |
1081     C | mixed MPI and multi-threaded execution. |
1082     C | In multi-thread execution data managed by other threads |
1083     C | is only automatically visible to another thread if it |
1084     C | existed before the thread was created. This means that |
1085     C | for Fortran we declare arrays with overlap regions in |
1086     C | COMMON blocks. |
1087     C \==========================================================/
1088    
1089     C === Global data ===
1090     #include "SIZE.h"
1091     #include "EEPARAMS.h"
1092     #include "EESUPPORT.h"
1093    
1094     C === Routine arguments ===
1095     C phi - Array whose overlap regions are to be exchanged
1096     C myThid - My thread id.
1097     Real*8 phi(1-OLx:sNx+OLx,1-OLy:sNy+OLy,1:Nz,nSx,nSy)
1098     INTEGER myThid
1099     CEndOfInterface
1100    
1101     C === Local variables ===
1102     C bi,bj - Outer loop counters
1103     C I,J,K - Inner loop counters
1104     INTEGER bi, bj
1105     INTEGER I, J, K
1106     #ifdef ALLOW_USE_MPI
1107     C tagSend - Tags used to mark and select messages
1108     C tagRecv
1109     C nReqs - MPI request counter.
1110     C reqIds - Outstanding requests to wait on.
1111     C mpiRC - MPI return code
1112     C mpiStatArr - Multi-request status reporting array.
1113     C toPid - Proc. id sending to
1114     C fromPid - Proc. id receiving from
1115     C elCount - Number of items to send or receive
1116     C elType - Data type of elements
1117     INTEGER tagSend
1118     INTEGER tagRecv
1119     INTEGER nReqs
1120     INTEGER reqIds(4)
1121     INTEGER mpiRC
1122     INTEGER mpiStatArr(MPI_STATUS_SIZE,4)
1123     INTEGER toPid
1124     INTEGER fromPid
1125     INTEGER elCount
1126     INTEGER elType
1127     #endif /* ALLOW_USE_MPI */
1128     C bE, bW - Block index to east, west, north and
1129     C bN, bS south.
1130     INTEGER bW, bE, bS, bN
1131    
1132     C-- Do "message-passing" data exchanges
1133     #ifdef ALLOW_SYNC_COMMUNICATION
1134     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
1135     IF ( usingSyncMessages ) THEN
1136     #endif
1137    
1138     C-- MPI-based data exchanges
1139     #ifdef ALLOW_USE_MPI
1140     #ifndef ALWAYS_USE_MPI
1141     IF ( usingMPI ) THEN
1142     #endif
1143    
1144     C Here we use the approach that all the X shifts are completed
1145     C before the Y shifts start. Then we can expand our Y shifts
1146     C to include X overlap regions. This means we don't have to
1147     C transfer corners separately.
1148     C Notes:
1149     C ======
1150     C 1. If we are only using messages in Y and using "true shared-memory"
1151     C in X then we rely on the shared-memory code to be implemented so that
1152     C corner values will be set correctly. The true shared-memory code
1153     C is in the block below - so be careful when altering it!
1154     C 2. In order to ensure we grab the right message we need to tag
1155     C messages. We tag the messages with the direction in which they
1156     C were sent out. Thus to receive from a processor to our west we
1157     C request the next message from that processor that was sent out
1158     C with a tag indicating an easterly send.
1159     C 3. We could implement a "safe" code here by having everyone
1160     C sync and then getting a particular thread ( say thread 1 ) to
1161     C do the MPI_Isend sequentially. In this mode the "communication"
1162     C thread would loop over nThreads and do MPI_Sendrecv for each
1163     C one. This could be more efficient on some platforms as the messages
1164     C might be sent as one large unit rather than by separate threads.
1165     C It would also be "thread-safe" for MPI implementations that have
1166     C multi-threading problems with things like request ids.
1167     C 4. We include overlap regions in both X and Y sends.
1168     C This means we can interchange the Y block with the X block.
1169     C Not sure whether this will ever be useful!
1170     C 5. The generation of a request handle by MPI_Isend and
1171     C MPI_Irecv ought to involve some global state within MPI.
1172     C If this process is not thread safe it may be necessary to
1173     C enable a critical section around the MPI_Isend and
1174     C MPI_Irecv calls.
1175    
1176     C We need a barrier here to synchronise threads, otherwise
1177     C one thread might declare it is ready to send and receive
1178     C even though another thread is going to write to the first
1179     C thread's send or receive region. This won't happen, however,
1180     C if we are careful about which data a thread updates - but at
1181     C the moment we aren't careful!
1182     _BARRIER
1183    
1184     C-- East-west messaging communication
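C (Note: unlike the three routines above, which post all four
C  east-west requests before a single wait, this routine pairs the
C  westward send with the receive on the east face, waits for that
C  pair to complete, and only then posts the eastward send and the
C  receive on the west face; the north-south exchange below is
C  split the same way.)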
1185 cnh 1.4 bE = myBxLo(myThid)
1186     bW = myBxHi(myThid)
1187 cnh 1.1 nReqs = 0
1188     IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
1189     C My west face uses MPI. Get ready to receive data from
1190     C west and post that we are ready to send data to the west.
1191     C Data we receive has to come with tag of thread we know is
1192     C sending from our west, data we send is tagged with our thread id
1193     C and the send direction.
1194     nReqs = nReqs+1
1195     tagSend = mpiTagW*nThreads+myThid
1196     toPid = mpiPidW
1197     elCount = 1
1198     elType = mpiTypeXFaceThread_xyz_R8(myThid)
1199     CALL MPI_Isend(
1200     I phi(1,1-OLy,1,bE,myByLo(myThid)),
1201     I elCount, elType, toPid,
1202     I tagSend,MPI_COMM_WORLD,
1203     I reqIds(nReqs),mpiRC)
1204 cnh 1.4 ENDIF
1205     IF ( bW .EQ. nSx .AND. commE(MyThid) .EQ. COMM_MPI ) THEN
1206 cnh 1.1 nReqs = nReqs+1
1207 cnh 1.4 tagRecv = mpiTagW*nThreads+myThrE(myThid)
1208     fromPid = mpiPidE
1209 cnh 1.1 elCount = 1
1210     elType = mpiTypeXFaceThread_xyz_R8(myThid)
1211     CALL MPI_Irecv(
1212 cnh 1.4 U phi(sNx+1,1-OLy,1,bW,myByLo(myThid)),
1213 cnh 1.1 I elCount, elType, fromPid,
1214     I tagRecv,MPI_COMM_WORLD,
1215     I reqIds(nReqs),mpiRC)
1216     ENDIF
1217 cnh 1.4 CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
1218    
1219     nReqs = 0
1220 cnh 1.1 IF ( bW .EQ. nSx .AND. commE(MyThid) .EQ. COMM_MPI ) THEN
1221     C My east face uses MPI. Get ready to receive data from
1222     C east and post that we are ready to send data to the east.
1223     C Data we receive has to come with tag of thread we know is
1224     C sending from our east, data we send is tagged with our thread id.
1225     nReqs = nReqs+1
1226     tagSend = mpiTagE*nThreads+myThid
1227     toPid = mpiPidE
1228     elCount = 1
1229     elType = mpiTypeXFaceThread_xyz_R8(myThid)
1230     CALL MPI_Isend(
1231     I phi(sNx-OLx+1,1-OLy,1,bW,myByLo(myThid)),
1232     I elCount, elType, toPid,
1233     I tagSend,MPI_COMM_WORLD,
1234     I reqIds(nReqs),mpiRC)
1235 cnh 1.4 ENDIF
1236     IF ( bE .EQ. 1 .AND. commW(myThid) .EQ. COMM_MPI ) THEN
1237 cnh 1.1 nReqs = nReqs+1
1238 cnh 1.4 tagRecv = mpiTagE*nThreads+myThrW(myThid)
1239     fromPid = mpiPidW
1240 cnh 1.1 elCount = 1
1241     elType = mpiTypeXFaceThread_xyz_R8(myThid)
1242     CALL MPI_Irecv(
1243 cnh 1.4 U phi(1-OLx,1-OLy,1,bE,myByLo(myThid)),
1244 cnh 1.1 I elCount, elType, fromPid,
1245     I tagRecv,MPI_COMM_WORLD,
1246     I reqIds(nReqs),mpiRC)
1247     ENDIF
1248 cnh 1.4
1249 cnh 1.1 C Wait for this thread's east-west transactions to finish before
1250     C posting north-south transactions. We have to do this so that
1251     C the north-south transactions will send out the correct overlap
1252     C region values into the corner sections of our neighbors.
1253     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
1254    
1255 cnh 1.4 CcnhDebugStarts
1256     C RETURN
1257     CcnhDebugEnds
1258    
1259    
1260 cnh 1.1 C-- North-south messaging communication
1261     nReqs = 0
1262     bN = myByLo(myTHid)
1263 cnh 1.4 bS = myByHi(myTHid)
1264    
1265 cnh 1.1 IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
1266     C My south face uses MPI. Get ready to receive data from
1267     C south and post that I am ready to send data to the south.
1268     nReqs = nReqs + 1
1269     tagSend = mpiTagS*nThreads+myThid
1270     toPid = mpiPidS
1271     elCount = 1
1272     elType = mpiTypeYFaceThread_xyz_R8(myThid)
1273     CALL MPI_Isend(
1274     I phi(1-OLx,1,1,myBxLo(myThid),bN),
1275     I elCount, elType, toPid,
1276     I tagSend,MPI_COMM_WORLD,
1277     I reqIds(nReqs), mpiRC )
1278 cnh 1.4 ENDIF
1279     IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
1280 cnh 1.1 nReqs = nReqs + 1
1281 cnh 1.4 tagRecv = mpiTagS*nThreads+myThrN(myThid)
1282     fromPid = mpiPidN
1283 cnh 1.1 elCount = 1
1284     elType = mpiTypeYFaceThread_xyz_R8(myThid)
1285     CALL MPI_Irecv(
1286 cnh 1.4 I phi(1-OLx,sNy+1,1,myBxLo(myThid),bS),
1287 cnh 1.1 I elCount, elType, fromPid,
1288     I tagRecv,MPI_COMM_WORLD,
1289     I reqIds(nReqs), mpiRC )
1290     ENDIF
1291 cnh 1.4 C Wait for this thread's north-south transactions to finish.
1292     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
1293 cnh 1.1 C
1294 cnh 1.4 nReqs = 0
1295     IF ( bS .EQ. nSy .AND. commN(myThid) .EQ. COMM_MPI ) THEN
1296 cnh 1.1 C My north face uses MPI. Get ready to receive data from
1297     C north and post that I am ready to send data to the north.
1298     nReqs = nReqs + 1
1299     tagSend = mpiTagN*nThreads+myThid
1300     toPid = mpiPidN
1301     elCount = 1
1302     elType = mpiTypeYFaceThread_xyz_R8(myThid)
1303     CALL MPI_Isend(
1304     I phi(1-OLx,sNy-OLy+1,1,myBxLo(myThid),bS),
1305     I elCount, elType, toPid,
1306     I tagSend,MPI_COMM_WORLD,
1307     I reqIds(nReqs), mpiRC )
1308 cnh 1.4 ENDIF
1309     IF ( bN .EQ. 1 .AND. commS(myThid) .EQ. COMM_MPI ) THEN
1310 cnh 1.1 nReqs = nReqs + 1
1311 cnh 1.4 tagRecv = mpiTagN*nThreads+myThrS(myThid)
1312     fromPid = mpiPidS
1313 cnh 1.1 elCount = 1
1314     elType = mpiTypeYFaceThread_xyz_R8(myThid)
1315     CALL MPI_Irecv(
1316 cnh 1.4 I phi(1-OLx,1-OLy,1,myBxLo(myThid),bN),
1317 cnh 1.1 I elCount, elType, fromPid,
1318     I tagRecv,MPI_COMM_WORLD,
1319     I reqIds(nReqs), mpiRC )
1320     ENDIF
1321     C Wait for this thread's north-south transactions to finish.
1322     CALL MPI_Waitall( nReqs, reqIds, mpiStatArr, mpiRC )
1323    
1324     #ifndef ALWAYS_USE_MPI
1325     ENDIF
1326     #endif
1327     #endif /* ALLOW_USE_MPI */
1328    
1329     #ifndef ALWAYS_USE_SYNC_COMMUNICATION
1330     ENDIF
1331     #endif
1332     #endif /* ALLOW_SYNC_COMMUNICATION */
1333    
1334     C-- Do true shared-memory data exchanges
1335     C Note: This section also does the overlap copies for a
1336     C serial code.
1337     C-- First make sure all threads have reached here
1338     _BARRIER
1339    
1340     C-- Now do copies
1341     C Notes:
1342     C ======
1343     C Here we copy from up to and including the overlap
1344     C regions for both the X shift and the Y shift. This
1345     C catches the "corners" of the data copied using messages
1346     C in the "synchronous communication" section above.
1347     C-- x-axis exchanges
1348     bE = myBxLo(myThid)
1349     IF ( bE .NE. 1 .OR.
1350     & commW(myThid) .EQ. COMM_SHARED ) THEN
1351     bW = bE-1
1352     IF ( bW .LT. 1 ) bW = nSx
1353     DO bj=myByLo(myThid),myByHi(myThid)
1354     DO K=1,Nz
1355     DO J=1-OLy,sNy+OLy
1356     DO I=1,OLx
1357     phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
1358     phi(1-I ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
1359     ENDDO
1360     ENDDO
1361     ENDDO
1362     ENDDO
1363     ENDIF
1364     DO bE=myBxLo(myThid)+1,myBxHi(myThid)
1365     bW = bE-1
1366     DO bj=myByLo(myThid),myByHi(myThid)
1367     DO K=1,Nz
1368     DO J=1-OLy,sNy+OLy
1369     DO I=1,OLx
1370     phi(sNx+I,J,K,bW,bj)=phi( 1+I-1,J,K,bE,bj)
1371     phi(1-I ,J,K,bE,bj)=phi(sNx-I+1,J,K,bW,bj)
1372     ENDDO
1373     ENDDO
1374     ENDDO
1375     ENDDO
1376     ENDDO
1377    
1378     C Need to ensure all threads have completed x-axis transfers before
1379     C we can do y-axis exchanges.
1380     _BARRIER
1381    
1382     C-- y-axis exchanges
1383     bN = myByLo(myThid)
1384     IF ( bN .NE. 1 .OR.
1385     & commS(myThid) .EQ. COMM_SHARED ) THEN
1386     bS = bN - 1
1387     IF ( bS .LT. 1 ) bS = nSy
1388     DO bi=myBxLo(myThid),myBxHi(myThid)
1389     DO K=1,Nz
1390     DO J=1,OLy
1391     DO I=1-OLx,sNx+OLx
1392     phi(I,1-J ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
1393     phi(I,sNy+J ,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
1394     ENDDO
1395     ENDDO
1396     ENDDO
1397     ENDDO
1398     ENDIF
1399     DO bN=myByLo(myThid)+1,myByHi(myThid)
1400     bS = bN - 1
1401     DO bi=myBxLo(myThid),myBxHi(myThid)
1402     DO K=1,Nz
1403     DO J=1,OLy
1404     DO I=1-OLx,sNx+OLx
1405     phi(I,1-J ,K,bi,bN)=phi(I,sNy-J+1,K,bi,bS)
1406     phi(I,sNy+J ,K,bi,bS)=phi(I,1+J-1 ,K,bi,bN)
1407     ENDDO
1408     ENDDO
1409     ENDDO
1410     ENDDO
1411     ENDDO
1412    
1413     _BARRIER
1414    
1415     RETURN
1416     END
