1 |
C $Header: /u/gcmpack/MITgcm/pkg/flt/exch_send_put_vec.F,v 1.4 2009/01/04 00:58:23 jmc Exp $ |
2 |
C $Name: $ |
3 |
|
4 |
#include "FLT_OPTIONS.h" |
5 |
|
6 |
C-- Contents |
7 |
C-- o EXCH_RL_SEND_PUT_VEC_X |
8 |
C-- o EXCH_RL_SEND_PUT_VEC_Y |
9 |
|
10 |
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| |
11 |
|
12 |
SUBROUTINE EXCH_RL_SEND_PUT_VEC_X( |
13 |
I arrayE, arrayW, |
14 |
O bufRecE, bufRecW, |
15 |
I myd1, myThid ) |
16 |
C /==========================================================\ |
17 |
C | SUBROUTINE EXCH_RL_SEND_PUT_X | |
18 |
C | o "Send" or "put" X edges for RL array. | |
19 |
C |==========================================================| |
20 |
C | Routine that invokes actual message passing send or | |
21 |
C | direct "put" of data to update X faces of an XY[R] array.| |
22 |
C \==========================================================/ |
23 |
IMPLICIT NONE |
24 |
|
25 |
C == Global variables == |
26 |
#include "SIZE.h" |
27 |
#include "EEPARAMS.h" |
28 |
#include "EESUPPORT.h" |
29 |
#include "FLT.h" |
30 |
#include "EXCH.h" |
31 |
C == Routine arguments == |
32 |
C arrayE - Array to be exchanged. |
33 |
C arrayW |
34 |
C myd1 - sizes. |
35 |
C myThid - Thread number of this instance of S/R EXCH... |
36 |
INTEGER myd1 |
37 |
_RL arrayE(myd1,nSx,nSy), arrayW(myd1,nSx,nSy) |
38 |
_RL bufRecE(myd1,nSx,nSy), bufRecW(myd1,nSx,nSy) |
39 |
INTEGER myThid |
40 |
CEndOfInterface |
41 |
|
42 |
#ifdef ALLOW_FLT |
43 |
C == Local variables == |
44 |
C I, J - Loop counters and extents |
45 |
C bi, bj |
46 |
C biW, bjW - West tile indices |
47 |
C biE, bjE - East tile indices |
48 |
C theProc, theTag, theType, - Variables used in message building |
49 |
C theSize |
50 |
C westCommMode - Working variables holding type |
51 |
C eastCommMode of communication a particular |
52 |
C tile face uses. |
53 |
INTEGER I |
54 |
INTEGER bi, bj, biW, bjW, biE, bjE |
55 |
INTEGER westCommMode |
56 |
INTEGER eastCommMode |
57 |
|
58 |
#ifdef ALLOW_USE_MPI |
59 |
INTEGER theProc, theTag, theType, theSize, mpiRc |
60 |
#endif |
61 |
C-- Write data to exchange buffer |
62 |
C Various actions are possible depending on the communication mode |
63 |
C as follows: |
64 |
C Mode Action |
65 |
C -------- --------------------------- |
66 |
C COMM_NONE Do nothing |
67 |
C |
68 |
C COMM_MSG Message passing communication ( e.g. MPI ) |
69 |
C Fill west send buffer from this tile. |
70 |
C Send data with tag identifying tile and direction. |
71 |
C Fill east send buffer from this tile. |
72 |
C Send data with tag identifying tile and direction. |
73 |
C |
74 |
C COMM_PUT "Put" communication ( UMP_, shmemput, etc... ) |
75 |
C Fill east receive buffer of west-neighbor tile |
76 |
C Fill west receive buffer of east-neighbor tile |
77 |
C Sync. memory |
78 |
C Write data-ready Ack for east edge of west-neighbor |
79 |
C tile |
80 |
C Write data-ready Ack for west edge of east-neighbor |
81 |
C tile |
82 |
C Sync. memory |
83 |
C |
84 |
DO bj=myByLo(myThid),myByHi(myThid) |
85 |
DO bi=myBxLo(myThid),myBxHi(myThid) |
86 |
|
87 |
westCommMode = _tileCommModeW(bi,bj) |
88 |
eastCommMode = _tileCommModeE(bi,bj) |
89 |
biE = _tileBiE(bi,bj) |
90 |
bjE = _tileBjE(bi,bj) |
91 |
biW = _tileBiW(bi,bj) |
92 |
bjW = _tileBjW(bi,bj) |
93 |
|
94 |
C o Send or Put west edge |
95 |
IF ( westCommMode .EQ. COMM_MSG ) THEN |
96 |
C Send the data |
97 |
#ifdef ALLOW_USE_MPI |
98 |
#ifndef ALWAYS_USE_MPI |
99 |
IF ( usingMPI ) THEN |
100 |
#endif |
101 |
theProc = tilePidW(bi,bj) |
102 |
theTag = _tileTagSendW(bi,bj) |
103 |
theSize = myd1 |
104 |
theType = MPI_DOUBLE_PRECISION |
105 |
c exchVReqsX(1,bi,bj) = exchVReqsX(1,bi,bj)+1 |
106 |
exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1 |
107 |
CALL MPI_Isend(arrayW(1,bi,bj), theSize, theType, |
108 |
& theProc, theTag, MPI_COMM_MODEL, |
109 |
& exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), mpiRc) |
110 |
c & exchReqVIdX(exchVReqsX(1,bi,bj),1,bi,bj), mpiRc) |
111 |
#ifndef ALWAYS_USE_MPI |
112 |
ENDIF |
113 |
#endif |
114 |
#endif /* ALLOW_USE_MPI */ |
115 |
eastRecvAck(1,biW,bjW) = 1. |
116 |
ELSEIF ( westCommMode .EQ. COMM_PUT ) THEN |
117 |
c write(0,*) 'SEND_PUT_VEC_X: copy E:',biW,bjW,' <- W:',bi,bj |
118 |
DO I=1,myd1 |
119 |
bufRecE(I,biW,bjW) = arrayW(I,bi,bj) |
120 |
ENDDO |
121 |
ELSEIF ( westCommMode .NE. COMM_NONE ) THEN |
122 |
STOP ' S/R EXCH: Invalid commW mode.' |
123 |
ENDIF |
124 |
|
125 |
C o Send or Put east edge |
126 |
IF ( eastCommMode .EQ. COMM_MSG ) THEN |
127 |
C Send the data |
128 |
#ifdef ALLOW_USE_MPI |
129 |
#ifndef ALWAYS_USE_MPI |
130 |
IF ( usingMPI ) THEN |
131 |
#endif |
132 |
theProc = tilePidE(bi,bj) |
133 |
theTag = _tileTagSendE(bi,bj) |
134 |
theSize = myd1 |
135 |
theType = MPI_DOUBLE_PRECISION |
136 |
c exchVReqsX(1,bi,bj) = exchVReqsX(1,bi,bj)+1 |
137 |
exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1 |
138 |
c if (theProc .eq. 2 .or. theProc .eq. 4) then |
139 |
c if (arrayE(1,bi,bj) .ne. 0.) then |
140 |
c write(errormessageunit,*) 'qq1y: ',myprocid, |
141 |
c & theProc,theTag,theSize,(arrayE(i,bi,bj),i=1,32) |
142 |
c endif |
143 |
c endif |
144 |
CALL MPI_Isend(arrayE(1,bi,bj), theSize, theType, |
145 |
& theProc, theTag, MPI_COMM_MODEL, |
146 |
& exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), mpiRc) |
147 |
c & exchReqVIdX(exchVReqsX(1,bi,bj),1,bi,bj), mpiRc) |
148 |
#ifndef ALWAYS_USE_MPI |
149 |
ENDIF |
150 |
#endif |
151 |
#endif /* ALLOW_USE_MPI */ |
152 |
westRecvAck(1,biE,bjE) = 1. |
153 |
ELSEIF ( eastCommMode .EQ. COMM_PUT ) THEN |
154 |
c write(0,*) 'SEND_PUT_VEC_X: copy W:',biE,bjE,' <- E:',bi,bj |
155 |
DO I=1,myd1 |
156 |
bufRecW(I,biE,bjE) = arrayE(I,bi,bj) |
157 |
ENDDO |
158 |
ELSEIF ( eastCommMode .NE. COMM_NONE ) THEN |
159 |
STOP ' S/R EXCH: Invalid commE mode.' |
160 |
ENDIF |
161 |
|
162 |
ENDDO |
163 |
ENDDO |
164 |
|
165 |
C-- Signal completetion ( making sure system-wide memory state is |
166 |
C-- consistent ). |
167 |
|
168 |
C ** NOTE ** We are relying on being able to produce strong-ordered |
169 |
C memory semantics here. In other words we assume that there is a |
170 |
C mechanism which can ensure that by the time the Ack is seen the |
171 |
C overlap region data that will be exchanged is up to date. |
172 |
IF ( exchNeedsMemSync ) CALL MEMSYNC |
173 |
|
174 |
DO bj=myByLo(myThid),myByHi(myThid) |
175 |
DO bi=myBxLo(myThid),myBxHi(myThid) |
176 |
biE = _tileBiE(bi,bj) |
177 |
bjE = _tileBjE(bi,bj) |
178 |
biW = _tileBiW(bi,bj) |
179 |
bjW = _tileBjW(bi,bj) |
180 |
westCommMode = _tileCommModeW(bi,bj) |
181 |
eastCommMode = _tileCommModeE(bi,bj) |
182 |
IF ( westCommMode .EQ. COMM_PUT ) eastRecvAck(1,biW,bjW) = 1. |
183 |
IF ( eastCommMode .EQ. COMM_PUT ) westRecvAck(1,biE,bjE) = 1. |
184 |
IF ( westCommMode .EQ. COMM_GET ) eastRecvAck(1,biW,bjW) = 1. |
185 |
IF ( eastCommMode .EQ. COMM_GET ) westRecvAck(1,biE,bjE) = 1. |
186 |
ENDDO |
187 |
ENDDO |
188 |
|
189 |
C-- Make sure "ack" setting is seen system-wide. |
190 |
C Here strong-ordering is not an issue but we want to make |
191 |
C sure that processes that might spin on the above Ack settings |
192 |
C will see the setting. |
193 |
C ** NOTE ** On some machines we wont spin on the Ack setting |
194 |
C ( particularly the T90 ), instead we will use s system barrier. |
195 |
C On the T90 the system barrier is very fast and switches out the |
196 |
C thread while it waits. On most machines the system barrier |
197 |
C is much too slow and if we own the machine and have one thread |
198 |
C per process preemption is not a problem. |
199 |
IF ( exchNeedsMemSync ) CALL MEMSYNC |
200 |
|
201 |
#endif /* ALLOW_FLT */ |
202 |
RETURN |
203 |
END |
204 |
|
205 |
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| |
206 |
|
207 |
SUBROUTINE EXCH_RL_SEND_PUT_VEC_Y( |
208 |
I arrayN, arrayS, |
209 |
O bufRecN, bufRecS, |
210 |
I myd1, myThid ) |
211 |
C /==========================================================\ |
212 |
C | SUBROUTINE EXCH_RL_SEND_PUT_Y | |
213 |
C | o "Send" or "put" Y edges for RL array. | |
214 |
C |==========================================================| |
215 |
C | Routine that invokes actual message passing send or | |
216 |
C | direct "put" of data to update X faces of an XY[R] array.| |
217 |
C \==========================================================/ |
218 |
IMPLICIT NONE |
219 |
|
220 |
C == Global variables == |
221 |
#include "SIZE.h" |
222 |
#include "EEPARAMS.h" |
223 |
#include "EESUPPORT.h" |
224 |
#include "FLT.h" |
225 |
#include "EXCH.h" |
226 |
C == Routine arguments == |
227 |
C arrayN - Array to be exchanged. |
228 |
C arrayS |
229 |
C myd1 - sizes. |
230 |
C myThid - Thread number of this instance of S/R EXCH... |
231 |
INTEGER myd1 |
232 |
_RL arrayN(myd1,nSx,nSy), arrayS(myd1,nSx,nSy) |
233 |
_RL bufRecN(myd1,nSx,nSy), bufRecS(myd1,nSx,nSy) |
234 |
INTEGER myThid |
235 |
CEndOfInterface |
236 |
|
237 |
#ifdef ALLOW_FLT |
238 |
C == Local variables == |
239 |
C I, J - Loop counters and extents |
240 |
C bi, bj |
241 |
C biN, bjN - North tile indices |
242 |
C biS, bjS - South tile indices |
243 |
C theProc, theTag, theType, - Variables used in message building |
244 |
C theSize |
245 |
C westCommMode - Working variables holding type |
246 |
C eastCommMode of communication a particular |
247 |
C tile face uses. |
248 |
INTEGER I |
249 |
INTEGER bi, bj, biS, bjS, biN, bjN |
250 |
INTEGER southCommMode |
251 |
INTEGER northCommMode |
252 |
|
253 |
#ifdef ALLOW_USE_MPI |
254 |
INTEGER theProc, theTag, theType, theSize, mpiRc |
255 |
#endif |
256 |
C-- Write data to exchange buffer |
257 |
C Various actions are possible depending on the communication mode |
258 |
C as follows: |
259 |
C Mode Action |
260 |
C -------- --------------------------- |
261 |
C COMM_NONE Do nothing |
262 |
C |
263 |
C COMM_MSG Message passing communication ( e.g. MPI ) |
264 |
C Fill west send buffer from this tile. |
265 |
C Send data with tag identifying tile and direction. |
266 |
C Fill east send buffer from this tile. |
267 |
C Send data with tag identifying tile and direction. |
268 |
C |
269 |
C COMM_PUT "Put" communication ( UMP_, shmemput, etc... ) |
270 |
C Fill east receive buffer of south-neighbor tile |
271 |
C Fill west receive buffer of north-neighbor tile |
272 |
C Sync. memory |
273 |
C Write data-ready Ack for east edge of south-neighbor |
274 |
C tile |
275 |
C Write data-ready Ack for west edge of north-neighbor |
276 |
C tile |
277 |
C Sync. memory |
278 |
C |
279 |
DO bj=myByLo(myThid),myByHi(myThid) |
280 |
DO bi=myBxLo(myThid),myBxHi(myThid) |
281 |
|
282 |
southCommMode = _tileCommModeS(bi,bj) |
283 |
northCommMode = _tileCommModeN(bi,bj) |
284 |
biN = _tileBiN(bi,bj) |
285 |
bjN = _tileBjN(bi,bj) |
286 |
biS = _tileBiS(bi,bj) |
287 |
bjS = _tileBjS(bi,bj) |
288 |
|
289 |
C o Send or Put south edge |
290 |
IF ( southCommMode .EQ. COMM_MSG ) THEN |
291 |
C Send the data |
292 |
#ifdef ALLOW_USE_MPI |
293 |
#ifndef ALWAYS_USE_MPI |
294 |
IF ( usingMPI ) THEN |
295 |
#endif |
296 |
theProc = tilePidS(bi,bj) |
297 |
theTag = _tileTagSendS(bi,bj) |
298 |
theSize = myd1 |
299 |
theType = MPI_DOUBLE_PRECISION |
300 |
c exchVReqsY(1,bi,bj) = exchVReqsY(1,bi,bj)+1 |
301 |
exchNReqsY(1,bi,bj) = exchNReqsY(1,bi,bj)+1 |
302 |
CALL MPI_Isend(arrayS(1,bi,bj), theSize, theType, |
303 |
& theProc, theTag, MPI_COMM_MODEL, |
304 |
& exchReqIdY(exchNReqsY(1,bi,bj),1,bi,bj), mpiRc) |
305 |
c & exchReqVIdY(exchVReqsY(1,bi,bj),1,bi,bj), mpiRc) |
306 |
#ifndef ALWAYS_USE_MPI |
307 |
ENDIF |
308 |
#endif |
309 |
#endif /* ALLOW_USE_MPI */ |
310 |
northRecvAck(1,biS,bjS) = 1. |
311 |
ELSEIF ( southCommMode .EQ. COMM_PUT ) THEN |
312 |
c write(0,*) 'SEND_PUT_VEC_Y: copy N:',biS,bjS,' <- S:',bi,bj |
313 |
DO I=1,myd1 |
314 |
bufRecN(I,biS,bjS) = arrayS(I,bi,bj) |
315 |
ENDDO |
316 |
ELSEIF ( southCommMode .NE. COMM_NONE ) THEN |
317 |
STOP ' S/R EXCH: Invalid commS mode.' |
318 |
ENDIF |
319 |
|
320 |
C o Send or Put north edge |
321 |
IF ( northCommMode .EQ. COMM_MSG ) THEN |
322 |
C Send the data |
323 |
#ifdef ALLOW_USE_MPI |
324 |
#ifndef ALWAYS_USE_MPI |
325 |
IF ( usingMPI ) THEN |
326 |
#endif |
327 |
theProc = tilePidN(bi,bj) |
328 |
theTag = _tileTagSendN(bi,bj) |
329 |
theSize = myd1 |
330 |
theType = MPI_DOUBLE_PRECISION |
331 |
c exchVReqsY(1,bi,bj) = exchVReqsY(1,bi,bj)+1 |
332 |
exchNReqsY(1,bi,bj) = exchNReqsY(1,bi,bj)+1 |
333 |
CALL MPI_Isend(arrayN(1,bi,bj), theSize, theType, |
334 |
& theProc, theTag, MPI_COMM_MODEL, |
335 |
& exchReqIdY(exchNReqsY(1,bi,bj),1,bi,bj), mpiRc) |
336 |
c & exchReqVIdY(exchVReqsY(1,bi,bj),1,bi,bj), mpiRc) |
337 |
#ifndef ALWAYS_USE_MPI |
338 |
ENDIF |
339 |
#endif |
340 |
#endif /* ALLOW_USE_MPI */ |
341 |
southRecvAck(1,biN,bjN) = 1. |
342 |
ELSEIF ( northCommMode .EQ. COMM_PUT ) THEN |
343 |
c write(0,*) 'SEND_PUT_VEC_Y: copy S:',biN,bjN,' <- N:',bi,bj |
344 |
DO I=1,myd1 |
345 |
bufRecS(I,biN,bjN) = arrayN(I,bi,bj) |
346 |
ENDDO |
347 |
ELSEIF ( northCommMode .NE. COMM_NONE ) THEN |
348 |
STOP ' S/R EXCH: Invalid commN mode.' |
349 |
ENDIF |
350 |
|
351 |
ENDDO |
352 |
ENDDO |
353 |
|
354 |
C-- Signal completetion ( making sure system-wide memory state is |
355 |
C-- consistent ). |
356 |
|
357 |
C ** NOTE ** We are relying on being able to produce strong-ordered |
358 |
C memory semantics here. In other words we assume that there is a |
359 |
C mechanism which can ensure that by the time the Ack is seen the |
360 |
C overlap region data that will be exchanged is up to date. |
361 |
IF ( exchNeedsMemSync ) CALL MEMSYNC |
362 |
|
363 |
DO bj=myByLo(myThid),myByHi(myThid) |
364 |
DO bi=myBxLo(myThid),myBxHi(myThid) |
365 |
biN = _tileBiN(bi,bj) |
366 |
bjN = _tileBjN(bi,bj) |
367 |
biS = _tileBiS(bi,bj) |
368 |
bjS = _tileBjS(bi,bj) |
369 |
southCommMode = _tileCommModeE(bi,bj) |
370 |
northCommMode = _tileCommModeN(bi,bj) |
371 |
IF ( southCommMode .EQ. COMM_PUT ) northRecvAck(1,biS,bjS) = 1. |
372 |
IF ( northCommMode .EQ. COMM_PUT ) southRecvAck(1,biN,bjN) = 1. |
373 |
IF ( southCommMode .EQ. COMM_GET ) northRecvAck(1,biS,bjS) = 1. |
374 |
IF ( northCommMode .EQ. COMM_GET ) southRecvAck(1,biN,bjN) = 1. |
375 |
ENDDO |
376 |
ENDDO |
377 |
|
378 |
C-- Make sure "ack" setting is seen system-wide. |
379 |
C Here strong-ordering is not an issue but we want to make |
380 |
C sure that processes that might spin on the above Ack settings |
381 |
C will see the setting. |
382 |
C ** NOTE ** On some machines we wont spin on the Ack setting |
383 |
C ( particularly the T90 ), instead we will use s system barrier. |
384 |
C On the T90 the system barrier is very fast and switches out the |
385 |
C thread while it waits. On most machines the system barrier |
386 |
C is much too slow and if we own the machine and have one thread |
387 |
C per process preemption is not a problem. |
388 |
IF ( exchNeedsMemSync ) CALL MEMSYNC |
389 |
|
390 |
#endif /* ALLOW_FLT */ |
391 |
RETURN |
392 |
END |