
Contents of /MITgcm/eesupp/src/cumulsum_z_tile.F



Revision 1.3
Thu Sep 6 15:25:01 2012 UTC by jmc
Branch: MAIN
CVS Tags: checkpoint64y, checkpoint64x, checkpoint64z, checkpoint64q, checkpoint64p, checkpoint64s, checkpoint64r, checkpoint64u, checkpoint64t, checkpoint64w, checkpoint64v, checkpoint64i, checkpoint64h, checkpoint64k, checkpoint64j, checkpoint64m, checkpoint64l, checkpoint64o, checkpoint64n, checkpoint64a, checkpoint64c, checkpoint64b, checkpoint64e, checkpoint64d, checkpoint64g, checkpoint64f, checkpoint64, checkpoint65, checkpoint66g, checkpoint66f, checkpoint66e, checkpoint66d, checkpoint66c, checkpoint66b, checkpoint66a, checkpoint66o, checkpoint66n, checkpoint66m, checkpoint66l, checkpoint66k, checkpoint66j, checkpoint66i, checkpoint66h, checkpoint65z, checkpoint65x, checkpoint65y, checkpoint65r, checkpoint65s, checkpoint65p, checkpoint65q, checkpoint65v, checkpoint65w, checkpoint65t, checkpoint65u, checkpoint65j, checkpoint65k, checkpoint65h, checkpoint65i, checkpoint65n, checkpoint65o, checkpoint65l, checkpoint65m, checkpoint65b, checkpoint65c, checkpoint65a, checkpoint65f, checkpoint65g, checkpoint65d, checkpoint65e, HEAD
Changes since 1.2: +3 -8 lines
finish removing ALWAYS_USE_MPI in source code that TAF does not see

C $Header: /u/gcmpack/MITgcm/eesupp/src/cumulsum_z_tile.F,v 1.2 2011/07/09 22:13:44 jmc Exp $
C $Name: $

#include "PACKAGES_CONFIG.h"
#include "CPP_EEOPTIONS.h"

C--   File cumulsum_z_tile.F: Routines that perform cumulated sum
C     on a tiled array, corner grid-cell location
C     Contents
C     o CUMULSUM_Z_TILE_RL
C     o CUMULSUM_Z_TILE_RS <- not yet coded

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: CUMULSUM_Z_TILE_RL

C     !INTERFACE:
      SUBROUTINE CUMULSUM_Z_TILE_RL(
     O                       psiZ, psiLoc,
     I                       dPsiX, dPsiY, myThid )

C     !DESCRIPTION:
C     *==========================================================*
C     | SUBROUTINE CUMULSUM\_Z\_TILE\_RL
C     | o Handle cumulated sum for _RL tile data.
C     *==========================================================*
C     | Cumulated sum on tiled array, corner grid-cell location:
C     | starts from the first tile and, going through all tiles
C     | and all the processes, adds the increment in both
C     | directions.
C     *==========================================================*
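C     A worked example (illustrative, single-process, single-row
C     case; the values are made up, not from the code): with three
C     tiles in X, the recurrence applied below gives
C       psiZ(1,1) = 0
C       psiZ(2,1) = psiZ(1,1) + dPsiX(1,1)
C       psiZ(3,1) = psiZ(2,1) + dPsiX(2,1)
C     and each next row in Y starts from psiZ(bi,bj) + dPsiY(bi,bj).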

C     !USES:
      IMPLICIT NONE

C     == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "CUMULSUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     == Routine arguments ==
C     psiZ   :: result of cumulated sum; corresponds to tile South-East corner
C     psiLoc :: cumulated sum at special locations
C     dPsiX  :: tile increment in X direction
C     dPsiY  :: tile increment in Y direction
C     myThid :: my Thread Id. number
      _RL     psiZ  (nSx,nSy)
      _RL     psiLoc(2)
      _RL     dPsiX (nSx,nSy)
      _RL     dPsiY (nSx,nSy)
      INTEGER myThid

C     !LOCAL VARIABLES:
#ifndef ALLOW_EXCH2
C     == Local variables ==
C     bi,bj :: tile indices
C-    type declaration of: loc[1,2]Buf and shareBufCS[1,2]_R8 :
C     all 4 need to have the same length as MPI_DOUBLE_PRECISION
      INTEGER bi,bj
      INTEGER nf
#ifdef ALLOW_USE_MPI
      INTEGER biG, bjG, npe, np1
      INTEGER lbuf1, lbuf2, idest, itag, ready_to_receive
      INTEGER istatus(MPI_STATUS_SIZE), ierr
      Real*8  loc1Buf  (nSx,nSy)
      Real*8  loc2Buf(2,nSx,nSy)
      Real*8  globalBuf(3,nSx*nPx,nSy*nPy)
#endif /* ALLOW_USE_MPI */
#endif /* ALLOW_EXCH2 */
CEOP

#ifdef ALLOW_EXCH2
      CALL W2_CUMULSUM_Z_TILE_RL(
     O                       psiZ, psiLoc,
     I                       dPsiX, dPsiY, myThid )

#else /* ALLOW_EXCH2 */
C--   write input into shared-buffer array
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
         shareBufCS2_R8(1,bi,bj) = dPsiX(bi,bj)
         shareBufCS2_R8(2,bi,bj) = dPsiY(bi,bj)
       ENDDO
      ENDDO
      psiLoc(1) = 0.
      psiLoc(2) = 0.

C--   Master thread cannot start until everyone is ready:
      CALL BAR2( myThid )
      _BEGIN_MASTER( myThid )

#ifdef ALLOW_USE_MPI
      IF ( usingMPI ) THEN

        lbuf1 = nSx*nSy
        lbuf2 = 2*lbuf1
        idest = 0
        itag  = 0
        ready_to_receive = 0

        IF ( mpiMyId.NE.0 ) THEN

C--     All processes except 0 wait to be polled then send local array
#ifndef DISABLE_MPI_READY_TO_RECEIVE
          CALL MPI_RECV (ready_to_receive, 1, MPI_INTEGER,
     &         idest, itag, MPI_COMM_MODEL, istatus, ierr)
#endif
          CALL MPI_SEND (shareBufCS2_R8, lbuf2, MPI_DOUBLE_PRECISION,
     &         idest, itag, MPI_COMM_MODEL, ierr)

C--     All processes except 0 receive result from process 0
          CALL MPI_RECV (shareBufCS1_R8, lbuf1, MPI_DOUBLE_PRECISION,
     &         idest, itag, MPI_COMM_MODEL, istatus, ierr)

        ELSE

C--     Process 0 fills in its local data
          np1 = 1
          DO bj=1,nSy
           DO bi=1,nSx
             biG = (mpi_myXGlobalLo(np1)-1)/sNx+bi
             bjG = (mpi_myYGlobalLo(np1)-1)/sNy+bj
             globalBuf(1,biG,bjG) = shareBufCS2_R8(1,bi,bj)
             globalBuf(2,biG,bjG) = shareBufCS2_R8(2,bi,bj)
           ENDDO
          ENDDO

C--     Process 0 polls and receives data from each process in turn
          DO npe = 1, numberOfProcs-1
#ifndef DISABLE_MPI_READY_TO_RECEIVE
            CALL MPI_SEND (ready_to_receive, 1, MPI_INTEGER,
     &           npe, itag, MPI_COMM_MODEL, ierr)
#endif
            CALL MPI_RECV (loc2Buf, lbuf2, MPI_DOUBLE_PRECISION,
     &           npe, itag, MPI_COMM_MODEL, istatus, ierr)

C--     Process 0 gathers the local arrays into a global array.
            np1 = npe + 1
            DO bj=1,nSy
             DO bi=1,nSx
               biG = (mpi_myXGlobalLo(np1)-1)/sNx+bi
               bjG = (mpi_myYGlobalLo(np1)-1)/sNy+bj
               globalBuf(1,biG,bjG) = loc2Buf(1,bi,bj)
               globalBuf(2,biG,bjG) = loc2Buf(2,bi,bj)
             ENDDO
            ENDDO
          ENDDO

C--     Cumulate Sum over all tiles:
          globalBuf(3,1,1) = 0.
          bj = 1
          DO bi = 1,nSx*nPx-1
            globalBuf(3,1+bi,bj) = globalBuf(3,bi,bj)
     &                           + globalBuf(1,bi,bj)
          ENDDO
          DO bj = 1,nSy*nPy-1
           DO bi = 1,nSx*nPx
             globalBuf(3,bi,1+bj) = globalBuf(3,bi,bj)
     &                            + globalBuf(2,bi,bj)
           ENDDO
          ENDDO

C--     Process 0 fills in its local data
          np1 = 1
          DO bj=1,nSy
           DO bi=1,nSx
             biG = (mpi_myXGlobalLo(np1)-1)/sNx+bi
             bjG = (mpi_myYGlobalLo(np1)-1)/sNy+bj
             shareBufCS1_R8(bi,bj) = globalBuf(3,biG,bjG)
           ENDDO
          ENDDO

C--     Process 0 sends result to all other processes
          DO npe = 1, numberOfProcs-1
C-      fill local array with relevant portion of global array
            np1 = npe + 1
            DO bj=1,nSy
             DO bi=1,nSx
               biG = (mpi_myXGlobalLo(np1)-1)/sNx+bi
               bjG = (mpi_myYGlobalLo(np1)-1)/sNy+bj
               loc1Buf(bi,bj) = globalBuf(3,biG,bjG)
             ENDDO
            ENDDO
            CALL MPI_SEND (loc1Buf, lbuf1, MPI_DOUBLE_PRECISION,
     &           npe, itag, MPI_COMM_MODEL, ierr)

          ENDDO

        ENDIF

      ELSEIF (useCubedSphereExchange) THEN
#else /* not ALLOW_USE_MPI */
      IF (useCubedSphereExchange) THEN
#endif /* ALLOW_USE_MPI */

C--   assume 1 tile / face, from bi=1 to 6, no MPI
        shareBufCS1_R8(1,1) = 0.
        bj = 1
        DO bi = 1,nSx-1
          nf = 1 + MOD(1+bi,2)
          shareBufCS1_R8(1+bi,bj) = shareBufCS1_R8(bi,bj)
     &                            + shareBufCS2_R8(nf,bi,bj)
        ENDDO
C-    fill in missing corners: 1 = North-West corner of face 1
C-                             2 = South-East corner of face 2
        bi = 1
        psiLoc(1) = shareBufCS1_R8(bi,bj) + shareBufCS2_R8(2,bi,bj)
        bi = MIN(2,nSx)
        psiLoc(2) = shareBufCS1_R8(bi,bj) + shareBufCS2_R8(1,bi,bj)

      ELSE

C--   Cumulate Sum over all tiles:
        shareBufCS1_R8(1,1) = 0.
        bj = 1
        DO bi = 1,nSx-1
          shareBufCS1_R8(1+bi,bj) = shareBufCS1_R8(bi,bj)
     &                            + shareBufCS2_R8(1,bi,bj)
        ENDDO
        DO bj = 1,nSy-1
         DO bi = 1,nSx
           shareBufCS1_R8(bi,1+bj) = shareBufCS1_R8(bi,bj)
     &                             + shareBufCS2_R8(2,bi,bj)
         ENDDO
        ENDDO

      ENDIF

      _END_MASTER( myThid )
C--   Everyone waits for Master thread to be ready
      CALL BAR2( myThid )

C--   set result for every thread
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
         psiZ(bi,bj) = shareBufCS1_R8(bi,bj)
       ENDDO
      ENDDO

#endif /* ALLOW_EXCH2 */
      RETURN
      END
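
A minimal caller sketch, not part of this file or of the repository: it shows one plausible way to invoke CUMULSUM_Z_TILE_RL, assuming the standard MITgcm headers (SIZE.h, EEPARAMS.h, CPP_EEOPTIONS.h) and the usual per-thread tile loop bounds. The subroutine name CUMULSUM_EXAMPLE and the constant increments are hypothetical; a real caller would compute dPsiX/dPsiY from model fields on each tile.

#include "CPP_EEOPTIONS.h"

      SUBROUTINE CUMULSUM_EXAMPLE( myThid )
      IMPLICIT NONE
#include "SIZE.h"
#include "EEPARAMS.h"
C     myThid :: my Thread Id. number
      INTEGER myThid
C     == Local variables ==
      INTEGER bi, bj
      _RL psiZ (nSx,nSy), psiLoc(2)
      _RL dPsiX(nSx,nSy), dPsiY(nSx,nSy)

C--   Fill per-tile increments (placeholder constants for this
C     sketch; a real caller would integrate model fields here).
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
         dPsiX(bi,bj) = 1. _d 0
         dPsiY(bi,bj) = 2. _d 0
       ENDDO
      ENDDO

C--   Cumulated sum across all tiles (and processes, under MPI):
C     on return, psiZ(bi,bj) holds the value at each tile
C     South-East corner.
      CALL CUMULSUM_Z_TILE_RL( psiZ, psiLoc, dPsiX, dPsiY, myThid )

      RETURN
      END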
