C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_tile.F,v 1.2 2009/06/07 15:19:41 jmc Exp $
C $Name: $

#include "CPP_EEOPTIONS.h"

C--   File global_sum_tile.F: Routines that perform global sum
C                             on a tile array
C      Contents
C      o GLOBAL_SUM_TILE_RL
C      o GLOBAL_SUM_TILE_RS <- not yet coded

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: GLOBAL_SUM_TILE_RL

C     !INTERFACE:
      SUBROUTINE GLOBAL_SUM_TILE_RL(
     I                       phiTile,
     O                       sumPhi,
     I                       myThid )

C     !DESCRIPTION:
C     *==========================================================*
C     | SUBROUTINE GLOBAL\_SUM\_TILE\_RL
C     | o Handle sum for _RL data.
C     *==========================================================*
C     | Apply sum on an array of one value per tile
C     |  and operate over all tiles & all the processes.
C     *==========================================================*
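C
C     Typical use, as a sketch (tileSum & globSum are hypothetical
C     names; any _RL scalar and (nSx,nSy) array will do):
C       _RL tileSum(nSx,nSy), globSum
C       ... store one partial sum per tile in tileSum ...
C       CALL GLOBAL_SUM_TILE_RL( tileSum, globSum, myThid )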

C     !USES:
      IMPLICIT NONE

C     == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     == Routine arguments ==
C     phiTile :: Input array with one value per tile
C     sumPhi  :: Result of sum.
C     myThid  :: My thread id.
      _RL     phiTile(nSx,nSy)
      _RL     sumPhi
      INTEGER myThid

C     !LOCAL VARIABLES:
C     == Local variables ==
C     bi,bj :: Loop counters
C     mpiRC :: MPI return code
C-    type declaration of sumMyPr, sumAllP, localBuf and shareBufGSR8:
C     all four need to have the same length as MPI_DOUBLE_PRECISION
      INTEGER bi,bj
#ifdef ALLOW_USE_MPI
#ifdef GLOBAL_SUM_SEND_RECV
      INTEGER biG, bjG, npe, np1
      INTEGER lbuff, idest, itag, ready_to_receive
      INTEGER istatus(MPI_STATUS_SIZE), ierr
      Real*8  localBuf (nSx,nSy)
      Real*8  globalBuf(nSx*nPx,nSy*nPy)
#else
      INTEGER mpiRC
      Real*8  sumMyPr
#endif
#else /* ALLOW_USE_MPI */
      Real*8  sumMyPr
#endif /* ALLOW_USE_MPI */
      Real*8  sumAllP
CEOP

C     this barrier is not necessary:
c     CALL BAR2( myThid )

C--   Write local sums into shared-buffer array
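C     (shareBufGSR8, declared in GLOBAL_SUM.h, is shared by all threads,
C      so the master thread can later read every thread's tile values)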
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
         shareBufGSR8(bi,bj) = phiTile(bi,bj)
       ENDDO
      ENDDO

C--   Master thread cannot start until everyone is ready:
      CALL BAR2( myThid )
      _BEGIN_MASTER( myThid )
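C     (only the master thread executes the code from here to _END_MASTER)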

#if (defined (GLOBAL_SUM_SEND_RECV) && defined (ALLOW_USE_MPI) )

      lbuff = nSx*nSy
      idest = 0
      itag  = 0
      ready_to_receive = 0
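C-    Handshake: unless DISABLE_MPI_READY_TO_RECEIVE is defined, each
C     process waits to be polled by process 0 before sending its tile
C     values, so that process 0 receives the arrays one at a time.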

      IF ( mpiMyId.NE.0 ) THEN

C--   All processes except 0 wait to be polled, then send their local array
#ifndef DISABLE_MPI_READY_TO_RECEIVE
        CALL MPI_RECV (ready_to_receive, 1, MPI_INTEGER,
     &        idest, itag, MPI_COMM_MODEL, istatus, ierr)
#endif
        CALL MPI_SEND (shareBufGSR8, lbuff, MPI_DOUBLE_PRECISION,
     &        idest, itag, MPI_COMM_MODEL, ierr)

C--   All processes except 0 receive the result from process 0
        CALL MPI_RECV (sumAllP, 1, MPI_DOUBLE_PRECISION,
     &        idest, itag, MPI_COMM_MODEL, istatus, ierr)

      ELSE

C--   Process 0 fills in its local data
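C     ( (biG,bjG) is the global tile index of local tile (bi,bj),
C       derived from the process-domain origin mpi_myXGlobalLo /
C       mpi_myYGlobalLo )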
        np1 = 1
        DO bj=1,nSy
         DO bi=1,nSx
          biG = (mpi_myXGlobalLo(np1)-1)/sNx+bi
          bjG = (mpi_myYGlobalLo(np1)-1)/sNy+bj
          globalBuf(biG,bjG) = shareBufGSR8(bi,bj)
         ENDDO
        ENDDO

C--   Process 0 polls and receives data from each process in turn
        DO npe = 1, numberOfProcs-1
#ifndef DISABLE_MPI_READY_TO_RECEIVE
         CALL MPI_SEND (ready_to_receive, 1, MPI_INTEGER,
     &            npe, itag, MPI_COMM_MODEL, ierr)
#endif
         CALL MPI_RECV (localBuf, lbuff, MPI_DOUBLE_PRECISION,
     &            npe, itag, MPI_COMM_MODEL, istatus, ierr)

C--   Process 0 gathers the local arrays into a global array.
         np1 = npe + 1
         DO bj=1,nSy
          DO bi=1,nSx
           biG = (mpi_myXGlobalLo(np1)-1)/sNx+bi
           bjG = (mpi_myYGlobalLo(np1)-1)/sNy+bj
           globalBuf(biG,bjG) = localBuf(bi,bj)
          ENDDO
         ENDDO
        ENDDO

C--   Sum over all tiles:
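C     (accumulating in a fixed global-tile order keeps the result
C      independent of how tiles are distributed over processes)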
        sumAllP = 0.
        DO bjG = 1,nSy*nPy
         DO biG = 1,nSx*nPx
          sumAllP = sumAllP + globalBuf(biG,bjG)
         ENDDO
        ENDDO

C--   Process 0 sends result to all other processes
        lbuff = 1
        DO npe = 1, numberOfProcs-1
         CALL MPI_SEND (sumAllP, 1, MPI_DOUBLE_PRECISION,
     &            npe, itag, MPI_COMM_MODEL, ierr)
        ENDDO

      ENDIF

#else /* not (GLOBAL_SUM_SEND_RECV & ALLOW_USE_MPI) */

C--   Sum over all tiles (of the same process) first
      sumMyPr = 0.
      DO bj = 1,nSy
       DO bi = 1,nSx
         sumMyPr = sumMyPr + shareBufGSR8(bi,bj)
       ENDDO
      ENDDO

C     in case MPI is not used:
      sumAllP = sumMyPr

#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
      CALL MPI_Allreduce(sumMyPr,sumAllP,1,MPI_DOUBLE_PRECISION,
     &                   MPI_SUM,MPI_COMM_MODEL,mpiRC)
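C     (MPI_Allreduce sums the per-process partial sums and returns the
C      result to every process in a single collective call)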
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

#endif /* not (GLOBAL_SUM_SEND_RECV & ALLOW_USE_MPI) */

C--   Write solution to shared buffer (all threads can see it)
c     shareBufGSR8(1,1) = sumAllP
      phiGSR8(1,0) = sumAllP

      _END_MASTER( myThid )
C--   Everyone waits for the master thread to be ready
      CALL BAR2( myThid )

C--   Set result for every thread
c     sumPhi = shareBufGSR8(1,1)
      sumPhi = phiGSR8(1,0)

C--   A barrier was needed here to prevent thread 1 from modifying
C     shareBufGSR8(1,1) (as it would in the following call to this S/R)
C     before all threads get their global-sum result out. It is no
C     longer needed since a dedicated shared variable is used to share
C     the output.
c     CALL BAR2( myThid )

      RETURN
      END