1 |
C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_tile.F,v 1.1 2007/09/04 14:46:31 jmc Exp $ |
2 |
C $Name: $ |
3 |
|
4 |
C-- File global_sum_tile.F: Routines that perform global sum |
5 |
C on a tile array |
6 |
C Contents |
7 |
C o global_sum_tile_rl |
8 |
C o global_sum_tile_rs <- not yet coded |
9 |
#include "CPP_EEOPTIONS.h" |
10 |
|
11 |
CBOP |
12 |
C !ROUTINE: GLOBAL_SUM_TILE_RL |
13 |
|
14 |
C !INTERFACE: |
15 |
SUBROUTINE GLOBAL_SUM_TILE_RL( |
16 |
I phiTile, |
17 |
O sumPhi, |
18 |
I myThid ) |
19 |
IMPLICIT NONE |
20 |
C !DESCRIPTION: |
21 |
C *==========================================================* |
22 |
C | SUBROUTINE GLOBAL\_SUM\_TILE\_RL |
23 |
C | o Handle sum for _RL data. |
24 |
C *==========================================================* |
25 |
C | Apply sum on an array of one value per tile |
26 |
C | and operate over all tiles & all the processes. |
27 |
C *==========================================================* |
28 |
|
29 |
C !USES: |
30 |
C == Global data == |
31 |
#include "SIZE.h" |
32 |
#include "EEPARAMS.h" |
33 |
#include "EESUPPORT.h" |
34 |
#include "GLOBAL_SUM.h" |
35 |
|
36 |
C !INPUT/OUTPUT PARAMETERS: |
37 |
C == Routine arguments == |
38 |
C phiTile :: Input array with one value per tile |
39 |
C sumPhi :: Result of sum. |
40 |
C myThid :: My thread id. |
41 |
_RL phiTile(nSx,nSy) |
42 |
_RL sumPhi |
43 |
INTEGER myThid |
44 |
|
45 |
C !LOCAL VARIABLES: |
46 |
C == Local variables == |
47 |
C bi,bj :: Loop counters |
48 |
C mpiRC - MPI return code |
49 |
C- type declaration of: sumMyPr, sumAllP, localBuf and shareBufGSR8 : |
50 |
C all 4 needs to have the same length as MPI_DOUBLE_PRECISION |
51 |
INTEGER bi,bj |
52 |
#ifdef ALLOW_USE_MPI |
53 |
#ifdef GLOBAL_SUM_SEND_RECV |
54 |
INTEGER biG, bjG, npe |
55 |
INTEGER lbuff, idest, itag, ready_to_receive |
56 |
INTEGER istatus(MPI_STATUS_SIZE), ierr |
57 |
Real*8 localBuf (nSx,nSy) |
58 |
Real*8 globalBuf(nSx*nPx,nSy*nPy) |
59 |
#else |
60 |
INTEGER mpiRC |
61 |
Real*8 sumMyPr |
62 |
#endif |
63 |
#else /* ALLOW_USE_MPI */ |
64 |
Real*8 sumMyPr |
65 |
#endif /* ALLOW_USE_MPI */ |
66 |
Real*8 sumAllP |
67 |
CEOP |
68 |
|
69 |
C this barrier is not necessary: |
70 |
c CALL BAR2( myThid ) |
71 |
|
72 |
C-- write local sum into shared-buffer array |
73 |
DO bj = myByLo(myThid), myByHi(myThid) |
74 |
DO bi = myBxLo(myThid), myBxHi(myThid) |
75 |
shareBufGSR8(bi,bj) = phiTile(bi,bj) |
76 |
ENDDO |
77 |
ENDDO |
78 |
|
79 |
C-- Master thread cannot start until everyone is ready: |
80 |
CALL BAR2( myThid ) |
81 |
_BEGIN_MASTER( myThid ) |
82 |
|
83 |
#if (defined (GLOBAL_SUM_SEND_RECV) && defined (ALLOW_USE_MPI) ) |
84 |
|
85 |
lbuff = nSx*nSy |
86 |
idest = 0 |
87 |
itag = 0 |
88 |
ready_to_receive = 0 |
89 |
|
90 |
IF ( mpiMyId.NE.0 ) THEN |
91 |
|
92 |
C-- All proceses except 0 wait to be polled then send local array |
93 |
#ifndef DISABLE_MPI_READY_TO_RECEIVE |
94 |
CALL MPI_RECV (ready_to_receive, 1, MPI_INTEGER, |
95 |
& idest, itag, MPI_COMM_MODEL, istatus, ierr) |
96 |
#endif |
97 |
CALL MPI_SEND (shareBufGSR8, lbuff, MPI_DOUBLE_PRECISION, |
98 |
& idest, itag, MPI_COMM_MODEL, ierr) |
99 |
|
100 |
C-- All proceses except 0 receive result from process 0 |
101 |
CALL MPI_RECV (sumAllP, 1, MPI_DOUBLE_PRECISION, |
102 |
& idest, itag, MPI_COMM_MODEL, istatus, ierr) |
103 |
|
104 |
ELSE |
105 |
|
106 |
C-- Process 0 fills-in its local data |
107 |
npe = 0 |
108 |
DO bj=1,nSy |
109 |
DO bi=1,nSx |
110 |
biG = (mpi_myXGlobalLo(npe+1)-1)/sNx+bi |
111 |
bjG = (mpi_myYGlobalLo(npe+1)-1)/sNy+bj |
112 |
globalBuf(biG,bjG) = shareBufGSR8(bi,bj) |
113 |
ENDDO |
114 |
ENDDO |
115 |
|
116 |
C-- Process 0 polls and receives data from each process in turn |
117 |
DO npe = 1, numberOfProcs-1 |
118 |
#ifndef DISABLE_MPI_READY_TO_RECEIVE |
119 |
CALL MPI_SEND (ready_to_receive, 1, MPI_INTEGER, |
120 |
& npe, itag, MPI_COMM_MODEL, ierr) |
121 |
#endif |
122 |
CALL MPI_RECV (localBuf, lbuff, MPI_DOUBLE_PRECISION, |
123 |
& npe, itag, MPI_COMM_MODEL, istatus, ierr) |
124 |
|
125 |
C-- Process 0 gathers the local arrays into a global array. |
126 |
DO bj=1,nSy |
127 |
DO bi=1,nSx |
128 |
biG = (mpi_myXGlobalLo(npe+1)-1)/sNx+bi |
129 |
bjG = (mpi_myYGlobalLo(npe+1)-1)/sNy+bj |
130 |
globalBuf(biG,bjG) = localBuf(bi,bj) |
131 |
ENDDO |
132 |
ENDDO |
133 |
ENDDO |
134 |
|
135 |
C-- Sum over all tiles: |
136 |
sumAllP = 0. |
137 |
DO bjG = 1,nSy*nPy |
138 |
DO biG = 1,nSx*nPx |
139 |
sumAllP = sumAllP + globalBuf(biG,bjG) |
140 |
ENDDO |
141 |
ENDDO |
142 |
|
143 |
C-- Process 0 sends result to all other processes |
144 |
lbuff = 1 |
145 |
DO npe = 1, numberOfProcs-1 |
146 |
CALL MPI_SEND (sumAllP, 1, MPI_DOUBLE_PRECISION, |
147 |
& npe, itag, MPI_COMM_MODEL, ierr) |
148 |
ENDDO |
149 |
|
150 |
|
151 |
ENDIF |
152 |
|
153 |
#else /* not (GLOBAL_SUM_SEND_RECV & ALLOW_USE_MPI) */ |
154 |
|
155 |
C-- Sum over all tiles (of the same process) first |
156 |
sumMyPr = 0. |
157 |
DO bj = 1,nSy |
158 |
DO bi = 1,nSx |
159 |
sumMyPr = sumMyPr + shareBufGSR8(bi,bj) |
160 |
ENDDO |
161 |
ENDDO |
162 |
|
163 |
C in case MPI is not used: |
164 |
sumAllP = sumMyPr |
165 |
|
166 |
#ifdef ALLOW_USE_MPI |
167 |
#ifndef ALWAYS_USE_MPI |
168 |
IF ( usingMPI ) THEN |
169 |
#endif |
170 |
CALL MPI_Allreduce(sumMyPr,sumAllP,1,MPI_DOUBLE_PRECISION, |
171 |
& MPI_SUM,MPI_COMM_MODEL,mpiRC) |
172 |
#ifndef ALWAYS_USE_MPI |
173 |
ENDIF |
174 |
#endif |
175 |
#endif /* ALLOW_USE_MPI */ |
176 |
|
177 |
#endif /* not (GLOBAL_SUM_SEND_RECV & ALLOW_USE_MPI) */ |
178 |
|
179 |
C-- Write solution to shared buffer (all threads can see it) |
180 |
shareBufGSR8(1,1) = sumAllP |
181 |
|
182 |
_END_MASTER( myThid ) |
183 |
C-- Everyone wait for Master thread to be ready |
184 |
CALL BAR2( myThid ) |
185 |
|
186 |
C-- set result for every threads |
187 |
sumPhi = shareBufGSR8(1,1) |
188 |
|
189 |
C- Need a barrier here to prevent thread 1 to modify shareBufGSR8(1,1) |
190 |
C (as it would in the following call to this S/R) before all threads get |
191 |
C their global-sum result out. |
192 |
CALL BAR2( myThid ) |
193 |
|
194 |
RETURN |
195 |
END |