C $Header: $
C $Name: $

C--  File global_sum_tile.F: Routines that perform global sum
C                            on a tile array
C--   Contents
C     o global_sum_tile_rl
C     o global_sum_tile_rs <- not yet coded

#include "CPP_EEOPTIONS.h"

CBOP
C     !ROUTINE: GLOBAL_SUM_TILE_RL

C     !INTERFACE:
      SUBROUTINE GLOBAL_SUM_TILE_RL(
     I                       phiTile,
     O                       sumPhi,
     I                       myThid )
      IMPLICIT NONE
C     !DESCRIPTION:
C     *==========================================================*
C     | SUBROUTINE GLOBAL\_SUM\_TILE\_RL
C     | o Handle sum for _RL data.
C     *==========================================================*
C     | Apply sum on an array of one value per tile
C     | and operate over all tiles & all the processes.
C     *==========================================================*
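C     A minimal usage sketch (illustrative only; variable names below
C     are assumed, not part of this file):
C       _RL tileVal(nSx,nSy), theSum
C       ... fill tileVal(bi,bj) with one value per tile ...
C       CALL GLOBAL_SUM_TILE_RL( tileVal, theSum, myThid )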

C     !USES:
C     == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     == Routine arguments ==
C     phiTile :: Input array with one value per tile
C     sumPhi  :: Result of sum.
C     myThid  :: My thread id.
      _RL     phiTile(nSx,nSy)
      _RL     sumPhi
      INTEGER myThid

C     !LOCAL VARIABLES:
C     == Local variables ==
C     bi,bj   :: Loop counters
C     mpiRC   :: MPI return code
C-    type declaration of: sumMyPr, sumAllP, localBuf and shareBufGSR8 :
C     all 4 need to have the same length as MPI_DOUBLE_PRECISION
      INTEGER bi,bj
#ifdef ALLOW_USE_MPI
#ifdef GLOBAL_SUM_SEND_RECV
      INTEGER biG, bjG, npe
      INTEGER lbuff, idest, itag, ready_to_receive
      INTEGER istatus(MPI_STATUS_SIZE), ierr
      Real*8  localBuf (nSx,nSy)
      Real*8  globalBuf(nSx*nPx,nSy*nPy)
#else
      INTEGER mpiRC
      Real*8  sumMyPr
#endif
#else /* ALLOW_USE_MPI */
      Real*8  sumMyPr
#endif /* ALLOW_USE_MPI */
      Real*8  sumAllP
CEOP
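C     Two communication strategies are used below:
C     - with GLOBAL_SUM_SEND_RECV (and MPI), process 0 explicitly
C       gathers one value per tile from every process, sums them,
C       and sends the result back;
C     - otherwise each process sums its own tiles and, when MPI is
C       used, the partial sums are combined with MPI_Allreduce.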

C-    note(jmc): do not see why we need this Barrier here + the one at the end;
C     however a) both are present in the standard version of global_sum;
C             b) with OpenMP on hugo, it does not work if both are commented out
      CALL BAR2( myThid )

C--   write local sum into shared-buffer array
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
         shareBufGSR8(bi,bj) = phiTile(bi,bj)
       ENDDO
      ENDDO
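C     (each thread copies only the tiles it owns, myBxLo:myBxHi and
C      myByLo:myByHi, so after the next barrier the master thread sees
C      the complete per-tile array of this process)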

C--   Master thread cannot start until everyone is ready:
      CALL BAR2( myThid )
      _BEGIN_MASTER( myThid )

#if (defined (GLOBAL_SUM_SEND_RECV) && defined (ALLOW_USE_MPI) )

      lbuff = nSx*nSy
      idest = 0
      itag  = 0
      ready_to_receive = 0
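C     (handshake: unless DISABLE_MPI_READY_TO_RECEIVE is set, process 0
C      polls each process with a small "ready_to_receive" message before
C      that process sends its tile values, so sends arrive one at a time)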

      IF ( mpiMyId.NE.0 ) THEN

C--   All processes except 0 wait to be polled, then send their local array
#ifndef DISABLE_MPI_READY_TO_RECEIVE
        CALL MPI_RECV (ready_to_receive, 1, MPI_INTEGER,
     &                 idest, itag, MPI_COMM_MODEL, istatus, ierr)
#endif
        CALL MPI_SEND (shareBufGSR8, lbuff, MPI_DOUBLE_PRECISION,
     &                 idest, itag, MPI_COMM_MODEL, ierr)

C--   All processes except 0 receive the result from process 0
        CALL MPI_RECV (sumAllP, 1, MPI_DOUBLE_PRECISION,
     &                 idest, itag, MPI_COMM_MODEL, istatus, ierr)

      ELSE

C--   Process 0 fills in its local data
        npe = 0
        DO bj=1,nSy
         DO bi=1,nSx
           biG = (mpi_myXGlobalLo(npe+1)-1)/sNx+bi
           bjG = (mpi_myYGlobalLo(npe+1)-1)/sNy+bj
           globalBuf(biG,bjG) = shareBufGSR8(bi,bj)
         ENDDO
        ENDDO
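C     (biG,bjG map the local tile indices to the global tile grid using
C      the domain offsets mpi_myXGlobalLo/mpi_myYGlobalLo of process npe)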

C--   Process 0 polls and receives data from each process in turn
        DO npe = 1, numberOfProcs-1
#ifndef DISABLE_MPI_READY_TO_RECEIVE
          CALL MPI_SEND (ready_to_receive, 1, MPI_INTEGER,
     &                   npe, itag, MPI_COMM_MODEL, ierr)
#endif
          CALL MPI_RECV (localBuf, lbuff, MPI_DOUBLE_PRECISION,
     &                   npe, itag, MPI_COMM_MODEL, istatus, ierr)

C--   Process 0 gathers the local arrays into a global array.
          DO bj=1,nSy
           DO bi=1,nSx
             biG = (mpi_myXGlobalLo(npe+1)-1)/sNx+bi
             bjG = (mpi_myYGlobalLo(npe+1)-1)/sNy+bj
             globalBuf(biG,bjG) = localBuf(bi,bj)
           ENDDO
          ENDDO
        ENDDO
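C     (globalBuf now holds one value per tile over the whole domain)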

C--   Sum over all tiles:
        sumAllP = 0.
        DO bjG = 1,nSy*nPy
         DO biG = 1,nSx*nPx
           sumAllP = sumAllP + globalBuf(biG,bjG)
         ENDDO
        ENDDO

C--   Process 0 sends the result to all other processes
        lbuff = 1
        DO npe = 1, numberOfProcs-1
          CALL MPI_SEND (sumAllP, 1, MPI_DOUBLE_PRECISION,
     &                   npe, itag, MPI_COMM_MODEL, ierr)
        ENDDO
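C     (this loop of point-to-point sends plays the role of a broadcast
C      of the result to the other processes)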

      ENDIF

#else /* not (GLOBAL_SUM_SEND_RECV & ALLOW_USE_MPI) */

C--   Sum over all tiles (of the same process) first
      sumMyPr = 0.
      DO bj = 1,nSy
       DO bi = 1,nSx
         sumMyPr = sumMyPr + shareBufGSR8(bi,bj)
       ENDDO
      ENDDO

C     in case MPI is not used:
      sumAllP = sumMyPr

#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
        CALL MPI_Allreduce(sumMyPr,sumAllP,1,MPI_DOUBLE_PRECISION,
     &                     MPI_SUM,MPI_COMM_MODEL,mpiRC)
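C     (MPI_Allreduce sums the per-process partial sums and returns the
C      global total to every process, so no separate broadcast is needed)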
#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

#endif /* not (GLOBAL_SUM_SEND_RECV & ALLOW_USE_MPI) */

C--   Write solution to shared buffer (all threads can see it)
      shareBufGSR8(1,1) = sumAllP

      _END_MASTER( myThid )
C--   Everyone waits for the master thread to be ready
      CALL BAR2( myThid )

C--   set result for every thread
      sumPhi = shareBufGSR8(1,1)

C-    note(jmc): do not see why we need this Barrier here (see comment at the top)
      CALL BAR2( myThid )

      RETURN
      END