eesupp/src/global_sum_singlecpu.F

C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_singlecpu.F,v 1.2 2009/01/09 22:51:12 jmc Exp $
C $Name:  $

C--   File global_sum_singlecpu.F: Routines that perform global sum
C                                  on a single CPU
C      Contents
C      o global_sum_singlecpu_rl
C      o global_sum_singlecpu_rs <- not yet coded

#include "PACKAGES_CONFIG.h"
#include "CPP_EEOPTIONS.h"

CBOP
C     !ROUTINE: GLOBAL_SUM_SINGLECPU_RL

C     !INTERFACE:
      SUBROUTINE GLOBAL_SUM_SINGLECPU_RL(
     I                       phiLocal,
     O                       sumPhi,
     I                       oLi, oLj, myThid )
      IMPLICIT NONE
C     !DESCRIPTION:
C     *==========================================================*
C     | SUBROUTINE GLOBAL\_SUM\_SINGLECPU\_RL
C     | o Handle sum for _RL data.
C     *==========================================================*
C     | Global sum of 2d array
C     | independent of tiling as sum is performed on a single CPU
C     | sum is performed in REAL*8
C     *==========================================================*

C     !USES:
C     == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "GLOBAL_SUM.h"
#ifdef ALLOW_EXCH2
#include "W2_EXCH2_SIZE.h"
#include "W2_EXCH2_TOPOLOGY.h"
#endif
#include "EEBUFF_SCPU.h"

C     !INPUT/OUTPUT PARAMETERS:
C     == Routine arguments ==
C     phiLocal :: local input array without overlap regions.
C     sumPhi   :: Result of sum.
C     oLi, oLj :: overlap size of input array in I & J direction.
C     myThid   :: My thread id.
      INTEGER oLi, oLj
      _RL    phiLocal(1-oLi:sNx+oLi,1-oLj:sNy+oLj,nSx,nSy)
      _RL     sumPhi
      INTEGER myThid

C     !LOCAL VARIABLES:
C     == Local variables ==
C- type declaration of: sumAll, globalBuf :
C         sumAll needs to have the same length as MPI_DOUBLE_PRECISION
      LOGICAL useExch2GlobLayOut, zeroBuff
      INTEGER xSize, ySize
      INTEGER i, j, ij
      INTEGER bi, bj
      Real*8  sumAll
#ifdef ALLOW_USE_MPI
      INTEGER npe
      INTEGER lbuff, idest, itag
      INTEGER istatus(MPI_STATUS_SIZE), ierr
#endif /* ALLOW_USE_MPI */
CEOP

#ifdef ALLOW_EXCH2
      zeroBuff = .TRUE.
      useExch2GlobLayOut = .TRUE.
      xSize = exch2_global_Nx
      ySize = exch2_global_Ny
#else /* ALLOW_EXCH2 */
      zeroBuff = .FALSE.
      useExch2GlobLayOut = .FALSE.
      xSize = Nx
      ySize = Ny
#endif /* ALLOW_EXCH2 */

C--   copy (and conversion to real*8) to Shared buffer:
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
           sharedLocBuf_r8(i,j,bi,bj) = phiLocal(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C- note(jmc): do not see why we need this Barrier here + the one at the end ;
C     however a) both are present in the standard version of global_sum ;
C             b) with OpenMP on hugo, does not work if both are commented out
      CALL BAR2( myThid )

C--   Gather local arrays
      CALL GATHER_2D_R8(
     O                       xy_buffer_r8,
     I                       sharedLocBuf_r8,
     I                       xSize, ySize,
     I                       useExch2GlobLayOut, zeroBuff, myThid )

C--   Master thread does the communications and the global sum
C--   Master thread cannot start until everyone is ready:
      CALL BAR2( myThid )
      _BEGIN_MASTER( myThid )

#ifdef ALLOW_USE_MPI
       idest = 0
       itag  = 0

       IF ( mpiMyId.EQ.0 ) THEN
#endif

C--   Process 0 sums the global array
        sumAll = 0. _d 0
        DO ij=1,xSize*ySize
          sumAll = sumAll + xy_buffer_r8(ij)
        ENDDO

#ifdef ALLOW_USE_MPI
C--   Process 0 sends result to all other processes
        lbuff = 1
        DO npe = 1, numberOfProcs-1
          CALL MPI_SEND (sumAll, 1, MPI_DOUBLE_PRECISION,
     &                   npe, itag, MPI_COMM_MODEL, ierr)
        ENDDO

       ELSE

C--   All proceses except 0 receive result from process 0
          CALL MPI_RECV (sumAll, 1, MPI_DOUBLE_PRECISION,
     &         idest, itag, MPI_COMM_MODEL, istatus, ierr)

       ENDIF
#endif /* not ALLOW_USE_MPI */

C--   Write solution to shared buffer (all threads can see it)
      shareBufGSR8(1,1) = sumAll

      _END_MASTER( myThid )
C--   Everyone wait for Master thread to be ready
      CALL BAR2( myThid )

C--   set result for every threads
      sumPhi = shareBufGSR8(1,1)

C- note(jmc): do not see why we need this Barrier here (see comment @ the top)
      CALL BAR2( myThid )

      RETURN
      END
1	jmc	1.3	C $Header: /u/gcmpack/MITgcm/eesupp/src/global_sum_singlecpu.F,v 1.2 2009/01/09 22:51:12 jmc Exp $
2	jmc	1.2	C $Name: $
3	jahn	1.1
4			C-- File global_sum_singlecpu.F: Routines that perform global sum
5	jmc	1.3	C on a single CPU
6			C Contents
7			C o global_sum_singlecpu_rl
8			C o global_sum_singlecpu_rs <- not yet coded
9
10			#include "PACKAGES_CONFIG.h"
11	jahn	1.1	#include "CPP_EEOPTIONS.h"
12
13			CBOP
14			C !ROUTINE: GLOBAL_SUM_SINGLECPU_RL
15
16			C !INTERFACE:
17			SUBROUTINE GLOBAL_SUM_SINGLECPU_RL(
18			I phiLocal,
19			O sumPhi,
20	jmc	1.3	I oLi, oLj, myThid )
21	jahn	1.1	IMPLICIT NONE
22			C !DESCRIPTION:
23			C ==========================================================
24			C \| SUBROUTINE GLOBAL\_SUM\_SINGLECPU\_RL
25			C \| o Handle sum for _RL data.
26			C ==========================================================
27			C \| Global sum of 2d array
28			C \| independent of tiling as sum is performed on a single CPU
29			C \| sum is performed in REAL*8
30			C ==========================================================
31
32			C !USES:
33			C == Global data ==
34			#include "SIZE.h"
35			#include "EEPARAMS.h"
36			#include "EESUPPORT.h"
37			#include "GLOBAL_SUM.h"
38	jmc	1.3	#ifdef ALLOW_EXCH2
39			#include "W2_EXCH2_SIZE.h"
40			#include "W2_EXCH2_TOPOLOGY.h"
41			#endif
42			#include "EEBUFF_SCPU.h"
43	jahn	1.1
44			C !INPUT/OUTPUT PARAMETERS:
45			C == Routine arguments ==
46			C phiLocal :: local input array without overlap regions.
47			C sumPhi :: Result of sum.
48	jmc	1.3	C oLi, oLj :: overlap size of input array in I & J direction.
49	jahn	1.1	C myThid :: My thread id.
50	jmc	1.3	INTEGER oLi, oLj
51			_RL phiLocal(1-oLi:sNx+oLi,1-oLj:sNy+oLj,nSx,nSy)
52	jahn	1.1	_RL sumPhi
53			INTEGER myThid
54
55			C !LOCAL VARIABLES:
56			C == Local variables ==
57			C- type declaration of: sumAll, globalBuf :
58			C sumAll needs to have the same length as MPI_DOUBLE_PRECISION
59	jmc	1.3	LOGICAL useExch2GlobLayOut, zeroBuff
60			INTEGER xSize, ySize
61			INTEGER i, j, ij
62			INTEGER bi, bj
63	jahn	1.1	Real*8 sumAll
64			#ifdef ALLOW_USE_MPI
65			INTEGER npe
66			INTEGER lbuff, idest, itag
67			INTEGER istatus(MPI_STATUS_SIZE), ierr
68			#endif /* ALLOW_USE_MPI */
69			CEOP
70
71	jmc	1.3	#ifdef ALLOW_EXCH2
72			zeroBuff = .TRUE.
73			useExch2GlobLayOut = .TRUE.
74			xSize = exch2_global_Nx
75			ySize = exch2_global_Ny
76			#else /* ALLOW_EXCH2 */
77			zeroBuff = .FALSE.
78			useExch2GlobLayOut = .FALSE.
79			xSize = Nx
80			ySize = Ny
81			#endif /* ALLOW_EXCH2 */
82
83			C-- copy (and conversion to real*8) to Shared buffer:
84			DO bj = myByLo(myThid), myByHi(myThid)
85			DO bi = myBxLo(myThid), myBxHi(myThid)
86			DO j=1,sNy
87			DO i=1,sNx
88			sharedLocBuf_r8(i,j,bi,bj) = phiLocal(i,j,bi,bj)
89			ENDDO
90			ENDDO
91			ENDDO
92			ENDDO
93
94	jahn	1.1	C- note(jmc): do not see why we need this Barrier here + the one at the end ;
95			C however a) both are present in the standard version of global_sum ;
96			C b) with OpenMP on hugo, does not work if both are commented out
97			CALL BAR2( myThid )
98
99			C-- Gather local arrays
100	jmc	1.3	CALL GATHER_2D_R8(
101			O xy_buffer_r8,
102			I sharedLocBuf_r8,
103			I xSize, ySize,
104			I useExch2GlobLayOut, zeroBuff, myThid )
105	jahn	1.1
106			C-- Master thread does the communications and the global sum
107			C-- Master thread cannot start until everyone is ready:
108			CALL BAR2( myThid )
109			_BEGIN_MASTER( myThid )
110
111			#ifdef ALLOW_USE_MPI
112			idest = 0
113			itag = 0
114
115			IF ( mpiMyId.EQ.0 ) THEN
116			#endif
117
118			C-- Process 0 sums the global array
119	jmc	1.2	sumAll = 0. _d 0
120	jmc	1.3	DO ij=1,xSize*ySize
121			sumAll = sumAll + xy_buffer_r8(ij)
122	jmc	1.2	ENDDO
123	jahn	1.1
124			#ifdef ALLOW_USE_MPI
125			C-- Process 0 sends result to all other processes
126			lbuff = 1
127			DO npe = 1, numberOfProcs-1
128			CALL MPI_SEND (sumAll, 1, MPI_DOUBLE_PRECISION,
129			& npe, itag, MPI_COMM_MODEL, ierr)
130			ENDDO
131
132			ELSE
133	jmc	1.3
134	jahn	1.1	C-- All proceses except 0 receive result from process 0
135			CALL MPI_RECV (sumAll, 1, MPI_DOUBLE_PRECISION,
136			& idest, itag, MPI_COMM_MODEL, istatus, ierr)
137
138			ENDIF
139			#endif /* not ALLOW_USE_MPI */
140
141			C-- Write solution to shared buffer (all threads can see it)
142			shareBufGSR8(1,1) = sumAll
143
144			_END_MASTER( myThid )
145			C-- Everyone wait for Master thread to be ready
146			CALL BAR2( myThid )
147
148			C-- set result for every threads
149			sumPhi = shareBufGSR8(1,1)
150
151			C- note(jmc): do not see why we need this Barrier here (see comment @ the top)
152			CALL BAR2( myThid )
153
154			RETURN
155			END