C $Header: /u/gcmpack/MITgcm/eesupp/src/cumulsum_z_tile.F,v 1.1 2011/07/01 18:18:31 jmc Exp $
C $Name: $

#include "CPP_EEOPTIONS.h"
#include "W2_OPTIONS.h"

C--  File w2_cumulsum_z_tile.F: Routines that perform cumulated sum
C     on a tiled array, corner grid-cell location
C     Contents
C     o W2_CUMULSUM_Z_TILE_RL
C     o W2_CUMULSUM_Z_TILE_RS <- not yet coded

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: W2_CUMULSUM_Z_TILE_RL

C     !INTERFACE:
      SUBROUTINE W2_CUMULSUM_Z_TILE_RL(
     O                       psiZ, psiLoc,
     I                       dPsiX, dPsiY, myThid )

C     !DESCRIPTION:
C     *==========================================================*
C     | SUBROUTINE W2_CUMULSUM_Z_TILE_RL
C     | o Handle cumulated sum for _RL tile data.
C     *==========================================================*
C     | Cumulate sum on tiled array, corner grid-cell location:
C     | starts from the 1st tile and, going through all tiles &
C     | all the processes, adds the increments in both directions.
C     *==========================================================*
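C     | In effect (see the cumulate-sum loop over tiles below),
C     | the result for each target tile tN is assembled as:
C     |   psiZ(tN) = Sum_tS [ W2_cumSum_tiles(1,tS,tN)*dPsiX(tS)
C     |                     + W2_cumSum_tiles(2,tS,tN)*dPsiY(tS) ]
C     | with path weights W2_cumSum_tiles pre-computed by the
C     | EXCH2 topology setup.
C     *==========================================================*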

C     !USES:
      IMPLICIT NONE

C     == Global data ==
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
#include "W2_EXCH2_SIZE.h"
#include "W2_EXCH2_TOPOLOGY.h"
#include "CUMULSUM.h"

C     !INPUT/OUTPUT PARAMETERS:
C     == Routine arguments ==
C     psiZ   :: result of the cumulated sum, at the tile South-East corner
C     psiLoc :: cumulated sum at special locations
C     dPsiX  :: tile increment in X direction
C     dPsiY  :: tile increment in Y direction
C     myThid :: my Thread Id. number
      _RL     psiZ  (nSx,nSy)
      _RL     psiLoc(2)
      _RL     dPsiX (nSx,nSy)
      _RL     dPsiY (nSx,nSy)
      INTEGER myThid

C     !LOCAL VARIABLES:
C     == Local variables ==
C     bi,bj :: tile indices
C     tN,tS :: tile numbers (target & source) in the global tile listing
C     globalBuf :: buffer holding, for each tile, the X & Y increments
C                  (1:2) and the resulting cumulated sum (3)
C-    type declaration of: loc[1,2]Buf and shareBufCS[1,2]_R8 :
C     all 4 need to have the same length as MPI_DOUBLE_PRECISION
      INTEGER bi,bj
      INTEGER tN, tS
      Real*8  globalBuf(3,W2_maxNbTiles)
#ifdef ALLOW_USE_MPI
      INTEGER npe, np1
      INTEGER lbuf1, lbuf2, idest, itag, ready_to_receive
      INTEGER istatus(MPI_STATUS_SIZE), ierr
      Real*8  loc1Buf (nSx,nSy)
      Real*8  loc2Buf(2,nSx,nSy)
#endif /* ALLOW_USE_MPI */
CEOP

C--   Initialise to zero:
      psiLoc(1) = 0.
      psiLoc(2) = 0.
      DO tN = 1,exch2_nTiles
        globalBuf(1,tN) = 0.
        globalBuf(2,tN) = 0.
        globalBuf(3,tN) = 0.
      ENDDO

C--   write input into shared-buffer array
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
         shareBufCS2_R8(1,bi,bj) = dPsiX(bi,bj)
         shareBufCS2_R8(2,bi,bj) = dPsiY(bi,bj)
       ENDDO
      ENDDO
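
C-    (shareBufCS2_R8 & shareBufCS1_R8 are shared-memory buffers,
C      presumably declared in the included CUMULSUM.h, so that the
C      master thread can access the increments from every thread)
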
C--   Master thread cannot start until everyone is ready:
      CALL BAR2( myThid )
      _BEGIN_MASTER( myThid )
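C     (between _BEGIN_MASTER and _END_MASTER, only the master thread
C      of each process is active: it performs the MPI exchanges below
C      while the other threads wait at the second BAR2 call)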

#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif

        lbuf1 = nSx*nSy
        lbuf2 = 2*lbuf1
        idest = 0
        itag  = 0
        ready_to_receive = 0
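
C-    Note: with the ready_to_receive handshake below, each process
C     waits to be polled before sending its data, presumably so that
C     process 0 receives from one process at a time; the handshake
C     can be disabled with DISABLE_MPI_READY_TO_RECEIVE.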
      IF ( mpiMyId.NE.0 ) THEN

C--   All processes except 0 wait to be polled, then send local array
#ifndef DISABLE_MPI_READY_TO_RECEIVE
        CALL MPI_RECV (ready_to_receive, 1, MPI_INTEGER,
     &       idest, itag, MPI_COMM_MODEL, istatus, ierr)
#endif
        CALL MPI_SEND (shareBufCS2_R8, lbuf2, MPI_DOUBLE_PRECISION,
     &       idest, itag, MPI_COMM_MODEL, ierr)

C--   All processes except 0 receive result from process 0
        CALL MPI_RECV (shareBufCS1_R8, lbuf1, MPI_DOUBLE_PRECISION,
     &       idest, itag, MPI_COMM_MODEL, istatus, ierr)

      ELSE

C--   Process 0 polls and receives data from each process in turn
        DO npe = 1, numberOfProcs-1
#ifndef DISABLE_MPI_READY_TO_RECEIVE
          CALL MPI_SEND (ready_to_receive, 1, MPI_INTEGER,
     &         npe, itag, MPI_COMM_MODEL, ierr)
#endif
          CALL MPI_RECV (loc2Buf, lbuf2, MPI_DOUBLE_PRECISION,
     &         npe, itag, MPI_COMM_MODEL, istatus, ierr)

C--   Process 0 gathers the local arrays into a global array.
          np1 = npe + 1
          DO bj=1,nSy
           DO bi=1,nSx
             tN = W2_procTileList(bi,bj,np1)
             globalBuf(1,tN) = loc2Buf(1,bi,bj)
             globalBuf(2,tN) = loc2Buf(2,bi,bj)
           ENDDO
          ENDDO
        ENDDO

C--   end if process not 0 / else = 0
      ENDIF

#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

      IF ( myProcId.EQ.0 ) THEN

C--   Process 0 fills-in its local data
        DO bj=1,nSy
         DO bi=1,nSx
           tN = W2_myTileList(bi,bj)
           globalBuf(1,tN) = shareBufCS2_R8(1,bi,bj)
           globalBuf(2,tN) = shareBufCS2_R8(2,bi,bj)
         ENDDO
        ENDDO

C--   Cumulate Sum over all tiles:
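C-    for each target tile tN, accumulate the X & Y increments from
C     every source tile tS, weighted by the pre-computed path weights
C     W2_cumSum_tiles from the EXCH2 topology setup: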
        DO tN = 1,exch2_nTiles
          globalBuf(3,tN) = 0.
          DO tS = 1,exch2_nTiles
            globalBuf(3,tN) = globalBuf(3,tN)
     &            + W2_cumSum_tiles(1,tS,tN)*globalBuf(1,tS)
     &            + W2_cumSum_tiles(2,tS,tN)*globalBuf(2,tS)
          ENDDO
        ENDDO
C-    Value at Special location (e.g., Missing-Corner values)
        IF ( W2_tMC1.GE.1 )
     &    psiLoc(1) = globalBuf(3,W2_tMC1) + globalBuf(2,W2_tMC1)
        IF ( W2_tMC2.GE.1 )
     &    psiLoc(2) = globalBuf(3,W2_tMC2) + globalBuf(1,W2_tMC2)

C--   Process 0 copies its result into the shared buffer
        DO bj=1,nSy
         DO bi=1,nSx
           tN = W2_myTileList(bi,bj)
           shareBufCS1_R8(bi,bj) = globalBuf(3,tN)
         ENDDO
        ENDDO

#ifdef ALLOW_USE_MPI
#ifndef ALWAYS_USE_MPI
      IF ( usingMPI ) THEN
#endif
C--   Process 0 sends result to all other processes
        DO npe = 1, numberOfProcs-1
C-    fill local array with relevant portion of global array
          np1 = npe + 1
          DO bj=1,nSy
           DO bi=1,nSx
             tN = W2_procTileList(bi,bj,np1)
             loc1Buf(bi,bj) = globalBuf(3,tN)
           ENDDO
          ENDDO
          CALL MPI_SEND (loc1Buf, lbuf1, MPI_DOUBLE_PRECISION,
     &         npe, itag, MPI_COMM_MODEL, ierr)
        ENDDO

#ifndef ALWAYS_USE_MPI
      ENDIF
#endif
#endif /* ALLOW_USE_MPI */

C--   end if process 0
      ENDIF

      _END_MASTER( myThid )
C--   Everyone waits for the Master thread to be ready
      CALL BAR2( myThid )

C--   set result for every thread
      DO bj = myByLo(myThid), myByHi(myThid)
       DO bi = myBxLo(myThid), myBxHi(myThid)
         psiZ(bi,bj) = shareBufCS1_R8(bi,bj)
       ENDDO
      ENDDO

      RETURN
      END