C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/pkg/exch2/exch2_rx2_cube_ad.template,v 1.13 2012/09/03 19:39:25 jmc Exp $ C $Name: $ #include "CPP_EEOPTIONS.h" CBOP C !ROUTINE: EXCH_RX2_CUBE_AD C !INTERFACE: SUBROUTINE EXCH2_RX2_CUBE_AD( U array1, array2, I signOption, fieldCode, I myOLw, myOLe, myOLs, myOLn, myNz, I exchWidthX, exchWidthY, I cornerMode, myThid ) C !DESCRIPTION: C Two components vector field AD-Exchange: C Tile-edge overlap-region of a 2 component vector field is added to C corresponding near-edge interior data point and then zero out. C !USES: IMPLICIT NONE C == Global data == #include "SIZE.h" #include "EEPARAMS.h" #include "EESUPPORT.h" #include "W2_EXCH2_SIZE.h" #include "W2_EXCH2_TOPOLOGY.h" #include "W2_EXCH2_BUFFER.h" C !INPUT/OUTPUT PARAMETERS: C array1 :: 1rst component array with edges to exchange. C array2 :: 2nd component array with edges to exchange. C signOption :: Flag controlling whether vector is signed. C fieldCode :: field code (position on staggered grid) C myOLw,myOLe :: West and East overlap region sizes. C myOLs,myOLn :: South and North overlap region sizes. C exchWidthX :: Width of data region exchanged in X. C exchWidthY :: Width of data region exchanged in Y. C cornerMode :: halo-corner-region treatment: update/ignore corner region C myThid :: Thread number of this instance of S/R EXCH... INTEGER myOLw, myOLe, myOLs, myOLn, myNz _RX array1(1-myOLw:sNx+myOLe, & 1-myOLs:sNy+myOLn, & myNz, nSx, nSy) _RX array2(1-myOLw:sNx+myOLe, & 1-myOLs:sNy+myOLn, & myNz, nSx, nSy) LOGICAL signOption CHARACTER*2 fieldCode INTEGER exchWidthX INTEGER exchWidthY INTEGER cornerMode INTEGER myThid C !LOCAL VARIABLES: C e2_msgHandles :: Synchronization and coordination data structure used to C :: coordinate access to e2Bufr1_RX or to regulate message C :: buffering. In PUT communication sender will increment C :: handle entry once data is ready in buffer. Receiver will C :: decrement handle once data is consumed from buffer. C :: For MPI MSG communication MPI_Wait uses handle to check C :: Isend has cleared. This is done in routine after receives. C note: a) current implementation does not use e2_msgHandles for communication C between threads: all-threads barriers are used (see CNH note below). C For a 2-threads synchro communication (future version), C e2_msgHandles should be shared (in common block, moved to BUFFER.h) INTEGER bi, bj C Variables for working through W2 topology INTEGER e2_msgHandles( 2, W2_maxNeighbours, nSx, nSy ) INTEGER thisTile, farTile, N, nN, oN INTEGER tIlo1, tIhi1, tJlo1, tJhi1, oIs1, oJs1 INTEGER tIlo2, tIhi2, tJlo2, tJhi2, oIs2, oJs2 INTEGER tIStride, tJStride INTEGER tKlo, tKhi, tKStride INTEGER i1Lo, i1Hi, j1Lo, j1Hi, k1Lo, k1Hi INTEGER i2Lo, i2Hi, j2Lo, j2Hi, k2Lo, k2Hi LOGICAL updateCorners #ifdef ALLOW_USE_MPI INTEGER iBufr1, iBufr2, nri, nrj C MPI stuff (should be in a routine call) INTEGER mpiStatus(MPI_STATUS_SIZE) INTEGER mpiRc INTEGER wHandle #endif CEOP updateCorners = cornerMode .EQ. EXCH_UPDATE_CORNERS C- Tile size of arrays to exchange: i1Lo = 1-myOLw i1Hi = sNx+myOLe j1Lo = 1-myOLs j1Hi = sNy+myOLn k1Lo = 1 k1Hi = myNz i2Lo = 1-myOLw i2Hi = sNx+myOLe j2Lo = 1-myOLs j2Hi = sNy+myOLn k2Lo = 1 k2Hi = myNz C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| C Prevent anyone to access shared buffer while an other thread modifies it CALL BAR2( myThid ) C-- Extract from buffer (either from level 1 if local exch, C or level 2 if coming from an other Proc) C AD: = fill buffer from my-tile-edge overlap-region, level 1 or 2 depending C AD: on local (to this Proc) or remote Proc tile destination DO bj=myByLo(myThid), myByHi(myThid) DO bi=myBxLo(myThid), myBxHi(myThid) thisTile=W2_myTileList(bi,bj) nN=exch2_nNeighbours(thisTile) DO N=1,nN CALL EXCH2_GET_UV_BOUNDS( I fieldCode, exchWidthX, updateCorners, I thisTile, N, O tIlo1, tIhi1, tJlo1, tJhi1, O tIlo2, tIhi2, tJlo2, tJhi2, O tiStride, tjStride, O oIs1, oJs1, oIs2, oJs2, I myThid ) tKLo=1 tKHi=myNz tKStride=1 C From buffer, get my points C (tIlo:tIhi:tiStride,tJlo:tJhi,tJStride,tKlo:tKhi,tKStride) in "array1,2": C Note: when transferring data within a process: C o e2Bufr entry to read is entry associated with opposing send record C o e2_msgHandle entry to read is entry associated with opposing send record. CALL EXCH2_AD_GET_RX2( I tIlo1, tIhi1, tIlo2, tIhi2, tiStride, I tJlo1, tJhi1, tJlo2, tJhi2, tjStride, I tKlo, tKhi, tkStride, I thisTile, N, bi, bj, I e2BufrRecSize, W2_maxNeighbours, nSx, nSy, O iBuf1Filled(N,bi,bj), iBuf2Filled(N,bi,bj), O e2Bufr1_RX, e2Bufr2_RX, U array1(1-myOLw,1-myOLs,1,bi,bj), U array2(1-myOLw,1-myOLs,1,bi,bj), I i1Lo, i1Hi, j1Lo, j1Hi, k1Lo, k1Hi, I i2Lo, i2Hi, j2Lo, j2Hi, k2Lo, k2Hi, U e2_msgHandles, I W2_myCommFlag(N,bi,bj), myThid ) ENDDO ENDDO ENDDO C Wait until all threads finish filling buffer CALL BAR2( myThid ) C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| #ifdef ALLOW_USE_MPI IF ( usingMPI ) THEN C AD: all MPI part is acting on buffer and is identical to forward code, C AD: except a) the buffer level: send from lev.2, receive into lev.1 C AD: b) the length of transferred buffer (<- match the ad_put/ad_get) _BEGIN_MASTER( myThid ) C-- Send my data (in buffer, level 1) to target Process DO bj=1,nSy DO bi=1,nSx thisTile=W2_myTileList(bi,bj) nN=exch2_nNeighbours(thisTile) DO N=1,nN C- Skip the call if this is an internal exchange IF ( W2_myCommFlag(N,bi,bj) .EQ. 'M' ) THEN CALL EXCH2_SEND_RX2( I thisTile, N, I e2BufrRecSize, I iBuf1Filled(N,bi,bj), iBuf2Filled(N,bi,bj), I e2Bufr1_RX(1,N,bi,bj,2), e2Bufr2_RX(1,N,bi,bj,2), O e2_msgHandles(1,N,bi,bj), I W2_myCommFlag(N,bi,bj), myThid ) ENDIF ENDDO ENDDO ENDDO C-- Receive data (in buffer, level 1) from source Process DO bj=1,nSy DO bi=1,nSx thisTile=W2_myTileList(bi,bj) nN=exch2_nNeighbours(thisTile) DO N=1,nN C- Skip the call if this is an internal exchange IF ( W2_myCommFlag(N,bi,bj) .EQ. 'M' ) THEN farTile=exch2_neighbourId(N,thisTile) oN = exch2_opposingSend(N,thisTile) CALL EXCH2_GET_UV_BOUNDS( I fieldCode, exchWidthX, updateCorners, I farTile, oN, O tIlo1, tIhi1, tJlo1, tJhi1, O tIlo2, tIhi2, tJlo2, tJhi2, O tiStride, tjStride, O oIs1, oJs1, oIs2, oJs2, I myThid ) nri = 1 + (tIhi1-tIlo1)/tiStride nrj = 1 + (tJhi1-tJlo1)/tjStride iBufr1 = nri*nrj*myNz nri = 1 + (tIhi2-tIlo2)/tiStride nrj = 1 + (tJhi2-tJlo2)/tjStride iBufr2 = nri*nrj*myNz C Receive from neighbour N to fill buffer and later on the array CALL EXCH2_RECV_RX2( I thisTile, N, I e2BufrRecSize, I iBufr1, iBufr2, I e2Bufr1_RX(1,N,bi,bj,1), e2Bufr2_RX(1,N,bi,bj,1), I W2_myCommFlag(N,bi,bj), myThid ) ENDIF ENDDO ENDDO ENDDO C-- Clear message handles/locks DO bj=1,nSy DO bi=1,nSx thisTile=W2_myTileList(bi,bj) nN=exch2_nNeighbours(thisTile) DO N=1,nN C Note: In a between process tile-tile data transport using C MPI the sender needs to clear an Isend wait handle here. C In a within process tile-tile data transport using true C shared address space/or direct transfer through commonly C addressable memory blocks the receiver needs to assert C that he has consumed the buffer the sender filled here. farTile=exch2_neighbourId(N,thisTile) IF ( W2_myCommFlag(N,bi,bj) .EQ. 'M' ) THEN wHandle = e2_msgHandles(1,N,bi,bj) CALL MPI_Wait( wHandle, mpiStatus, mpiRc ) wHandle = e2_msgHandles(2,N,bi,bj) CALL MPI_Wait( wHandle, mpiStatus, mpiRc ) ELSEIF ( W2_myCommFlag(N,bi,bj) .EQ. 'P' ) THEN ELSE ENDIF ENDDO ENDDO ENDDO _END_MASTER( myThid ) C Everyone waits until master-thread finishes receiving CALL BAR2( myThid ) ENDIF #endif /* ALLOW_USE_MPI */ C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| C-- Post sends into buffer (buffer level 1): DO bj=myByLo(myThid), myByHi(myThid) DO bi=myBxLo(myThid), myBxHi(myThid) thisTile=W2_myTileList(bi,bj) nN=exch2_nNeighbours(thisTile) DO N=1,nN farTile=exch2_neighbourId(N,thisTile) oN = exch2_opposingSend(N,thisTile) CALL EXCH2_GET_UV_BOUNDS( I fieldCode, exchWidthX, updateCorners, I farTile, oN, O tIlo1, tIhi1, tJlo1, tJhi1, O tIlo2, tIhi2, tJlo2, tJhi2, O tiStride, tjStride, O oIs1, oJs1, oIs2, oJs2, I myThid ) tKLo=1 tKHi=myNz tKStride=1 C- Put my points in buffer for neighbour N to fill points C (tIlo:tIhi:tiStride,tJlo:tJhi,tJStride,tKlo:tKhi,tKStride) C in its copy of "array1" & "array2". CALL EXCH2_AD_PUT_RX2( I tIlo1, tIhi1, tIlo2, tIhi2, tiStride, I tJlo1, tJhi1, tJlo2, tJhi2, tjStride, I tKlo, tKhi, tkStride, I oIs1, oJs1, oIs2, oJs2, I thisTile, N, I e2BufrRecSize, O e2Bufr1_RX(1,N,bi,bj,1), e2Bufr2_RX(1,N,bi,bj,1), I array1(1-myOLw,1-myOLs,1,bi,bj), I array2(1-myOLw,1-myOLs,1,bi,bj), I i1Lo, i1Hi, j1Lo, j1Hi, k1Lo, k1Hi, I i2Lo, i2Hi, j2Lo, j2Hi, k2Lo, k2Hi, O e2_msgHandles(1,N,bi,bj), I W2_myCommFlag(N,bi,bj), signOption, myThid ) ENDDO ENDDO ENDDO RETURN END C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| CEH3 ;;; Local Variables: *** CEH3 ;;; mode:fortran *** CEH3 ;;; End: ***