C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/eesupp/src/exch_rx_send_put_x.template,v 1.9 2008/03/28 18:39:54 utke Exp $ C $Name: $ #include "CPP_EEOPTIONS.h" CBOP C !ROUTINE: EXCH_RX_SEND_PUT_X C !INTERFACE: SUBROUTINE EXCH_RX_SEND_PUT_X( array, I myOLw, myOLe, myOLs, myOLn, myNz, I exchWidthX, exchWidthY, I thesimulationMode, thecornerMode, myThid ) IMPLICIT NONE C !DESCRIPTION: C *==========================================================* C | SUBROUTINE EXCH_RX_SEND_PUT_X C | o "Send" or "put" X edges for RX array. C *==========================================================* C | Routine that invokes actual message passing send or C | direct "put" of data to update X faces of an XY[R] array. C *==========================================================* C !USES: C == Global variables == #include "SIZE.h" #include "EEPARAMS.h" #include "EESUPPORT.h" #include "EXCH.h" C !INPUT/OUTPUT PARAMETERS: C == Routine arguments == C array :: Array with edges to exchange. C myOLw :: West, East, North and South overlap region sizes. C myOLe C myOLn C myOLs C exchWidthX :: Width of data region exchanged. C exchWidthY C theSimulationMode :: Forward or reverse mode exchange ( provides C support for adjoint integration of code. ) C theCornerMode :: Flag indicating whether corner updates are C needed. C myThid :: Thread number of this instance of S/R EXCH... C eBl :: Edge buffer level INTEGER myOLw INTEGER myOLe INTEGER myOLs INTEGER myOLn INTEGER myNz _RX array(1-myOLw:sNx+myOLe, & 1-myOLs:sNy+myOLn, & myNZ, nSx, nSy) INTEGER exchWidthX INTEGER exchWidthY INTEGER theSimulationMode INTEGER theCornerMode INTEGER myThid C !LOCAL VARIABLES: C == Local variables == C I, J, K, iMin, iMax, iB :: Loop counters and extents C bi, bj C biW, bjW :: West tile indices C biE, bjE :: East tile indices C eBl :: Current exchange buffer level C theProc, theTag, theType, :: Variables used in message building C theSize C westCommMode :: Working variables holding type C eastCommMode of communication a particular C tile face uses. INTEGER I, J, K, iMin, iMax, iB INTEGER bi, bj, biW, bjW, biE, bjE INTEGER eBl INTEGER westCommMode INTEGER eastCommMode #ifdef ALLOW_USE_MPI INTEGER theProc, theTag, theType, theSize, mpiRc # ifdef ALLOW_AUTODIFF_OPENAD INTEGER mpiStatus(MPI_STATUS_SIZE) # endif #endif C-- Write data to exchange buffer C Various actions are possible depending on the communication mode C as follows: C Mode Action C -------- --------------------------- C COMM_NONE Do nothing C C COMM_MSG Message passing communication ( e.g. MPI ) C Fill west send buffer from this tile. C Send data with tag identifying tile and direction. C Fill east send buffer from this tile. C Send data with tag identifying tile and direction. C C COMM_PUT "Put" communication ( UMP_, shmemput, etc... ) C Fill east receive buffer of west-neighbor tile C Fill west receive buffer of east-neighbor tile C Sync. memory C Write data-ready Ack for east edge of west-neighbor C tile C Write data-ready Ack for west edge of east-neighbor C tile C Sync. memory C CEOP INTEGER myBxLoSave(MAX_NO_THREADS) INTEGER myBxHiSave(MAX_NO_THREADS) INTEGER myByLoSave(MAX_NO_THREADS) INTEGER myByHiSave(MAX_NO_THREADS) LOGICAL doingSingleThreadedComms doingSingleThreadedComms = .FALSE. #ifdef ALLOW_USE_MPI #ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN #endif C Set default behavior to have MPI comms done by a single thread. C Most MPI implementations don't support concurrent comms from C several threads. IF ( nThreads .GT. 1 ) THEN _BARRIER _BEGIN_MASTER( myThid ) DO I=1,nThreads myBxLoSave(I) = myBxLo(I) myBxHiSave(I) = myBxHi(I) myByLoSave(I) = myByLo(I) myByHiSave(I) = myByHi(I) ENDDO C Comment out loop below and myB[xy][Lo|Hi](1) settings below C if you want to get multi-threaded MPI comms. DO I=1,nThreads myBxLo(I) = 0 myBxHi(I) = -1 myByLo(I) = 0 myByHi(I) = -1 ENDDO myBxLo(1) = 1 myBxHi(1) = nSx myByLo(1) = 1 myByHi(1) = nSy doingSingleThreadedComms = .TRUE. _END_MASTER( myThid ) _BARRIER ENDIF #ifndef ALWAYS_USE_MPI ENDIF #endif #endif #ifdef ALLOW_AUTODIFF_OPENAD # ifdef ALLOW_USE_MPI DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) # ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN # endif CALL ampi_awaitall ( & exchNReqsX(1,bi,bj) , & exchReqIdX(1,1,bi,bj) , & mpiStatus , & mpiRC ) # ifndef ALWAYS_USE_MPI ENDIF # endif ENDDO ENDDO # endif #endif DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) ebL = exchangeBufLevel(1,bi,bj) westCommMode = _tileCommModeW(bi,bj) eastCommMode = _tileCommModeE(bi,bj) biE = _tileBiE(bi,bj) bjE = _tileBjE(bi,bj) biW = _tileBiW(bi,bj) bjW = _tileBjW(bi,bj) C o Send or Put west edge c >>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<< c >>>>>>>>>>>>>>>>>>> FORWARD RUN <<<<<<<<<<<<<<<<<<< c >>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<< IF ( _theSimulationMode .EQ. FORWARD_SIMULATION ) THEN iMin = 1 iMax = 1+exchWidthX-1 IF ( westCommMode .EQ. COMM_MSG ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 westSendBuf_RX(iB,eBl,bi,bj) = array(I,J,K,bi,bj) ENDDO ENDDO ENDDO C Send the data #ifdef ALLOW_USE_MPI #ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN #endif theProc = tilePidW(bi,bj) theTag = _tileTagSendW(bi,bj) theSize = iB theType = _MPI_TYPE_RX # ifndef ALLOW_AUTODIFF_OPENAD exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1 CALL MPI_Isend(westSendBuf_RX(1,eBl,bi,bj), theSize, theType, & theProc, theTag, MPI_COMM_MODEL, & exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), mpiRc ) # else CALL ampi_isend_RX( & westSendBuf_RX(1,eBl,bi,bj), & theSize, & theType, & theProc, & theTag, & MPI_COMM_MODEL, & exchReqIdX(exchNReqsX(1,bi,bj)+1,1,bi,bj), & exchNReqsX(1,bi,bj), & mpiStatus , & mpiRc ) # endif /* ALLOW_AUTODIFF_OPENAD */ #ifndef ALWAYS_USE_MPI ENDIF #endif #endif /* ALLOW_USE_MPI */ eastRecvAck(eBl,biW,bjW) = 1 ELSEIF ( westCommMode .EQ. COMM_PUT ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 eastRecvBuf_RX(iB,eBl,biW,bjW) = array(I,J,K,bi,bj) ENDDO ENDDO ENDDO ELSEIF ( westCommMode .NE. COMM_NONE & .AND. westCommMode .NE. COMM_GET ) THEN STOP ' S/R EXCH: Invalid commW mode.' ENDIF C o Send or Put east edge iMin = sNx-exchWidthX+1 iMax = sNx IF ( eastCommMode .EQ. COMM_MSG ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 eastSendBuf_RX(iB,eBl,bi,bj) = array(I,J,K,bi,bj) ENDDO ENDDO ENDDO C Send the data #ifdef ALLOW_USE_MPI #ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN #endif theProc = tilePidE(bi,bj) theTag = _tileTagSendE(bi,bj) theSize = iB theType = _MPI_TYPE_RX # ifndef ALLOW_AUTODIFF_OPENAD exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1 CALL MPI_Isend(eastSendBuf_RX(1,eBl,bi,bj), theSize, theType, & theProc, theTag, MPI_COMM_MODEL, & exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), mpiRc ) # else CALL ampi_isend_RX( & eastSendBuf_RX(1,eBl,bi,bj) , & theSize , & theType , & theProc , & theTag , & MPI_COMM_MODEL , & exchReqIdX(exchNReqsX(1,bi,bj)+1,1,bi,bj) , & exchNReqsX(1,bi,bj), & mpiStatus , & mpiRc ) # endif /* ALLOW_AUTODIFF_OPENAD */ #ifndef ALWAYS_USE_MPI ENDIF #endif #endif /* ALLOW_USE_MPI */ westRecvAck(eBl,biE,bjE) = 1 ELSEIF ( eastCommMode .EQ. COMM_PUT ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 westRecvBuf_RX(iB,eBl,biE,bjE) = array(I,J,K,bi,bj) ENDDO ENDDO ENDDO ELSEIF ( eastCommMode .NE. COMM_NONE & .AND. eastCommMode .NE. COMM_GET ) THEN STOP ' S/R EXCH: Invalid commE mode.' ENDIF #ifndef ALLOW_AUTODIFF_OPENAD c >>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<< c >>>>>>>>>>>>>>>>>>> ADJOINT RUN <<<<<<<<<<<<<<<<<<< c >>>>>>>>>>>>>>>>>>> <<<<<<<<<<<<<<<<<<< ELSEIF ( _theSimulationMode .EQ. REVERSE_SIMULATION ) THEN iMin = 1-exchWidthX iMax = 0 IF ( westCommMode .EQ. COMM_MSG ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 westSendBuf_RX(iB,eBl,bi,bj) = array(I,J,K,bi,bj) array(I,J,K,bi,bj) = 0.0 ENDDO ENDDO ENDDO C Send the data #ifdef ALLOW_USE_MPI #ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN #endif theProc = tilePidW(bi,bj) theTag = _tileTagSendW(bi,bj) theSize = iB theType = _MPI_TYPE_RX exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1 CALL MPI_Isend(westSendBuf_RX(1,eBl,bi,bj), theSize, theType, & theProc, theTag, MPI_COMM_MODEL, & exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), mpiRc ) #ifndef ALWAYS_USE_MPI ENDIF #endif #endif /* ALLOW_USE_MPI */ eastRecvAck(eBl,biW,bjW) = 1 ELSEIF ( westCommMode .EQ. COMM_PUT ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 eastRecvBuf_RX(iB,eBl,biW,bjW) = array(I,J,K,bi,bj) array(I,J,K,bi,bj) = 0.0 ENDDO ENDDO ENDDO ELSEIF ( westCommMode .NE. COMM_NONE & .AND. westCommMode .NE. COMM_GET ) THEN STOP ' S/R EXCH: Invalid commW mode.' ENDIF C o Send or Put east edge iMin = sNx+1 iMax = sNx+exchWidthX IF ( eastCommMode .EQ. COMM_MSG ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 eastSendBuf_RX(iB,eBl,bi,bj) = array(I,J,K,bi,bj) array(I,J,K,bi,bj) = 0.0 ENDDO ENDDO ENDDO C Send the data #ifdef ALLOW_USE_MPI #ifndef ALWAYS_USE_MPI IF ( usingMPI ) THEN #endif theProc = tilePidE(bi,bj) theTag = _tileTagSendE(bi,bj) theSize = iB theType = _MPI_TYPE_RX exchNReqsX(1,bi,bj) = exchNReqsX(1,bi,bj)+1 CALL MPI_Isend(eastSendBuf_RX(1,eBl,bi,bj), theSize, theType, & theProc, theTag, MPI_COMM_MODEL, & exchReqIdX(exchNReqsX(1,bi,bj),1,bi,bj), mpiRc ) #ifndef ALWAYS_USE_MPI ENDIF #endif #endif /* ALLOW_USE_MPI */ westRecvAck(eBl,biE,bjE) = 1 ELSEIF ( eastCommMode .EQ. COMM_PUT ) THEN iB = 0 DO K=1,myNz DO J=1,sNy DO I=iMin,iMax iB = iB + 1 westRecvBuf_RX(iB,eBl,biE,bjE) = array(I,J,K,bi,bj) array(I,J,K,bi,bj) = 0.0 ENDDO ENDDO ENDDO ELSEIF ( eastCommMode .NE. COMM_NONE & .AND. eastCommMode .NE. COMM_GET ) THEN STOP ' S/R EXCH: Invalid commE mode.' ENDIF #endif /* ALLOW_AUTODIFF_OPENAD */ ENDIF ENDDO ENDDO C-- Signal completetion ( making sure system-wide memory state is C-- consistent ). C ** NOTE ** We are relying on being able to produce strong-ordered C memory semantics here. In other words we assume that there is a C mechanism which can ensure that by the time the Ack is seen the C overlap region data that will be exchanged is up to date. IF ( exchNeedsMemSync ) CALL MEMSYNC DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) ebL = exchangeBufLevel(1,bi,bj) biE = _tileBiE(bi,bj) bjE = _tileBjE(bi,bj) biW = _tileBiW(bi,bj) bjW = _tileBjW(bi,bj) westCommMode = _tileCommModeW(bi,bj) eastCommMode = _tileCommModeE(bi,bj) IF ( westCommMode.EQ.COMM_PUT ) eastRecvAck(eBl,biW,bjW) = 1 IF ( eastCommMode.EQ.COMM_PUT ) westRecvAck(eBl,biE,bjE) = 1 IF ( westCommMode.EQ.COMM_GET ) eastRecvAck(eBl,biW,bjW) = 1 IF ( eastCommMode.EQ.COMM_GET ) westRecvAck(eBl,biE,bjE) = 1 ENDDO ENDDO C-- Make sure "ack" setting is seen system-wide. C Here strong-ordering is not an issue but we want to make C sure that processes that might spin on the above Ack settings C will see the setting. C ** NOTE ** On some machines we wont spin on the Ack setting C ( particularly the T90 ), instead we will use s system barrier. C On the T90 the system barrier is very fast and switches out the C thread while it waits. On most machines the system barrier C is much too slow and if we own the machine and have one thread C per process preemption is not a problem. IF ( exchNeedsMemSync ) CALL MEMSYNC _BARRIER IF ( doingSingleThreadedComms ) THEN C Restore saved settings that were stored to allow C single thred comms. _BEGIN_MASTER(myThid) DO I=1,nThreads myBxLo(I) = myBxLoSave(I) myBxHi(I) = myBxHiSave(I) myByLo(I) = myByLoSave(I) myByHi(I) = myByHiSave(I) ENDDO _END_MASTER(myThid) ENDIF _BARRIER RETURN END