model/src/cg2d_nsa.F

C $Header: /u/gcmpack/MITgcm/model/src/cg2d_nsa.F,v 1.6 2012/08/12 20:24:23 jmc Exp $
C $Name:  $

#include "PACKAGES_CONFIG.h"
#include "CPP_OPTIONS.h"
#ifdef ALLOW_AUTODIFF
# include "AUTODIFF_OPTIONS.h"
#endif

CML THIS DOES NOT WORK +++++
#undef ALLOW_LOOP_DIRECTIVE
CBOP
C     !ROUTINE: CG2D_NSA
C     !INTERFACE:
      SUBROUTINE CG2D_NSA(
     U                cg2d_b, cg2d_x,
     O                firstResidual, minResidualSq, lastResidual,
     U                numIters, nIterMin,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG2D_NSA
C     | o Two-dimensional grid problem conjugate-gradient
C     |   inverter (with preconditioner).
C     | o This version is used only in the case when the matrix
C     |   operator is not "self-adjoint" (NSA). Any remaining
C     |   residuals will immediately reported to the department
C     |   of homeland security.
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.
C     | It requires the A be symmetric.
C     | This implementation assumes A is a five-diagonal
C     | matrix of the form that arises in the discrete
C     | representation of the del^2 operator in a
C     | two-dimensional space.
C     | Notes:
C     | ======
C     | This implementation can support shared-memory
C     | multi-threaded execution. In order to do this COMMON
C     | blocks are used for many of the arrays - even ones that
C     | are only used for intermedaite results. This design is
C     | OK if you want to all the threads to collaborate on
C     | solving the same problem. On the other hand if you want
C     | the threads to solve several different problems
C     | concurrently this implementation will not work.
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "CG2D.h"
#ifdef ALLOW_AUTODIFF
# include "tamc.h"
# include "tamc_keys.h"
#endif

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     cg2d_b    :: The source term or "right hand side" (Output: normalised RHS)
C     cg2d_x    :: The solution (Input: first guess)
C     firstResidual :: the initial residual before any iterations
C     minResidualSq :: the lowest residual reached (squared)
C     lastResidual  :: the actual residual reached
C     numIters  :: Inp: the maximum number of iterations allowed
C                  Out: the actual number of iterations used
C     nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
C                  Out: iteration number corresponding to lowest residual
C     myThid    :: Thread on which I am working.
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  firstResidual
      _RL  minResidualSq
      _RL  lastResidual
      INTEGER numIters
      INTEGER nIterMin
      INTEGER myThid

#ifdef ALLOW_CG2D_NSA
C     !LOCAL VARIABLES:
C     === Local variables ====
C     bi, bj     :: tile index in X and Y.
C     i, j, it2d :: Loop counters ( it2d counts CG iterations )
C     actualIts  :: actual CG iteration number
C     err_sq     :: Measure of the square of the residual of Ax - b.
C     eta_qrN    :: Used in computing search directions; suffix N and NM1
C     eta_qrNM1     denote current and previous iterations respectively.
C     recip_eta_qrNM1 :: reciprocal of eta_qrNM1
C     cgBeta     :: coeff used to update conjugate direction vector "s".
C     alpha      :: coeff used to update solution & residual
C     alphaSum   :: to avoid the statement: alpha = 1./alpha (for TAMC/TAF)
C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
C                   debugging/trouble shooting diagnostic. For neumann problems
C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
C     cg2d_min   :: used to store solution corresponding to lowest residual.
C     msgBuf     :: Informational/error message buffer
      INTEGER bi, bj
      INTEGER i, j, it2d
      INTEGER actualIts
      _RL    cg2dTolerance_sq
      _RL    err_sq,  errTile(nSx,nSy)
      _RL    eta_qrN, eta_qrNtile(nSx,nSy)
      _RL    eta_qrNM1, recip_eta_qrNM1
      _RL    cgBeta,  alpha
      _RL    alphaSum,alphaTile(nSx,nSy)
      _RL    sumRHS,  sumRHStile(nSx,nSy)
      _RL    rhsMax,  rhsNorm
      CHARACTER*(MAX_LEN_MBUF) msgBuf
      LOGICAL printResidual
CEOP

#ifdef ALLOW_AUTODIFF_TAMC
      IF ( numIters .GT. numItersMax ) THEN
        WRITE(msgBuf,'(A,I10)')
     &    'CG2D_NSA: numIters > numItersMax =', numItersMax
        CALL PRINT_ERROR( msgBuf, myThid )
        STOP 'ABNORMAL END: S/R CG2D_NSA'
      ENDIF
      IF ( cg2dNormaliseRHS ) THEN
        WRITE(msgBuf,'(A)') 'CG2D_NSA: cg2dNormaliseRHS is disabled'
        CALL PRINT_ERROR( msgBuf, myThid )
        WRITE(msgBuf,'(A)')
     &    'set cg2dTargetResWunit (instead of cg2dTargetResidual)'
        CALL PRINT_ERROR( msgBuf, myThid )
        STOP 'ABNORMAL END: S/R CG2D_NSA'
      ENDIF
#endif /* ALLOW_AUTODIFF_TAMC */

#ifdef ALLOW_AUTODIFF_TAMC
          act1 = myThid - 1
          max1 = nTx*nTy
          act2 = ikey_dynamics - 1
          ikey = (act1 + 1) + act2*max1
#endif /* ALLOW_AUTODIFF_TAMC */

C--   Initialise auxiliary constant, some output variable and inverter
      cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
      minResidualSq = -1. _d 0
      eta_qrNM1     =  1. _d 0
      recip_eta_qrNM1= 1. _d 0

C--   Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
          rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

#ifndef ALLOW_AUTODIFF_TAMC
      IF (cg2dNormaliseRHS) THEN
C-    Normalise RHS :
       _GLOBAL_MAX_RL( rhsMax, myThid )
       rhsNorm = 1. _d 0
       IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
           cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
          ENDDO
         ENDDO
        ENDDO
       ENDDO
C-    end Normalise RHS
      ENDIF
#endif /* ndef ALLOW_AUTODIFF_TAMC */

C--   Update overlaps
      CALL EXCH_XY_RL( cg2d_x, myThid )

C--   Initial residual calculation
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        errTile(bi,bj)    = 0. _d 0
        sumRHStile(bi,bj) = 0. _d 0
        DO j=0,sNy+1
         DO i=0,sNx+1
          cg2d_s(i,j,bi,bj) = 0.
         ENDDO
        ENDDO
        DO j=1,sNy
         DO i=1,sNx
          cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
     &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
     &    )
          errTile(bi,bj)    = errTile(bi,bj)
     &                      + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

c     CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
      CALL EXCH_XY_RL ( cg2d_r, myThid )
      CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
      CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
      actualIts = 0
      IF ( err_sq .NE. 0. ) THEN
        firstResidual = SQRT(err_sq)
      ELSE
        firstResidual = 0.
      ENDIF

      printResidual = .FALSE.
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        printResidual = printResidualFreq.GE.1
        WRITE(standardmessageunit,'(A,1P2E22.14)')
     &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
        _END_MASTER( myThid )
      ENDIF

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Cml   begin main solver loop
#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
CADJ LOOP = iteration, cg2d_x = comlev_cg2d_iter
#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */
      DO it2d=1, numIters
#ifdef ALLOW_LOOP_DIRECTIVE
CML      it2d = 0
CML      DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters )
CML      it2d = it2d+1
#endif /* ALLOW_LOOP_DIRECTIVE */

#ifdef ALLOW_AUTODIFF_TAMC
       icg2dkey = (ikey-1)*numItersMax + it2d
CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
      IF ( err_sq .GE. cg2dTolerance_sq ) THEN

C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         eta_qrNtile(bi,bj) = 0. _d 0
         DO j=1,sNy
          DO i=1,sNx
           cg2d_z(i,j,bi,bj) =
     &      pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
     &     +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
     &     +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
     &     +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
     &     +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
           eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
     &     +cg2d_z(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

       CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
#ifdef ALLOW_AUTODIFF_TAMC
CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
CML       cgBeta   = eta_qrN/eta_qrNM1
       cgBeta   = eta_qrN*recip_eta_qrNM1
Cml    store normalisation factor for the next interation (in case there is one).
CML    store the inverse of the normalization factor for higher precision
CML       eta_qrNM1 = eta_qrN
       recip_eta_qrNM1 = 1. _d 0/eta_qrN

       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_s(i,j,bi,bj) = cg2d_z(i,j,bi,bj)
     &                       + cgBeta*cg2d_s(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

C--    Do exchanges that require messages i.e. between
C--    processes.
c      CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
       CALL EXCH_XY_RL ( cg2d_s, myThid )

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
#ifdef ALLOW_AUTODIFF_TAMC
#ifndef ALLOW_LOOP_DIRECTIVE
CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* not ALLOW_LOOP_DIRECTIVE */
#endif /* ALLOW_AUTODIFF_TAMC */
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         alphaTile(bi,bj) = 0. _d 0
         DO j=1,sNy
          DO i=1,sNx
           cg2d_q(i,j,bi,bj) =
     &     aW2d(i  ,j  ,bi,bj)*cg2d_s(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_s(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_s(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j  ,bi,bj)
           alphaTile(bi,bj) = alphaTile(bi,bj)
     &                      + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
       CALL GLOBAL_SUM_TILE_RL( alphaTile, alphaSum, myThid )
       alpha = eta_qrN/alphaSum

C==    Update simultaneously solution and residual vectors (and Iter number)
C      Now compute "interior" points.
#ifdef ALLOW_AUTODIFF_TAMC
#ifndef ALLOW_LOOP_DIRECTIVE
CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_LOOP_DIRECTIVE */
#endif /* ALLOW_AUTODIFF_TAMC */
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         errTile(bi,bj) = 0. _d 0
         DO j=1,sNy
          DO i=1,sNx
           cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
           cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
           errTile(bi,bj) = errTile(bi,bj)
     &                    + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
       actualIts = it2d

       CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq,    myThid )
       IF ( printResidual ) THEN
        IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
         WRITE(msgBuf,'(A,I6,A,1PE21.14)')
     &    ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
         CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
     &                       SQUEEZE_RIGHT, myThid )
        ENDIF
       ENDIF

c      CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
       CALL EXCH_XY_RL ( cg2d_r, myThid )

Cml   end of if "err >= cg2dTolerance" block ; end main solver loop
      ENDIF
      ENDDO

#ifndef ALLOW_AUTODIFF_TAMC
      IF (cg2dNormaliseRHS) THEN
C--   Un-normalise the answer
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO j=1,sNy
           DO i=1,sNx
            cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF
#endif /* ndef ALLOW_AUTODIFF_TAMC */

C--   Return parameters to caller
      IF ( err_sq .NE. 0. ) THEN
        lastResidual = SQRT(err_sq)
      ELSE
        lastResidual = 0.
      ENDIF
      numIters = actualIts

#endif /* ALLOW_CG2D_NSA */
      RETURN
      END

#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))

C     These routines are routinely part of the TAMC/TAF library that is
C     not included in the MITcgm, therefore they are mimicked here.

      subroutine adstore(chardum,int1,idow,int2,int3,icount)

      implicit none

#include "SIZE.h"
#include "tamc.h"

      character*(*) chardum
      integer int1, int2, int3, idow, icount

C     the length of this vector must be greater or equal
C     twice the number of timesteps
      integer nidow
#ifdef ALLOW_TAMC_CHECKPOINTING
      parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 )
#else
      parameter ( nidow = 1000000 )
#endif /* ALLOW_TAMC_CHECKPOINTING */
      integer istoreidow(nidow)
      common /istorecommon/ istoreidow

      print *, 'adstore: ', chardum, int1, idow, int2, int3, icount

      if ( icount .gt. nidow ) then
         print *, 'adstore: error: icount > nidow = ', nidow
         stop 'ABNORMAL STOP in adstore'
      endif

      istoreidow(icount) = idow

      return
      end

      subroutine adresto(chardum,int1,idow,int2,int3,icount)

      implicit none

#include "SIZE.h"
#include "tamc.h"

      character*(*) chardum
      integer int1, int2, int3, idow, icount

C     the length of this vector must be greater or equal
C     twice the number of timesteps
      integer nidow
#ifdef ALLOW_TAMC_CHECKPOINTING
      parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 )
#else
      parameter ( nidow = 1000000 )
#endif /* ALLOW_TAMC_CHECKPOINTING */
      integer istoreidow(nidow)
      common /istorecommon/ istoreidow

      print *, 'adresto: ', chardum, int1, idow, int2, int3, icount

      if ( icount .gt. nidow ) then
         print *, 'adstore: error: icount > nidow = ', nidow
         stop 'ABNORMAL STOP in adstore'
      endif

      idow = istoreidow(icount)

      return
      end
#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */
1	C $Header: /u/gcmpack/MITgcm/model/src/cg2d_nsa.F,v 1.6 2012/08/12 20:24:23 jmc Exp $
2	C $Name: $
3
4	#include "PACKAGES_CONFIG.h"
5	#include "CPP_OPTIONS.h"
6	#ifdef ALLOW_AUTODIFF
7	# include "AUTODIFF_OPTIONS.h"
8	#endif
9
10	CML THIS DOES NOT WORK +++++
11	#undef ALLOW_LOOP_DIRECTIVE
12	CBOP
13	C !ROUTINE: CG2D_NSA
14	C !INTERFACE:
15	SUBROUTINE CG2D_NSA(
16	U cg2d_b, cg2d_x,
17	O firstResidual, minResidualSq, lastResidual,
18	U numIters, nIterMin,
19	I myThid )
20	C !DESCRIPTION: \bv
21	C ==========================================================
22	C \| SUBROUTINE CG2D_NSA
23	C \| o Two-dimensional grid problem conjugate-gradient
24	C \| inverter (with preconditioner).
25	C \| o This version is used only in the case when the matrix
26	C \| operator is not "self-adjoint" (NSA). Any remaining
27	C \| residuals will immediately reported to the department
28	C \| of homeland security.
29	C ==========================================================
30	C \| Con. grad is an iterative procedure for solving Ax = b.
31	C \| It requires the A be symmetric.
32	C \| This implementation assumes A is a five-diagonal
33	C \| matrix of the form that arises in the discrete
34	C \| representation of the del^2 operator in a
35	C \| two-dimensional space.
36	C \| Notes:
37	C \| ======
38	C \| This implementation can support shared-memory
39	C \| multi-threaded execution. In order to do this COMMON
40	C \| blocks are used for many of the arrays - even ones that
41	C \| are only used for intermedaite results. This design is
42	C \| OK if you want to all the threads to collaborate on
43	C \| solving the same problem. On the other hand if you want
44	C \| the threads to solve several different problems
45	C \| concurrently this implementation will not work.
46	C ==========================================================
47	C \ev
48
49	C !USES:
50	IMPLICIT NONE
51	C === Global data ===
52	#include "SIZE.h"
53	#include "EEPARAMS.h"
54	#include "PARAMS.h"
55	#include "CG2D.h"
56	#ifdef ALLOW_AUTODIFF
57	# include "tamc.h"
58	# include "tamc_keys.h"
59	#endif
60
61	C !INPUT/OUTPUT PARAMETERS:
62	C === Routine arguments ===
63	C cg2d_b :: The source term or "right hand side" (Output: normalised RHS)
64	C cg2d_x :: The solution (Input: first guess)
65	C firstResidual :: the initial residual before any iterations
66	C minResidualSq :: the lowest residual reached (squared)
67	C lastResidual :: the actual residual reached
68	C numIters :: Inp: the maximum number of iterations allowed
69	C Out: the actual number of iterations used
70	C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
71	C Out: iteration number corresponding to lowest residual
72	C myThid :: Thread on which I am working.
73	_RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
74	_RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
75	_RL firstResidual
76	_RL minResidualSq
77	_RL lastResidual
78	INTEGER numIters
79	INTEGER nIterMin
80	INTEGER myThid
81
82	#ifdef ALLOW_CG2D_NSA
83	C !LOCAL VARIABLES:
84	C === Local variables ====
85	C bi, bj :: tile index in X and Y.
86	C i, j, it2d :: Loop counters ( it2d counts CG iterations )
87	C actualIts :: actual CG iteration number
88	C err_sq :: Measure of the square of the residual of Ax - b.
89	C eta_qrN :: Used in computing search directions; suffix N and NM1
90	C eta_qrNM1 denote current and previous iterations respectively.
91	C recip_eta_qrNM1 :: reciprocal of eta_qrNM1
92	C cgBeta :: coeff used to update conjugate direction vector "s".
93	C alpha :: coeff used to update solution & residual
94	C alphaSum :: to avoid the statement: alpha = 1./alpha (for TAMC/TAF)
95	C sumRHS :: Sum of right-hand-side. Sometimes this is a useful
96	C debugging/trouble shooting diagnostic. For neumann problems
97	C sumRHS needs to be ~0 or it converge at a non-zero residual.
98	C cg2d_min :: used to store solution corresponding to lowest residual.
99	C msgBuf :: Informational/error message buffer
100	INTEGER bi, bj
101	INTEGER i, j, it2d
102	INTEGER actualIts
103	_RL cg2dTolerance_sq
104	_RL err_sq, errTile(nSx,nSy)
105	_RL eta_qrN, eta_qrNtile(nSx,nSy)
106	_RL eta_qrNM1, recip_eta_qrNM1
107	_RL cgBeta, alpha
108	_RL alphaSum,alphaTile(nSx,nSy)
109	_RL sumRHS, sumRHStile(nSx,nSy)
110	_RL rhsMax, rhsNorm
111	CHARACTER*(MAX_LEN_MBUF) msgBuf
112	LOGICAL printResidual
113	CEOP
114
115	#ifdef ALLOW_AUTODIFF_TAMC
116	IF ( numIters .GT. numItersMax ) THEN
117	WRITE(msgBuf,'(A,I10)')
118	& 'CG2D_NSA: numIters > numItersMax =', numItersMax
119	CALL PRINT_ERROR( msgBuf, myThid )
120	STOP 'ABNORMAL END: S/R CG2D_NSA'
121	ENDIF
122	IF ( cg2dNormaliseRHS ) THEN
123	WRITE(msgBuf,'(A)') 'CG2D_NSA: cg2dNormaliseRHS is disabled'
124	CALL PRINT_ERROR( msgBuf, myThid )
125	WRITE(msgBuf,'(A)')
126	& 'set cg2dTargetResWunit (instead of cg2dTargetResidual)'
127	CALL PRINT_ERROR( msgBuf, myThid )
128	STOP 'ABNORMAL END: S/R CG2D_NSA'
129	ENDIF
130	#endif /* ALLOW_AUTODIFF_TAMC */
131
132	#ifdef ALLOW_AUTODIFF_TAMC
133	act1 = myThid - 1
134	max1 = nTx*nTy
135	act2 = ikey_dynamics - 1
136	ikey = (act1 + 1) + act2*max1
137	#endif /* ALLOW_AUTODIFF_TAMC */
138
139	C-- Initialise auxiliary constant, some output variable and inverter
140	cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
141	minResidualSq = -1. _d 0
142	eta_qrNM1 = 1. _d 0
143	recip_eta_qrNM1= 1. _d 0
144
145	C-- Normalise RHS
146	rhsMax = 0. _d 0
147	DO bj=myByLo(myThid),myByHi(myThid)
148	DO bi=myBxLo(myThid),myBxHi(myThid)
149	DO j=1,sNy
150	DO i=1,sNx
151	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
152	rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
153	ENDDO
154	ENDDO
155	ENDDO
156	ENDDO
157
158	#ifndef ALLOW_AUTODIFF_TAMC
159	IF (cg2dNormaliseRHS) THEN
160	C- Normalise RHS :
161	_GLOBAL_MAX_RL( rhsMax, myThid )
162	rhsNorm = 1. _d 0
163	IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
164	DO bj=myByLo(myThid),myByHi(myThid)
165	DO bi=myBxLo(myThid),myBxHi(myThid)
166	DO j=1,sNy
167	DO i=1,sNx
168	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
169	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
170	ENDDO
171	ENDDO
172	ENDDO
173	ENDDO
174	C- end Normalise RHS
175	ENDIF
176	#endif /* ndef ALLOW_AUTODIFF_TAMC */
177
178	C-- Update overlaps
179	CALL EXCH_XY_RL( cg2d_x, myThid )
180
181	C-- Initial residual calculation
182	#ifdef ALLOW_AUTODIFF_TAMC
183	CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
184	CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
185	#endif /* ALLOW_AUTODIFF_TAMC */
186	DO bj=myByLo(myThid),myByHi(myThid)
187	DO bi=myBxLo(myThid),myBxHi(myThid)
188	errTile(bi,bj) = 0. _d 0
189	sumRHStile(bi,bj) = 0. _d 0
190	DO j=0,sNy+1
191	DO i=0,sNx+1
192	cg2d_s(i,j,bi,bj) = 0.
193	ENDDO
194	ENDDO
195	DO j=1,sNy
196	DO i=1,sNx
197	cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
198	& (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
199	& +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
200	& +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
201	& +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
202	& +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
203	& )
204	errTile(bi,bj) = errTile(bi,bj)
205	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
206	sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
207	ENDDO
208	ENDDO
209	ENDDO
210	ENDDO
211
212	c CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
213	CALL EXCH_XY_RL ( cg2d_r, myThid )
214	CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
215	CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
216	actualIts = 0
217	IF ( err_sq .NE. 0. ) THEN
218	firstResidual = SQRT(err_sq)
219	ELSE
220	firstResidual = 0.
221	ENDIF
222
223	printResidual = .FALSE.
224	IF ( debugLevel .GE. debLevZero ) THEN
225	_BEGIN_MASTER( myThid )
226	printResidual = printResidualFreq.GE.1
227	WRITE(standardmessageunit,'(A,1P2E22.14)')
228	& ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
229	_END_MASTER( myThid )
230	ENDIF
231
232	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
233	Cml begin main solver loop
234	#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
235	CADJ LOOP = iteration, cg2d_x = comlev_cg2d_iter
236	#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */
237	DO it2d=1, numIters
238	#ifdef ALLOW_LOOP_DIRECTIVE
239	CML it2d = 0
240	CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters )
241	CML it2d = it2d+1
242	#endif /* ALLOW_LOOP_DIRECTIVE */
243
244	#ifdef ALLOW_AUTODIFF_TAMC
245	icg2dkey = (ikey-1)*numItersMax + it2d
246	CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
247	#endif /* ALLOW_AUTODIFF_TAMC */
248	IF ( err_sq .GE. cg2dTolerance_sq ) THEN
249
250	C-- Solve preconditioning equation and update
251	C-- conjugate direction vector "s".
252	#ifdef ALLOW_AUTODIFF_TAMC
253	CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
254	CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
255	#endif /* ALLOW_AUTODIFF_TAMC */
256	DO bj=myByLo(myThid),myByHi(myThid)
257	DO bi=myBxLo(myThid),myBxHi(myThid)
258	eta_qrNtile(bi,bj) = 0. _d 0
259	DO j=1,sNy
260	DO i=1,sNx
261	cg2d_z(i,j,bi,bj) =
262	& pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
263	& +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
264	& +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
265	& +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
266	& +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
267	eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
268	& +cg2d_z(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
269	ENDDO
270	ENDDO
271	ENDDO
272	ENDDO
273
274	CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
275	#ifdef ALLOW_AUTODIFF_TAMC
276	CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
277	CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
278	#endif /* ALLOW_AUTODIFF_TAMC */
279	CML cgBeta = eta_qrN/eta_qrNM1
280	cgBeta = eta_qrN*recip_eta_qrNM1
281	Cml store normalisation factor for the next interation (in case there is one).
282	CML store the inverse of the normalization factor for higher precision
283	CML eta_qrNM1 = eta_qrN
284	recip_eta_qrNM1 = 1. _d 0/eta_qrN
285
286	DO bj=myByLo(myThid),myByHi(myThid)
287	DO bi=myBxLo(myThid),myBxHi(myThid)
288	DO j=1,sNy
289	DO i=1,sNx
290	cg2d_s(i,j,bi,bj) = cg2d_z(i,j,bi,bj)
291	& + cgBeta*cg2d_s(i,j,bi,bj)
292	ENDDO
293	ENDDO
294	ENDDO
295	ENDDO
296
297	C-- Do exchanges that require messages i.e. between
298	C-- processes.
299	c CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
300	CALL EXCH_XY_RL ( cg2d_s, myThid )
301
302	C== Evaluate laplace operator on conjugate gradient vector
303	C== q = A.s
304	#ifdef ALLOW_AUTODIFF_TAMC
305	#ifndef ALLOW_LOOP_DIRECTIVE
306	CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
307	#endif /* not ALLOW_LOOP_DIRECTIVE */
308	#endif /* ALLOW_AUTODIFF_TAMC */
309	DO bj=myByLo(myThid),myByHi(myThid)
310	DO bi=myBxLo(myThid),myBxHi(myThid)
311	alphaTile(bi,bj) = 0. _d 0
312	DO j=1,sNy
313	DO i=1,sNx
314	cg2d_q(i,j,bi,bj) =
315	& aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj)
316	& +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj)
317	& +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj)
318	& +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj)
319	& +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj)
320	alphaTile(bi,bj) = alphaTile(bi,bj)
321	& + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
322	ENDDO
323	ENDDO
324	ENDDO
325	ENDDO
326	CALL GLOBAL_SUM_TILE_RL( alphaTile, alphaSum, myThid )
327	alpha = eta_qrN/alphaSum
328
329	C== Update simultaneously solution and residual vectors (and Iter number)
330	C Now compute "interior" points.
331	#ifdef ALLOW_AUTODIFF_TAMC
332	#ifndef ALLOW_LOOP_DIRECTIVE
333	CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
334	#endif /* ALLOW_LOOP_DIRECTIVE */
335	#endif /* ALLOW_AUTODIFF_TAMC */
336	DO bj=myByLo(myThid),myByHi(myThid)
337	DO bi=myBxLo(myThid),myBxHi(myThid)
338	errTile(bi,bj) = 0. _d 0
339	DO j=1,sNy
340	DO i=1,sNx
341	cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
342	cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
343	errTile(bi,bj) = errTile(bi,bj)
344	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
345	ENDDO
346	ENDDO
347	ENDDO
348	ENDDO
349	actualIts = it2d
350
351	CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
352	IF ( printResidual ) THEN
353	IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
354	WRITE(msgBuf,'(A,I6,A,1PE21.14)')
355	& ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
356	CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
357	& SQUEEZE_RIGHT, myThid )
358	ENDIF
359	ENDIF
360
361	c CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
362	CALL EXCH_XY_RL ( cg2d_r, myThid )
363
364	Cml end of if "err >= cg2dTolerance" block ; end main solver loop
365	ENDIF
366	ENDDO
367
368	#ifndef ALLOW_AUTODIFF_TAMC
369	IF (cg2dNormaliseRHS) THEN
370	C-- Un-normalise the answer
371	DO bj=myByLo(myThid),myByHi(myThid)
372	DO bi=myBxLo(myThid),myBxHi(myThid)
373	DO j=1,sNy
374	DO i=1,sNx
375	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
376	ENDDO
377	ENDDO
378	ENDDO
379	ENDDO
380	ENDIF
381	#endif /* ndef ALLOW_AUTODIFF_TAMC */
382
383	C-- Return parameters to caller
384	IF ( err_sq .NE. 0. ) THEN
385	lastResidual = SQRT(err_sq)
386	ELSE
387	lastResidual = 0.
388	ENDIF
389	numIters = actualIts
390
391	#endif /* ALLOW_CG2D_NSA */
392	RETURN
393	END
394
395	#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
396
397	C These routines are routinely part of the TAMC/TAF library that is
398	C not included in the MITcgm, therefore they are mimicked here.
399
400	subroutine adstore(chardum,int1,idow,int2,int3,icount)
401
402	implicit none
403
404	#include "SIZE.h"
405	#include "tamc.h"
406
407	character() chardum
408	integer int1, int2, int3, idow, icount
409
410	C the length of this vector must be greater or equal
411	C twice the number of timesteps
412	integer nidow
413	#ifdef ALLOW_TAMC_CHECKPOINTING
414	parameter ( nidow = 2nchklev_1nchklev_2*nchklev_3 )
415	#else
416	parameter ( nidow = 1000000 )
417	#endif /* ALLOW_TAMC_CHECKPOINTING */
418	integer istoreidow(nidow)
419	common /istorecommon/ istoreidow
420
421	print *, 'adstore: ', chardum, int1, idow, int2, int3, icount
422
423	if ( icount .gt. nidow ) then
424	print *, 'adstore: error: icount > nidow = ', nidow
425	stop 'ABNORMAL STOP in adstore'
426	endif
427
428	istoreidow(icount) = idow
429
430	return
431	end
432
433	subroutine adresto(chardum,int1,idow,int2,int3,icount)
434
435	implicit none
436
437	#include "SIZE.h"
438	#include "tamc.h"
439
440	character() chardum
441	integer int1, int2, int3, idow, icount
442
443	C the length of this vector must be greater or equal
444	C twice the number of timesteps
445	integer nidow
446	#ifdef ALLOW_TAMC_CHECKPOINTING
447	parameter ( nidow = 2nchklev_1nchklev_2*nchklev_3 )
448	#else
449	parameter ( nidow = 1000000 )
450	#endif /* ALLOW_TAMC_CHECKPOINTING */
451	integer istoreidow(nidow)
452	common /istorecommon/ istoreidow
453
454	print *, 'adresto: ', chardum, int1, idow, int2, int3, icount
455
456	if ( icount .gt. nidow ) then
457	print *, 'adstore: error: icount > nidow = ', nidow
458	stop 'ABNORMAL STOP in adstore'
459	endif
460
461	idow = istoreidow(icount)
462
463	return
464	end
465	#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */