model/src/cg2d_nsa.F

C $Header: /u/gcmpack/MITgcm/model/src/cg2d_nsa.F,v 1.3 2009/07/11 21:45:44 jmc Exp $
C $Name:  $

#include "CPP_OPTIONS.h"
#ifdef ALLOW_USE_MPI
C HACK to avoid global_max
# define ALLOW_CONST_RHSMAX
#endif

CML THIS DOES NOT WORK +++++
#undef ALLOW_LOOP_DIRECTIVE
CBOP
C     !ROUTINE: CG2D_NSA
C     !INTERFACE:
      SUBROUTINE CG2D_NSA(
     U                cg2d_b, cg2d_x,
     O                firstResidual, minResidualSq, lastResidual,
     U                numIters, nIterMin,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG2D_NSA
C     | o Two-dimensional grid problem conjugate-gradient
C     |   inverter (with preconditioner).
C     | o This version is used only in the case when the matrix
C     |   operator is not "self-adjoint" (NSA). Any remaining
C     |   residuals will immediately reported to the department
C     |   of homeland security.
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.
C     | It requires the A be symmetric.
C     | This implementation assumes A is a five-diagonal
C     | matrix of the form that arises in the discrete
C     | representation of the del^2 operator in a
C     | two-dimensional space.
C     | Notes:
C     | ======
C     | This implementation can support shared-memory
C     | multi-threaded execution. In order to do this COMMON
C     | blocks are used for many of the arrays - even ones that
C     | are only used for intermedaite results. This design is
C     | OK if you want to all the threads to collaborate on
C     | solving the same problem. On the other hand if you want
C     | the threads to solve several different problems
C     | concurrently this implementation will not work.
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "CG2D.h"
#ifdef ALLOW_AUTODIFF_TAMC
# include "tamc.h"
# include "tamc_keys.h"
#endif

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     cg2d_b    :: The source term or "right hand side" (Output: normalised RHS)
C     cg2d_x    :: The solution (Input: first guess)
C     firstResidual :: the initial residual before any iterations
C     minResidualSq :: the lowest residual reached (squared)
C     lastResidual  :: the actual residual reached
C     numIters  :: Inp: the maximum number of iterations allowed
C                  Out: the actual number of iterations used
C     nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
C                  Out: iteration number corresponding to lowest residual
C     myThid    :: Thread on which I am working.
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  firstResidual
      _RL  minResidualSq
      _RL  lastResidual
      INTEGER numIters
      INTEGER nIterMin
      INTEGER myThid

#ifdef ALLOW_CG2D_NSA
C     !LOCAL VARIABLES:
C     === Local variables ====
C     bi, bj     :: tile index in X and Y.
C     i, j, it2d :: Loop counters ( it2d counts CG iterations )
C     actualIts  :: actual CG iteration number
C     err_sq     :: Measure of the square of the residual of Ax - b.
C     eta_qrN    :: Used in computing search directions; suffix N and NM1
C     eta_qrNM1     denote current and previous iterations respectively.
C     recip_eta_qrNM1 :: reciprocal of eta_qrNM1
C     cgBeta     :: coeff used to update conjugate direction vector "s".
C     alpha      :: coeff used to update solution & residual
C     alpha_aux  :: to avoid the statement: alpha = 1./alpha (for TAMC/TAF)
C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
C                   debugging/trouble shooting diagnostic. For neumann problems
C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
C     cg2d_min   :: used to store solution corresponding to lowest residual.
C     msgBuf     :: Informational/error message buffer
      INTEGER bi, bj
      INTEGER i, j, it2d
      INTEGER actualIts
      _RL    cg2dTolerance_sq
      _RL    err_sq
      _RL    eta_qrN
      _RL    eta_qrNM1
      _RL    recip_eta_qrNM1
      _RL    cgBeta
      _RL    alpha
      _RL    alpha_aux
      _RL    sumRHS
      _RL    rhsMax, rhsMaxGlobal
      _RL    rhsNorm
      CHARACTER*(MAX_LEN_MBUF) msgBuf
      LOGICAL printResidual
CEOP

#ifdef ALLOW_AUTODIFF_TAMC
      IF ( numIters .GT. numItersMax ) THEN
         WRITE(standardMessageUnit,'(A,I10)')
     &        'CG2D_NSA: numIters > numItersMax = ', numItersMax
         STOP 'NON-NORMAL in CG2D_NSA'
      ENDIF
#endif /* ALLOW_AUTODIFF_TAMC */

#ifdef ALLOW_AUTODIFF_TAMC
          act1 = myThid - 1
          max1 = nTx*nTy
          act2 = ikey_dynamics - 1
          ikey = (act1 + 1) + act2*max1
#endif /* ALLOW_AUTODIFF_TAMC */

C--   Initialise auxiliary constant, some output variable and inverter
      cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
      minResidualSq = -1. _d 0
      eta_qrNM1     =  1. _d 0
      recip_eta_qrNM1= 1. _d 0

C--   Normalise RHS
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
          rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      IF (cg2dNormaliseRHS) THEN
C     -  Normalise RHS :
#ifdef ALLOW_CONST_RHSMAX
      rhsMaxGlobal=1.
#else
      rhsMaxGlobal=rhsMax
      _GLOBAL_MAX_RL( rhsMaxGlobal, myThid )
#endif /* ALLOW_CONST_RHSMAX */
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
      IF ( rhsMaxGlobal .NE. 0. ) THEN
       rhsNorm = 1. _d 0 / rhsMaxGlobal
      ELSE
       rhsNorm = 1. _d 0
      ENDIF

#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
          cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
         ENDDO
        ENDDO
       ENDDO
      ENDDO
C- end Normalise RHS
      ENDIF

C--   Update overlaps
      CALL EXCH_XY_RL( cg2d_x, myThid )

C--   Initial residual calculation
      err_sq = 0. _d 0
      sumRHS = 0. _d 0
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=0,sNy+1
         DO i=0,sNx+1
          cg2d_s(i,j,bi,bj) = 0.
         ENDDO
        ENDDO
        DO j=1,sNy
         DO i=1,sNx
          cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
     &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
     &    )
          err_sq         = err_sq         +
     &     cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          sumRHS            = sumRHS            +
     &     cg2d_b(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

c     CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
      CALL EXCH_XY_RL ( cg2d_r, myThid )
      _GLOBAL_SUM_RL( sumRHS, myThid )
      _GLOBAL_SUM_RL( err_sq, myThid )
      actualIts = 0
      IF ( err_sq .NE. 0. ) THEN
        firstResidual = SQRT(err_sq)
      ELSE
        firstResidual = 0.
      ENDIF

      printResidual = .FALSE.
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        printResidual = printResidualFreq.GE.1
        WRITE(standardmessageunit,'(A,1P2E22.14)')
     &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMaxGlobal
        _END_MASTER( myThid )
      ENDIF

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Cml   begin main solver loop
#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
CADJ LOOP = iteration, cg2d_x = comlev_cg2d_iter
#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */
      DO it2d=1, numIters
#ifdef ALLOW_LOOP_DIRECTIVE
CML      it2d = 0
CML      DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters )
CML      it2d = it2d+1
#endif /* ALLOW_LOOP_DIRECTIVE */

#ifdef ALLOW_AUTODIFF_TAMC
       icg2dkey = (ikey-1)*numItersMax + it2d
CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
      IF ( err_sq .GE. cg2dTolerance_sq ) THEN

C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
       eta_qrN = 0. _d 0
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_z(i,j,bi,bj) =
     &      pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
     &     +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
     &     +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
     &     +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
     &     +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
           eta_qrN = eta_qrN
     &     +cg2d_z(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

       _GLOBAL_SUM_RL(eta_qrN, myThid)
#ifdef ALLOW_AUTODIFF_TAMC
CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
CML       cgBeta   = eta_qrN/eta_qrNM1
       cgBeta   = eta_qrN*recip_eta_qrNM1
Cml    store normalisation factor for the next interation
Cml    (in case there is one).
CML    store the inverse of the normalization factor for higher precision
CML       eta_qrNM1 = eta_qrN
       recip_eta_qrNM1 = 1./eta_qrN

       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_s(i,j,bi,bj) = cg2d_z(i,j,bi,bj)
     &                       + cgBeta*cg2d_s(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

C--    Do exchanges that require messages i.e. between
C--    processes.
c      CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
       CALL EXCH_XY_RL ( cg2d_s, myThid )

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
       alpha     = 0. _d 0
       alpha_aux = 0. _d 0
#ifdef ALLOW_AUTODIFF_TAMC
#ifndef ALLOW_LOOP_DIRECTIVE
CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* not ALLOW_LOOP_DIRECTIVE */
#endif /* ALLOW_AUTODIFF_TAMC */
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_q(i,j,bi,bj) =
     &     aW2d(i  ,j  ,bi,bj)*cg2d_s(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_s(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_s(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j  ,bi,bj)
           alpha_aux = alpha_aux+cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
       _GLOBAL_SUM_RL(alpha_aux,myThid)
       alpha = eta_qrN/alpha_aux

C==    Update simultaneously solution and residual vectors (and Iter number)
C      Now compute "interior" points.
       err_sq = 0. _d 0
#ifdef ALLOW_AUTODIFF_TAMC
#ifndef ALLOW_LOOP_DIRECTIVE
CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
#endif /* ALLOW_LOOP_DIRECTIVE */
#endif /* ALLOW_AUTODIFF_TAMC */
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
           cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
           err_sq = err_sq+cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
       actualIts = it2d

       _GLOBAL_SUM_RL( err_sq   , myThid )
       IF ( printResidual ) THEN
        IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
         WRITE(msgBuf,'(A,I6,A,1PE21.14)')
     &    ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
         CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
     &                       SQUEEZE_RIGHT, myThid )
        ENDIF
       ENDIF

c      CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
       CALL EXCH_XY_RL ( cg2d_r, myThid )

Cml   end of if "err >= cg2dTolerance" block ; end main solver loop
      ENDIF
      ENDDO

      IF (cg2dNormaliseRHS) THEN
#ifdef ALLOW_AUTODIFF_TAMC
CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte
CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
#endif /* ALLOW_AUTODIFF_TAMC */
C--   Un-normalise the answer
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO j=1,sNy
           DO i=1,sNx
            cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF

C--   Return parameters to caller
      IF ( err_sq .NE. 0. ) THEN
        lastResidual = SQRT(err_sq)
      ELSE
        lastResidual = 0.
      ENDIF
      numIters = actualIts

#endif /* ALLOW_CG2D_NSA */
      RETURN
      END

#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
C
C     These routines are routinely part of the TAMC/TAF library that is
C     not included in the MITcgm, therefore they are mimicked here.
C
      subroutine adstore(chardum,int1,idow,int2,int3,icount)

      implicit none

#include "SIZE.h"
#include "tamc.h"

      character*(*) chardum
      integer int1, int2, int3, idow, icount

C     the length of this vector must be greater or equal
C     twice the number of timesteps
      integer nidow
#ifdef ALLOW_TAMC_CHECKPOINTING
      parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 )
#else
      parameter ( nidow = 1000000 )
#endif /* ALLOW_TAMC_CHECKPOINTING */
      integer istoreidow(nidow)
      common /istorecommon/ istoreidow

      print *, 'adstore: ', chardum, int1, idow, int2, int3, icount

      if ( icount .gt. nidow ) then
         print *, 'adstore: error: icount > nidow = ', nidow
         stop 'ABNORMAL STOP in adstore'
      endif

      istoreidow(icount) = idow

      return
      end

      subroutine adresto(chardum,int1,idow,int2,int3,icount)

      implicit none

#include "SIZE.h"
#include "tamc.h"

      character*(*) chardum
      integer int1, int2, int3, idow, icount


C     the length of this vector must be greater or equal
C     twice the number of timesteps
      integer nidow
#ifdef ALLOW_TAMC_CHECKPOINTING
      parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 )
#else
      parameter ( nidow = 1000000 )
#endif /* ALLOW_TAMC_CHECKPOINTING */
      integer istoreidow(nidow)
      common /istorecommon/ istoreidow

      print *, 'adresto: ', chardum, int1, idow, int2, int3, icount

      if ( icount .gt. nidow ) then
         print *, 'adstore: error: icount > nidow = ', nidow
         stop 'ABNORMAL STOP in adstore'
      endif

      idow = istoreidow(icount)

      return
      end
#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */
1	C $Header: /u/gcmpack/MITgcm/model/src/cg2d_nsa.F,v 1.3 2009/07/11 21:45:44 jmc Exp $
2	C $Name: $
3
4	#include "CPP_OPTIONS.h"
5	#ifdef ALLOW_USE_MPI
6	C HACK to avoid global_max
7	# define ALLOW_CONST_RHSMAX
8	#endif
9
10	CML THIS DOES NOT WORK +++++
11	#undef ALLOW_LOOP_DIRECTIVE
12	CBOP
13	C !ROUTINE: CG2D_NSA
14	C !INTERFACE:
15	SUBROUTINE CG2D_NSA(
16	U cg2d_b, cg2d_x,
17	O firstResidual, minResidualSq, lastResidual,
18	U numIters, nIterMin,
19	I myThid )
20	C !DESCRIPTION: \bv
21	C ==========================================================
22	C \| SUBROUTINE CG2D_NSA
23	C \| o Two-dimensional grid problem conjugate-gradient
24	C \| inverter (with preconditioner).
25	C \| o This version is used only in the case when the matrix
26	C \| operator is not "self-adjoint" (NSA). Any remaining
27	C \| residuals will immediately reported to the department
28	C \| of homeland security.
29	C ==========================================================
30	C \| Con. grad is an iterative procedure for solving Ax = b.
31	C \| It requires the A be symmetric.
32	C \| This implementation assumes A is a five-diagonal
33	C \| matrix of the form that arises in the discrete
34	C \| representation of the del^2 operator in a
35	C \| two-dimensional space.
36	C \| Notes:
37	C \| ======
38	C \| This implementation can support shared-memory
39	C \| multi-threaded execution. In order to do this COMMON
40	C \| blocks are used for many of the arrays - even ones that
41	C \| are only used for intermedaite results. This design is
42	C \| OK if you want to all the threads to collaborate on
43	C \| solving the same problem. On the other hand if you want
44	C \| the threads to solve several different problems
45	C \| concurrently this implementation will not work.
46	C ==========================================================
47	C \ev
48
49	C !USES:
50	IMPLICIT NONE
51	C === Global data ===
52	#include "SIZE.h"
53	#include "EEPARAMS.h"
54	#include "PARAMS.h"
55	#include "CG2D.h"
56	#ifdef ALLOW_AUTODIFF_TAMC
57	# include "tamc.h"
58	# include "tamc_keys.h"
59	#endif
60
61	C !INPUT/OUTPUT PARAMETERS:
62	C === Routine arguments ===
63	C cg2d_b :: The source term or "right hand side" (Output: normalised RHS)
64	C cg2d_x :: The solution (Input: first guess)
65	C firstResidual :: the initial residual before any iterations
66	C minResidualSq :: the lowest residual reached (squared)
67	C lastResidual :: the actual residual reached
68	C numIters :: Inp: the maximum number of iterations allowed
69	C Out: the actual number of iterations used
70	C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
71	C Out: iteration number corresponding to lowest residual
72	C myThid :: Thread on which I am working.
73	_RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
74	_RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
75	_RL firstResidual
76	_RL minResidualSq
77	_RL lastResidual
78	INTEGER numIters
79	INTEGER nIterMin
80	INTEGER myThid
81
82	#ifdef ALLOW_CG2D_NSA
83	C !LOCAL VARIABLES:
84	C === Local variables ====
85	C bi, bj :: tile index in X and Y.
86	C i, j, it2d :: Loop counters ( it2d counts CG iterations )
87	C actualIts :: actual CG iteration number
88	C err_sq :: Measure of the square of the residual of Ax - b.
89	C eta_qrN :: Used in computing search directions; suffix N and NM1
90	C eta_qrNM1 denote current and previous iterations respectively.
91	C recip_eta_qrNM1 :: reciprocal of eta_qrNM1
92	C cgBeta :: coeff used to update conjugate direction vector "s".
93	C alpha :: coeff used to update solution & residual
94	C alpha_aux :: to avoid the statement: alpha = 1./alpha (for TAMC/TAF)
95	C sumRHS :: Sum of right-hand-side. Sometimes this is a useful
96	C debugging/trouble shooting diagnostic. For neumann problems
97	C sumRHS needs to be ~0 or it converge at a non-zero residual.
98	C cg2d_min :: used to store solution corresponding to lowest residual.
99	C msgBuf :: Informational/error message buffer
100	INTEGER bi, bj
101	INTEGER i, j, it2d
102	INTEGER actualIts
103	_RL cg2dTolerance_sq
104	_RL err_sq
105	_RL eta_qrN
106	_RL eta_qrNM1
107	_RL recip_eta_qrNM1
108	_RL cgBeta
109	_RL alpha
110	_RL alpha_aux
111	_RL sumRHS
112	_RL rhsMax, rhsMaxGlobal
113	_RL rhsNorm
114	CHARACTER*(MAX_LEN_MBUF) msgBuf
115	LOGICAL printResidual
116	CEOP
117
118	#ifdef ALLOW_AUTODIFF_TAMC
119	IF ( numIters .GT. numItersMax ) THEN
120	WRITE(standardMessageUnit,'(A,I10)')
121	& 'CG2D_NSA: numIters > numItersMax = ', numItersMax
122	STOP 'NON-NORMAL in CG2D_NSA'
123	ENDIF
124	#endif /* ALLOW_AUTODIFF_TAMC */
125
126	#ifdef ALLOW_AUTODIFF_TAMC
127	act1 = myThid - 1
128	max1 = nTx*nTy
129	act2 = ikey_dynamics - 1
130	ikey = (act1 + 1) + act2*max1
131	#endif /* ALLOW_AUTODIFF_TAMC */
132
133	C-- Initialise auxiliary constant, some output variable and inverter
134	cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
135	minResidualSq = -1. _d 0
136	eta_qrNM1 = 1. _d 0
137	recip_eta_qrNM1= 1. _d 0
138
139	C-- Normalise RHS
140	#ifdef ALLOW_AUTODIFF_TAMC
141	CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
142	#endif /* ALLOW_AUTODIFF_TAMC */
143	rhsMax = 0. _d 0
144	DO bj=myByLo(myThid),myByHi(myThid)
145	DO bi=myBxLo(myThid),myBxHi(myThid)
146	DO j=1,sNy
147	DO i=1,sNx
148	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
149	rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
150	ENDDO
151	ENDDO
152	ENDDO
153	ENDDO
154
155	IF (cg2dNormaliseRHS) THEN
156	C - Normalise RHS :
157	#ifdef ALLOW_CONST_RHSMAX
158	rhsMaxGlobal=1.
159	#else
160	rhsMaxGlobal=rhsMax
161	_GLOBAL_MAX_RL( rhsMaxGlobal, myThid )
162	#endif /* ALLOW_CONST_RHSMAX */
163	#ifdef ALLOW_AUTODIFF_TAMC
164	CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte
165	#endif /* ALLOW_AUTODIFF_TAMC */
166	IF ( rhsMaxGlobal .NE. 0. ) THEN
167	rhsNorm = 1. _d 0 / rhsMaxGlobal
168	ELSE
169	rhsNorm = 1. _d 0
170	ENDIF
171
172	#ifdef ALLOW_AUTODIFF_TAMC
173	CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
174	CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
175	#endif /* ALLOW_AUTODIFF_TAMC */
176	DO bj=myByLo(myThid),myByHi(myThid)
177	DO bi=myBxLo(myThid),myBxHi(myThid)
178	DO j=1,sNy
179	DO i=1,sNx
180	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
181	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
182	ENDDO
183	ENDDO
184	ENDDO
185	ENDDO
186	C- end Normalise RHS
187	ENDIF
188
189	C-- Update overlaps
190	CALL EXCH_XY_RL( cg2d_x, myThid )
191
192	C-- Initial residual calculation
193	err_sq = 0. _d 0
194	sumRHS = 0. _d 0
195	#ifdef ALLOW_AUTODIFF_TAMC
196	CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
197	CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
198	#endif /* ALLOW_AUTODIFF_TAMC */
199	DO bj=myByLo(myThid),myByHi(myThid)
200	DO bi=myBxLo(myThid),myBxHi(myThid)
201	DO j=0,sNy+1
202	DO i=0,sNx+1
203	cg2d_s(i,j,bi,bj) = 0.
204	ENDDO
205	ENDDO
206	DO j=1,sNy
207	DO i=1,sNx
208	cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
209	& (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
210	& +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
211	& +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
212	& +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
213	& +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
214	& )
215	err_sq = err_sq +
216	& cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
217	sumRHS = sumRHS +
218	& cg2d_b(i,j,bi,bj)
219	ENDDO
220	ENDDO
221	ENDDO
222	ENDDO
223
224	c CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
225	CALL EXCH_XY_RL ( cg2d_r, myThid )
226	_GLOBAL_SUM_RL( sumRHS, myThid )
227	_GLOBAL_SUM_RL( err_sq, myThid )
228	actualIts = 0
229	IF ( err_sq .NE. 0. ) THEN
230	firstResidual = SQRT(err_sq)
231	ELSE
232	firstResidual = 0.
233	ENDIF
234
235	printResidual = .FALSE.
236	IF ( debugLevel .GE. debLevZero ) THEN
237	_BEGIN_MASTER( myThid )
238	printResidual = printResidualFreq.GE.1
239	WRITE(standardmessageunit,'(A,1P2E22.14)')
240	& ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMaxGlobal
241	_END_MASTER( myThid )
242	ENDIF
243
244	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
245	Cml begin main solver loop
246	#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
247	CADJ LOOP = iteration, cg2d_x = comlev_cg2d_iter
248	#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */
249	DO it2d=1, numIters
250	#ifdef ALLOW_LOOP_DIRECTIVE
251	CML it2d = 0
252	CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters )
253	CML it2d = it2d+1
254	#endif /* ALLOW_LOOP_DIRECTIVE */
255
256	#ifdef ALLOW_AUTODIFF_TAMC
257	icg2dkey = (ikey-1)*numItersMax + it2d
258	CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
259	#endif /* ALLOW_AUTODIFF_TAMC */
260	IF ( err_sq .GE. cg2dTolerance_sq ) THEN
261
262	C-- Solve preconditioning equation and update
263	C-- conjugate direction vector "s".
264	eta_qrN = 0. _d 0
265	#ifdef ALLOW_AUTODIFF_TAMC
266	CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
267	CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
268	#endif /* ALLOW_AUTODIFF_TAMC */
269	DO bj=myByLo(myThid),myByHi(myThid)
270	DO bi=myBxLo(myThid),myBxHi(myThid)
271	DO j=1,sNy
272	DO i=1,sNx
273	cg2d_z(i,j,bi,bj) =
274	& pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
275	& +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
276	& +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
277	& +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
278	& +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
279	eta_qrN = eta_qrN
280	& +cg2d_z(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
281	ENDDO
282	ENDDO
283	ENDDO
284	ENDDO
285
286	_GLOBAL_SUM_RL(eta_qrN, myThid)
287	#ifdef ALLOW_AUTODIFF_TAMC
288	CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
289	CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
290	#endif /* ALLOW_AUTODIFF_TAMC */
291	CML cgBeta = eta_qrN/eta_qrNM1
292	cgBeta = eta_qrN*recip_eta_qrNM1
293	Cml store normalisation factor for the next interation
294	Cml (in case there is one).
295	CML store the inverse of the normalization factor for higher precision
296	CML eta_qrNM1 = eta_qrN
297	recip_eta_qrNM1 = 1./eta_qrN
298
299	DO bj=myByLo(myThid),myByHi(myThid)
300	DO bi=myBxLo(myThid),myBxHi(myThid)
301	DO j=1,sNy
302	DO i=1,sNx
303	cg2d_s(i,j,bi,bj) = cg2d_z(i,j,bi,bj)
304	& + cgBeta*cg2d_s(i,j,bi,bj)
305	ENDDO
306	ENDDO
307	ENDDO
308	ENDDO
309
310	C-- Do exchanges that require messages i.e. between
311	C-- processes.
312	c CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
313	CALL EXCH_XY_RL ( cg2d_s, myThid )
314
315	C== Evaluate laplace operator on conjugate gradient vector
316	C== q = A.s
317	alpha = 0. _d 0
318	alpha_aux = 0. _d 0
319	#ifdef ALLOW_AUTODIFF_TAMC
320	#ifndef ALLOW_LOOP_DIRECTIVE
321	CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
322	#endif /* not ALLOW_LOOP_DIRECTIVE */
323	#endif /* ALLOW_AUTODIFF_TAMC */
324	DO bj=myByLo(myThid),myByHi(myThid)
325	DO bi=myBxLo(myThid),myBxHi(myThid)
326	DO j=1,sNy
327	DO i=1,sNx
328	cg2d_q(i,j,bi,bj) =
329	& aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj)
330	& +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj)
331	& +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj)
332	& +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj)
333	& +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj)
334	alpha_aux = alpha_aux+cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
335	ENDDO
336	ENDDO
337	ENDDO
338	ENDDO
339	_GLOBAL_SUM_RL(alpha_aux,myThid)
340	alpha = eta_qrN/alpha_aux
341
342	C== Update simultaneously solution and residual vectors (and Iter number)
343	C Now compute "interior" points.
344	err_sq = 0. _d 0
345	#ifdef ALLOW_AUTODIFF_TAMC
346	#ifndef ALLOW_LOOP_DIRECTIVE
347	CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
348	#endif /* ALLOW_LOOP_DIRECTIVE */
349	#endif /* ALLOW_AUTODIFF_TAMC */
350	DO bj=myByLo(myThid),myByHi(myThid)
351	DO bi=myBxLo(myThid),myBxHi(myThid)
352	DO j=1,sNy
353	DO i=1,sNx
354	cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
355	cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
356	err_sq = err_sq+cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
357	ENDDO
358	ENDDO
359	ENDDO
360	ENDDO
361	actualIts = it2d
362
363	_GLOBAL_SUM_RL( err_sq , myThid )
364	IF ( printResidual ) THEN
365	IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
366	WRITE(msgBuf,'(A,I6,A,1PE21.14)')
367	& ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
368	CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
369	& SQUEEZE_RIGHT, myThid )
370	ENDIF
371	ENDIF
372
373	c CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
374	CALL EXCH_XY_RL ( cg2d_r, myThid )
375
376	Cml end of if "err >= cg2dTolerance" block ; end main solver loop
377	ENDIF
378	ENDDO
379
380	IF (cg2dNormaliseRHS) THEN
381	#ifdef ALLOW_AUTODIFF_TAMC
382	CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte
383	CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
384	#endif /* ALLOW_AUTODIFF_TAMC */
385	C-- Un-normalise the answer
386	DO bj=myByLo(myThid),myByHi(myThid)
387	DO bi=myBxLo(myThid),myBxHi(myThid)
388	DO j=1,sNy
389	DO i=1,sNx
390	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
391	ENDDO
392	ENDDO
393	ENDDO
394	ENDDO
395	ENDIF
396
397	C-- Return parameters to caller
398	IF ( err_sq .NE. 0. ) THEN
399	lastResidual = SQRT(err_sq)
400	ELSE
401	lastResidual = 0.
402	ENDIF
403	numIters = actualIts
404
405	#endif /* ALLOW_CG2D_NSA */
406	RETURN
407	END
408
409	#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
410	C
411	C These routines are routinely part of the TAMC/TAF library that is
412	C not included in the MITcgm, therefore they are mimicked here.
413	C
414	subroutine adstore(chardum,int1,idow,int2,int3,icount)
415
416	implicit none
417
418	#include "SIZE.h"
419	#include "tamc.h"
420
421	character() chardum
422	integer int1, int2, int3, idow, icount
423
424	C the length of this vector must be greater or equal
425	C twice the number of timesteps
426	integer nidow
427	#ifdef ALLOW_TAMC_CHECKPOINTING
428	parameter ( nidow = 2nchklev_1nchklev_2*nchklev_3 )
429	#else
430	parameter ( nidow = 1000000 )
431	#endif /* ALLOW_TAMC_CHECKPOINTING */
432	integer istoreidow(nidow)
433	common /istorecommon/ istoreidow
434
435	print *, 'adstore: ', chardum, int1, idow, int2, int3, icount
436
437	if ( icount .gt. nidow ) then
438	print *, 'adstore: error: icount > nidow = ', nidow
439	stop 'ABNORMAL STOP in adstore'
440	endif
441
442	istoreidow(icount) = idow
443
444	return
445	end
446
447	subroutine adresto(chardum,int1,idow,int2,int3,icount)
448
449	implicit none
450
451	#include "SIZE.h"
452	#include "tamc.h"
453
454	character() chardum
455	integer int1, int2, int3, idow, icount
456
457
458	C the length of this vector must be greater or equal
459	C twice the number of timesteps
460	integer nidow
461	#ifdef ALLOW_TAMC_CHECKPOINTING
462	parameter ( nidow = 2nchklev_1nchklev_2*nchklev_3 )
463	#else
464	parameter ( nidow = 1000000 )
465	#endif /* ALLOW_TAMC_CHECKPOINTING */
466	integer istoreidow(nidow)
467	common /istorecommon/ istoreidow
468
469	print *, 'adresto: ', chardum, int1, idow, int2, int3, icount
470
471	if ( icount .gt. nidow ) then
472	print *, 'adstore: error: icount > nidow = ', nidow
473	stop 'ABNORMAL STOP in adstore'
474	endif
475
476	idow = istoreidow(icount)
477
478	return
479	end
480	#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */