model/src/cg2d_sr.F

C $Header: /u/gcmpack/MITgcm/model/src/cg2d_sr.F,v 1.5 2011/12/22 19:00:58 jmc Exp $
C $Name:  $

#include "CPP_OPTIONS.h"
#ifdef TARGET_NEC_SX
C     set a sensible default for the outer loop unrolling parameter that can
C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
#ifndef CG2D_OUTERLOOPITERS
# define CG2D_OUTERLOOPITERS 10
#endif
#endif /* TARGET_NEC_SX */

CBOP
C     !ROUTINE: CG2D_SR
C     !INTERFACE:
      SUBROUTINE CG2D_SR(
     U                cg2d_b, cg2d_x,
     O                firstResidual, minResidualSq, lastResidual,
     U                numIters, nIterMin,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG2D
C     | o Two-dimensional grid problem conjugate-gradient
C     |   inverter (with preconditioner).
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.
C     | It requires the A be symmetric.
C     | This implementation assumes A is a five-diagonal
C     | matrix of the form that arises in the discrete
C     | representation of the del^2 operator in a
C     | two-dimensional space.
C     | Notes:
C     | ======
C     | This implementation can support shared-memory
C     | multi-threaded execution. In order to do this COMMON
C     | blocks are used for many of the arrays - even ones that
C     | are only used for intermedaite results. This design is
C     | OK if you want to all the threads to collaborate on
C     | solving the same problem. On the other hand if you want
C     | the threads to solve several different problems
C     | concurrently this implementation will not work.
C     |
C     | This version implements the single-reduction CG algorithm of
C     | d Azevedo, Eijkhout, and Romine (Lapack Working Note 56, 1999).
C     | C. Wolfe, November 2009, clwolfe@ucsd.edu
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "CG2D.h"

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     cg2d_b    :: The source term or "right hand side" (output: normalised RHS)
C     cg2d_x    :: The solution (input: first guess)
C     firstResidual :: the initial residual before any iterations
C     minResidualSq :: the lowest residual reached (squared)
C     lastResidual  :: the actual residual reached
C     numIters  :: Inp: the maximum number of iterations allowed
C                  Out: the actual number of iterations used
C     nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
C                  Out: iteration number corresponding to lowest residual
C     myThid    :: Thread on which I am working.
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  firstResidual
      _RL  minResidualSq
      _RL  lastResidual
      INTEGER numIters
      INTEGER nIterMin
      INTEGER myThid

#ifdef ALLOW_SRCG

C     !LOCAL VARIABLES:
C     === Local variables ====
C     bi, bj     :: tile index in X and Y.
C     i, j, it2d :: Loop counters ( it2d counts CG iterations )
C     actualIts  :: actual CG iteration number
C     err_sq     :: Measure of the square of the residual of Ax - b.
C     eta_qrN    :: Used in computing search directions; suffix N and NM1
C     eta_qrNM1     denote current and previous iterations respectively.
C     cgBeta     :: coeff used to update conjugate direction vector "s".
C     alpha      :: coeff used to update solution & residual
C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
C                   debugging/trouble shooting diagnostic. For neumann problems
C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
C     cg2d_min   :: used to store solution corresponding to lowest residual.
C     msgBuf     :: Informational/error message buffer
      INTEGER bi, bj
      INTEGER i, j, it2d
c     INTEGER actualIts
      _RL    cg2dTolerance_sq
      _RL    err_sq,  errTile(nSx,nSy)
      _RL    eta_qrN, eta_qrNtile(nSx,nSy)
      _RL    eta_qrNM1
      _RL    cgBeta
      _RL    alpha,  alphaTile(nSx,nSy)
      _RL    sigma
      _RL    delta,  deltaTile(nSx,nSy)
      _RL    sumRHS, sumRHStile(nSx,nSy)
      _RL    rhsMax
      _RL    rhsNorm
      _RL    cg2d_min(1:sNx,1:sNy,nSx,nSy)
      CHARACTER*(MAX_LEN_MBUF) msgBuf
      LOGICAL printResidual
CEOP

C--   Initialise auxiliary constant, some output variable and inverter
      cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
      minResidualSq = -1. _d 0
      eta_qrNM1     =  1. _d 0

C--   Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
          rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      IF (cg2dNormaliseRHS) THEN
C-  Normalise RHS :
      _GLOBAL_MAX_RL( rhsMax, myThid )
      rhsNorm = 1. _d 0
      IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
          cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
         ENDDO
        ENDDO
       ENDDO
      ENDDO
C- end Normalise RHS
      ENDIF

C--   Update overlaps
      CALL EXCH_XY_RL( cg2d_x, myThid )

C--   Initial residual calculation
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        IF ( nIterMin.GE.0 ) THEN
         DO j=1,sNy
          DO i=1,sNx
            cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDIF
        DO j=0,sNy+1
         DO i=0,sNx+1
          cg2d_s(i,j,bi,bj) = 0.
         ENDDO
        ENDDO
        sumRHStile(bi,bj) = 0. _d 0
        errTile(bi,bj)    = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO j=1,sNy
         DO i=1,sNx
          cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
     &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
     &    )
          errTile(bi,bj)    = errTile(bi,bj)
     &                      + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      CALL EXCH_S3D_RL( cg2d_r, 1,  myThid )

      CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
      CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )

      it2d      = 0
c     actualIts = 0
      firstResidual = SQRT(err_sq)
      IF ( nIterMin.GE.0 ) THEN
        nIterMin = 0
        minResidualSq = err_sq
      ENDIF

      printResidual = .FALSE.
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        printResidual = printResidualFreq.GE.1
        WRITE(standardmessageunit,'(A,1P2E22.14)')
     &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
        _END_MASTER( myThid )
      ENDIF

      IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11

C DER (1999) do one iteration outside of the loop to start things up.
C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        eta_qrNtile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO j=1,sNy
         DO i=1,sNx
           cg2d_y(i,j,bi,bj) =
     &       pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
     &      +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
     &      +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
     &      +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
     &      +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
           cg2d_s(i,j,bi,bj)  = cg2d_y(i,j,bi,bj)
           eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
     &      +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
      CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )

      eta_qrNM1 = eta_qrN

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        alphaTile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO j=1,sNy
         DO i=1,sNx
          cg2d_q(i,j,bi,bj) =
     &      aW2d(i  ,j  ,bi,bj)*cg2d_s(i-1,j  ,bi,bj)
     &     +aW2d(i+1,j  ,bi,bj)*cg2d_s(i+1,j  ,bi,bj)
     &     +aS2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j-1,bi,bj)
     &     +aS2d(i  ,j+1,bi,bj)*cg2d_s(i  ,j+1,bi,bj)
     &     +aC2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j  ,bi,bj)
          alphaTile(bi,bj) = alphaTile(bi,bj)
     &                     + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      CALL GLOBAL_SUM_TILE_RL( alphaTile,  alpha,  myThid )

      sigma = eta_qrN/alpha

C==   Update simultaneously solution and residual vectors (and Iter number)
C     Now compute "interior" points.
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+sigma*cg2d_s(i,j,bi,bj)
          cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-sigma*cg2d_q(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO
c     actualIts = 1

      CALL EXCH_S3D_RL( cg2d_r,1, myThid )

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      DO 10 it2d=1, numIters-1
c      actualIts = it2d

C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
C--    z = M^-1 r
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO j=1,sNy
          DO i=1,sNx
           cg2d_y(i,j,bi,bj) =
     &       pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
     &      +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
     &      +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
     &      +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
     &      +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

       CALL EXCH_S3D_RL( cg2d_y, 1, myThid )

C==    v = A.z
C--    eta_qr = <z,r>
C--    delta = <z,v>
C--    Do the error calcuation here to consolidate global reductions
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         eta_qrNtile(bi,bj) = 0. _d 0
         deltaTile(bi,bj)   = 0. _d 0
         errTile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO j=1,sNy
          DO i=1,sNx
           cg2d_v(i,j,bi,bj) =
     &       aW2d(i  ,j  ,bi,bj)*cg2d_y(i-1,j  ,bi,bj)
     &      +aW2d(i+1,j  ,bi,bj)*cg2d_y(i+1,j  ,bi,bj)
     &      +aS2d(i  ,j  ,bi,bj)*cg2d_y(i  ,j-1,bi,bj)
     &      +aS2d(i  ,j+1,bi,bj)*cg2d_y(i  ,j+1,bi,bj)
     &      +aC2d(i  ,j  ,bi,bj)*cg2d_y(i  ,j  ,bi,bj)
           eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
     &      +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
           deltaTile(bi,bj) = deltaTile(bi,bj)
     &      +cg2d_y(i,j,bi,bj)*cg2d_v(i,j,bi,bj)
           errTile(bi,bj) = errTile(bi,bj)
     &                    + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          ENDDO
         ENDDO
         sumPhi(1,bi,bj) = eta_qrNtile(bi,bj)
         sumPhi(2,bi,bj) = deltaTile(bi,bj)
         sumPhi(3,bi,bj) = errTile(bi,bj)
        ENDDO
       ENDDO

C      CALL GLOBAL_SUM_TILE_RL( eta_qrNtile, eta_qrN,myThid )
C      CALL GLOBAL_SUM_TILE_RL( deltaTile, delta, myThid )
C      CALL GLOBAL_SUM_TILE_RL( errTile,  err_sq, myThid )
       CALL GLOBAL_VEC_SUM_R8( 3, 3, sumPhi, myThid )

       eta_qrN = sumPhi(1,1,1)
       delta   = sumPhi(2,1,1)
       err_sq  = sumPhi(3,1,1)

       IF ( printResidual ) THEN
        IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
         WRITE(msgBuf,'(A,I6,A,1PE21.14)')
     &    ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
         CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
     &                       SQUEEZE_RIGHT, myThid )
        ENDIF
       ENDIF
       IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
       IF ( err_sq .LT. minResidualSq ) THEN
C-     Store lowest residual solution
         minResidualSq = err_sq
         nIterMin = it2d
         DO bj=myByLo(myThid),myByHi(myThid)
          DO bi=myBxLo(myThid),myBxHi(myThid)
           DO j=1,sNy
            DO i=1,sNx
             cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ENDDO
       ENDIF

       cgBeta   = eta_qrN/eta_qrNM1
       eta_qrNM1 = eta_qrN
       alpha = delta - (cgBeta*cgBeta)*alpha
       sigma = eta_qrN/alpha

C==    Update simultaneously solution and residual vectors (and Iter number)
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_s(i,j,bi,bj) = cg2d_y(i,j,bi,bj)
     &                       + cgBeta*cg2d_s(i,j,bi,bj)
           cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
     &                       + sigma*cg2d_s(i,j,bi,bj)
           cg2d_q(i,j,bi,bj) = cg2d_v(i,j,bi,bj)
     &                       + cgBeta*cg2d_q(i,j,bi,bj)
           cg2d_r(i,j,bi,bj) = cg2d_r(i,j,bi,bj)
     &                       - sigma*cg2d_q(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
c      actualIts = it2d + 1

       CALL EXCH_S3D_RL( cg2d_r, 1, myThid )

   10 CONTINUE
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        errTile(bi,bj) = 0. _d 0
        DO j=1,sNy
         DO i=1,sNx
          errTile(bi,bj) = errTile(bi,bj)
     &                   + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO
      CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
   11 CONTINUE

      IF ( nIterMin.GE.0 .AND. err_sq .GT. minResidualSq ) THEN
C-    use the lowest residual solution (instead of current one = last residual)
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO j=1,sNy
           DO i=1,sNx
             cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF

      IF (cg2dNormaliseRHS) THEN
C--   Un-normalise the answer
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO j=1,sNy
           DO i=1,sNx
            cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF

C--   Return parameters to caller
      lastResidual = SQRT(err_sq)
      numIters = it2d
c     numIters = actualIts

#endif /* ALLOW_SRCG */
      RETURN
      END
1	C $Header: /u/gcmpack/MITgcm/model/src/cg2d_sr.F,v 1.5 2011/12/22 19:00:58 jmc Exp $
2	C $Name: $
3
4	#include "CPP_OPTIONS.h"
5	#ifdef TARGET_NEC_SX
6	C set a sensible default for the outer loop unrolling parameter that can
7	C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8	#ifndef CG2D_OUTERLOOPITERS
9	# define CG2D_OUTERLOOPITERS 10
10	#endif
11	#endif /* TARGET_NEC_SX */
12
13	CBOP
14	C !ROUTINE: CG2D_SR
15	C !INTERFACE:
16	SUBROUTINE CG2D_SR(
17	U cg2d_b, cg2d_x,
18	O firstResidual, minResidualSq, lastResidual,
19	U numIters, nIterMin,
20	I myThid )
21	C !DESCRIPTION: \bv
22	C ==========================================================
23	C \| SUBROUTINE CG2D
24	C \| o Two-dimensional grid problem conjugate-gradient
25	C \| inverter (with preconditioner).
26	C ==========================================================
27	C \| Con. grad is an iterative procedure for solving Ax = b.
28	C \| It requires the A be symmetric.
29	C \| This implementation assumes A is a five-diagonal
30	C \| matrix of the form that arises in the discrete
31	C \| representation of the del^2 operator in a
32	C \| two-dimensional space.
33	C \| Notes:
34	C \| ======
35	C \| This implementation can support shared-memory
36	C \| multi-threaded execution. In order to do this COMMON
37	C \| blocks are used for many of the arrays - even ones that
38	C \| are only used for intermedaite results. This design is
39	C \| OK if you want to all the threads to collaborate on
40	C \| solving the same problem. On the other hand if you want
41	C \| the threads to solve several different problems
42	C \| concurrently this implementation will not work.
43	C \|
44	C \| This version implements the single-reduction CG algorithm of
45	C \| d Azevedo, Eijkhout, and Romine (Lapack Working Note 56, 1999).
46	C \| C. Wolfe, November 2009, clwolfe@ucsd.edu
47	C ==========================================================
48	C \ev
49
50	C !USES:
51	IMPLICIT NONE
52	C === Global data ===
53	#include "SIZE.h"
54	#include "EEPARAMS.h"
55	#include "PARAMS.h"
56	#include "CG2D.h"
57
58	C !INPUT/OUTPUT PARAMETERS:
59	C === Routine arguments ===
60	C cg2d_b :: The source term or "right hand side" (output: normalised RHS)
61	C cg2d_x :: The solution (input: first guess)
62	C firstResidual :: the initial residual before any iterations
63	C minResidualSq :: the lowest residual reached (squared)
64	C lastResidual :: the actual residual reached
65	C numIters :: Inp: the maximum number of iterations allowed
66	C Out: the actual number of iterations used
67	C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
68	C Out: iteration number corresponding to lowest residual
69	C myThid :: Thread on which I am working.
70	_RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
71	_RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
72	_RL firstResidual
73	_RL minResidualSq
74	_RL lastResidual
75	INTEGER numIters
76	INTEGER nIterMin
77	INTEGER myThid
78
79	#ifdef ALLOW_SRCG
80
81	C !LOCAL VARIABLES:
82	C === Local variables ====
83	C bi, bj :: tile index in X and Y.
84	C i, j, it2d :: Loop counters ( it2d counts CG iterations )
85	C actualIts :: actual CG iteration number
86	C err_sq :: Measure of the square of the residual of Ax - b.
87	C eta_qrN :: Used in computing search directions; suffix N and NM1
88	C eta_qrNM1 denote current and previous iterations respectively.
89	C cgBeta :: coeff used to update conjugate direction vector "s".
90	C alpha :: coeff used to update solution & residual
91	C sumRHS :: Sum of right-hand-side. Sometimes this is a useful
92	C debugging/trouble shooting diagnostic. For neumann problems
93	C sumRHS needs to be ~0 or it converge at a non-zero residual.
94	C cg2d_min :: used to store solution corresponding to lowest residual.
95	C msgBuf :: Informational/error message buffer
96	INTEGER bi, bj
97	INTEGER i, j, it2d
98	c INTEGER actualIts
99	_RL cg2dTolerance_sq
100	_RL err_sq, errTile(nSx,nSy)
101	_RL eta_qrN, eta_qrNtile(nSx,nSy)
102	_RL eta_qrNM1
103	_RL cgBeta
104	_RL alpha, alphaTile(nSx,nSy)
105	_RL sigma
106	_RL delta, deltaTile(nSx,nSy)
107	_RL sumRHS, sumRHStile(nSx,nSy)
108	_RL rhsMax
109	_RL rhsNorm
110	_RL cg2d_min(1:sNx,1:sNy,nSx,nSy)
111	CHARACTER*(MAX_LEN_MBUF) msgBuf
112	LOGICAL printResidual
113	CEOP
114
115	C-- Initialise auxiliary constant, some output variable and inverter
116	cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
117	minResidualSq = -1. _d 0
118	eta_qrNM1 = 1. _d 0
119
120	C-- Normalise RHS
121	rhsMax = 0. _d 0
122	DO bj=myByLo(myThid),myByHi(myThid)
123	DO bi=myBxLo(myThid),myBxHi(myThid)
124	DO j=1,sNy
125	DO i=1,sNx
126	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
127	rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
128	ENDDO
129	ENDDO
130	ENDDO
131	ENDDO
132
133	IF (cg2dNormaliseRHS) THEN
134	C- Normalise RHS :
135	_GLOBAL_MAX_RL( rhsMax, myThid )
136	rhsNorm = 1. _d 0
137	IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
138	DO bj=myByLo(myThid),myByHi(myThid)
139	DO bi=myBxLo(myThid),myBxHi(myThid)
140	DO j=1,sNy
141	DO i=1,sNx
142	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
143	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
144	ENDDO
145	ENDDO
146	ENDDO
147	ENDDO
148	C- end Normalise RHS
149	ENDIF
150
151	C-- Update overlaps
152	CALL EXCH_XY_RL( cg2d_x, myThid )
153
154	C-- Initial residual calculation
155	DO bj=myByLo(myThid),myByHi(myThid)
156	DO bi=myBxLo(myThid),myBxHi(myThid)
157	IF ( nIterMin.GE.0 ) THEN
158	DO j=1,sNy
159	DO i=1,sNx
160	cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
161	ENDDO
162	ENDDO
163	ENDIF
164	DO j=0,sNy+1
165	DO i=0,sNx+1
166	cg2d_s(i,j,bi,bj) = 0.
167	ENDDO
168	ENDDO
169	sumRHStile(bi,bj) = 0. _d 0
170	errTile(bi,bj) = 0. _d 0
171	#ifdef TARGET_NEC_SX
172	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
173	#endif /* TARGET_NEC_SX */
174	DO j=1,sNy
175	DO i=1,sNx
176	cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
177	& (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
178	& +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
179	& +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
180	& +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
181	& +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
182	& )
183	errTile(bi,bj) = errTile(bi,bj)
184	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
185	sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
186	ENDDO
187	ENDDO
188	ENDDO
189	ENDDO
190
191	CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
192
193	CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
194	CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
195
196	it2d = 0
197	c actualIts = 0
198	firstResidual = SQRT(err_sq)
199	IF ( nIterMin.GE.0 ) THEN
200	nIterMin = 0
201	minResidualSq = err_sq
202	ENDIF
203
204	printResidual = .FALSE.
205	IF ( debugLevel .GE. debLevZero ) THEN
206	_BEGIN_MASTER( myThid )
207	printResidual = printResidualFreq.GE.1
208	WRITE(standardmessageunit,'(A,1P2E22.14)')
209	& ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
210	_END_MASTER( myThid )
211	ENDIF
212
213	IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
214
215	C DER (1999) do one iteration outside of the loop to start things up.
216	C-- Solve preconditioning equation and update
217	C-- conjugate direction vector "s".
218	DO bj=myByLo(myThid),myByHi(myThid)
219	DO bi=myBxLo(myThid),myBxHi(myThid)
220	eta_qrNtile(bi,bj) = 0. _d 0
221	#ifdef TARGET_NEC_SX
222	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
223	#endif /* TARGET_NEC_SX */
224	DO j=1,sNy
225	DO i=1,sNx
226	cg2d_y(i,j,bi,bj) =
227	& pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
228	& +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
229	& +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
230	& +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
231	& +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
232	cg2d_s(i,j,bi,bj) = cg2d_y(i,j,bi,bj)
233	eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
234	& +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
235	ENDDO
236	ENDDO
237	ENDDO
238	ENDDO
239
240	CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
241	CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
242
243	eta_qrNM1 = eta_qrN
244
245	C== Evaluate laplace operator on conjugate gradient vector
246	C== q = A.s
247	DO bj=myByLo(myThid),myByHi(myThid)
248	DO bi=myBxLo(myThid),myBxHi(myThid)
249	alphaTile(bi,bj) = 0. _d 0
250	#ifdef TARGET_NEC_SX
251	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
252	#endif /* TARGET_NEC_SX */
253	DO j=1,sNy
254	DO i=1,sNx
255	cg2d_q(i,j,bi,bj) =
256	& aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj)
257	& +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj)
258	& +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj)
259	& +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj)
260	& +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj)
261	alphaTile(bi,bj) = alphaTile(bi,bj)
262	& + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
263	ENDDO
264	ENDDO
265	ENDDO
266	ENDDO
267
268	CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
269
270	sigma = eta_qrN/alpha
271
272	C== Update simultaneously solution and residual vectors (and Iter number)
273	C Now compute "interior" points.
274	DO bj=myByLo(myThid),myByHi(myThid)
275	DO bi=myBxLo(myThid),myBxHi(myThid)
276	DO j=1,sNy
277	DO i=1,sNx
278	cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+sigma*cg2d_s(i,j,bi,bj)
279	cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-sigma*cg2d_q(i,j,bi,bj)
280	ENDDO
281	ENDDO
282	ENDDO
283	ENDDO
284	c actualIts = 1
285
286	CALL EXCH_S3D_RL( cg2d_r,1, myThid )
287
288	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
289	DO 10 it2d=1, numIters-1
290	c actualIts = it2d
291
292	C-- Solve preconditioning equation and update
293	C-- conjugate direction vector "s".
294	C-- z = M^-1 r
295	DO bj=myByLo(myThid),myByHi(myThid)
296	DO bi=myBxLo(myThid),myBxHi(myThid)
297	#ifdef TARGET_NEC_SX
298	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
299	#endif /* TARGET_NEC_SX */
300	DO j=1,sNy
301	DO i=1,sNx
302	cg2d_y(i,j,bi,bj) =
303	& pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
304	& +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
305	& +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
306	& +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
307	& +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
308	ENDDO
309	ENDDO
310	ENDDO
311	ENDDO
312
313	CALL EXCH_S3D_RL( cg2d_y, 1, myThid )
314
315	C== v = A.z
316	C-- eta_qr = <z,r>
317	C-- delta = <z,v>
318	C-- Do the error calcuation here to consolidate global reductions
319	DO bj=myByLo(myThid),myByHi(myThid)
320	DO bi=myBxLo(myThid),myBxHi(myThid)
321	eta_qrNtile(bi,bj) = 0. _d 0
322	deltaTile(bi,bj) = 0. _d 0
323	errTile(bi,bj) = 0. _d 0
324	#ifdef TARGET_NEC_SX
325	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
326	#endif /* TARGET_NEC_SX */
327	DO j=1,sNy
328	DO i=1,sNx
329	cg2d_v(i,j,bi,bj) =
330	& aW2d(i ,j ,bi,bj)*cg2d_y(i-1,j ,bi,bj)
331	& +aW2d(i+1,j ,bi,bj)*cg2d_y(i+1,j ,bi,bj)
332	& +aS2d(i ,j ,bi,bj)*cg2d_y(i ,j-1,bi,bj)
333	& +aS2d(i ,j+1,bi,bj)*cg2d_y(i ,j+1,bi,bj)
334	& +aC2d(i ,j ,bi,bj)*cg2d_y(i ,j ,bi,bj)
335	eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
336	& +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
337	deltaTile(bi,bj) = deltaTile(bi,bj)
338	& +cg2d_y(i,j,bi,bj)*cg2d_v(i,j,bi,bj)
339	errTile(bi,bj) = errTile(bi,bj)
340	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
341	ENDDO
342	ENDDO
343	sumPhi(1,bi,bj) = eta_qrNtile(bi,bj)
344	sumPhi(2,bi,bj) = deltaTile(bi,bj)
345	sumPhi(3,bi,bj) = errTile(bi,bj)
346	ENDDO
347	ENDDO
348
349	C CALL GLOBAL_SUM_TILE_RL( eta_qrNtile, eta_qrN,myThid )
350	C CALL GLOBAL_SUM_TILE_RL( deltaTile, delta, myThid )
351	C CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
352	CALL GLOBAL_VEC_SUM_R8( 3, 3, sumPhi, myThid )
353
354	eta_qrN = sumPhi(1,1,1)
355	delta = sumPhi(2,1,1)
356	err_sq = sumPhi(3,1,1)
357
358	IF ( printResidual ) THEN
359	IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
360	WRITE(msgBuf,'(A,I6,A,1PE21.14)')
361	& ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
362	CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
363	& SQUEEZE_RIGHT, myThid )
364	ENDIF
365	ENDIF
366	IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
367	IF ( err_sq .LT. minResidualSq ) THEN
368	C- Store lowest residual solution
369	minResidualSq = err_sq
370	nIterMin = it2d
371	DO bj=myByLo(myThid),myByHi(myThid)
372	DO bi=myBxLo(myThid),myBxHi(myThid)
373	DO j=1,sNy
374	DO i=1,sNx
375	cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
376	ENDDO
377	ENDDO
378	ENDDO
379	ENDDO
380	ENDIF
381
382	cgBeta = eta_qrN/eta_qrNM1
383	eta_qrNM1 = eta_qrN
384	alpha = delta - (cgBetacgBeta)alpha
385	sigma = eta_qrN/alpha
386
387	C== Update simultaneously solution and residual vectors (and Iter number)
388	DO bj=myByLo(myThid),myByHi(myThid)
389	DO bi=myBxLo(myThid),myBxHi(myThid)
390	DO j=1,sNy
391	DO i=1,sNx
392	cg2d_s(i,j,bi,bj) = cg2d_y(i,j,bi,bj)
393	& + cgBeta*cg2d_s(i,j,bi,bj)
394	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
395	& + sigma*cg2d_s(i,j,bi,bj)
396	cg2d_q(i,j,bi,bj) = cg2d_v(i,j,bi,bj)
397	& + cgBeta*cg2d_q(i,j,bi,bj)
398	cg2d_r(i,j,bi,bj) = cg2d_r(i,j,bi,bj)
399	& - sigma*cg2d_q(i,j,bi,bj)
400	ENDDO
401	ENDDO
402	ENDDO
403	ENDDO
404	c actualIts = it2d + 1
405
406	CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
407
408	10 CONTINUE
409	DO bj=myByLo(myThid),myByHi(myThid)
410	DO bi=myBxLo(myThid),myBxHi(myThid)
411	errTile(bi,bj) = 0. _d 0
412	DO j=1,sNy
413	DO i=1,sNx
414	errTile(bi,bj) = errTile(bi,bj)
415	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
416	ENDDO
417	ENDDO
418	ENDDO
419	ENDDO
420	CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
421	11 CONTINUE
422
423	IF ( nIterMin.GE.0 .AND. err_sq .GT. minResidualSq ) THEN
424	C- use the lowest residual solution (instead of current one = last residual)
425	DO bj=myByLo(myThid),myByHi(myThid)
426	DO bi=myBxLo(myThid),myBxHi(myThid)
427	DO j=1,sNy
428	DO i=1,sNx
429	cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
430	ENDDO
431	ENDDO
432	ENDDO
433	ENDDO
434	ENDIF
435
436	IF (cg2dNormaliseRHS) THEN
437	C-- Un-normalise the answer
438	DO bj=myByLo(myThid),myByHi(myThid)
439	DO bi=myBxLo(myThid),myBxHi(myThid)
440	DO j=1,sNy
441	DO i=1,sNx
442	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
443	ENDDO
444	ENDDO
445	ENDDO
446	ENDDO
447	ENDIF
448
449	C-- Return parameters to caller
450	lastResidual = SQRT(err_sq)
451	numIters = it2d
452	c numIters = actualIts
453
454	#endif /* ALLOW_SRCG */
455	RETURN
456	END