model/src/cg3d.F

C $Header: /u/gcmpack/MITgcm/model/src/cg3d.F,v 1.16 2005/11/08 06:18:10 jmc Exp $
C $Name:  $

#include "CPP_OPTIONS.h"

CBOP
C     !ROUTINE: CG3D
C     !INTERFACE:
      SUBROUTINE CG3D(  
     I                cg3d_b,
     U                cg3d_x,
     O                firstResidual,
     O                lastResidual,
     U                numIters,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG3D                                           
C     | o Three-dimensional grid problem conjugate-gradient       
C     |   inverter (with preconditioner).                         
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.   
C     | It requires the A be symmetric.                           
C     | This implementation assumes A is a seven-diagonal          
C     | matrix of the form that arises in the discrete            
C     | representation of the del^2 operator in a                 
C     | three-dimensional space.                                    
C     | Notes:                                                    
C     | ======                                                    
C     | This implementation can support shared-memory              
C     | multi-threaded execution. In order to do this COMMON       
C     | blocks are used for many of the arrays - even ones that    
C     | are only used for intermedaite results. This design is     
C     | OK if you want to all the threads to collaborate on        
C     | solving the same problem. On the other hand if you want    
C     | the threads to solve several different problems            
C     | concurrently this implementation will not work.           
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "GRID.h"
#include "CG3D.h"

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     myThid    - Thread on which I am working.
C     cg3d_b    - The source term or "right hand side"
C     cg3d_x    - The solution
C     firstResidual - the initial residual before any iterations
C     lastResidual  - the actual residual reached
C     numIters  - Entry: the maximum number of iterations allowed
C                 Exit:  the actual number of iterations used
      _RL  cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
      _RL  cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
      _RL  firstResidual
      _RL  lastResidual
      INTEGER numIters
      INTEGER myThid


#ifdef ALLOW_NONHYDROSTATIC

C     !LOCAL VARIABLES:
C     === Local variables ====
C     actualIts      - Number of iterations taken
C     actualResidual - residual
C     bi          - Block index in X and Y.
C     bj
C     eta_qrN     - Used in computing search directions
C     eta_qrNM1     suffix N and NM1 denote current and
C     cgBeta        previous iterations respectively.
C     alpha  
C     sumRHS      - Sum of right-hand-side. Sometimes this is a
C                   useful debuggin/trouble shooting diagnostic.
C                   For neumann problems sumRHS needs to be ~0.
C                   or they converge at a non-zero residual.
C     err         - Measure of residual of Ax - b, usually the norm.
C     I, J, K, N  - Loop counters ( N counts CG iterations )
      INTEGER actualIts
      _RL    actualResidual
      INTEGER bi, bj              
      INTEGER I, J, K, it3d
      INTEGER Km1, Kp1
      _RL    maskM1, maskP1
      _RL    err, errTile
      _RL    eta_qrN, eta_qrNtile
      _RL    eta_qrNM1
      _RL    cgBeta
      _RL    alpha , alphaTile
      _RL    sumRHS, sumRHStile
      _RL    rhsMax
      _RL    rhsNorm
CEOP


C--   Initialise inverter
      eta_qrNM1 = 1. D0

C--   Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nr
         DO J=1,sNy
          DO I=1,sNx
           cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*cg3dNorm
     &                          * maskC(I,J,K,bi,bj)
           rhsMax = MAX(ABS(cg3d_b(I,J,K,bi,bj)),rhsMax)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO
      _GLOBAL_MAX_R8( rhsMax, myThid )
      rhsNorm = 1. _d 0
      IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nr
         DO J=1,sNy
          DO I=1,sNx
           cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*rhsNorm
           cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)*rhsNorm
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C--   Update overlaps
c     _EXCH_XYZ_R8( cg3d_b, myThid )
      _EXCH_XYZ_R8( cg3d_x, myThid )

C--   Initial residual calculation (with free-Surface term)
      err    = 0. _d 0
      sumRHS = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        errTile    = 0. _d 0
        sumRHStile = 0. _d 0
        DO K=1,Nr
         Km1 = MAX(K-1, 1 )
         Kp1 = MIN(K+1, Nr)
         maskM1 = 1. _d 0
         maskP1 = 1. _d 0
         IF ( K .EQ. 1 ) maskM1 = 0. _d 0
         IF ( K .EQ. Nr) maskP1 = 0. _d 0
           
         DO J=1,sNy
          DO I=1,sNx
           cg3d_r(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj) -( 0.
     &     +aW3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I-1,J  ,K  ,bi,bj)
     &     +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_x(I+1,J  ,K  ,bi,bj)
     &     +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J-1,K  ,bi,bj)
     &     +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_x(I  ,J+1,K  ,bi,bj)
     &     +aV3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J  ,Km1,bi,bj)*maskM1
     &     +aV3d(I  ,J  ,Kp1,bi,bj)*cg3d_x(I  ,J  ,Kp1,bi,bj)*maskP1
     &     +aC3d(I  ,J  ,K  ,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
     &     )
           errTile = errTile
     &     +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           sumRHStile = sumRHStile
     &     +cg3d_b(I,J,K,bi,bj)
          ENDDO
         ENDDO
         DO J=1-1,sNy+1
          DO I=1-1,sNx+1
           cg3d_s(I,J,K,bi,bj) = 0.
          ENDDO
         ENDDO
        ENDDO
        err    = err    + errTile
        sumRHS = sumRHS + sumRHStile
       ENDDO
      ENDDO
       CALL EXCH_S3D_RL( cg3d_r, myThid )
c      CALL EXCH_S3D_RL( cg3d_s, myThid )
      _GLOBAL_SUM_R8( sumRHS, myThid )
      _GLOBAL_SUM_R8( err   , myThid )
      
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        write(standardmessageunit,'(A,1P2E22.14)')
     &     ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
        _END_MASTER( myThid )
      ENDIF

      actualIts      = 0
      actualResidual = SQRT(err)
C     _BARRIER
c     _BEGIN_MASTER( myThid )
c      WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
c    &  actualIts, actualResidual
c     _END_MASTER( myThid )
      firstResidual=actualResidual

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      DO 10 it3d=1, numIters

CcnhDebugStarts
c      IF ( mod(it3d-1,10).EQ.0)
c    &   WRITE(*,*) ' CG3D: Iteration ',it3d-1,
c    &      ' residual = ',actualResidual
CcnhDebugEnds
       IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
C      Note. On the next to loops over all tiles the inner loop ranges
C            in sNx and sNy are expanded by 1 to avoid a communication 
C            step. However this entails a bit of gynamastics because we only 
C            want eta_qrN for the interior points.
       eta_qrN = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         eta_qrNtile = 0. _d 0
         DO K=1,1
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_q(I,J,K,bi,bj) = 
     &       zMC(I  ,J  ,K,bi,bj)*cg3d_r(I  ,J  ,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         DO K=2,Nr
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_q(I,J,K,bi,bj) = 
     &       zMC(I,J,K,bi,bj)*(cg3d_r(I,J,K  ,bi,bj)
     &      -zML(I,J,K,bi,bj)*cg3d_q(I,J,K-1,bi,bj))
           ENDDO
          ENDDO
         ENDDO
         DO K=Nr,Nr
caja      IF (Nr .GT. 1) THEN
caja       DO J=1-1,sNy+1
caja        DO I=1-1,sNx+1
caja         cg3d_q(I,J,K,bi,bj) = 
caja &        zMC(i,j,k,bi,bj)*(cg3d_r(i,j,k  ,bi,bj)
caja &       -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj))
caja        ENDDO
caja       ENDDO
caja      ENDIF
          DO J=1,sNy
           DO I=1,sNx
            eta_qrNtile = eta_qrNtile
     &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         DO K=Nr-1,1,-1
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_q(I,J,K,bi,bj) = 
     &      cg3d_q(I,J,K,bi,bj)
     &      -zMU(I,J,K,bi,bj)*cg3d_q(I,J,K+1,bi,bj)
           ENDDO
          ENDDO
          DO J=1,sNy
           DO I=1,sNx
            eta_qrNtile = eta_qrNtile
     &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         eta_qrN = eta_qrN + eta_qrNtile
        ENDDO
       ENDDO
caja
caja  eta_qrN=0.
caja   DO bj=myByLo(myThid),myByHi(myThid)
caja    DO bi=myBxLo(myThid),myBxHi(myThid)
caja     DO K=1,Nr
caja      DO J=1,sNy
caja       DO I=1,sNx
caja        eta_qrN = eta_qrN
caja &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
caja       ENDDO
caja      ENDDO
caja     ENDDO
caja    ENDDO
caja   ENDDO
caja

       _GLOBAL_SUM_R8(eta_qrN, myThid)
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN
CcnhDebugEnds
       cgBeta   = eta_qrN/eta_qrNM1
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta
CcnhDebugEnds
       eta_qrNM1 = eta_qrN

       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO K=1,Nr
          DO J=1-1,sNy+1
           DO I=1-1,sNx+1
            cg3d_s(I,J,K,bi,bj) = cg3d_q(I,J,K,bi,bj) 
     &                          + cgBeta*cg3d_s(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
        ENDDO
       ENDDO

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
       alpha = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         alphaTile = 0. _d 0
         IF ( Nr .GT. 1 ) THEN
          DO K=1,1
           DO J=1,sNy
            DO I=1,sNx
             cg3d_q(I,J,K,bi,bj) = 
     &       aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &      +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &      +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &      +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &      +aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K+1,bi,bj)
     &      +aC3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
             alphaTile = alphaTile
     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ELSE
          DO K=1,1
           DO J=1,sNy
            DO I=1,sNx
             cg3d_q(I,J,K,bi,bj) = 
     &       aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &      +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &      +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &      +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &      +aC3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
             alphaTile = alphaTile
     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ENDIF
         DO K=2,Nr-1
          DO J=1,sNy
           DO I=1,sNx
            cg3d_q(I,J,K,bi,bj) = 
     &      aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &     +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &     +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &     +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &     +aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K-1,bi,bj)
     &     +aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K+1,bi,bj)
     &     +aC3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
            alphaTile = alphaTile
     &                +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         IF ( Nr .GT. 1 ) THEN
          DO K=Nr,Nr
           DO J=1,sNy
            DO I=1,sNx
             cg3d_q(I,J,K,bi,bj) = 
     &       aW3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I-1,J  ,K  ,bi,bj)
     &      +aW3d(I+1,J  ,K  ,bi,bj)*cg3d_s(I+1,J  ,K  ,bi,bj)
     &      +aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J-1,K  ,bi,bj)
     &      +aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J+1,K  ,bi,bj)
     &      +aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K-1,bi,bj)
     &      +aC3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
             alphaTile = alphaTile
     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ENDIF
         alpha = alpha + alphaTile
        ENDDO
       ENDDO
       _GLOBAL_SUM_R8(alpha,myThid)
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha
CcnhDebugEnds
       alpha = eta_qrN/alpha
CcnhDebugStarts
C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha
CcnhDebugEnds
     
C==    Update solution and residual vectors
C      Now compute "interior" points.
       err = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
        errTile    = 0. _d 0
         DO K=1,Nr
          DO J=1,sNy
           DO I=1,sNx
            cg3d_x(I,J,K,bi,bj)=cg3d_x(I,J,K,bi,bj)
     &            +alpha*cg3d_s(I,J,K,bi,bj)
            cg3d_r(I,J,K,bi,bj)=cg3d_r(I,J,K,bi,bj)
     &            -alpha*cg3d_q(I,J,K,bi,bj)
            errTile = errTile
     &             +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
         err = err + errTile
        ENDDO
       ENDDO

       _GLOBAL_SUM_R8( err   , myThid )
       err = SQRT(err)
       actualIts      = it3d
       actualResidual = err
       IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
       CALL EXCH_S3D_RL( cg3d_r, myThid )

   10 CONTINUE
   11 CONTINUE

C--   Un-normalise the answer
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO K=1,Nr
         DO J=1,sNy
          DO I=1,sNx
           cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)/rhsNorm
          ENDDO
         ENDDO
        ENDDO
       ENDDO
      ENDDO

Cadj  _EXCH_XYZ_R8(cg3d_x, myThid )
c     _BEGIN_MASTER( myThid )
c      WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
c    &  actualIts, actualResidual
c     _END_MASTER( myThid )
      lastResidual=actualResidual
      numIters=actualIts

#endif /* ALLOW_NONHYDROSTATIC */

      RETURN
      END
1	C $Header: /u/gcmpack/MITgcm/model/src/cg3d.F,v 1.16 2005/11/08 06:18:10 jmc Exp $
2	C $Name: $
3
4	#include "CPP_OPTIONS.h"
5
6	CBOP
7	C !ROUTINE: CG3D
8	C !INTERFACE:
9	SUBROUTINE CG3D(
10	I cg3d_b,
11	U cg3d_x,
12	O firstResidual,
13	O lastResidual,
14	U numIters,
15	I myThid )
16	C !DESCRIPTION: \bv
17	C ==========================================================
18	C \| SUBROUTINE CG3D
19	C \| o Three-dimensional grid problem conjugate-gradient
20	C \| inverter (with preconditioner).
21	C ==========================================================
22	C \| Con. grad is an iterative procedure for solving Ax = b.
23	C \| It requires the A be symmetric.
24	C \| This implementation assumes A is a seven-diagonal
25	C \| matrix of the form that arises in the discrete
26	C \| representation of the del^2 operator in a
27	C \| three-dimensional space.
28	C \| Notes:
29	C \| ======
30	C \| This implementation can support shared-memory
31	C \| multi-threaded execution. In order to do this COMMON
32	C \| blocks are used for many of the arrays - even ones that
33	C \| are only used for intermedaite results. This design is
34	C \| OK if you want to all the threads to collaborate on
35	C \| solving the same problem. On the other hand if you want
36	C \| the threads to solve several different problems
37	C \| concurrently this implementation will not work.
38	C ==========================================================
39	C \ev
40
41	C !USES:
42	IMPLICIT NONE
43	C === Global data ===
44	#include "SIZE.h"
45	#include "EEPARAMS.h"
46	#include "PARAMS.h"
47	#include "GRID.h"
48	#include "CG3D.h"
49
50	C !INPUT/OUTPUT PARAMETERS:
51	C === Routine arguments ===
52	C myThid - Thread on which I am working.
53	C cg3d_b - The source term or "right hand side"
54	C cg3d_x - The solution
55	C firstResidual - the initial residual before any iterations
56	C lastResidual - the actual residual reached
57	C numIters - Entry: the maximum number of iterations allowed
58	C Exit: the actual number of iterations used
59	_RL cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
60	_RL cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
61	_RL firstResidual
62	_RL lastResidual
63	INTEGER numIters
64	INTEGER myThid
65
66
67	#ifdef ALLOW_NONHYDROSTATIC
68
69	C !LOCAL VARIABLES:
70	C === Local variables ====
71	C actualIts - Number of iterations taken
72	C actualResidual - residual
73	C bi - Block index in X and Y.
74	C bj
75	C eta_qrN - Used in computing search directions
76	C eta_qrNM1 suffix N and NM1 denote current and
77	C cgBeta previous iterations respectively.
78	C alpha
79	C sumRHS - Sum of right-hand-side. Sometimes this is a
80	C useful debuggin/trouble shooting diagnostic.
81	C For neumann problems sumRHS needs to be ~0.
82	C or they converge at a non-zero residual.
83	C err - Measure of residual of Ax - b, usually the norm.
84	C I, J, K, N - Loop counters ( N counts CG iterations )
85	INTEGER actualIts
86	_RL actualResidual
87	INTEGER bi, bj
88	INTEGER I, J, K, it3d
89	INTEGER Km1, Kp1
90	_RL maskM1, maskP1
91	_RL err, errTile
92	_RL eta_qrN, eta_qrNtile
93	_RL eta_qrNM1
94	_RL cgBeta
95	_RL alpha , alphaTile
96	_RL sumRHS, sumRHStile
97	_RL rhsMax
98	_RL rhsNorm
99	CEOP
100
101
102	C-- Initialise inverter
103	eta_qrNM1 = 1. D0
104
105	C-- Normalise RHS
106	rhsMax = 0. _d 0
107	DO bj=myByLo(myThid),myByHi(myThid)
108	DO bi=myBxLo(myThid),myBxHi(myThid)
109	DO K=1,Nr
110	DO J=1,sNy
111	DO I=1,sNx
112	cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*cg3dNorm
113	& * maskC(I,J,K,bi,bj)
114	rhsMax = MAX(ABS(cg3d_b(I,J,K,bi,bj)),rhsMax)
115	ENDDO
116	ENDDO
117	ENDDO
118	ENDDO
119	ENDDO
120	_GLOBAL_MAX_R8( rhsMax, myThid )
121	rhsNorm = 1. _d 0
122	IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
123	DO bj=myByLo(myThid),myByHi(myThid)
124	DO bi=myBxLo(myThid),myBxHi(myThid)
125	DO K=1,Nr
126	DO J=1,sNy
127	DO I=1,sNx
128	cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*rhsNorm
129	cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)*rhsNorm
130	ENDDO
131	ENDDO
132	ENDDO
133	ENDDO
134	ENDDO
135
136	C-- Update overlaps
137	c _EXCH_XYZ_R8( cg3d_b, myThid )
138	_EXCH_XYZ_R8( cg3d_x, myThid )
139
140	C-- Initial residual calculation (with free-Surface term)
141	err = 0. _d 0
142	sumRHS = 0. _d 0
143	DO bj=myByLo(myThid),myByHi(myThid)
144	DO bi=myBxLo(myThid),myBxHi(myThid)
145	errTile = 0. _d 0
146	sumRHStile = 0. _d 0
147	DO K=1,Nr
148	Km1 = MAX(K-1, 1 )
149	Kp1 = MIN(K+1, Nr)
150	maskM1 = 1. _d 0
151	maskP1 = 1. _d 0
152	IF ( K .EQ. 1 ) maskM1 = 0. _d 0
153	IF ( K .EQ. Nr) maskP1 = 0. _d 0
154
155	DO J=1,sNy
156	DO I=1,sNx
157	cg3d_r(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj) -( 0.
158	& +aW3d(I ,J ,K ,bi,bj)*cg3d_x(I-1,J ,K ,bi,bj)
159	& +aW3d(I+1,J ,K ,bi,bj)*cg3d_x(I+1,J ,K ,bi,bj)
160	& +aS3d(I ,J ,K ,bi,bj)*cg3d_x(I ,J-1,K ,bi,bj)
161	& +aS3d(I ,J+1,K ,bi,bj)*cg3d_x(I ,J+1,K ,bi,bj)
162	& +aV3d(I ,J ,K ,bi,bj)cg3d_x(I ,J ,Km1,bi,bj)maskM1
163	& +aV3d(I ,J ,Kp1,bi,bj)cg3d_x(I ,J ,Kp1,bi,bj)maskP1
164	& +aC3d(I ,J ,K ,bi,bj)*cg3d_x(I ,J ,K ,bi,bj)
165	& )
166	errTile = errTile
167	& +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
168	sumRHStile = sumRHStile
169	& +cg3d_b(I,J,K,bi,bj)
170	ENDDO
171	ENDDO
172	DO J=1-1,sNy+1
173	DO I=1-1,sNx+1
174	cg3d_s(I,J,K,bi,bj) = 0.
175	ENDDO
176	ENDDO
177	ENDDO
178	err = err + errTile
179	sumRHS = sumRHS + sumRHStile
180	ENDDO
181	ENDDO
182	CALL EXCH_S3D_RL( cg3d_r, myThid )
183	c CALL EXCH_S3D_RL( cg3d_s, myThid )
184	_GLOBAL_SUM_R8( sumRHS, myThid )
185	_GLOBAL_SUM_R8( err , myThid )
186
187	IF ( debugLevel .GE. debLevZero ) THEN
188	_BEGIN_MASTER( myThid )
189	write(standardmessageunit,'(A,1P2E22.14)')
190	& ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
191	_END_MASTER( myThid )
192	ENDIF
193
194	actualIts = 0
195	actualResidual = SQRT(err)
196	C _BARRIER
197	c _BEGIN_MASTER( myThid )
198	c WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
199	c & actualIts, actualResidual
200	c _END_MASTER( myThid )
201	firstResidual=actualResidual
202
203	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
204	DO 10 it3d=1, numIters
205
206	CcnhDebugStarts
207	c IF ( mod(it3d-1,10).EQ.0)
208	c & WRITE(,) ' CG3D: Iteration ',it3d-1,
209	c & ' residual = ',actualResidual
210	CcnhDebugEnds
211	IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
212	C-- Solve preconditioning equation and update
213	C-- conjugate direction vector "s".
214	C Note. On the next to loops over all tiles the inner loop ranges
215	C in sNx and sNy are expanded by 1 to avoid a communication
216	C step. However this entails a bit of gynamastics because we only
217	C want eta_qrN for the interior points.
218	eta_qrN = 0. _d 0
219	DO bj=myByLo(myThid),myByHi(myThid)
220	DO bi=myBxLo(myThid),myBxHi(myThid)
221	eta_qrNtile = 0. _d 0
222	DO K=1,1
223	DO J=1-1,sNy+1
224	DO I=1-1,sNx+1
225	cg3d_q(I,J,K,bi,bj) =
226	& zMC(I ,J ,K,bi,bj)*cg3d_r(I ,J ,K,bi,bj)
227	ENDDO
228	ENDDO
229	ENDDO
230	DO K=2,Nr
231	DO J=1-1,sNy+1
232	DO I=1-1,sNx+1
233	cg3d_q(I,J,K,bi,bj) =
234	& zMC(I,J,K,bi,bj)*(cg3d_r(I,J,K ,bi,bj)
235	& -zML(I,J,K,bi,bj)*cg3d_q(I,J,K-1,bi,bj))
236	ENDDO
237	ENDDO
238	ENDDO
239	DO K=Nr,Nr
240	caja IF (Nr .GT. 1) THEN
241	caja DO J=1-1,sNy+1
242	caja DO I=1-1,sNx+1
243	caja cg3d_q(I,J,K,bi,bj) =
244	caja & zMC(i,j,k,bi,bj)*(cg3d_r(i,j,k ,bi,bj)
245	caja & -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj))
246	caja ENDDO
247	caja ENDDO
248	caja ENDIF
249	DO J=1,sNy
250	DO I=1,sNx
251	eta_qrNtile = eta_qrNtile
252	& +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
253	ENDDO
254	ENDDO
255	ENDDO
256	DO K=Nr-1,1,-1
257	DO J=1-1,sNy+1
258	DO I=1-1,sNx+1
259	cg3d_q(I,J,K,bi,bj) =
260	& cg3d_q(I,J,K,bi,bj)
261	& -zMU(I,J,K,bi,bj)*cg3d_q(I,J,K+1,bi,bj)
262	ENDDO
263	ENDDO
264	DO J=1,sNy
265	DO I=1,sNx
266	eta_qrNtile = eta_qrNtile
267	& +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
268	ENDDO
269	ENDDO
270	ENDDO
271	eta_qrN = eta_qrN + eta_qrNtile
272	ENDDO
273	ENDDO
274	caja
275	caja eta_qrN=0.
276	caja DO bj=myByLo(myThid),myByHi(myThid)
277	caja DO bi=myBxLo(myThid),myBxHi(myThid)
278	caja DO K=1,Nr
279	caja DO J=1,sNy
280	caja DO I=1,sNx
281	caja eta_qrN = eta_qrN
282	caja & +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
283	caja ENDDO
284	caja ENDDO
285	caja ENDDO
286	caja ENDDO
287	caja ENDDO
288	caja
289
290	_GLOBAL_SUM_R8(eta_qrN, myThid)
291	CcnhDebugStarts
292	C WRITE(,) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN
293	CcnhDebugEnds
294	cgBeta = eta_qrN/eta_qrNM1
295	CcnhDebugStarts
296	C WRITE(,) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta
297	CcnhDebugEnds
298	eta_qrNM1 = eta_qrN
299
300	DO bj=myByLo(myThid),myByHi(myThid)
301	DO bi=myBxLo(myThid),myBxHi(myThid)
302	DO K=1,Nr
303	DO J=1-1,sNy+1
304	DO I=1-1,sNx+1
305	cg3d_s(I,J,K,bi,bj) = cg3d_q(I,J,K,bi,bj)
306	& + cgBeta*cg3d_s(I,J,K,bi,bj)
307	ENDDO
308	ENDDO
309	ENDDO
310	ENDDO
311	ENDDO
312
313	C== Evaluate laplace operator on conjugate gradient vector
314	C== q = A.s
315	alpha = 0. _d 0
316	DO bj=myByLo(myThid),myByHi(myThid)
317	DO bi=myBxLo(myThid),myBxHi(myThid)
318	alphaTile = 0. _d 0
319	IF ( Nr .GT. 1 ) THEN
320	DO K=1,1
321	DO J=1,sNy
322	DO I=1,sNx
323	cg3d_q(I,J,K,bi,bj) =
324	& aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
325	& +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
326	& +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
327	& +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
328	& +aV3d(I ,J ,K+1,bi,bj)*cg3d_s(I ,J ,K+1,bi,bj)
329	& +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
330	alphaTile = alphaTile
331	& +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
332	ENDDO
333	ENDDO
334	ENDDO
335	ELSE
336	DO K=1,1
337	DO J=1,sNy
338	DO I=1,sNx
339	cg3d_q(I,J,K,bi,bj) =
340	& aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
341	& +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
342	& +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
343	& +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
344	& +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
345	alphaTile = alphaTile
346	& +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
347	ENDDO
348	ENDDO
349	ENDDO
350	ENDIF
351	DO K=2,Nr-1
352	DO J=1,sNy
353	DO I=1,sNx
354	cg3d_q(I,J,K,bi,bj) =
355	& aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
356	& +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
357	& +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
358	& +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
359	& +aV3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K-1,bi,bj)
360	& +aV3d(I ,J ,K+1,bi,bj)*cg3d_s(I ,J ,K+1,bi,bj)
361	& +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
362	alphaTile = alphaTile
363	& +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
364	ENDDO
365	ENDDO
366	ENDDO
367	IF ( Nr .GT. 1 ) THEN
368	DO K=Nr,Nr
369	DO J=1,sNy
370	DO I=1,sNx
371	cg3d_q(I,J,K,bi,bj) =
372	& aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
373	& +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
374	& +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
375	& +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
376	& +aV3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K-1,bi,bj)
377	& +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
378	alphaTile = alphaTile
379	& +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
380	ENDDO
381	ENDDO
382	ENDDO
383	ENDIF
384	alpha = alpha + alphaTile
385	ENDDO
386	ENDDO
387	_GLOBAL_SUM_R8(alpha,myThid)
388	CcnhDebugStarts
389	C WRITE(,) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha
390	CcnhDebugEnds
391	alpha = eta_qrN/alpha
392	CcnhDebugStarts
393	C WRITE(,) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha
394	CcnhDebugEnds
395
396	C== Update solution and residual vectors
397	C Now compute "interior" points.
398	err = 0. _d 0
399	DO bj=myByLo(myThid),myByHi(myThid)
400	DO bi=myBxLo(myThid),myBxHi(myThid)
401	errTile = 0. _d 0
402	DO K=1,Nr
403	DO J=1,sNy
404	DO I=1,sNx
405	cg3d_x(I,J,K,bi,bj)=cg3d_x(I,J,K,bi,bj)
406	& +alpha*cg3d_s(I,J,K,bi,bj)
407	cg3d_r(I,J,K,bi,bj)=cg3d_r(I,J,K,bi,bj)
408	& -alpha*cg3d_q(I,J,K,bi,bj)
409	errTile = errTile
410	& +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
411	ENDDO
412	ENDDO
413	ENDDO
414	err = err + errTile
415	ENDDO
416	ENDDO
417
418	_GLOBAL_SUM_R8( err , myThid )
419	err = SQRT(err)
420	actualIts = it3d
421	actualResidual = err
422	IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
423	CALL EXCH_S3D_RL( cg3d_r, myThid )
424
425	10 CONTINUE
426	11 CONTINUE
427
428	C-- Un-normalise the answer
429	DO bj=myByLo(myThid),myByHi(myThid)
430	DO bi=myBxLo(myThid),myBxHi(myThid)
431	DO K=1,Nr
432	DO J=1,sNy
433	DO I=1,sNx
434	cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)/rhsNorm
435	ENDDO
436	ENDDO
437	ENDDO
438	ENDDO
439	ENDDO
440
441	Cadj _EXCH_XYZ_R8(cg3d_x, myThid )
442	c _BEGIN_MASTER( myThid )
443	c WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
444	c & actualIts, actualResidual
445	c _END_MASTER( myThid )
446	lastResidual=actualResidual
447	numIters=actualIts
448
449	#endif /* ALLOW_NONHYDROSTATIC */
450
451	RETURN
452	END