model/src/cg2d.F

C $Header: /u/gcmpack/models/MITgcmUV/model/src/cg2d.F,v 1.30 2001/03/09 20:45:09 adcroft Exp $
C $Name:  $

#include "CPP_OPTIONS.h"

#define USE_OPTIMIZED_OVERLAPS

      SUBROUTINE CG2D(  
     I                cg2d_b,
     U                cg2d_x,
     U                tolerance,
     O                residual,
     U                numIters,
     I                myThid )
C     /==========================================================\
C     | SUBROUTINE CG2D                                          |
C     | o Two-dimensional grid problem conjugate-gradient        |
C     |   inverter (with preconditioner).                        |
C     |==========================================================|
C     | Con. grad is an iterative procedure for solving Ax = b.  |
C     | It requires the A be symmetric.                          |
C     | This implementation assumes A is a five-diagonal         |
C     | matrix of the form that arises in the discrete           |
C     | representation of the del^2 operator in a                |
C     | two-dimensional space.                                   |
C     | Notes:                                                   |
C     | ======                                                   |
C     | This implementation can support shared-memory            | 
C     | multi-threaded execution. In order to do this COMMON     | 
C     | blocks are used for many of the arrays - even ones that  | 
C     | are only used for intermedaite results. This design is   | 
C     | OK if you want to all the threads to collaborate on      | 
C     | solving the same problem. On the other hand if you want  | 
C     | the threads to solve several different problems          | 
C     | concurrently this implementation will not work.          |
C     \==========================================================/
      IMPLICIT NONE

C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "GRID.h"
#include "CG2D.h"
#include "SURFACE.h"

C     === Routine arguments ===
C     myThid    - Thread on which I am working.
C     cg2d_b    - The source term or "right hand side"
C     cg2d_x    - The solution
C     tolerance - Entry: the tolerance of accuracy to solve to
C                 Exit:  the initial residual before any iterations
C     residual  - Exit:  the actual residual reached
C     numIters  - Entry: the maximum number of iterations allowed
C                 Exit:  the actual number of iterations used
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  tolerance
      _RL  residual
      INTEGER numIters
      INTEGER myThid

C     === Local variables ====
C     actualIts      - Number of iterations taken
C     actualResidual - residual
C     bi          - Block index in X and Y.
C     bj
C     eta_qrN     - Used in computing search directions
C     eta_qrNM1     suffix N and NM1 denote current and
C     cgBeta        previous iterations respectively.
C     alpha  
C     sumRHS      - Sum of right-hand-side. Sometimes this is a
C                   useful debuggin/trouble shooting diagnostic.
C                   For neumann problems sumRHS needs to be ~0.
C                   or they converge at a non-zero residual.
C     err         - Measure of residual of Ax - b, usually the norm.
C     I, J, N     - Loop counters ( N counts CG iterations )
      INTEGER actualIts
      _RL    actualResidual,initialResidual
      INTEGER bi, bj              
      INTEGER I, J, it2d
      _RL    err
      _RL    eta_qrN
      _RL    eta_qrNM1
      _RL    cgBeta
      _RL    alpha
      _RL    sumRHS
      _RL    rhsMax
      _RL    rhsNorm

      INTEGER OLw
      INTEGER OLe
      INTEGER OLn
      INTEGER OLs
      INTEGER exchWidthX
      INTEGER exchWidthY
      INTEGER myNz


CcnhDebugStarts
C     CHARACTER*(MAX_LEN_FNAM) suff
CcnhDebugEnds


C--   Initialise inverter
      eta_qrNM1 = 1. _d 0

CcnhDebugStarts
C     _EXCH_XY_R8( cg2d_b, myThid )
C     CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.0 CG2D_B' , 1, myThid )
C     suff = 'unnormalised'
C     CALL WRITE_FLD_XY_RL (  'cg2d_b.',suff,    cg2d_b, 1, myThid)
C     STOP
CcnhDebugEnds

C--   Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
          rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
         ENDDO
        ENDDO
       ENDDO
      ENDDO
#ifdef LETS_MAKE_JAM
C     _GLOBAL_MAX_R8( rhsMax, myThid )
      rhsMax=1.
#else
      _GLOBAL_MAX_R8( rhsMax, myThid )
Catm  rhsMax=1.
#endif
      rhsNorm = 1. _d 0
      IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
          cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C--   Update overlaps
      _EXCH_XY_R8( cg2d_b, myThid )
      _EXCH_XY_R8( cg2d_x, myThid )
CcnhDebugStarts
C     CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid )
C     suff = 'normalised'
C     CALL WRITE_FLD_XY_RL (  'cg2d_b.',suff,    cg2d_b, 1, myThid)
CcnhDebugEnds

C--   Initial residual calculation
      err    = 0. _d 0
      sumRHS = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_s(I,J,bi,bj) = 0.
          cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
     &    (aW2d(I  ,J  ,bi,bj)*cg2d_x(I-1,J  ,bi,bj)
     &    +aW2d(I+1,J  ,bi,bj)*cg2d_x(I+1,J  ,bi,bj)
     &    +aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J-1,bi,bj)
     &    +aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J+1,bi,bj)
     &    -aW2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -aW2d(I+1,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* 
     &     cg2d_x(I  ,J  ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm
     &    )
          err            = err            + 
     &     cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
          sumRHS            = sumRHS            +
     &     cg2d_b(I,J,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO
C     _EXCH_XY_R8( cg2d_r, myThid )
#ifdef LETS_MAKE_JAM
      CALL EXCH_XY_O1_R8_JAM( cg2d_r )
#else
#ifdef USE_OPTIMIZED_OVERLAPS
       OLw        = 1
       OLe        = 1
       OLn        = 1
       OLs        = 1
#else
       OLw        = Olx
       OLe        = Olx
       OLn        = Oly
       OLs        = Oly
#endif
       exchWidthX = 1
       exchWidthY = 1
       myNz       = 1
       CALL EXCH_RL( cg2d_r,
     I            OLw, OLe, OLs, OLn, myNz,
     I            exchWidthX, exchWidthY,
     I            FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
#endif
C     _EXCH_XY_R8( cg2d_s, myThid )
#ifdef LETS_MAKE_JAM
      CALL EXCH_XY_O1_R8_JAM( cg2d_s )
#else
#ifdef USE_OPTIMIZED_OVERLAPS
       OLw        = 1
       OLe        = 1
       OLn        = 1
       OLs        = 1
#else
       OLw        = Olx
       OLe        = Olx
       OLn        = Oly
       OLs        = Oly
#endif
       exchWidthX = 1
       exchWidthY = 1
       myNz       = 1
       CALL EXCH_RL( cg2d_s,
     I            OLw, OLe, OLs, OLn, myNz,
     I            exchWidthX, exchWidthY,
     I            FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
#endif
      _GLOBAL_SUM_R8( sumRHS, myThid )
C     WRITE(6,*) ' mythid, err = ', mythid, SQRT(err)
      _GLOBAL_SUM_R8( err   , myThid )
      
      _BEGIN_MASTER( myThid )
      write(0,'(A,1PE30.14)') ' cg2d: Sum(rhs) = ',sumRHS
      _END_MASTER( )

      actualIts      = 0
      actualResidual = SQRT(err)
C     _BARRIER
c     _BEGIN_MASTER( myThid )
c      WRITE(0,'(A,I6,1PE30.14)') ' CG2D iters, err = ', 
c    & actualIts, actualResidual
c     _END_MASTER( )
      initialResidual=actualResidual

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      DO 10 it2d=1, numIters

CcnhDebugStarts
C      WRITE(0,*) ' CG2D: Iteration ',it2d-1,' residual = ',
C    &  actualResidual
CcnhDebugEnds
       IF ( err .LT. tolerance ) GOTO 11
C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
       eta_qrN = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO J=1,sNy
          DO I=1,sNx
           cg2d_q(I,J,bi,bj) = 
     &      pC(I  ,J  ,bi,bj)*cg2d_r(I  ,J  ,bi,bj)
     &     +pW(I  ,J  ,bi,bj)*cg2d_r(I-1,J  ,bi,bj)
     &     +pW(I+1,J  ,bi,bj)*cg2d_r(I+1,J  ,bi,bj)
     &     +pS(I  ,J  ,bi,bj)*cg2d_r(I  ,J-1,bi,bj)
     &     +pS(I  ,J+1,bi,bj)*cg2d_r(I  ,J+1,bi,bj)
CcnhDebugStarts
C          cg2d_q(I,J,bi,bj) = cg2d_r(I  ,J  ,bi,bj)
CcnhDebugEnds
           eta_qrN = eta_qrN
     &     +cg2d_q(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

       _GLOBAL_SUM_R8(eta_qrN, myThid)
CcnhDebugStarts
C      WRITE(0,*) ' CG2D: Iteration ',it2d-1,' eta_qrN = ',eta_qrN
CcnhDebugEnds
       cgBeta   = eta_qrN/eta_qrNM1
CcnhDebugStarts
C      WRITE(0,*) ' CG2D: Iteration ',it2d-1,' beta = ',cgBeta
CcnhDebugEnds
       eta_qrNM1 = eta_qrN

       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO J=1,sNy
          DO I=1,sNx
           cg2d_s(I,J,bi,bj) = cg2d_q(I,J,bi,bj)
     &                       + cgBeta*cg2d_s(I,J,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

C--    Do exchanges that require messages i.e. between
C--    processes.
C      _EXCH_XY_R8( cg2d_s, myThid )
#ifdef LETS_MAKE_JAM
      CALL EXCH_XY_O1_R8_JAM( cg2d_s )
#else
#ifdef USE_OPTIMIZED_OVERLAPS
       OLw        = 1
       OLe        = 1
       OLn        = 1
       OLs        = 1
#else
       OLw        = Olx
       OLe        = Olx
       OLn        = Oly
       OLs        = Oly
#endif
       exchWidthX = 1
       exchWidthY = 1
       myNz       = 1
       CALL EXCH_RL( cg2d_s,
     I            OLw, OLe, OLs, OLn, myNz,
     I            exchWidthX, exchWidthY,
     I            FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
#endif

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
       alpha = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO J=1,sNy
          DO I=1,sNx
           cg2d_q(I,J,bi,bj) = 
     &     aW2d(I  ,J  ,bi,bj)*cg2d_s(I-1,J  ,bi,bj)
     &    +aW2d(I+1,J  ,bi,bj)*cg2d_s(I+1,J  ,bi,bj)
     &    +aS2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J-1,bi,bj)
     &    +aS2d(I  ,J+1,bi,bj)*cg2d_s(I  ,J+1,bi,bj)
     &    -aW2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
     &    -aW2d(I+1,J  ,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
     &    -aS2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
     &    -aS2d(I  ,J+1,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
     &    -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* 
     &     cg2d_s(I  ,J  ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm
          alpha = alpha+cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO
       _GLOBAL_SUM_R8(alpha,myThid)
CcnhDebugStarts
C      WRITE(0,*) ' CG2D: Iteration ',it2d-1,' SUM(s*q)= ',alpha
CcnhDebugEnds
       alpha = eta_qrN/alpha
CcnhDebugStarts
C      WRITE(0,*) ' CG2D: Iteration ',it2d-1,' alpha= ',alpha
CcnhDebugEnds
     
C==    Update solution and residual vectors
C      Now compute "interior" points.
       err = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO J=1,sNy
          DO I=1,sNx
           cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+alpha*cg2d_s(I,J,bi,bj)
           cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-alpha*cg2d_q(I,J,bi,bj)
           err = err+cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

       _GLOBAL_SUM_R8( err   , myThid )
       err = SQRT(err)
       actualIts      = it2d
       actualResidual = err
       IF ( err .LT. cg2dTargetResidual ) GOTO 11
C      _EXCH_XY_R8(cg2d_r, myThid )
#ifdef LETS_MAKE_JAM
      CALL EXCH_XY_O1_R8_JAM( cg2d_r )
#else
#ifdef USE_OPTIMIZED_OVERLAPS
       OLw        = 1
       OLe        = 1
       OLn        = 1
       OLs        = 1
#else
       OLw        = Olx
       OLe        = Olx
       OLn        = Oly
       OLs        = Oly
#endif
       exchWidthX = 1
       exchWidthY = 1
       myNz       = 1
       CALL EXCH_RL( cg2d_r,
     I             OLw, OLe, OLs, OLn, myNz,
     I             exchWidthX, exchWidthY,
     I             FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
#endif

   10 CONTINUE
   11 CONTINUE

C--   Un-normalise the answer
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_x(I  ,J  ,bi,bj) = cg2d_x(I  ,J  ,bi,bj)/rhsNorm
         ENDDO
        ENDDO
       ENDDO
      ENDDO

C     The following exchange was moved up to solve_for_pressure
C     for compatibility with TAMC.
C     _EXCH_XY_R8(cg2d_x, myThid )
c     _BEGIN_MASTER( myThid )
c      WRITE(0,'(A,I6,1PE30.14)') ' CG2D iters, err = ', 
c    & actualIts, actualResidual
c     _END_MASTER( )

C--   Return parameters to caller
      tolerance=initialResidual
      residual=actualResidual
      numIters=actualIts

CcnhDebugStarts
C     CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
C     err    = 0. _d 0
C     DO bj=myByLo(myThid),myByHi(myThid)
C      DO bi=myBxLo(myThid),myBxHi(myThid)
C       DO J=1,sNy
C        DO I=1,sNx
C         cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
C    &    (aW2d(I  ,J  ,bi,bj)*cg2d_x(I-1,J  ,bi,bj)
C    &    +aW2d(I+1,J  ,bi,bj)*cg2d_x(I+1,J  ,bi,bj)
C    &    +aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J-1,bi,bj)
C    &    +aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J+1,bi,bj)
C    &    -aW2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
C    &    -aW2d(I+1,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
C    &    -aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
C    &    -aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J  ,bi,bj))
C         err            = err            + 
C    &     cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
C        ENDDO
C       ENDDO
C      ENDDO
C     ENDDO
C     _GLOBAL_SUM_R8( err   , myThid )
C     write(0,*) 'cg2d: Ax - b = ',SQRT(err)
CcnhDebugEnds

      RETURN
      END
1	C $Header: /u/gcmpack/models/MITgcmUV/model/src/cg2d.F,v 1.30 2001/03/09 20:45:09 adcroft Exp $
2	C $Name: $
3
4	#include "CPP_OPTIONS.h"
5
6	#define USE_OPTIMIZED_OVERLAPS
7
8	SUBROUTINE CG2D(
9	I cg2d_b,
10	U cg2d_x,
11	U tolerance,
12	O residual,
13	U numIters,
14	I myThid )
15	C /==========================================================\
16	C \| SUBROUTINE CG2D \|
17	C \| o Two-dimensional grid problem conjugate-gradient \|
18	C \| inverter (with preconditioner). \|
19	C \|==========================================================\|
20	C \| Con. grad is an iterative procedure for solving Ax = b. \|
21	C \| It requires the A be symmetric. \|
22	C \| This implementation assumes A is a five-diagonal \|
23	C \| matrix of the form that arises in the discrete \|
24	C \| representation of the del^2 operator in a \|
25	C \| two-dimensional space. \|
26	C \| Notes: \|
27	C \| ====== \|
28	C \| This implementation can support shared-memory \|
29	C \| multi-threaded execution. In order to do this COMMON \|
30	C \| blocks are used for many of the arrays - even ones that \|
31	C \| are only used for intermedaite results. This design is \|
32	C \| OK if you want to all the threads to collaborate on \|
33	C \| solving the same problem. On the other hand if you want \|
34	C \| the threads to solve several different problems \|
35	C \| concurrently this implementation will not work. \|
36	C \==========================================================/
37	IMPLICIT NONE
38
39	C === Global data ===
40	#include "SIZE.h"
41	#include "EEPARAMS.h"
42	#include "PARAMS.h"
43	#include "GRID.h"
44	#include "CG2D.h"
45	#include "SURFACE.h"
46
47	C === Routine arguments ===
48	C myThid - Thread on which I am working.
49	C cg2d_b - The source term or "right hand side"
50	C cg2d_x - The solution
51	C tolerance - Entry: the tolerance of accuracy to solve to
52	C Exit: the initial residual before any iterations
53	C residual - Exit: the actual residual reached
54	C numIters - Entry: the maximum number of iterations allowed
55	C Exit: the actual number of iterations used
56	_RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
57	_RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
58	_RL tolerance
59	_RL residual
60	INTEGER numIters
61	INTEGER myThid
62
63	C === Local variables ====
64	C actualIts - Number of iterations taken
65	C actualResidual - residual
66	C bi - Block index in X and Y.
67	C bj
68	C eta_qrN - Used in computing search directions
69	C eta_qrNM1 suffix N and NM1 denote current and
70	C cgBeta previous iterations respectively.
71	C alpha
72	C sumRHS - Sum of right-hand-side. Sometimes this is a
73	C useful debuggin/trouble shooting diagnostic.
74	C For neumann problems sumRHS needs to be ~0.
75	C or they converge at a non-zero residual.
76	C err - Measure of residual of Ax - b, usually the norm.
77	C I, J, N - Loop counters ( N counts CG iterations )
78	INTEGER actualIts
79	_RL actualResidual,initialResidual
80	INTEGER bi, bj
81	INTEGER I, J, it2d
82	_RL err
83	_RL eta_qrN
84	_RL eta_qrNM1
85	_RL cgBeta
86	_RL alpha
87	_RL sumRHS
88	_RL rhsMax
89	_RL rhsNorm
90
91	INTEGER OLw
92	INTEGER OLe
93	INTEGER OLn
94	INTEGER OLs
95	INTEGER exchWidthX
96	INTEGER exchWidthY
97	INTEGER myNz
98
99
100	CcnhDebugStarts
101	C CHARACTER*(MAX_LEN_FNAM) suff
102	CcnhDebugEnds
103
104
105	C-- Initialise inverter
106	eta_qrNM1 = 1. _d 0
107
108	CcnhDebugStarts
109	C _EXCH_XY_R8( cg2d_b, myThid )
110	C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.0 CG2D_B' , 1, myThid )
111	C suff = 'unnormalised'
112	C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid)
113	C STOP
114	CcnhDebugEnds
115
116	C-- Normalise RHS
117	rhsMax = 0. _d 0
118	DO bj=myByLo(myThid),myByHi(myThid)
119	DO bi=myBxLo(myThid),myBxHi(myThid)
120	DO J=1,sNy
121	DO I=1,sNx
122	cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
123	rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
124	ENDDO
125	ENDDO
126	ENDDO
127	ENDDO
128	#ifdef LETS_MAKE_JAM
129	C _GLOBAL_MAX_R8( rhsMax, myThid )
130	rhsMax=1.
131	#else
132	_GLOBAL_MAX_R8( rhsMax, myThid )
133	Catm rhsMax=1.
134	#endif
135	rhsNorm = 1. _d 0
136	IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
137	DO bj=myByLo(myThid),myByHi(myThid)
138	DO bi=myBxLo(myThid),myBxHi(myThid)
139	DO J=1,sNy
140	DO I=1,sNx
141	cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
142	cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
143	ENDDO
144	ENDDO
145	ENDDO
146	ENDDO
147
148	C-- Update overlaps
149	_EXCH_XY_R8( cg2d_b, myThid )
150	_EXCH_XY_R8( cg2d_x, myThid )
151	CcnhDebugStarts
152	C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid )
153	C suff = 'normalised'
154	C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid)
155	CcnhDebugEnds
156
157	C-- Initial residual calculation
158	err = 0. _d 0
159	sumRHS = 0. _d 0
160	DO bj=myByLo(myThid),myByHi(myThid)
161	DO bi=myBxLo(myThid),myBxHi(myThid)
162	DO J=1,sNy
163	DO I=1,sNx
164	cg2d_s(I,J,bi,bj) = 0.
165	cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
166	& (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
167	& +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
168	& +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
169	& +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
170	& -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
171	& -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
172	& -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
173	& -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj)
174	& -freeSurfFac_rA(i,j,bi,bj)recip_Bo(i,j,bi,bj)*
175	& cg2d_x(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm
176	& )
177	err = err +
178	& cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
179	sumRHS = sumRHS +
180	& cg2d_b(I,J,bi,bj)
181	ENDDO
182	ENDDO
183	ENDDO
184	ENDDO
185	C _EXCH_XY_R8( cg2d_r, myThid )
186	#ifdef LETS_MAKE_JAM
187	CALL EXCH_XY_O1_R8_JAM( cg2d_r )
188	#else
189	#ifdef USE_OPTIMIZED_OVERLAPS
190	OLw = 1
191	OLe = 1
192	OLn = 1
193	OLs = 1
194	#else
195	OLw = Olx
196	OLe = Olx
197	OLn = Oly
198	OLs = Oly
199	#endif
200	exchWidthX = 1
201	exchWidthY = 1
202	myNz = 1
203	CALL EXCH_RL( cg2d_r,
204	I OLw, OLe, OLs, OLn, myNz,
205	I exchWidthX, exchWidthY,
206	I FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
207	#endif
208	C _EXCH_XY_R8( cg2d_s, myThid )
209	#ifdef LETS_MAKE_JAM
210	CALL EXCH_XY_O1_R8_JAM( cg2d_s )
211	#else
212	#ifdef USE_OPTIMIZED_OVERLAPS
213	OLw = 1
214	OLe = 1
215	OLn = 1
216	OLs = 1
217	#else
218	OLw = Olx
219	OLe = Olx
220	OLn = Oly
221	OLs = Oly
222	#endif
223	exchWidthX = 1
224	exchWidthY = 1
225	myNz = 1
226	CALL EXCH_RL( cg2d_s,
227	I OLw, OLe, OLs, OLn, myNz,
228	I exchWidthX, exchWidthY,
229	I FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
230	#endif
231	_GLOBAL_SUM_R8( sumRHS, myThid )
232	C WRITE(6,*) ' mythid, err = ', mythid, SQRT(err)
233	_GLOBAL_SUM_R8( err , myThid )
234
235	_BEGIN_MASTER( myThid )
236	write(0,'(A,1PE30.14)') ' cg2d: Sum(rhs) = ',sumRHS
237	_END_MASTER( )
238
239	actualIts = 0
240	actualResidual = SQRT(err)
241	C _BARRIER
242	c _BEGIN_MASTER( myThid )
243	c WRITE(0,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
244	c & actualIts, actualResidual
245	c _END_MASTER( )
246	initialResidual=actualResidual
247
248	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
249	DO 10 it2d=1, numIters
250
251	CcnhDebugStarts
252	C WRITE(0,*) ' CG2D: Iteration ',it2d-1,' residual = ',
253	C & actualResidual
254	CcnhDebugEnds
255	IF ( err .LT. tolerance ) GOTO 11
256	C-- Solve preconditioning equation and update
257	C-- conjugate direction vector "s".
258	eta_qrN = 0. _d 0
259	DO bj=myByLo(myThid),myByHi(myThid)
260	DO bi=myBxLo(myThid),myBxHi(myThid)
261	DO J=1,sNy
262	DO I=1,sNx
263	cg2d_q(I,J,bi,bj) =
264	& pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj)
265	& +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj)
266	& +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj)
267	& +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj)
268	& +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj)
269	CcnhDebugStarts
270	C cg2d_q(I,J,bi,bj) = cg2d_r(I ,J ,bi,bj)
271	CcnhDebugEnds
272	eta_qrN = eta_qrN
273	& +cg2d_q(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
274	ENDDO
275	ENDDO
276	ENDDO
277	ENDDO
278
279	_GLOBAL_SUM_R8(eta_qrN, myThid)
280	CcnhDebugStarts
281	C WRITE(0,*) ' CG2D: Iteration ',it2d-1,' eta_qrN = ',eta_qrN
282	CcnhDebugEnds
283	cgBeta = eta_qrN/eta_qrNM1
284	CcnhDebugStarts
285	C WRITE(0,*) ' CG2D: Iteration ',it2d-1,' beta = ',cgBeta
286	CcnhDebugEnds
287	eta_qrNM1 = eta_qrN
288
289	DO bj=myByLo(myThid),myByHi(myThid)
290	DO bi=myBxLo(myThid),myBxHi(myThid)
291	DO J=1,sNy
292	DO I=1,sNx
293	cg2d_s(I,J,bi,bj) = cg2d_q(I,J,bi,bj)
294	& + cgBeta*cg2d_s(I,J,bi,bj)
295	ENDDO
296	ENDDO
297	ENDDO
298	ENDDO
299
300	C-- Do exchanges that require messages i.e. between
301	C-- processes.
302	C _EXCH_XY_R8( cg2d_s, myThid )
303	#ifdef LETS_MAKE_JAM
304	CALL EXCH_XY_O1_R8_JAM( cg2d_s )
305	#else
306	#ifdef USE_OPTIMIZED_OVERLAPS
307	OLw = 1
308	OLe = 1
309	OLn = 1
310	OLs = 1
311	#else
312	OLw = Olx
313	OLe = Olx
314	OLn = Oly
315	OLs = Oly
316	#endif
317	exchWidthX = 1
318	exchWidthY = 1
319	myNz = 1
320	CALL EXCH_RL( cg2d_s,
321	I OLw, OLe, OLs, OLn, myNz,
322	I exchWidthX, exchWidthY,
323	I FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
324	#endif
325
326	C== Evaluate laplace operator on conjugate gradient vector
327	C== q = A.s
328	alpha = 0. _d 0
329	DO bj=myByLo(myThid),myByHi(myThid)
330	DO bi=myBxLo(myThid),myBxHi(myThid)
331	DO J=1,sNy
332	DO I=1,sNx
333	cg2d_q(I,J,bi,bj) =
334	& aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj)
335	& +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj)
336	& +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj)
337	& +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj)
338	& -aW2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
339	& -aW2d(I+1,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
340	& -aS2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
341	& -aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J ,bi,bj)
342	& -freeSurfFac_rA(i,j,bi,bj)recip_Bo(i,j,bi,bj)*
343	& cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm
344	alpha = alpha+cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
345	ENDDO
346	ENDDO
347	ENDDO
348	ENDDO
349	_GLOBAL_SUM_R8(alpha,myThid)
350	CcnhDebugStarts
351	C WRITE(0,) ' CG2D: Iteration ',it2d-1,' SUM(sq)= ',alpha
352	CcnhDebugEnds
353	alpha = eta_qrN/alpha
354	CcnhDebugStarts
355	C WRITE(0,*) ' CG2D: Iteration ',it2d-1,' alpha= ',alpha
356	CcnhDebugEnds
357
358	C== Update solution and residual vectors
359	C Now compute "interior" points.
360	err = 0. _d 0
361	DO bj=myByLo(myThid),myByHi(myThid)
362	DO bi=myBxLo(myThid),myBxHi(myThid)
363	DO J=1,sNy
364	DO I=1,sNx
365	cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+alpha*cg2d_s(I,J,bi,bj)
366	cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-alpha*cg2d_q(I,J,bi,bj)
367	err = err+cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
368	ENDDO
369	ENDDO
370	ENDDO
371	ENDDO
372
373	_GLOBAL_SUM_R8( err , myThid )
374	err = SQRT(err)
375	actualIts = it2d
376	actualResidual = err
377	IF ( err .LT. cg2dTargetResidual ) GOTO 11
378	C _EXCH_XY_R8(cg2d_r, myThid )
379	#ifdef LETS_MAKE_JAM
380	CALL EXCH_XY_O1_R8_JAM( cg2d_r )
381	#else
382	#ifdef USE_OPTIMIZED_OVERLAPS
383	OLw = 1
384	OLe = 1
385	OLn = 1
386	OLs = 1
387	#else
388	OLw = Olx
389	OLe = Olx
390	OLn = Oly
391	OLs = Oly
392	#endif
393	exchWidthX = 1
394	exchWidthY = 1
395	myNz = 1
396	CALL EXCH_RL( cg2d_r,
397	I OLw, OLe, OLs, OLn, myNz,
398	I exchWidthX, exchWidthY,
399	I FORWARD_SIMULATION, EXCH_IGNORE_CORNERS, myThid )
400	#endif
401
402	10 CONTINUE
403	11 CONTINUE
404
405	C-- Un-normalise the answer
406	DO bj=myByLo(myThid),myByHi(myThid)
407	DO bi=myBxLo(myThid),myBxHi(myThid)
408	DO J=1,sNy
409	DO I=1,sNx
410	cg2d_x(I ,J ,bi,bj) = cg2d_x(I ,J ,bi,bj)/rhsNorm
411	ENDDO
412	ENDDO
413	ENDDO
414	ENDDO
415
416	C The following exchange was moved up to solve_for_pressure
417	C for compatibility with TAMC.
418	C _EXCH_XY_R8(cg2d_x, myThid )
419	c _BEGIN_MASTER( myThid )
420	c WRITE(0,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
421	c & actualIts, actualResidual
422	c _END_MASTER( )
423
424	C-- Return parameters to caller
425	tolerance=initialResidual
426	residual=actualResidual
427	numIters=actualIts
428
429	CcnhDebugStarts
430	C CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
431	C err = 0. _d 0
432	C DO bj=myByLo(myThid),myByHi(myThid)
433	C DO bi=myBxLo(myThid),myBxHi(myThid)
434	C DO J=1,sNy
435	C DO I=1,sNx
436	C cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
437	C & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
438	C & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
439	C & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
440	C & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
441	C & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
442	C & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
443	C & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
444	C & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj))
445	C err = err +
446	C & cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
447	C ENDDO
448	C ENDDO
449	C ENDDO
450	C ENDDO
451	C _GLOBAL_SUM_R8( err , myThid )
452	C write(0,*) 'cg2d: Ax - b = ',SQRT(err)
453	CcnhDebugEnds
454
455	RETURN
456	END