model/src/cg2d_sr.F

C $Header: /u/gcmpack/MITgcm/model/src/cg2d_sr.F,v 1.3 2010/03/16 00:08:27 jmc Exp $
C $Name:  $

#include "CPP_OPTIONS.h"
#ifdef TARGET_NEC_SX
C     set a sensible default for the outer loop unrolling parameter that can
C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
#ifndef CG2D_OUTERLOOPITERS
# define CG2D_OUTERLOOPITERS 10
#endif
#endif /* TARGET_NEC_SX */

CBOP
C     !ROUTINE: CG2D_SR
C     !INTERFACE:
      SUBROUTINE CG2D_SR(
     I                cg2d_b,
     U                cg2d_x,
     O                firstResidual,
     O                lastResidual,
     U                numIters,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG2D
C     | o Two-dimensional grid problem conjugate-gradient
C     |   inverter (with preconditioner).
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.
C     | It requires the A be symmetric.
C     | This implementation assumes A is a five-diagonal
C     | matrix of the form that arises in the discrete
C     | representation of the del^2 operator in a
C     | two-dimensional space.
C     | Notes:
C     | ======
C     | This implementation can support shared-memory
C     | multi-threaded execution. In order to do this COMMON
C     | blocks are used for many of the arrays - even ones that
C     | are only used for intermedaite results. This design is
C     | OK if you want to all the threads to collaborate on
C     | solving the same problem. On the other hand if you want
C     | the threads to solve several different problems
C     | concurrently this implementation will not work.
C     |
C     | This version implements the single-reduction CG algorithm of
C     | d Azevedo, Eijkhout, and Romine (Lapack Working Note 56, 1999).
C     | C. Wolfe, November 2009, clwolfe@ucsd.edu
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "CG2D.h"
c#include "GRID.h"
c#include "SURFACE.h"

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     myThid    :: Thread on which I am working.
C     cg2d_b    :: The source term or "right hand side"
C     cg2d_x    :: The solution
C     firstResidual :: the initial residual before any iterations
C     lastResidual  :: the actual residual reached
C     numIters  :: Entry: the maximum number of iterations allowed
C                  Exit:  the actual number of iterations used
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  firstResidual
      _RL  lastResidual
      INTEGER numIters
      INTEGER myThid

#ifdef ALLOW_SRCG

C     !LOCAL VARIABLES:
C     === Local variables ====
C     actualIts      :: Number of iterations taken
C     actualResidual :: residual
C     bi, bj     :: Block index in X and Y.
C     eta_qrN    :: Used in computing search directions
C     eta_qrNM1     suffix N and NM1 denote current and
C     cgBeta        previous iterations respectively.
C     alpha
C     sumRHS     :: Sum of right-hand-side. Sometimes this is a
C                   useful debuggin/trouble shooting diagnostic.
C                   For neumann problems sumRHS needs to be ~0.
C                   or they converge at a non-zero residual.
C     err        :: Measure of residual of Ax - b, usually the norm.
C     I, J, it2d :: Loop counters ( it2d counts CG iterations )
      INTEGER actualIts
      _RL    actualResidual
      INTEGER bi, bj
      INTEGER I, J, it2d
      _RL    err,    errTile(nSx,nSy)
      _RL    eta_qrN,eta_qrNtile(nSx,nSy)
      _RL    eta_qrNM1
      _RL    cgBeta
      _RL    alpha,  alphaTile(nSx,nSy)
      _RL    sigma,  sigmaTile(nSx,nSy)
      _RL    delta,  deltaTile(nSx,nSy)
      _RL    sumRHS, sumRHStile(nSx,nSy)
      _RL    rhsMax
      _RL    rhsNorm
      CHARACTER*(MAX_LEN_MBUF) msgBuf
      LOGICAL printResidual
CEOP


C--   Initialise inverter
      eta_qrNM1 = 1. _d 0

C--   Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
          rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      IF (cg2dNormaliseRHS) THEN
C-  Normalise RHS :
      _GLOBAL_MAX_RL( rhsMax, myThid )
      rhsNorm = 1. _d 0
      IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1,sNy
         DO I=1,sNx
          cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
          cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
         ENDDO
        ENDDO
       ENDDO
      ENDDO
C- end Normalise RHS
      ENDIF

C--   Update overlaps
      CALL EXCH_XY_RL( cg2d_x, myThid )

C--   Initial residual calculation
      err    = 0. _d 0
      sumRHS = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO J=1-1,sNy+1
         DO I=1-1,sNx+1
          cg2d_s(I,J,bi,bj) = 0.
         ENDDO
        ENDDO
        sumRHStile(bi,bj) = 0. _d 0
        errTile(bi,bj)    = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO J=1,sNy
         DO I=1,sNx
          cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
     &    (aW2d(I  ,J  ,bi,bj)*cg2d_x(I-1,J  ,bi,bj)
     &    +aW2d(I+1,J  ,bi,bj)*cg2d_x(I+1,J  ,bi,bj)
     &    +aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J-1,bi,bj)
     &    +aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J+1,bi,bj)
     &    +aC2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
     &    )
          errTile(bi,bj)    = errTile(bi,bj)
     &                      + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
          sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(I,J,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      CALL EXCH_S3D_RL( cg2d_r, 1,  myThid )

      CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
      CALL GLOBAL_SUM_TILE_RL( errTile,    err,    myThid )

      err = SQRT(err)
      actualIts      = 0
      actualResidual = err

      printResidual = .FALSE.
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        printResidual = printResidualFreq.GE.1
        WRITE(standardmessageunit,'(A,1P2E22.14)')
     &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
        _END_MASTER( myThid )
      ENDIF
      firstResidual=actualResidual

      IF ( err .LT. cg2dTolerance ) GOTO 11

C DER (1999) do one iteration outside of the loop to start things up.
C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
      eta_qrN = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        eta_qrNtile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO J=1,sNy
         DO I=1,sNx
           cg2d_y(I,J,bi,bj) =
     &       pC(I  ,J  ,bi,bj)*cg2d_r(I  ,J  ,bi,bj)
     &      +pW(I  ,J  ,bi,bj)*cg2d_r(I-1,J  ,bi,bj)
     &      +pW(I+1,J  ,bi,bj)*cg2d_r(I+1,J  ,bi,bj)
     &      +pS(I  ,J  ,bi,bj)*cg2d_r(I  ,J-1,bi,bj)
     &      +pS(I  ,J+1,bi,bj)*cg2d_r(I  ,J+1,bi,bj)
           cg2d_s(I,J,bi,bj)  = cg2d_y(I,J,bi,bj)
           eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
     &      +cg2d_y(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
      CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )

      eta_qrNM1 = eta_qrN

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
      alpha = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        alphaTile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO J=1,sNy
         DO I=1,sNx
          cg2d_q(I,J,bi,bj) =
     &      aW2d(I  ,J  ,bi,bj)*cg2d_s(I-1,J  ,bi,bj)
     &     +aW2d(I+1,J  ,bi,bj)*cg2d_s(I+1,J  ,bi,bj)
     &     +aS2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J-1,bi,bj)
     &     +aS2d(I  ,J+1,bi,bj)*cg2d_s(I  ,J+1,bi,bj)
     &     +aC2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
          alphaTile(bi,bj) = alphaTile(bi,bj)
     &                     + cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      CALL GLOBAL_SUM_TILE_RL( alphaTile,  alpha,  myThid )

      sigma = eta_qrN/alpha

C==   Update solution and residual vectors
C     Now compute "interior" points.
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        errTile(bi,bj) = 0. _d 0
        DO J=1,sNy
         DO I=1,sNx
          cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+sigma*cg2d_s(I,J,bi,bj)
          cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-sigma*cg2d_q(I,J,bi,bj)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      CALL EXCH_S3D_RL( cg2d_r,1, myThid )

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      DO 10 it2d=1, numIters

C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
C--    z = M^-1 r
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO J=1,sNy
          DO I=1,sNx
           cg2d_y(I,J,bi,bj) =
     &       pC(I  ,J  ,bi,bj)*cg2d_r(I  ,J  ,bi,bj)
     &      +pW(I  ,J  ,bi,bj)*cg2d_r(I-1,J  ,bi,bj)
     &      +pW(I+1,J  ,bi,bj)*cg2d_r(I+1,J  ,bi,bj)
     &      +pS(I  ,J  ,bi,bj)*cg2d_r(I  ,J-1,bi,bj)
     &      +pS(I  ,J+1,bi,bj)*cg2d_r(I  ,J+1,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

       CALL EXCH_S3D_RL( cg2d_y, 1, myThid )

C==    v = A.z
C--    eta_qr = <z,r>
C--    delta = <z,v>
C--    Do the error calcuation here to consolidate global reductions
       eta_qrN = 0. _d 0
       delta   = 0. _d 0
       err = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         eta_qrNtile(bi,bj) = 0. _d 0
         deltaTile(bi,bj)   = 0. _d 0
         errTile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO J=1,sNy
          DO I=1,sNx
           cg2d_v(I,J,bi,bj) =
     &       aW2d(I  ,J  ,bi,bj)*cg2d_y(I-1,J  ,bi,bj)
     &      +aW2d(I+1,J  ,bi,bj)*cg2d_y(I+1,J  ,bi,bj)
     &      +aS2d(I  ,J  ,bi,bj)*cg2d_y(I  ,J-1,bi,bj)
     &      +aS2d(I  ,J+1,bi,bj)*cg2d_y(I  ,J+1,bi,bj)
     &      +aC2d(I  ,J  ,bi,bj)*cg2d_y(I  ,J  ,bi,bj)
           eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
     &      +cg2d_y(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
           deltaTile(bi,bj) = deltaTile(bi,bj)
     &      +cg2d_y(I,J,bi,bj)*cg2d_v(I,J,bi,bj)
           errTile(bi,bj) = errTile(bi,bj)
     &                    + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

C      CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
C      CALL GLOBAL_SUM_TILE_RL( deltaTile,delta,myThid )
C      CALL GLOBAL_SUM_TILE_RL( errTile,    err,    myThid )
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         sumPhi(1,bi,bj) = eta_qrNtile(bi,bj)
         sumPhi(2,bi,bj) = deltaTile(bi,bj)
         sumPhi(3,bi,bj) = errTile(bi,bj)
        ENDDO
       ENDDO

C     global_vec_sum_r8 does not call BAR2 on input
       CALL BAR2( myThid)
       CALL GLOBAL_VEC_SUM_R8(3,3,sumPhi,myThid)

       eta_qrN = sumPhi(1,1,1)
       delta   = sumPhi(2,1,1)
       err     = sumPhi(3,1,1)

       err = SQRT(err)
       actualIts      = it2d
       actualResidual = err
       IF ( printResidual ) THEN
        IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
         WRITE(msgBuf,'(A,I6,A,1PE21.14)')
     &    ' cg2d: iter=', actualIts, ' ; resid.= ', actualResidual
         CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
     &                       SQUEEZE_RIGHT, myThid )
        ENDIF
       ENDIF
       IF ( err .LT. cg2dTolerance ) GOTO 11

       cgBeta   = eta_qrN/eta_qrNM1
       eta_qrNM1 = eta_qrN
       alpha = delta - cgBeta**2*alpha
       sigma = eta_qrN/alpha

       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO J=1,sNy
          DO I=1,sNx
           cg2d_s(I,J,bi,bj) = cg2d_y(I,J,bi,bj)
     &                       + cgBeta*cg2d_s(I,J,bi,bj)
           cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)
     &                       + sigma*cg2d_s(I,J,bi,bj)
           cg2d_q(I,J,bi,bj) = cg2d_v(I,J,bi,bj)
     &                       + cgBeta*cg2d_q(I,J,bi,bj)
           cg2d_r(I,J,bi,bj) = cg2d_r(I,J,bi,bj)
     &                       - sigma*cg2d_q(I,J,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

       CALL EXCH_S3D_RL( cg2d_r, 1, myThid )

   10 CONTINUE
   11 CONTINUE

      IF (cg2dNormaliseRHS) THEN
C--   Un-normalise the answer
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO J=1,sNy
           DO I=1,sNx
            cg2d_x(I  ,J  ,bi,bj) = cg2d_x(I  ,J  ,bi,bj)/rhsNorm
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF

C--   Return parameters to caller
      lastResidual=actualResidual
      numIters=actualIts

C     The following exchange was moved up to solve_for_pressure
C     for compatibility with TAMC.
C     _EXCH_XY_R8(cg2d_x, myThid )
c     _BEGIN_MASTER( myThid )
c      WRITE(*,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
c    & actualIts, actualResidual
c     _END_MASTER( myThid )

#endif /* ALLOW_SRCG */
      RETURN
      END
1	C $Header: /u/gcmpack/MITgcm/model/src/cg2d_sr.F,v 1.3 2010/03/16 00:08:27 jmc Exp $
2	C $Name: $
3
4	#include "CPP_OPTIONS.h"
5	#ifdef TARGET_NEC_SX
6	C set a sensible default for the outer loop unrolling parameter that can
7	C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8	#ifndef CG2D_OUTERLOOPITERS
9	# define CG2D_OUTERLOOPITERS 10
10	#endif
11	#endif /* TARGET_NEC_SX */
12
13	CBOP
14	C !ROUTINE: CG2D_SR
15	C !INTERFACE:
16	SUBROUTINE CG2D_SR(
17	I cg2d_b,
18	U cg2d_x,
19	O firstResidual,
20	O lastResidual,
21	U numIters,
22	I myThid )
23	C !DESCRIPTION: \bv
24	C ==========================================================
25	C \| SUBROUTINE CG2D
26	C \| o Two-dimensional grid problem conjugate-gradient
27	C \| inverter (with preconditioner).
28	C ==========================================================
29	C \| Con. grad is an iterative procedure for solving Ax = b.
30	C \| It requires the A be symmetric.
31	C \| This implementation assumes A is a five-diagonal
32	C \| matrix of the form that arises in the discrete
33	C \| representation of the del^2 operator in a
34	C \| two-dimensional space.
35	C \| Notes:
36	C \| ======
37	C \| This implementation can support shared-memory
38	C \| multi-threaded execution. In order to do this COMMON
39	C \| blocks are used for many of the arrays - even ones that
40	C \| are only used for intermedaite results. This design is
41	C \| OK if you want to all the threads to collaborate on
42	C \| solving the same problem. On the other hand if you want
43	C \| the threads to solve several different problems
44	C \| concurrently this implementation will not work.
45	C \|
46	C \| This version implements the single-reduction CG algorithm of
47	C \| d Azevedo, Eijkhout, and Romine (Lapack Working Note 56, 1999).
48	C \| C. Wolfe, November 2009, clwolfe@ucsd.edu
49	C ==========================================================
50	C \ev
51
52	C !USES:
53	IMPLICIT NONE
54	C === Global data ===
55	#include "SIZE.h"
56	#include "EEPARAMS.h"
57	#include "PARAMS.h"
58	#include "CG2D.h"
59	c#include "GRID.h"
60	c#include "SURFACE.h"
61
62	C !INPUT/OUTPUT PARAMETERS:
63	C === Routine arguments ===
64	C myThid :: Thread on which I am working.
65	C cg2d_b :: The source term or "right hand side"
66	C cg2d_x :: The solution
67	C firstResidual :: the initial residual before any iterations
68	C lastResidual :: the actual residual reached
69	C numIters :: Entry: the maximum number of iterations allowed
70	C Exit: the actual number of iterations used
71	_RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
72	_RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
73	_RL firstResidual
74	_RL lastResidual
75	INTEGER numIters
76	INTEGER myThid
77
78	#ifdef ALLOW_SRCG
79
80	C !LOCAL VARIABLES:
81	C === Local variables ====
82	C actualIts :: Number of iterations taken
83	C actualResidual :: residual
84	C bi, bj :: Block index in X and Y.
85	C eta_qrN :: Used in computing search directions
86	C eta_qrNM1 suffix N and NM1 denote current and
87	C cgBeta previous iterations respectively.
88	C alpha
89	C sumRHS :: Sum of right-hand-side. Sometimes this is a
90	C useful debuggin/trouble shooting diagnostic.
91	C For neumann problems sumRHS needs to be ~0.
92	C or they converge at a non-zero residual.
93	C err :: Measure of residual of Ax - b, usually the norm.
94	C I, J, it2d :: Loop counters ( it2d counts CG iterations )
95	INTEGER actualIts
96	_RL actualResidual
97	INTEGER bi, bj
98	INTEGER I, J, it2d
99	_RL err, errTile(nSx,nSy)
100	_RL eta_qrN,eta_qrNtile(nSx,nSy)
101	_RL eta_qrNM1
102	_RL cgBeta
103	_RL alpha, alphaTile(nSx,nSy)
104	_RL sigma, sigmaTile(nSx,nSy)
105	_RL delta, deltaTile(nSx,nSy)
106	_RL sumRHS, sumRHStile(nSx,nSy)
107	_RL rhsMax
108	_RL rhsNorm
109	CHARACTER*(MAX_LEN_MBUF) msgBuf
110	LOGICAL printResidual
111	CEOP
112
113
114	C-- Initialise inverter
115	eta_qrNM1 = 1. _d 0
116
117	C-- Normalise RHS
118	rhsMax = 0. _d 0
119	DO bj=myByLo(myThid),myByHi(myThid)
120	DO bi=myBxLo(myThid),myBxHi(myThid)
121	DO J=1,sNy
122	DO I=1,sNx
123	cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
124	rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
125	ENDDO
126	ENDDO
127	ENDDO
128	ENDDO
129
130	IF (cg2dNormaliseRHS) THEN
131	C- Normalise RHS :
132	_GLOBAL_MAX_RL( rhsMax, myThid )
133	rhsNorm = 1. _d 0
134	IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
135	DO bj=myByLo(myThid),myByHi(myThid)
136	DO bi=myBxLo(myThid),myBxHi(myThid)
137	DO J=1,sNy
138	DO I=1,sNx
139	cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
140	cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
141	ENDDO
142	ENDDO
143	ENDDO
144	ENDDO
145	C- end Normalise RHS
146	ENDIF
147
148	C-- Update overlaps
149	CALL EXCH_XY_RL( cg2d_x, myThid )
150
151	C-- Initial residual calculation
152	err = 0. _d 0
153	sumRHS = 0. _d 0
154	DO bj=myByLo(myThid),myByHi(myThid)
155	DO bi=myBxLo(myThid),myBxHi(myThid)
156	DO J=1-1,sNy+1
157	DO I=1-1,sNx+1
158	cg2d_s(I,J,bi,bj) = 0.
159	ENDDO
160	ENDDO
161	sumRHStile(bi,bj) = 0. _d 0
162	errTile(bi,bj) = 0. _d 0
163	#ifdef TARGET_NEC_SX
164	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
165	#endif /* TARGET_NEC_SX */
166	DO J=1,sNy
167	DO I=1,sNx
168	cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
169	& (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
170	& +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
171	& +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
172	& +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
173	& +aC2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
174	& )
175	errTile(bi,bj) = errTile(bi,bj)
176	& + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
177	sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(I,J,bi,bj)
178	ENDDO
179	ENDDO
180	ENDDO
181	ENDDO
182
183	CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
184
185	CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
186	CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
187
188	err = SQRT(err)
189	actualIts = 0
190	actualResidual = err
191
192	printResidual = .FALSE.
193	IF ( debugLevel .GE. debLevZero ) THEN
194	_BEGIN_MASTER( myThid )
195	printResidual = printResidualFreq.GE.1
196	WRITE(standardmessageunit,'(A,1P2E22.14)')
197	& ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
198	_END_MASTER( myThid )
199	ENDIF
200	firstResidual=actualResidual
201
202	IF ( err .LT. cg2dTolerance ) GOTO 11
203
204	C DER (1999) do one iteration outside of the loop to start things up.
205	C-- Solve preconditioning equation and update
206	C-- conjugate direction vector "s".
207	eta_qrN = 0. _d 0
208	DO bj=myByLo(myThid),myByHi(myThid)
209	DO bi=myBxLo(myThid),myBxHi(myThid)
210	eta_qrNtile(bi,bj) = 0. _d 0
211	#ifdef TARGET_NEC_SX
212	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
213	#endif /* TARGET_NEC_SX */
214	DO J=1,sNy
215	DO I=1,sNx
216	cg2d_y(I,J,bi,bj) =
217	& pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj)
218	& +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj)
219	& +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj)
220	& +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj)
221	& +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj)
222	cg2d_s(I,J,bi,bj) = cg2d_y(I,J,bi,bj)
223	eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
224	& +cg2d_y(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
225	ENDDO
226	ENDDO
227	ENDDO
228	ENDDO
229
230	CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
231	CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
232
233	eta_qrNM1 = eta_qrN
234
235	C== Evaluate laplace operator on conjugate gradient vector
236	C== q = A.s
237	alpha = 0. _d 0
238	DO bj=myByLo(myThid),myByHi(myThid)
239	DO bi=myBxLo(myThid),myBxHi(myThid)
240	alphaTile(bi,bj) = 0. _d 0
241	#ifdef TARGET_NEC_SX
242	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
243	#endif /* TARGET_NEC_SX */
244	DO J=1,sNy
245	DO I=1,sNx
246	cg2d_q(I,J,bi,bj) =
247	& aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj)
248	& +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj)
249	& +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj)
250	& +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj)
251	& +aC2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
252	alphaTile(bi,bj) = alphaTile(bi,bj)
253	& + cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
254	ENDDO
255	ENDDO
256	ENDDO
257	ENDDO
258
259	CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
260
261	sigma = eta_qrN/alpha
262
263	C== Update solution and residual vectors
264	C Now compute "interior" points.
265	DO bj=myByLo(myThid),myByHi(myThid)
266	DO bi=myBxLo(myThid),myBxHi(myThid)
267	errTile(bi,bj) = 0. _d 0
268	DO J=1,sNy
269	DO I=1,sNx
270	cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+sigma*cg2d_s(I,J,bi,bj)
271	cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-sigma*cg2d_q(I,J,bi,bj)
272	ENDDO
273	ENDDO
274	ENDDO
275	ENDDO
276
277	CALL EXCH_S3D_RL( cg2d_r,1, myThid )
278
279	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
280	DO 10 it2d=1, numIters
281
282	C-- Solve preconditioning equation and update
283	C-- conjugate direction vector "s".
284	C-- z = M^-1 r
285	DO bj=myByLo(myThid),myByHi(myThid)
286	DO bi=myBxLo(myThid),myBxHi(myThid)
287	#ifdef TARGET_NEC_SX
288	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
289	#endif /* TARGET_NEC_SX */
290	DO J=1,sNy
291	DO I=1,sNx
292	cg2d_y(I,J,bi,bj) =
293	& pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj)
294	& +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj)
295	& +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj)
296	& +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj)
297	& +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj)
298	ENDDO
299	ENDDO
300	ENDDO
301	ENDDO
302
303	CALL EXCH_S3D_RL( cg2d_y, 1, myThid )
304
305	C== v = A.z
306	C-- eta_qr = <z,r>
307	C-- delta = <z,v>
308	C-- Do the error calcuation here to consolidate global reductions
309	eta_qrN = 0. _d 0
310	delta = 0. _d 0
311	err = 0. _d 0
312	DO bj=myByLo(myThid),myByHi(myThid)
313	DO bi=myBxLo(myThid),myBxHi(myThid)
314	eta_qrNtile(bi,bj) = 0. _d 0
315	deltaTile(bi,bj) = 0. _d 0
316	errTile(bi,bj) = 0. _d 0
317	#ifdef TARGET_NEC_SX
318	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
319	#endif /* TARGET_NEC_SX */
320	DO J=1,sNy
321	DO I=1,sNx
322	cg2d_v(I,J,bi,bj) =
323	& aW2d(I ,J ,bi,bj)*cg2d_y(I-1,J ,bi,bj)
324	& +aW2d(I+1,J ,bi,bj)*cg2d_y(I+1,J ,bi,bj)
325	& +aS2d(I ,J ,bi,bj)*cg2d_y(I ,J-1,bi,bj)
326	& +aS2d(I ,J+1,bi,bj)*cg2d_y(I ,J+1,bi,bj)
327	& +aC2d(I ,J ,bi,bj)*cg2d_y(I ,J ,bi,bj)
328	eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
329	& +cg2d_y(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
330	deltaTile(bi,bj) = deltaTile(bi,bj)
331	& +cg2d_y(I,J,bi,bj)*cg2d_v(I,J,bi,bj)
332	errTile(bi,bj) = errTile(bi,bj)
333	& + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
334	ENDDO
335	ENDDO
336	ENDDO
337	ENDDO
338
339	C CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
340	C CALL GLOBAL_SUM_TILE_RL( deltaTile,delta,myThid )
341	C CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
342	DO bj=myByLo(myThid),myByHi(myThid)
343	DO bi=myBxLo(myThid),myBxHi(myThid)
344	sumPhi(1,bi,bj) = eta_qrNtile(bi,bj)
345	sumPhi(2,bi,bj) = deltaTile(bi,bj)
346	sumPhi(3,bi,bj) = errTile(bi,bj)
347	ENDDO
348	ENDDO
349
350	C global_vec_sum_r8 does not call BAR2 on input
351	CALL BAR2( myThid)
352	CALL GLOBAL_VEC_SUM_R8(3,3,sumPhi,myThid)
353
354	eta_qrN = sumPhi(1,1,1)
355	delta = sumPhi(2,1,1)
356	err = sumPhi(3,1,1)
357
358	err = SQRT(err)
359	actualIts = it2d
360	actualResidual = err
361	IF ( printResidual ) THEN
362	IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
363	WRITE(msgBuf,'(A,I6,A,1PE21.14)')
364	& ' cg2d: iter=', actualIts, ' ; resid.= ', actualResidual
365	CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
366	& SQUEEZE_RIGHT, myThid )
367	ENDIF
368	ENDIF
369	IF ( err .LT. cg2dTolerance ) GOTO 11
370
371	cgBeta = eta_qrN/eta_qrNM1
372	eta_qrNM1 = eta_qrN
373	alpha = delta - cgBeta*2alpha
374	sigma = eta_qrN/alpha
375
376	DO bj=myByLo(myThid),myByHi(myThid)
377	DO bi=myBxLo(myThid),myBxHi(myThid)
378	DO J=1,sNy
379	DO I=1,sNx
380	cg2d_s(I,J,bi,bj) = cg2d_y(I,J,bi,bj)
381	& + cgBeta*cg2d_s(I,J,bi,bj)
382	cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)
383	& + sigma*cg2d_s(I,J,bi,bj)
384	cg2d_q(I,J,bi,bj) = cg2d_v(I,J,bi,bj)
385	& + cgBeta*cg2d_q(I,J,bi,bj)
386	cg2d_r(I,J,bi,bj) = cg2d_r(I,J,bi,bj)
387	& - sigma*cg2d_q(I,J,bi,bj)
388	ENDDO
389	ENDDO
390	ENDDO
391	ENDDO
392
393	CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
394
395	10 CONTINUE
396	11 CONTINUE
397
398	IF (cg2dNormaliseRHS) THEN
399	C-- Un-normalise the answer
400	DO bj=myByLo(myThid),myByHi(myThid)
401	DO bi=myBxLo(myThid),myBxHi(myThid)
402	DO J=1,sNy
403	DO I=1,sNx
404	cg2d_x(I ,J ,bi,bj) = cg2d_x(I ,J ,bi,bj)/rhsNorm
405	ENDDO
406	ENDDO
407	ENDDO
408	ENDDO
409	ENDIF
410
411	C-- Return parameters to caller
412	lastResidual=actualResidual
413	numIters=actualIts
414
415	C The following exchange was moved up to solve_for_pressure
416	C for compatibility with TAMC.
417	C _EXCH_XY_R8(cg2d_x, myThid )
418	c _BEGIN_MASTER( myThid )
419	c WRITE(*,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
420	c & actualIts, actualResidual
421	c _END_MASTER( myThid )
422
423	#endif /* ALLOW_SRCG */
424	RETURN
425	END