model/src/cg2d_ex0.F

C $Header: /u/gcmpack/MITgcm/model/src/cg2d.F,v 1.55 2012/05/11 23:28:10 jmc Exp $
C $Name:  $

#include "CPP_OPTIONS.h"
#ifdef TARGET_NEC_SX
C     set a sensible default for the outer loop unrolling parameter that can
C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
#ifndef CG2D_OUTERLOOPITERS
# define CG2D_OUTERLOOPITERS 10
#endif
#endif /* TARGET_NEC_SX */

CBOP
C     !ROUTINE: CG2D_EX0
C     !INTERFACE:
      SUBROUTINE CG2D_EX0(
     U                cg2d_b, cg2d_x,
     O                firstResidual, minResidualSq, lastResidual,
     U                numIters, nIterMin,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG2D_EX0
C     | o Two-dimensional grid problem conjugate-gradient
C     |   inverter (with preconditioner).
C     | This is the disconnected-tile version (each tile treated
C     |   independently as a small domain, with locally periodic
C     |   BC at the edges.
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.
C     | It requires the A be symmetric.
C     | This implementation assumes A is a five-diagonal matrix
C     | of the form that arises in the discrete representation of
C     | the del^2 operator in a two-dimensional space.
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "CG2D.h"

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     cg2d_b    :: The source term or "right hand side" (output: normalised RHS)
C     cg2d_x    :: The solution (input: first guess)
C     firstResidual :: the initial residual before any iterations
C     minResidualSq :: the lowest residual reached (squared)
C     lastResidual  :: the actual residual reached
C     numIters  :: Inp: the maximum number of iterations allowed
C                  Out: the actual number of iterations used
C     nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
C                  Out: iteration number corresponding to lowest residual
C     myThid    :: Thread on which I am working.
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  firstResidual
      _RL  minResidualSq
      _RL  lastResidual
      INTEGER numIters
      INTEGER nIterMin
      INTEGER myThid

C     !LOCAL VARIABLES:
C     === Local variables ====
C     bi, bj     :: tile index in X and Y.
C     i, j, it2d :: Loop counters ( it2d counts CG iterations )
C     actualIts  :: actual CG iteration number
C     err_sq     :: Measure of the square of the residual of Ax - b.
C     eta_qrN    :: Used in computing search directions; suffix N and NM1
C     eta_qrNM1     denote current and previous iterations respectively.
C     cgBeta     :: coeff used to update conjugate direction vector "s".
C     alpha      :: coeff used to update solution & residual
C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
C                   debugging/trouble shooting diagnostic. For neumann problems
C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
C     cg2d_min   :: used to store solution corresponding to lowest residual.
C     msgBuf     :: Informational/error message buffer
      INTEGER bi, bj
      INTEGER i, j, it2d
      INTEGER actualIts(nSx,nSy)
      INTEGER minResIter(nSx,nSy)
      _RL    cg2dTolerance_sq
      _RL    err_sq,  errTile(nSx,nSy)
      _RL    eta_qrNtile(nSx,nSy)
      _RL    eta_qrNM1(nSx,nSy)
      _RL    cgBeta
      _RL    alpha,   alphaTile(nSx,nSy)
      _RL    sumRHS,  sumRHStile(nSx,nSy)
      _RL    rhsMax, rhsMaxLoc
      _RL    rhsNorm(nSx,nSy)
      _RL    minResTile(nSx,nSy)
      _RL    cg2d_min(1:sNx,1:sNy,nSx,nSy)
      CHARACTER*(MAX_LEN_MBUF) msgBuf
      LOGICAL printResidual
CEOP

C--   Initialise auxiliary constant, some output variable
      cg2dTolerance_sq = cg2dTolerance*cg2dTolerance

C--   Initialise inverter and Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)

        actualIts(bi,bj)  = 0
        minResIter(bi,bj) = 0
        minResTile(bi,bj) = -1. _d 0
        eta_qrNM1(bi,bj) =  1. _d 0
        rhsMaxLoc = 0. _d 0
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
          rhsMaxLoc = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMaxLoc)
         ENDDO
        ENDDO
        rhsNorm(bi,bj) = 1. _d 0
        IF ( rhsMaxLoc .NE. 0. ) rhsNorm(bi,bj) = 1. _d 0 / rhsMaxLoc
        IF (cg2dNormaliseRHS) THEN
         DO j=1,sNy
          DO i=1,sNx
           cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm(bi,bj)
           cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm(bi,bj)
          ENDDO
         ENDDO
        ENDIF
        rhsMax = MAX( rhsMaxLoc, rhsMax )

       ENDDO
      ENDDO
      _GLOBAL_MAX_RL( rhsMax, myThid )

C--   Update overlaps
      CALL EXCH_XY_RL( cg2d_x, myThid )

C--   Initial residual calculation
      err_sq = 0.
      sumRHS = 0.
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        IF ( nIterMin.GE.0 ) THEN
         DO j=1,sNy
          DO i=1,sNx
            cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDIF
        DO j=0,sNy+1
         DO i=0,sNx+1
          cg2d_s(i,j,bi,bj) = 0.
         ENDDO
        ENDDO
        sumRHStile(bi,bj) = 0. _d 0
        errTile(bi,bj)    = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO j=1,sNy
         DO i=1,sNx
          cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
     &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
     &    )
          errTile(bi,bj)    = errTile(bi,bj)
     &                      + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
         ENDDO
        ENDDO
        IF ( nIterMin.GE.0 ) THEN
          minResTile(bi,bj) = errTile(bi,bj)
        ENDIF
        err_sq = MAX( errTile(bi,bj), err_sq )
        sumRHS = MAX( ABS(sumRHStile(bi,bj)), sumRHS )

       ENDDO
      ENDDO
      CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
      _GLOBAL_MAX_RL( err_sq, myThid )
      _GLOBAL_MAX_RL( sumRHS, myThid )
      firstResidual = SQRT(err_sq)

      printResidual = .FALSE.
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        printResidual = printResidualFreq.GE.1
        WRITE(standardmessageunit,'(A,1P2E22.14)')
     &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
        _END_MASTER( myThid )
      ENDIF

c     IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      DO it2d=1, numIters
      IF ( err_sq.GE.cg2dTolerance_sq ) THEN
       err_sq = 0. _d 0

       DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        IF ( errTile(bi,bj).GE.cg2dTolerance_sq ) THEN

C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
         eta_qrNtile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO j=1,sNy
          DO i=1,sNx
           cg2d_q(i,j,bi,bj) =
     &      pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
     &     +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
     &     +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
     &     +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
     &     +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
           eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
     &     +cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          ENDDO
         ENDDO

         cgBeta   = eta_qrNtile(bi,bj)/eta_qrNM1(bi,bj)
         eta_qrNM1(bi,bj) = eta_qrNtile(bi,bj)

         DO j=1,sNy
          DO i=1,sNx
           cg2d_s(i,j,bi,bj) = cg2d_q(i,j,bi,bj)
     &                       + cgBeta*cg2d_s(i,j,bi,bj)
          ENDDO
         ENDDO

C--    Do exchanges that require messages i.e. between processes.
         CALL FILL_HALO_LOCAL_RL(
     U                  cg2d_s(0,0,bi,bj),
     I                  1, 1, 1, 1, 1,
     I                  EXCH_IGNORE_CORNERS, bi, bj, myThid )

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
         alphaTile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO j=1,sNy
          DO i=1,sNx
           cg2d_q(i,j,bi,bj) =
     &     aW2d(i  ,j  ,bi,bj)*cg2d_s(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_s(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_s(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j  ,bi,bj)
          alphaTile(bi,bj) = alphaTile(bi,bj)
     &                     + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
          ENDDO
         ENDDO
         alpha = eta_qrNtile(bi,bj)/alphaTile(bi,bj)

C==    Update simultaneously solution and residual vectors (and Iter number)
C      Now compute "interior" points.
         errTile(bi,bj) = 0. _d 0
         DO j=1,sNy
          DO i=1,sNx
           cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
           cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
           errTile(bi,bj) = errTile(bi,bj)
     &                    + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          ENDDO
         ENDDO
         actualIts(bi,bj) = it2d

         IF ( printResidual ) THEN
          IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
           WRITE(msgBuf,'(A,2I4,A,I6,A,1PE21.14)') ' cg2d(bi,bj=', bi,
     &      bj, '): iter=', it2d, ' ; resid.= ', SQRT(errTile(bi,bj))
           CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
     &                         SQUEEZE_RIGHT, myThid )
          ENDIF
         ENDIF
         IF ( errTile(bi,bj) .GE. cg2dTolerance_sq .AND.
     &        errTile(bi,bj) .LT. minResTile(bi,bj) ) THEN
C-     Store lowest residual solution
           minResTile(bi,bj) = errTile(bi,bj)
           minResIter(bi,bj) = it2d
           DO j=1,sNy
            DO i=1,sNx
             cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
            ENDDO
           ENDDO
         ENDIF

         CALL FILL_HALO_LOCAL_RL(
     U                  cg2d_r(0,0,bi,bj),
     I                  1, 1, 1, 1, 1,
     I                  EXCH_IGNORE_CORNERS, bi, bj, myThid )

        ENDIF
        err_sq = MAX( errTile(bi,bj), err_sq )
C-    end bi,bj loops
       ENDDO
       ENDDO
C-    end cg-2d iteration loop
      ENDIF
      ENDDO

c  11 CONTINUE

      IF ( nIterMin.GE.0 ) THEN
C-    use the lowest residual solution (instead of current one = last residual)
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
c         minResidualSq = MAX( minResTile(bi,bj), minResidualSq )
c         nIterMin = MAX( minResIter(bi,bj), nIterMin )
         IF ( errTile(bi,bj) .GT. minResTile(bi,bj) ) THEN
          DO j=1,sNy
           DO i=1,sNx
             cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
           ENDDO
          ENDDO
         ENDIF
        ENDDO
       ENDDO
      ENDIF

      IF (cg2dNormaliseRHS) THEN
C--   Un-normalise the answer
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO j=1,sNy
           DO i=1,sNx
            cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm(bi,bj)
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF

C--   Return parameters to caller
C     return largest Iter # and Max residual     in numIters and lastResidual
C     return lowest  Iter # and Min residual(^2) in nIterMin and minResidualSq
      _GLOBAL_MAX_RL( err_sq, myThid )
      nIterMin = numIters
      numIters = 0
      minResidualSq = err_sq
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
         nIterMin = MIN( actualIts(bi,bj), nIterMin )
         numIters = MAX( actualIts(bi,bj), numIters )
         minResidualSq = MIN( errTile(bi,bj), minResidualSq )
       ENDDO
      ENDDO
      lastResidual = SQRT(err_sq)
      alpha = -nIterMin
      _GLOBAL_MAX_RL( alpha, myThid )
      nIterMin = NINT( -alpha )
      alpha = numIters
      _GLOBAL_MAX_RL( alpha, myThid )
      numIters = NINT( alpha )
      alpha = -minResidualSq
      _GLOBAL_MAX_RL( alpha, myThid )
      minResidualSq = -alpha

      RETURN
      END
1	C $Header: /u/gcmpack/MITgcm/model/src/cg2d.F,v 1.55 2012/05/11 23:28:10 jmc Exp $
2	C $Name: $
3
4	#include "CPP_OPTIONS.h"
5	#ifdef TARGET_NEC_SX
6	C set a sensible default for the outer loop unrolling parameter that can
7	C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8	#ifndef CG2D_OUTERLOOPITERS
9	# define CG2D_OUTERLOOPITERS 10
10	#endif
11	#endif /* TARGET_NEC_SX */
12
13	CBOP
14	C !ROUTINE: CG2D_EX0
15	C !INTERFACE:
16	SUBROUTINE CG2D_EX0(
17	U cg2d_b, cg2d_x,
18	O firstResidual, minResidualSq, lastResidual,
19	U numIters, nIterMin,
20	I myThid )
21	C !DESCRIPTION: \bv
22	C ==========================================================
23	C \| SUBROUTINE CG2D_EX0
24	C \| o Two-dimensional grid problem conjugate-gradient
25	C \| inverter (with preconditioner).
26	C \| This is the disconnected-tile version (each tile treated
27	C \| independently as a small domain, with locally periodic
28	C \| BC at the edges.
29	C ==========================================================
30	C \| Con. grad is an iterative procedure for solving Ax = b.
31	C \| It requires the A be symmetric.
32	C \| This implementation assumes A is a five-diagonal matrix
33	C \| of the form that arises in the discrete representation of
34	C \| the del^2 operator in a two-dimensional space.
35	C ==========================================================
36	C \ev
37
38	C !USES:
39	IMPLICIT NONE
40	C === Global data ===
41	#include "SIZE.h"
42	#include "EEPARAMS.h"
43	#include "PARAMS.h"
44	#include "CG2D.h"
45
46	C !INPUT/OUTPUT PARAMETERS:
47	C === Routine arguments ===
48	C cg2d_b :: The source term or "right hand side" (output: normalised RHS)
49	C cg2d_x :: The solution (input: first guess)
50	C firstResidual :: the initial residual before any iterations
51	C minResidualSq :: the lowest residual reached (squared)
52	C lastResidual :: the actual residual reached
53	C numIters :: Inp: the maximum number of iterations allowed
54	C Out: the actual number of iterations used
55	C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
56	C Out: iteration number corresponding to lowest residual
57	C myThid :: Thread on which I am working.
58	_RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
59	_RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
60	_RL firstResidual
61	_RL minResidualSq
62	_RL lastResidual
63	INTEGER numIters
64	INTEGER nIterMin
65	INTEGER myThid
66
67	C !LOCAL VARIABLES:
68	C === Local variables ====
69	C bi, bj :: tile index in X and Y.
70	C i, j, it2d :: Loop counters ( it2d counts CG iterations )
71	C actualIts :: actual CG iteration number
72	C err_sq :: Measure of the square of the residual of Ax - b.
73	C eta_qrN :: Used in computing search directions; suffix N and NM1
74	C eta_qrNM1 denote current and previous iterations respectively.
75	C cgBeta :: coeff used to update conjugate direction vector "s".
76	C alpha :: coeff used to update solution & residual
77	C sumRHS :: Sum of right-hand-side. Sometimes this is a useful
78	C debugging/trouble shooting diagnostic. For neumann problems
79	C sumRHS needs to be ~0 or it converge at a non-zero residual.
80	C cg2d_min :: used to store solution corresponding to lowest residual.
81	C msgBuf :: Informational/error message buffer
82	INTEGER bi, bj
83	INTEGER i, j, it2d
84	INTEGER actualIts(nSx,nSy)
85	INTEGER minResIter(nSx,nSy)
86	_RL cg2dTolerance_sq
87	_RL err_sq, errTile(nSx,nSy)
88	_RL eta_qrNtile(nSx,nSy)
89	_RL eta_qrNM1(nSx,nSy)
90	_RL cgBeta
91	_RL alpha, alphaTile(nSx,nSy)
92	_RL sumRHS, sumRHStile(nSx,nSy)
93	_RL rhsMax, rhsMaxLoc
94	_RL rhsNorm(nSx,nSy)
95	_RL minResTile(nSx,nSy)
96	_RL cg2d_min(1:sNx,1:sNy,nSx,nSy)
97	CHARACTER*(MAX_LEN_MBUF) msgBuf
98	LOGICAL printResidual
99	CEOP
100
101	C-- Initialise auxiliary constant, some output variable
102	cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
103
104	C-- Initialise inverter and Normalise RHS
105	rhsMax = 0. _d 0
106	DO bj=myByLo(myThid),myByHi(myThid)
107	DO bi=myBxLo(myThid),myBxHi(myThid)
108
109	actualIts(bi,bj) = 0
110	minResIter(bi,bj) = 0
111	minResTile(bi,bj) = -1. _d 0
112	eta_qrNM1(bi,bj) = 1. _d 0
113	rhsMaxLoc = 0. _d 0
114	DO j=1,sNy
115	DO i=1,sNx
116	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
117	rhsMaxLoc = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMaxLoc)
118	ENDDO
119	ENDDO
120	rhsNorm(bi,bj) = 1. _d 0
121	IF ( rhsMaxLoc .NE. 0. ) rhsNorm(bi,bj) = 1. _d 0 / rhsMaxLoc
122	IF (cg2dNormaliseRHS) THEN
123	DO j=1,sNy
124	DO i=1,sNx
125	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm(bi,bj)
126	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm(bi,bj)
127	ENDDO
128	ENDDO
129	ENDIF
130	rhsMax = MAX( rhsMaxLoc, rhsMax )
131
132	ENDDO
133	ENDDO
134	_GLOBAL_MAX_RL( rhsMax, myThid )
135
136	C-- Update overlaps
137	CALL EXCH_XY_RL( cg2d_x, myThid )
138
139	C-- Initial residual calculation
140	err_sq = 0.
141	sumRHS = 0.
142	DO bj=myByLo(myThid),myByHi(myThid)
143	DO bi=myBxLo(myThid),myBxHi(myThid)
144	IF ( nIterMin.GE.0 ) THEN
145	DO j=1,sNy
146	DO i=1,sNx
147	cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
148	ENDDO
149	ENDDO
150	ENDIF
151	DO j=0,sNy+1
152	DO i=0,sNx+1
153	cg2d_s(i,j,bi,bj) = 0.
154	ENDDO
155	ENDDO
156	sumRHStile(bi,bj) = 0. _d 0
157	errTile(bi,bj) = 0. _d 0
158	#ifdef TARGET_NEC_SX
159	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
160	#endif /* TARGET_NEC_SX */
161	DO j=1,sNy
162	DO i=1,sNx
163	cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
164	& (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
165	& +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
166	& +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
167	& +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
168	& +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
169	& )
170	errTile(bi,bj) = errTile(bi,bj)
171	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
172	sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
173	ENDDO
174	ENDDO
175	IF ( nIterMin.GE.0 ) THEN
176	minResTile(bi,bj) = errTile(bi,bj)
177	ENDIF
178	err_sq = MAX( errTile(bi,bj), err_sq )
179	sumRHS = MAX( ABS(sumRHStile(bi,bj)), sumRHS )
180
181	ENDDO
182	ENDDO
183	CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
184	_GLOBAL_MAX_RL( err_sq, myThid )
185	_GLOBAL_MAX_RL( sumRHS, myThid )
186	firstResidual = SQRT(err_sq)
187
188	printResidual = .FALSE.
189	IF ( debugLevel .GE. debLevZero ) THEN
190	_BEGIN_MASTER( myThid )
191	printResidual = printResidualFreq.GE.1
192	WRITE(standardmessageunit,'(A,1P2E22.14)')
193	& ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
194	_END_MASTER( myThid )
195	ENDIF
196
197	c IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
198
199	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
200	DO it2d=1, numIters
201	IF ( err_sq.GE.cg2dTolerance_sq ) THEN
202	err_sq = 0. _d 0
203
204	DO bj=myByLo(myThid),myByHi(myThid)
205	DO bi=myBxLo(myThid),myBxHi(myThid)
206	IF ( errTile(bi,bj).GE.cg2dTolerance_sq ) THEN
207
208	C-- Solve preconditioning equation and update
209	C-- conjugate direction vector "s".
210	eta_qrNtile(bi,bj) = 0. _d 0
211	#ifdef TARGET_NEC_SX
212	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
213	#endif /* TARGET_NEC_SX */
214	DO j=1,sNy
215	DO i=1,sNx
216	cg2d_q(i,j,bi,bj) =
217	& pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
218	& +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
219	& +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
220	& +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
221	& +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
222	eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
223	& +cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
224	ENDDO
225	ENDDO
226
227	cgBeta = eta_qrNtile(bi,bj)/eta_qrNM1(bi,bj)
228	eta_qrNM1(bi,bj) = eta_qrNtile(bi,bj)
229
230	DO j=1,sNy
231	DO i=1,sNx
232	cg2d_s(i,j,bi,bj) = cg2d_q(i,j,bi,bj)
233	& + cgBeta*cg2d_s(i,j,bi,bj)
234	ENDDO
235	ENDDO
236
237	C-- Do exchanges that require messages i.e. between processes.
238	CALL FILL_HALO_LOCAL_RL(
239	U cg2d_s(0,0,bi,bj),
240	I 1, 1, 1, 1, 1,
241	I EXCH_IGNORE_CORNERS, bi, bj, myThid )
242
243	C== Evaluate laplace operator on conjugate gradient vector
244	C== q = A.s
245	alphaTile(bi,bj) = 0. _d 0
246	#ifdef TARGET_NEC_SX
247	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
248	#endif /* TARGET_NEC_SX */
249	DO j=1,sNy
250	DO i=1,sNx
251	cg2d_q(i,j,bi,bj) =
252	& aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj)
253	& +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj)
254	& +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj)
255	& +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj)
256	& +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj)
257	alphaTile(bi,bj) = alphaTile(bi,bj)
258	& + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
259	ENDDO
260	ENDDO
261	alpha = eta_qrNtile(bi,bj)/alphaTile(bi,bj)
262
263	C== Update simultaneously solution and residual vectors (and Iter number)
264	C Now compute "interior" points.
265	errTile(bi,bj) = 0. _d 0
266	DO j=1,sNy
267	DO i=1,sNx
268	cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
269	cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
270	errTile(bi,bj) = errTile(bi,bj)
271	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
272	ENDDO
273	ENDDO
274	actualIts(bi,bj) = it2d
275
276	IF ( printResidual ) THEN
277	IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
278	WRITE(msgBuf,'(A,2I4,A,I6,A,1PE21.14)') ' cg2d(bi,bj=', bi,
279	& bj, '): iter=', it2d, ' ; resid.= ', SQRT(errTile(bi,bj))
280	CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
281	& SQUEEZE_RIGHT, myThid )
282	ENDIF
283	ENDIF
284	IF ( errTile(bi,bj) .GE. cg2dTolerance_sq .AND.
285	& errTile(bi,bj) .LT. minResTile(bi,bj) ) THEN
286	C- Store lowest residual solution
287	minResTile(bi,bj) = errTile(bi,bj)
288	minResIter(bi,bj) = it2d
289	DO j=1,sNy
290	DO i=1,sNx
291	cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
292	ENDDO
293	ENDDO
294	ENDIF
295
296	CALL FILL_HALO_LOCAL_RL(
297	U cg2d_r(0,0,bi,bj),
298	I 1, 1, 1, 1, 1,
299	I EXCH_IGNORE_CORNERS, bi, bj, myThid )
300
301	ENDIF
302	err_sq = MAX( errTile(bi,bj), err_sq )
303	C- end bi,bj loops
304	ENDDO
305	ENDDO
306	C- end cg-2d iteration loop
307	ENDIF
308	ENDDO
309
310	c 11 CONTINUE
311
312	IF ( nIterMin.GE.0 ) THEN
313	C- use the lowest residual solution (instead of current one = last residual)
314	DO bj=myByLo(myThid),myByHi(myThid)
315	DO bi=myBxLo(myThid),myBxHi(myThid)
316	c minResidualSq = MAX( minResTile(bi,bj), minResidualSq )
317	c nIterMin = MAX( minResIter(bi,bj), nIterMin )
318	IF ( errTile(bi,bj) .GT. minResTile(bi,bj) ) THEN
319	DO j=1,sNy
320	DO i=1,sNx
321	cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
322	ENDDO
323	ENDDO
324	ENDIF
325	ENDDO
326	ENDDO
327	ENDIF
328
329	IF (cg2dNormaliseRHS) THEN
330	C-- Un-normalise the answer
331	DO bj=myByLo(myThid),myByHi(myThid)
332	DO bi=myBxLo(myThid),myBxHi(myThid)
333	DO j=1,sNy
334	DO i=1,sNx
335	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm(bi,bj)
336	ENDDO
337	ENDDO
338	ENDDO
339	ENDDO
340	ENDIF
341
342	C-- Return parameters to caller
343	C return largest Iter # and Max residual in numIters and lastResidual
344	C return lowest Iter # and Min residual(^2) in nIterMin and minResidualSq
345	_GLOBAL_MAX_RL( err_sq, myThid )
346	nIterMin = numIters
347	numIters = 0
348	minResidualSq = err_sq
349	DO bj=myByLo(myThid),myByHi(myThid)
350	DO bi=myBxLo(myThid),myBxHi(myThid)
351	nIterMin = MIN( actualIts(bi,bj), nIterMin )
352	numIters = MAX( actualIts(bi,bj), numIters )
353	minResidualSq = MIN( errTile(bi,bj), minResidualSq )
354	ENDDO
355	ENDDO
356	lastResidual = SQRT(err_sq)
357	alpha = -nIterMin
358	_GLOBAL_MAX_RL( alpha, myThid )
359	nIterMin = NINT( -alpha )
360	alpha = numIters
361	_GLOBAL_MAX_RL( alpha, myThid )
362	numIters = NINT( alpha )
363	alpha = -minResidualSq
364	_GLOBAL_MAX_RL( alpha, myThid )
365	minResidualSq = -alpha
366
367	RETURN
368	END