model/src/cg2d.F

C $Header: /u/gcmpack/MITgcm/model/src/cg2d.F,v 1.54 2011/06/08 01:46:34 jmc Exp $
C $Name:  $

#include "CPP_OPTIONS.h"
#ifdef TARGET_NEC_SX
C     set a sensible default for the outer loop unrolling parameter that can
C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
#ifndef CG2D_OUTERLOOPITERS
# define CG2D_OUTERLOOPITERS 10
#endif
#endif /* TARGET_NEC_SX */

CBOP
C     !ROUTINE: CG2D
C     !INTERFACE:
      SUBROUTINE CG2D(
     U                cg2d_b, cg2d_x,
     O                firstResidual, minResidualSq, lastResidual,
     U                numIters, nIterMin,
     I                myThid )
C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE CG2D
C     | o Two-dimensional grid problem conjugate-gradient
C     |   inverter (with preconditioner).
C     *==========================================================*
C     | Con. grad is an iterative procedure for solving Ax = b.
C     | It requires the A be symmetric.
C     | This implementation assumes A is a five-diagonal
C     | matrix of the form that arises in the discrete
C     | representation of the del^2 operator in a
C     | two-dimensional space.
C     | Notes:
C     | ======
C     | This implementation can support shared-memory
C     | multi-threaded execution. In order to do this COMMON
C     | blocks are used for many of the arrays - even ones that
C     | are only used for intermedaite results. This design is
C     | OK if you want to all the threads to collaborate on
C     | solving the same problem. On the other hand if you want
C     | the threads to solve several different problems
C     | concurrently this implementation will not work.
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE
C     === Global data ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "PARAMS.h"
#include "CG2D.h"

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     cg2d_b    :: The source term or "right hand side" (output: normalised RHS)
C     cg2d_x    :: The solution (input: first guess)
C     firstResidual :: the initial residual before any iterations
C     minResidualSq :: the lowest residual reached (squared)
C     lastResidual  :: the actual residual reached
C     numIters  :: Inp: the maximum number of iterations allowed
C                  Out: the actual number of iterations used
C     nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
C                  Out: iteration number corresponding to lowest residual
C     myThid    :: Thread on which I am working.
      _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL  firstResidual
      _RL  minResidualSq
      _RL  lastResidual
      INTEGER numIters
      INTEGER nIterMin
      INTEGER myThid

C     !LOCAL VARIABLES:
C     === Local variables ====
C     bi, bj     :: tile index in X and Y.
C     i, j, it2d :: Loop counters ( it2d counts CG iterations )
C     actualIts  :: actual CG iteration number
C     err_sq     :: Measure of the square of the residual of Ax - b.
C     eta_qrN    :: Used in computing search directions; suffix N and NM1
C     eta_qrNM1     denote current and previous iterations respectively.
C     cgBeta     :: coeff used to update conjugate direction vector "s".
C     alpha      :: coeff used to update solution & residual
C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
C                   debugging/trouble shooting diagnostic. For neumann problems
C                   sumRHS needs to be ~0 or it converge at a non-zero residual.
C     cg2d_min   :: used to store solution corresponding to lowest residual.
C     msgBuf     :: Informational/error message buffer
      INTEGER bi, bj
      INTEGER i, j, it2d
      INTEGER actualIts
      _RL    cg2dTolerance_sq
      _RL    err_sq,  errTile(nSx,nSy)
      _RL    eta_qrN, eta_qrNtile(nSx,nSy)
      _RL    eta_qrNM1
      _RL    cgBeta
      _RL    alpha,   alphaTile(nSx,nSy)
      _RL    sumRHS,  sumRHStile(nSx,nSy)
      _RL    rhsMax
      _RL    rhsNorm
      _RL    cg2d_min(1:sNx,1:sNy,nSx,nSy)
#ifdef CG2D_SINGLECPU_SUM
      _RL    localBuf(1:sNx,1:sNy,nSx,nSy)
#endif
      CHARACTER*(MAX_LEN_MBUF) msgBuf
      LOGICAL printResidual
CEOP

C--   Initialise auxiliary constant, some output variable and inverter
      cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
      minResidualSq = -1. _d 0
      eta_qrNM1     =  1. _d 0

C--   Normalise RHS
      rhsMax = 0. _d 0
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
          rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
         ENDDO
        ENDDO
       ENDDO
      ENDDO

      IF (cg2dNormaliseRHS) THEN
C-  Normalise RHS :
      _GLOBAL_MAX_RL( rhsMax, myThid )
      rhsNorm = 1. _d 0
      IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        DO j=1,sNy
         DO i=1,sNx
          cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
          cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
         ENDDO
        ENDDO
       ENDDO
      ENDDO
C- end Normalise RHS
      ENDIF

C--   Update overlaps
      CALL EXCH_XY_RL( cg2d_x, myThid )

C--   Initial residual calculation
      DO bj=myByLo(myThid),myByHi(myThid)
       DO bi=myBxLo(myThid),myBxHi(myThid)
        IF ( nIterMin.GE.0 ) THEN
         DO j=1,sNy
          DO i=1,sNx
            cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDIF
        DO j=0,sNy+1
         DO i=0,sNx+1
          cg2d_s(i,j,bi,bj) = 0.
         ENDDO
        ENDDO
        sumRHStile(bi,bj) = 0. _d 0
        errTile(bi,bj)    = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
        DO j=1,sNy
         DO i=1,sNx
          cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
     &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
     &    )
#ifdef CG2D_SINGLECPU_SUM
          localBuf(i,j,bi,bj) = cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
#else
          errTile(bi,bj)    = errTile(bi,bj)
     &                      + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
          sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
#endif
         ENDDO
        ENDDO
       ENDDO
      ENDDO
      CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
#ifdef CG2D_SINGLECPU_SUM
      CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err_sq, 0, 0, myThid)
      CALL GLOBAL_SUM_SINGLECPU_RL(cg2d_b, sumRHS, OLx, OLy, myThid)
#else
      CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
      CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
#endif
      actualIts = 0
      firstResidual = SQRT(err_sq)
      IF ( nIterMin.GE.0 ) THEN
        nIterMin = 0
        minResidualSq = err_sq
      ENDIF

      printResidual = .FALSE.
      IF ( debugLevel .GE. debLevZero ) THEN
        _BEGIN_MASTER( myThid )
        printResidual = printResidualFreq.GE.1
        WRITE(standardmessageunit,'(A,1P2E22.14)')
     &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
        _END_MASTER( myThid )
      ENDIF

      IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11

C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
      DO 10 it2d=1, numIters

C--    Solve preconditioning equation and update
C--    conjugate direction vector "s".
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         eta_qrNtile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO j=1,sNy
          DO i=1,sNx
           cg2d_q(i,j,bi,bj) =
     &      pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
     &     +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
     &     +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
     &     +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
     &     +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
CcnhDebugStarts
c          cg2d_q(i,j,bi,bj) = cg2d_r(j  ,j  ,bi,bj)
CcnhDebugEnds
#ifdef CG2D_SINGLECPU_SUM
          localBuf(i,j,bi,bj) =
     &      cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
#else
           eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
     &     +cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
#endif
          ENDDO
         ENDDO
        ENDDO
       ENDDO

#ifdef CG2D_SINGLECPU_SUM
       CALL GLOBAL_SUM_SINGLECPU_RL( localBuf,eta_qrN,0,0,myThid )
#else
       CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
#endif
       cgBeta   = eta_qrN/eta_qrNM1
CcnhDebugStarts
c      WRITE(*,*) ' CG2D: Iteration ', it2d-1,
c    &            ' eta_qrN=', eta_qrN, ' beta=', cgBeta
CcnhDebugEnds
       eta_qrNM1 = eta_qrN

       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         DO j=1,sNy
          DO i=1,sNx
           cg2d_s(i,j,bi,bj) = cg2d_q(i,j,bi,bj)
     &                       + cgBeta*cg2d_s(i,j,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO

C--    Do exchanges that require messages i.e. between processes.
       CALL EXCH_S3D_RL( cg2d_s, 1, myThid )

C==    Evaluate laplace operator on conjugate gradient vector
C==    q = A.s
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         alphaTile(bi,bj) = 0. _d 0
#ifdef TARGET_NEC_SX
!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
#endif /* TARGET_NEC_SX */
         DO j=1,sNy
          DO i=1,sNx
           cg2d_q(i,j,bi,bj) =
     &     aW2d(i  ,j  ,bi,bj)*cg2d_s(i-1,j  ,bi,bj)
     &    +aW2d(i+1,j  ,bi,bj)*cg2d_s(i+1,j  ,bi,bj)
     &    +aS2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j-1,bi,bj)
     &    +aS2d(i  ,j+1,bi,bj)*cg2d_s(i  ,j+1,bi,bj)
     &    +aC2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j  ,bi,bj)
#ifdef CG2D_SINGLECPU_SUM
          localBuf(i,j,bi,bj) = cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
#else
          alphaTile(bi,bj) = alphaTile(bi,bj)
     &                     + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
#endif
          ENDDO
         ENDDO
        ENDDO
       ENDDO
#ifdef CG2D_SINGLECPU_SUM
       CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, alpha, 0, 0, myThid)
#else
       CALL GLOBAL_SUM_TILE_RL( alphaTile,  alpha,  myThid )
#endif
CcnhDebugStarts
c      WRITE(*,*) ' CG2D: Iteration ', it2d-1,
c    &            ' SUM(s*q)=', alpha, ' alpha=', eta_qrN/alpha
CcnhDebugEnds
       alpha = eta_qrN/alpha

C==    Update simultaneously solution and residual vectors (and Iter number)
C      Now compute "interior" points.
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         errTile(bi,bj) = 0. _d 0
         DO j=1,sNy
          DO i=1,sNx
           cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
           cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
#ifdef CG2D_SINGLECPU_SUM
           localBuf(i,j,bi,bj) = cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
#else
           errTile(bi,bj) = errTile(bi,bj)
     &                    + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
#endif
          ENDDO
         ENDDO
        ENDDO
       ENDDO
       actualIts = it2d

#ifdef CG2D_SINGLECPU_SUM
       CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err_sq, 0, 0, myThid)
#else
       CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq,    myThid )
#endif
       IF ( printResidual ) THEN
        IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
         WRITE(msgBuf,'(A,I6,A,1PE21.14)')
     &    ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
         CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
     &                       SQUEEZE_RIGHT, myThid )
        ENDIF
       ENDIF
       IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
       IF ( err_sq .LT. minResidualSq ) THEN
C-     Store lowest residual solution
         minResidualSq = err_sq
         nIterMin = it2d
         DO bj=myByLo(myThid),myByHi(myThid)
          DO bi=myBxLo(myThid),myBxHi(myThid)
           DO j=1,sNy
            DO i=1,sNx
             cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ENDDO
       ENDIF

       CALL EXCH_S3D_RL( cg2d_r, 1, myThid )

   10 CONTINUE
   11 CONTINUE

      IF ( nIterMin.GE.0 .AND. err_sq .GT. minResidualSq ) THEN
C-    use the lowest residual solution (instead of current one = last residual)
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO j=1,sNy
           DO i=1,sNx
             cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF

      IF (cg2dNormaliseRHS) THEN
C--   Un-normalise the answer
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
          DO j=1,sNy
           DO i=1,sNx
            cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
           ENDDO
          ENDDO
         ENDDO
        ENDDO
      ENDIF

C--   Return parameters to caller
      lastResidual = SQRT(err_sq)
      numIters = actualIts

CcnhDebugStarts
c     _EXCH_XY_RL(cg2d_x, myThid )
c     CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
c     err_sq = 0. _d 0
c     DO bj=myByLo(myThid),myByHi(myThid)
c      DO bi=myBxLo(myThid),myBxHi(myThid)
c       DO j=1,sNy
c        DO i=1,sNx
c         cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
c    &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
c    &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
c    &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
c    &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
c    &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
c    &    )
c         err_sq = err_sq + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
c        ENDDO
c       ENDDO
c      ENDDO
c     ENDDO
c     _GLOBAL_SUM_RL( err_sq, myThid )
c     write(*,*) 'cg2d: Ax - b = ',SQRT(err_sq)
CcnhDebugEnds

      RETURN
      END
1	C $Header: /u/gcmpack/MITgcm/model/src/cg2d.F,v 1.54 2011/06/08 01:46:34 jmc Exp $
2	C $Name: $
3
4	#include "CPP_OPTIONS.h"
5	#ifdef TARGET_NEC_SX
6	C set a sensible default for the outer loop unrolling parameter that can
7	C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8	#ifndef CG2D_OUTERLOOPITERS
9	# define CG2D_OUTERLOOPITERS 10
10	#endif
11	#endif /* TARGET_NEC_SX */
12
13	CBOP
14	C !ROUTINE: CG2D
15	C !INTERFACE:
16	SUBROUTINE CG2D(
17	U cg2d_b, cg2d_x,
18	O firstResidual, minResidualSq, lastResidual,
19	U numIters, nIterMin,
20	I myThid )
21	C !DESCRIPTION: \bv
22	C ==========================================================
23	C \| SUBROUTINE CG2D
24	C \| o Two-dimensional grid problem conjugate-gradient
25	C \| inverter (with preconditioner).
26	C ==========================================================
27	C \| Con. grad is an iterative procedure for solving Ax = b.
28	C \| It requires the A be symmetric.
29	C \| This implementation assumes A is a five-diagonal
30	C \| matrix of the form that arises in the discrete
31	C \| representation of the del^2 operator in a
32	C \| two-dimensional space.
33	C \| Notes:
34	C \| ======
35	C \| This implementation can support shared-memory
36	C \| multi-threaded execution. In order to do this COMMON
37	C \| blocks are used for many of the arrays - even ones that
38	C \| are only used for intermedaite results. This design is
39	C \| OK if you want to all the threads to collaborate on
40	C \| solving the same problem. On the other hand if you want
41	C \| the threads to solve several different problems
42	C \| concurrently this implementation will not work.
43	C ==========================================================
44	C \ev
45
46	C !USES:
47	IMPLICIT NONE
48	C === Global data ===
49	#include "SIZE.h"
50	#include "EEPARAMS.h"
51	#include "PARAMS.h"
52	#include "CG2D.h"
53
54	C !INPUT/OUTPUT PARAMETERS:
55	C === Routine arguments ===
56	C cg2d_b :: The source term or "right hand side" (output: normalised RHS)
57	C cg2d_x :: The solution (input: first guess)
58	C firstResidual :: the initial residual before any iterations
59	C minResidualSq :: the lowest residual reached (squared)
60	C lastResidual :: the actual residual reached
61	C numIters :: Inp: the maximum number of iterations allowed
62	C Out: the actual number of iterations used
63	C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
64	C Out: iteration number corresponding to lowest residual
65	C myThid :: Thread on which I am working.
66	_RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
67	_RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
68	_RL firstResidual
69	_RL minResidualSq
70	_RL lastResidual
71	INTEGER numIters
72	INTEGER nIterMin
73	INTEGER myThid
74
75	C !LOCAL VARIABLES:
76	C === Local variables ====
77	C bi, bj :: tile index in X and Y.
78	C i, j, it2d :: Loop counters ( it2d counts CG iterations )
79	C actualIts :: actual CG iteration number
80	C err_sq :: Measure of the square of the residual of Ax - b.
81	C eta_qrN :: Used in computing search directions; suffix N and NM1
82	C eta_qrNM1 denote current and previous iterations respectively.
83	C cgBeta :: coeff used to update conjugate direction vector "s".
84	C alpha :: coeff used to update solution & residual
85	C sumRHS :: Sum of right-hand-side. Sometimes this is a useful
86	C debugging/trouble shooting diagnostic. For neumann problems
87	C sumRHS needs to be ~0 or it converge at a non-zero residual.
88	C cg2d_min :: used to store solution corresponding to lowest residual.
89	C msgBuf :: Informational/error message buffer
90	INTEGER bi, bj
91	INTEGER i, j, it2d
92	INTEGER actualIts
93	_RL cg2dTolerance_sq
94	_RL err_sq, errTile(nSx,nSy)
95	_RL eta_qrN, eta_qrNtile(nSx,nSy)
96	_RL eta_qrNM1
97	_RL cgBeta
98	_RL alpha, alphaTile(nSx,nSy)
99	_RL sumRHS, sumRHStile(nSx,nSy)
100	_RL rhsMax
101	_RL rhsNorm
102	_RL cg2d_min(1:sNx,1:sNy,nSx,nSy)
103	#ifdef CG2D_SINGLECPU_SUM
104	_RL localBuf(1:sNx,1:sNy,nSx,nSy)
105	#endif
106	CHARACTER*(MAX_LEN_MBUF) msgBuf
107	LOGICAL printResidual
108	CEOP
109
110	C-- Initialise auxiliary constant, some output variable and inverter
111	cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
112	minResidualSq = -1. _d 0
113	eta_qrNM1 = 1. _d 0
114
115	C-- Normalise RHS
116	rhsMax = 0. _d 0
117	DO bj=myByLo(myThid),myByHi(myThid)
118	DO bi=myBxLo(myThid),myBxHi(myThid)
119	DO j=1,sNy
120	DO i=1,sNx
121	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
122	rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
123	ENDDO
124	ENDDO
125	ENDDO
126	ENDDO
127
128	IF (cg2dNormaliseRHS) THEN
129	C- Normalise RHS :
130	_GLOBAL_MAX_RL( rhsMax, myThid )
131	rhsNorm = 1. _d 0
132	IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
133	DO bj=myByLo(myThid),myByHi(myThid)
134	DO bi=myBxLo(myThid),myBxHi(myThid)
135	DO j=1,sNy
136	DO i=1,sNx
137	cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
138	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
139	ENDDO
140	ENDDO
141	ENDDO
142	ENDDO
143	C- end Normalise RHS
144	ENDIF
145
146	C-- Update overlaps
147	CALL EXCH_XY_RL( cg2d_x, myThid )
148
149	C-- Initial residual calculation
150	DO bj=myByLo(myThid),myByHi(myThid)
151	DO bi=myBxLo(myThid),myBxHi(myThid)
152	IF ( nIterMin.GE.0 ) THEN
153	DO j=1,sNy
154	DO i=1,sNx
155	cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
156	ENDDO
157	ENDDO
158	ENDIF
159	DO j=0,sNy+1
160	DO i=0,sNx+1
161	cg2d_s(i,j,bi,bj) = 0.
162	ENDDO
163	ENDDO
164	sumRHStile(bi,bj) = 0. _d 0
165	errTile(bi,bj) = 0. _d 0
166	#ifdef TARGET_NEC_SX
167	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
168	#endif /* TARGET_NEC_SX */
169	DO j=1,sNy
170	DO i=1,sNx
171	cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
172	& (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
173	& +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
174	& +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
175	& +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
176	& +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
177	& )
178	#ifdef CG2D_SINGLECPU_SUM
179	localBuf(i,j,bi,bj) = cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
180	#else
181	errTile(bi,bj) = errTile(bi,bj)
182	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
183	sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
184	#endif
185	ENDDO
186	ENDDO
187	ENDDO
188	ENDDO
189	CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
190	#ifdef CG2D_SINGLECPU_SUM
191	CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err_sq, 0, 0, myThid)
192	CALL GLOBAL_SUM_SINGLECPU_RL(cg2d_b, sumRHS, OLx, OLy, myThid)
193	#else
194	CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
195	CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
196	#endif
197	actualIts = 0
198	firstResidual = SQRT(err_sq)
199	IF ( nIterMin.GE.0 ) THEN
200	nIterMin = 0
201	minResidualSq = err_sq
202	ENDIF
203
204	printResidual = .FALSE.
205	IF ( debugLevel .GE. debLevZero ) THEN
206	_BEGIN_MASTER( myThid )
207	printResidual = printResidualFreq.GE.1
208	WRITE(standardmessageunit,'(A,1P2E22.14)')
209	& ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
210	_END_MASTER( myThid )
211	ENDIF
212
213	IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
214
215	C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
216	DO 10 it2d=1, numIters
217
218	C-- Solve preconditioning equation and update
219	C-- conjugate direction vector "s".
220	DO bj=myByLo(myThid),myByHi(myThid)
221	DO bi=myBxLo(myThid),myBxHi(myThid)
222	eta_qrNtile(bi,bj) = 0. _d 0
223	#ifdef TARGET_NEC_SX
224	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
225	#endif /* TARGET_NEC_SX */
226	DO j=1,sNy
227	DO i=1,sNx
228	cg2d_q(i,j,bi,bj) =
229	& pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
230	& +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
231	& +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
232	& +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
233	& +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
234	CcnhDebugStarts
235	c cg2d_q(i,j,bi,bj) = cg2d_r(j ,j ,bi,bj)
236	CcnhDebugEnds
237	#ifdef CG2D_SINGLECPU_SUM
238	localBuf(i,j,bi,bj) =
239	& cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
240	#else
241	eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
242	& +cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
243	#endif
244	ENDDO
245	ENDDO
246	ENDDO
247	ENDDO
248
249	#ifdef CG2D_SINGLECPU_SUM
250	CALL GLOBAL_SUM_SINGLECPU_RL( localBuf,eta_qrN,0,0,myThid )
251	#else
252	CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
253	#endif
254	cgBeta = eta_qrN/eta_qrNM1
255	CcnhDebugStarts
256	c WRITE(,) ' CG2D: Iteration ', it2d-1,
257	c & ' eta_qrN=', eta_qrN, ' beta=', cgBeta
258	CcnhDebugEnds
259	eta_qrNM1 = eta_qrN
260
261	DO bj=myByLo(myThid),myByHi(myThid)
262	DO bi=myBxLo(myThid),myBxHi(myThid)
263	DO j=1,sNy
264	DO i=1,sNx
265	cg2d_s(i,j,bi,bj) = cg2d_q(i,j,bi,bj)
266	& + cgBeta*cg2d_s(i,j,bi,bj)
267	ENDDO
268	ENDDO
269	ENDDO
270	ENDDO
271
272	C-- Do exchanges that require messages i.e. between processes.
273	CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
274
275	C== Evaluate laplace operator on conjugate gradient vector
276	C== q = A.s
277	DO bj=myByLo(myThid),myByHi(myThid)
278	DO bi=myBxLo(myThid),myBxHi(myThid)
279	alphaTile(bi,bj) = 0. _d 0
280	#ifdef TARGET_NEC_SX
281	!CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
282	#endif /* TARGET_NEC_SX */
283	DO j=1,sNy
284	DO i=1,sNx
285	cg2d_q(i,j,bi,bj) =
286	& aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj)
287	& +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj)
288	& +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj)
289	& +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj)
290	& +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj)
291	#ifdef CG2D_SINGLECPU_SUM
292	localBuf(i,j,bi,bj) = cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
293	#else
294	alphaTile(bi,bj) = alphaTile(bi,bj)
295	& + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
296	#endif
297	ENDDO
298	ENDDO
299	ENDDO
300	ENDDO
301	#ifdef CG2D_SINGLECPU_SUM
302	CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, alpha, 0, 0, myThid)
303	#else
304	CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
305	#endif
306	CcnhDebugStarts
307	c WRITE(,) ' CG2D: Iteration ', it2d-1,
308	c & ' SUM(s*q)=', alpha, ' alpha=', eta_qrN/alpha
309	CcnhDebugEnds
310	alpha = eta_qrN/alpha
311
312	C== Update simultaneously solution and residual vectors (and Iter number)
313	C Now compute "interior" points.
314	DO bj=myByLo(myThid),myByHi(myThid)
315	DO bi=myBxLo(myThid),myBxHi(myThid)
316	errTile(bi,bj) = 0. _d 0
317	DO j=1,sNy
318	DO i=1,sNx
319	cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
320	cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
321	#ifdef CG2D_SINGLECPU_SUM
322	localBuf(i,j,bi,bj) = cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
323	#else
324	errTile(bi,bj) = errTile(bi,bj)
325	& + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
326	#endif
327	ENDDO
328	ENDDO
329	ENDDO
330	ENDDO
331	actualIts = it2d
332
333	#ifdef CG2D_SINGLECPU_SUM
334	CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err_sq, 0, 0, myThid)
335	#else
336	CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
337	#endif
338	IF ( printResidual ) THEN
339	IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
340	WRITE(msgBuf,'(A,I6,A,1PE21.14)')
341	& ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
342	CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
343	& SQUEEZE_RIGHT, myThid )
344	ENDIF
345	ENDIF
346	IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
347	IF ( err_sq .LT. minResidualSq ) THEN
348	C- Store lowest residual solution
349	minResidualSq = err_sq
350	nIterMin = it2d
351	DO bj=myByLo(myThid),myByHi(myThid)
352	DO bi=myBxLo(myThid),myBxHi(myThid)
353	DO j=1,sNy
354	DO i=1,sNx
355	cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
356	ENDDO
357	ENDDO
358	ENDDO
359	ENDDO
360	ENDIF
361
362	CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
363
364	10 CONTINUE
365	11 CONTINUE
366
367	IF ( nIterMin.GE.0 .AND. err_sq .GT. minResidualSq ) THEN
368	C- use the lowest residual solution (instead of current one = last residual)
369	DO bj=myByLo(myThid),myByHi(myThid)
370	DO bi=myBxLo(myThid),myBxHi(myThid)
371	DO j=1,sNy
372	DO i=1,sNx
373	cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
374	ENDDO
375	ENDDO
376	ENDDO
377	ENDDO
378	ENDIF
379
380	IF (cg2dNormaliseRHS) THEN
381	C-- Un-normalise the answer
382	DO bj=myByLo(myThid),myByHi(myThid)
383	DO bi=myBxLo(myThid),myBxHi(myThid)
384	DO j=1,sNy
385	DO i=1,sNx
386	cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
387	ENDDO
388	ENDDO
389	ENDDO
390	ENDDO
391	ENDIF
392
393	C-- Return parameters to caller
394	lastResidual = SQRT(err_sq)
395	numIters = actualIts
396
397	CcnhDebugStarts
398	c _EXCH_XY_RL(cg2d_x, myThid )
399	c CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
400	c err_sq = 0. _d 0
401	c DO bj=myByLo(myThid),myByHi(myThid)
402	c DO bi=myBxLo(myThid),myBxHi(myThid)
403	c DO j=1,sNy
404	c DO i=1,sNx
405	c cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
406	c & (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
407	c & +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
408	c & +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
409	c & +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
410	c & +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
411	c & )
412	c err_sq = err_sq + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
413	c ENDDO
414	c ENDDO
415	c ENDDO
416	c ENDDO
417	c _GLOBAL_SUM_RL( err_sq, myThid )
418	c write(,) 'cg2d: Ax - b = ',SQRT(err_sq)
419	CcnhDebugEnds
420
421	RETURN
422	END