/[MITgcm]/MITgcm/model/src/cg2d.F

Diff of /MITgcm/model/src/cg2d.F

Parent Directory | Revision Log | View Revision Graph Revision Graph | View Patch Patch

-revision 1.34.6.4 by heimbach,
Tue Jul  8 15:18:29 2003 UTC
+revision 1.55 by jmc,
Fri May 11 23:28:10 2012 UTC
 Line 2 
 C $Header$
  C $Name$
  #include "CPP_OPTIONS.h"
+ #ifdef TARGET_NEC_SX
+ C     set a sensible default for the outer loop unrolling parameter that can
+ C     be overridden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
+ #ifndef CG2D_OUTERLOOPITERS
+ # define CG2D_OUTERLOOPITERS 10
+ #endif
+ #endif /* TARGET_NEC_SX */
  CBOP
  C     !ROUTINE: CG2D
  C     !INTERFACE:
        SUBROUTINE CG2D(
-      I                cg2d_b,
+      U                cg2d_b, cg2d_x,
-      U                cg2d_x,
+      O                firstResidual, minResidualSq, lastResidual,
-      O                firstResidual,
+      U                numIters, nIterMin,
-      O                lastResidual,
-      U                numIters,
       I                myThid )
  C     !DESCRIPTION: \bv
  C     *==========================================================*
  C     | SUBROUTINE CG2D
  C     | o Two-dimensional grid problem conjugate-gradient
  C     |   inverter (with preconditioner).
  C     *==========================================================*
  C     | Con. grad is an iterative procedure for solving Ax = b.
  C     | It requires that A be symmetric.
  C     | This implementation assumes A is a five-diagonal
  C     | matrix of the form that arises in the discrete
  C     | representation of the del^2 operator in a
  C     | two-dimensional space.
  C     | Notes:
  C     | ======
  C     | This implementation can support shared-memory
  C     | multi-threaded execution. In order to do this COMMON
  C     | blocks are used for many of the arrays - even ones that
  C     | are only used for intermediate results. This design is
  C     | OK if you want all the threads to collaborate on
  C     | solving the same problem. On the other hand if you want
  C     | the threads to solve several different problems
  C     | concurrently this implementation will not work.
  C     *==========================================================*
  C     \ev
-Line 44 
 C     === Global data ===
+Line 49 
 C     === Global data ===
  #include "SIZE.h"
  #include "EEPARAMS.h"
  #include "PARAMS.h"
- #include "GRID.h"
  #include "CG2D.h"
- #include "SURFACE.h"
  C     !INPUT/OUTPUT PARAMETERS:
  C     === Routine arguments ===
- C     myThid    - Thread on which I am working.
+ C     cg2d_b    :: The source term or "right hand side" (output: normalised RHS)
- C     cg2d_b    - The source term or "right hand side"
+ C     cg2d_x    :: The solution (input: first guess)
- C     cg2d_x    - The solution
+ C     firstResidual :: the initial residual before any iterations
- C     firstResidual - the initial residual before any iterations
+ C     minResidualSq :: the lowest residual reached (squared)
- C     lastResidual  - the actual residual reached
+ C     lastResidual  :: the actual residual reached
- C     numIters  - Entry: the maximum number of iterations allowed
+ C     numIters  :: Inp: the maximum number of iterations allowed
- C                 Exit:  the actual number of iterations used
+ C                  Out: the actual number of iterations used
+ C     nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
+ C                  Out: iteration number corresponding to lowest residual
+ C     myThid    :: Thread on which I am working.
        _RL  cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
        _RL  cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
        _RL  firstResidual
+       _RL  minResidualSq
        _RL  lastResidual
        INTEGER numIters
+       INTEGER nIterMin
        INTEGER myThid
  C     !LOCAL VARIABLES:
  C     === Local variables ====
- C     actualIts      - Number of iterations taken
+ C     bi, bj     :: tile index in X and Y.
- C     actualResidual - residual
+ C     i, j, it2d :: Loop counters ( it2d counts CG iterations )
- C     bi          - Block index in X and Y.
+ C     actualIts  :: actual CG iteration number
- C     bj
+ C     err_sq     :: Measure of the square of the residual of Ax - b.
- C     eta_qrN     - Used in computing search directions
+ C     eta_qrN    :: Used in computing search directions; suffix N and NM1
- C     eta_qrNM1     suffix N and NM1 denote current and
+ C     eta_qrNM1     denote current and previous iterations respectively.
- C     cgBeta        previous iterations respectively.
+ C     cgBeta     :: coeff used to update conjugate direction vector "s".
- C     alpha
+ C     alpha      :: coeff used to update solution & residual
- C     sumRHS      - Sum of right-hand-side. Sometimes this is a
+ C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful
- C                   useful debuggin/trouble shooting diagnostic.
+ C                   debugging/trouble shooting diagnostic. For neumann problems
- C                   For neumann problems sumRHS needs to be ~0.
+ C                   sumRHS needs to be ~0 or it converges at a non-zero residual.
- C                   or they converge at a non-zero residual.
+ C     cg2d_min   :: used to store solution corresponding to lowest residual.
- C     err         - Measure of residual of Ax - b, usually the norm.
+ C     msgBuf     :: Informational/error message buffer
- C     I, J, N     - Loop counters ( N counts CG iterations )
+       INTEGER bi, bj
+       INTEGER i, j, it2d
        INTEGER actualIts
-       _RL    actualResidual
+       _RL    cg2dTolerance_sq
-       INTEGER bi, bj
+       _RL    err_sq,  errTile(nSx,nSy)
-       INTEGER I, J, it2d
+       _RL    eta_qrN, eta_qrNtile(nSx,nSy)
-       _RL    err
-       _RL    eta_qrN
        _RL    eta_qrNM1
        _RL    cgBeta
-       _RL    alpha
+       _RL    alpha,   alphaTile(nSx,nSy)
-       _RL    sumRHS
+       _RL    sumRHS,  sumRHStile(nSx,nSy)
        _RL    rhsMax
        _RL    rhsNorm
+       _RL    cg2d_min(1:sNx,1:sNy,nSx,nSy)
-       INTEGER OLw
+ #ifdef CG2D_SINGLECPU_SUM
-       INTEGER OLe
+       _RL    localBuf(1:sNx,1:sNy,nSx,nSy)
-       INTEGER OLn
+ #endif
-       INTEGER OLs
+       CHARACTER*(MAX_LEN_MBUF) msgBuf
-       INTEGER exchWidthX
+       LOGICAL printResidual
-       INTEGER exchWidthY
-       INTEGER myNz
  CEOP
+ C--   Initialise auxiliary constant, some output variable and inverter
- CcnhDebugStarts
+       cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
- C     CHARACTER*(MAX_LEN_FNAM) suff
+       minResidualSq = -1. _d 0
- CcnhDebugEnds
+       eta_qrNM1     =  1. _d 0
- C--   Initialise inverter
-       eta_qrNM1 = 1. _d 0
- CcnhDebugStarts
- C     _EXCH_XY_R8( cg2d_b, myThid )
- C     CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.0 CG2D_B' , 1, myThid )
- C     suff = 'unnormalised'
- C     CALL WRITE_FLD_XY_RL (  'cg2d_b.',suff,    cg2d_b, 1, myThid)
- C     STOP
- CcnhDebugEnds
  C--   Normalise RHS
        rhsMax = 0. _d 0
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
-         DO J=1,sNy
+         DO j=1,sNy
-          DO I=1,sNx
+          DO i=1,sNx
-           cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
+           cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
-           rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
+           rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
           ENDDO
          ENDDO
         ENDDO
-Line 134 
 C--   Normalise RHS
+Line 127 
 C--   Normalise RHS
        IF (cg2dNormaliseRHS) THEN
  C-  Normalise RHS :
- #ifdef LETS_MAKE_JAM
+       _GLOBAL_MAX_RL( rhsMax, myThid )
- C     _GLOBAL_MAX_R8( rhsMax, myThid )
-       rhsMax=1.
- #else
-       _GLOBAL_MAX_R8( rhsMax, myThid )
- Catm  rhsMax=1.
- #endif
        rhsNorm = 1. _d 0
        IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
-         DO J=1,sNy
+         DO j=1,sNy
-          DO I=1,sNx
+          DO i=1,sNx
-           cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
+           cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
-           cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
+           cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
           ENDDO
          ENDDO
         ENDDO
-Line 157 
 C- end Normalise RHS
+Line 144 
 C- end Normalise RHS
        ENDIF
  C--   Update overlaps
-       _EXCH_XY_R8( cg2d_b, myThid )
+       CALL EXCH_XY_RL( cg2d_x, myThid )
-       _EXCH_XY_R8( cg2d_x, myThid )
- CcnhDebugStarts
- C     CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid )
- C     suff = 'normalised'
- C     CALL WRITE_FLD_XY_RL (  'cg2d_b.',suff,    cg2d_b, 1, myThid)
- CcnhDebugEnds
  C--   Initial residual calculation
-       err    = 0. _d 0
-       sumRHS = 0. _d 0
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
-         DO J=1,sNy
+         IF ( nIterMin.GE.0 ) THEN
-          DO I=1,sNx
+          DO j=1,sNy
-           cg2d_s(I,J,bi,bj) = 0.
+           DO i=1,sNx
-           cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
+             cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
-      &    (aW2d(I  ,J  ,bi,bj)*cg2d_x(I-1,J  ,bi,bj)
+           ENDDO
-      &    +aW2d(I+1,J  ,bi,bj)*cg2d_x(I+1,J  ,bi,bj)
+          ENDDO
-      &    +aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J-1,bi,bj)
+         ENDIF
-      &    +aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J+1,bi,bj)
+         DO j=0,sNy+1
-      &    -aW2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
+          DO i=0,sNx+1
-      &    -aW2d(I+1,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
+           cg2d_s(i,j,bi,bj) = 0.
-      &    -aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
+          ENDDO
-      &    -aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
+         ENDDO
-      &    -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)*
+         sumRHStile(bi,bj) = 0. _d 0
-      &     cg2d_x(I  ,J  ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm
+         errTile(bi,bj)    = 0. _d 0
+ #ifdef TARGET_NEC_SX
+ !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
+ #endif /* TARGET_NEC_SX */
+         DO j=1,sNy
+          DO i=1,sNx
+           cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
+      &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
+      &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
+      &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
+      &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
+      &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
       &    )
-           err            = err            +
+ #ifdef CG2D_SINGLECPU_SUM
-      &     cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
+           localBuf(i,j,bi,bj) = cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
-           sumRHS            = sumRHS            +
+ #else
-      &     cg2d_b(I,J,bi,bj)
+           errTile(bi,bj)    = errTile(bi,bj)
+      &                      + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
+           sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
+ #endif
           ENDDO
          ENDDO
         ENDDO
        ENDDO
- C     _EXCH_XY_R8( cg2d_r, myThid )
+       CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
- #ifdef LETS_MAKE_JAM
+ #ifdef CG2D_SINGLECPU_SUM
-       CALL EXCH_XY_O1_R8_JAM( cg2d_r )
+       CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err_sq, 0, 0, myThid)
- #else
+       CALL GLOBAL_SUM_SINGLECPU_RL(cg2d_b, sumRHS, OLx, OLy, myThid)
-       CALL EXCH_XY_RL( cg2d_r, myThid )
- #endif
- C     _EXCH_XY_R8( cg2d_s, myThid )
- #ifdef LETS_MAKE_JAM
-       CALL EXCH_XY_O1_R8_JAM( cg2d_s )
  #else
-       CALL EXCH_XY_RL( cg2d_s, myThid )
+       CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid )
+       CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
  #endif
-        _GLOBAL_SUM_R8( sumRHS, myThid )
+       actualIts = 0
-        _GLOBAL_SUM_R8( err   , myThid )
+       firstResidual = SQRT(err_sq)
-        err = SQRT(err)
+       IF ( nIterMin.GE.0 ) THEN
-        actualIts      = 0
+         nIterMin = 0
-        actualResidual = err
+         minResidualSq = err_sq
+       ENDIF
-        IF ( debugLevel .GE. debLevA ) THEN
+       printResidual = .FALSE.
+       IF ( debugLevel .GE. debLevZero ) THEN
          _BEGIN_MASTER( myThid )
-         write(*,'(A,1P2E22.14)')' cg2d: Sum(rhs),rhsMax = ',
+         printResidual = printResidualFreq.GE.1
-      &                                  sumRHS,rhsMax
+         WRITE(standardmessageunit,'(A,1P2E22.14)')
-         _END_MASTER( )
+      &  ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
-        ENDIF
+         _END_MASTER( myThid )
- C     _BARRIER
+       ENDIF
- c     _BEGIN_MASTER( myThid )
- c      WRITE(*,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
- c    & actualIts, actualResidual
- c     _END_MASTER( )
-       firstResidual=actualResidual
-       IF ( err .LT. cg2dTolerance ) GOTO 11
+       IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
  C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
        DO 10 it2d=1, numIters
- CcnhDebugStarts
- C      WRITE(*,*) ' CG2D: Iteration ',it2d-1,' residual = ',
- C    &  actualResidual
- CcnhDebugEnds
  C--    Solve preconditioning equation and update
  C--    conjugate direction vector "s".
-        eta_qrN = 0. _d 0
         DO bj=myByLo(myThid),myByHi(myThid)
          DO bi=myBxLo(myThid),myBxHi(myThid)
-          DO J=1,sNy
+          eta_qrNtile(bi,bj) = 0. _d 0
-           DO I=1,sNx
+ #ifdef TARGET_NEC_SX
-            cg2d_q(I,J,bi,bj) =
+ !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
-      &      pC(I  ,J  ,bi,bj)*cg2d_r(I  ,J  ,bi,bj)
+ #endif /* TARGET_NEC_SX */
-      &     +pW(I  ,J  ,bi,bj)*cg2d_r(I-1,J  ,bi,bj)
+          DO j=1,sNy
-      &     +pW(I+1,J  ,bi,bj)*cg2d_r(I+1,J  ,bi,bj)
+           DO i=1,sNx
-      &     +pS(I  ,J  ,bi,bj)*cg2d_r(I  ,J-1,bi,bj)
+            cg2d_q(i,j,bi,bj) =
-      &     +pS(I  ,J+1,bi,bj)*cg2d_r(I  ,J+1,bi,bj)
+      &      pC(i  ,j  ,bi,bj)*cg2d_r(i  ,j  ,bi,bj)
+      &     +pW(i  ,j  ,bi,bj)*cg2d_r(i-1,j  ,bi,bj)
+      &     +pW(i+1,j  ,bi,bj)*cg2d_r(i+1,j  ,bi,bj)
+      &     +pS(i  ,j  ,bi,bj)*cg2d_r(i  ,j-1,bi,bj)
+      &     +pS(i  ,j+1,bi,bj)*cg2d_r(i  ,j+1,bi,bj)
  CcnhDebugStarts
- C          cg2d_q(I,J,bi,bj) = cg2d_r(I  ,J  ,bi,bj)
+ c          cg2d_q(i,j,bi,bj) = cg2d_r(i  ,j  ,bi,bj)
  CcnhDebugEnds
-            eta_qrN = eta_qrN
+ #ifdef CG2D_SINGLECPU_SUM
-      &     +cg2d_q(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
+           localBuf(i,j,bi,bj) =
+      &      cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
+ #else
+            eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
+      &     +cg2d_q(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
+ #endif
            ENDDO
           ENDDO
          ENDDO
         ENDDO
-        _GLOBAL_SUM_R8(eta_qrN, myThid)
+ #ifdef CG2D_SINGLECPU_SUM
- CcnhDebugStarts
+        CALL GLOBAL_SUM_SINGLECPU_RL( localBuf,eta_qrN,0,0,myThid )
- C      WRITE(*,*) ' CG2D: Iteration ',it2d-1,' eta_qrN = ',eta_qrN
+ #else
- CcnhDebugEnds
+        CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
+ #endif
         cgBeta   = eta_qrN/eta_qrNM1
  CcnhDebugStarts
- C      WRITE(*,*) ' CG2D: Iteration ',it2d-1,' beta = ',cgBeta
+ c      WRITE(*,*) ' CG2D: Iteration ', it2d-1,
+ c    &            ' eta_qrN=', eta_qrN, ' beta=', cgBeta
  CcnhDebugEnds
         eta_qrNM1 = eta_qrN
         DO bj=myByLo(myThid),myByHi(myThid)
          DO bi=myBxLo(myThid),myBxHi(myThid)
-          DO J=1,sNy
+          DO j=1,sNy
-           DO I=1,sNx
+           DO i=1,sNx
-            cg2d_s(I,J,bi,bj) = cg2d_q(I,J,bi,bj)
+            cg2d_s(i,j,bi,bj) = cg2d_q(i,j,bi,bj)
-      &                       + cgBeta*cg2d_s(I,J,bi,bj)
+      &                       + cgBeta*cg2d_s(i,j,bi,bj)
            ENDDO
           ENDDO
          ENDDO
         ENDDO
- C--    Do exchanges that require messages i.e. between
+ C--    Do exchanges that require messages i.e. between processes.
- C--    processes.
+        CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
- C      _EXCH_XY_R8( cg2d_s, myThid )
- #ifdef LETS_MAKE_JAM
-       CALL EXCH_XY_O1_R8_JAM( cg2d_s )
- #else
-       CALL EXCH_XY_RL( cg2d_s, myThid )
- #endif
  C==    Evaluate laplace operator on conjugate gradient vector
  C==    q = A.s
-        alpha = 0. _d 0
         DO bj=myByLo(myThid),myByHi(myThid)
          DO bi=myBxLo(myThid),myBxHi(myThid)
-          DO J=1,sNy
+          alphaTile(bi,bj) = 0. _d 0
-           DO I=1,sNx
+ #ifdef TARGET_NEC_SX
-            cg2d_q(I,J,bi,bj) =
+ !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
-      &     aW2d(I  ,J  ,bi,bj)*cg2d_s(I-1,J  ,bi,bj)
+ #endif /* TARGET_NEC_SX */
-      &    +aW2d(I+1,J  ,bi,bj)*cg2d_s(I+1,J  ,bi,bj)
+          DO j=1,sNy
-      &    +aS2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J-1,bi,bj)
+           DO i=1,sNx
-      &    +aS2d(I  ,J+1,bi,bj)*cg2d_s(I  ,J+1,bi,bj)
+            cg2d_q(i,j,bi,bj) =
-      &    -aW2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
+      &     aW2d(i  ,j  ,bi,bj)*cg2d_s(i-1,j  ,bi,bj)
-      &    -aW2d(I+1,J  ,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
+      &    +aW2d(i+1,j  ,bi,bj)*cg2d_s(i+1,j  ,bi,bj)
-      &    -aS2d(I  ,J  ,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
+      &    +aS2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j-1,bi,bj)
-      &    -aS2d(I  ,J+1,bi,bj)*cg2d_s(I  ,J  ,bi,bj)
+      &    +aS2d(i  ,j+1,bi,bj)*cg2d_s(i  ,j+1,bi,bj)
-      &    -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)*
+      &    +aC2d(i  ,j  ,bi,bj)*cg2d_s(i  ,j  ,bi,bj)
-      &     cg2d_s(I  ,J  ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm
+ #ifdef CG2D_SINGLECPU_SUM
-           alpha = alpha+cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
+           localBuf(i,j,bi,bj) = cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
+ #else
+           alphaTile(bi,bj) = alphaTile(bi,bj)
+      &                     + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
+ #endif
            ENDDO
           ENDDO
          ENDDO
         ENDDO
-        _GLOBAL_SUM_R8(alpha,myThid)
+ #ifdef CG2D_SINGLECPU_SUM
+        CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, alpha, 0, 0, myThid)
+ #else
+        CALL GLOBAL_SUM_TILE_RL( alphaTile,  alpha,  myThid )
+ #endif
  CcnhDebugStarts
- C      WRITE(*,*) ' CG2D: Iteration ',it2d-1,' SUM(s*q)= ',alpha
+ c      WRITE(*,*) ' CG2D: Iteration ', it2d-1,
+ c    &            ' SUM(s*q)=', alpha, ' alpha=', eta_qrN/alpha
  CcnhDebugEnds
         alpha = eta_qrN/alpha
- CcnhDebugStarts
- C      WRITE(*,*) ' CG2D: Iteration ',it2d-1,' alpha= ',alpha
+ C==    Update simultaneously solution and residual vectors (and Iter number)
- CcnhDebugEnds
- C==    Update solution and residual vectors
  C      Now compute "interior" points.
-        err = 0. _d 0
         DO bj=myByLo(myThid),myByHi(myThid)
          DO bi=myBxLo(myThid),myBxHi(myThid)
-          DO J=1,sNy
+          errTile(bi,bj) = 0. _d 0
-           DO I=1,sNx
+          DO j=1,sNy
-            cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+alpha*cg2d_s(I,J,bi,bj)
+           DO i=1,sNx
-            cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-alpha*cg2d_q(I,J,bi,bj)
+            cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
-            err = err+cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
+            cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
+ #ifdef CG2D_SINGLECPU_SUM
+            localBuf(i,j,bi,bj) = cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
+ #else
+            errTile(bi,bj) = errTile(bi,bj)
+      &                    + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
+ #endif
            ENDDO
           ENDDO
          ENDDO
         ENDDO
+        actualIts = it2d
-        _GLOBAL_SUM_R8( err   , myThid )
+ #ifdef CG2D_SINGLECPU_SUM
-        err = SQRT(err)
+        CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err_sq, 0, 0, myThid)
-        actualIts      = it2d
-        actualResidual = err
-        IF ( err .LT. cg2dTolerance ) GOTO 11
- C      _EXCH_XY_R8(cg2d_r, myThid )
- #ifdef LETS_MAKE_JAM
-       CALL EXCH_XY_O1_R8_JAM( cg2d_r )
  #else
-       CALL EXCH_XY_RL( cg2d_r, myThid )
+        CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq,    myThid )
  #endif
+        IF ( printResidual ) THEN
+         IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
+          WRITE(msgBuf,'(A,I6,A,1PE21.14)')
+      &    ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
+          CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
+      &                       SQUEEZE_RIGHT, myThid )
+         ENDIF
+        ENDIF
+        IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
+        IF ( err_sq .LT. minResidualSq ) THEN
+ C-     Store lowest residual solution
+          minResidualSq = err_sq
+          nIterMin = it2d
+          DO bj=myByLo(myThid),myByHi(myThid)
+           DO bi=myBxLo(myThid),myBxHi(myThid)
+            DO j=1,sNy
+             DO i=1,sNx
+              cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
+             ENDDO
+            ENDDO
+           ENDDO
+          ENDDO
+        ENDIF
+        CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
     10 CONTINUE
     11 CONTINUE
+       IF ( nIterMin.GE.0 .AND. err_sq .GT. minResidualSq ) THEN
+ C-    use the lowest residual solution (instead of current one = last residual)
+         DO bj=myByLo(myThid),myByHi(myThid)
+          DO bi=myBxLo(myThid),myBxHi(myThid)
+           DO j=1,sNy
+            DO i=1,sNx
+              cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
+            ENDDO
+           ENDDO
+          ENDDO
+         ENDDO
+       ENDIF
        IF (cg2dNormaliseRHS) THEN
  C--   Un-normalise the answer
          DO bj=myByLo(myThid),myByHi(myThid)
           DO bi=myBxLo(myThid),myBxHi(myThid)
-           DO J=1,sNy
+           DO j=1,sNy
-            DO I=1,sNx
+            DO i=1,sNx
-             cg2d_x(I  ,J  ,bi,bj) = cg2d_x(I  ,J  ,bi,bj)/rhsNorm
+             cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
             ENDDO
            ENDDO
           ENDDO
          ENDDO
        ENDIF
- C     The following exchange was moved up to solve_for_pressure
- C     for compatibility with TAMC.
- C     _EXCH_XY_R8(cg2d_x, myThid )
- c     _BEGIN_MASTER( myThid )
- c      WRITE(*,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
- c    & actualIts, actualResidual
- c     _END_MASTER( )
  C--   Return parameters to caller
-       lastResidual=actualResidual
+       lastResidual = SQRT(err_sq)
-       numIters=actualIts
+       numIters = actualIts
  CcnhDebugStarts
- C     CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
+ c     _EXCH_XY_RL(cg2d_x, myThid )
- C     err    = 0. _d 0
+ c     CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
- C     DO bj=myByLo(myThid),myByHi(myThid)
+ c     err_sq = 0. _d 0
- C      DO bi=myBxLo(myThid),myBxHi(myThid)
+ c     DO bj=myByLo(myThid),myByHi(myThid)
- C       DO J=1,sNy
+ c      DO bi=myBxLo(myThid),myBxHi(myThid)
- C        DO I=1,sNx
+ c       DO j=1,sNy
- C         cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
+ c        DO i=1,sNx
- C    &    (aW2d(I  ,J  ,bi,bj)*cg2d_x(I-1,J  ,bi,bj)
+ c         cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
- C    &    +aW2d(I+1,J  ,bi,bj)*cg2d_x(I+1,J  ,bi,bj)
+ c    &    (aW2d(i  ,j  ,bi,bj)*cg2d_x(i-1,j  ,bi,bj)
- C    &    +aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J-1,bi,bj)
+ c    &    +aW2d(i+1,j  ,bi,bj)*cg2d_x(i+1,j  ,bi,bj)
- C    &    +aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J+1,bi,bj)
+ c    &    +aS2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j-1,bi,bj)
- C    &    -aW2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
+ c    &    +aS2d(i  ,j+1,bi,bj)*cg2d_x(i  ,j+1,bi,bj)
- C    &    -aW2d(I+1,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
+ c    &    +aC2d(i  ,j  ,bi,bj)*cg2d_x(i  ,j  ,bi,bj)
- C    &    -aS2d(I  ,J  ,bi,bj)*cg2d_x(I  ,J  ,bi,bj)
+ c    &    )
- C    &    -aS2d(I  ,J+1,bi,bj)*cg2d_x(I  ,J  ,bi,bj))
+ c         err_sq = err_sq + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
- C         err            = err            +
+ c        ENDDO
- C    &     cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
+ c       ENDDO
- C        ENDDO
+ c      ENDDO
- C       ENDDO
+ c     ENDDO
- C      ENDDO
+ c     _GLOBAL_SUM_RL( err_sq, myThid )
- C     ENDDO
+ c     write(*,*) 'cg2d: Ax - b = ',SQRT(err_sq)
- C     _GLOBAL_SUM_R8( err   , myThid )
- C     write(*,*) 'cg2d: Ax - b = ',SQRT(err)
  CcnhDebugEnds
        RETURN

 Legend:



Removed from v.1.34.6.4
 


changed lines


 
Added in v.1.55
 Legend:



Removed from v.1.34.6.4
 


changed lines


 
Added in v.1.55
-Removed from v.1.34.6.4
+Added in v.1.55

	ViewVC Help
Powered by ViewVC 1.1.22