--- MITgcm/model/src/cg3d.F	2001/03/08 20:45:53	1.7
+++ MITgcm/model/src/cg3d.F	2005/02/04 19:30:33	1.15
@@ -1,36 +1,48 @@
-C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg3d.F,v 1.7 2001/03/08 20:45:53 jmc Exp $
+C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg3d.F,v 1.15 2005/02/04 19:30:33 jmc Exp $
 C $Name:  $
 
+#include "PACKAGES_CONFIG.h"
 #include "CPP_OPTIONS.h"
 
 #define VERBOSE
 
+CBOP
+C     !ROUTINE: CG3D
+C     !INTERFACE:
       SUBROUTINE CG3D(  
+     I                cg3d_b,
+     U                cg3d_x,
+     O                firstResidual,
+     O                lastResidual,
+     U                numIters,
      I                myThid )
-C     /==========================================================\
-C     | SUBROUTINE CG3D                                          |
-C     | o Three-dimensional grid problem conjugate-gradient      |
-C     |   inverter (with preconditioner).                        |
-C     |==========================================================|
-C     | Con. grad is an iterative procedure for solving Ax = b.  |
-C     | It requires the A be symmetric.                          |
-C     | This implementation assumes A is a five-diagonal         |
-C     | matrix of the form that arises in the discrete           |
-C     | representation of the del^2 operator in a                |
-C     | two-dimensional space.                                   |
-C     | Notes:                                                   |
-C     | ======                                                   |
-C     | This implementation can support shared-memory            | 
-C     | multi-threaded execution. In order to do this COMMON     | 
-C     | blocks are used for many of the arrays - even ones that  | 
-C     | are only used for intermedaite results. This design is   | 
-C     | OK if you want to all the threads to collaborate on      | 
-C     | solving the same problem. On the other hand if you want  | 
-C     | the threads to solve several different problems          | 
-C     | concurrently this implementation will not work.          |
-C     \==========================================================/
-      IMPLICIT NONE
+C     !DESCRIPTION: \bv
+C     *==========================================================*
+C     | SUBROUTINE CG3D                                           
+C     | o Three-dimensional grid problem conjugate-gradient       
+C     |   inverter (with preconditioner).                         
+C     *==========================================================*
+C     | Con. grad is an iterative procedure for solving Ax = b.   
+C     | It requires the A be symmetric.                           
+C     | This implementation assumes A is a seven-diagonal          
+C     | matrix of the form that arises in the discrete            
+C     | representation of the del^2 operator in a                 
+C     | three-dimensional space.                                    
+C     | Notes:                                                    
+C     | ======                                                    
+C     | This implementation can support shared-memory              
+C     | multi-threaded execution. In order to do this COMMON       
+C     | blocks are used for many of the arrays - even ones that    
+C     | are only used for intermedaite results. This design is     
+C     | OK if you want to all the threads to collaborate on        
+C     | solving the same problem. On the other hand if you want    
+C     | the threads to solve several different problems            
+C     | concurrently this implementation will not work.           
+C     *==========================================================*
+C     \ev
 
+C     !USES:
+      IMPLICIT NONE
 C     === Global data ===
 #include "SIZE.h"
 #include "EEPARAMS.h"
@@ -38,12 +50,26 @@
 #include "GRID.h"
 #include "CG3D.h"
 
+C     !INPUT/OUTPUT PARAMETERS:
 C     === Routine arguments ===
-C     myThid - Thread on which I am working.
+C     myThid    - Thread on which I am working.
+C     cg2d_b    - The source term or "right hand side"
+C     cg2d_x    - The solution
+C     firstResidual - the initial residual before any iterations
+C     lastResidual  - the actual residual reached
+C     numIters  - Entry: the maximum number of iterations allowed
+C                 Exit:  the actual number of iterations used
+      _RL  cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
+      _RL  cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
+      _RL  firstResidual
+      _RL  lastResidual
+      INTEGER numIters
       INTEGER myThid
 
+
 #ifdef ALLOW_NONHYDROSTATIC
 
+C     !LOCAL VARIABLES:
 C     === Local variables ====
 C     actualIts      - Number of iterations taken
 C     actualResidual - residual
@@ -64,12 +90,12 @@
       INTEGER bi, bj              
       INTEGER I, J, K, it3d
       INTEGER KM1, KP1
-      _RL    err
-      _RL    eta_qrN
+      _RL    err, errTile
+      _RL    eta_qrN, eta_qrNtile
       _RL    eta_qrNM1
       _RL    cgBeta
-      _RL    alpha
-      _RL    sumRHS
+      _RL    alpha , alphaTile
+      _RL    sumRHS, sumRHStile
       _RL    rhsMax
       _RL    rhsNorm
 
@@ -81,6 +107,10 @@
       INTEGER exchWidthY
       INTEGER myNz
       _RL     topLevTerm
+CEOP
+
+ceh3 needs an IF ( useNONHYDROSTATIC ) THEN
+
 
 C--   Initialise inverter
       eta_qrNM1 = 1. D0
@@ -124,6 +154,8 @@
       sumRHS = 0. _d 0
       DO bj=myByLo(myThid),myByHi(myThid)
        DO bi=myBxLo(myThid),myBxHi(myThid)
+        errTile    = 0. _d 0
+        sumRHStile = 0. _d 0
         DO K=1,Nr
          KM1 = K-1
          IF ( K .EQ. 1 ) KM1 = 1
@@ -150,13 +182,15 @@
      &     -aV3d(I  ,J  ,KP1,bi,bj)*cg3d_x(I  ,J  ,K  ,bi,bj)
      &     -topLevTerm*_rA(I,J,bi,bj)*cg3d_x(I,J,K,bi,bj)
      &     )
-           err = err
+           errTile = errTile
      &     +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
-           sumRHS = sumRHS
+           sumRHStile = sumRHStile
      &     +cg3d_b(I,J,K,bi,bj)
           ENDDO
          ENDDO
         ENDDO
+        err    = err    + errTile
+        sumRHS = sumRHS + sumRHStile
        ENDDO
       ENDDO
 C     _EXCH_XYZ_R8( cg3d_r, myThid )
@@ -186,26 +220,30 @@
       _GLOBAL_SUM_R8( sumRHS, myThid )
       _GLOBAL_SUM_R8( err   , myThid )
       
-      _BEGIN_MASTER( myThid )
-      write(0,'(A,1PE30.14)') ' cg3d: Sum(rhs) = ',sumRHS
-      _END_MASTER( )
+      IF ( debugLevel .GE. debLevZero ) THEN
+        _BEGIN_MASTER( myThid )
+        write(*,'(A,1P2E22.14)')
+     &     ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
+        _END_MASTER( myThid )
+      ENDIF
 
       actualIts      = 0
       actualResidual = SQRT(err)
 C     _BARRIER
-      _BEGIN_MASTER( myThid )
-       WRITE(0,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
-     &  actualIts, actualResidual
-      _END_MASTER( )
+c     _BEGIN_MASTER( myThid )
+c      WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
+c    &  actualIts, actualResidual
+c     _END_MASTER( myThid )
+      firstResidual=actualResidual
 
 C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
       DO 10 it3d=1, cg3dMaxIters
 
 CcnhDebugStarts
 #ifdef VERBOSE
-       IF ( mod(it3d-1,10).EQ.0)
-     &   WRITE(0,*) ' CG3D: Iteration ',it3d-1,
-     &      ' residual = ',actualResidual
+c      IF ( mod(it3d-1,10).EQ.0)
+c    &   WRITE(*,*) ' CG3D: Iteration ',it3d-1,
+c    &      ' residual = ',actualResidual
 #endif
 CcnhDebugEnds
        IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
@@ -218,6 +256,7 @@
        eta_qrN = 0. _d 0
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
+         eta_qrNtile = 0. _d 0
          DO K=1,1
           DO J=1-1,sNy+1
            DO I=1-1,sNx+1
@@ -247,7 +286,7 @@
 caja      ENDIF
           DO J=1,sNy
            DO I=1,sNx
-            eta_qrN = eta_qrN
+            eta_qrNtile = eta_qrNtile
      &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
            ENDDO
           ENDDO
@@ -262,11 +301,12 @@
           ENDDO
           DO J=1,sNy
            DO I=1,sNx
-            eta_qrN = eta_qrN
+            eta_qrNtile = eta_qrNtile
      &      +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
+         eta_qrN = eta_qrN + eta_qrNtile
         ENDDO
        ENDDO
 caja
@@ -287,11 +327,11 @@
 
        _GLOBAL_SUM_R8(eta_qrN, myThid)
 CcnhDebugStarts
-C      WRITE(0,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN
+C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN
 CcnhDebugEnds
        cgBeta   = eta_qrN/eta_qrNM1
 CcnhDebugStarts
-C      WRITE(0,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta
+C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta
 CcnhDebugEnds
        eta_qrNM1 = eta_qrN
 
@@ -315,6 +355,7 @@
      &      (horiVertRatio/gravity)/deltaTMom/deltaTMom
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
+         alphaTile = 0. _d 0
          IF ( Nr .GT. 1 ) THEN
           DO K=1,1
            DO J=1,sNy
@@ -331,7 +372,8 @@
      &      -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &      -aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &      -topLevTerm*_rA(I,J,bi,bj)*cg3d_s(I,J,K,bi,bj)
-             alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
+             alphaTile = alphaTile
+     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
             ENDDO
            ENDDO
           ENDDO
@@ -349,7 +391,8 @@
      &      -aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &      -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &      -topLevTerm*_rA(I,J,bi,bj)*cg3d_s(I,J,K,bi,bj)
-             alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
+             alphaTile = alphaTile
+     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
             ENDDO
            ENDDO
           ENDDO
@@ -370,7 +413,8 @@
      &     -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &     -aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &     -aV3d(I  ,J  ,K+1,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
-            alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
+            alphaTile = alphaTile
+     &                +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
@@ -389,20 +433,22 @@
      &      -aS3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &      -aS3d(I  ,J+1,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
      &      -aV3d(I  ,J  ,K  ,bi,bj)*cg3d_s(I  ,J  ,K  ,bi,bj)
-             alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
+             alphaTile = alphaTile
+     &                 +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
             ENDDO
            ENDDO
           ENDDO
          ENDIF
+         alpha = alpha + alphaTile
         ENDDO
        ENDDO
        _GLOBAL_SUM_R8(alpha,myThid)
 CcnhDebugStarts
-C      WRITE(0,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha
+C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha
 CcnhDebugEnds
        alpha = eta_qrN/alpha
 CcnhDebugStarts
-C      WRITE(0,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha
+C      WRITE(*,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha
 CcnhDebugEnds
      
 C==    Update solution and residual vectors
@@ -410,6 +456,7 @@
        err = 0. _d 0
        DO bj=myByLo(myThid),myByHi(myThid)
         DO bi=myBxLo(myThid),myBxHi(myThid)
+        errTile    = 0. _d 0
          DO K=1,Nr
           DO J=1,sNy
            DO I=1,sNx
@@ -417,10 +464,12 @@
      &            +alpha*cg3d_s(I,J,K,bi,bj)
             cg3d_r(I,J,K,bi,bj)=cg3d_r(I,J,K,bi,bj)
      &            -alpha*cg3d_q(I,J,K,bi,bj)
-            err = err+cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
+           errTile = errTile
+     &             +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
            ENDDO
           ENDDO
          ENDDO
+         err = err + errTile
         ENDDO
        ENDDO
 
@@ -459,10 +508,12 @@
       ENDDO
 
 Cadj  _EXCH_XYZ_R8(cg3d_x, myThid )
-      _BEGIN_MASTER( myThid )
-       WRITE(0,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
-     &  actualIts, actualResidual
-      _END_MASTER( )
+c     _BEGIN_MASTER( myThid )
+c      WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
+c    &  actualIts, actualResidual
+c     _END_MASTER( myThid )
+      lastResidual=actualResidual
+      numIters=actualIts
 
 #endif /* ALLOW_NONHYDROSTATIC */