--- MITgcm/model/src/cg3d.F 2001/03/08 20:45:53 1.7 +++ MITgcm/model/src/cg3d.F 2005/02/04 19:30:33 1.15 @@ -1,36 +1,48 @@ -C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg3d.F,v 1.7 2001/03/08 20:45:53 jmc Exp $ +C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg3d.F,v 1.15 2005/02/04 19:30:33 jmc Exp $ C $Name: $ +#include "PACKAGES_CONFIG.h" #include "CPP_OPTIONS.h" #define VERBOSE +CBOP +C !ROUTINE: CG3D +C !INTERFACE: SUBROUTINE CG3D( + I cg3d_b, + U cg3d_x, + O firstResidual, + O lastResidual, + U numIters, I myThid ) -C /==========================================================\ -C | SUBROUTINE CG3D | -C | o Three-dimensional grid problem conjugate-gradient | -C | inverter (with preconditioner). | -C |==========================================================| -C | Con. grad is an iterative procedure for solving Ax = b. | -C | It requires the A be symmetric. | -C | This implementation assumes A is a five-diagonal | -C | matrix of the form that arises in the discrete | -C | representation of the del^2 operator in a | -C | two-dimensional space. | -C | Notes: | -C | ====== | -C | This implementation can support shared-memory | -C | multi-threaded execution. In order to do this COMMON | -C | blocks are used for many of the arrays - even ones that | -C | are only used for intermedaite results. This design is | -C | OK if you want to all the threads to collaborate on | -C | solving the same problem. On the other hand if you want | -C | the threads to solve several different problems | -C | concurrently this implementation will not work. | -C \==========================================================/ - IMPLICIT NONE +C !DESCRIPTION: \bv +C *==========================================================* +C | SUBROUTINE CG3D +C | o Three-dimensional grid problem conjugate-gradient +C | inverter (with preconditioner). +C *==========================================================* +C | Con. grad is an iterative procedure for solving Ax = b. +C | It requires the A be symmetric. +C | This implementation assumes A is a seven-diagonal +C | matrix of the form that arises in the discrete +C | representation of the del^2 operator in a +C | three-dimensional space. +C | Notes: +C | ====== +C | This implementation can support shared-memory +C | multi-threaded execution. In order to do this COMMON +C | blocks are used for many of the arrays - even ones that +C | are only used for intermedaite results. This design is +C | OK if you want to all the threads to collaborate on +C | solving the same problem. On the other hand if you want +C | the threads to solve several different problems +C | concurrently this implementation will not work. +C *==========================================================* +C \ev +C !USES: + IMPLICIT NONE C === Global data === #include "SIZE.h" #include "EEPARAMS.h" @@ -38,12 +50,26 @@ #include "GRID.h" #include "CG3D.h" +C !INPUT/OUTPUT PARAMETERS: C === Routine arguments === -C myThid - Thread on which I am working. +C myThid - Thread on which I am working. +C cg2d_b - The source term or "right hand side" +C cg2d_x - The solution +C firstResidual - the initial residual before any iterations +C lastResidual - the actual residual reached +C numIters - Entry: the maximum number of iterations allowed +C Exit: the actual number of iterations used + _RL cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy) + _RL cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy) + _RL firstResidual + _RL lastResidual + INTEGER numIters INTEGER myThid + #ifdef ALLOW_NONHYDROSTATIC +C !LOCAL VARIABLES: C === Local variables ==== C actualIts - Number of iterations taken C actualResidual - residual @@ -64,12 +90,12 @@ INTEGER bi, bj INTEGER I, J, K, it3d INTEGER KM1, KP1 - _RL err - _RL eta_qrN + _RL err, errTile + _RL eta_qrN, eta_qrNtile _RL eta_qrNM1 _RL cgBeta - _RL alpha - _RL sumRHS + _RL alpha , alphaTile + _RL sumRHS, sumRHStile _RL rhsMax _RL rhsNorm @@ -81,6 +107,10 @@ INTEGER exchWidthY INTEGER myNz _RL topLevTerm +CEOP + +ceh3 needs an IF ( useNONHYDROSTATIC ) THEN + C-- Initialise inverter eta_qrNM1 = 1. D0 @@ -124,6 +154,8 @@ sumRHS = 0. _d 0 DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) + errTile = 0. _d 0 + sumRHStile = 0. _d 0 DO K=1,Nr KM1 = K-1 IF ( K .EQ. 1 ) KM1 = 1 @@ -150,13 +182,15 @@ & -aV3d(I ,J ,KP1,bi,bj)*cg3d_x(I ,J ,K ,bi,bj) & -topLevTerm*_rA(I,J,bi,bj)*cg3d_x(I,J,K,bi,bj) & ) - err = err + errTile = errTile & +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj) - sumRHS = sumRHS + sumRHStile = sumRHStile & +cg3d_b(I,J,K,bi,bj) ENDDO ENDDO ENDDO + err = err + errTile + sumRHS = sumRHS + sumRHStile ENDDO ENDDO C _EXCH_XYZ_R8( cg3d_r, myThid ) @@ -186,26 +220,30 @@ _GLOBAL_SUM_R8( sumRHS, myThid ) _GLOBAL_SUM_R8( err , myThid ) - _BEGIN_MASTER( myThid ) - write(0,'(A,1PE30.14)') ' cg3d: Sum(rhs) = ',sumRHS - _END_MASTER( ) + IF ( debugLevel .GE. debLevZero ) THEN + _BEGIN_MASTER( myThid ) + write(*,'(A,1P2E22.14)') + & ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax + _END_MASTER( myThid ) + ENDIF actualIts = 0 actualResidual = SQRT(err) C _BARRIER - _BEGIN_MASTER( myThid ) - WRITE(0,'(A,I6,1PE30.14)') ' CG3D iters, err = ', - & actualIts, actualResidual - _END_MASTER( ) +c _BEGIN_MASTER( myThid ) +c WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ', +c & actualIts, actualResidual +c _END_MASTER( myThid ) + firstResidual=actualResidual C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< DO 10 it3d=1, cg3dMaxIters CcnhDebugStarts #ifdef VERBOSE - IF ( mod(it3d-1,10).EQ.0) - & WRITE(0,*) ' CG3D: Iteration ',it3d-1, - & ' residual = ',actualResidual +c IF ( mod(it3d-1,10).EQ.0) +c & WRITE(*,*) ' CG3D: Iteration ',it3d-1, +c & ' residual = ',actualResidual #endif CcnhDebugEnds IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11 @@ -218,6 +256,7 @@ eta_qrN = 0. _d 0 DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) + eta_qrNtile = 0. _d 0 DO K=1,1 DO J=1-1,sNy+1 DO I=1-1,sNx+1 @@ -247,7 +286,7 @@ caja ENDIF DO J=1,sNy DO I=1,sNx - eta_qrN = eta_qrN + eta_qrNtile = eta_qrNtile & +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj) ENDDO ENDDO @@ -262,11 +301,12 @@ ENDDO DO J=1,sNy DO I=1,sNx - eta_qrN = eta_qrN + eta_qrNtile = eta_qrNtile & +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj) ENDDO ENDDO ENDDO + eta_qrN = eta_qrN + eta_qrNtile ENDDO ENDDO caja @@ -287,11 +327,11 @@ _GLOBAL_SUM_R8(eta_qrN, myThid) CcnhDebugStarts -C WRITE(0,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN +C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN CcnhDebugEnds cgBeta = eta_qrN/eta_qrNM1 CcnhDebugStarts -C WRITE(0,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta +C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta CcnhDebugEnds eta_qrNM1 = eta_qrN @@ -315,6 +355,7 @@ & (horiVertRatio/gravity)/deltaTMom/deltaTMom DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) + alphaTile = 0. _d 0 IF ( Nr .GT. 1 ) THEN DO K=1,1 DO J=1,sNy @@ -331,7 +372,8 @@ & -aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -aV3d(I ,J ,K+1,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -topLevTerm*_rA(I,J,bi,bj)*cg3d_s(I,J,K,bi,bj) - alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) + alphaTile = alphaTile + & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) ENDDO ENDDO ENDDO @@ -349,7 +391,8 @@ & -aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -topLevTerm*_rA(I,J,bi,bj)*cg3d_s(I,J,K,bi,bj) - alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) + alphaTile = alphaTile + & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) ENDDO ENDDO ENDDO @@ -370,7 +413,8 @@ & -aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -aV3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -aV3d(I ,J ,K+1,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) - alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) + alphaTile = alphaTile + & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) ENDDO ENDDO ENDDO @@ -389,20 +433,22 @@ & -aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) & -aV3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj) - alpha = alpha+cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) + alphaTile = alphaTile + & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj) ENDDO ENDDO ENDDO ENDIF + alpha = alpha + alphaTile ENDDO ENDDO _GLOBAL_SUM_R8(alpha,myThid) CcnhDebugStarts -C WRITE(0,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha +C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha CcnhDebugEnds alpha = eta_qrN/alpha CcnhDebugStarts -C WRITE(0,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha +C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha CcnhDebugEnds C== Update solution and residual vectors @@ -410,6 +456,7 @@ err = 0. _d 0 DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) + errTile = 0. _d 0 DO K=1,Nr DO J=1,sNy DO I=1,sNx @@ -417,10 +464,12 @@ & +alpha*cg3d_s(I,J,K,bi,bj) cg3d_r(I,J,K,bi,bj)=cg3d_r(I,J,K,bi,bj) & -alpha*cg3d_q(I,J,K,bi,bj) - err = err+cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj) + errTile = errTile + & +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj) ENDDO ENDDO ENDDO + err = err + errTile ENDDO ENDDO @@ -459,10 +508,12 @@ ENDDO Cadj _EXCH_XYZ_R8(cg3d_x, myThid ) - _BEGIN_MASTER( myThid ) - WRITE(0,'(A,I6,1PE30.14)') ' CG3D iters, err = ', - & actualIts, actualResidual - _END_MASTER( ) +c _BEGIN_MASTER( myThid ) +c WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ', +c & actualIts, actualResidual +c _END_MASTER( myThid ) + lastResidual=actualResidual + numIters=actualIts #endif /* ALLOW_NONHYDROSTATIC */