--- MITgcm/model/src/cg2d_nsa.F 2006/06/07 01:45:42 1.1 +++ MITgcm/model/src/cg2d_nsa.F 2012/07/17 21:31:52 1.5 @@ -1,50 +1,44 @@ -C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg2d_nsa.F,v 1.1 2006/06/07 01:45:42 heimbach Exp $ +C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg2d_nsa.F,v 1.5 2012/07/17 21:31:52 jmc Exp $ C $Name: $ #include "CPP_OPTIONS.h" -#ifdef ALLOW_USE_MPI -C HACK to avoid global_max -# define ALLOW_CONST_RHSMAX -#endif CML THIS DOES NOT WORK +++++ #undef ALLOW_LOOP_DIRECTIVE CBOP C !ROUTINE: CG2D_NSA C !INTERFACE: - SUBROUTINE CG2D_NSA( - I cg2d_b, - U cg2d_x, - O firstResidual, - O lastResidual, - U numIters, + SUBROUTINE CG2D_NSA( + U cg2d_b, cg2d_x, + O firstResidual, minResidualSq, lastResidual, + U numIters, nIterMin, I myThid ) C !DESCRIPTION: \bv C *==========================================================* -C | SUBROUTINE CG2D_NSA -C | o Two-dimensional grid problem conjugate-gradient -C | inverter (with preconditioner). +C | SUBROUTINE CG2D_NSA +C | o Two-dimensional grid problem conjugate-gradient +C | inverter (with preconditioner). C | o This version is used only in the case when the matrix -C | operator is not "self-adjoint" (NSA). Any remaining +C | operator is not "self-adjoint" (NSA). Any remaining C | residuals will immediately reported to the department C | of homeland security. C *==========================================================* -C | Con. grad is an iterative procedure for solving Ax = b. -C | It requires the A be symmetric. -C | This implementation assumes A is a five-diagonal -C | matrix of the form that arises in the discrete -C | representation of the del^2 operator in a -C | two-dimensional space. -C | Notes: -C | ====== -C | This implementation can support shared-memory -C | multi-threaded execution. In order to do this COMMON -C | blocks are used for many of the arrays - even ones that -C | are only used for intermedaite results. This design is -C | OK if you want to all the threads to collaborate on -C | solving the same problem. On the other hand if you want -C | the threads to solve several different problems -C | concurrently this implementation will not work. +C | Con. grad is an iterative procedure for solving Ax = b. +C | It requires the A be symmetric. +C | This implementation assumes A is a five-diagonal +C | matrix of the form that arises in the discrete +C | representation of the del^2 operator in a +C | two-dimensional space. +C | Notes: +C | ====== +C | This implementation can support shared-memory +C | multi-threaded execution. In order to do this COMMON +C | blocks are used for many of the arrays - even ones that +C | are only used for intermedaite results. This design is +C | OK if you want to all the threads to collaborate on +C | solving the same problem. On the other hand if you want +C | the threads to solve several different problems +C | concurrently this implementation will not work. C *==========================================================* C \ev @@ -54,9 +48,7 @@ #include "SIZE.h" #include "EEPARAMS.h" #include "PARAMS.h" -#include "GRID.h" #include "CG2D.h" -#include "SURFACE.h" #ifdef ALLOW_AUTODIFF_TAMC # include "tamc.h" # include "tamc_keys.h" @@ -64,71 +56,75 @@ C !INPUT/OUTPUT PARAMETERS: C === Routine arguments === -C myThid - Thread on which I am working. -C cg2d_b - The source term or "right hand side" -C cg2d_x - The solution -C firstResidual - the initial residual before any iterations -C lastResidual - the actual residual reached -C numIters - Entry: the maximum number of iterations allowed -C Exit: the actual number of iterations used +C cg2d_b :: The source term or "right hand side" (Output: normalised RHS) +C cg2d_x :: The solution (Input: first guess) +C firstResidual :: the initial residual before any iterations +C minResidualSq :: the lowest residual reached (squared) +C lastResidual :: the actual residual reached +C numIters :: Inp: the maximum number of iterations allowed +C Out: the actual number of iterations used +C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol. +C Out: iteration number corresponding to lowest residual +C myThid :: Thread on which I am working. _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RL firstResidual + _RL minResidualSq _RL lastResidual INTEGER numIters + INTEGER nIterMin INTEGER myThid #ifdef ALLOW_CG2D_NSA C !LOCAL VARIABLES: C === Local variables ==== -C actualIts - Number of iterations taken -C actualResidual - residual -C bi - Block index in X and Y. -C bj -C eta_qrN - Used in computing search directions -C eta_qrNM1 suffix N and NM1 denote current and -C cgBeta previous iterations respectively. -C recip_eta_qrNM1 reciprocal of eta_qrNM1 -C alpha -C alpha_aux - to avoid the statement: alpha = 1./alpha (for TAMC/TAF) -C sumRHS - Sum of right-hand-side. Sometimes this is a -C useful debuggin/trouble shooting diagnostic. -C For neumann problems sumRHS needs to be ~0. -C or they converge at a non-zero residual. -C err - Measure of residual of Ax - b, usually the norm. -C err_sq - square of err (for TAMC/TAF) -C I, J, N - Loop counters ( N counts CG iterations ) +C bi, bj :: tile index in X and Y. +C i, j, it2d :: Loop counters ( it2d counts CG iterations ) +C actualIts :: actual CG iteration number +C err_sq :: Measure of the square of the residual of Ax - b. +C eta_qrN :: Used in computing search directions; suffix N and NM1 +C eta_qrNM1 denote current and previous iterations respectively. +C recip_eta_qrNM1 :: reciprocal of eta_qrNM1 +C cgBeta :: coeff used to update conjugate direction vector "s". +C alpha :: coeff used to update solution & residual +C alphaSum :: to avoid the statement: alpha = 1./alpha (for TAMC/TAF) +C sumRHS :: Sum of right-hand-side. Sometimes this is a useful +C debugging/trouble shooting diagnostic. For neumann problems +C sumRHS needs to be ~0 or it converge at a non-zero residual. +C cg2d_min :: used to store solution corresponding to lowest residual. +C msgBuf :: Informational/error message buffer + INTEGER bi, bj + INTEGER i, j, it2d INTEGER actualIts - _RL actualResidual - INTEGER bi, bj - INTEGER I, J, it2d - - _RL err - _RL err_sq - _RL eta_qrN - _RL eta_qrNM1 - _RL recip_eta_qrNM1 - _RL cgBeta - _RL alpha - _RL alpha_aux - _RL sumRHS - _RL rhsMax, rhsMaxGlobal - _RL rhsNorm _RL cg2dTolerance_sq + _RL err_sq, errTile(nSx,nSy) + _RL eta_qrN, eta_qrNtile(nSx,nSy) + _RL eta_qrNM1, recip_eta_qrNM1 + _RL cgBeta, alpha + _RL alphaSum,alphaTile(nSx,nSy) + _RL sumRHS, sumRHStile(nSx,nSy) + _RL rhsMax, rhsNorm + CHARACTER*(MAX_LEN_MBUF) msgBuf + LOGICAL printResidual CEOP #ifdef ALLOW_AUTODIFF_TAMC IF ( numIters .GT. numItersMax ) THEN - WRITE(standardMessageUnit,'(A,I10)') - & 'CG2D_NSA: numIters > numItersMax = ', numItersMax - STOP 'NON-NORMAL in CG2D_NSA' + WRITE(msgBuf,'(A,I10)') + & 'CG2D_NSA: numIters > numItersMax =', numItersMax + CALL PRINT_ERROR( msgBuf, myThid ) + STOP 'ABNORMAL END: S/R CG2D_NSA' + ENDIF + IF ( cg2dNormaliseRHS ) THEN + WRITE(msgBuf,'(A)') 'CG2D_NSA: cg2dNormaliseRHS is disabled' + CALL PRINT_ERROR( msgBuf, myThid ) + WRITE(msgBuf,'(A)') + & 'set cg2dTargetResWunit (instead of cg2dTargetResidual)' + CALL PRINT_ERROR( msgBuf, myThid ) + STOP 'ABNORMAL END: S/R CG2D_NSA' ENDIF #endif /* ALLOW_AUTODIFF_TAMC */ -CcnhDebugStarts -C CHARACTER*(MAX_LEN_FNAM) suff -CcnhDebugEnds - #ifdef ALLOW_AUTODIFF_TAMC act1 = myThid - 1 max1 = nTx*nTy @@ -136,145 +132,98 @@ ikey = (act1 + 1) + act2*max1 #endif /* ALLOW_AUTODIFF_TAMC */ -C-- Initialise inverter - eta_qrNM1 = 1. _d 0 - recip_eta_qrNM1 = 1./eta_qrNM1 - -CcnhDebugStarts -C _EXCH_XY_R8( cg2d_b, myThid ) -C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.0 CG2D_B' , 1, myThid ) -C suff = 'unnormalised' -C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid) -C STOP -CcnhDebugEnds +C-- Initialise auxiliary constant, some output variable and inverter + cg2dTolerance_sq = cg2dTolerance*cg2dTolerance + minResidualSq = -1. _d 0 + eta_qrNM1 = 1. _d 0 + recip_eta_qrNM1= 1. _d 0 C-- Normalise RHS -#ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte -#endif /* ALLOW_AUTODIFF_TAMC */ rhsMax = 0. _d 0 DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm - rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax) + DO j=1,sNy + DO i=1,sNx + cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm + rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax) ENDDO ENDDO ENDDO ENDDO +#ifndef ALLOW_AUTODIFF_TAMC IF (cg2dNormaliseRHS) THEN -C - Normalise RHS : -#ifdef LETS_MAKE_JAM -C _GLOBAL_MAX_R8( rhsMax, myThid ) - rhsMaxGlobal=1. -#else -#ifdef ALLOW_CONST_RHSMAX - rhsMaxGlobal=1. -#else - rhsMaxGlobal=rhsMax - _GLOBAL_MAX_R8( rhsMaxGlobal, myThid ) -#endif /* ALLOW_CONST_RHSMAX */ -#endif -#ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte -#endif /* ALLOW_AUTODIFF_TAMC */ - IF ( rhsMaxGlobal .NE. 0. ) THEN - rhsNorm = 1. _d 0 / rhsMaxGlobal - ELSE +C- Normalise RHS : + _GLOBAL_MAX_RL( rhsMax, myThid ) rhsNorm = 1. _d 0 - ENDIF - -#ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte -CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte -#endif /* ALLOW_AUTODIFF_TAMC */ - DO bj=myByLo(myThid),myByHi(myThid) - DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm - cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm + IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax + DO bj=myByLo(myThid),myByHi(myThid) + DO bi=myBxLo(myThid),myBxHi(myThid) + DO j=1,sNy + DO i=1,sNx + cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm + cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm + ENDDO ENDDO ENDDO ENDDO - ENDDO -C- end Normalise RHS +C- end Normalise RHS ENDIF +#endif /* ndef ALLOW_AUTODIFF_TAMC */ C-- Update overlaps - _EXCH_XY_R8( cg2d_b, myThid ) - _EXCH_XY_R8( cg2d_x, myThid ) -CcnhDebugStarts -C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid ) -C suff = 'normalised' -C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid) -CcnhDebugEnds + CALL EXCH_XY_RL( cg2d_x, myThid ) C-- Initial residual calculation - err = 0. _d 0 - err_sq = 0. _d 0 - sumRHS = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte -CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_s(I,J,bi,bj) = 0. - cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) - - & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj) - & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj) - & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj) - & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj) - & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* - & cg2d_x(I ,J ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm -CML & cg2d_x(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm + errTile(bi,bj) = 0. _d 0 + sumRHStile(bi,bj) = 0. _d 0 + DO j=0,sNy+1 + DO i=0,sNx+1 + cg2d_s(i,j,bi,bj) = 0. + ENDDO + ENDDO + DO j=1,sNy + DO i=1,sNx + cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) - + & (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj) + & +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj) + & +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj) + & +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj) + & +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj) & ) - err_sq = err_sq + - & cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj) - sumRHS = sumRHS + - & cg2d_b(I,J,bi,bj) + errTile(bi,bj) = errTile(bi,bj) + & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj) + sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO -#ifdef LETS_MAKE_JAM - CALL EXCH_XY_O1_R8_JAM( cg2d_r ) - CALL EXCH_XY_O1_R8_JAM( cg2d_s ) -#else - _EXCH_XY_R8( cg2d_r, myThid ) - _EXCH_XY_R8( cg2d_s, myThid ) -#endif - _GLOBAL_SUM_R8( sumRHS, myThid ) - _GLOBAL_SUM_R8( err_sq, myThid ) - if ( err_sq .ne. 0. ) then - err = SQRT(err_sq) - else - err = 0. - end if - actualIts = 0 - actualResidual = err - - _BEGIN_MASTER( myThid ) - write(standardMessageUnit,'(A,1P2E22.14)') - & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMaxGlobal - _END_MASTER( ) -C _BARRIER -c _BEGIN_MASTER( myThid ) -c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', -c & actualIts, actualResidual -c _END_MASTER( ) - firstResidual=actualResidual - cg2dTolerance_sq = cg2dTolerance**2 +c CALL EXCH_S3D_RL( cg2d_r, 1, myThid ) + CALL EXCH_XY_RL ( cg2d_r, myThid ) + CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid ) + CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid ) + actualIts = 0 + IF ( err_sq .NE. 0. ) THEN + firstResidual = SQRT(err_sq) + ELSE + firstResidual = 0. + ENDIF + + printResidual = .FALSE. + IF ( debugLevel .GE. debLevZero ) THEN + _BEGIN_MASTER( myThid ) + printResidual = printResidualFreq.GE.1 + WRITE(standardmessageunit,'(A,1P2E22.14)') + & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax + _END_MASTER( myThid ) + ENDIF C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Cml begin main solver loop @@ -284,76 +233,58 @@ DO it2d=1, numIters #ifdef ALLOW_LOOP_DIRECTIVE CML it2d = 0 -CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters ) +CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters ) CML it2d = it2d+1 #endif /* ALLOW_LOOP_DIRECTIVE */ #ifdef ALLOW_AUTODIFF_TAMC icg2dkey = (ikey-1)*numItersMax + it2d -CMLCADJ STORE err = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte -CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ -CML IF ( err .LT. cg2dTolerance ) THEN - IF ( err_sq .LT. cg2dTolerance_sq ) THEN -Cml DO NOTHING - ELSE + IF ( err_sq .GE. cg2dTolerance_sq ) THEN -CcnhDebugStarts -C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' residual = ', -C & actualResidual -CcnhDebugEnds C-- Solve preconditioning equation and update C-- conjugate direction vector "s". - eta_qrN = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte -CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_z(I,J,bi,bj) = - & pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj) - & +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj) - & +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj) - & +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj) - & +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj) -CcnhDebugStarts -C cg2d_z(I,J,bi,bj) = cg2d_r(I ,J ,bi,bj) -CcnhDebugEnds - eta_qrN = eta_qrN - & +cg2d_z(I,J,bi,bj)*cg2d_r(I,J,bi,bj) + eta_qrNtile(bi,bj) = 0. _d 0 + DO j=1,sNy + DO i=1,sNx + cg2d_z(i,j,bi,bj) = + & pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj) + & +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj) + & +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj) + & +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj) + & +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj) + eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj) + & +cg2d_z(i,j,bi,bj)*cg2d_r(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO - _GLOBAL_SUM_R8(eta_qrN, myThid) -CcnhDebugStarts -C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' eta_qrN = ',eta_qrN -CcnhDebugEnds + CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid ) #ifdef ALLOW_AUTODIFF_TAMC -CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte -CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ CML cgBeta = eta_qrN/eta_qrNM1 cgBeta = eta_qrN*recip_eta_qrNM1 -CcnhDebugStarts -C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' beta = ',cgBeta -CcnhDebugEnds -Cml store normalisation factor for the next interation -Cml (in case there is one). +Cml store normalisation factor for the next interation (in case there is one). CML store the inverse of the normalization factor for higher precision CML eta_qrNM1 = eta_qrN - recip_eta_qrNM1 = 1./eta_qrN + recip_eta_qrNM1 = 1. _d 0/eta_qrN DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_s(I,J,bi,bj) = cg2d_z(I,J,bi,bj) - & + cgBeta*cg2d_s(I,J,bi,bj) + DO j=1,sNy + DO i=1,sNx + cg2d_s(i,j,bi,bj) = cg2d_z(i,j,bi,bj) + & + cgBeta*cg2d_s(i,j,bi,bj) ENDDO ENDDO ENDDO @@ -361,131 +292,107 @@ C-- Do exchanges that require messages i.e. between C-- processes. -#ifdef LETS_MAKE_JAM - CALL EXCH_XY_O1_R8_JAM( cg2d_s ) -#else - _EXCH_XY_R8( cg2d_s, myThid ) -#endif +c CALL EXCH_S3D_RL( cg2d_s, 1, myThid ) + CALL EXCH_XY_RL ( cg2d_s, myThid ) C== Evaluate laplace operator on conjugate gradient vector C== q = A.s - alpha = 0. _d 0 - alpha_aux = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC #ifndef ALLOW_LOOP_DIRECTIVE -CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* not ALLOW_LOOP_DIRECTIVE */ #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_q(I,J,bi,bj) = - & aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj) - & +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj) - & +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj) - & +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj) - & -aW2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -aW2d(I+1,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -aS2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* - & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm -CML & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm - alpha_aux = alpha_aux+cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj) + alphaTile(bi,bj) = 0. _d 0 + DO j=1,sNy + DO i=1,sNx + cg2d_q(i,j,bi,bj) = + & aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj) + & +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj) + & +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj) + & +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj) + & +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj) + alphaTile(bi,bj) = alphaTile(bi,bj) + & + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO - _GLOBAL_SUM_R8(alpha_aux,myThid) -CcnhDebugStarts -C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' SUM(s*q)= ',alpha_aux -CcnhDebugEnds - alpha = eta_qrN/alpha_aux -CcnhDebugStarts -C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' alpha= ',alpha -CcnhDebugEnds - -C== Update solution and residual vectors + CALL GLOBAL_SUM_TILE_RL( alphaTile, alphaSum, myThid ) + alpha = eta_qrN/alphaSum + +C== Update simultaneously solution and residual vectors (and Iter number) C Now compute "interior" points. - err = 0. _d 0 - err_sq = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC #ifndef ALLOW_LOOP_DIRECTIVE -CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_LOOP_DIRECTIVE */ #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+alpha*cg2d_s(I,J,bi,bj) - cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-alpha*cg2d_q(I,J,bi,bj) - err_sq = err_sq+cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj) + errTile(bi,bj) = 0. _d 0 + DO j=1,sNy + DO i=1,sNx + cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj) + cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj) + errTile(bi,bj) = errTile(bi,bj) + & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj) ENDDO ENDDO ENDDO ENDDO + actualIts = it2d - _GLOBAL_SUM_R8( err_sq , myThid ) - if ( err_sq .ne. 0. ) then - err = SQRT(err_sq) - else - err = 0. - end if - actualIts = it2d - actualResidual = err + CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid ) + IF ( printResidual ) THEN + IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN + WRITE(msgBuf,'(A,I6,A,1PE21.14)') + & ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq) + CALL PRINT_MESSAGE( msgBuf, standardMessageUnit, + & SQUEEZE_RIGHT, myThid ) + ENDIF + ENDIF -#ifdef LETS_MAKE_JAM - CALL EXCH_XY_O1_R8_JAM( cg2d_r ) -#else - _EXCH_XY_R8( cg2d_r, myThid ) - _EXCH_XY_R8( cg2d_x, myThid ) -#endif +c CALL EXCH_S3D_RL( cg2d_r, 1, myThid ) + CALL EXCH_XY_RL ( cg2d_r, myThid ) -Cml end of IF ( err .LT. cg2dTolerance ) THEN; ELSE +Cml end of if "err >= cg2dTolerance" block ; end main solver loop ENDIF -Cml end main solver loop ENDDO +#ifndef ALLOW_AUTODIFF_TAMC IF (cg2dNormaliseRHS) THEN -#ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte -CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte -#endif /* ALLOW_AUTODIFF_TAMC */ C-- Un-normalise the answer DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) - DO J=1,sNy - DO I=1,sNx - cg2d_x(I ,J ,bi,bj) = cg2d_x(I ,J ,bi,bj)/rhsNorm + DO j=1,sNy + DO i=1,sNx + cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm ENDDO ENDDO ENDDO ENDDO ENDIF - -C The following exchange was moved up to solve_for_pressure -C for compatibility with TAMC. -C _EXCH_XY_R8(cg2d_x, myThid ) -c _BEGIN_MASTER( myThid ) -c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', -c & actualIts, actualResidual -c _END_MASTER( ) +#endif /* ndef ALLOW_AUTODIFF_TAMC */ C-- Return parameters to caller - lastResidual=actualResidual - numIters=actualIts + IF ( err_sq .NE. 0. ) THEN + lastResidual = SQRT(err_sq) + ELSE + lastResidual = 0. + ENDIF + numIters = actualIts #endif /* ALLOW_CG2D_NSA */ RETURN END -# if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE)) -C +#if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE)) + C These routines are routinely part of the TAMC/TAF library that is C not included in the MITcgm, therefore they are mimicked here. -C + subroutine adstore(chardum,int1,idow,int2,int3,icount) implicit none @@ -496,7 +403,7 @@ character*(*) chardum integer int1, int2, int3, idow, icount -C the length of this vector must be greater or equal +C the length of this vector must be greater or equal C twice the number of timesteps integer nidow #ifdef ALLOW_TAMC_CHECKPOINTING @@ -507,7 +414,7 @@ integer istoreidow(nidow) common /istorecommon/ istoreidow - print *, 'adstore: ', chardum, int1, idow, int2, int3, icount + print *, 'adstore: ', chardum, int1, idow, int2, int3, icount if ( icount .gt. nidow ) then print *, 'adstore: error: icount > nidow = ', nidow @@ -529,8 +436,7 @@ character*(*) chardum integer int1, int2, int3, idow, icount - -C the length of this vector must be greater or equal +C the length of this vector must be greater or equal C twice the number of timesteps integer nidow #ifdef ALLOW_TAMC_CHECKPOINTING @@ -541,7 +447,7 @@ integer istoreidow(nidow) common /istorecommon/ istoreidow - print *, 'adresto: ', chardum, int1, idow, int2, int3, icount + print *, 'adresto: ', chardum, int1, idow, int2, int3, icount if ( icount .gt. nidow ) then print *, 'adstore: error: icount > nidow = ', nidow @@ -552,6 +458,4 @@ return end -# endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */ - - +#endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */