--- MITgcm/model/src/cg2d_nsa.F 2009/04/28 18:01:14 1.2 +++ MITgcm/model/src/cg2d_nsa.F 2009/07/11 21:45:44 1.3 @@ -1,18 +1,18 @@ -C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg2d_nsa.F,v 1.2 2009/04/28 18:01:14 jmc Exp $ +C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/model/src/cg2d_nsa.F,v 1.3 2009/07/11 21:45:44 jmc Exp $ C $Name: $ #include "CPP_OPTIONS.h" #ifdef ALLOW_USE_MPI C HACK to avoid global_max # define ALLOW_CONST_RHSMAX -#endif +#endif CML THIS DOES NOT WORK +++++ #undef ALLOW_LOOP_DIRECTIVE CBOP C !ROUTINE: CG2D_NSA C !INTERFACE: - SUBROUTINE CG2D_NSA( + SUBROUTINE CG2D_NSA( I cg2d_b, U cg2d_x, O firstResidual, @@ -21,30 +21,30 @@ I myThid ) C !DESCRIPTION: \bv C *==========================================================* -C | SUBROUTINE CG2D_NSA -C | o Two-dimensional grid problem conjugate-gradient -C | inverter (with preconditioner). +C | SUBROUTINE CG2D_NSA +C | o Two-dimensional grid problem conjugate-gradient +C | inverter (with preconditioner). C | o This version is used only in the case when the matrix -C | operator is not "self-adjoint" (NSA). Any remaining +C | operator is not "self-adjoint" (NSA). Any remaining C | residuals will immediately reported to the department C | of homeland security. C *==========================================================* -C | Con. grad is an iterative procedure for solving Ax = b. -C | It requires the A be symmetric. -C | This implementation assumes A is a five-diagonal -C | matrix of the form that arises in the discrete -C | representation of the del^2 operator in a -C | two-dimensional space. -C | Notes: -C | ====== -C | This implementation can support shared-memory -C | multi-threaded execution. In order to do this COMMON -C | blocks are used for many of the arrays - even ones that -C | are only used for intermedaite results. This design is -C | OK if you want to all the threads to collaborate on -C | solving the same problem. On the other hand if you want -C | the threads to solve several different problems -C | concurrently this implementation will not work. +C | Con. grad is an iterative procedure for solving Ax = b. +C | It requires the A be symmetric. +C | This implementation assumes A is a five-diagonal +C | matrix of the form that arises in the discrete +C | representation of the del^2 operator in a +C | two-dimensional space. +C | Notes: +C | ====== +C | This implementation can support shared-memory +C | multi-threaded execution. In order to do this COMMON +C | blocks are used for many of the arrays - even ones that +C | are only used for intermedaite results. This design is +C | OK if you want to all the threads to collaborate on +C | solving the same problem. On the other hand if you want +C | the threads to solve several different problems +C | concurrently this implementation will not work. C *==========================================================* C \ev @@ -54,9 +54,9 @@ #include "SIZE.h" #include "EEPARAMS.h" #include "PARAMS.h" -#include "GRID.h" #include "CG2D.h" -#include "SURFACE.h" +c#include "GRID.h" +c#include "SURFACE.h" #ifdef ALLOW_AUTODIFF_TAMC # include "tamc.h" # include "tamc_keys.h" @@ -64,13 +64,13 @@ C !INPUT/OUTPUT PARAMETERS: C === Routine arguments === -C myThid - Thread on which I am working. -C cg2d_b - The source term or "right hand side" -C cg2d_x - The solution -C firstResidual - the initial residual before any iterations -C lastResidual - the actual residual reached -C numIters - Entry: the maximum number of iterations allowed -C Exit: the actual number of iterations used +C cg2d_b :: The source term or "right hand side" +C cg2d_x :: The solution +C firstResidual :: the initial residual before any iterations +C lastResidual :: the actual residual reached +C numIters :: Entry: the maximum number of iterations allowed +C Exit: the actual number of iterations used +C myThid :: Thread on which I am working. _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RL firstResidual @@ -81,26 +81,25 @@ #ifdef ALLOW_CG2D_NSA C !LOCAL VARIABLES: C === Local variables ==== -C actualIts - Number of iterations taken -C actualResidual - residual -C bi - Block index in X and Y. -C bj -C eta_qrN - Used in computing search directions +C actualIts :: Number of iterations taken +C actualResidual :: residual +C bi, bj :: Block index in X and Y. +C eta_qrN :: Used in computing search directions C eta_qrNM1 suffix N and NM1 denote current and C cgBeta previous iterations respectively. -C recip_eta_qrNM1 reciprocal of eta_qrNM1 -C alpha -C alpha_aux - to avoid the statement: alpha = 1./alpha (for TAMC/TAF) -C sumRHS - Sum of right-hand-side. Sometimes this is a +C recip_eta_qrNM1 :: reciprocal of eta_qrNM1 +C alpha +C alpha_aux :: to avoid the statement: alpha = 1./alpha (for TAMC/TAF) +C sumRHS :: Sum of right-hand-side. Sometimes this is a C useful debuggin/trouble shooting diagnostic. C For neumann problems sumRHS needs to be ~0. C or they converge at a non-zero residual. -C err - Measure of residual of Ax - b, usually the norm. -C err_sq - square of err (for TAMC/TAF) -C I, J, N - Loop counters ( N counts CG iterations ) +C err :: Measure of residual of Ax - b, usually the norm. +C err_sq :: square of err (for TAMC/TAF) +C I, J, it2d :: Loop counters ( it2d counts CG iterations ) INTEGER actualIts _RL actualResidual - INTEGER bi, bj + INTEGER bi, bj INTEGER I, J, it2d _RL err @@ -119,7 +118,7 @@ #ifdef ALLOW_AUTODIFF_TAMC IF ( numIters .GT. numItersMax ) THEN - WRITE(standardMessageUnit,'(A,I10)') + WRITE(standardMessageUnit,'(A,I10)') & 'CG2D_NSA: numIters > numItersMax = ', numItersMax STOP 'NON-NORMAL in CG2D_NSA' ENDIF @@ -150,7 +149,7 @@ C-- Normalise RHS #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ rhsMax = 0. _d 0 DO bj=myByLo(myThid),myByHi(myThid) @@ -178,7 +177,7 @@ #endif /* ALLOW_CONST_RHSMAX */ #endif #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ IF ( rhsMaxGlobal .NE. 0. ) THEN rhsNorm = 1. _d 0 / rhsMaxGlobal @@ -187,8 +186,8 @@ ENDIF #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte -CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) @@ -204,8 +203,8 @@ ENDIF C-- Update overlaps - _EXCH_XY_RL( cg2d_b, myThid ) - _EXCH_XY_RL( cg2d_x, myThid ) +c CALL EXCH_XY_RL( cg2d_b, myThid ) + CALL EXCH_XY_RL( cg2d_x, myThid ) CcnhDebugStarts C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid ) C suff = 'normalised' @@ -217,28 +216,34 @@ err_sq = 0. _d 0 sumRHS = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte -CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) + DO J=1-1,sNy+1 + DO I=1-1,sNx+1 + cg2d_s(I,J,bi,bj) = 0. + ENDDO + ENDDO DO J=1,sNy DO I=1,sNx - cg2d_s(I,J,bi,bj) = 0. cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) - & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj) & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj) & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj) & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj) - & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj) - & -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* - & cg2d_x(I ,J ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm -CML & cg2d_x(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm + & +aC2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) & ) - err_sq = err_sq + +c & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) +c & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) +c & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj) +c & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj) +c & -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* +c & cg2d_x(I ,J ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm +cML & cg2d_x(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm +c & ) + err_sq = err_sq + & cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj) sumRHS = sumRHS + & cg2d_b(I,J,bi,bj) @@ -247,32 +252,29 @@ ENDDO ENDDO -#ifdef LETS_MAKE_JAM - CALL EXCH_XY_O1_R8_JAM( cg2d_r ) - CALL EXCH_XY_O1_R8_JAM( cg2d_s ) -#else - _EXCH_XY_RL( cg2d_r, myThid ) - _EXCH_XY_RL( cg2d_s, myThid ) -#endif - _GLOBAL_SUM_RL( sumRHS, myThid ) - _GLOBAL_SUM_RL( err_sq, myThid ) - if ( err_sq .ne. 0. ) then +c CALL EXCH_S3D_RL( cg2d_r, 1, myThid ) + CALL EXCH_XY_RL ( cg2d_r, myThid ) + _GLOBAL_SUM_RL( sumRHS, myThid ) + _GLOBAL_SUM_RL( err_sq, myThid ) + IF ( err_sq .NE. 0. ) THEN err = SQRT(err_sq) - else + ELSE err = 0. - end if + ENDIF actualIts = 0 actualResidual = err - - _BEGIN_MASTER( myThid ) - write(standardMessageUnit,'(A,1P2E22.14)') - & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMaxGlobal - _END_MASTER( ) + + IF ( debugLevel .GE. debLevZero ) THEN + _BEGIN_MASTER( myThid ) + WRITE(standardmessageunit,'(A,1P2E22.14)') + & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMaxGlobal + _END_MASTER( myThid ) + ENDIF C _BARRIER c _BEGIN_MASTER( myThid ) -c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', +c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', c & actualIts, actualResidual -c _END_MASTER( ) +c _END_MASTER( myThid ) firstResidual=actualResidual cg2dTolerance_sq = cg2dTolerance**2 @@ -284,14 +286,14 @@ DO it2d=1, numIters #ifdef ALLOW_LOOP_DIRECTIVE CML it2d = 0 -CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters ) +CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters ) CML it2d = it2d+1 #endif /* ALLOW_LOOP_DIRECTIVE */ #ifdef ALLOW_AUTODIFF_TAMC icg2dkey = (ikey-1)*numItersMax + it2d -CMLCADJ STORE err = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte -CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CMLCADJ STORE err = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ CML IF ( err .LT. cg2dTolerance ) THEN IF ( err_sq .LT. cg2dTolerance_sq ) THEN @@ -306,14 +308,14 @@ C-- conjugate direction vector "s". eta_qrN = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte -CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO J=1,sNy DO I=1,sNx - cg2d_z(I,J,bi,bj) = + cg2d_z(I,J,bi,bj) = & pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj) & +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj) & +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj) @@ -334,15 +336,15 @@ C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' eta_qrN = ',eta_qrN CcnhDebugEnds #ifdef ALLOW_AUTODIFF_TAMC -CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte -CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ CML cgBeta = eta_qrN/eta_qrNM1 cgBeta = eta_qrN*recip_eta_qrNM1 CcnhDebugStarts C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' beta = ',cgBeta CcnhDebugEnds -Cml store normalisation factor for the next interation +Cml store normalisation factor for the next interation Cml (in case there is one). CML store the inverse of the normalization factor for higher precision CML eta_qrNM1 = eta_qrN @@ -361,11 +363,8 @@ C-- Do exchanges that require messages i.e. between C-- processes. -#ifdef LETS_MAKE_JAM - CALL EXCH_XY_O1_R8_JAM( cg2d_s ) -#else - _EXCH_XY_RL( cg2d_s, myThid ) -#endif +c CALL EXCH_S3D_RL( cg2d_s, 1, myThid ) + CALL EXCH_XY_RL ( cg2d_s, myThid ) C== Evaluate laplace operator on conjugate gradient vector C== q = A.s @@ -373,26 +372,27 @@ alpha_aux = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC #ifndef ALLOW_LOOP_DIRECTIVE -CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* not ALLOW_LOOP_DIRECTIVE */ #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) DO bi=myBxLo(myThid),myBxHi(myThid) DO J=1,sNy DO I=1,sNx - cg2d_q(I,J,bi,bj) = + cg2d_q(I,J,bi,bj) = & aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj) & +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj) & +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj) & +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj) - & -aW2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -aW2d(I+1,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -aS2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J ,bi,bj) - & -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* - & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm -CML & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm - alpha_aux = alpha_aux+cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj) + & +aC2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) +c & -aW2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) +c & -aW2d(I+1,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) +c & -aS2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj) +c & -aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J ,bi,bj) +c & -freeSurfFac*_rA(i,j,bi,bj)*recip_Bo(i,j,bi,bj)* +c & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTfreesurf*cg2dNorm +cML & cg2d_s(I ,J ,bi,bj)/deltaTMom/deltaTMom*cg2dNorm + alpha_aux = alpha_aux+cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj) ENDDO ENDDO ENDDO @@ -405,14 +405,14 @@ CcnhDebugStarts C WRITE(*,*) ' CG2D_NSA: Iteration ',it2d-1,' alpha= ',alpha CcnhDebugEnds - + C== Update solution and residual vectors C Now compute "interior" points. err = 0. _d 0 err_sq = 0. _d 0 #ifdef ALLOW_AUTODIFF_TAMC #ifndef ALLOW_LOOP_DIRECTIVE -CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte +CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte #endif /* ALLOW_LOOP_DIRECTIVE */ #endif /* ALLOW_AUTODIFF_TAMC */ DO bj=myByLo(myThid),myByHi(myThid) @@ -436,12 +436,8 @@ actualIts = it2d actualResidual = err -#ifdef LETS_MAKE_JAM - CALL EXCH_XY_O1_R8_JAM( cg2d_r ) -#else - _EXCH_XY_RL( cg2d_r, myThid ) - _EXCH_XY_RL( cg2d_x, myThid ) -#endif +c CALL EXCH_S3D_RL( cg2d_r, 1, myThid ) + CALL EXCH_XY_RL ( cg2d_r, myThid ) Cml end of IF ( err .LT. cg2dTolerance ) THEN; ELSE ENDIF @@ -450,8 +446,8 @@ IF (cg2dNormaliseRHS) THEN #ifdef ALLOW_AUTODIFF_TAMC -CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte -CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE rhsNorm = comlev1_cg2d, key = ikey, byte = isbyte +CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte #endif /* ALLOW_AUTODIFF_TAMC */ C-- Un-normalise the answer DO bj=myByLo(myThid),myByHi(myThid) @@ -469,7 +465,7 @@ C for compatibility with TAMC. C _EXCH_XY_RL(cg2d_x, myThid ) c _BEGIN_MASTER( myThid ) -c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', +c WRITE(*,'(A,I6,1PE30.14)') ' CG2D_NSA iters, err = ', c & actualIts, actualResidual c _END_MASTER( ) @@ -496,7 +492,7 @@ character*(*) chardum integer int1, int2, int3, idow, icount -C the length of this vector must be greater or equal +C the length of this vector must be greater or equal C twice the number of timesteps integer nidow #ifdef ALLOW_TAMC_CHECKPOINTING @@ -507,7 +503,7 @@ integer istoreidow(nidow) common /istorecommon/ istoreidow - print *, 'adstore: ', chardum, int1, idow, int2, int3, icount + print *, 'adstore: ', chardum, int1, idow, int2, int3, icount if ( icount .gt. nidow ) then print *, 'adstore: error: icount > nidow = ', nidow @@ -530,7 +526,7 @@ integer int1, int2, int3, idow, icount -C the length of this vector must be greater or equal +C the length of this vector must be greater or equal C twice the number of timesteps integer nidow #ifdef ALLOW_TAMC_CHECKPOINTING @@ -541,7 +537,7 @@ integer istoreidow(nidow) common /istorecommon/ istoreidow - print *, 'adresto: ', chardum, int1, idow, int2, int3, icount + print *, 'adresto: ', chardum, int1, idow, int2, int3, icount if ( icount .gt. nidow ) then print *, 'adstore: error: icount > nidow = ', nidow @@ -554,4 +550,3 @@ end # endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */ -