SUBROUTINE CG2D C /==========================================================\ C | SUBROUTINE CG2D | C | o Two-dimensional grid problem conjugate-gradient | C | inverter (with preconditioner). | C |==========================================================| C | Con. grad is an iterative procedure for solving Ax = b. | C | It requires the A be symmetric. | C | This implementation assumes A is a five-diagonal | C | matrix of the form that arises in the discrete | C | representation of the del^2 operator in a | C | two-dimensional space. | C | Notes: | C | ====== | C | This implementation can support shared-memory | C | multi-threaded execution. In order to do this COMMON | C | blocks are used for many of the arrays - even ones that | C | are only used for intermedaite results. This design is | C | OK if you want to all the threads to collaborate on | C | solving the same problem. On the other hand if you want | C | the threads to solve several different problems | C | concurrently this implementation will not work. | C \==========================================================/ IMPLICIT NONE C === Global data === #include "SIZE.h" #include "EEPARAMS.h" #include "PARAMS.h" #include "CG2D.h" C === Routine arguments === C myThid - Thread on which I am working. INTEGER myThid C === Local variables ==== C actualIts - Number of iterations taken C actualResidual - residual C bi - Block index in X and Y. C bj C etaN - Used in computing search directions C etaNM1 suffix N and NM1 denote current and C beta previous iterations respectively. C alpha C sumRHS - Sum of right-hand-side. Sometimes this is a C useful debuggin/trouble shooting diagnostic. C For neumann problems sumRHS needs to be ~0. C or they converge at a non-zero residual. C err - Measure of residual of Ax - b, usually the norm. C I, J, N - Loop counters ( N counts CG iterations ) INTEGER actualIts REAL actualResidual INTEGER bi, bj INTEGER I, J, N REAL err REAL errSum REAL etaN REAL etaNM1 REAL etaNSum REAL beta REAL alpha REAL alphaSum REAL sumRHS REAL temp C-- Initialise inverter etaNM1 = 1. D0 C-- Initial residual calculation err = 0. _d 0 sumRHS = 0. _d 0 DO J=1,sNy DO I=1,sNx cg2d_s(I,J) = 0. cg2d_r(I,J) = cg2d_b(I,J) - & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J ) & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1) & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J ) & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J ) & ) err = err + cg2d_r(I,J)*cg2d_r(I,J) sumRHS = sumRHS + cg2d_b(I,J) ENDDO ENDDO CALL EXCH_XY_R8( cg2d_r ) CALL EXCH_XY_R8( cg2d_s ) CALL GSUM_R8( temp, err ) err = temp CALL GSUM_R8( temp, sumRHS ) sumRHS = temp actualIts = 0 actualResidual = SQRT(err) WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual IF ( actualResidual .EQ. 0. ) STOP 'ABNORMAL END: RESIDUAL 0' C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< DO 10 N=1, cg2dMaxIters C-- Solve preconditioning equation and update C-- conjugate direction vector "s". etaN = 0. _d 0 DO J=1,sNy DO I=1,sNx cg2d_q(I,J) = & pW(I ,J )*cg2d_r(I-1,J )+pW(I+1,J )*cg2d_r(I+1,J ) & +pS(I ,J )*cg2d_r(I ,J-1)+pS(I ,J+1)*cg2d_r(I ,J+1) & +pC(I ,J )*cg2d_r(I ,J ) etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J) ENDDO ENDDO CALL GSUM_R8( temp, etaN ) etaN = temp beta = etaN/etaNM1 etaNM1 = etaN DO J=1,sNy DO I=1,sNx cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J) ENDDO ENDDO C-- Do exchanges that require messages i.e. between C-- processes. CALL EXCH_XY_R8( cg2d_s ) C-- Ten extra exchanges #ifdef TEN_EXTRA_EXCHS CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) CALL EXCH_XY_R8( cg2d_s ) #endif C== Evaluate laplace operator on conjugate gradient vector C== q = A.s alpha = 0. _d 0 DO J=1,sNy DO I=1,sNx cg2d_q(I,J) = & aW2d(I ,J )*cg2d_s(I-1,J )+aW2d(I+1,J )*cg2d_s(I+1,J ) & +aS2d(I ,J )*cg2d_s(I ,J-1)+aS2d(I ,J+1)*cg2d_s(I ,J+1) & -aW2d(I ,J )*cg2d_s(I ,J )-aW2d(I+1,J )*cg2d_s(I ,J ) & -aS2d(I ,J )*cg2d_s(I ,J )-aS2d(I ,J+1)*cg2d_s(I ,J ) alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J) ENDDO ENDDO CALL GSUM_R8( temp, alpha ) #ifdef HUNDRED_EXTRA_SUMS C-- Hundred extra global sums CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) CALL GSUM_R8( temp, alpha ) #endif alpha = temp alpha = etaN/alpha C== Update solution and residual vectors C Now compute "interior" points. err = 0. _d 0 DO J=1,sNy DO I=1,sNx cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J) cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J) err = err+cg2d_r(I,J)*cg2d_r(I,J) ENDDO ENDDO CALL GSUM_R8( temp, err ) err = temp err = SQRT(err) actualIts = N actualResidual = err CcnhDebugStarts C WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual CcnhDebugEnds IF ( err .LT. cg2dTargetResidual ) GOTO 11 CALL EXCH_XY_R8(cg2d_r ) 10 CONTINUE 11 CONTINUE CALL EXCH_XY_R8(cg2d_x ) WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual C Calc Ax to check result DO J=1,sNy DO I=1,sNx cg2d_Ax(I,J) = & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J ) & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1) & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J ) & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J ) & ) cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J) ENDDO ENDDO CALL EXCH_XY_R8(cg2d_Ax ) CALL EXCH_XY_R8(cg2d_r ) END