| 1 | 
ce107 | 
1.3 | 
C       $Id: cg2d.F,v 1.2 2006/05/12 22:21:21 ce107 Exp $        | 
| 2 | 
ce107 | 
1.1 | 
      SUBROUTINE CG2D | 
| 3 | 
  | 
  | 
C     /==========================================================\ | 
| 4 | 
  | 
  | 
C     | SUBROUTINE CG2D                                          | | 
| 5 | 
  | 
  | 
C     | o Two-dimensional grid problem conjugate-gradient        | | 
| 6 | 
  | 
  | 
C     |   inverter (with preconditioner).                        | | 
| 7 | 
  | 
  | 
C     |==========================================================| | 
| 8 | 
  | 
  | 
C     | Con. grad is an iterative procedure for solving Ax = b.  | | 
| 9 | 
  | 
  | 
C     | It requires the A be symmetric.                          | | 
| 10 | 
  | 
  | 
C     | This implementation assumes A is a five-diagonal         | | 
| 11 | 
  | 
  | 
C     | matrix of the form that arises in the discrete           | | 
| 12 | 
  | 
  | 
C     | representation of the del^2 operator in a                | | 
| 13 | 
  | 
  | 
C     | two-dimensional space.                                   | | 
| 14 | 
  | 
  | 
C     | Notes:                                                   | | 
| 15 | 
  | 
  | 
C     | ======                                                   | | 
| 16 | 
  | 
  | 
C     | This implementation can support shared-memory            |  | 
| 17 | 
  | 
  | 
C     | multi-threaded execution. In order to do this COMMON     |  | 
| 18 | 
  | 
  | 
C     | blocks are used for many of the arrays - even ones that  |  | 
| 19 | 
  | 
  | 
C     | are only used for intermedaite results. This design is   |  | 
| 20 | 
  | 
  | 
C     | OK if you want to all the threads to collaborate on      |  | 
| 21 | 
  | 
  | 
C     | solving the same problem. On the other hand if you want  |  | 
| 22 | 
  | 
  | 
C     | the threads to solve several different problems          |  | 
| 23 | 
  | 
  | 
C     | concurrently this implementation will not work.          | | 
| 24 | 
  | 
  | 
C     \==========================================================/ | 
| 25 | 
  | 
  | 
      IMPLICIT NONE | 
| 26 | 
  | 
  | 
 | 
| 27 | 
  | 
  | 
C     === Global data === | 
| 28 | 
  | 
  | 
#include "SIZE.h" | 
| 29 | 
  | 
  | 
#include "EEPARAMS.h" | 
| 30 | 
  | 
  | 
#include "PARAMS.h" | 
| 31 | 
  | 
  | 
#include "CG2D.h" | 
| 32 | 
ce107 | 
1.2 | 
#if defined(USE_PAPI_FLOPS) || defined(USE_PAPI_FLIPS) | 
| 33 | 
  | 
  | 
#if defined(PAPI_PER_ITERATION) || defined(PAPI_PER_TIMESTEP) | 
| 34 | 
  | 
  | 
#include "PAPI.h" | 
| 35 | 
  | 
  | 
#endif | 
| 36 | 
  | 
  | 
#endif | 
| 37 | 
ce107 | 
1.1 | 
 | 
| 38 | 
  | 
  | 
C     === Routine arguments === | 
| 39 | 
  | 
  | 
C     myThid - Thread on which I am working. | 
| 40 | 
  | 
  | 
      INTEGER myThid | 
| 41 | 
  | 
  | 
 | 
| 42 | 
  | 
  | 
C     === Local variables ==== | 
| 43 | 
  | 
  | 
C     actualIts      - Number of iterations taken | 
| 44 | 
  | 
  | 
C     actualResidual - residual | 
| 45 | 
  | 
  | 
C     bi          - Block index in X and Y. | 
| 46 | 
  | 
  | 
C     bj | 
| 47 | 
  | 
  | 
C     etaN        - Used in computing search directions | 
| 48 | 
  | 
  | 
C     etaNM1        suffix N and NM1 denote current and | 
| 49 | 
  | 
  | 
C     beta          previous iterations respectively. | 
| 50 | 
  | 
  | 
C     alpha   | 
| 51 | 
  | 
  | 
C     sumRHS      - Sum of right-hand-side. Sometimes this is a | 
| 52 | 
  | 
  | 
C                   useful debuggin/trouble shooting diagnostic. | 
| 53 | 
  | 
  | 
C                   For neumann problems sumRHS needs to be ~0. | 
| 54 | 
  | 
  | 
C                   or they converge at a non-zero residual. | 
| 55 | 
  | 
  | 
C     err         - Measure of residual of Ax - b, usually the norm. | 
| 56 | 
  | 
  | 
C     I, J, N     - Loop counters ( N counts CG iterations ) | 
| 57 | 
  | 
  | 
      INTEGER actualIts | 
| 58 | 
  | 
  | 
      INTEGER bi, bj               | 
| 59 | 
  | 
  | 
      INTEGER I, J, N | 
| 60 | 
ce107 | 
1.2 | 
#ifdef USE_MIXED_PRECISION | 
| 61 | 
  | 
  | 
      REAL*8    actualResidual | 
| 62 | 
  | 
  | 
      REAL*8    err | 
| 63 | 
  | 
  | 
      REAL*8    errSum | 
| 64 | 
  | 
  | 
      REAL*8    etaN | 
| 65 | 
  | 
  | 
      REAL*8    etaNM1 | 
| 66 | 
  | 
  | 
      REAL*8    etaNSum | 
| 67 | 
  | 
  | 
      REAL*8    beta | 
| 68 | 
  | 
  | 
      REAL*8    alpha | 
| 69 | 
  | 
  | 
      REAL*8    alphaSum | 
| 70 | 
  | 
  | 
      REAL*8    sumRHS | 
| 71 | 
  | 
  | 
      REAL*8    temp | 
| 72 | 
  | 
  | 
#else | 
| 73 | 
  | 
  | 
      Real    actualResidual | 
| 74 | 
  | 
  | 
      Real    err | 
| 75 | 
  | 
  | 
      Real    errSum | 
| 76 | 
  | 
  | 
      Real    etaN | 
| 77 | 
  | 
  | 
      Real    etaNM1 | 
| 78 | 
  | 
  | 
      Real    etaNSum | 
| 79 | 
  | 
  | 
      Real    beta | 
| 80 | 
  | 
  | 
      Real    alpha | 
| 81 | 
  | 
  | 
      Real    alphaSum | 
| 82 | 
  | 
  | 
      Real    sumRHS | 
| 83 | 
  | 
  | 
      Real    temp | 
| 84 | 
  | 
  | 
#endif | 
| 85 | 
ce107 | 
1.1 | 
 | 
| 86 | 
  | 
  | 
C--   Initialise inverter | 
| 87 | 
ce107 | 
1.2 | 
      etaNM1              = 1. _d 0 | 
| 88 | 
ce107 | 
1.1 | 
 | 
| 89 | 
  | 
  | 
C--   Initial residual calculation | 
| 90 | 
  | 
  | 
      err    = 0. _d 0 | 
| 91 | 
  | 
  | 
      sumRHS = 0. _d 0 | 
| 92 | 
  | 
  | 
      DO J=1,sNy | 
| 93 | 
  | 
  | 
       DO I=1,sNx | 
| 94 | 
ce107 | 
1.2 | 
        cg2d_s(I,J) = 0. _d 0 | 
| 95 | 
ce107 | 
1.1 | 
         cg2d_r(I,J) = cg2d_b(I,J) - | 
| 96 | 
  | 
  | 
     &   ( aW2d(I  ,J  )*cg2d_x(I-1,J  )+aW2d(I+1,J  )*cg2d_x(I+1,J  ) | 
| 97 | 
  | 
  | 
     &    +aS2d(I  ,J  )*cg2d_x(I  ,J-1)+aS2d(I  ,J+1)*cg2d_x(I  ,J+1) | 
| 98 | 
  | 
  | 
     &    -aW2d(I  ,J  )*cg2d_x(I  ,J  )-aW2d(I+1,J  )*cg2d_x(I  ,J  ) | 
| 99 | 
  | 
  | 
     &    -aS2d(I  ,J  )*cg2d_x(I  ,J  )-aS2d(I  ,J+1)*cg2d_x(I  ,J  ) | 
| 100 | 
  | 
  | 
     &   ) | 
| 101 | 
  | 
  | 
          err    = err    + cg2d_r(I,J)*cg2d_r(I,J) | 
| 102 | 
  | 
  | 
          sumRHS = sumRHS + cg2d_b(I,J) | 
| 103 | 
  | 
  | 
       ENDDO | 
| 104 | 
  | 
  | 
      ENDDO | 
| 105 | 
  | 
  | 
      CALL EXCH_XY_R8( cg2d_r    ) | 
| 106 | 
  | 
  | 
      CALL EXCH_XY_R8( cg2d_s    ) | 
| 107 | 
  | 
  | 
      CALL GSUM_R8( temp, err    ) | 
| 108 | 
  | 
  | 
      err    = temp | 
| 109 | 
  | 
  | 
      CALL GSUM_R8( temp, sumRHS ) | 
| 110 | 
  | 
  | 
      sumRHS = temp | 
| 111 | 
  | 
  | 
 | 
| 112 | 
  | 
  | 
      actualIts      = 0 | 
| 113 | 
  | 
  | 
      actualResidual = SQRT(err) | 
| 114 | 
  | 
  | 
      WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 115 | 
ce107 | 
1.2 | 
      IF ( actualResidual .EQ. 0. _d 0) STOP 'ABNORMAL END: RESIDUAL 0' | 
| 116 | 
ce107 | 
1.1 | 
 | 
| 117 | 
  | 
  | 
C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< | 
| 118 | 
  | 
  | 
      DO 10 N=1, cg2dMaxIters | 
| 119 | 
  | 
  | 
C--    Solve preconditioning equation and update | 
| 120 | 
  | 
  | 
C--    conjugate direction vector "s". | 
| 121 | 
  | 
  | 
       etaN = 0. _d 0 | 
| 122 | 
  | 
  | 
       DO J=1,sNy | 
| 123 | 
  | 
  | 
        DO I=1,sNx | 
| 124 | 
  | 
  | 
         cg2d_q(I,J) =  | 
| 125 | 
  | 
  | 
     &    pW(I  ,J  )*cg2d_r(I-1,J  )+pW(I+1,J  )*cg2d_r(I+1,J  ) | 
| 126 | 
  | 
  | 
     &   +pS(I  ,J  )*cg2d_r(I  ,J-1)+pS(I  ,J+1)*cg2d_r(I  ,J+1) | 
| 127 | 
  | 
  | 
     &   +pC(I  ,J  )*cg2d_r(I  ,J  ) | 
| 128 | 
  | 
  | 
         etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J) | 
| 129 | 
  | 
  | 
        ENDDO | 
| 130 | 
  | 
  | 
       ENDDO | 
| 131 | 
  | 
  | 
 | 
| 132 | 
  | 
  | 
       CALL GSUM_R8( temp, etaN ) | 
| 133 | 
  | 
  | 
       etaN   = temp | 
| 134 | 
  | 
  | 
       beta   = etaN/etaNM1 | 
| 135 | 
  | 
  | 
       etaNM1 = etaN | 
| 136 | 
  | 
  | 
 | 
| 137 | 
  | 
  | 
       DO J=1,sNy | 
| 138 | 
  | 
  | 
        DO I=1,sNx | 
| 139 | 
  | 
  | 
         cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J) | 
| 140 | 
  | 
  | 
        ENDDO | 
| 141 | 
  | 
  | 
       ENDDO | 
| 142 | 
  | 
  | 
 | 
| 143 | 
  | 
  | 
C--    Do exchanges that require messages i.e. between | 
| 144 | 
  | 
  | 
C--    processes. | 
| 145 | 
  | 
  | 
       CALL EXCH_XY_R8( cg2d_s ) | 
| 146 | 
  | 
  | 
 | 
| 147 | 
  | 
  | 
C--    Ten extra exchanges | 
| 148 | 
  | 
  | 
#ifdef TEN_EXTRA_EXCHS | 
| 149 | 
ce107 | 
1.3 | 
       DO J=1,10 | 
| 150 | 
  | 
  | 
          CALL EXCH_XY_R8( cg2d_s ) | 
| 151 | 
  | 
  | 
       ENDDO | 
| 152 | 
ce107 | 
1.1 | 
#endif | 
| 153 | 
  | 
  | 
 | 
| 154 | 
  | 
  | 
C==    Evaluate laplace operator on conjugate gradient vector | 
| 155 | 
  | 
  | 
C==    q = A.s | 
| 156 | 
  | 
  | 
       alpha = 0. _d 0 | 
| 157 | 
  | 
  | 
       DO J=1,sNy | 
| 158 | 
  | 
  | 
        DO I=1,sNx | 
| 159 | 
  | 
  | 
         cg2d_q(I,J) =  | 
| 160 | 
  | 
  | 
     &   aW2d(I  ,J  )*cg2d_s(I-1,J  )+aW2d(I+1,J  )*cg2d_s(I+1,J  ) | 
| 161 | 
  | 
  | 
     &  +aS2d(I  ,J  )*cg2d_s(I  ,J-1)+aS2d(I  ,J+1)*cg2d_s(I  ,J+1) | 
| 162 | 
  | 
  | 
     &  -aW2d(I  ,J  )*cg2d_s(I  ,J  )-aW2d(I+1,J  )*cg2d_s(I  ,J  ) | 
| 163 | 
  | 
  | 
     &  -aS2d(I  ,J  )*cg2d_s(I  ,J  )-aS2d(I  ,J+1)*cg2d_s(I  ,J  ) | 
| 164 | 
  | 
  | 
        alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J) | 
| 165 | 
  | 
  | 
        ENDDO | 
| 166 | 
  | 
  | 
       ENDDO | 
| 167 | 
  | 
  | 
       CALL GSUM_R8( temp, alpha ) | 
| 168 | 
  | 
  | 
 | 
| 169 | 
  | 
  | 
#ifdef HUNDRED_EXTRA_SUMS | 
| 170 | 
  | 
  | 
C--    Hundred extra global sums | 
| 171 | 
ce107 | 
1.3 | 
       DO J=1,100 | 
| 172 | 
  | 
  | 
          CALL GSUM_R8( temp, alpha ) | 
| 173 | 
  | 
  | 
       ENDDO | 
| 174 | 
ce107 | 
1.1 | 
#endif | 
| 175 | 
  | 
  | 
 | 
| 176 | 
  | 
  | 
       alpha = temp | 
| 177 | 
  | 
  | 
       alpha = etaN/alpha | 
| 178 | 
  | 
  | 
      | 
| 179 | 
  | 
  | 
C==    Update solution and residual vectors | 
| 180 | 
  | 
  | 
C      Now compute "interior" points. | 
| 181 | 
  | 
  | 
       err = 0. _d 0 | 
| 182 | 
  | 
  | 
       DO J=1,sNy | 
| 183 | 
  | 
  | 
        DO I=1,sNx | 
| 184 | 
  | 
  | 
         cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J) | 
| 185 | 
  | 
  | 
         cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J) | 
| 186 | 
  | 
  | 
         err = err+cg2d_r(I,J)*cg2d_r(I,J) | 
| 187 | 
  | 
  | 
        ENDDO | 
| 188 | 
  | 
  | 
       ENDDO | 
| 189 | 
  | 
  | 
 | 
| 190 | 
  | 
  | 
       CALL GSUM_R8( temp, err ) | 
| 191 | 
  | 
  | 
 | 
| 192 | 
  | 
  | 
       err = temp | 
| 193 | 
  | 
  | 
       err = SQRT(err) | 
| 194 | 
  | 
  | 
       actualIts      = N | 
| 195 | 
  | 
  | 
       actualResidual = err | 
| 196 | 
ce107 | 
1.2 | 
#ifdef RESIDUAL_PER_ITERATION | 
| 197 | 
  | 
  | 
       WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 198 | 
  | 
  | 
#endif | 
| 199 | 
ce107 | 
1.1 | 
       IF ( err .LT. cg2dTargetResidual ) GOTO 11 | 
| 200 | 
  | 
  | 
       CALL EXCH_XY_R8(cg2d_r ) | 
| 201 | 
ce107 | 
1.2 | 
#ifdef PAPI_PER_ITERATION | 
| 202 | 
  | 
  | 
#ifdef USE_PAPI_FLOPS | 
| 203 | 
  | 
  | 
       call PAPIF_flops(real_time, proc_time, flpops, mflops, check) | 
| 204 | 
  | 
  | 
       WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 205 | 
  | 
  | 
     $      mflops, ' user ', mflops*proc_time/real_time, | 
| 206 | 
  | 
  | 
     $      ' wallclock Mflop/s during iteration ', N | 
| 207 | 
  | 
  | 
#else | 
| 208 | 
  | 
  | 
#ifdef USE_PAPI_FLIPS | 
| 209 | 
  | 
  | 
       call PAPIF_flips(real_time, proc_time, flpops, mflops, check) | 
| 210 | 
  | 
  | 
       WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 211 | 
  | 
  | 
     $      mflops, ' user ', mflops*proc_time/real_time, | 
| 212 | 
  | 
  | 
     $      ' wallclock Mflip/s during iteration ', N  | 
| 213 | 
  | 
  | 
#endif | 
| 214 | 
  | 
  | 
#endif | 
| 215 | 
  | 
  | 
#endif | 
| 216 | 
ce107 | 
1.1 | 
   10 CONTINUE | 
| 217 | 
  | 
  | 
   11 CONTINUE | 
| 218 | 
  | 
  | 
      CALL EXCH_XY_R8(cg2d_x ) | 
| 219 | 
ce107 | 
1.2 | 
#ifdef PAPI_PER_TIMESTEP | 
| 220 | 
  | 
  | 
#ifdef USE_PAPI_FLOPS | 
| 221 | 
  | 
  | 
       call PAPIF_flops(real_time, proc_time, flpops, mflops, check) | 
| 222 | 
  | 
  | 
       WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 223 | 
  | 
  | 
     $      mflops, ' user ', mflops*proc_time/real_time, | 
| 224 | 
  | 
  | 
     $      ' wallclock Mflop/s during iteration ', N  | 
| 225 | 
  | 
  | 
#else | 
| 226 | 
  | 
  | 
#ifdef USE_PAPI_FLIPS | 
| 227 | 
  | 
  | 
       call PAPIF_flips(real_time, proc_time, flpops, mflops, check) | 
| 228 | 
  | 
  | 
       WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 229 | 
  | 
  | 
     $      mflops, ' user ', mflops*proc_time/real_time, | 
| 230 | 
  | 
  | 
     $      ' wallclock Mflip/s during iteration ', N  | 
| 231 | 
  | 
  | 
#endif | 
| 232 | 
  | 
  | 
#endif | 
| 233 | 
  | 
  | 
#endif | 
| 234 | 
ce107 | 
1.1 | 
      WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 235 | 
  | 
  | 
 | 
| 236 | 
  | 
  | 
C     Calc Ax to check result | 
| 237 | 
  | 
  | 
      DO J=1,sNy | 
| 238 | 
  | 
  | 
       DO I=1,sNx | 
| 239 | 
  | 
  | 
         cg2d_Ax(I,J) = | 
| 240 | 
  | 
  | 
     &   ( aW2d(I  ,J  )*cg2d_x(I-1,J  )+aW2d(I+1,J  )*cg2d_x(I+1,J  ) | 
| 241 | 
  | 
  | 
     &    +aS2d(I  ,J  )*cg2d_x(I  ,J-1)+aS2d(I  ,J+1)*cg2d_x(I  ,J+1) | 
| 242 | 
  | 
  | 
     &    -aW2d(I  ,J  )*cg2d_x(I  ,J  )-aW2d(I+1,J  )*cg2d_x(I  ,J  ) | 
| 243 | 
  | 
  | 
     &    -aS2d(I  ,J  )*cg2d_x(I  ,J  )-aS2d(I  ,J+1)*cg2d_x(I  ,J  ) | 
| 244 | 
  | 
  | 
     &   ) | 
| 245 | 
  | 
  | 
         cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J) | 
| 246 | 
  | 
  | 
       ENDDO | 
| 247 | 
  | 
  | 
      ENDDO | 
| 248 | 
  | 
  | 
      CALL EXCH_XY_R8(cg2d_Ax ) | 
| 249 | 
  | 
  | 
      CALL EXCH_XY_R8(cg2d_r  ) | 
| 250 | 
  | 
  | 
 | 
| 251 | 
  | 
  | 
      END |