| 1 | SUBROUTINE CG2D | 
| 2 | C     /==========================================================\ | 
| 3 | C     | SUBROUTINE CG2D                                          | | 
| 4 | C     | o Two-dimensional grid problem conjugate-gradient        | | 
| 5 | C     |   inverter (with preconditioner).                        | | 
| 6 | C     |==========================================================| | 
| 7 | C     | Con. grad is an iterative procedure for solving Ax = b.  | | 
| 8 | C     | It requires the A be symmetric.                          | | 
| 9 | C     | This implementation assumes A is a five-diagonal         | | 
| 10 | C     | matrix of the form that arises in the discrete           | | 
| 11 | C     | representation of the del^2 operator in a                | | 
| 12 | C     | two-dimensional space.                                   | | 
| 13 | C     | Notes:                                                   | | 
| 14 | C     | ======                                                   | | 
| 15 | C     | This implementation can support shared-memory            | | 
| 16 | C     | multi-threaded execution. In order to do this COMMON     | | 
| 17 | C     | blocks are used for many of the arrays - even ones that  | | 
| 18 | C     | are only used for intermedaite results. This design is   | | 
| 19 | C     | OK if you want to all the threads to collaborate on      | | 
| 20 | C     | solving the same problem. On the other hand if you want  | | 
| 21 | C     | the threads to solve several different problems          | | 
| 22 | C     | concurrently this implementation will not work.          | | 
| 23 | C     \==========================================================/ | 
| 24 | IMPLICIT NONE | 
| 25 |  | 
| 26 | C     === Global data === | 
| 27 | #include "SIZE.h" | 
| 28 | #include "EEPARAMS.h" | 
| 29 | #include "PARAMS.h" | 
| 30 | #include "CG2D.h" | 
| 31 |  | 
| 32 | C     === Routine arguments === | 
| 33 | C     myThid - Thread on which I am working. | 
| 34 | INTEGER myThid | 
| 35 |  | 
| 36 | C     === Local variables ==== | 
| 37 | C     actualIts      - Number of iterations taken | 
| 38 | C     actualResidual - residual | 
| 39 | C     bi          - Block index in X and Y. | 
| 40 | C     bj | 
| 41 | C     etaN        - Used in computing search directions | 
| 42 | C     etaNM1        suffix N and NM1 denote current and | 
| 43 | C     beta          previous iterations respectively. | 
| 44 | C     alpha | 
| 45 | C     sumRHS      - Sum of right-hand-side. Sometimes this is a | 
| 46 | C                   useful debuggin/trouble shooting diagnostic. | 
| 47 | C                   For neumann problems sumRHS needs to be ~0. | 
| 48 | C                   or they converge at a non-zero residual. | 
| 49 | C     err         - Measure of residual of Ax - b, usually the norm. | 
| 50 | C     I, J, N     - Loop counters ( N counts CG iterations ) | 
| 51 | INTEGER actualIts | 
| 52 | REAL    actualResidual | 
| 53 | INTEGER bi, bj | 
| 54 | INTEGER I, J, N | 
| 55 | REAL    err | 
| 56 | REAL    errSum | 
| 57 | REAL    etaN | 
| 58 | REAL    etaNM1 | 
| 59 | REAL    etaNSum | 
| 60 | REAL    beta | 
| 61 | REAL    alpha | 
| 62 | REAL    alphaSum | 
| 63 | REAL    sumRHS | 
| 64 | REAL    temp | 
| 65 |  | 
| 66 | C--   Initialise inverter | 
| 67 | etaNM1              = 1. D0 | 
| 68 |  | 
| 69 | C--   Initial residual calculation | 
| 70 | err    = 0. _d 0 | 
| 71 | sumRHS = 0. _d 0 | 
| 72 | DO J=1,sNy | 
| 73 | DO I=1,sNx | 
| 74 | cg2d_s(I,J) = 0. | 
| 75 | cg2d_r(I,J) = cg2d_b(I,J) - | 
| 76 | &   ( aW2d(I  ,J  )*cg2d_x(I-1,J  )+aW2d(I+1,J  )*cg2d_x(I+1,J  ) | 
| 77 | &    +aS2d(I  ,J  )*cg2d_x(I  ,J-1)+aS2d(I  ,J+1)*cg2d_x(I  ,J+1) | 
| 78 | &    -aW2d(I  ,J  )*cg2d_x(I  ,J  )-aW2d(I+1,J  )*cg2d_x(I  ,J  ) | 
| 79 | &    -aS2d(I  ,J  )*cg2d_x(I  ,J  )-aS2d(I  ,J+1)*cg2d_x(I  ,J  ) | 
| 80 | &   ) | 
| 81 | err    = err    + cg2d_r(I,J)*cg2d_r(I,J) | 
| 82 | sumRHS = sumRHS + cg2d_b(I,J) | 
| 83 | ENDDO | 
| 84 | ENDDO | 
| 85 | CALL EXCH_XY_R8( cg2d_r    ) | 
| 86 | CALL EXCH_XY_R8( cg2d_s    ) | 
| 87 | CALL GSUM_R8( temp, err    ) | 
| 88 | err    = temp | 
| 89 | CALL GSUM_R8( temp, sumRHS ) | 
| 90 | sumRHS = temp | 
| 91 |  | 
| 92 | actualIts      = 0 | 
| 93 | actualResidual = SQRT(err) | 
| 94 | WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 95 | IF ( actualResidual .EQ. 0. ) STOP 'ABNORMAL END: RESIDUAL 0' | 
| 96 |  | 
| 97 | C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< | 
| 98 | DO 10 N=1, cg2dMaxIters | 
| 99 | C--    Solve preconditioning equation and update | 
| 100 | C--    conjugate direction vector "s". | 
| 101 | etaN = 0. _d 0 | 
| 102 | DO J=1,sNy | 
| 103 | DO I=1,sNx | 
| 104 | cg2d_q(I,J) = | 
| 105 | &    pW(I  ,J  )*cg2d_r(I-1,J  )+pW(I+1,J  )*cg2d_r(I+1,J  ) | 
| 106 | &   +pS(I  ,J  )*cg2d_r(I  ,J-1)+pS(I  ,J+1)*cg2d_r(I  ,J+1) | 
| 107 | &   +pC(I  ,J  )*cg2d_r(I  ,J  ) | 
| 108 | etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J) | 
| 109 | ENDDO | 
| 110 | ENDDO | 
| 111 |  | 
| 112 | CALL GSUM_R8( temp, etaN ) | 
| 113 | etaN   = temp | 
| 114 | beta   = etaN/etaNM1 | 
| 115 | etaNM1 = etaN | 
| 116 |  | 
| 117 | DO J=1,sNy | 
| 118 | DO I=1,sNx | 
| 119 | cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J) | 
| 120 | ENDDO | 
| 121 | ENDDO | 
| 122 |  | 
| 123 | C--    Do exchanges that require messages i.e. between | 
| 124 | C--    processes. | 
| 125 | CALL EXCH_XY_R8( cg2d_s ) | 
| 126 |  | 
| 127 | C--    Ten extra exchanges | 
| 128 | #ifdef TEN_EXTRA_EXCHS | 
| 129 | CALL EXCH_XY_R8( cg2d_s ) | 
| 130 | CALL EXCH_XY_R8( cg2d_s ) | 
| 131 | CALL EXCH_XY_R8( cg2d_s ) | 
| 132 | CALL EXCH_XY_R8( cg2d_s ) | 
| 133 | CALL EXCH_XY_R8( cg2d_s ) | 
| 134 | CALL EXCH_XY_R8( cg2d_s ) | 
| 135 | CALL EXCH_XY_R8( cg2d_s ) | 
| 136 | CALL EXCH_XY_R8( cg2d_s ) | 
| 137 | CALL EXCH_XY_R8( cg2d_s ) | 
| 138 | CALL EXCH_XY_R8( cg2d_s ) | 
| 139 | #endif | 
| 140 |  | 
| 141 | C==    Evaluate laplace operator on conjugate gradient vector | 
| 142 | C==    q = A.s | 
| 143 | alpha = 0. _d 0 | 
| 144 | DO J=1,sNy | 
| 145 | DO I=1,sNx | 
| 146 | cg2d_q(I,J) = | 
| 147 | &   aW2d(I  ,J  )*cg2d_s(I-1,J  )+aW2d(I+1,J  )*cg2d_s(I+1,J  ) | 
| 148 | &  +aS2d(I  ,J  )*cg2d_s(I  ,J-1)+aS2d(I  ,J+1)*cg2d_s(I  ,J+1) | 
| 149 | &  -aW2d(I  ,J  )*cg2d_s(I  ,J  )-aW2d(I+1,J  )*cg2d_s(I  ,J  ) | 
| 150 | &  -aS2d(I  ,J  )*cg2d_s(I  ,J  )-aS2d(I  ,J+1)*cg2d_s(I  ,J  ) | 
| 151 | alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J) | 
| 152 | ENDDO | 
| 153 | ENDDO | 
| 154 | CALL GSUM_R8( temp, alpha ) | 
| 155 |  | 
| 156 | #ifdef HUNDRED_EXTRA_SUMS | 
| 157 | C--    Hundred extra global sums | 
| 158 | CALL GSUM_R8( temp, alpha ) | 
| 159 | CALL GSUM_R8( temp, alpha ) | 
| 160 | CALL GSUM_R8( temp, alpha ) | 
| 161 | CALL GSUM_R8( temp, alpha ) | 
| 162 | CALL GSUM_R8( temp, alpha ) | 
| 163 | CALL GSUM_R8( temp, alpha ) | 
| 164 | CALL GSUM_R8( temp, alpha ) | 
| 165 | CALL GSUM_R8( temp, alpha ) | 
| 166 | CALL GSUM_R8( temp, alpha ) | 
| 167 | CALL GSUM_R8( temp, alpha ) | 
| 168 | CALL GSUM_R8( temp, alpha ) | 
| 169 | CALL GSUM_R8( temp, alpha ) | 
| 170 | CALL GSUM_R8( temp, alpha ) | 
| 171 | CALL GSUM_R8( temp, alpha ) | 
| 172 | CALL GSUM_R8( temp, alpha ) | 
| 173 | CALL GSUM_R8( temp, alpha ) | 
| 174 | CALL GSUM_R8( temp, alpha ) | 
| 175 | CALL GSUM_R8( temp, alpha ) | 
| 176 | CALL GSUM_R8( temp, alpha ) | 
| 177 | CALL GSUM_R8( temp, alpha ) | 
| 178 | CALL GSUM_R8( temp, alpha ) | 
| 179 | CALL GSUM_R8( temp, alpha ) | 
| 180 | CALL GSUM_R8( temp, alpha ) | 
| 181 | CALL GSUM_R8( temp, alpha ) | 
| 182 | CALL GSUM_R8( temp, alpha ) | 
| 183 | CALL GSUM_R8( temp, alpha ) | 
| 184 | CALL GSUM_R8( temp, alpha ) | 
| 185 | CALL GSUM_R8( temp, alpha ) | 
| 186 | CALL GSUM_R8( temp, alpha ) | 
| 187 | CALL GSUM_R8( temp, alpha ) | 
| 188 | CALL GSUM_R8( temp, alpha ) | 
| 189 | CALL GSUM_R8( temp, alpha ) | 
| 190 | CALL GSUM_R8( temp, alpha ) | 
| 191 | CALL GSUM_R8( temp, alpha ) | 
| 192 | CALL GSUM_R8( temp, alpha ) | 
| 193 | CALL GSUM_R8( temp, alpha ) | 
| 194 | CALL GSUM_R8( temp, alpha ) | 
| 195 | CALL GSUM_R8( temp, alpha ) | 
| 196 | CALL GSUM_R8( temp, alpha ) | 
| 197 | CALL GSUM_R8( temp, alpha ) | 
| 198 | CALL GSUM_R8( temp, alpha ) | 
| 199 | CALL GSUM_R8( temp, alpha ) | 
| 200 | CALL GSUM_R8( temp, alpha ) | 
| 201 | CALL GSUM_R8( temp, alpha ) | 
| 202 | CALL GSUM_R8( temp, alpha ) | 
| 203 | CALL GSUM_R8( temp, alpha ) | 
| 204 | CALL GSUM_R8( temp, alpha ) | 
| 205 | CALL GSUM_R8( temp, alpha ) | 
| 206 | CALL GSUM_R8( temp, alpha ) | 
| 207 | CALL GSUM_R8( temp, alpha ) | 
| 208 | CALL GSUM_R8( temp, alpha ) | 
| 209 | CALL GSUM_R8( temp, alpha ) | 
| 210 | CALL GSUM_R8( temp, alpha ) | 
| 211 | CALL GSUM_R8( temp, alpha ) | 
| 212 | CALL GSUM_R8( temp, alpha ) | 
| 213 | CALL GSUM_R8( temp, alpha ) | 
| 214 | CALL GSUM_R8( temp, alpha ) | 
| 215 | CALL GSUM_R8( temp, alpha ) | 
| 216 | CALL GSUM_R8( temp, alpha ) | 
| 217 | CALL GSUM_R8( temp, alpha ) | 
| 218 | CALL GSUM_R8( temp, alpha ) | 
| 219 | CALL GSUM_R8( temp, alpha ) | 
| 220 | CALL GSUM_R8( temp, alpha ) | 
| 221 | CALL GSUM_R8( temp, alpha ) | 
| 222 | CALL GSUM_R8( temp, alpha ) | 
| 223 | CALL GSUM_R8( temp, alpha ) | 
| 224 | CALL GSUM_R8( temp, alpha ) | 
| 225 | CALL GSUM_R8( temp, alpha ) | 
| 226 | CALL GSUM_R8( temp, alpha ) | 
| 227 | CALL GSUM_R8( temp, alpha ) | 
| 228 | CALL GSUM_R8( temp, alpha ) | 
| 229 | CALL GSUM_R8( temp, alpha ) | 
| 230 | CALL GSUM_R8( temp, alpha ) | 
| 231 | CALL GSUM_R8( temp, alpha ) | 
| 232 | CALL GSUM_R8( temp, alpha ) | 
| 233 | CALL GSUM_R8( temp, alpha ) | 
| 234 | CALL GSUM_R8( temp, alpha ) | 
| 235 | CALL GSUM_R8( temp, alpha ) | 
| 236 | CALL GSUM_R8( temp, alpha ) | 
| 237 | CALL GSUM_R8( temp, alpha ) | 
| 238 | CALL GSUM_R8( temp, alpha ) | 
| 239 | CALL GSUM_R8( temp, alpha ) | 
| 240 | CALL GSUM_R8( temp, alpha ) | 
| 241 | CALL GSUM_R8( temp, alpha ) | 
| 242 | CALL GSUM_R8( temp, alpha ) | 
| 243 | CALL GSUM_R8( temp, alpha ) | 
| 244 | CALL GSUM_R8( temp, alpha ) | 
| 245 | CALL GSUM_R8( temp, alpha ) | 
| 246 | CALL GSUM_R8( temp, alpha ) | 
| 247 | CALL GSUM_R8( temp, alpha ) | 
| 248 | CALL GSUM_R8( temp, alpha ) | 
| 249 | CALL GSUM_R8( temp, alpha ) | 
| 250 | CALL GSUM_R8( temp, alpha ) | 
| 251 | CALL GSUM_R8( temp, alpha ) | 
| 252 | CALL GSUM_R8( temp, alpha ) | 
| 253 | CALL GSUM_R8( temp, alpha ) | 
| 254 | CALL GSUM_R8( temp, alpha ) | 
| 255 | CALL GSUM_R8( temp, alpha ) | 
| 256 | CALL GSUM_R8( temp, alpha ) | 
| 257 | CALL GSUM_R8( temp, alpha ) | 
| 258 | #endif | 
| 259 |  | 
| 260 | alpha = temp | 
| 261 | alpha = etaN/alpha | 
| 262 |  | 
| 263 | C==    Update solution and residual vectors | 
| 264 | C      Now compute "interior" points. | 
| 265 | err = 0. _d 0 | 
| 266 | DO J=1,sNy | 
| 267 | DO I=1,sNx | 
| 268 | cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J) | 
| 269 | cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J) | 
| 270 | err = err+cg2d_r(I,J)*cg2d_r(I,J) | 
| 271 | ENDDO | 
| 272 | ENDDO | 
| 273 |  | 
| 274 | CALL GSUM_R8( temp, err ) | 
| 275 |  | 
| 276 | err = temp | 
| 277 | err = SQRT(err) | 
| 278 | actualIts      = N | 
| 279 | actualResidual = err | 
| 280 | CcnhDebugStarts | 
| 281 | C      WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 282 | CcnhDebugEnds | 
| 283 | IF ( err .LT. cg2dTargetResidual ) GOTO 11 | 
| 284 | CALL EXCH_XY_R8(cg2d_r ) | 
| 285 | 10 CONTINUE | 
| 286 | 11 CONTINUE | 
| 287 | CALL EXCH_XY_R8(cg2d_x ) | 
| 288 | WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 289 |  | 
| 290 | C     Calc Ax to check result | 
| 291 | DO J=1,sNy | 
| 292 | DO I=1,sNx | 
| 293 | cg2d_Ax(I,J) = | 
| 294 | &   ( aW2d(I  ,J  )*cg2d_x(I-1,J  )+aW2d(I+1,J  )*cg2d_x(I+1,J  ) | 
| 295 | &    +aS2d(I  ,J  )*cg2d_x(I  ,J-1)+aS2d(I  ,J+1)*cg2d_x(I  ,J+1) | 
| 296 | &    -aW2d(I  ,J  )*cg2d_x(I  ,J  )-aW2d(I+1,J  )*cg2d_x(I  ,J  ) | 
| 297 | &    -aS2d(I  ,J  )*cg2d_x(I  ,J  )-aS2d(I  ,J+1)*cg2d_x(I  ,J  ) | 
| 298 | &   ) | 
| 299 | cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J) | 
| 300 | ENDDO | 
| 301 | ENDDO | 
| 302 | CALL EXCH_XY_R8(cg2d_Ax ) | 
| 303 | CALL EXCH_XY_R8(cg2d_r  ) | 
| 304 |  | 
| 305 | END |