| 1 | ce107 | 1.2 | C       $Id$ | 
| 2 | ce107 | 1.1 | SUBROUTINE CG2D | 
| 3 |  |  | C     /==========================================================\ | 
| 4 |  |  | C     | SUBROUTINE CG2D                                          | | 
| 5 |  |  | C     | o Two-dimensional grid problem conjugate-gradient        | | 
| 6 |  |  | C     |   inverter (with preconditioner).                        | | 
| 7 |  |  | C     |==========================================================| | 
| 8 |  |  | C     | Con. grad is an iterative procedure for solving Ax = b.  | | 
| 9 |  |  | C     | It requires the A be symmetric.                          | | 
| 10 |  |  | C     | This implementation assumes A is a five-diagonal         | | 
| 11 |  |  | C     | matrix of the form that arises in the discrete           | | 
| 12 |  |  | C     | representation of the del^2 operator in a                | | 
| 13 |  |  | C     | two-dimensional space.                                   | | 
| 14 |  |  | C     | Notes:                                                   | | 
| 15 |  |  | C     | ======                                                   | | 
| 16 |  |  | C     | This implementation can support shared-memory            | | 
| 17 |  |  | C     | multi-threaded execution. In order to do this COMMON     | | 
| 18 |  |  | C     | blocks are used for many of the arrays - even ones that  | | 
| 19 |  |  | C     | are only used for intermedaite results. This design is   | | 
| 20 |  |  | C     | OK if you want to all the threads to collaborate on      | | 
| 21 |  |  | C     | solving the same problem. On the other hand if you want  | | 
| 22 |  |  | C     | the threads to solve several different problems          | | 
| 23 |  |  | C     | concurrently this implementation will not work.          | | 
| 24 |  |  | C     \==========================================================/ | 
| 25 |  |  | IMPLICIT NONE | 
| 26 |  |  |  | 
| 27 |  |  | C     === Global data === | 
| 28 |  |  | #include "SIZE.h" | 
| 29 |  |  | #include "EEPARAMS.h" | 
| 30 |  |  | #include "PARAMS.h" | 
| 31 |  |  | #include "CG2D.h" | 
| 32 | ce107 | 1.2 | #if defined(USE_PAPI_FLOPS) || defined(USE_PAPI_FLIPS) | 
| 33 |  |  | #if defined(PAPI_PER_ITERATION) || defined(PAPI_PER_TIMESTEP) | 
| 34 |  |  | #include "PAPI.h" | 
| 35 |  |  | #endif | 
| 36 |  |  | #endif | 
| 37 | ce107 | 1.1 |  | 
| 38 |  |  | C     === Routine arguments === | 
| 39 |  |  | C     myThid - Thread on which I am working. | 
| 40 |  |  | INTEGER myThid | 
| 41 |  |  |  | 
| 42 |  |  | C     === Local variables ==== | 
| 43 |  |  | C     actualIts      - Number of iterations taken | 
| 44 |  |  | C     actualResidual - residual | 
| 45 |  |  | C     bi          - Block index in X and Y. | 
| 46 |  |  | C     bj | 
| 47 |  |  | C     etaN        - Used in computing search directions | 
| 48 |  |  | C     etaNM1        suffix N and NM1 denote current and | 
| 49 |  |  | C     beta          previous iterations respectively. | 
| 50 |  |  | C     alpha | 
| 51 |  |  | C     sumRHS      - Sum of right-hand-side. Sometimes this is a | 
| 52 |  |  | C                   useful debuggin/trouble shooting diagnostic. | 
| 53 |  |  | C                   For neumann problems sumRHS needs to be ~0. | 
| 54 |  |  | C                   or they converge at a non-zero residual. | 
| 55 |  |  | C     err         - Measure of residual of Ax - b, usually the norm. | 
| 56 |  |  | C     I, J, N     - Loop counters ( N counts CG iterations ) | 
| 57 |  |  | INTEGER actualIts | 
| 58 |  |  | INTEGER bi, bj | 
| 59 |  |  | INTEGER I, J, N | 
| 60 | ce107 | 1.2 | #ifdef USE_MIXED_PRECISION | 
| 61 |  |  | REAL*8    actualResidual | 
| 62 |  |  | REAL*8    err | 
| 63 |  |  | REAL*8    errSum | 
| 64 |  |  | REAL*8    etaN | 
| 65 |  |  | REAL*8    etaNM1 | 
| 66 |  |  | REAL*8    etaNSum | 
| 67 |  |  | REAL*8    beta | 
| 68 |  |  | REAL*8    alpha | 
| 69 |  |  | REAL*8    alphaSum | 
| 70 |  |  | REAL*8    sumRHS | 
| 71 |  |  | REAL*8    temp | 
| 72 |  |  | #else | 
| 73 |  |  | Real    actualResidual | 
| 74 |  |  | Real    err | 
| 75 |  |  | Real    errSum | 
| 76 |  |  | Real    etaN | 
| 77 |  |  | Real    etaNM1 | 
| 78 |  |  | Real    etaNSum | 
| 79 |  |  | Real    beta | 
| 80 |  |  | Real    alpha | 
| 81 |  |  | Real    alphaSum | 
| 82 |  |  | Real    sumRHS | 
| 83 |  |  | Real    temp | 
| 84 |  |  | #endif | 
| 85 | ce107 | 1.1 |  | 
| 86 |  |  | C--   Initialise inverter | 
| 87 | ce107 | 1.2 | etaNM1              = 1. _d 0 | 
| 88 | ce107 | 1.1 |  | 
| 89 |  |  | C--   Initial residual calculation | 
| 90 |  |  | err    = 0. _d 0 | 
| 91 |  |  | sumRHS = 0. _d 0 | 
| 92 |  |  | DO J=1,sNy | 
| 93 |  |  | DO I=1,sNx | 
| 94 | ce107 | 1.2 | cg2d_s(I,J) = 0. _d 0 | 
| 95 | ce107 | 1.1 | cg2d_r(I,J) = cg2d_b(I,J) - | 
| 96 |  |  | &   ( aW2d(I  ,J  )*cg2d_x(I-1,J  )+aW2d(I+1,J  )*cg2d_x(I+1,J  ) | 
| 97 |  |  | &    +aS2d(I  ,J  )*cg2d_x(I  ,J-1)+aS2d(I  ,J+1)*cg2d_x(I  ,J+1) | 
| 98 |  |  | &    -aW2d(I  ,J  )*cg2d_x(I  ,J  )-aW2d(I+1,J  )*cg2d_x(I  ,J  ) | 
| 99 |  |  | &    -aS2d(I  ,J  )*cg2d_x(I  ,J  )-aS2d(I  ,J+1)*cg2d_x(I  ,J  ) | 
| 100 |  |  | &   ) | 
| 101 |  |  | err    = err    + cg2d_r(I,J)*cg2d_r(I,J) | 
| 102 |  |  | sumRHS = sumRHS + cg2d_b(I,J) | 
| 103 |  |  | ENDDO | 
| 104 |  |  | ENDDO | 
| 105 |  |  | CALL EXCH_XY_R8( cg2d_r    ) | 
| 106 |  |  | CALL EXCH_XY_R8( cg2d_s    ) | 
| 107 |  |  | CALL GSUM_R8( temp, err    ) | 
| 108 |  |  | err    = temp | 
| 109 |  |  | CALL GSUM_R8( temp, sumRHS ) | 
| 110 |  |  | sumRHS = temp | 
| 111 |  |  |  | 
| 112 |  |  | actualIts      = 0 | 
| 113 |  |  | actualResidual = SQRT(err) | 
| 114 |  |  | WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 115 | ce107 | 1.2 | IF ( actualResidual .EQ. 0. _d 0) STOP 'ABNORMAL END: RESIDUAL 0' | 
| 116 | ce107 | 1.1 |  | 
| 117 |  |  | C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< | 
| 118 |  |  | DO 10 N=1, cg2dMaxIters | 
| 119 |  |  | C--    Solve preconditioning equation and update | 
| 120 |  |  | C--    conjugate direction vector "s". | 
| 121 |  |  | etaN = 0. _d 0 | 
| 122 |  |  | DO J=1,sNy | 
| 123 |  |  | DO I=1,sNx | 
| 124 |  |  | cg2d_q(I,J) = | 
| 125 |  |  | &    pW(I  ,J  )*cg2d_r(I-1,J  )+pW(I+1,J  )*cg2d_r(I+1,J  ) | 
| 126 |  |  | &   +pS(I  ,J  )*cg2d_r(I  ,J-1)+pS(I  ,J+1)*cg2d_r(I  ,J+1) | 
| 127 |  |  | &   +pC(I  ,J  )*cg2d_r(I  ,J  ) | 
| 128 |  |  | etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J) | 
| 129 |  |  | ENDDO | 
| 130 |  |  | ENDDO | 
| 131 |  |  |  | 
| 132 |  |  | CALL GSUM_R8( temp, etaN ) | 
| 133 |  |  | etaN   = temp | 
| 134 |  |  | beta   = etaN/etaNM1 | 
| 135 |  |  | etaNM1 = etaN | 
| 136 |  |  |  | 
| 137 |  |  | DO J=1,sNy | 
| 138 |  |  | DO I=1,sNx | 
| 139 |  |  | cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J) | 
| 140 |  |  | ENDDO | 
| 141 |  |  | ENDDO | 
| 142 |  |  |  | 
| 143 |  |  | C--    Do exchanges that require messages i.e. between | 
| 144 |  |  | C--    processes. | 
| 145 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 146 |  |  |  | 
| 147 |  |  | C--    Ten extra exchanges | 
| 148 |  |  | #ifdef TEN_EXTRA_EXCHS | 
| 149 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 150 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 151 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 152 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 153 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 154 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 155 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 156 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 157 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 158 |  |  | CALL EXCH_XY_R8( cg2d_s ) | 
| 159 |  |  | #endif | 
| 160 |  |  |  | 
| 161 |  |  | C==    Evaluate laplace operator on conjugate gradient vector | 
| 162 |  |  | C==    q = A.s | 
| 163 |  |  | alpha = 0. _d 0 | 
| 164 |  |  | DO J=1,sNy | 
| 165 |  |  | DO I=1,sNx | 
| 166 |  |  | cg2d_q(I,J) = | 
| 167 |  |  | &   aW2d(I  ,J  )*cg2d_s(I-1,J  )+aW2d(I+1,J  )*cg2d_s(I+1,J  ) | 
| 168 |  |  | &  +aS2d(I  ,J  )*cg2d_s(I  ,J-1)+aS2d(I  ,J+1)*cg2d_s(I  ,J+1) | 
| 169 |  |  | &  -aW2d(I  ,J  )*cg2d_s(I  ,J  )-aW2d(I+1,J  )*cg2d_s(I  ,J  ) | 
| 170 |  |  | &  -aS2d(I  ,J  )*cg2d_s(I  ,J  )-aS2d(I  ,J+1)*cg2d_s(I  ,J  ) | 
| 171 |  |  | alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J) | 
| 172 |  |  | ENDDO | 
| 173 |  |  | ENDDO | 
| 174 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 175 |  |  |  | 
| 176 |  |  | #ifdef HUNDRED_EXTRA_SUMS | 
| 177 |  |  | C--    Hundred extra global sums | 
| 178 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 179 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 180 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 181 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 182 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 183 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 184 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 185 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 186 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 187 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 188 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 189 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 190 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 191 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 192 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 193 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 194 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 195 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 196 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 197 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 198 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 199 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 200 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 201 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 202 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 203 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 204 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 205 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 206 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 207 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 208 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 209 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 210 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 211 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 212 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 213 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 214 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 215 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 216 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 217 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 218 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 219 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 220 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 221 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 222 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 223 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 224 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 225 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 226 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 227 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 228 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 229 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 230 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 231 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 232 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 233 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 234 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 235 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 236 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 237 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 238 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 239 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 240 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 241 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 242 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 243 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 244 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 245 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 246 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 247 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 248 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 249 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 250 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 251 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 252 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 253 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 254 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 255 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 256 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 257 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 258 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 259 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 260 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 261 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 262 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 263 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 264 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 265 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 266 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 267 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 268 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 269 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 270 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 271 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 272 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 273 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 274 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 275 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 276 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 277 |  |  | CALL GSUM_R8( temp, alpha ) | 
| 278 |  |  | #endif | 
| 279 |  |  |  | 
| 280 |  |  | alpha = temp | 
| 281 |  |  | alpha = etaN/alpha | 
| 282 |  |  |  | 
| 283 |  |  | C==    Update solution and residual vectors | 
| 284 |  |  | C      Now compute "interior" points. | 
| 285 |  |  | err = 0. _d 0 | 
| 286 |  |  | DO J=1,sNy | 
| 287 |  |  | DO I=1,sNx | 
| 288 |  |  | cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J) | 
| 289 |  |  | cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J) | 
| 290 |  |  | err = err+cg2d_r(I,J)*cg2d_r(I,J) | 
| 291 |  |  | ENDDO | 
| 292 |  |  | ENDDO | 
| 293 |  |  |  | 
| 294 |  |  | CALL GSUM_R8( temp, err ) | 
| 295 |  |  |  | 
| 296 |  |  | err = temp | 
| 297 |  |  | err = SQRT(err) | 
| 298 |  |  | actualIts      = N | 
| 299 |  |  | actualResidual = err | 
| 300 | ce107 | 1.2 | #ifdef RESIDUAL_PER_ITERATION | 
| 301 |  |  | WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 302 |  |  | #endif | 
| 303 | ce107 | 1.1 | IF ( err .LT. cg2dTargetResidual ) GOTO 11 | 
| 304 |  |  | CALL EXCH_XY_R8(cg2d_r ) | 
| 305 | ce107 | 1.2 | #ifdef PAPI_PER_ITERATION | 
| 306 |  |  | #ifdef USE_PAPI_FLOPS | 
| 307 |  |  | call PAPIF_flops(real_time, proc_time, flpops, mflops, check) | 
| 308 |  |  | WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 309 |  |  | $      mflops, ' user ', mflops*proc_time/real_time, | 
| 310 |  |  | $      ' wallclock Mflop/s during iteration ', N | 
| 311 |  |  | #else | 
| 312 |  |  | #ifdef USE_PAPI_FLIPS | 
| 313 |  |  | call PAPIF_flips(real_time, proc_time, flpops, mflops, check) | 
| 314 |  |  | WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 315 |  |  | $      mflops, ' user ', mflops*proc_time/real_time, | 
| 316 |  |  | $      ' wallclock Mflip/s during iteration ', N | 
| 317 |  |  | #endif | 
| 318 |  |  | #endif | 
| 319 |  |  | #endif | 
| 320 | ce107 | 1.1 | 10 CONTINUE | 
| 321 |  |  | 11 CONTINUE | 
| 322 |  |  | CALL EXCH_XY_R8(cg2d_x ) | 
| 323 | ce107 | 1.2 | #ifdef PAPI_PER_TIMESTEP | 
| 324 |  |  | #ifdef USE_PAPI_FLOPS | 
| 325 |  |  | call PAPIF_flops(real_time, proc_time, flpops, mflops, check) | 
| 326 |  |  | WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 327 |  |  | $      mflops, ' user ', mflops*proc_time/real_time, | 
| 328 |  |  | $      ' wallclock Mflop/s during iteration ', N | 
| 329 |  |  | #else | 
| 330 |  |  | #ifdef USE_PAPI_FLIPS | 
| 331 |  |  | call PAPIF_flips(real_time, proc_time, flpops, mflops, check) | 
| 332 |  |  | WRITE(6,'(F10.3,A7,F10.3,A37,I8)') | 
| 333 |  |  | $      mflops, ' user ', mflops*proc_time/real_time, | 
| 334 |  |  | $      ' wallclock Mflip/s during iteration ', N | 
| 335 |  |  | #endif | 
| 336 |  |  | #endif | 
| 337 |  |  | #endif | 
| 338 | ce107 | 1.1 | WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual | 
| 339 |  |  |  | 
| 340 |  |  | C     Calc Ax to check result | 
| 341 |  |  | DO J=1,sNy | 
| 342 |  |  | DO I=1,sNx | 
| 343 |  |  | cg2d_Ax(I,J) = | 
| 344 |  |  | &   ( aW2d(I  ,J  )*cg2d_x(I-1,J  )+aW2d(I+1,J  )*cg2d_x(I+1,J  ) | 
| 345 |  |  | &    +aS2d(I  ,J  )*cg2d_x(I  ,J-1)+aS2d(I  ,J+1)*cg2d_x(I  ,J+1) | 
| 346 |  |  | &    -aW2d(I  ,J  )*cg2d_x(I  ,J  )-aW2d(I+1,J  )*cg2d_x(I  ,J  ) | 
| 347 |  |  | &    -aS2d(I  ,J  )*cg2d_x(I  ,J  )-aS2d(I  ,J+1)*cg2d_x(I  ,J  ) | 
| 348 |  |  | &   ) | 
| 349 |  |  | cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J) | 
| 350 |  |  | ENDDO | 
| 351 |  |  | ENDDO | 
| 352 |  |  | CALL EXCH_XY_R8(cg2d_Ax ) | 
| 353 |  |  | CALL EXCH_XY_R8(cg2d_r  ) | 
| 354 |  |  |  | 
| 355 |  |  | END |