| 1 | dgoldberg | 1.1 | C $Header: /u/gcmpack/MITgcm/model/src/cg3d.F,v 1.25 2012/05/11 23:34:06 jmc Exp $ | 
| 2 |  |  | C $Name:  $ | 
| 3 |  |  |  | 
| 4 |  |  | #include "CPP_OPTIONS.h" | 
| 5 |  |  | #ifdef TARGET_NEC_SX | 
| 6 |  |  | C     set a sensible default for the outer loop unrolling parameter that can | 
| 7 |  |  | C     be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h | 
| 8 |  |  | #ifndef CG3D_OUTERLOOPITERS | 
| 9 |  |  | # define CG3D_OUTERLOOPITERS 10 | 
| 10 |  |  | #endif | 
| 11 |  |  | #endif /* TARGET_NEC_SX */ | 
| 12 |  |  |  | 
| 13 |  |  | CBOP | 
| 14 |  |  | C     !ROUTINE: CG3D | 
| 15 |  |  | C     !INTERFACE: | 
| 16 |  |  | SUBROUTINE CG3D( | 
| 17 |  |  | U                cg3d_b, cg3d_x, | 
| 18 |  |  | O                firstResidual, lastResidual, | 
| 19 |  |  | U                numIters, | 
| 20 |  |  | I                myIter, myThid ) | 
| 21 |  |  | C     !DESCRIPTION: \bv | 
| 22 |  |  | C     *==========================================================* | 
| 23 |  |  | C     | SUBROUTINE CG3D | 
| 24 |  |  | C     | o Three-dimensional grid problem conjugate-gradient | 
| 25 |  |  | C     |   inverter (with preconditioner). | 
| 26 |  |  | C     *==========================================================* | 
| 27 |  |  | C     | Con. grad is an iterative procedure for solving Ax = b. | 
| 28 |  |  | C     | It requires the A be symmetric. | 
| 29 |  |  | C     | This implementation assumes A is a seven-diagonal | 
| 30 |  |  | C     | matrix of the form that arises in the discrete | 
| 31 |  |  | C     | representation of the del^2 operator in a | 
| 32 |  |  | C     | three-dimensional space. | 
| 33 |  |  | C     | Notes: | 
| 34 |  |  | C     | ====== | 
| 35 |  |  | C     | This implementation can support shared-memory | 
| 36 |  |  | C     | multi-threaded execution. In order to do this COMMON | 
| 37 |  |  | C     | blocks are used for many of the arrays - even ones that | 
| 38 |  |  | C     | are only used for intermedaite results. This design is | 
| 39 |  |  | C     | OK if you want to all the threads to collaborate on | 
| 40 |  |  | C     | solving the same problem. On the other hand if you want | 
| 41 |  |  | C     | the threads to solve several different problems | 
| 42 |  |  | C     | concurrently this implementation will not work. | 
| 43 |  |  | C     *==========================================================* | 
| 44 |  |  | C     \ev | 
| 45 |  |  |  | 
| 46 |  |  | C     !USES: | 
| 47 |  |  | IMPLICIT NONE | 
| 48 |  |  | C     === Global data === | 
| 49 |  |  | #include "SIZE.h" | 
| 50 |  |  | #include "EEPARAMS.h" | 
| 51 |  |  | #include "PARAMS.h" | 
| 52 |  |  | #include "GRID.h" | 
| 53 |  |  | #include "SURFACE.h" | 
| 54 |  |  | #include "CG3D.h" | 
| 55 |  |  |  | 
| 56 |  |  | C     !INPUT/OUTPUT PARAMETERS: | 
| 57 |  |  | C     === Routine arguments === | 
| 58 |  |  | C     cg3d_b    :: The source term or "right hand side" (output: normalised RHS) | 
| 59 |  |  | C     cg3d_x    :: The solution (input: first guess) | 
| 60 |  |  | C     firstResidual :: the initial residual before any iterations | 
| 61 |  |  | C     minResidualSq :: the lowest residual reached (squared) | 
| 62 |  |  | CC    lastResidual  :: the actual residual reached | 
| 63 |  |  | C     numIters  :: Inp: the maximum number of iterations allowed | 
| 64 |  |  | C                  Out: the actual number of iterations used | 
| 65 |  |  | CC    nIterMin  :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol. | 
| 66 |  |  | CC                 Out: iteration number corresponding to lowest residual | 
| 67 |  |  | C     myIter    :: Current iteration number in simulation | 
| 68 |  |  | C     myThid    :: my Thread Id number | 
| 69 |  |  | _RL     cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy) | 
| 70 |  |  | _RL     cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy) | 
| 71 |  |  | _RL     firstResidual | 
| 72 |  |  | _RL     lastResidual | 
| 73 |  |  | INTEGER numIters | 
| 74 |  |  | INTEGER myIter | 
| 75 |  |  | INTEGER myThid | 
| 76 |  |  |  | 
| 77 |  |  | #ifdef ALLOW_NONHYDROSTATIC | 
| 78 |  |  |  | 
| 79 |  |  | C     !LOCAL VARIABLES: | 
| 80 |  |  | C     === Local variables ==== | 
| 81 |  |  | C     bi, bj     :: tile index in X and Y. | 
| 82 |  |  | C     i, j, k    :: Loop counters | 
| 83 |  |  | C     it3d       :: Loop counter for CG iterations | 
| 84 |  |  | C     actualIts  :: actual CG iteration number | 
| 85 |  |  | C     err_sq     :: Measure of the square of the residual of Ax - b. | 
| 86 |  |  | C     eta_qrN    :: Used in computing search directions; suffix N and NM1 | 
| 87 |  |  | C     eta_qrNM1     denote current and previous iterations respectively. | 
| 88 |  |  | C     cgBeta     :: coeff used to update conjugate direction vector "s". | 
| 89 |  |  | C     alpha      :: coeff used to update solution & residual | 
| 90 |  |  | C     sumRHS     :: Sum of right-hand-side. Sometimes this is a useful | 
| 91 |  |  | C                   debugging/trouble shooting diagnostic. For neumann problems | 
| 92 |  |  | C                   sumRHS needs to be ~0 or it converge at a non-zero residual. | 
| 93 |  |  | C     cg2d_min   :: used to store solution corresponding to lowest residual. | 
| 94 |  |  | C     msgBuf     :: Informational/error message buffer | 
| 95 |  |  | INTEGER bi, bj | 
| 96 |  |  | INTEGER i, j, k, it3d | 
| 97 |  |  | INTEGER actualIts | 
| 98 |  |  | INTEGER km1, kp1 | 
| 99 |  |  | _RL     maskM1, maskP1 | 
| 100 |  |  | _RL     cg3dTolerance_sq | 
| 101 |  |  | _RL     err_sq,  errTile(nSx,nSy) | 
| 102 |  |  | _RL     eta_qrN, eta_qrNtile(nSx,nSy) | 
| 103 |  |  | _RL     eta_qrNM1 | 
| 104 |  |  | _RL     cgBeta | 
| 105 |  |  | _RL     alpha ,  alphaTile(nSx,nSy) | 
| 106 |  |  | _RL     sumRHS,  sumRHStile(nSx,nSy) | 
| 107 |  |  | _RL     rhsMax | 
| 108 |  |  | _RL     rhsNorm | 
| 109 |  |  | CHARACTER*(MAX_LEN_MBUF) msgBuf | 
| 110 |  |  | LOGICAL printResidual | 
| 111 |  |  | _RL     surfFac | 
| 112 |  |  | #ifdef NONLIN_FRSURF | 
| 113 |  |  | INTEGER ks | 
| 114 |  |  | _RL     surfTerm(sNx,sNy) | 
| 115 |  |  | #endif /* NONLIN_FRSURF */ | 
| 116 |  |  | CEOP | 
| 117 |  |  |  | 
| 118 |  |  | #ifdef ALLOW_PETSC | 
| 119 |  |  |  | 
| 120 |  |  | IF (use_cg3d_petsc) THEN | 
| 121 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 122 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 123 |  |  | DO k=1,Nr | 
| 124 |  |  | DO j=1,sNy | 
| 125 |  |  | DO i=1,sNx | 
| 126 |  |  | cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*cg3dNorm | 
| 127 |  |  | &                          * maskC(i,j,k,bi,bj) | 
| 128 |  |  | ENDDO | 
| 129 |  |  | ENDDO | 
| 130 |  |  | ENDDO | 
| 131 |  |  | ENDDO | 
| 132 |  |  | ENDDO | 
| 133 |  |  |  | 
| 134 |  |  |  | 
| 135 |  |  | !      SUBROUTINE CG3D_PETSC( | 
| 136 |  |  | !     U                               cg3d_x,     ! solution vector | 
| 137 |  |  | !     U                               cg3d_b,     ! rhs | 
| 138 |  |  | !     I                               tolerance, | 
| 139 |  |  | !     U                               maxIter, | 
| 140 |  |  | !     I                               myIter, | 
| 141 |  |  | !     I                               myThid ) | 
| 142 |  |  |  | 
| 143 |  |  | CALL CG3D_PETSC( | 
| 144 |  |  | U                               cg3d_x,     ! solution vector | 
| 145 |  |  | U                               cg3d_b,     ! rhs | 
| 146 |  |  | I                               cg3dtargetresidual, | 
| 147 |  |  | I                               numIters, | 
| 148 |  |  | I                               myIter, | 
| 149 |  |  | I                               myThid ) | 
| 150 |  |  |  | 
| 151 |  |  |  | 
| 152 |  |  | ELSE | 
| 153 |  |  |  | 
| 154 |  |  | #endif | 
| 155 |  |  |  | 
| 156 |  |  | C--   Initialise auxiliary constant, some output variable | 
| 157 |  |  | cg3dTolerance_sq = cg3dTargetResidual*cg3dTargetResidual | 
| 158 |  |  | IF ( select_rStar .NE. 0 ) THEN | 
| 159 |  |  | surfFac = freeSurfFac | 
| 160 |  |  | ELSE | 
| 161 |  |  | surfFac = 0. | 
| 162 |  |  | ENDIF | 
| 163 |  |  | #ifdef NONLIN_FRSURF | 
| 164 |  |  | DO j=1,sNy | 
| 165 |  |  | DO i=1,sNx | 
| 166 |  |  | surfTerm(i,j) = 0. | 
| 167 |  |  | ENDDO | 
| 168 |  |  | ENDDO | 
| 169 |  |  | #endif /* NONLIN_FRSURF */ | 
| 170 |  |  |  | 
| 171 |  |  | C--   Initialise inverter | 
| 172 |  |  | eta_qrNM1 = 1. _d 0 | 
| 173 |  |  |  | 
| 174 |  |  | C--   Normalise RHS | 
| 175 |  |  | rhsMax = 0. _d 0 | 
| 176 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 177 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 178 |  |  | DO k=1,Nr | 
| 179 |  |  | DO j=1,sNy | 
| 180 |  |  | DO i=1,sNx | 
| 181 |  |  | cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*cg3dNorm | 
| 182 |  |  | &                          * maskC(i,j,k,bi,bj) | 
| 183 |  |  | rhsMax = MAX(ABS(cg3d_b(i,j,k,bi,bj)),rhsMax) | 
| 184 |  |  | ENDDO | 
| 185 |  |  | ENDDO | 
| 186 |  |  | ENDDO | 
| 187 |  |  | ENDDO | 
| 188 |  |  | ENDDO | 
| 189 |  |  | _GLOBAL_MAX_RL( rhsMax, myThid ) | 
| 190 |  |  | rhsNorm = 1. _d 0 | 
| 191 |  |  | IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax | 
| 192 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 193 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 194 |  |  | DO k=1,Nr | 
| 195 |  |  | DO j=1,sNy | 
| 196 |  |  | DO i=1,sNx | 
| 197 |  |  | cg3d_b(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj)*rhsNorm | 
| 198 |  |  | cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)*rhsNorm | 
| 199 |  |  | ENDDO | 
| 200 |  |  | ENDDO | 
| 201 |  |  | ENDDO | 
| 202 |  |  | ENDDO | 
| 203 |  |  | ENDDO | 
| 204 |  |  |  | 
| 205 |  |  | C--   Update overlaps | 
| 206 |  |  | _EXCH_XYZ_RL( cg3d_x, myThid ) | 
| 207 |  |  |  | 
| 208 |  |  | C--   Initial residual calculation (with free-Surface term) | 
| 209 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 210 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 211 |  |  | errTile(bi,bj)    = 0. _d 0 | 
| 212 |  |  | sumRHStile(bi,bj) = 0. _d 0 | 
| 213 |  |  | #ifdef NONLIN_FRSURF | 
| 214 |  |  | IF ( select_rStar .NE. 0 ) THEN | 
| 215 |  |  | DO j=1,sNy | 
| 216 |  |  | DO i=1,sNx | 
| 217 |  |  | surfTerm(i,j) = 0. | 
| 218 |  |  | ENDDO | 
| 219 |  |  | ENDDO | 
| 220 |  |  | DO k=1,Nr | 
| 221 |  |  | DO j=1,sNy | 
| 222 |  |  | DO i=1,sNx | 
| 223 |  |  | surfTerm(i,j) = surfTerm(i,j) | 
| 224 |  |  | &         +cg3d_x(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj) | 
| 225 |  |  | ENDDO | 
| 226 |  |  | ENDDO | 
| 227 |  |  | ENDDO | 
| 228 |  |  | DO j=1,sNy | 
| 229 |  |  | DO i=1,sNx | 
| 230 |  |  | ks = kSurfC(i,j,bi,bj) | 
| 231 |  |  | surfTerm(i,j) = surfTerm(i,j)*cg3dNorm | 
| 232 |  |  | &              *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj) | 
| 233 |  |  | &              *rA(i,j,bi,bj)*deepFac2F(ks) | 
| 234 |  |  | &              *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTfreesurf | 
| 235 |  |  | ENDDO | 
| 236 |  |  | ENDDO | 
| 237 |  |  | ENDIF | 
| 238 |  |  | #endif /* NONLIN_FRSURF */ | 
| 239 |  |  | DO k=1,Nr | 
| 240 |  |  | km1 = MAX(k-1, 1 ) | 
| 241 |  |  | kp1 = MIN(k+1, Nr) | 
| 242 |  |  | maskM1 = 1. _d 0 | 
| 243 |  |  | maskP1 = 1. _d 0 | 
| 244 |  |  | IF ( k .EQ. 1 ) maskM1 = 0. _d 0 | 
| 245 |  |  | IF ( k .EQ. Nr) maskP1 = 0. _d 0 | 
| 246 |  |  | #ifdef TARGET_NEC_SX | 
| 247 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 248 |  |  | #endif /* TARGET_NEC_SX */ | 
| 249 |  |  | DO j=1,sNy | 
| 250 |  |  | DO i=1,sNx | 
| 251 |  |  | cg3d_r(i,j,k,bi,bj) = cg3d_b(i,j,k,bi,bj) | 
| 252 |  |  | &     -( 0. | 
| 253 |  |  | &       +aW3d( i, j, k, bi,bj)*cg3d_x(i-1,j, k, bi,bj) | 
| 254 |  |  | &       +aW3d(i+1,j, k, bi,bj)*cg3d_x(i+1,j, k, bi,bj) | 
| 255 |  |  | &       +aS3d( i, j, k, bi,bj)*cg3d_x( i,j-1,k, bi,bj) | 
| 256 |  |  | &       +aS3d( i,j+1,k, bi,bj)*cg3d_x( i,j+1,k, bi,bj) | 
| 257 |  |  | &       +aV3d( i, j, k, bi,bj)*cg3d_x( i, j,km1,bi,bj)*maskM1 | 
| 258 |  |  | &       +aV3d( i, j,kp1,bi,bj)*cg3d_x( i, j,kp1,bi,bj)*maskP1 | 
| 259 |  |  | &       +aC3d( i, j, k, bi,bj)*cg3d_x( i, j, k, bi,bj) | 
| 260 |  |  | #ifdef NONLIN_FRSURF | 
| 261 |  |  | &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj) | 
| 262 |  |  | #endif /* NONLIN_FRSURF */ | 
| 263 |  |  | &      ) | 
| 264 |  |  | errTile(bi,bj) = errTile(bi,bj) | 
| 265 |  |  | &                    +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj) | 
| 266 |  |  | sumRHStile(bi,bj) = sumRHStile(bi,bj)+cg3d_b(i,j,k,bi,bj) | 
| 267 |  |  | ENDDO | 
| 268 |  |  | ENDDO | 
| 269 |  |  | DO j=0,sNy+1 | 
| 270 |  |  | DO i=0,sNx+1 | 
| 271 |  |  | cg3d_s(i,j,k,bi,bj) = 0. | 
| 272 |  |  | ENDDO | 
| 273 |  |  | ENDDO | 
| 274 |  |  | ENDDO | 
| 275 |  |  | ENDDO | 
| 276 |  |  | ENDDO | 
| 277 |  |  | CALL EXCH_S3D_RL( cg3d_r, Nr, myThid ) | 
| 278 |  |  | CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid ) | 
| 279 |  |  | CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid ) | 
| 280 |  |  | IF ( debugLevel.GE.debLevC .AND. diagFreq.GT.0. ) THEN | 
| 281 |  |  | CALL WRITE_FLD_S3D_RL( | 
| 282 |  |  | I        'cg3d_r_I', 'I10', 1, Nr, cg3d_r, myIter, myThid ) | 
| 283 |  |  | ENDIF | 
| 284 |  |  |  | 
| 285 |  |  | actualIts = 0 | 
| 286 |  |  | firstResidual = SQRT(err_sq) | 
| 287 |  |  |  | 
| 288 |  |  | printResidual = .FALSE. | 
| 289 |  |  | IF ( debugLevel .GE. debLevZero ) THEN | 
| 290 |  |  | _BEGIN_MASTER( myThid ) | 
| 291 |  |  | printResidual = printResidualFreq.GE.1 | 
| 292 |  |  | WRITE(standardmessageunit,'(A,1P2E22.14)') | 
| 293 |  |  | &     ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax | 
| 294 |  |  | _END_MASTER( myThid ) | 
| 295 |  |  | ENDIF | 
| 296 |  |  |  | 
| 297 |  |  | IF ( err_sq .LT. cg3dTolerance_sq ) GOTO 11 | 
| 298 |  |  |  | 
| 299 |  |  | C     >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< | 
| 300 |  |  | DO 10 it3d=1, numIters | 
| 301 |  |  |  | 
| 302 |  |  | C--    Solve preconditioning equation and update | 
| 303 |  |  | C--    conjugate direction vector "s". | 
| 304 |  |  | C      Note. On the next two loops over all tiles the inner loop ranges | 
| 305 |  |  | C            in sNx and sNy are expanded by 1 to avoid a communication | 
| 306 |  |  | C            step. However this entails a bit of gynamastics because we only | 
| 307 |  |  | C            want eta_qrN for the interior points. | 
| 308 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 309 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 310 |  |  | eta_qrNtile(bi,bj) = 0. _d 0 | 
| 311 |  |  | DO k=1,1 | 
| 312 |  |  | #ifdef TARGET_NEC_SX | 
| 313 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 314 |  |  | #endif /* TARGET_NEC_SX */ | 
| 315 |  |  | DO j=0,sNy+1 | 
| 316 |  |  | DO i=0,sNx+1 | 
| 317 |  |  | cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj) | 
| 318 |  |  | &                        *cg3d_r(i,j,k,bi,bj) | 
| 319 |  |  | ENDDO | 
| 320 |  |  | ENDDO | 
| 321 |  |  | ENDDO | 
| 322 |  |  | DO k=2,Nr | 
| 323 |  |  | #ifdef TARGET_NEC_SX | 
| 324 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 325 |  |  | #endif /* TARGET_NEC_SX */ | 
| 326 |  |  | DO j=0,sNy+1 | 
| 327 |  |  | DO i=0,sNx+1 | 
| 328 |  |  | cg3d_q(i,j,k,bi,bj) = zMC(i,j,k,bi,bj) | 
| 329 |  |  | &                      *( cg3d_r(i,j,k,bi,bj) | 
| 330 |  |  | &                         -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj) | 
| 331 |  |  | &                       ) | 
| 332 |  |  | ENDDO | 
| 333 |  |  | ENDDO | 
| 334 |  |  | ENDDO | 
| 335 |  |  | DO k=Nr,Nr | 
| 336 |  |  | #ifdef TARGET_NEC_SX | 
| 337 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 338 |  |  | #endif /* TARGET_NEC_SX */ | 
| 339 |  |  | DO j=1,sNy | 
| 340 |  |  | DO i=1,sNx | 
| 341 |  |  | eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj) | 
| 342 |  |  | &                        +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj) | 
| 343 |  |  | ENDDO | 
| 344 |  |  | ENDDO | 
| 345 |  |  | ENDDO | 
| 346 |  |  | DO k=Nr-1,1,-1 | 
| 347 |  |  | #ifdef TARGET_NEC_SX | 
| 348 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 349 |  |  | #endif /* TARGET_NEC_SX */ | 
| 350 |  |  | DO j=0,sNy+1 | 
| 351 |  |  | DO i=0,sNx+1 | 
| 352 |  |  | cg3d_q(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj) | 
| 353 |  |  | &                         -zMU(i,j,k,bi,bj)*cg3d_q(i,j,k+1,bi,bj) | 
| 354 |  |  | ENDDO | 
| 355 |  |  | ENDDO | 
| 356 |  |  | #ifdef TARGET_NEC_SX | 
| 357 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 358 |  |  | #endif /* TARGET_NEC_SX */ | 
| 359 |  |  | DO j=1,sNy | 
| 360 |  |  | DO i=1,sNx | 
| 361 |  |  | eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj) | 
| 362 |  |  | &                        +cg3d_q(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj) | 
| 363 |  |  | ENDDO | 
| 364 |  |  | ENDDO | 
| 365 |  |  | ENDDO | 
| 366 |  |  | ENDDO | 
| 367 |  |  | ENDDO | 
| 368 |  |  |  | 
| 369 |  |  | CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid ) | 
| 370 |  |  | cgBeta   = eta_qrN/eta_qrNM1 | 
| 371 |  |  | CcnhDebugStarts | 
| 372 |  |  | c      WRITE(*,*) ' CG3D: Iteration ', it3d-1, | 
| 373 |  |  | c    &            ' eta_qrN=', eta_qrN, ' beta=', cgBeta | 
| 374 |  |  | CcnhDebugEnds | 
| 375 |  |  | eta_qrNM1 = eta_qrN | 
| 376 |  |  |  | 
| 377 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 378 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 379 |  |  | DO k=1,Nr | 
| 380 |  |  | DO j=0,sNy+1 | 
| 381 |  |  | DO i=0,sNx+1 | 
| 382 |  |  | cg3d_s(i,j,k,bi,bj) = cg3d_q(i,j,k,bi,bj) | 
| 383 |  |  | &                   + cgBeta*cg3d_s(i,j,k,bi,bj) | 
| 384 |  |  | ENDDO | 
| 385 |  |  | ENDDO | 
| 386 |  |  | ENDDO | 
| 387 |  |  | ENDDO | 
| 388 |  |  | ENDDO | 
| 389 |  |  |  | 
| 390 |  |  | C==    Evaluate laplace operator on conjugate gradient vector | 
| 391 |  |  | C==    q = A.s | 
| 392 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 393 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 394 |  |  | alphaTile(bi,bj) = 0. _d 0 | 
| 395 |  |  | #ifdef NONLIN_FRSURF | 
| 396 |  |  | IF ( select_rStar .NE. 0 ) THEN | 
| 397 |  |  | DO j=1,sNy | 
| 398 |  |  | DO i=1,sNx | 
| 399 |  |  | surfTerm(i,j) = 0. | 
| 400 |  |  | ENDDO | 
| 401 |  |  | ENDDO | 
| 402 |  |  | DO k=1,Nr | 
| 403 |  |  | DO j=1,sNy | 
| 404 |  |  | DO i=1,sNx | 
| 405 |  |  | surfTerm(i,j) = surfTerm(i,j) | 
| 406 |  |  | &         +cg3d_s(i,j,k,bi,bj)*drF(k)*h0FacC(i,j,k,bi,bj) | 
| 407 |  |  | ENDDO | 
| 408 |  |  | ENDDO | 
| 409 |  |  | ENDDO | 
| 410 |  |  | DO j=1,sNy | 
| 411 |  |  | DO i=1,sNx | 
| 412 |  |  | ks = kSurfC(i,j,bi,bj) | 
| 413 |  |  | surfTerm(i,j) = surfTerm(i,j)*cg3dNorm | 
| 414 |  |  | &              *recip_Rcol(i,j,bi,bj)*recip_Rcol(i,j,bi,bj) | 
| 415 |  |  | &              *rA(i,j,bi,bj)*deepFac2F(ks) | 
| 416 |  |  | &              *recip_Bo(i,j,bi,bj)/deltaTMom/deltaTfreesurf | 
| 417 |  |  | ENDDO | 
| 418 |  |  | ENDDO | 
| 419 |  |  | ENDIF | 
| 420 |  |  | #endif /* NONLIN_FRSURF */ | 
| 421 |  |  | IF ( Nr .GT. 1 ) THEN | 
| 422 |  |  | k=1 | 
| 423 |  |  | #ifdef TARGET_NEC_SX | 
| 424 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 425 |  |  | #endif /* TARGET_NEC_SX */ | 
| 426 |  |  | DO j=1,sNy | 
| 427 |  |  | DO i=1,sNx | 
| 428 |  |  | cg3d_q(i,j,k,bi,bj) = | 
| 429 |  |  | &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj) | 
| 430 |  |  | &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj) | 
| 431 |  |  | &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj) | 
| 432 |  |  | &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj) | 
| 433 |  |  | &       +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj) | 
| 434 |  |  | &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj) | 
| 435 |  |  | #ifdef NONLIN_FRSURF | 
| 436 |  |  | &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj) | 
| 437 |  |  | #endif /* NONLIN_FRSURF */ | 
| 438 |  |  | alphaTile(bi,bj) = alphaTile(bi,bj) | 
| 439 |  |  | &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj) | 
| 440 |  |  | ENDDO | 
| 441 |  |  | ENDDO | 
| 442 |  |  | ELSE | 
| 443 |  |  | k=1 | 
| 444 |  |  | #ifdef TARGET_NEC_SX | 
| 445 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 446 |  |  | #endif /* TARGET_NEC_SX */ | 
| 447 |  |  | DO j=1,sNy | 
| 448 |  |  | DO i=1,sNx | 
| 449 |  |  | cg3d_q(i,j,k,bi,bj) = | 
| 450 |  |  | &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj) | 
| 451 |  |  | &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj) | 
| 452 |  |  | &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj) | 
| 453 |  |  | &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj) | 
| 454 |  |  | &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj) | 
| 455 |  |  | #ifdef NONLIN_FRSURF | 
| 456 |  |  | &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj) | 
| 457 |  |  | #endif /* NONLIN_FRSURF */ | 
| 458 |  |  | alphaTile(bi,bj) = alphaTile(bi,bj) | 
| 459 |  |  | &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj) | 
| 460 |  |  | ENDDO | 
| 461 |  |  | ENDDO | 
| 462 |  |  | ENDIF | 
| 463 |  |  | DO k=2,Nr-1 | 
| 464 |  |  | #ifdef TARGET_NEC_SX | 
| 465 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 466 |  |  | #endif /* TARGET_NEC_SX */ | 
| 467 |  |  | DO j=1,sNy | 
| 468 |  |  | DO i=1,sNx | 
| 469 |  |  | cg3d_q(i,j,k,bi,bj) = | 
| 470 |  |  | &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj) | 
| 471 |  |  | &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj) | 
| 472 |  |  | &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj) | 
| 473 |  |  | &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj) | 
| 474 |  |  | &       +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj) | 
| 475 |  |  | &       +aV3d( i, j,k+1,bi,bj)*cg3d_s( i, j,k+1,bi,bj) | 
| 476 |  |  | &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj) | 
| 477 |  |  | #ifdef NONLIN_FRSURF | 
| 478 |  |  | &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj) | 
| 479 |  |  | #endif /* NONLIN_FRSURF */ | 
| 480 |  |  | alphaTile(bi,bj) = alphaTile(bi,bj) | 
| 481 |  |  | &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj) | 
| 482 |  |  | ENDDO | 
| 483 |  |  | ENDDO | 
| 484 |  |  | ENDDO | 
| 485 |  |  | IF ( Nr .GT. 1 ) THEN | 
| 486 |  |  | k=Nr | 
| 487 |  |  | #ifdef TARGET_NEC_SX | 
| 488 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 489 |  |  | #endif /* TARGET_NEC_SX */ | 
| 490 |  |  | DO j=1,sNy | 
| 491 |  |  | DO i=1,sNx | 
| 492 |  |  | cg3d_q(i,j,k,bi,bj) = | 
| 493 |  |  | &        aW3d( i, j, k, bi,bj)*cg3d_s(i-1,j, k, bi,bj) | 
| 494 |  |  | &       +aW3d(i+1,j, k, bi,bj)*cg3d_s(i+1,j, k, bi,bj) | 
| 495 |  |  | &       +aS3d( i, j, k, bi,bj)*cg3d_s( i,j-1,k, bi,bj) | 
| 496 |  |  | &       +aS3d( i,j+1,k, bi,bj)*cg3d_s( i,j+1,k, bi,bj) | 
| 497 |  |  | &       +aV3d( i, j, k, bi,bj)*cg3d_s( i, j,k-1,bi,bj) | 
| 498 |  |  | &       +aC3d( i, j, k, bi,bj)*cg3d_s( i, j, k, bi,bj) | 
| 499 |  |  | #ifdef NONLIN_FRSURF | 
| 500 |  |  | &       -surfFac*surfTerm(i,j)*drF(k)*h0FacC(i,j,k,bi,bj) | 
| 501 |  |  | #endif /* NONLIN_FRSURF */ | 
| 502 |  |  | alphaTile(bi,bj) = alphaTile(bi,bj) | 
| 503 |  |  | &             +cg3d_s(i,j,k,bi,bj)*cg3d_q(i,j,k,bi,bj) | 
| 504 |  |  | ENDDO | 
| 505 |  |  | ENDDO | 
| 506 |  |  | ENDIF | 
| 507 |  |  | ENDDO | 
| 508 |  |  | ENDDO | 
| 509 |  |  | CALL GLOBAL_SUM_TILE_RL( alphaTile,  alpha,  myThid ) | 
| 510 |  |  | CcnhDebugStarts | 
| 511 |  |  | c      WRITE(*,*) ' CG3D: Iteration ', it3d-1, | 
| 512 |  |  | c    &            ' SUM(s*q)=', alpha, ' alpha=', eta_qrN/alpha | 
| 513 |  |  | CcnhDebugEnds | 
| 514 |  |  | alpha = eta_qrN/alpha | 
| 515 |  |  |  | 
| 516 |  |  | C==    Update simultaneously solution and residual vectors (and Iter number) | 
| 517 |  |  | C      Now compute "interior" points. | 
| 518 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 519 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 520 |  |  | errTile(bi,bj) = 0. _d 0 | 
| 521 |  |  | DO k=1,Nr | 
| 522 |  |  | #ifdef TARGET_NEC_SX | 
| 523 |  |  | !CDIR OUTERUNROLL=CG3D_OUTERLOOPITERS | 
| 524 |  |  | #endif /* TARGET_NEC_SX */ | 
| 525 |  |  | DO j=1,sNy | 
| 526 |  |  | DO i=1,sNx | 
| 527 |  |  | cg3d_x(i,j,k,bi,bj)=cg3d_x(i,j,k,bi,bj) | 
| 528 |  |  | &            +alpha*cg3d_s(i,j,k,bi,bj) | 
| 529 |  |  | cg3d_r(i,j,k,bi,bj)=cg3d_r(i,j,k,bi,bj) | 
| 530 |  |  | &            -alpha*cg3d_q(i,j,k,bi,bj) | 
| 531 |  |  | errTile(bi,bj) = errTile(bi,bj) | 
| 532 |  |  | &            +cg3d_r(i,j,k,bi,bj)*cg3d_r(i,j,k,bi,bj) | 
| 533 |  |  | ENDDO | 
| 534 |  |  | ENDDO | 
| 535 |  |  | ENDDO | 
| 536 |  |  | ENDDO | 
| 537 |  |  | ENDDO | 
| 538 |  |  | actualIts = it3d | 
| 539 |  |  |  | 
| 540 |  |  | CALL GLOBAL_SUM_TILE_RL( errTile,    err_sq, myThid ) | 
| 541 |  |  | IF ( printResidual ) THEN | 
| 542 |  |  | IF ( MOD( it3d-1, printResidualFreq ).EQ.0 ) THEN | 
| 543 |  |  | WRITE(msgBuf,'(A,I6,A,1PE21.14)') | 
| 544 |  |  | &    ' cg3d: iter=', it3d, ' ; resid.= ', SQRT(err_sq) | 
| 545 |  |  | CALL PRINT_MESSAGE( msgBuf, standardMessageUnit, | 
| 546 |  |  | &                       SQUEEZE_RIGHT, myThid ) | 
| 547 |  |  | ENDIF | 
| 548 |  |  | ENDIF | 
| 549 |  |  | IF ( err_sq .LT. cg3dTolerance_sq ) GOTO 11 | 
| 550 |  |  | CALL EXCH_S3D_RL( cg3d_r, Nr, myThid ) | 
| 551 |  |  |  | 
| 552 |  |  | 10 CONTINUE | 
| 553 |  |  | 11 CONTINUE | 
| 554 |  |  |  | 
| 555 |  |  | IF ( debugLevel.GE.debLevC .AND. diagFreq.GT.0. ) THEN | 
| 556 |  |  | CALL WRITE_FLD_S3D_RL( | 
| 557 |  |  | I        'cg3d_r_F', 'I10', 1, Nr, cg3d_r, myIter, myThid ) | 
| 558 |  |  | ENDIF | 
| 559 |  |  |  | 
| 560 |  |  | C--   Un-normalise the answer | 
| 561 |  |  | DO bj=myByLo(myThid),myByHi(myThid) | 
| 562 |  |  | DO bi=myBxLo(myThid),myBxHi(myThid) | 
| 563 |  |  | DO k=1,Nr | 
| 564 |  |  | DO j=1,sNy | 
| 565 |  |  | DO i=1,sNx | 
| 566 |  |  | cg3d_x(i,j,k,bi,bj) = cg3d_x(i,j,k,bi,bj)/rhsNorm | 
| 567 |  |  | ENDDO | 
| 568 |  |  | ENDDO | 
| 569 |  |  | ENDDO | 
| 570 |  |  | ENDDO | 
| 571 |  |  | ENDDO | 
| 572 |  |  |  | 
| 573 |  |  | C--   Return parameters to caller | 
| 574 |  |  | lastResidual = SQRT(err_sq) | 
| 575 |  |  | numIters = actualIts | 
| 576 |  |  |  | 
| 577 |  |  |  | 
| 578 |  |  | #ifdef ALLOW_PETSC | 
| 579 |  |  | ENDIF | 
| 580 |  |  | #endif | 
| 581 |  |  |  | 
| 582 |  |  | #endif /* ALLOW_NONHYDROSTATIC */ | 
| 583 |  |  |  | 
| 584 |  |  | RETURN | 
| 585 |  |  | END |