/[MITgcm]/MITgcm/model/src/cg3d.F
ViewVC logotype

Contents of /MITgcm/model/src/cg3d.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.20 - (show annotations) (download)
Tue Apr 28 18:01:14 2009 UTC (15 years ago) by jmc
Branch: MAIN
CVS Tags: checkpoint61n, checkpoint61q, checkpoint61o, checkpoint61m, checkpoint61v, checkpoint61w, checkpoint61t, checkpoint61u, checkpoint61r, checkpoint61s, checkpoint61p, checkpoint61x, checkpoint61y
Changes since 1.19: +10 -10 lines
change macros (EXCH & GLOBAL_SUM/MAX) sufix _R4/_R8 to _RS/_RL
 when applied to _RS/_RL variable

1 C $Header: /u/gcmpack/MITgcm/model/src/cg3d.F,v 1.19 2007/09/04 14:54:58 jmc Exp $
2 C $Name: $
3
4 #include "CPP_OPTIONS.h"
5
6 CBOP
7 C !ROUTINE: CG3D
8 C !INTERFACE:
9 SUBROUTINE CG3D(
10 I cg3d_b,
11 U cg3d_x,
12 O firstResidual,
13 O lastResidual,
14 U numIters,
15 I myThid )
16 C !DESCRIPTION: \bv
17 C *==========================================================*
18 C | SUBROUTINE CG3D
19 C | o Three-dimensional grid problem conjugate-gradient
20 C | inverter (with preconditioner).
21 C *==========================================================*
22 C | Con. grad is an iterative procedure for solving Ax = b.
23 C | It requires the A be symmetric.
24 C | This implementation assumes A is a seven-diagonal
25 C | matrix of the form that arises in the discrete
26 C | representation of the del^2 operator in a
27 C | three-dimensional space.
28 C | Notes:
29 C | ======
30 C | This implementation can support shared-memory
31 C | multi-threaded execution. In order to do this COMMON
32 C | blocks are used for many of the arrays - even ones that
33 C | are only used for intermedaite results. This design is
34 C | OK if you want to all the threads to collaborate on
35 C | solving the same problem. On the other hand if you want
36 C | the threads to solve several different problems
37 C | concurrently this implementation will not work.
38 C *==========================================================*
39 C \ev
40
41 C !USES:
42 IMPLICIT NONE
43 C === Global data ===
44 #include "SIZE.h"
45 #include "EEPARAMS.h"
46 #include "PARAMS.h"
47 #include "GRID.h"
48 #include "CG3D.h"
49
50 C !INPUT/OUTPUT PARAMETERS:
51 C === Routine arguments ===
52 C myThid - Thread on which I am working.
53 C cg3d_b - The source term or "right hand side"
54 C cg3d_x - The solution
55 C firstResidual - the initial residual before any iterations
56 C lastResidual - the actual residual reached
57 C numIters - Entry: the maximum number of iterations allowed
58 C Exit: the actual number of iterations used
59 _RL cg3d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
60 _RL cg3d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy)
61 _RL firstResidual
62 _RL lastResidual
63 INTEGER numIters
64 INTEGER myThid
65
66
67 #ifdef ALLOW_NONHYDROSTATIC
68
69 C !LOCAL VARIABLES:
70 C === Local variables ====
71 C actualIts - Number of iterations taken
72 C actualResidual - residual
73 C bi - Block index in X and Y.
74 C bj
75 C eta_qrN - Used in computing search directions
76 C eta_qrNM1 suffix N and NM1 denote current and
77 C cgBeta previous iterations respectively.
78 C alpha
79 C sumRHS - Sum of right-hand-side. Sometimes this is a
80 C useful debuggin/trouble shooting diagnostic.
81 C For neumann problems sumRHS needs to be ~0.
82 C or they converge at a non-zero residual.
83 C err - Measure of residual of Ax - b, usually the norm.
84 C I, J, K, N - Loop counters ( N counts CG iterations )
85 INTEGER actualIts
86 _RL actualResidual
87 INTEGER bi, bj
88 INTEGER I, J, K, it3d
89 INTEGER Km1, Kp1
90 _RL maskM1, maskP1
91 _RL err, errTile(nSx,nSy)
92 _RL eta_qrN,eta_qrNtile(nSx,nSy)
93 _RL eta_qrNM1
94 _RL cgBeta
95 _RL alpha , alphaTile(nSx,nSy)
96 _RL sumRHS, sumRHStile(nSx,nSy)
97 _RL rhsMax
98 _RL rhsNorm
99 CEOP
100
101
102 C-- Initialise inverter
103 eta_qrNM1 = 1. D0
104
105 C-- Normalise RHS
106 rhsMax = 0. _d 0
107 DO bj=myByLo(myThid),myByHi(myThid)
108 DO bi=myBxLo(myThid),myBxHi(myThid)
109 DO K=1,Nr
110 DO J=1,sNy
111 DO I=1,sNx
112 cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*cg3dNorm
113 & * maskC(I,J,K,bi,bj)
114 rhsMax = MAX(ABS(cg3d_b(I,J,K,bi,bj)),rhsMax)
115 ENDDO
116 ENDDO
117 ENDDO
118 ENDDO
119 ENDDO
120 _GLOBAL_MAX_RL( rhsMax, myThid )
121 rhsNorm = 1. _d 0
122 IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
123 DO bj=myByLo(myThid),myByHi(myThid)
124 DO bi=myBxLo(myThid),myBxHi(myThid)
125 DO K=1,Nr
126 DO J=1,sNy
127 DO I=1,sNx
128 cg3d_b(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj)*rhsNorm
129 cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)*rhsNorm
130 ENDDO
131 ENDDO
132 ENDDO
133 ENDDO
134 ENDDO
135
136 C-- Update overlaps
137 c _EXCH_XYZ_RL( cg3d_b, myThid )
138 _EXCH_XYZ_RL( cg3d_x, myThid )
139
140 C-- Initial residual calculation (with free-Surface term)
141 err = 0. _d 0
142 sumRHS = 0. _d 0
143 DO bj=myByLo(myThid),myByHi(myThid)
144 DO bi=myBxLo(myThid),myBxHi(myThid)
145 errTile(bi,bj) = 0. _d 0
146 sumRHStile(bi,bj) = 0. _d 0
147 DO K=1,Nr
148 Km1 = MAX(K-1, 1 )
149 Kp1 = MIN(K+1, Nr)
150 maskM1 = 1. _d 0
151 maskP1 = 1. _d 0
152 IF ( K .EQ. 1 ) maskM1 = 0. _d 0
153 IF ( K .EQ. Nr) maskP1 = 0. _d 0
154
155 DO J=1,sNy
156 DO I=1,sNx
157 cg3d_r(I,J,K,bi,bj) = cg3d_b(I,J,K,bi,bj) -( 0.
158 & +aW3d(I ,J ,K ,bi,bj)*cg3d_x(I-1,J ,K ,bi,bj)
159 & +aW3d(I+1,J ,K ,bi,bj)*cg3d_x(I+1,J ,K ,bi,bj)
160 & +aS3d(I ,J ,K ,bi,bj)*cg3d_x(I ,J-1,K ,bi,bj)
161 & +aS3d(I ,J+1,K ,bi,bj)*cg3d_x(I ,J+1,K ,bi,bj)
162 & +aV3d(I ,J ,K ,bi,bj)*cg3d_x(I ,J ,Km1,bi,bj)*maskM1
163 & +aV3d(I ,J ,Kp1,bi,bj)*cg3d_x(I ,J ,Kp1,bi,bj)*maskP1
164 & +aC3d(I ,J ,K ,bi,bj)*cg3d_x(I ,J ,K ,bi,bj)
165 & )
166 errTile(bi,bj) = errTile(bi,bj)
167 & +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
168 sumRHStile(bi,bj) = sumRHStile(bi,bj)
169 & +cg3d_b(I,J,K,bi,bj)
170 ENDDO
171 ENDDO
172 DO J=1-1,sNy+1
173 DO I=1-1,sNx+1
174 cg3d_s(I,J,K,bi,bj) = 0.
175 ENDDO
176 ENDDO
177 ENDDO
178 c err = err + errTile(bi,bj)
179 c sumRHS = sumRHS + sumRHStile(bi,bj)
180 ENDDO
181 ENDDO
182 CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
183 c CALL EXCH_S3D_RL( cg3d_s, Nr, myThid )
184 c _GLOBAL_SUM_RL( sumRHS, myThid )
185 c _GLOBAL_SUM_RL( err , myThid )
186 CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
187 CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
188
189 IF ( debugLevel .GE. debLevZero ) THEN
190 _BEGIN_MASTER( myThid )
191 write(standardmessageunit,'(A,1P2E22.14)')
192 & ' cg3d: Sum(rhs),rhsMax = ',sumRHS,rhsMax
193 _END_MASTER( myThid )
194 ENDIF
195
196 actualIts = 0
197 actualResidual = SQRT(err)
198 C _BARRIER
199 c _BEGIN_MASTER( myThid )
200 c WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
201 c & actualIts, actualResidual
202 c _END_MASTER( myThid )
203 firstResidual=actualResidual
204
205 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
206 DO 10 it3d=1, numIters
207
208 CcnhDebugStarts
209 c IF ( mod(it3d-1,10).EQ.0)
210 c & WRITE(*,*) ' CG3D: Iteration ',it3d-1,
211 c & ' residual = ',actualResidual
212 CcnhDebugEnds
213 IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
214 C-- Solve preconditioning equation and update
215 C-- conjugate direction vector "s".
216 C Note. On the next to loops over all tiles the inner loop ranges
217 C in sNx and sNy are expanded by 1 to avoid a communication
218 C step. However this entails a bit of gynamastics because we only
219 C want eta_qrN for the interior points.
220 eta_qrN = 0. _d 0
221 DO bj=myByLo(myThid),myByHi(myThid)
222 DO bi=myBxLo(myThid),myBxHi(myThid)
223 eta_qrNtile(bi,bj) = 0. _d 0
224 DO K=1,1
225 DO J=1-1,sNy+1
226 DO I=1-1,sNx+1
227 cg3d_q(I,J,K,bi,bj) =
228 & zMC(I ,J ,K,bi,bj)*cg3d_r(I ,J ,K,bi,bj)
229 ENDDO
230 ENDDO
231 ENDDO
232 DO K=2,Nr
233 DO J=1-1,sNy+1
234 DO I=1-1,sNx+1
235 cg3d_q(I,J,K,bi,bj) =
236 & zMC(I,J,K,bi,bj)*(cg3d_r(I,J,K ,bi,bj)
237 & -zML(I,J,K,bi,bj)*cg3d_q(I,J,K-1,bi,bj))
238 ENDDO
239 ENDDO
240 ENDDO
241 DO K=Nr,Nr
242 caja IF (Nr .GT. 1) THEN
243 caja DO J=1-1,sNy+1
244 caja DO I=1-1,sNx+1
245 caja cg3d_q(I,J,K,bi,bj) =
246 caja & zMC(i,j,k,bi,bj)*(cg3d_r(i,j,k ,bi,bj)
247 caja & -zML(i,j,k,bi,bj)*cg3d_q(i,j,k-1,bi,bj))
248 caja ENDDO
249 caja ENDDO
250 caja ENDIF
251 DO J=1,sNy
252 DO I=1,sNx
253 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
254 & +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
255 ENDDO
256 ENDDO
257 ENDDO
258 DO K=Nr-1,1,-1
259 DO J=1-1,sNy+1
260 DO I=1-1,sNx+1
261 cg3d_q(I,J,K,bi,bj) =
262 & cg3d_q(I,J,K,bi,bj)
263 & -zMU(I,J,K,bi,bj)*cg3d_q(I,J,K+1,bi,bj)
264 ENDDO
265 ENDDO
266 DO J=1,sNy
267 DO I=1,sNx
268 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
269 & +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
270 ENDDO
271 ENDDO
272 ENDDO
273 c eta_qrN = eta_qrN + eta_qrNtile(bi,bj)
274 ENDDO
275 ENDDO
276 caja
277 caja eta_qrN=0.
278 caja DO bj=myByLo(myThid),myByHi(myThid)
279 caja DO bi=myBxLo(myThid),myBxHi(myThid)
280 caja DO K=1,Nr
281 caja DO J=1,sNy
282 caja DO I=1,sNx
283 caja eta_qrN = eta_qrN
284 caja & +cg3d_q(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
285 caja ENDDO
286 caja ENDDO
287 caja ENDDO
288 caja ENDDO
289 caja ENDDO
290 caja
291
292 c _GLOBAL_SUM_RL(eta_qrN, myThid)
293 CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
294 CcnhDebugStarts
295 C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' eta_qrN = ',eta_qrN
296 CcnhDebugEnds
297 cgBeta = eta_qrN/eta_qrNM1
298 CcnhDebugStarts
299 C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' beta = ',cgBeta
300 CcnhDebugEnds
301 eta_qrNM1 = eta_qrN
302
303 DO bj=myByLo(myThid),myByHi(myThid)
304 DO bi=myBxLo(myThid),myBxHi(myThid)
305 DO K=1,Nr
306 DO J=1-1,sNy+1
307 DO I=1-1,sNx+1
308 cg3d_s(I,J,K,bi,bj) = cg3d_q(I,J,K,bi,bj)
309 & + cgBeta*cg3d_s(I,J,K,bi,bj)
310 ENDDO
311 ENDDO
312 ENDDO
313 ENDDO
314 ENDDO
315
316 C== Evaluate laplace operator on conjugate gradient vector
317 C== q = A.s
318 alpha = 0. _d 0
319 DO bj=myByLo(myThid),myByHi(myThid)
320 DO bi=myBxLo(myThid),myBxHi(myThid)
321 alphaTile(bi,bj) = 0. _d 0
322 IF ( Nr .GT. 1 ) THEN
323 DO K=1,1
324 DO J=1,sNy
325 DO I=1,sNx
326 cg3d_q(I,J,K,bi,bj) =
327 & aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
328 & +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
329 & +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
330 & +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
331 & +aV3d(I ,J ,K+1,bi,bj)*cg3d_s(I ,J ,K+1,bi,bj)
332 & +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
333 alphaTile(bi,bj) = alphaTile(bi,bj)
334 & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
335 ENDDO
336 ENDDO
337 ENDDO
338 ELSE
339 DO K=1,1
340 DO J=1,sNy
341 DO I=1,sNx
342 cg3d_q(I,J,K,bi,bj) =
343 & aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
344 & +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
345 & +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
346 & +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
347 & +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
348 alphaTile(bi,bj) = alphaTile(bi,bj)
349 & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
350 ENDDO
351 ENDDO
352 ENDDO
353 ENDIF
354 DO K=2,Nr-1
355 DO J=1,sNy
356 DO I=1,sNx
357 cg3d_q(I,J,K,bi,bj) =
358 & aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
359 & +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
360 & +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
361 & +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
362 & +aV3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K-1,bi,bj)
363 & +aV3d(I ,J ,K+1,bi,bj)*cg3d_s(I ,J ,K+1,bi,bj)
364 & +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
365 alphaTile(bi,bj) = alphaTile(bi,bj)
366 & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
367 ENDDO
368 ENDDO
369 ENDDO
370 IF ( Nr .GT. 1 ) THEN
371 DO K=Nr,Nr
372 DO J=1,sNy
373 DO I=1,sNx
374 cg3d_q(I,J,K,bi,bj) =
375 & aW3d(I ,J ,K ,bi,bj)*cg3d_s(I-1,J ,K ,bi,bj)
376 & +aW3d(I+1,J ,K ,bi,bj)*cg3d_s(I+1,J ,K ,bi,bj)
377 & +aS3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J-1,K ,bi,bj)
378 & +aS3d(I ,J+1,K ,bi,bj)*cg3d_s(I ,J+1,K ,bi,bj)
379 & +aV3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K-1,bi,bj)
380 & +aC3d(I ,J ,K ,bi,bj)*cg3d_s(I ,J ,K ,bi,bj)
381 alphaTile(bi,bj) = alphaTile(bi,bj)
382 & +cg3d_s(I,J,K,bi,bj)*cg3d_q(I,J,K,bi,bj)
383 ENDDO
384 ENDDO
385 ENDDO
386 ENDIF
387 c alpha = alpha + alphaTile(bi,bj)
388 ENDDO
389 ENDDO
390 c _GLOBAL_SUM_RL(alpha,myThid)
391 CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
392 CcnhDebugStarts
393 C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' SUM(s*q)= ',alpha
394 CcnhDebugEnds
395 alpha = eta_qrN/alpha
396 CcnhDebugStarts
397 C WRITE(*,*) ' CG3D: Iteration ',it3d-1,' alpha= ',alpha
398 CcnhDebugEnds
399
400 C== Update solution and residual vectors
401 C Now compute "interior" points.
402 err = 0. _d 0
403 DO bj=myByLo(myThid),myByHi(myThid)
404 DO bi=myBxLo(myThid),myBxHi(myThid)
405 errTile(bi,bj) = 0. _d 0
406 DO K=1,Nr
407 DO J=1,sNy
408 DO I=1,sNx
409 cg3d_x(I,J,K,bi,bj)=cg3d_x(I,J,K,bi,bj)
410 & +alpha*cg3d_s(I,J,K,bi,bj)
411 cg3d_r(I,J,K,bi,bj)=cg3d_r(I,J,K,bi,bj)
412 & -alpha*cg3d_q(I,J,K,bi,bj)
413 errTile(bi,bj) = errTile(bi,bj)
414 & +cg3d_r(I,J,K,bi,bj)*cg3d_r(I,J,K,bi,bj)
415 ENDDO
416 ENDDO
417 ENDDO
418 c err = err + errTile(bi,bj)
419 ENDDO
420 ENDDO
421
422 c _GLOBAL_SUM_RL( err , myThid )
423 CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
424 err = SQRT(err)
425 actualIts = it3d
426 actualResidual = err
427 IF ( actualResidual .LT. cg3dTargetResidual ) GOTO 11
428 CALL EXCH_S3D_RL( cg3d_r, Nr, myThid )
429
430 10 CONTINUE
431 11 CONTINUE
432
433 C-- Un-normalise the answer
434 DO bj=myByLo(myThid),myByHi(myThid)
435 DO bi=myBxLo(myThid),myBxHi(myThid)
436 DO K=1,Nr
437 DO J=1,sNy
438 DO I=1,sNx
439 cg3d_x(I,J,K,bi,bj) = cg3d_x(I,J,K,bi,bj)/rhsNorm
440 ENDDO
441 ENDDO
442 ENDDO
443 ENDDO
444 ENDDO
445
446 Cadj _EXCH_XYZ_RL(cg3d_x, myThid )
447 c _BEGIN_MASTER( myThid )
448 c WRITE(*,'(A,I6,1PE30.14)') ' CG3D iters, err = ',
449 c & actualIts, actualResidual
450 c _END_MASTER( myThid )
451 lastResidual=actualResidual
452 numIters=actualIts
453
454 #endif /* ALLOW_NONHYDROSTATIC */
455
456 RETURN
457 END

  ViewVC Help
Powered by ViewVC 1.1.22