/[MITgcm]/MITgcm/model/src/cg2d_sr.F
ViewVC logotype

Contents of /MITgcm/model/src/cg2d_sr.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.6 - (show annotations) (download)
Fri May 11 23:28:48 2012 UTC (11 years, 11 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint64y, checkpoint64x, checkpoint64z, checkpoint64q, checkpoint64p, checkpoint64s, checkpoint64r, checkpoint64u, checkpoint64t, checkpoint64w, checkpoint64v, checkpoint64i, checkpoint64h, checkpoint64k, checkpoint64j, checkpoint64m, checkpoint64l, checkpoint64o, checkpoint64n, checkpoint64a, checkpoint64c, checkpoint64b, checkpoint64e, checkpoint64d, checkpoint64g, checkpoint64f, checkpoint63p, checkpoint63q, checkpoint63r, checkpoint63s, checkpoint63n, checkpoint63o, checkpoint64, checkpoint65, checkpoint66g, checkpoint66f, checkpoint66e, checkpoint66d, checkpoint66c, checkpoint66b, checkpoint66a, checkpoint66o, checkpoint66n, checkpoint66m, checkpoint66l, checkpoint66k, checkpoint66j, checkpoint66i, checkpoint66h, checkpoint65z, checkpoint65x, checkpoint65y, checkpoint65r, checkpoint65s, checkpoint65p, checkpoint65q, checkpoint65v, checkpoint65w, checkpoint65t, checkpoint65u, checkpoint65j, checkpoint65k, checkpoint65h, checkpoint65i, checkpoint65n, checkpoint65o, checkpoint65l, checkpoint65m, checkpoint65b, checkpoint65c, checkpoint65a, checkpoint65f, checkpoint65g, checkpoint65d, checkpoint65e, HEAD
Changes since 1.5: +185 -154 lines
- reduce by 1 (now: cg2dMaxIters-1) the upper bound of iteration loop
- if going over cg2dMaxIters-1, add a final update of the global-residual
- allow to store and use lowest-residual solution

1 C $Header: /u/gcmpack/MITgcm/model/src/cg2d_sr.F,v 1.5 2011/12/22 19:00:58 jmc Exp $
2 C $Name: $
3
4 #include "CPP_OPTIONS.h"
5 #ifdef TARGET_NEC_SX
6 C set a sensible default for the outer loop unrolling parameter that can
7 C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8 #ifndef CG2D_OUTERLOOPITERS
9 # define CG2D_OUTERLOOPITERS 10
10 #endif
11 #endif /* TARGET_NEC_SX */
12
13 CBOP
14 C !ROUTINE: CG2D_SR
15 C !INTERFACE:
16 SUBROUTINE CG2D_SR(
17 U cg2d_b, cg2d_x,
18 O firstResidual, minResidualSq, lastResidual,
19 U numIters, nIterMin,
20 I myThid )
21 C !DESCRIPTION: \bv
22 C *==========================================================*
23 C | SUBROUTINE CG2D
24 C | o Two-dimensional grid problem conjugate-gradient
25 C | inverter (with preconditioner).
26 C *==========================================================*
27 C | Con. grad is an iterative procedure for solving Ax = b.
28 C | It requires the A be symmetric.
29 C | This implementation assumes A is a five-diagonal
30 C | matrix of the form that arises in the discrete
31 C | representation of the del^2 operator in a
32 C | two-dimensional space.
33 C | Notes:
34 C | ======
35 C | This implementation can support shared-memory
36 C | multi-threaded execution. In order to do this COMMON
37 C | blocks are used for many of the arrays - even ones that
38 C | are only used for intermedaite results. This design is
39 C | OK if you want to all the threads to collaborate on
40 C | solving the same problem. On the other hand if you want
41 C | the threads to solve several different problems
42 C | concurrently this implementation will not work.
43 C |
44 C | This version implements the single-reduction CG algorithm of
45 C | d Azevedo, Eijkhout, and Romine (Lapack Working Note 56, 1999).
46 C | C. Wolfe, November 2009, clwolfe@ucsd.edu
47 C *==========================================================*
48 C \ev
49
50 C !USES:
51 IMPLICIT NONE
52 C === Global data ===
53 #include "SIZE.h"
54 #include "EEPARAMS.h"
55 #include "PARAMS.h"
56 #include "CG2D.h"
57
58 C !INPUT/OUTPUT PARAMETERS:
59 C === Routine arguments ===
60 C cg2d_b :: The source term or "right hand side" (output: normalised RHS)
61 C cg2d_x :: The solution (input: first guess)
62 C firstResidual :: the initial residual before any iterations
63 C minResidualSq :: the lowest residual reached (squared)
64 C lastResidual :: the actual residual reached
65 C numIters :: Inp: the maximum number of iterations allowed
66 C Out: the actual number of iterations used
67 C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
68 C Out: iteration number corresponding to lowest residual
69 C myThid :: Thread on which I am working.
70 _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
71 _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
72 _RL firstResidual
73 _RL minResidualSq
74 _RL lastResidual
75 INTEGER numIters
76 INTEGER nIterMin
77 INTEGER myThid
78
79 #ifdef ALLOW_SRCG
80
81 C !LOCAL VARIABLES:
82 C === Local variables ====
83 C bi, bj :: tile index in X and Y.
84 C i, j, it2d :: Loop counters ( it2d counts CG iterations )
85 C actualIts :: actual CG iteration number
86 C err_sq :: Measure of the square of the residual of Ax - b.
87 C eta_qrN :: Used in computing search directions; suffix N and NM1
88 C eta_qrNM1 denote current and previous iterations respectively.
89 C cgBeta :: coeff used to update conjugate direction vector "s".
90 C alpha :: coeff used to update solution & residual
91 C sumRHS :: Sum of right-hand-side. Sometimes this is a useful
92 C debugging/trouble shooting diagnostic. For neumann problems
93 C sumRHS needs to be ~0 or it converge at a non-zero residual.
94 C cg2d_min :: used to store solution corresponding to lowest residual.
95 C msgBuf :: Informational/error message buffer
96 INTEGER bi, bj
97 INTEGER i, j, it2d
98 c INTEGER actualIts
99 _RL cg2dTolerance_sq
100 _RL err_sq, errTile(nSx,nSy)
101 _RL eta_qrN, eta_qrNtile(nSx,nSy)
102 _RL eta_qrNM1
103 _RL cgBeta
104 _RL alpha, alphaTile(nSx,nSy)
105 _RL sigma
106 _RL delta, deltaTile(nSx,nSy)
107 _RL sumRHS, sumRHStile(nSx,nSy)
108 _RL rhsMax
109 _RL rhsNorm
110 _RL cg2d_min(1:sNx,1:sNy,nSx,nSy)
111 CHARACTER*(MAX_LEN_MBUF) msgBuf
112 LOGICAL printResidual
113 CEOP
114
115 C-- Initialise auxiliary constant, some output variable and inverter
116 cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
117 minResidualSq = -1. _d 0
118 eta_qrNM1 = 1. _d 0
119
120 C-- Normalise RHS
121 rhsMax = 0. _d 0
122 DO bj=myByLo(myThid),myByHi(myThid)
123 DO bi=myBxLo(myThid),myBxHi(myThid)
124 DO j=1,sNy
125 DO i=1,sNx
126 cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
127 rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
128 ENDDO
129 ENDDO
130 ENDDO
131 ENDDO
132
133 IF (cg2dNormaliseRHS) THEN
134 C- Normalise RHS :
135 _GLOBAL_MAX_RL( rhsMax, myThid )
136 rhsNorm = 1. _d 0
137 IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
138 DO bj=myByLo(myThid),myByHi(myThid)
139 DO bi=myBxLo(myThid),myBxHi(myThid)
140 DO j=1,sNy
141 DO i=1,sNx
142 cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
143 cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
144 ENDDO
145 ENDDO
146 ENDDO
147 ENDDO
148 C- end Normalise RHS
149 ENDIF
150
151 C-- Update overlaps
152 CALL EXCH_XY_RL( cg2d_x, myThid )
153
154 C-- Initial residual calculation
155 DO bj=myByLo(myThid),myByHi(myThid)
156 DO bi=myBxLo(myThid),myBxHi(myThid)
157 IF ( nIterMin.GE.0 ) THEN
158 DO j=1,sNy
159 DO i=1,sNx
160 cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
161 ENDDO
162 ENDDO
163 ENDIF
164 DO j=0,sNy+1
165 DO i=0,sNx+1
166 cg2d_s(i,j,bi,bj) = 0.
167 ENDDO
168 ENDDO
169 sumRHStile(bi,bj) = 0. _d 0
170 errTile(bi,bj) = 0. _d 0
171 #ifdef TARGET_NEC_SX
172 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
173 #endif /* TARGET_NEC_SX */
174 DO j=1,sNy
175 DO i=1,sNx
176 cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
177 & (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
178 & +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
179 & +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
180 & +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
181 & +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
182 & )
183 errTile(bi,bj) = errTile(bi,bj)
184 & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
185 sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
186 ENDDO
187 ENDDO
188 ENDDO
189 ENDDO
190
191 CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
192
193 CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
194 CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
195
196 it2d = 0
197 c actualIts = 0
198 firstResidual = SQRT(err_sq)
199 IF ( nIterMin.GE.0 ) THEN
200 nIterMin = 0
201 minResidualSq = err_sq
202 ENDIF
203
204 printResidual = .FALSE.
205 IF ( debugLevel .GE. debLevZero ) THEN
206 _BEGIN_MASTER( myThid )
207 printResidual = printResidualFreq.GE.1
208 WRITE(standardmessageunit,'(A,1P2E22.14)')
209 & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
210 _END_MASTER( myThid )
211 ENDIF
212
213 IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
214
215 C DER (1999) do one iteration outside of the loop to start things up.
216 C-- Solve preconditioning equation and update
217 C-- conjugate direction vector "s".
218 DO bj=myByLo(myThid),myByHi(myThid)
219 DO bi=myBxLo(myThid),myBxHi(myThid)
220 eta_qrNtile(bi,bj) = 0. _d 0
221 #ifdef TARGET_NEC_SX
222 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
223 #endif /* TARGET_NEC_SX */
224 DO j=1,sNy
225 DO i=1,sNx
226 cg2d_y(i,j,bi,bj) =
227 & pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
228 & +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
229 & +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
230 & +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
231 & +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
232 cg2d_s(i,j,bi,bj) = cg2d_y(i,j,bi,bj)
233 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
234 & +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
235 ENDDO
236 ENDDO
237 ENDDO
238 ENDDO
239
240 CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
241 CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
242
243 eta_qrNM1 = eta_qrN
244
245 C== Evaluate laplace operator on conjugate gradient vector
246 C== q = A.s
247 DO bj=myByLo(myThid),myByHi(myThid)
248 DO bi=myBxLo(myThid),myBxHi(myThid)
249 alphaTile(bi,bj) = 0. _d 0
250 #ifdef TARGET_NEC_SX
251 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
252 #endif /* TARGET_NEC_SX */
253 DO j=1,sNy
254 DO i=1,sNx
255 cg2d_q(i,j,bi,bj) =
256 & aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj)
257 & +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj)
258 & +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj)
259 & +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj)
260 & +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj)
261 alphaTile(bi,bj) = alphaTile(bi,bj)
262 & + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
263 ENDDO
264 ENDDO
265 ENDDO
266 ENDDO
267
268 CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
269
270 sigma = eta_qrN/alpha
271
272 C== Update simultaneously solution and residual vectors (and Iter number)
273 C Now compute "interior" points.
274 DO bj=myByLo(myThid),myByHi(myThid)
275 DO bi=myBxLo(myThid),myBxHi(myThid)
276 DO j=1,sNy
277 DO i=1,sNx
278 cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+sigma*cg2d_s(i,j,bi,bj)
279 cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-sigma*cg2d_q(i,j,bi,bj)
280 ENDDO
281 ENDDO
282 ENDDO
283 ENDDO
284 c actualIts = 1
285
286 CALL EXCH_S3D_RL( cg2d_r,1, myThid )
287
288 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
289 DO 10 it2d=1, numIters-1
290 c actualIts = it2d
291
292 C-- Solve preconditioning equation and update
293 C-- conjugate direction vector "s".
294 C-- z = M^-1 r
295 DO bj=myByLo(myThid),myByHi(myThid)
296 DO bi=myBxLo(myThid),myBxHi(myThid)
297 #ifdef TARGET_NEC_SX
298 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
299 #endif /* TARGET_NEC_SX */
300 DO j=1,sNy
301 DO i=1,sNx
302 cg2d_y(i,j,bi,bj) =
303 & pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
304 & +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
305 & +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
306 & +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
307 & +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
308 ENDDO
309 ENDDO
310 ENDDO
311 ENDDO
312
313 CALL EXCH_S3D_RL( cg2d_y, 1, myThid )
314
315 C== v = A.z
316 C-- eta_qr = <z,r>
317 C-- delta = <z,v>
318 C-- Do the error calcuation here to consolidate global reductions
319 DO bj=myByLo(myThid),myByHi(myThid)
320 DO bi=myBxLo(myThid),myBxHi(myThid)
321 eta_qrNtile(bi,bj) = 0. _d 0
322 deltaTile(bi,bj) = 0. _d 0
323 errTile(bi,bj) = 0. _d 0
324 #ifdef TARGET_NEC_SX
325 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
326 #endif /* TARGET_NEC_SX */
327 DO j=1,sNy
328 DO i=1,sNx
329 cg2d_v(i,j,bi,bj) =
330 & aW2d(i ,j ,bi,bj)*cg2d_y(i-1,j ,bi,bj)
331 & +aW2d(i+1,j ,bi,bj)*cg2d_y(i+1,j ,bi,bj)
332 & +aS2d(i ,j ,bi,bj)*cg2d_y(i ,j-1,bi,bj)
333 & +aS2d(i ,j+1,bi,bj)*cg2d_y(i ,j+1,bi,bj)
334 & +aC2d(i ,j ,bi,bj)*cg2d_y(i ,j ,bi,bj)
335 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
336 & +cg2d_y(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
337 deltaTile(bi,bj) = deltaTile(bi,bj)
338 & +cg2d_y(i,j,bi,bj)*cg2d_v(i,j,bi,bj)
339 errTile(bi,bj) = errTile(bi,bj)
340 & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
341 ENDDO
342 ENDDO
343 sumPhi(1,bi,bj) = eta_qrNtile(bi,bj)
344 sumPhi(2,bi,bj) = deltaTile(bi,bj)
345 sumPhi(3,bi,bj) = errTile(bi,bj)
346 ENDDO
347 ENDDO
348
349 C CALL GLOBAL_SUM_TILE_RL( eta_qrNtile, eta_qrN,myThid )
350 C CALL GLOBAL_SUM_TILE_RL( deltaTile, delta, myThid )
351 C CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
352 CALL GLOBAL_VEC_SUM_R8( 3, 3, sumPhi, myThid )
353
354 eta_qrN = sumPhi(1,1,1)
355 delta = sumPhi(2,1,1)
356 err_sq = sumPhi(3,1,1)
357
358 IF ( printResidual ) THEN
359 IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
360 WRITE(msgBuf,'(A,I6,A,1PE21.14)')
361 & ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
362 CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
363 & SQUEEZE_RIGHT, myThid )
364 ENDIF
365 ENDIF
366 IF ( err_sq .LT. cg2dTolerance_sq ) GOTO 11
367 IF ( err_sq .LT. minResidualSq ) THEN
368 C- Store lowest residual solution
369 minResidualSq = err_sq
370 nIterMin = it2d
371 DO bj=myByLo(myThid),myByHi(myThid)
372 DO bi=myBxLo(myThid),myBxHi(myThid)
373 DO j=1,sNy
374 DO i=1,sNx
375 cg2d_min(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
376 ENDDO
377 ENDDO
378 ENDDO
379 ENDDO
380 ENDIF
381
382 cgBeta = eta_qrN/eta_qrNM1
383 eta_qrNM1 = eta_qrN
384 alpha = delta - (cgBeta*cgBeta)*alpha
385 sigma = eta_qrN/alpha
386
387 C== Update simultaneously solution and residual vectors (and Iter number)
388 DO bj=myByLo(myThid),myByHi(myThid)
389 DO bi=myBxLo(myThid),myBxHi(myThid)
390 DO j=1,sNy
391 DO i=1,sNx
392 cg2d_s(i,j,bi,bj) = cg2d_y(i,j,bi,bj)
393 & + cgBeta*cg2d_s(i,j,bi,bj)
394 cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)
395 & + sigma*cg2d_s(i,j,bi,bj)
396 cg2d_q(i,j,bi,bj) = cg2d_v(i,j,bi,bj)
397 & + cgBeta*cg2d_q(i,j,bi,bj)
398 cg2d_r(i,j,bi,bj) = cg2d_r(i,j,bi,bj)
399 & - sigma*cg2d_q(i,j,bi,bj)
400 ENDDO
401 ENDDO
402 ENDDO
403 ENDDO
404 c actualIts = it2d + 1
405
406 CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
407
408 10 CONTINUE
409 DO bj=myByLo(myThid),myByHi(myThid)
410 DO bi=myBxLo(myThid),myBxHi(myThid)
411 errTile(bi,bj) = 0. _d 0
412 DO j=1,sNy
413 DO i=1,sNx
414 errTile(bi,bj) = errTile(bi,bj)
415 & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
416 ENDDO
417 ENDDO
418 ENDDO
419 ENDDO
420 CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
421 11 CONTINUE
422
423 IF ( nIterMin.GE.0 .AND. err_sq .GT. minResidualSq ) THEN
424 C- use the lowest residual solution (instead of current one = last residual)
425 DO bj=myByLo(myThid),myByHi(myThid)
426 DO bi=myBxLo(myThid),myBxHi(myThid)
427 DO j=1,sNy
428 DO i=1,sNx
429 cg2d_x(i,j,bi,bj) = cg2d_min(i,j,bi,bj)
430 ENDDO
431 ENDDO
432 ENDDO
433 ENDDO
434 ENDIF
435
436 IF (cg2dNormaliseRHS) THEN
437 C-- Un-normalise the answer
438 DO bj=myByLo(myThid),myByHi(myThid)
439 DO bi=myBxLo(myThid),myBxHi(myThid)
440 DO j=1,sNy
441 DO i=1,sNx
442 cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
443 ENDDO
444 ENDDO
445 ENDDO
446 ENDDO
447 ENDIF
448
449 C-- Return parameters to caller
450 lastResidual = SQRT(err_sq)
451 numIters = it2d
452 c numIters = actualIts
453
454 #endif /* ALLOW_SRCG */
455 RETURN
456 END

  ViewVC Help
Powered by ViewVC 1.1.22