/[MITgcm]/MITgcm/model/src/cg2d_nsa.F
ViewVC logotype

Contents of /MITgcm/model/src/cg2d_nsa.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.5 - (show annotations) (download)
Tue Jul 17 21:31:52 2012 UTC (11 years, 10 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint63q
Changes since 1.4: +60 -79 lines
cleaning:
 - stop if cg2dNormaliseRHS=T and AUTODIFF
 - replace GLOBAL_SUM calls with GLOBAL_SUM_TILE.

1 C $Header: /u/gcmpack/MITgcm/model/src/cg2d_nsa.F,v 1.4 2012/05/11 23:29:13 jmc Exp $
2 C $Name: $
3
4 #include "CPP_OPTIONS.h"
5
6 CML THIS DOES NOT WORK +++++
7 #undef ALLOW_LOOP_DIRECTIVE
8 CBOP
9 C !ROUTINE: CG2D_NSA
10 C !INTERFACE:
11 SUBROUTINE CG2D_NSA(
12 U cg2d_b, cg2d_x,
13 O firstResidual, minResidualSq, lastResidual,
14 U numIters, nIterMin,
15 I myThid )
16 C !DESCRIPTION: \bv
17 C *==========================================================*
18 C | SUBROUTINE CG2D_NSA
19 C | o Two-dimensional grid problem conjugate-gradient
20 C | inverter (with preconditioner).
21 C | o This version is used only in the case when the matrix
22 C | operator is not "self-adjoint" (NSA). Any remaining
23 C | residuals will immediately reported to the department
24 C | of homeland security.
25 C *==========================================================*
26 C | Con. grad is an iterative procedure for solving Ax = b.
27 C | It requires the A be symmetric.
28 C | This implementation assumes A is a five-diagonal
29 C | matrix of the form that arises in the discrete
30 C | representation of the del^2 operator in a
31 C | two-dimensional space.
32 C | Notes:
33 C | ======
34 C | This implementation can support shared-memory
35 C | multi-threaded execution. In order to do this COMMON
36 C | blocks are used for many of the arrays - even ones that
37 C | are only used for intermedaite results. This design is
38 C | OK if you want to all the threads to collaborate on
39 C | solving the same problem. On the other hand if you want
40 C | the threads to solve several different problems
41 C | concurrently this implementation will not work.
42 C *==========================================================*
43 C \ev
44
45 C !USES:
46 IMPLICIT NONE
47 C === Global data ===
48 #include "SIZE.h"
49 #include "EEPARAMS.h"
50 #include "PARAMS.h"
51 #include "CG2D.h"
52 #ifdef ALLOW_AUTODIFF_TAMC
53 # include "tamc.h"
54 # include "tamc_keys.h"
55 #endif
56
57 C !INPUT/OUTPUT PARAMETERS:
58 C === Routine arguments ===
59 C cg2d_b :: The source term or "right hand side" (Output: normalised RHS)
60 C cg2d_x :: The solution (Input: first guess)
61 C firstResidual :: the initial residual before any iterations
62 C minResidualSq :: the lowest residual reached (squared)
63 C lastResidual :: the actual residual reached
64 C numIters :: Inp: the maximum number of iterations allowed
65 C Out: the actual number of iterations used
66 C nIterMin :: Inp: decide to store (if >=0) or not (if <0) lowest res. sol.
67 C Out: iteration number corresponding to lowest residual
68 C myThid :: Thread on which I am working.
69 _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
70 _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
71 _RL firstResidual
72 _RL minResidualSq
73 _RL lastResidual
74 INTEGER numIters
75 INTEGER nIterMin
76 INTEGER myThid
77
78 #ifdef ALLOW_CG2D_NSA
79 C !LOCAL VARIABLES:
80 C === Local variables ====
81 C bi, bj :: tile index in X and Y.
82 C i, j, it2d :: Loop counters ( it2d counts CG iterations )
83 C actualIts :: actual CG iteration number
84 C err_sq :: Measure of the square of the residual of Ax - b.
85 C eta_qrN :: Used in computing search directions; suffix N and NM1
86 C eta_qrNM1 denote current and previous iterations respectively.
87 C recip_eta_qrNM1 :: reciprocal of eta_qrNM1
88 C cgBeta :: coeff used to update conjugate direction vector "s".
89 C alpha :: coeff used to update solution & residual
90 C alphaSum :: to avoid the statement: alpha = 1./alpha (for TAMC/TAF)
91 C sumRHS :: Sum of right-hand-side. Sometimes this is a useful
92 C debugging/trouble shooting diagnostic. For neumann problems
93 C sumRHS needs to be ~0 or it converge at a non-zero residual.
94 C cg2d_min :: used to store solution corresponding to lowest residual.
95 C msgBuf :: Informational/error message buffer
96 INTEGER bi, bj
97 INTEGER i, j, it2d
98 INTEGER actualIts
99 _RL cg2dTolerance_sq
100 _RL err_sq, errTile(nSx,nSy)
101 _RL eta_qrN, eta_qrNtile(nSx,nSy)
102 _RL eta_qrNM1, recip_eta_qrNM1
103 _RL cgBeta, alpha
104 _RL alphaSum,alphaTile(nSx,nSy)
105 _RL sumRHS, sumRHStile(nSx,nSy)
106 _RL rhsMax, rhsNorm
107 CHARACTER*(MAX_LEN_MBUF) msgBuf
108 LOGICAL printResidual
109 CEOP
110
111 #ifdef ALLOW_AUTODIFF_TAMC
112 IF ( numIters .GT. numItersMax ) THEN
113 WRITE(msgBuf,'(A,I10)')
114 & 'CG2D_NSA: numIters > numItersMax =', numItersMax
115 CALL PRINT_ERROR( msgBuf, myThid )
116 STOP 'ABNORMAL END: S/R CG2D_NSA'
117 ENDIF
118 IF ( cg2dNormaliseRHS ) THEN
119 WRITE(msgBuf,'(A)') 'CG2D_NSA: cg2dNormaliseRHS is disabled'
120 CALL PRINT_ERROR( msgBuf, myThid )
121 WRITE(msgBuf,'(A)')
122 & 'set cg2dTargetResWunit (instead of cg2dTargetResidual)'
123 CALL PRINT_ERROR( msgBuf, myThid )
124 STOP 'ABNORMAL END: S/R CG2D_NSA'
125 ENDIF
126 #endif /* ALLOW_AUTODIFF_TAMC */
127
128 #ifdef ALLOW_AUTODIFF_TAMC
129 act1 = myThid - 1
130 max1 = nTx*nTy
131 act2 = ikey_dynamics - 1
132 ikey = (act1 + 1) + act2*max1
133 #endif /* ALLOW_AUTODIFF_TAMC */
134
135 C-- Initialise auxiliary constant, some output variable and inverter
136 cg2dTolerance_sq = cg2dTolerance*cg2dTolerance
137 minResidualSq = -1. _d 0
138 eta_qrNM1 = 1. _d 0
139 recip_eta_qrNM1= 1. _d 0
140
141 C-- Normalise RHS
142 rhsMax = 0. _d 0
143 DO bj=myByLo(myThid),myByHi(myThid)
144 DO bi=myBxLo(myThid),myBxHi(myThid)
145 DO j=1,sNy
146 DO i=1,sNx
147 cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*cg2dNorm
148 rhsMax = MAX(ABS(cg2d_b(i,j,bi,bj)),rhsMax)
149 ENDDO
150 ENDDO
151 ENDDO
152 ENDDO
153
154 #ifndef ALLOW_AUTODIFF_TAMC
155 IF (cg2dNormaliseRHS) THEN
156 C- Normalise RHS :
157 _GLOBAL_MAX_RL( rhsMax, myThid )
158 rhsNorm = 1. _d 0
159 IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
160 DO bj=myByLo(myThid),myByHi(myThid)
161 DO bi=myBxLo(myThid),myBxHi(myThid)
162 DO j=1,sNy
163 DO i=1,sNx
164 cg2d_b(i,j,bi,bj) = cg2d_b(i,j,bi,bj)*rhsNorm
165 cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)*rhsNorm
166 ENDDO
167 ENDDO
168 ENDDO
169 ENDDO
170 C- end Normalise RHS
171 ENDIF
172 #endif /* ndef ALLOW_AUTODIFF_TAMC */
173
174 C-- Update overlaps
175 CALL EXCH_XY_RL( cg2d_x, myThid )
176
177 C-- Initial residual calculation
178 #ifdef ALLOW_AUTODIFF_TAMC
179 CADJ STORE cg2d_b = comlev1_cg2d, key = ikey, byte = isbyte
180 CADJ STORE cg2d_x = comlev1_cg2d, key = ikey, byte = isbyte
181 #endif /* ALLOW_AUTODIFF_TAMC */
182 DO bj=myByLo(myThid),myByHi(myThid)
183 DO bi=myBxLo(myThid),myBxHi(myThid)
184 errTile(bi,bj) = 0. _d 0
185 sumRHStile(bi,bj) = 0. _d 0
186 DO j=0,sNy+1
187 DO i=0,sNx+1
188 cg2d_s(i,j,bi,bj) = 0.
189 ENDDO
190 ENDDO
191 DO j=1,sNy
192 DO i=1,sNx
193 cg2d_r(i,j,bi,bj) = cg2d_b(i,j,bi,bj) -
194 & (aW2d(i ,j ,bi,bj)*cg2d_x(i-1,j ,bi,bj)
195 & +aW2d(i+1,j ,bi,bj)*cg2d_x(i+1,j ,bi,bj)
196 & +aS2d(i ,j ,bi,bj)*cg2d_x(i ,j-1,bi,bj)
197 & +aS2d(i ,j+1,bi,bj)*cg2d_x(i ,j+1,bi,bj)
198 & +aC2d(i ,j ,bi,bj)*cg2d_x(i ,j ,bi,bj)
199 & )
200 errTile(bi,bj) = errTile(bi,bj)
201 & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
202 sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(i,j,bi,bj)
203 ENDDO
204 ENDDO
205 ENDDO
206 ENDDO
207
208 c CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
209 CALL EXCH_XY_RL ( cg2d_r, myThid )
210 CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
211 CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
212 actualIts = 0
213 IF ( err_sq .NE. 0. ) THEN
214 firstResidual = SQRT(err_sq)
215 ELSE
216 firstResidual = 0.
217 ENDIF
218
219 printResidual = .FALSE.
220 IF ( debugLevel .GE. debLevZero ) THEN
221 _BEGIN_MASTER( myThid )
222 printResidual = printResidualFreq.GE.1
223 WRITE(standardmessageunit,'(A,1P2E22.14)')
224 & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
225 _END_MASTER( myThid )
226 ENDIF
227
228 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
229 Cml begin main solver loop
230 #if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
231 CADJ LOOP = iteration, cg2d_x = comlev_cg2d_iter
232 #endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */
233 DO it2d=1, numIters
234 #ifdef ALLOW_LOOP_DIRECTIVE
235 CML it2d = 0
236 CML DO WHILE ( err_sq .GT. cg2dTolerance_sq .and. it2d .LT. numIters )
237 CML it2d = it2d+1
238 #endif /* ALLOW_LOOP_DIRECTIVE */
239
240 #ifdef ALLOW_AUTODIFF_TAMC
241 icg2dkey = (ikey-1)*numItersMax + it2d
242 CADJ STORE err_sq = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
243 #endif /* ALLOW_AUTODIFF_TAMC */
244 IF ( err_sq .GE. cg2dTolerance_sq ) THEN
245
246 C-- Solve preconditioning equation and update
247 C-- conjugate direction vector "s".
248 #ifdef ALLOW_AUTODIFF_TAMC
249 CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
250 CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
251 #endif /* ALLOW_AUTODIFF_TAMC */
252 DO bj=myByLo(myThid),myByHi(myThid)
253 DO bi=myBxLo(myThid),myBxHi(myThid)
254 eta_qrNtile(bi,bj) = 0. _d 0
255 DO j=1,sNy
256 DO i=1,sNx
257 cg2d_z(i,j,bi,bj) =
258 & pC(i ,j ,bi,bj)*cg2d_r(i ,j ,bi,bj)
259 & +pW(i ,j ,bi,bj)*cg2d_r(i-1,j ,bi,bj)
260 & +pW(i+1,j ,bi,bj)*cg2d_r(i+1,j ,bi,bj)
261 & +pS(i ,j ,bi,bj)*cg2d_r(i ,j-1,bi,bj)
262 & +pS(i ,j+1,bi,bj)*cg2d_r(i ,j+1,bi,bj)
263 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
264 & +cg2d_z(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
265 ENDDO
266 ENDDO
267 ENDDO
268 ENDDO
269
270 CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
271 #ifdef ALLOW_AUTODIFF_TAMC
272 CMLCADJ STORE eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
273 CADJ STORE recip_eta_qrNM1 = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
274 #endif /* ALLOW_AUTODIFF_TAMC */
275 CML cgBeta = eta_qrN/eta_qrNM1
276 cgBeta = eta_qrN*recip_eta_qrNM1
277 Cml store normalisation factor for the next interation (in case there is one).
278 CML store the inverse of the normalization factor for higher precision
279 CML eta_qrNM1 = eta_qrN
280 recip_eta_qrNM1 = 1. _d 0/eta_qrN
281
282 DO bj=myByLo(myThid),myByHi(myThid)
283 DO bi=myBxLo(myThid),myBxHi(myThid)
284 DO j=1,sNy
285 DO i=1,sNx
286 cg2d_s(i,j,bi,bj) = cg2d_z(i,j,bi,bj)
287 & + cgBeta*cg2d_s(i,j,bi,bj)
288 ENDDO
289 ENDDO
290 ENDDO
291 ENDDO
292
293 C-- Do exchanges that require messages i.e. between
294 C-- processes.
295 c CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
296 CALL EXCH_XY_RL ( cg2d_s, myThid )
297
298 C== Evaluate laplace operator on conjugate gradient vector
299 C== q = A.s
300 #ifdef ALLOW_AUTODIFF_TAMC
301 #ifndef ALLOW_LOOP_DIRECTIVE
302 CADJ STORE cg2d_s = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
303 #endif /* not ALLOW_LOOP_DIRECTIVE */
304 #endif /* ALLOW_AUTODIFF_TAMC */
305 DO bj=myByLo(myThid),myByHi(myThid)
306 DO bi=myBxLo(myThid),myBxHi(myThid)
307 alphaTile(bi,bj) = 0. _d 0
308 DO j=1,sNy
309 DO i=1,sNx
310 cg2d_q(i,j,bi,bj) =
311 & aW2d(i ,j ,bi,bj)*cg2d_s(i-1,j ,bi,bj)
312 & +aW2d(i+1,j ,bi,bj)*cg2d_s(i+1,j ,bi,bj)
313 & +aS2d(i ,j ,bi,bj)*cg2d_s(i ,j-1,bi,bj)
314 & +aS2d(i ,j+1,bi,bj)*cg2d_s(i ,j+1,bi,bj)
315 & +aC2d(i ,j ,bi,bj)*cg2d_s(i ,j ,bi,bj)
316 alphaTile(bi,bj) = alphaTile(bi,bj)
317 & + cg2d_s(i,j,bi,bj)*cg2d_q(i,j,bi,bj)
318 ENDDO
319 ENDDO
320 ENDDO
321 ENDDO
322 CALL GLOBAL_SUM_TILE_RL( alphaTile, alphaSum, myThid )
323 alpha = eta_qrN/alphaSum
324
325 C== Update simultaneously solution and residual vectors (and Iter number)
326 C Now compute "interior" points.
327 #ifdef ALLOW_AUTODIFF_TAMC
328 #ifndef ALLOW_LOOP_DIRECTIVE
329 CADJ STORE cg2d_r = comlev1_cg2d_iter, key = icg2dkey, byte = isbyte
330 #endif /* ALLOW_LOOP_DIRECTIVE */
331 #endif /* ALLOW_AUTODIFF_TAMC */
332 DO bj=myByLo(myThid),myByHi(myThid)
333 DO bi=myBxLo(myThid),myBxHi(myThid)
334 errTile(bi,bj) = 0. _d 0
335 DO j=1,sNy
336 DO i=1,sNx
337 cg2d_x(i,j,bi,bj)=cg2d_x(i,j,bi,bj)+alpha*cg2d_s(i,j,bi,bj)
338 cg2d_r(i,j,bi,bj)=cg2d_r(i,j,bi,bj)-alpha*cg2d_q(i,j,bi,bj)
339 errTile(bi,bj) = errTile(bi,bj)
340 & + cg2d_r(i,j,bi,bj)*cg2d_r(i,j,bi,bj)
341 ENDDO
342 ENDDO
343 ENDDO
344 ENDDO
345 actualIts = it2d
346
347 CALL GLOBAL_SUM_TILE_RL( errTile, err_sq, myThid )
348 IF ( printResidual ) THEN
349 IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
350 WRITE(msgBuf,'(A,I6,A,1PE21.14)')
351 & ' cg2d: iter=', it2d, ' ; resid.= ', SQRT(err_sq)
352 CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
353 & SQUEEZE_RIGHT, myThid )
354 ENDIF
355 ENDIF
356
357 c CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
358 CALL EXCH_XY_RL ( cg2d_r, myThid )
359
360 Cml end of if "err >= cg2dTolerance" block ; end main solver loop
361 ENDIF
362 ENDDO
363
364 #ifndef ALLOW_AUTODIFF_TAMC
365 IF (cg2dNormaliseRHS) THEN
366 C-- Un-normalise the answer
367 DO bj=myByLo(myThid),myByHi(myThid)
368 DO bi=myBxLo(myThid),myBxHi(myThid)
369 DO j=1,sNy
370 DO i=1,sNx
371 cg2d_x(i,j,bi,bj) = cg2d_x(i,j,bi,bj)/rhsNorm
372 ENDDO
373 ENDDO
374 ENDDO
375 ENDDO
376 ENDIF
377 #endif /* ndef ALLOW_AUTODIFF_TAMC */
378
379 C-- Return parameters to caller
380 IF ( err_sq .NE. 0. ) THEN
381 lastResidual = SQRT(err_sq)
382 ELSE
383 lastResidual = 0.
384 ENDIF
385 numIters = actualIts
386
387 #endif /* ALLOW_CG2D_NSA */
388 RETURN
389 END
390
391 #if ((defined ALLOW_AUTODIFF_TAMC) && (defined ALLOW_LOOP_DIRECTIVE))
392
393 C These routines are routinely part of the TAMC/TAF library that is
394 C not included in the MITcgm, therefore they are mimicked here.
395
396 subroutine adstore(chardum,int1,idow,int2,int3,icount)
397
398 implicit none
399
400 #include "SIZE.h"
401 #include "tamc.h"
402
403 character*(*) chardum
404 integer int1, int2, int3, idow, icount
405
406 C the length of this vector must be greater or equal
407 C twice the number of timesteps
408 integer nidow
409 #ifdef ALLOW_TAMC_CHECKPOINTING
410 parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 )
411 #else
412 parameter ( nidow = 1000000 )
413 #endif /* ALLOW_TAMC_CHECKPOINTING */
414 integer istoreidow(nidow)
415 common /istorecommon/ istoreidow
416
417 print *, 'adstore: ', chardum, int1, idow, int2, int3, icount
418
419 if ( icount .gt. nidow ) then
420 print *, 'adstore: error: icount > nidow = ', nidow
421 stop 'ABNORMAL STOP in adstore'
422 endif
423
424 istoreidow(icount) = idow
425
426 return
427 end
428
429 subroutine adresto(chardum,int1,idow,int2,int3,icount)
430
431 implicit none
432
433 #include "SIZE.h"
434 #include "tamc.h"
435
436 character*(*) chardum
437 integer int1, int2, int3, idow, icount
438
439 C the length of this vector must be greater or equal
440 C twice the number of timesteps
441 integer nidow
442 #ifdef ALLOW_TAMC_CHECKPOINTING
443 parameter ( nidow = 2*nchklev_1*nchklev_2*nchklev_3 )
444 #else
445 parameter ( nidow = 1000000 )
446 #endif /* ALLOW_TAMC_CHECKPOINTING */
447 integer istoreidow(nidow)
448 common /istorecommon/ istoreidow
449
450 print *, 'adresto: ', chardum, int1, idow, int2, int3, icount
451
452 if ( icount .gt. nidow ) then
453 print *, 'adstore: error: icount > nidow = ', nidow
454 stop 'ABNORMAL STOP in adstore'
455 endif
456
457 idow = istoreidow(icount)
458
459 return
460 end
461 #endif /* ALLOW_AUTODIFF_TAMC and ALLOW_LOOP_DIRECTIVE */

  ViewVC Help
Powered by ViewVC 1.1.22