/[MITgcm]/MITgcm/model/src/cg2d_sr.F
ViewVC logotype

Contents of /MITgcm/model/src/cg2d_sr.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.4 - (show annotations) (download)
Wed Jun 8 01:46:34 2011 UTC (12 years, 10 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint63d, checkpoint63e, checkpoint63f, checkpoint63g, checkpoint63a, checkpoint63b, checkpoint63c, checkpoint63, checkpoint62z
Changes since 1.3: +10 -9 lines
use new parameter "printResidualFreq" to print residual in CG iterations

1 C $Header: /u/gcmpack/MITgcm/model/src/cg2d_sr.F,v 1.3 2010/03/16 00:08:27 jmc Exp $
2 C $Name: $
3
4 #include "CPP_OPTIONS.h"
5 #ifdef TARGET_NEC_SX
6 C set a sensible default for the outer loop unrolling parameter that can
7 C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8 #ifndef CG2D_OUTERLOOPITERS
9 # define CG2D_OUTERLOOPITERS 10
10 #endif
11 #endif /* TARGET_NEC_SX */
12
13 CBOP
14 C !ROUTINE: CG2D_SR
15 C !INTERFACE:
16 SUBROUTINE CG2D_SR(
17 I cg2d_b,
18 U cg2d_x,
19 O firstResidual,
20 O lastResidual,
21 U numIters,
22 I myThid )
23 C !DESCRIPTION: \bv
24 C *==========================================================*
25 C | SUBROUTINE CG2D
26 C | o Two-dimensional grid problem conjugate-gradient
27 C | inverter (with preconditioner).
28 C *==========================================================*
29 C | Con. grad is an iterative procedure for solving Ax = b.
30 C | It requires the A be symmetric.
31 C | This implementation assumes A is a five-diagonal
32 C | matrix of the form that arises in the discrete
33 C | representation of the del^2 operator in a
34 C | two-dimensional space.
35 C | Notes:
36 C | ======
37 C | This implementation can support shared-memory
38 C | multi-threaded execution. In order to do this COMMON
39 C | blocks are used for many of the arrays - even ones that
40 C | are only used for intermedaite results. This design is
41 C | OK if you want to all the threads to collaborate on
42 C | solving the same problem. On the other hand if you want
43 C | the threads to solve several different problems
44 C | concurrently this implementation will not work.
45 C |
46 C | This version implements the single-reduction CG algorithm of
47 C | d Azevedo, Eijkhout, and Romine (Lapack Working Note 56, 1999).
48 C | C. Wolfe, November 2009, clwolfe@ucsd.edu
49 C *==========================================================*
50 C \ev
51
52 C !USES:
53 IMPLICIT NONE
54 C === Global data ===
55 #include "SIZE.h"
56 #include "EEPARAMS.h"
57 #include "PARAMS.h"
58 #include "CG2D.h"
59 c#include "GRID.h"
60 c#include "SURFACE.h"
61
62 C !INPUT/OUTPUT PARAMETERS:
63 C === Routine arguments ===
64 C myThid :: Thread on which I am working.
65 C cg2d_b :: The source term or "right hand side"
66 C cg2d_x :: The solution
67 C firstResidual :: the initial residual before any iterations
68 C lastResidual :: the actual residual reached
69 C numIters :: Entry: the maximum number of iterations allowed
70 C Exit: the actual number of iterations used
71 _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
72 _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
73 _RL firstResidual
74 _RL lastResidual
75 INTEGER numIters
76 INTEGER myThid
77
78 #ifdef ALLOW_SRCG
79
80 C !LOCAL VARIABLES:
81 C === Local variables ====
82 C actualIts :: Number of iterations taken
83 C actualResidual :: residual
84 C bi, bj :: Block index in X and Y.
85 C eta_qrN :: Used in computing search directions
86 C eta_qrNM1 suffix N and NM1 denote current and
87 C cgBeta previous iterations respectively.
88 C alpha
89 C sumRHS :: Sum of right-hand-side. Sometimes this is a
90 C useful debuggin/trouble shooting diagnostic.
91 C For neumann problems sumRHS needs to be ~0.
92 C or they converge at a non-zero residual.
93 C err :: Measure of residual of Ax - b, usually the norm.
94 C I, J, it2d :: Loop counters ( it2d counts CG iterations )
95 INTEGER actualIts
96 _RL actualResidual
97 INTEGER bi, bj
98 INTEGER I, J, it2d
99 _RL err, errTile(nSx,nSy)
100 _RL eta_qrN,eta_qrNtile(nSx,nSy)
101 _RL eta_qrNM1
102 _RL cgBeta
103 _RL alpha, alphaTile(nSx,nSy)
104 _RL sigma, sigmaTile(nSx,nSy)
105 _RL delta, deltaTile(nSx,nSy)
106 _RL sumRHS, sumRHStile(nSx,nSy)
107 _RL rhsMax
108 _RL rhsNorm
109 CHARACTER*(MAX_LEN_MBUF) msgBuf
110 LOGICAL printResidual
111 CEOP
112
113
114 C-- Initialise inverter
115 eta_qrNM1 = 1. _d 0
116
117 C-- Normalise RHS
118 rhsMax = 0. _d 0
119 DO bj=myByLo(myThid),myByHi(myThid)
120 DO bi=myBxLo(myThid),myBxHi(myThid)
121 DO J=1,sNy
122 DO I=1,sNx
123 cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
124 rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
125 ENDDO
126 ENDDO
127 ENDDO
128 ENDDO
129
130 IF (cg2dNormaliseRHS) THEN
131 C- Normalise RHS :
132 _GLOBAL_MAX_RL( rhsMax, myThid )
133 rhsNorm = 1. _d 0
134 IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
135 DO bj=myByLo(myThid),myByHi(myThid)
136 DO bi=myBxLo(myThid),myBxHi(myThid)
137 DO J=1,sNy
138 DO I=1,sNx
139 cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
140 cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
141 ENDDO
142 ENDDO
143 ENDDO
144 ENDDO
145 C- end Normalise RHS
146 ENDIF
147
148 C-- Update overlaps
149 CALL EXCH_XY_RL( cg2d_x, myThid )
150
151 C-- Initial residual calculation
152 err = 0. _d 0
153 sumRHS = 0. _d 0
154 DO bj=myByLo(myThid),myByHi(myThid)
155 DO bi=myBxLo(myThid),myBxHi(myThid)
156 DO J=1-1,sNy+1
157 DO I=1-1,sNx+1
158 cg2d_s(I,J,bi,bj) = 0.
159 ENDDO
160 ENDDO
161 sumRHStile(bi,bj) = 0. _d 0
162 errTile(bi,bj) = 0. _d 0
163 #ifdef TARGET_NEC_SX
164 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
165 #endif /* TARGET_NEC_SX */
166 DO J=1,sNy
167 DO I=1,sNx
168 cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
169 & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
170 & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
171 & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
172 & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
173 & +aC2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
174 & )
175 errTile(bi,bj) = errTile(bi,bj)
176 & + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
177 sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(I,J,bi,bj)
178 ENDDO
179 ENDDO
180 ENDDO
181 ENDDO
182
183 CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
184
185 CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
186 CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
187
188 err = SQRT(err)
189 actualIts = 0
190 actualResidual = err
191
192 printResidual = .FALSE.
193 IF ( debugLevel .GE. debLevZero ) THEN
194 _BEGIN_MASTER( myThid )
195 printResidual = printResidualFreq.GE.1
196 WRITE(standardmessageunit,'(A,1P2E22.14)')
197 & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
198 _END_MASTER( myThid )
199 ENDIF
200 firstResidual=actualResidual
201
202 IF ( err .LT. cg2dTolerance ) GOTO 11
203
204 C DER (1999) do one iteration outside of the loop to start things up.
205 C-- Solve preconditioning equation and update
206 C-- conjugate direction vector "s".
207 eta_qrN = 0. _d 0
208 DO bj=myByLo(myThid),myByHi(myThid)
209 DO bi=myBxLo(myThid),myBxHi(myThid)
210 eta_qrNtile(bi,bj) = 0. _d 0
211 #ifdef TARGET_NEC_SX
212 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
213 #endif /* TARGET_NEC_SX */
214 DO J=1,sNy
215 DO I=1,sNx
216 cg2d_y(I,J,bi,bj) =
217 & pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj)
218 & +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj)
219 & +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj)
220 & +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj)
221 & +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj)
222 cg2d_s(I,J,bi,bj) = cg2d_y(I,J,bi,bj)
223 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
224 & +cg2d_y(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
225 ENDDO
226 ENDDO
227 ENDDO
228 ENDDO
229
230 CALL EXCH_S3D_RL( cg2d_s, 1, myThid )
231 CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
232
233 eta_qrNM1 = eta_qrN
234
235 C== Evaluate laplace operator on conjugate gradient vector
236 C== q = A.s
237 alpha = 0. _d 0
238 DO bj=myByLo(myThid),myByHi(myThid)
239 DO bi=myBxLo(myThid),myBxHi(myThid)
240 alphaTile(bi,bj) = 0. _d 0
241 #ifdef TARGET_NEC_SX
242 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
243 #endif /* TARGET_NEC_SX */
244 DO J=1,sNy
245 DO I=1,sNx
246 cg2d_q(I,J,bi,bj) =
247 & aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj)
248 & +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj)
249 & +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj)
250 & +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj)
251 & +aC2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
252 alphaTile(bi,bj) = alphaTile(bi,bj)
253 & + cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
254 ENDDO
255 ENDDO
256 ENDDO
257 ENDDO
258
259 CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
260
261 sigma = eta_qrN/alpha
262
263 C== Update solution and residual vectors
264 C Now compute "interior" points.
265 DO bj=myByLo(myThid),myByHi(myThid)
266 DO bi=myBxLo(myThid),myBxHi(myThid)
267 errTile(bi,bj) = 0. _d 0
268 DO J=1,sNy
269 DO I=1,sNx
270 cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+sigma*cg2d_s(I,J,bi,bj)
271 cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-sigma*cg2d_q(I,J,bi,bj)
272 ENDDO
273 ENDDO
274 ENDDO
275 ENDDO
276
277 CALL EXCH_S3D_RL( cg2d_r,1, myThid )
278
279 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
280 DO 10 it2d=1, numIters
281
282 C-- Solve preconditioning equation and update
283 C-- conjugate direction vector "s".
284 C-- z = M^-1 r
285 DO bj=myByLo(myThid),myByHi(myThid)
286 DO bi=myBxLo(myThid),myBxHi(myThid)
287 #ifdef TARGET_NEC_SX
288 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
289 #endif /* TARGET_NEC_SX */
290 DO J=1,sNy
291 DO I=1,sNx
292 cg2d_y(I,J,bi,bj) =
293 & pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj)
294 & +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj)
295 & +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj)
296 & +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj)
297 & +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj)
298 ENDDO
299 ENDDO
300 ENDDO
301 ENDDO
302
303 CALL EXCH_S3D_RL( cg2d_y, 1, myThid )
304
305 C== v = A.z
306 C-- eta_qr = <z,r>
307 C-- delta = <z,v>
308 C-- Do the error calcuation here to consolidate global reductions
309 eta_qrN = 0. _d 0
310 delta = 0. _d 0
311 err = 0. _d 0
312 DO bj=myByLo(myThid),myByHi(myThid)
313 DO bi=myBxLo(myThid),myBxHi(myThid)
314 eta_qrNtile(bi,bj) = 0. _d 0
315 deltaTile(bi,bj) = 0. _d 0
316 errTile(bi,bj) = 0. _d 0
317 #ifdef TARGET_NEC_SX
318 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
319 #endif /* TARGET_NEC_SX */
320 DO J=1,sNy
321 DO I=1,sNx
322 cg2d_v(I,J,bi,bj) =
323 & aW2d(I ,J ,bi,bj)*cg2d_y(I-1,J ,bi,bj)
324 & +aW2d(I+1,J ,bi,bj)*cg2d_y(I+1,J ,bi,bj)
325 & +aS2d(I ,J ,bi,bj)*cg2d_y(I ,J-1,bi,bj)
326 & +aS2d(I ,J+1,bi,bj)*cg2d_y(I ,J+1,bi,bj)
327 & +aC2d(I ,J ,bi,bj)*cg2d_y(I ,J ,bi,bj)
328 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
329 & +cg2d_y(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
330 deltaTile(bi,bj) = deltaTile(bi,bj)
331 & +cg2d_y(I,J,bi,bj)*cg2d_v(I,J,bi,bj)
332 errTile(bi,bj) = errTile(bi,bj)
333 & + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
334 ENDDO
335 ENDDO
336 ENDDO
337 ENDDO
338
339 C CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
340 C CALL GLOBAL_SUM_TILE_RL( deltaTile,delta,myThid )
341 C CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
342 DO bj=myByLo(myThid),myByHi(myThid)
343 DO bi=myBxLo(myThid),myBxHi(myThid)
344 sumPhi(1,bi,bj) = eta_qrNtile(bi,bj)
345 sumPhi(2,bi,bj) = deltaTile(bi,bj)
346 sumPhi(3,bi,bj) = errTile(bi,bj)
347 ENDDO
348 ENDDO
349
350 C global_vec_sum_r8 does not call BAR2 on input
351 CALL BAR2( myThid)
352 CALL GLOBAL_VEC_SUM_R8(3,3,sumPhi,myThid)
353
354 eta_qrN = sumPhi(1,1,1)
355 delta = sumPhi(2,1,1)
356 err = sumPhi(3,1,1)
357
358 err = SQRT(err)
359 actualIts = it2d
360 actualResidual = err
361 IF ( printResidual ) THEN
362 IF ( MOD( it2d-1, printResidualFreq ).EQ.0 ) THEN
363 WRITE(msgBuf,'(A,I6,A,1PE21.14)')
364 & ' cg2d: iter=', actualIts, ' ; resid.= ', actualResidual
365 CALL PRINT_MESSAGE( msgBuf, standardMessageUnit,
366 & SQUEEZE_RIGHT, myThid )
367 ENDIF
368 ENDIF
369 IF ( err .LT. cg2dTolerance ) GOTO 11
370
371 cgBeta = eta_qrN/eta_qrNM1
372 eta_qrNM1 = eta_qrN
373 alpha = delta - cgBeta**2*alpha
374 sigma = eta_qrN/alpha
375
376 DO bj=myByLo(myThid),myByHi(myThid)
377 DO bi=myBxLo(myThid),myBxHi(myThid)
378 DO J=1,sNy
379 DO I=1,sNx
380 cg2d_s(I,J,bi,bj) = cg2d_y(I,J,bi,bj)
381 & + cgBeta*cg2d_s(I,J,bi,bj)
382 cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)
383 & + sigma*cg2d_s(I,J,bi,bj)
384 cg2d_q(I,J,bi,bj) = cg2d_v(I,J,bi,bj)
385 & + cgBeta*cg2d_q(I,J,bi,bj)
386 cg2d_r(I,J,bi,bj) = cg2d_r(I,J,bi,bj)
387 & - sigma*cg2d_q(I,J,bi,bj)
388 ENDDO
389 ENDDO
390 ENDDO
391 ENDDO
392
393 CALL EXCH_S3D_RL( cg2d_r, 1, myThid )
394
395 10 CONTINUE
396 11 CONTINUE
397
398 IF (cg2dNormaliseRHS) THEN
399 C-- Un-normalise the answer
400 DO bj=myByLo(myThid),myByHi(myThid)
401 DO bi=myBxLo(myThid),myBxHi(myThid)
402 DO J=1,sNy
403 DO I=1,sNx
404 cg2d_x(I ,J ,bi,bj) = cg2d_x(I ,J ,bi,bj)/rhsNorm
405 ENDDO
406 ENDDO
407 ENDDO
408 ENDDO
409 ENDIF
410
411 C-- Return parameters to caller
412 lastResidual=actualResidual
413 numIters=actualIts
414
415 C The following exchange was moved up to solve_for_pressure
416 C for compatibility with TAMC.
417 C _EXCH_XY_R8(cg2d_x, myThid )
418 c _BEGIN_MASTER( myThid )
419 c WRITE(*,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
420 c & actualIts, actualResidual
421 c _END_MASTER( myThid )
422
423 #endif /* ALLOW_SRCG */
424 RETURN
425 END

  ViewVC Help
Powered by ViewVC 1.1.22