/[MITgcm]/MITgcm/model/src/cg2d.F
ViewVC logotype

Contents of /MITgcm/model/src/cg2d.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.52 - (show annotations) (download)
Sat May 16 13:42:15 2009 UTC (14 years, 11 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint61q, checkpoint61o, checkpoint61r, checkpoint61s, checkpoint61p
Changes since 1.51: +7 -32 lines
- fix GLOBAL_SUM_SINGLECPU when using Exch2; re-use same buffers and same
  gather/scatter S/R as with SingleCpuIO (=> 1 less 2D global RL array).

1 C $Header: /u/gcmpack/MITgcm/model/src/cg2d.F,v 1.51 2009/04/28 18:01:14 jmc Exp $
2 C $Name: $
3
4 #include "CPP_OPTIONS.h"
5 #ifdef TARGET_NEC_SX
6 C set a sensible default for the outer loop unrolling parameter that can
7 C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8 #ifndef CG2D_OUTERLOOPITERS
9 # define CG2D_OUTERLOOPITERS 10
10 #endif
11 #endif /* TARGET_NEC_SX */
12
13 CBOP
14 C !ROUTINE: CG2D
15 C !INTERFACE:
16 SUBROUTINE CG2D(
17 I cg2d_b,
18 U cg2d_x,
19 O firstResidual,
20 O lastResidual,
21 U numIters,
22 I myThid )
23 C !DESCRIPTION: \bv
24 C *==========================================================*
25 C | SUBROUTINE CG2D
26 C | o Two-dimensional grid problem conjugate-gradient
27 C | inverter (with preconditioner).
28 C *==========================================================*
29 C | Con. grad is an iterative procedure for solving Ax = b.
30 C | It requires the A be symmetric.
31 C | This implementation assumes A is a five-diagonal
32 C | matrix of the form that arises in the discrete
33 C | representation of the del^2 operator in a
34 C | two-dimensional space.
35 C | Notes:
36 C | ======
37 C | This implementation can support shared-memory
38 C | multi-threaded execution. In order to do this COMMON
39 C | blocks are used for many of the arrays - even ones that
40 C | are only used for intermedaite results. This design is
41 C | OK if you want to all the threads to collaborate on
42 C | solving the same problem. On the other hand if you want
43 C | the threads to solve several different problems
44 C | concurrently this implementation will not work.
45 C *==========================================================*
46 C \ev
47
48 C !USES:
49 IMPLICIT NONE
50 C === Global data ===
51 #include "SIZE.h"
52 #include "EEPARAMS.h"
53 #include "PARAMS.h"
54 #include "CG2D.h"
55 c#include "GRID.h"
56 c#include "SURFACE.h"
57
58 C !INPUT/OUTPUT PARAMETERS:
59 C === Routine arguments ===
60 C myThid :: Thread on which I am working.
61 C cg2d_b :: The source term or "right hand side"
62 C cg2d_x :: The solution
63 C firstResidual :: the initial residual before any iterations
64 C lastResidual :: the actual residual reached
65 C numIters :: Entry: the maximum number of iterations allowed
66 C Exit: the actual number of iterations used
67 _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
68 _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
69 _RL firstResidual
70 _RL lastResidual
71 INTEGER numIters
72 INTEGER myThid
73
74 C !LOCAL VARIABLES:
75 C === Local variables ====
76 C actualIts :: Number of iterations taken
77 C actualResidual :: residual
78 C bi, bj :: Block index in X and Y.
79 C eta_qrN :: Used in computing search directions
80 C eta_qrNM1 suffix N and NM1 denote current and
81 C cgBeta previous iterations respectively.
82 C alpha
83 C sumRHS :: Sum of right-hand-side. Sometimes this is a
84 C useful debuggin/trouble shooting diagnostic.
85 C For neumann problems sumRHS needs to be ~0.
86 C or they converge at a non-zero residual.
87 C err :: Measure of residual of Ax - b, usually the norm.
88 C I, J, it2d :: Loop counters ( it2d counts CG iterations )
89 INTEGER actualIts
90 _RL actualResidual
91 INTEGER bi, bj
92 INTEGER I, J, it2d
93 c INTEGER ks
94 _RL err, errTile(nSx,nSy)
95 _RL eta_qrN,eta_qrNtile(nSx,nSy)
96 _RL eta_qrNM1
97 _RL cgBeta
98 _RL alpha, alphaTile(nSx,nSy)
99 _RL sumRHS, sumRHStile(nSx,nSy)
100 _RL rhsMax
101 _RL rhsNorm
102 #ifdef CG2D_SINGLECPU_SUM
103 _RL localBuf(1:sNx,1:sNy,nSx,nSy)
104 #endif
105 CHARACTER*(MAX_LEN_MBUF) msgBuf
106 CEOP
107
108
109 CcnhDebugStarts
110 C CHARACTER*(MAX_LEN_FNAM) suff
111 CcnhDebugEnds
112
113
114 C-- Initialise inverter
115 eta_qrNM1 = 1. _d 0
116
117 CcnhDebugStarts
118 C _EXCH_XY_RL( cg2d_b, myThid )
119 C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.0 CG2D_B' , 1, myThid )
120 C suff = 'unnormalised'
121 C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid)
122 C STOP
123 CcnhDebugEnds
124
125 C-- Normalise RHS
126 rhsMax = 0. _d 0
127 DO bj=myByLo(myThid),myByHi(myThid)
128 DO bi=myBxLo(myThid),myBxHi(myThid)
129 DO J=1,sNy
130 DO I=1,sNx
131 cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
132 rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
133 ENDDO
134 ENDDO
135 ENDDO
136 ENDDO
137
138 IF (cg2dNormaliseRHS) THEN
139 C- Normalise RHS :
140 #ifdef LETS_MAKE_JAM
141 C _GLOBAL_MAX_RL( rhsMax, myThid )
142 rhsMax=1.
143 #else
144 _GLOBAL_MAX_RL( rhsMax, myThid )
145 #endif
146 rhsNorm = 1. _d 0
147 IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
148 DO bj=myByLo(myThid),myByHi(myThid)
149 DO bi=myBxLo(myThid),myBxHi(myThid)
150 DO J=1,sNy
151 DO I=1,sNx
152 cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
153 cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
154 ENDDO
155 ENDDO
156 ENDDO
157 ENDDO
158 C- end Normalise RHS
159 ENDIF
160
161 C-- Update overlaps
162 _EXCH_XY_RL( cg2d_b, myThid )
163 _EXCH_XY_RL( cg2d_x, myThid )
164 CcnhDebugStarts
165 C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid )
166 C suff = 'normalised'
167 C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid)
168 CcnhDebugEnds
169
170 C-- Initial residual calculation
171 DO bj=myByLo(myThid),myByHi(myThid)
172 DO bi=myBxLo(myThid),myBxHi(myThid)
173 sumRHStile(bi,bj) = 0. _d 0
174 errTile(bi,bj) = 0. _d 0
175 #ifdef TARGET_NEC_SX
176 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
177 #endif /* TARGET_NEC_SX */
178 DO J=1,sNy
179 DO I=1,sNx
180 c ks = ksurfC(I,J,bi,bj)
181 cg2d_s(I,J,bi,bj) = 0.
182 cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
183 & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
184 & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
185 & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
186 & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
187 & +aC2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
188 & )
189 c & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
190 c & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
191 c & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
192 c & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj)
193 c & -freeSurfFac*_rA(i,j,bi,bj)*deepFac2F(ks)*recip_Bo(i,j,bi,bj)
194 c & *cg2d_x(I ,J ,bi,bj)
195 c & /deltaTMom/deltaTfreesurf*cg2dNorm
196 c & )
197 #ifdef CG2D_SINGLECPU_SUM
198 localBuf(I,J,bi,bj) = cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
199 #else
200 errTile(bi,bj) = errTile(bi,bj)
201 & + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
202 sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(I,J,bi,bj)
203 #endif
204 ENDDO
205 ENDDO
206 ENDDO
207 ENDDO
208 #ifdef LETS_MAKE_JAM
209 CALL EXCH_XY_O1_R8_JAM( cg2d_r )
210 #else
211 CALL EXCH_XY_RL( cg2d_r, myThid )
212 #endif
213 #ifdef LETS_MAKE_JAM
214 CALL EXCH_XY_O1_R8_JAM( cg2d_s )
215 #else
216 CALL EXCH_XY_RL( cg2d_s, myThid )
217 #endif
218 #ifdef CG2D_SINGLECPU_SUM
219 CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err, 0, 0, myThid)
220 CALL GLOBAL_SUM_SINGLECPU_RL(cg2d_b, sumRHS, OLx, OLy, myThid)
221 #else
222 CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
223 CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
224 #endif
225 err = SQRT(err)
226 actualIts = 0
227 actualResidual = err
228
229 IF ( debugLevel .GE. debLevZero ) THEN
230 _BEGIN_MASTER( myThid )
231 WRITE(standardmessageunit,'(A,1P2E22.14)')
232 & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
233 _END_MASTER( myThid )
234 ENDIF
235 C _BARRIER
236 c _BEGIN_MASTER( myThid )
237 c WRITE(standardmessageunit,'(A,I6,1PE30.14)')
238 c & ' CG2D iters, err = ',
239 c & actualIts, actualResidual
240 c _END_MASTER( myThid )
241 firstResidual=actualResidual
242
243 IF ( err .LT. cg2dTolerance ) GOTO 11
244
245 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
246 DO 10 it2d=1, numIters
247
248 CcnhDebugStarts
249 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' residual = ',
250 C & actualResidual
251 CcnhDebugEnds
252 C-- Solve preconditioning equation and update
253 C-- conjugate direction vector "s".
254 DO bj=myByLo(myThid),myByHi(myThid)
255 DO bi=myBxLo(myThid),myBxHi(myThid)
256 eta_qrNtile(bi,bj) = 0. _d 0
257 #ifdef TARGET_NEC_SX
258 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
259 #endif /* TARGET_NEC_SX */
260 DO J=1,sNy
261 DO I=1,sNx
262 cg2d_q(I,J,bi,bj) =
263 & pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj)
264 & +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj)
265 & +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj)
266 & +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj)
267 & +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj)
268 CcnhDebugStarts
269 C cg2d_q(I,J,bi,bj) = cg2d_r(I ,J ,bi,bj)
270 CcnhDebugEnds
271 #ifdef CG2D_SINGLECPU_SUM
272 localBuf(I,J,bi,bj) =
273 & cg2d_q(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
274 #else
275 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
276 & +cg2d_q(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
277 #endif
278 ENDDO
279 ENDDO
280 ENDDO
281 ENDDO
282
283 #ifdef CG2D_SINGLECPU_SUM
284 CALL GLOBAL_SUM_SINGLECPU_RL( localBuf,eta_qrN,0,0,myThid )
285 #else
286 CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
287 #endif
288 CcnhDebugStarts
289 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' eta_qrN = ',eta_qrN
290 CcnhDebugEnds
291 cgBeta = eta_qrN/eta_qrNM1
292 CcnhDebugStarts
293 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' beta = ',cgBeta
294 CcnhDebugEnds
295 eta_qrNM1 = eta_qrN
296
297 DO bj=myByLo(myThid),myByHi(myThid)
298 DO bi=myBxLo(myThid),myBxHi(myThid)
299 DO J=1,sNy
300 DO I=1,sNx
301 cg2d_s(I,J,bi,bj) = cg2d_q(I,J,bi,bj)
302 & + cgBeta*cg2d_s(I,J,bi,bj)
303 ENDDO
304 ENDDO
305 ENDDO
306 ENDDO
307
308 C-- Do exchanges that require messages i.e. between
309 C-- processes.
310 C _EXCH_XY_RL( cg2d_s, myThid )
311 #ifdef LETS_MAKE_JAM
312 CALL EXCH_XY_O1_R8_JAM( cg2d_s )
313 #else
314 CALL EXCH_XY_RL( cg2d_s, myThid )
315 #endif
316
317 C== Evaluate laplace operator on conjugate gradient vector
318 C== q = A.s
319 DO bj=myByLo(myThid),myByHi(myThid)
320 DO bi=myBxLo(myThid),myBxHi(myThid)
321 alphaTile(bi,bj) = 0. _d 0
322 #ifdef TARGET_NEC_SX
323 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
324 #endif /* TARGET_NEC_SX */
325 DO J=1,sNy
326 DO I=1,sNx
327 c ks = ksurfC(I,J,bi,bj)
328 cg2d_q(I,J,bi,bj) =
329 & aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj)
330 & +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj)
331 & +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj)
332 & +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj)
333 & +aC2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
334 c & -aW2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
335 c & -aW2d(I+1,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
336 c & -aS2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
337 c & -aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J ,bi,bj)
338 c & -freeSurfFac*_rA(i,j,bi,bj)*deepFac2F(ks)*recip_Bo(i,j,bi,bj)
339 c & *cg2d_s(I ,J ,bi,bj)
340 c & /deltaTMom/deltaTfreesurf*cg2dNorm
341 #ifdef CG2D_SINGLECPU_SUM
342 localBuf(I,J,bi,bj) = cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
343 #else
344 alphaTile(bi,bj) = alphaTile(bi,bj)
345 & + cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
346 #endif
347 ENDDO
348 ENDDO
349 ENDDO
350 ENDDO
351 #ifdef CG2D_SINGLECPU_SUM
352 CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, alpha, 0, 0, myThid)
353 #else
354 CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
355 #endif
356 CcnhDebugStarts
357 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' SUM(s*q)= ',alpha
358 CcnhDebugEnds
359 alpha = eta_qrN/alpha
360 CcnhDebugStarts
361 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' alpha= ',alpha
362 CcnhDebugEnds
363
364 C== Update solution and residual vectors
365 C Now compute "interior" points.
366 DO bj=myByLo(myThid),myByHi(myThid)
367 DO bi=myBxLo(myThid),myBxHi(myThid)
368 errTile(bi,bj) = 0. _d 0
369 DO J=1,sNy
370 DO I=1,sNx
371 cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+alpha*cg2d_s(I,J,bi,bj)
372 cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-alpha*cg2d_q(I,J,bi,bj)
373 #ifdef CG2D_SINGLECPU_SUM
374 localBuf(I,J,bi,bj) = cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
375 #else
376 errTile(bi,bj) = errTile(bi,bj)
377 & + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
378 #endif
379 ENDDO
380 ENDDO
381 ENDDO
382 ENDDO
383
384 #ifdef CG2D_SINGLECPU_SUM
385 CALL GLOBAL_SUM_SINGLECPU_RL(localBuf, err, 0, 0, myThid)
386 #else
387 CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
388 #endif
389 err = SQRT(err)
390 actualIts = it2d
391 actualResidual = err
392 IF ( debugLevel.GT.debLevB ) THEN
393 c IF ( DIFFERENT_MULTIPLE(monitorFreq,myTime,deltaTClock)
394 c & ) THEN
395 _BEGIN_MASTER( myThid )
396 WRITE(msgBuf,'(A,I6,A,1PE21.14)')
397 & ' cg2d: iter=', actualIts, ' ; resid.= ', actualResidual
398 CALL PRINT_MESSAGE(msgBuf,standardMessageUnit,SQUEEZE_RIGHT,1)
399 _END_MASTER( myThid )
400 c ENDIF
401 ENDIF
402 IF ( err .LT. cg2dTolerance ) GOTO 11
403
404 #ifdef LETS_MAKE_JAM
405 CALL EXCH_XY_O1_R8_JAM( cg2d_r )
406 #else
407 CALL EXCH_XY_RL( cg2d_r, myThid )
408 #endif
409
410 10 CONTINUE
411 11 CONTINUE
412
413 IF (cg2dNormaliseRHS) THEN
414 C-- Un-normalise the answer
415 DO bj=myByLo(myThid),myByHi(myThid)
416 DO bi=myBxLo(myThid),myBxHi(myThid)
417 DO J=1,sNy
418 DO I=1,sNx
419 cg2d_x(I ,J ,bi,bj) = cg2d_x(I ,J ,bi,bj)/rhsNorm
420 ENDDO
421 ENDDO
422 ENDDO
423 ENDDO
424 ENDIF
425
426 C The following exchange was moved up to solve_for_pressure
427 C for compatibility with TAMC.
428 C _EXCH_XY_RL(cg2d_x, myThid )
429 c _BEGIN_MASTER( myThid )
430 c WRITE(*,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
431 c & actualIts, actualResidual
432 c _END_MASTER( myThid )
433
434 C-- Return parameters to caller
435 lastResidual=actualResidual
436 numIters=actualIts
437
438 CcnhDebugStarts
439 C CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
440 C err = 0. _d 0
441 C DO bj=myByLo(myThid),myByHi(myThid)
442 C DO bi=myBxLo(myThid),myBxHi(myThid)
443 C DO J=1,sNy
444 C DO I=1,sNx
445 C cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
446 C & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
447 C & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
448 C & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
449 C & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
450 C & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
451 C & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
452 C & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
453 C & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj))
454 C err = err +
455 C & cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
456 C ENDDO
457 C ENDDO
458 C ENDDO
459 C ENDDO
460 C _GLOBAL_SUM_RL( err , myThid )
461 C write(*,*) 'cg2d: Ax - b = ',SQRT(err)
462 CcnhDebugEnds
463
464 RETURN
465 END

  ViewVC Help
Powered by ViewVC 1.1.22