/[MITgcm]/MITgcm/model/src/cg2d.F
ViewVC logotype

Contents of /MITgcm/model/src/cg2d.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.49 - (show annotations) (download)
Tue Dec 4 10:22:15 2007 UTC (16 years, 6 months ago) by mlosch
Branch: MAIN
CVS Tags: checkpoint59l
Changes since 1.48: +17 -1 lines
add 3 compiler directives that speed up this routine by 30% on a NEC
SX vector computer.

1 C $Header: /u/gcmpack/MITgcm/model/src/cg2d.F,v 1.48 2007/09/04 14:54:57 jmc Exp $
2 C $Name: $
3
4 #include "CPP_OPTIONS.h"
5 #ifdef TARGET_NEC_SX
6 C set a sensible default for the outer loop unrolling parameter that can
7 C be overriden in the Makefile with the DEFINES macro or in CPP_OPTIONS.h
8 #ifndef CG2D_OUTERLOOPITERS
9 # define CG2D_OUTERLOOPITERS 10
10 #endif
11 #endif /* TARGET_NEC_SX */
12
13 CBOP
14 C !ROUTINE: CG2D
15 C !INTERFACE:
16 SUBROUTINE CG2D(
17 I cg2d_b,
18 U cg2d_x,
19 O firstResidual,
20 O lastResidual,
21 U numIters,
22 I myThid )
23 C !DESCRIPTION: \bv
24 C *==========================================================*
25 C | SUBROUTINE CG2D
26 C | o Two-dimensional grid problem conjugate-gradient
27 C | inverter (with preconditioner).
28 C *==========================================================*
29 C | Con. grad is an iterative procedure for solving Ax = b.
30 C | It requires the A be symmetric.
31 C | This implementation assumes A is a five-diagonal
32 C | matrix of the form that arises in the discrete
33 C | representation of the del^2 operator in a
34 C | two-dimensional space.
35 C | Notes:
36 C | ======
37 C | This implementation can support shared-memory
38 C | multi-threaded execution. In order to do this COMMON
39 C | blocks are used for many of the arrays - even ones that
40 C | are only used for intermedaite results. This design is
41 C | OK if you want to all the threads to collaborate on
42 C | solving the same problem. On the other hand if you want
43 C | the threads to solve several different problems
44 C | concurrently this implementation will not work.
45 C *==========================================================*
46 C \ev
47
48 C !USES:
49 IMPLICIT NONE
50 C === Global data ===
51 #include "SIZE.h"
52 #include "EEPARAMS.h"
53 #include "PARAMS.h"
54 #include "CG2D.h"
55 c#include "GRID.h"
56 c#include "SURFACE.h"
57
58 C !INPUT/OUTPUT PARAMETERS:
59 C === Routine arguments ===
60 C myThid :: Thread on which I am working.
61 C cg2d_b :: The source term or "right hand side"
62 C cg2d_x :: The solution
63 C firstResidual :: the initial residual before any iterations
64 C lastResidual :: the actual residual reached
65 C numIters :: Entry: the maximum number of iterations allowed
66 C Exit: the actual number of iterations used
67 _RL cg2d_b(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
68 _RL cg2d_x(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
69 _RL firstResidual
70 _RL lastResidual
71 INTEGER numIters
72 INTEGER myThid
73
74 C !LOCAL VARIABLES:
75 C === Local variables ====
76 C actualIts :: Number of iterations taken
77 C actualResidual :: residual
78 C bi, bj :: Block index in X and Y.
79 C eta_qrN :: Used in computing search directions
80 C eta_qrNM1 suffix N and NM1 denote current and
81 C cgBeta previous iterations respectively.
82 C alpha
83 C sumRHS :: Sum of right-hand-side. Sometimes this is a
84 C useful debuggin/trouble shooting diagnostic.
85 C For neumann problems sumRHS needs to be ~0.
86 C or they converge at a non-zero residual.
87 C err :: Measure of residual of Ax - b, usually the norm.
88 C I, J, it2d :: Loop counters ( it2d counts CG iterations )
89 INTEGER actualIts
90 _RL actualResidual
91 INTEGER bi, bj
92 INTEGER I, J, it2d
93 c INTEGER ks
94 _RL err, errTile(nSx,nSy)
95 _RL eta_qrN,eta_qrNtile(nSx,nSy)
96 _RL eta_qrNM1
97 _RL cgBeta
98 _RL alpha, alphaTile(nSx,nSy)
99 _RL sumRHS, sumRHStile(nSx,nSy)
100 _RL rhsMax
101 _RL rhsNorm
102 CHARACTER*(MAX_LEN_MBUF) msgBuf
103 CEOP
104
105
106 CcnhDebugStarts
107 C CHARACTER*(MAX_LEN_FNAM) suff
108 CcnhDebugEnds
109
110
111 C-- Initialise inverter
112 eta_qrNM1 = 1. _d 0
113
114 CcnhDebugStarts
115 C _EXCH_XY_R8( cg2d_b, myThid )
116 C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.0 CG2D_B' , 1, myThid )
117 C suff = 'unnormalised'
118 C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid)
119 C STOP
120 CcnhDebugEnds
121
122 C-- Normalise RHS
123 rhsMax = 0. _d 0
124 DO bj=myByLo(myThid),myByHi(myThid)
125 DO bi=myBxLo(myThid),myBxHi(myThid)
126 DO J=1,sNy
127 DO I=1,sNx
128 cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*cg2dNorm
129 rhsMax = MAX(ABS(cg2d_b(I,J,bi,bj)),rhsMax)
130 ENDDO
131 ENDDO
132 ENDDO
133 ENDDO
134
135 IF (cg2dNormaliseRHS) THEN
136 C- Normalise RHS :
137 #ifdef LETS_MAKE_JAM
138 C _GLOBAL_MAX_R8( rhsMax, myThid )
139 rhsMax=1.
140 #else
141 _GLOBAL_MAX_R8( rhsMax, myThid )
142 #endif
143 rhsNorm = 1. _d 0
144 IF ( rhsMax .NE. 0. ) rhsNorm = 1. _d 0 / rhsMax
145 DO bj=myByLo(myThid),myByHi(myThid)
146 DO bi=myBxLo(myThid),myBxHi(myThid)
147 DO J=1,sNy
148 DO I=1,sNx
149 cg2d_b(I,J,bi,bj) = cg2d_b(I,J,bi,bj)*rhsNorm
150 cg2d_x(I,J,bi,bj) = cg2d_x(I,J,bi,bj)*rhsNorm
151 ENDDO
152 ENDDO
153 ENDDO
154 ENDDO
155 C- end Normalise RHS
156 ENDIF
157
158 C-- Update overlaps
159 _EXCH_XY_R8( cg2d_b, myThid )
160 _EXCH_XY_R8( cg2d_x, myThid )
161 CcnhDebugStarts
162 C CALL PLOT_FIELD_XYRL( cg2d_b, 'CG2D.1 CG2D_B' , 1, myThid )
163 C suff = 'normalised'
164 C CALL WRITE_FLD_XY_RL ( 'cg2d_b.',suff, cg2d_b, 1, myThid)
165 CcnhDebugEnds
166
167 C-- Initial residual calculation
168 err = 0. _d 0
169 sumRHS = 0. _d 0
170 DO bj=myByLo(myThid),myByHi(myThid)
171 DO bi=myBxLo(myThid),myBxHi(myThid)
172 sumRHStile(bi,bj) = 0. _d 0
173 errTile(bi,bj) = 0. _d 0
174 #ifdef TARGET_NEC_SX
175 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
176 #endif /* TARGET_NEC_SX */
177 DO J=1,sNy
178 DO I=1,sNx
179 c ks = ksurfC(I,J,bi,bj)
180 cg2d_s(I,J,bi,bj) = 0.
181 cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
182 & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
183 & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
184 & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
185 & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
186 & +aC2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
187 & )
188 c & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
189 c & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
190 c & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
191 c & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj)
192 c & -freeSurfFac*_rA(i,j,bi,bj)*deepFac2F(ks)*recip_Bo(i,j,bi,bj)
193 c & *cg2d_x(I ,J ,bi,bj)
194 c & /deltaTMom/deltaTfreesurf*cg2dNorm
195 c & )
196 errTile(bi,bj) = errTile(bi,bj)
197 & + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
198 sumRHStile(bi,bj) = sumRHStile(bi,bj) + cg2d_b(I,J,bi,bj)
199 ENDDO
200 ENDDO
201 c sumRHS = sumRHS + sumRHStile(bi,bj)
202 c err = err + errTile(bi,bj)
203 ENDDO
204 ENDDO
205 #ifdef LETS_MAKE_JAM
206 CALL EXCH_XY_O1_R8_JAM( cg2d_r )
207 #else
208 CALL EXCH_XY_RL( cg2d_r, myThid )
209 #endif
210 #ifdef LETS_MAKE_JAM
211 CALL EXCH_XY_O1_R8_JAM( cg2d_s )
212 #else
213 CALL EXCH_XY_RL( cg2d_s, myThid )
214 #endif
215 c _GLOBAL_SUM_R8( sumRHS, myThid )
216 c _GLOBAL_SUM_R8( err , myThid )
217 CALL GLOBAL_SUM_TILE_RL( sumRHStile, sumRHS, myThid )
218 CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
219 err = SQRT(err)
220 actualIts = 0
221 actualResidual = err
222
223 IF ( debugLevel .GE. debLevZero ) THEN
224 _BEGIN_MASTER( myThid )
225 WRITE(standardmessageunit,'(A,1P2E22.14)')
226 & ' cg2d: Sum(rhs),rhsMax = ', sumRHS,rhsMax
227 _END_MASTER( myThid )
228 ENDIF
229 C _BARRIER
230 c _BEGIN_MASTER( myThid )
231 c WRITE(standardmessageunit,'(A,I6,1PE30.14)')
232 c & ' CG2D iters, err = ',
233 c & actualIts, actualResidual
234 c _END_MASTER( myThid )
235 firstResidual=actualResidual
236
237 IF ( err .LT. cg2dTolerance ) GOTO 11
238
239 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
240 DO 10 it2d=1, numIters
241
242 CcnhDebugStarts
243 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' residual = ',
244 C & actualResidual
245 CcnhDebugEnds
246 C-- Solve preconditioning equation and update
247 C-- conjugate direction vector "s".
248 eta_qrN = 0. _d 0
249 DO bj=myByLo(myThid),myByHi(myThid)
250 DO bi=myBxLo(myThid),myBxHi(myThid)
251 eta_qrNtile(bi,bj) = 0. _d 0
252 #ifdef TARGET_NEC_SX
253 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
254 #endif /* TARGET_NEC_SX */
255 DO J=1,sNy
256 DO I=1,sNx
257 cg2d_q(I,J,bi,bj) =
258 & pC(I ,J ,bi,bj)*cg2d_r(I ,J ,bi,bj)
259 & +pW(I ,J ,bi,bj)*cg2d_r(I-1,J ,bi,bj)
260 & +pW(I+1,J ,bi,bj)*cg2d_r(I+1,J ,bi,bj)
261 & +pS(I ,J ,bi,bj)*cg2d_r(I ,J-1,bi,bj)
262 & +pS(I ,J+1,bi,bj)*cg2d_r(I ,J+1,bi,bj)
263 CcnhDebugStarts
264 C cg2d_q(I,J,bi,bj) = cg2d_r(I ,J ,bi,bj)
265 CcnhDebugEnds
266 eta_qrNtile(bi,bj) = eta_qrNtile(bi,bj)
267 & +cg2d_q(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
268 ENDDO
269 ENDDO
270 c eta_qrN = eta_qrN + eta_qrNtile(bi,bj)
271 ENDDO
272 ENDDO
273
274 c _GLOBAL_SUM_R8(eta_qrN, myThid)
275 CALL GLOBAL_SUM_TILE_RL( eta_qrNtile,eta_qrN,myThid )
276 CcnhDebugStarts
277 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' eta_qrN = ',eta_qrN
278 CcnhDebugEnds
279 cgBeta = eta_qrN/eta_qrNM1
280 CcnhDebugStarts
281 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' beta = ',cgBeta
282 CcnhDebugEnds
283 eta_qrNM1 = eta_qrN
284
285 DO bj=myByLo(myThid),myByHi(myThid)
286 DO bi=myBxLo(myThid),myBxHi(myThid)
287 DO J=1,sNy
288 DO I=1,sNx
289 cg2d_s(I,J,bi,bj) = cg2d_q(I,J,bi,bj)
290 & + cgBeta*cg2d_s(I,J,bi,bj)
291 ENDDO
292 ENDDO
293 ENDDO
294 ENDDO
295
296 C-- Do exchanges that require messages i.e. between
297 C-- processes.
298 C _EXCH_XY_R8( cg2d_s, myThid )
299 #ifdef LETS_MAKE_JAM
300 CALL EXCH_XY_O1_R8_JAM( cg2d_s )
301 #else
302 CALL EXCH_XY_RL( cg2d_s, myThid )
303 #endif
304
305 C== Evaluate laplace operator on conjugate gradient vector
306 C== q = A.s
307 alpha = 0. _d 0
308 DO bj=myByLo(myThid),myByHi(myThid)
309 DO bi=myBxLo(myThid),myBxHi(myThid)
310 alphaTile(bi,bj) = 0. _d 0
311 #ifdef TARGET_NEC_SX
312 !CDIR OUTERUNROLL=CG2D_OUTERLOOPITERS
313 #endif /* TARGET_NEC_SX */
314 DO J=1,sNy
315 DO I=1,sNx
316 c ks = ksurfC(I,J,bi,bj)
317 cg2d_q(I,J,bi,bj) =
318 & aW2d(I ,J ,bi,bj)*cg2d_s(I-1,J ,bi,bj)
319 & +aW2d(I+1,J ,bi,bj)*cg2d_s(I+1,J ,bi,bj)
320 & +aS2d(I ,J ,bi,bj)*cg2d_s(I ,J-1,bi,bj)
321 & +aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J+1,bi,bj)
322 & +aC2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
323 c & -aW2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
324 c & -aW2d(I+1,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
325 c & -aS2d(I ,J ,bi,bj)*cg2d_s(I ,J ,bi,bj)
326 c & -aS2d(I ,J+1,bi,bj)*cg2d_s(I ,J ,bi,bj)
327 c & -freeSurfFac*_rA(i,j,bi,bj)*deepFac2F(ks)*recip_Bo(i,j,bi,bj)
328 c & *cg2d_s(I ,J ,bi,bj)
329 c & /deltaTMom/deltaTfreesurf*cg2dNorm
330 alphaTile(bi,bj) = alphaTile(bi,bj)
331 & + cg2d_s(I,J,bi,bj)*cg2d_q(I,J,bi,bj)
332 ENDDO
333 ENDDO
334 c alpha = alpha + alphaTile(bi,bj)
335 ENDDO
336 ENDDO
337 c _GLOBAL_SUM_R8(alpha,myThid)
338 CALL GLOBAL_SUM_TILE_RL( alphaTile, alpha, myThid )
339 CcnhDebugStarts
340 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' SUM(s*q)= ',alpha
341 CcnhDebugEnds
342 alpha = eta_qrN/alpha
343 CcnhDebugStarts
344 C WRITE(*,*) ' CG2D: Iteration ',it2d-1,' alpha= ',alpha
345 CcnhDebugEnds
346
347 C== Update solution and residual vectors
348 C Now compute "interior" points.
349 err = 0. _d 0
350 DO bj=myByLo(myThid),myByHi(myThid)
351 DO bi=myBxLo(myThid),myBxHi(myThid)
352 errTile(bi,bj) = 0. _d 0
353 DO J=1,sNy
354 DO I=1,sNx
355 cg2d_x(I,J,bi,bj)=cg2d_x(I,J,bi,bj)+alpha*cg2d_s(I,J,bi,bj)
356 cg2d_r(I,J,bi,bj)=cg2d_r(I,J,bi,bj)-alpha*cg2d_q(I,J,bi,bj)
357 errTile(bi,bj) = errTile(bi,bj)
358 & + cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
359 ENDDO
360 ENDDO
361 c err = err + errTile(bi,bj)
362 ENDDO
363 ENDDO
364
365 c _GLOBAL_SUM_R8( err , myThid )
366 CALL GLOBAL_SUM_TILE_RL( errTile, err, myThid )
367 err = SQRT(err)
368 actualIts = it2d
369 actualResidual = err
370 IF ( debugLevel.GT.debLevB ) THEN
371 c IF ( DIFFERENT_MULTIPLE(monitorFreq,myTime,deltaTClock)
372 c & ) THEN
373 _BEGIN_MASTER( myThid )
374 WRITE(msgBuf,'(A,I6,A,1PE21.14)')
375 & ' cg2d: iter=', actualIts, ' ; resid.= ', actualResidual
376 CALL PRINT_MESSAGE(msgBuf,standardMessageUnit,SQUEEZE_RIGHT,1)
377 _END_MASTER( myThid )
378 c ENDIF
379 ENDIF
380 IF ( err .LT. cg2dTolerance ) GOTO 11
381
382 #ifdef LETS_MAKE_JAM
383 CALL EXCH_XY_O1_R8_JAM( cg2d_r )
384 #else
385 CALL EXCH_XY_RL( cg2d_r, myThid )
386 #endif
387
388 10 CONTINUE
389 11 CONTINUE
390
391 IF (cg2dNormaliseRHS) THEN
392 C-- Un-normalise the answer
393 DO bj=myByLo(myThid),myByHi(myThid)
394 DO bi=myBxLo(myThid),myBxHi(myThid)
395 DO J=1,sNy
396 DO I=1,sNx
397 cg2d_x(I ,J ,bi,bj) = cg2d_x(I ,J ,bi,bj)/rhsNorm
398 ENDDO
399 ENDDO
400 ENDDO
401 ENDDO
402 ENDIF
403
404 C The following exchange was moved up to solve_for_pressure
405 C for compatibility with TAMC.
406 C _EXCH_XY_R8(cg2d_x, myThid )
407 c _BEGIN_MASTER( myThid )
408 c WRITE(*,'(A,I6,1PE30.14)') ' CG2D iters, err = ',
409 c & actualIts, actualResidual
410 c _END_MASTER( myThid )
411
412 C-- Return parameters to caller
413 lastResidual=actualResidual
414 numIters=actualIts
415
416 CcnhDebugStarts
417 C CALL PLOT_FIELD_XYRL( cg2d_x, 'CALC_MOM_RHS CG2D_X' , 1, myThid )
418 C err = 0. _d 0
419 C DO bj=myByLo(myThid),myByHi(myThid)
420 C DO bi=myBxLo(myThid),myBxHi(myThid)
421 C DO J=1,sNy
422 C DO I=1,sNx
423 C cg2d_r(I,J,bi,bj) = cg2d_b(I,J,bi,bj) -
424 C & (aW2d(I ,J ,bi,bj)*cg2d_x(I-1,J ,bi,bj)
425 C & +aW2d(I+1,J ,bi,bj)*cg2d_x(I+1,J ,bi,bj)
426 C & +aS2d(I ,J ,bi,bj)*cg2d_x(I ,J-1,bi,bj)
427 C & +aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J+1,bi,bj)
428 C & -aW2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
429 C & -aW2d(I+1,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
430 C & -aS2d(I ,J ,bi,bj)*cg2d_x(I ,J ,bi,bj)
431 C & -aS2d(I ,J+1,bi,bj)*cg2d_x(I ,J ,bi,bj))
432 C err = err +
433 C & cg2d_r(I,J,bi,bj)*cg2d_r(I,J,bi,bj)
434 C ENDDO
435 C ENDDO
436 C ENDDO
437 C ENDDO
438 C _GLOBAL_SUM_R8( err , myThid )
439 C write(*,*) 'cg2d: Ax - b = ',SQRT(err)
440 CcnhDebugEnds
441
442 RETURN
443 END

  ViewVC Help
Powered by ViewVC 1.1.22