/[MITgcm]/MITgcm_contrib/cg2d_bench/cg2d.F
ViewVC logotype

Contents of /MITgcm_contrib/cg2d_bench/cg2d.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.2 - (show annotations) (download)
Fri May 12 22:21:21 2006 UTC (17 years, 11 months ago) by ce107
Branch: MAIN
Changes since 1.1: +67 -17 lines
Modified to use PAPI, allow single/double precision and printing of residual
at every iteration.

1 C $Id$
2 SUBROUTINE CG2D
3 C /==========================================================\
4 C | SUBROUTINE CG2D |
5 C | o Two-dimensional grid problem conjugate-gradient |
6 C | inverter (with preconditioner). |
7 C |==========================================================|
8 C | Con. grad is an iterative procedure for solving Ax = b. |
9 C | It requires the A be symmetric. |
10 C | This implementation assumes A is a five-diagonal |
11 C | matrix of the form that arises in the discrete |
12 C | representation of the del^2 operator in a |
13 C | two-dimensional space. |
14 C | Notes: |
15 C | ====== |
16 C | This implementation can support shared-memory |
17 C | multi-threaded execution. In order to do this COMMON |
18 C | blocks are used for many of the arrays - even ones that |
19 C | are only used for intermedaite results. This design is |
20 C | OK if you want to all the threads to collaborate on |
21 C | solving the same problem. On the other hand if you want |
22 C | the threads to solve several different problems |
23 C | concurrently this implementation will not work. |
24 C \==========================================================/
25 IMPLICIT NONE
26
27 C === Global data ===
28 #include "SIZE.h"
29 #include "EEPARAMS.h"
30 #include "PARAMS.h"
31 #include "CG2D.h"
32 #if defined(USE_PAPI_FLOPS) || defined(USE_PAPI_FLIPS)
33 #if defined(PAPI_PER_ITERATION) || defined(PAPI_PER_TIMESTEP)
34 #include "PAPI.h"
35 #endif
36 #endif
37
38 C === Routine arguments ===
39 C myThid - Thread on which I am working.
40 INTEGER myThid
41
42 C === Local variables ====
43 C actualIts - Number of iterations taken
44 C actualResidual - residual
45 C bi - Block index in X and Y.
46 C bj
47 C etaN - Used in computing search directions
48 C etaNM1 suffix N and NM1 denote current and
49 C beta previous iterations respectively.
50 C alpha
51 C sumRHS - Sum of right-hand-side. Sometimes this is a
52 C useful debuggin/trouble shooting diagnostic.
53 C For neumann problems sumRHS needs to be ~0.
54 C or they converge at a non-zero residual.
55 C err - Measure of residual of Ax - b, usually the norm.
56 C I, J, N - Loop counters ( N counts CG iterations )
57 INTEGER actualIts
58 INTEGER bi, bj
59 INTEGER I, J, N
60 #ifdef USE_MIXED_PRECISION
61 REAL*8 actualResidual
62 REAL*8 err
63 REAL*8 errSum
64 REAL*8 etaN
65 REAL*8 etaNM1
66 REAL*8 etaNSum
67 REAL*8 beta
68 REAL*8 alpha
69 REAL*8 alphaSum
70 REAL*8 sumRHS
71 REAL*8 temp
72 #else
73 Real actualResidual
74 Real err
75 Real errSum
76 Real etaN
77 Real etaNM1
78 Real etaNSum
79 Real beta
80 Real alpha
81 Real alphaSum
82 Real sumRHS
83 Real temp
84 #endif
85
86 C-- Initialise inverter
87 etaNM1 = 1. _d 0
88
89 C-- Initial residual calculation
90 err = 0. _d 0
91 sumRHS = 0. _d 0
92 DO J=1,sNy
93 DO I=1,sNx
94 cg2d_s(I,J) = 0. _d 0
95 cg2d_r(I,J) = cg2d_b(I,J) -
96 & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
97 & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
98 & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
99 & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
100 & )
101 err = err + cg2d_r(I,J)*cg2d_r(I,J)
102 sumRHS = sumRHS + cg2d_b(I,J)
103 ENDDO
104 ENDDO
105 CALL EXCH_XY_R8( cg2d_r )
106 CALL EXCH_XY_R8( cg2d_s )
107 CALL GSUM_R8( temp, err )
108 err = temp
109 CALL GSUM_R8( temp, sumRHS )
110 sumRHS = temp
111
112 actualIts = 0
113 actualResidual = SQRT(err)
114 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
115 IF ( actualResidual .EQ. 0. _d 0) STOP 'ABNORMAL END: RESIDUAL 0'
116
117 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
118 DO 10 N=1, cg2dMaxIters
119 C-- Solve preconditioning equation and update
120 C-- conjugate direction vector "s".
121 etaN = 0. _d 0
122 DO J=1,sNy
123 DO I=1,sNx
124 cg2d_q(I,J) =
125 & pW(I ,J )*cg2d_r(I-1,J )+pW(I+1,J )*cg2d_r(I+1,J )
126 & +pS(I ,J )*cg2d_r(I ,J-1)+pS(I ,J+1)*cg2d_r(I ,J+1)
127 & +pC(I ,J )*cg2d_r(I ,J )
128 etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J)
129 ENDDO
130 ENDDO
131
132 CALL GSUM_R8( temp, etaN )
133 etaN = temp
134 beta = etaN/etaNM1
135 etaNM1 = etaN
136
137 DO J=1,sNy
138 DO I=1,sNx
139 cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J)
140 ENDDO
141 ENDDO
142
143 C-- Do exchanges that require messages i.e. between
144 C-- processes.
145 CALL EXCH_XY_R8( cg2d_s )
146
147 C-- Ten extra exchanges
148 #ifdef TEN_EXTRA_EXCHS
149 CALL EXCH_XY_R8( cg2d_s )
150 CALL EXCH_XY_R8( cg2d_s )
151 CALL EXCH_XY_R8( cg2d_s )
152 CALL EXCH_XY_R8( cg2d_s )
153 CALL EXCH_XY_R8( cg2d_s )
154 CALL EXCH_XY_R8( cg2d_s )
155 CALL EXCH_XY_R8( cg2d_s )
156 CALL EXCH_XY_R8( cg2d_s )
157 CALL EXCH_XY_R8( cg2d_s )
158 CALL EXCH_XY_R8( cg2d_s )
159 #endif
160
161 C== Evaluate laplace operator on conjugate gradient vector
162 C== q = A.s
163 alpha = 0. _d 0
164 DO J=1,sNy
165 DO I=1,sNx
166 cg2d_q(I,J) =
167 & aW2d(I ,J )*cg2d_s(I-1,J )+aW2d(I+1,J )*cg2d_s(I+1,J )
168 & +aS2d(I ,J )*cg2d_s(I ,J-1)+aS2d(I ,J+1)*cg2d_s(I ,J+1)
169 & -aW2d(I ,J )*cg2d_s(I ,J )-aW2d(I+1,J )*cg2d_s(I ,J )
170 & -aS2d(I ,J )*cg2d_s(I ,J )-aS2d(I ,J+1)*cg2d_s(I ,J )
171 alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J)
172 ENDDO
173 ENDDO
174 CALL GSUM_R8( temp, alpha )
175
176 #ifdef HUNDRED_EXTRA_SUMS
177 C-- Hundred extra global sums
178 CALL GSUM_R8( temp, alpha )
179 CALL GSUM_R8( temp, alpha )
180 CALL GSUM_R8( temp, alpha )
181 CALL GSUM_R8( temp, alpha )
182 CALL GSUM_R8( temp, alpha )
183 CALL GSUM_R8( temp, alpha )
184 CALL GSUM_R8( temp, alpha )
185 CALL GSUM_R8( temp, alpha )
186 CALL GSUM_R8( temp, alpha )
187 CALL GSUM_R8( temp, alpha )
188 CALL GSUM_R8( temp, alpha )
189 CALL GSUM_R8( temp, alpha )
190 CALL GSUM_R8( temp, alpha )
191 CALL GSUM_R8( temp, alpha )
192 CALL GSUM_R8( temp, alpha )
193 CALL GSUM_R8( temp, alpha )
194 CALL GSUM_R8( temp, alpha )
195 CALL GSUM_R8( temp, alpha )
196 CALL GSUM_R8( temp, alpha )
197 CALL GSUM_R8( temp, alpha )
198 CALL GSUM_R8( temp, alpha )
199 CALL GSUM_R8( temp, alpha )
200 CALL GSUM_R8( temp, alpha )
201 CALL GSUM_R8( temp, alpha )
202 CALL GSUM_R8( temp, alpha )
203 CALL GSUM_R8( temp, alpha )
204 CALL GSUM_R8( temp, alpha )
205 CALL GSUM_R8( temp, alpha )
206 CALL GSUM_R8( temp, alpha )
207 CALL GSUM_R8( temp, alpha )
208 CALL GSUM_R8( temp, alpha )
209 CALL GSUM_R8( temp, alpha )
210 CALL GSUM_R8( temp, alpha )
211 CALL GSUM_R8( temp, alpha )
212 CALL GSUM_R8( temp, alpha )
213 CALL GSUM_R8( temp, alpha )
214 CALL GSUM_R8( temp, alpha )
215 CALL GSUM_R8( temp, alpha )
216 CALL GSUM_R8( temp, alpha )
217 CALL GSUM_R8( temp, alpha )
218 CALL GSUM_R8( temp, alpha )
219 CALL GSUM_R8( temp, alpha )
220 CALL GSUM_R8( temp, alpha )
221 CALL GSUM_R8( temp, alpha )
222 CALL GSUM_R8( temp, alpha )
223 CALL GSUM_R8( temp, alpha )
224 CALL GSUM_R8( temp, alpha )
225 CALL GSUM_R8( temp, alpha )
226 CALL GSUM_R8( temp, alpha )
227 CALL GSUM_R8( temp, alpha )
228 CALL GSUM_R8( temp, alpha )
229 CALL GSUM_R8( temp, alpha )
230 CALL GSUM_R8( temp, alpha )
231 CALL GSUM_R8( temp, alpha )
232 CALL GSUM_R8( temp, alpha )
233 CALL GSUM_R8( temp, alpha )
234 CALL GSUM_R8( temp, alpha )
235 CALL GSUM_R8( temp, alpha )
236 CALL GSUM_R8( temp, alpha )
237 CALL GSUM_R8( temp, alpha )
238 CALL GSUM_R8( temp, alpha )
239 CALL GSUM_R8( temp, alpha )
240 CALL GSUM_R8( temp, alpha )
241 CALL GSUM_R8( temp, alpha )
242 CALL GSUM_R8( temp, alpha )
243 CALL GSUM_R8( temp, alpha )
244 CALL GSUM_R8( temp, alpha )
245 CALL GSUM_R8( temp, alpha )
246 CALL GSUM_R8( temp, alpha )
247 CALL GSUM_R8( temp, alpha )
248 CALL GSUM_R8( temp, alpha )
249 CALL GSUM_R8( temp, alpha )
250 CALL GSUM_R8( temp, alpha )
251 CALL GSUM_R8( temp, alpha )
252 CALL GSUM_R8( temp, alpha )
253 CALL GSUM_R8( temp, alpha )
254 CALL GSUM_R8( temp, alpha )
255 CALL GSUM_R8( temp, alpha )
256 CALL GSUM_R8( temp, alpha )
257 CALL GSUM_R8( temp, alpha )
258 CALL GSUM_R8( temp, alpha )
259 CALL GSUM_R8( temp, alpha )
260 CALL GSUM_R8( temp, alpha )
261 CALL GSUM_R8( temp, alpha )
262 CALL GSUM_R8( temp, alpha )
263 CALL GSUM_R8( temp, alpha )
264 CALL GSUM_R8( temp, alpha )
265 CALL GSUM_R8( temp, alpha )
266 CALL GSUM_R8( temp, alpha )
267 CALL GSUM_R8( temp, alpha )
268 CALL GSUM_R8( temp, alpha )
269 CALL GSUM_R8( temp, alpha )
270 CALL GSUM_R8( temp, alpha )
271 CALL GSUM_R8( temp, alpha )
272 CALL GSUM_R8( temp, alpha )
273 CALL GSUM_R8( temp, alpha )
274 CALL GSUM_R8( temp, alpha )
275 CALL GSUM_R8( temp, alpha )
276 CALL GSUM_R8( temp, alpha )
277 CALL GSUM_R8( temp, alpha )
278 #endif
279
280 alpha = temp
281 alpha = etaN/alpha
282
283 C== Update solution and residual vectors
284 C Now compute "interior" points.
285 err = 0. _d 0
286 DO J=1,sNy
287 DO I=1,sNx
288 cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J)
289 cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J)
290 err = err+cg2d_r(I,J)*cg2d_r(I,J)
291 ENDDO
292 ENDDO
293
294 CALL GSUM_R8( temp, err )
295
296 err = temp
297 err = SQRT(err)
298 actualIts = N
299 actualResidual = err
300 #ifdef RESIDUAL_PER_ITERATION
301 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
302 #endif
303 IF ( err .LT. cg2dTargetResidual ) GOTO 11
304 CALL EXCH_XY_R8(cg2d_r )
305 #ifdef PAPI_PER_ITERATION
306 #ifdef USE_PAPI_FLOPS
307 call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
308 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
309 $ mflops, ' user ', mflops*proc_time/real_time,
310 $ ' wallclock Mflop/s during iteration ', N
311 #else
312 #ifdef USE_PAPI_FLIPS
313 call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
314 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
315 $ mflops, ' user ', mflops*proc_time/real_time,
316 $ ' wallclock Mflip/s during iteration ', N
317 #endif
318 #endif
319 #endif
320 10 CONTINUE
321 11 CONTINUE
322 CALL EXCH_XY_R8(cg2d_x )
323 #ifdef PAPI_PER_TIMESTEP
324 #ifdef USE_PAPI_FLOPS
325 call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
326 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
327 $ mflops, ' user ', mflops*proc_time/real_time,
328 $ ' wallclock Mflop/s during iteration ', N
329 #else
330 #ifdef USE_PAPI_FLIPS
331 call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
332 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
333 $ mflops, ' user ', mflops*proc_time/real_time,
334 $ ' wallclock Mflip/s during iteration ', N
335 #endif
336 #endif
337 #endif
338 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
339
340 C Calc Ax to check result
341 DO J=1,sNy
342 DO I=1,sNx
343 cg2d_Ax(I,J) =
344 & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
345 & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
346 & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
347 & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
348 & )
349 cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J)
350 ENDDO
351 ENDDO
352 CALL EXCH_XY_R8(cg2d_Ax )
353 CALL EXCH_XY_R8(cg2d_r )
354
355 END

  ViewVC Help
Powered by ViewVC 1.1.22