/[MITgcm]/MITgcm_contrib/cg2d_bench/cg2d.F
ViewVC logotype

Annotation of /MITgcm_contrib/cg2d_bench/cg2d.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.2 - (hide annotations) (download)
Fri May 12 22:21:21 2006 UTC (19 years, 2 months ago) by ce107
Branch: MAIN
Changes since 1.1: +67 -17 lines
Modified to use PAPI, allow single/double precision and printing of residual
at every iteration.

1 ce107 1.2 C $Id$
2 ce107 1.1 SUBROUTINE CG2D
3     C /==========================================================\
4     C | SUBROUTINE CG2D |
5     C | o Two-dimensional grid problem conjugate-gradient |
6     C | inverter (with preconditioner). |
7     C |==========================================================|
8     C | Con. grad is an iterative procedure for solving Ax = b. |
9     C | It requires the A be symmetric. |
10     C | This implementation assumes A is a five-diagonal |
11     C | matrix of the form that arises in the discrete |
12     C | representation of the del^2 operator in a |
13     C | two-dimensional space. |
14     C | Notes: |
15     C | ====== |
16     C | This implementation can support shared-memory |
17     C | multi-threaded execution. In order to do this COMMON |
18     C | blocks are used for many of the arrays - even ones that |
19     C | are only used for intermedaite results. This design is |
20     C | OK if you want to all the threads to collaborate on |
21     C | solving the same problem. On the other hand if you want |
22     C | the threads to solve several different problems |
23     C | concurrently this implementation will not work. |
24     C \==========================================================/
25     IMPLICIT NONE
26    
27     C === Global data ===
28     #include "SIZE.h"
29     #include "EEPARAMS.h"
30     #include "PARAMS.h"
31     #include "CG2D.h"
32 ce107 1.2 #if defined(USE_PAPI_FLOPS) || defined(USE_PAPI_FLIPS)
33     #if defined(PAPI_PER_ITERATION) || defined(PAPI_PER_TIMESTEP)
34     #include "PAPI.h"
35     #endif
36     #endif
37 ce107 1.1
38     C === Routine arguments ===
39     C myThid - Thread on which I am working.
40     INTEGER myThid
41    
42     C === Local variables ====
43     C actualIts - Number of iterations taken
44     C actualResidual - residual
45     C bi - Block index in X and Y.
46     C bj
47     C etaN - Used in computing search directions
48     C etaNM1 suffix N and NM1 denote current and
49     C beta previous iterations respectively.
50     C alpha
51     C sumRHS - Sum of right-hand-side. Sometimes this is a
52     C useful debuggin/trouble shooting diagnostic.
53     C For neumann problems sumRHS needs to be ~0.
54     C or they converge at a non-zero residual.
55     C err - Measure of residual of Ax - b, usually the norm.
56     C I, J, N - Loop counters ( N counts CG iterations )
57     INTEGER actualIts
58     INTEGER bi, bj
59     INTEGER I, J, N
60 ce107 1.2 #ifdef USE_MIXED_PRECISION
61     REAL*8 actualResidual
62     REAL*8 err
63     REAL*8 errSum
64     REAL*8 etaN
65     REAL*8 etaNM1
66     REAL*8 etaNSum
67     REAL*8 beta
68     REAL*8 alpha
69     REAL*8 alphaSum
70     REAL*8 sumRHS
71     REAL*8 temp
72     #else
73     Real actualResidual
74     Real err
75     Real errSum
76     Real etaN
77     Real etaNM1
78     Real etaNSum
79     Real beta
80     Real alpha
81     Real alphaSum
82     Real sumRHS
83     Real temp
84     #endif
85 ce107 1.1
86     C-- Initialise inverter
87 ce107 1.2 etaNM1 = 1. _d 0
88 ce107 1.1
89     C-- Initial residual calculation
90     err = 0. _d 0
91     sumRHS = 0. _d 0
92     DO J=1,sNy
93     DO I=1,sNx
94 ce107 1.2 cg2d_s(I,J) = 0. _d 0
95 ce107 1.1 cg2d_r(I,J) = cg2d_b(I,J) -
96     & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
97     & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
98     & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
99     & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
100     & )
101     err = err + cg2d_r(I,J)*cg2d_r(I,J)
102     sumRHS = sumRHS + cg2d_b(I,J)
103     ENDDO
104     ENDDO
105     CALL EXCH_XY_R8( cg2d_r )
106     CALL EXCH_XY_R8( cg2d_s )
107     CALL GSUM_R8( temp, err )
108     err = temp
109     CALL GSUM_R8( temp, sumRHS )
110     sumRHS = temp
111    
112     actualIts = 0
113     actualResidual = SQRT(err)
114     WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
115 ce107 1.2 IF ( actualResidual .EQ. 0. _d 0) STOP 'ABNORMAL END: RESIDUAL 0'
116 ce107 1.1
117     C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
118     DO 10 N=1, cg2dMaxIters
119     C-- Solve preconditioning equation and update
120     C-- conjugate direction vector "s".
121     etaN = 0. _d 0
122     DO J=1,sNy
123     DO I=1,sNx
124     cg2d_q(I,J) =
125     & pW(I ,J )*cg2d_r(I-1,J )+pW(I+1,J )*cg2d_r(I+1,J )
126     & +pS(I ,J )*cg2d_r(I ,J-1)+pS(I ,J+1)*cg2d_r(I ,J+1)
127     & +pC(I ,J )*cg2d_r(I ,J )
128     etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J)
129     ENDDO
130     ENDDO
131    
132     CALL GSUM_R8( temp, etaN )
133     etaN = temp
134     beta = etaN/etaNM1
135     etaNM1 = etaN
136    
137     DO J=1,sNy
138     DO I=1,sNx
139     cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J)
140     ENDDO
141     ENDDO
142    
143     C-- Do exchanges that require messages i.e. between
144     C-- processes.
145     CALL EXCH_XY_R8( cg2d_s )
146    
147     C-- Ten extra exchanges
148     #ifdef TEN_EXTRA_EXCHS
149     CALL EXCH_XY_R8( cg2d_s )
150     CALL EXCH_XY_R8( cg2d_s )
151     CALL EXCH_XY_R8( cg2d_s )
152     CALL EXCH_XY_R8( cg2d_s )
153     CALL EXCH_XY_R8( cg2d_s )
154     CALL EXCH_XY_R8( cg2d_s )
155     CALL EXCH_XY_R8( cg2d_s )
156     CALL EXCH_XY_R8( cg2d_s )
157     CALL EXCH_XY_R8( cg2d_s )
158     CALL EXCH_XY_R8( cg2d_s )
159     #endif
160    
161     C== Evaluate laplace operator on conjugate gradient vector
162     C== q = A.s
163     alpha = 0. _d 0
164     DO J=1,sNy
165     DO I=1,sNx
166     cg2d_q(I,J) =
167     & aW2d(I ,J )*cg2d_s(I-1,J )+aW2d(I+1,J )*cg2d_s(I+1,J )
168     & +aS2d(I ,J )*cg2d_s(I ,J-1)+aS2d(I ,J+1)*cg2d_s(I ,J+1)
169     & -aW2d(I ,J )*cg2d_s(I ,J )-aW2d(I+1,J )*cg2d_s(I ,J )
170     & -aS2d(I ,J )*cg2d_s(I ,J )-aS2d(I ,J+1)*cg2d_s(I ,J )
171     alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J)
172     ENDDO
173     ENDDO
174     CALL GSUM_R8( temp, alpha )
175    
176     #ifdef HUNDRED_EXTRA_SUMS
177     C-- Hundred extra global sums
178     CALL GSUM_R8( temp, alpha )
179     CALL GSUM_R8( temp, alpha )
180     CALL GSUM_R8( temp, alpha )
181     CALL GSUM_R8( temp, alpha )
182     CALL GSUM_R8( temp, alpha )
183     CALL GSUM_R8( temp, alpha )
184     CALL GSUM_R8( temp, alpha )
185     CALL GSUM_R8( temp, alpha )
186     CALL GSUM_R8( temp, alpha )
187     CALL GSUM_R8( temp, alpha )
188     CALL GSUM_R8( temp, alpha )
189     CALL GSUM_R8( temp, alpha )
190     CALL GSUM_R8( temp, alpha )
191     CALL GSUM_R8( temp, alpha )
192     CALL GSUM_R8( temp, alpha )
193     CALL GSUM_R8( temp, alpha )
194     CALL GSUM_R8( temp, alpha )
195     CALL GSUM_R8( temp, alpha )
196     CALL GSUM_R8( temp, alpha )
197     CALL GSUM_R8( temp, alpha )
198     CALL GSUM_R8( temp, alpha )
199     CALL GSUM_R8( temp, alpha )
200     CALL GSUM_R8( temp, alpha )
201     CALL GSUM_R8( temp, alpha )
202     CALL GSUM_R8( temp, alpha )
203     CALL GSUM_R8( temp, alpha )
204     CALL GSUM_R8( temp, alpha )
205     CALL GSUM_R8( temp, alpha )
206     CALL GSUM_R8( temp, alpha )
207     CALL GSUM_R8( temp, alpha )
208     CALL GSUM_R8( temp, alpha )
209     CALL GSUM_R8( temp, alpha )
210     CALL GSUM_R8( temp, alpha )
211     CALL GSUM_R8( temp, alpha )
212     CALL GSUM_R8( temp, alpha )
213     CALL GSUM_R8( temp, alpha )
214     CALL GSUM_R8( temp, alpha )
215     CALL GSUM_R8( temp, alpha )
216     CALL GSUM_R8( temp, alpha )
217     CALL GSUM_R8( temp, alpha )
218     CALL GSUM_R8( temp, alpha )
219     CALL GSUM_R8( temp, alpha )
220     CALL GSUM_R8( temp, alpha )
221     CALL GSUM_R8( temp, alpha )
222     CALL GSUM_R8( temp, alpha )
223     CALL GSUM_R8( temp, alpha )
224     CALL GSUM_R8( temp, alpha )
225     CALL GSUM_R8( temp, alpha )
226     CALL GSUM_R8( temp, alpha )
227     CALL GSUM_R8( temp, alpha )
228     CALL GSUM_R8( temp, alpha )
229     CALL GSUM_R8( temp, alpha )
230     CALL GSUM_R8( temp, alpha )
231     CALL GSUM_R8( temp, alpha )
232     CALL GSUM_R8( temp, alpha )
233     CALL GSUM_R8( temp, alpha )
234     CALL GSUM_R8( temp, alpha )
235     CALL GSUM_R8( temp, alpha )
236     CALL GSUM_R8( temp, alpha )
237     CALL GSUM_R8( temp, alpha )
238     CALL GSUM_R8( temp, alpha )
239     CALL GSUM_R8( temp, alpha )
240     CALL GSUM_R8( temp, alpha )
241     CALL GSUM_R8( temp, alpha )
242     CALL GSUM_R8( temp, alpha )
243     CALL GSUM_R8( temp, alpha )
244     CALL GSUM_R8( temp, alpha )
245     CALL GSUM_R8( temp, alpha )
246     CALL GSUM_R8( temp, alpha )
247     CALL GSUM_R8( temp, alpha )
248     CALL GSUM_R8( temp, alpha )
249     CALL GSUM_R8( temp, alpha )
250     CALL GSUM_R8( temp, alpha )
251     CALL GSUM_R8( temp, alpha )
252     CALL GSUM_R8( temp, alpha )
253     CALL GSUM_R8( temp, alpha )
254     CALL GSUM_R8( temp, alpha )
255     CALL GSUM_R8( temp, alpha )
256     CALL GSUM_R8( temp, alpha )
257     CALL GSUM_R8( temp, alpha )
258     CALL GSUM_R8( temp, alpha )
259     CALL GSUM_R8( temp, alpha )
260     CALL GSUM_R8( temp, alpha )
261     CALL GSUM_R8( temp, alpha )
262     CALL GSUM_R8( temp, alpha )
263     CALL GSUM_R8( temp, alpha )
264     CALL GSUM_R8( temp, alpha )
265     CALL GSUM_R8( temp, alpha )
266     CALL GSUM_R8( temp, alpha )
267     CALL GSUM_R8( temp, alpha )
268     CALL GSUM_R8( temp, alpha )
269     CALL GSUM_R8( temp, alpha )
270     CALL GSUM_R8( temp, alpha )
271     CALL GSUM_R8( temp, alpha )
272     CALL GSUM_R8( temp, alpha )
273     CALL GSUM_R8( temp, alpha )
274     CALL GSUM_R8( temp, alpha )
275     CALL GSUM_R8( temp, alpha )
276     CALL GSUM_R8( temp, alpha )
277     CALL GSUM_R8( temp, alpha )
278     #endif
279    
280     alpha = temp
281     alpha = etaN/alpha
282    
283     C== Update solution and residual vectors
284     C Now compute "interior" points.
285     err = 0. _d 0
286     DO J=1,sNy
287     DO I=1,sNx
288     cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J)
289     cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J)
290     err = err+cg2d_r(I,J)*cg2d_r(I,J)
291     ENDDO
292     ENDDO
293    
294     CALL GSUM_R8( temp, err )
295    
296     err = temp
297     err = SQRT(err)
298     actualIts = N
299     actualResidual = err
300 ce107 1.2 #ifdef RESIDUAL_PER_ITERATION
301     WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
302     #endif
303 ce107 1.1 IF ( err .LT. cg2dTargetResidual ) GOTO 11
304     CALL EXCH_XY_R8(cg2d_r )
305 ce107 1.2 #ifdef PAPI_PER_ITERATION
306     #ifdef USE_PAPI_FLOPS
307     call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
308     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
309     $ mflops, ' user ', mflops*proc_time/real_time,
310     $ ' wallclock Mflop/s during iteration ', N
311     #else
312     #ifdef USE_PAPI_FLIPS
313     call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
314     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
315     $ mflops, ' user ', mflops*proc_time/real_time,
316     $ ' wallclock Mflip/s during iteration ', N
317     #endif
318     #endif
319     #endif
320 ce107 1.1 10 CONTINUE
321     11 CONTINUE
322     CALL EXCH_XY_R8(cg2d_x )
323 ce107 1.2 #ifdef PAPI_PER_TIMESTEP
324     #ifdef USE_PAPI_FLOPS
325     call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
326     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
327     $ mflops, ' user ', mflops*proc_time/real_time,
328     $ ' wallclock Mflop/s during iteration ', N
329     #else
330     #ifdef USE_PAPI_FLIPS
331     call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
332     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
333     $ mflops, ' user ', mflops*proc_time/real_time,
334     $ ' wallclock Mflip/s during iteration ', N
335     #endif
336     #endif
337     #endif
338 ce107 1.1 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
339    
340     C Calc Ax to check result
341     DO J=1,sNy
342     DO I=1,sNx
343     cg2d_Ax(I,J) =
344     & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
345     & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
346     & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
347     & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
348     & )
349     cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J)
350     ENDDO
351     ENDDO
352     CALL EXCH_XY_R8(cg2d_Ax )
353     CALL EXCH_XY_R8(cg2d_r )
354    
355     END

  ViewVC Help
Powered by ViewVC 1.1.22