/[MITgcm]/MITgcm_contrib/cg2d_bench/cg2d.F
ViewVC logotype

Annotation of /MITgcm_contrib/cg2d_bench/cg2d.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.3 - (hide annotations) (download)
Wed May 31 16:27:25 2006 UTC (17 years, 10 months ago) by ce107
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +7 -111 lines
Provide initial value for target residual and cleanup repeated comm calls.

1 ce107 1.3 C $Id: cg2d.F,v 1.2 2006/05/12 22:21:21 ce107 Exp $
2 ce107 1.1 SUBROUTINE CG2D
3     C /==========================================================\
4     C | SUBROUTINE CG2D |
5     C | o Two-dimensional grid problem conjugate-gradient |
6     C | inverter (with preconditioner). |
7     C |==========================================================|
8     C | Con. grad is an iterative procedure for solving Ax = b. |
9     C | It requires the A be symmetric. |
10     C | This implementation assumes A is a five-diagonal |
11     C | matrix of the form that arises in the discrete |
12     C | representation of the del^2 operator in a |
13     C | two-dimensional space. |
14     C | Notes: |
15     C | ====== |
16     C | This implementation can support shared-memory |
17     C | multi-threaded execution. In order to do this COMMON |
18     C | blocks are used for many of the arrays - even ones that |
19     C | are only used for intermedaite results. This design is |
20     C | OK if you want to all the threads to collaborate on |
21     C | solving the same problem. On the other hand if you want |
22     C | the threads to solve several different problems |
23     C | concurrently this implementation will not work. |
24     C \==========================================================/
25     IMPLICIT NONE
26    
27     C === Global data ===
28     #include "SIZE.h"
29     #include "EEPARAMS.h"
30     #include "PARAMS.h"
31     #include "CG2D.h"
32 ce107 1.2 #if defined(USE_PAPI_FLOPS) || defined(USE_PAPI_FLIPS)
33     #if defined(PAPI_PER_ITERATION) || defined(PAPI_PER_TIMESTEP)
34     #include "PAPI.h"
35     #endif
36     #endif
37 ce107 1.1
38     C === Routine arguments ===
39     C myThid - Thread on which I am working.
40     INTEGER myThid
41    
42     C === Local variables ====
43     C actualIts - Number of iterations taken
44     C actualResidual - residual
45     C bi - Block index in X and Y.
46     C bj
47     C etaN - Used in computing search directions
48     C etaNM1 suffix N and NM1 denote current and
49     C beta previous iterations respectively.
50     C alpha
51     C sumRHS - Sum of right-hand-side. Sometimes this is a
52     C useful debuggin/trouble shooting diagnostic.
53     C For neumann problems sumRHS needs to be ~0.
54     C or they converge at a non-zero residual.
55     C err - Measure of residual of Ax - b, usually the norm.
56     C I, J, N - Loop counters ( N counts CG iterations )
57     INTEGER actualIts
58     INTEGER bi, bj
59     INTEGER I, J, N
60 ce107 1.2 #ifdef USE_MIXED_PRECISION
61     REAL*8 actualResidual
62     REAL*8 err
63     REAL*8 errSum
64     REAL*8 etaN
65     REAL*8 etaNM1
66     REAL*8 etaNSum
67     REAL*8 beta
68     REAL*8 alpha
69     REAL*8 alphaSum
70     REAL*8 sumRHS
71     REAL*8 temp
72     #else
73     Real actualResidual
74     Real err
75     Real errSum
76     Real etaN
77     Real etaNM1
78     Real etaNSum
79     Real beta
80     Real alpha
81     Real alphaSum
82     Real sumRHS
83     Real temp
84     #endif
85 ce107 1.1
86     C-- Initialise inverter
87 ce107 1.2 etaNM1 = 1. _d 0
88 ce107 1.1
89     C-- Initial residual calculation
90     err = 0. _d 0
91     sumRHS = 0. _d 0
92     DO J=1,sNy
93     DO I=1,sNx
94 ce107 1.2 cg2d_s(I,J) = 0. _d 0
95 ce107 1.1 cg2d_r(I,J) = cg2d_b(I,J) -
96     & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
97     & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
98     & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
99     & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
100     & )
101     err = err + cg2d_r(I,J)*cg2d_r(I,J)
102     sumRHS = sumRHS + cg2d_b(I,J)
103     ENDDO
104     ENDDO
105     CALL EXCH_XY_R8( cg2d_r )
106     CALL EXCH_XY_R8( cg2d_s )
107     CALL GSUM_R8( temp, err )
108     err = temp
109     CALL GSUM_R8( temp, sumRHS )
110     sumRHS = temp
111    
112     actualIts = 0
113     actualResidual = SQRT(err)
114     WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
115 ce107 1.2 IF ( actualResidual .EQ. 0. _d 0) STOP 'ABNORMAL END: RESIDUAL 0'
116 ce107 1.1
117     C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
118     DO 10 N=1, cg2dMaxIters
119     C-- Solve preconditioning equation and update
120     C-- conjugate direction vector "s".
121     etaN = 0. _d 0
122     DO J=1,sNy
123     DO I=1,sNx
124     cg2d_q(I,J) =
125     & pW(I ,J )*cg2d_r(I-1,J )+pW(I+1,J )*cg2d_r(I+1,J )
126     & +pS(I ,J )*cg2d_r(I ,J-1)+pS(I ,J+1)*cg2d_r(I ,J+1)
127     & +pC(I ,J )*cg2d_r(I ,J )
128     etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J)
129     ENDDO
130     ENDDO
131    
132     CALL GSUM_R8( temp, etaN )
133     etaN = temp
134     beta = etaN/etaNM1
135     etaNM1 = etaN
136    
137     DO J=1,sNy
138     DO I=1,sNx
139     cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J)
140     ENDDO
141     ENDDO
142    
143     C-- Do exchanges that require messages i.e. between
144     C-- processes.
145     CALL EXCH_XY_R8( cg2d_s )
146    
147     C-- Ten extra exchanges
148     #ifdef TEN_EXTRA_EXCHS
149 ce107 1.3 DO J=1,10
150     CALL EXCH_XY_R8( cg2d_s )
151     ENDDO
152 ce107 1.1 #endif
153    
154     C== Evaluate laplace operator on conjugate gradient vector
155     C== q = A.s
156     alpha = 0. _d 0
157     DO J=1,sNy
158     DO I=1,sNx
159     cg2d_q(I,J) =
160     & aW2d(I ,J )*cg2d_s(I-1,J )+aW2d(I+1,J )*cg2d_s(I+1,J )
161     & +aS2d(I ,J )*cg2d_s(I ,J-1)+aS2d(I ,J+1)*cg2d_s(I ,J+1)
162     & -aW2d(I ,J )*cg2d_s(I ,J )-aW2d(I+1,J )*cg2d_s(I ,J )
163     & -aS2d(I ,J )*cg2d_s(I ,J )-aS2d(I ,J+1)*cg2d_s(I ,J )
164     alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J)
165     ENDDO
166     ENDDO
167     CALL GSUM_R8( temp, alpha )
168    
169     #ifdef HUNDRED_EXTRA_SUMS
170     C-- Hundred extra global sums
171 ce107 1.3 DO J=1,100
172     CALL GSUM_R8( temp, alpha )
173     ENDDO
174 ce107 1.1 #endif
175    
176     alpha = temp
177     alpha = etaN/alpha
178    
179     C== Update solution and residual vectors
180     C Now compute "interior" points.
181     err = 0. _d 0
182     DO J=1,sNy
183     DO I=1,sNx
184     cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J)
185     cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J)
186     err = err+cg2d_r(I,J)*cg2d_r(I,J)
187     ENDDO
188     ENDDO
189    
190     CALL GSUM_R8( temp, err )
191    
192     err = temp
193     err = SQRT(err)
194     actualIts = N
195     actualResidual = err
196 ce107 1.2 #ifdef RESIDUAL_PER_ITERATION
197     WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
198     #endif
199 ce107 1.1 IF ( err .LT. cg2dTargetResidual ) GOTO 11
200     CALL EXCH_XY_R8(cg2d_r )
201 ce107 1.2 #ifdef PAPI_PER_ITERATION
202     #ifdef USE_PAPI_FLOPS
203     call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
204     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
205     $ mflops, ' user ', mflops*proc_time/real_time,
206     $ ' wallclock Mflop/s during iteration ', N
207     #else
208     #ifdef USE_PAPI_FLIPS
209     call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
210     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
211     $ mflops, ' user ', mflops*proc_time/real_time,
212     $ ' wallclock Mflip/s during iteration ', N
213     #endif
214     #endif
215     #endif
216 ce107 1.1 10 CONTINUE
217     11 CONTINUE
218     CALL EXCH_XY_R8(cg2d_x )
219 ce107 1.2 #ifdef PAPI_PER_TIMESTEP
220     #ifdef USE_PAPI_FLOPS
221     call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
222     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
223     $ mflops, ' user ', mflops*proc_time/real_time,
224     $ ' wallclock Mflop/s during iteration ', N
225     #else
226     #ifdef USE_PAPI_FLIPS
227     call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
228     WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
229     $ mflops, ' user ', mflops*proc_time/real_time,
230     $ ' wallclock Mflip/s during iteration ', N
231     #endif
232     #endif
233     #endif
234 ce107 1.1 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
235    
236     C Calc Ax to check result
237     DO J=1,sNy
238     DO I=1,sNx
239     cg2d_Ax(I,J) =
240     & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
241     & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
242     & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
243     & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
244     & )
245     cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J)
246     ENDDO
247     ENDDO
248     CALL EXCH_XY_R8(cg2d_Ax )
249     CALL EXCH_XY_R8(cg2d_r )
250    
251     END

  ViewVC Help
Powered by ViewVC 1.1.22