/[MITgcm]/MITgcm_contrib/cg2d_bench/cg2d.F
ViewVC logotype

Contents of /MITgcm_contrib/cg2d_bench/cg2d.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.3 - (show annotations) (download)
Wed May 31 16:27:25 2006 UTC (12 years, 6 months ago) by ce107
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +7 -111 lines
Provide initial value for target residual and cleanup repeated comm calls.

1 C $Id: cg2d.F,v 1.2 2006/05/12 22:21:21 ce107 Exp $
2 SUBROUTINE CG2D
3 C /==========================================================\
4 C | SUBROUTINE CG2D |
5 C | o Two-dimensional grid problem conjugate-gradient |
6 C | inverter (with preconditioner). |
7 C |==========================================================|
8 C | Con. grad is an iterative procedure for solving Ax = b. |
9 C | It requires the A be symmetric. |
10 C | This implementation assumes A is a five-diagonal |
11 C | matrix of the form that arises in the discrete |
12 C | representation of the del^2 operator in a |
13 C | two-dimensional space. |
14 C | Notes: |
15 C | ====== |
16 C | This implementation can support shared-memory |
17 C | multi-threaded execution. In order to do this COMMON |
18 C | blocks are used for many of the arrays - even ones that |
19 C | are only used for intermedaite results. This design is |
20 C | OK if you want to all the threads to collaborate on |
21 C | solving the same problem. On the other hand if you want |
22 C | the threads to solve several different problems |
23 C | concurrently this implementation will not work. |
24 C \==========================================================/
25 IMPLICIT NONE
26
27 C === Global data ===
28 #include "SIZE.h"
29 #include "EEPARAMS.h"
30 #include "PARAMS.h"
31 #include "CG2D.h"
32 #if defined(USE_PAPI_FLOPS) || defined(USE_PAPI_FLIPS)
33 #if defined(PAPI_PER_ITERATION) || defined(PAPI_PER_TIMESTEP)
34 #include "PAPI.h"
35 #endif
36 #endif
37
38 C === Routine arguments ===
39 C myThid - Thread on which I am working.
40 INTEGER myThid
41
42 C === Local variables ====
43 C actualIts - Number of iterations taken
44 C actualResidual - residual
45 C bi - Block index in X and Y.
46 C bj
47 C etaN - Used in computing search directions
48 C etaNM1 suffix N and NM1 denote current and
49 C beta previous iterations respectively.
50 C alpha
51 C sumRHS - Sum of right-hand-side. Sometimes this is a
52 C useful debuggin/trouble shooting diagnostic.
53 C For neumann problems sumRHS needs to be ~0.
54 C or they converge at a non-zero residual.
55 C err - Measure of residual of Ax - b, usually the norm.
56 C I, J, N - Loop counters ( N counts CG iterations )
57 INTEGER actualIts
58 INTEGER bi, bj
59 INTEGER I, J, N
60 #ifdef USE_MIXED_PRECISION
61 REAL*8 actualResidual
62 REAL*8 err
63 REAL*8 errSum
64 REAL*8 etaN
65 REAL*8 etaNM1
66 REAL*8 etaNSum
67 REAL*8 beta
68 REAL*8 alpha
69 REAL*8 alphaSum
70 REAL*8 sumRHS
71 REAL*8 temp
72 #else
73 Real actualResidual
74 Real err
75 Real errSum
76 Real etaN
77 Real etaNM1
78 Real etaNSum
79 Real beta
80 Real alpha
81 Real alphaSum
82 Real sumRHS
83 Real temp
84 #endif
85
86 C-- Initialise inverter
87 etaNM1 = 1. _d 0
88
89 C-- Initial residual calculation
90 err = 0. _d 0
91 sumRHS = 0. _d 0
92 DO J=1,sNy
93 DO I=1,sNx
94 cg2d_s(I,J) = 0. _d 0
95 cg2d_r(I,J) = cg2d_b(I,J) -
96 & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
97 & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
98 & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
99 & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
100 & )
101 err = err + cg2d_r(I,J)*cg2d_r(I,J)
102 sumRHS = sumRHS + cg2d_b(I,J)
103 ENDDO
104 ENDDO
105 CALL EXCH_XY_R8( cg2d_r )
106 CALL EXCH_XY_R8( cg2d_s )
107 CALL GSUM_R8( temp, err )
108 err = temp
109 CALL GSUM_R8( temp, sumRHS )
110 sumRHS = temp
111
112 actualIts = 0
113 actualResidual = SQRT(err)
114 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
115 IF ( actualResidual .EQ. 0. _d 0) STOP 'ABNORMAL END: RESIDUAL 0'
116
117 C >>>>>>>>>>>>>>> BEGIN SOLVER <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
118 DO 10 N=1, cg2dMaxIters
119 C-- Solve preconditioning equation and update
120 C-- conjugate direction vector "s".
121 etaN = 0. _d 0
122 DO J=1,sNy
123 DO I=1,sNx
124 cg2d_q(I,J) =
125 & pW(I ,J )*cg2d_r(I-1,J )+pW(I+1,J )*cg2d_r(I+1,J )
126 & +pS(I ,J )*cg2d_r(I ,J-1)+pS(I ,J+1)*cg2d_r(I ,J+1)
127 & +pC(I ,J )*cg2d_r(I ,J )
128 etaN = etaN+cg2d_q(I,J)*cg2d_r(I,J)
129 ENDDO
130 ENDDO
131
132 CALL GSUM_R8( temp, etaN )
133 etaN = temp
134 beta = etaN/etaNM1
135 etaNM1 = etaN
136
137 DO J=1,sNy
138 DO I=1,sNx
139 cg2d_s(I,J) = cg2d_q(I,J) + beta*cg2d_s(I,J)
140 ENDDO
141 ENDDO
142
143 C-- Do exchanges that require messages i.e. between
144 C-- processes.
145 CALL EXCH_XY_R8( cg2d_s )
146
147 C-- Ten extra exchanges
148 #ifdef TEN_EXTRA_EXCHS
149 DO J=1,10
150 CALL EXCH_XY_R8( cg2d_s )
151 ENDDO
152 #endif
153
154 C== Evaluate laplace operator on conjugate gradient vector
155 C== q = A.s
156 alpha = 0. _d 0
157 DO J=1,sNy
158 DO I=1,sNx
159 cg2d_q(I,J) =
160 & aW2d(I ,J )*cg2d_s(I-1,J )+aW2d(I+1,J )*cg2d_s(I+1,J )
161 & +aS2d(I ,J )*cg2d_s(I ,J-1)+aS2d(I ,J+1)*cg2d_s(I ,J+1)
162 & -aW2d(I ,J )*cg2d_s(I ,J )-aW2d(I+1,J )*cg2d_s(I ,J )
163 & -aS2d(I ,J )*cg2d_s(I ,J )-aS2d(I ,J+1)*cg2d_s(I ,J )
164 alpha = alpha+cg2d_s(I,J)*cg2d_q(I,J)
165 ENDDO
166 ENDDO
167 CALL GSUM_R8( temp, alpha )
168
169 #ifdef HUNDRED_EXTRA_SUMS
170 C-- Hundred extra global sums
171 DO J=1,100
172 CALL GSUM_R8( temp, alpha )
173 ENDDO
174 #endif
175
176 alpha = temp
177 alpha = etaN/alpha
178
179 C== Update solution and residual vectors
180 C Now compute "interior" points.
181 err = 0. _d 0
182 DO J=1,sNy
183 DO I=1,sNx
184 cg2d_x(I,J)=cg2d_x(I,J)+alpha*cg2d_s(I,J)
185 cg2d_r(I,J)=cg2d_r(I,J)-alpha*cg2d_q(I,J)
186 err = err+cg2d_r(I,J)*cg2d_r(I,J)
187 ENDDO
188 ENDDO
189
190 CALL GSUM_R8( temp, err )
191
192 err = temp
193 err = SQRT(err)
194 actualIts = N
195 actualResidual = err
196 #ifdef RESIDUAL_PER_ITERATION
197 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
198 #endif
199 IF ( err .LT. cg2dTargetResidual ) GOTO 11
200 CALL EXCH_XY_R8(cg2d_r )
201 #ifdef PAPI_PER_ITERATION
202 #ifdef USE_PAPI_FLOPS
203 call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
204 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
205 $ mflops, ' user ', mflops*proc_time/real_time,
206 $ ' wallclock Mflop/s during iteration ', N
207 #else
208 #ifdef USE_PAPI_FLIPS
209 call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
210 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
211 $ mflops, ' user ', mflops*proc_time/real_time,
212 $ ' wallclock Mflip/s during iteration ', N
213 #endif
214 #endif
215 #endif
216 10 CONTINUE
217 11 CONTINUE
218 CALL EXCH_XY_R8(cg2d_x )
219 #ifdef PAPI_PER_TIMESTEP
220 #ifdef USE_PAPI_FLOPS
221 call PAPIF_flops(real_time, proc_time, flpops, mflops, check)
222 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
223 $ mflops, ' user ', mflops*proc_time/real_time,
224 $ ' wallclock Mflop/s during iteration ', N
225 #else
226 #ifdef USE_PAPI_FLIPS
227 call PAPIF_flips(real_time, proc_time, flpops, mflops, check)
228 WRITE(6,'(F10.3,A7,F10.3,A37,I8)')
229 $ mflops, ' user ', mflops*proc_time/real_time,
230 $ ' wallclock Mflip/s during iteration ', N
231 #endif
232 #endif
233 #endif
234 WRITE(6,*) ' CG2D iters, err = ', actualIts, actualResidual
235
236 C Calc Ax to check result
237 DO J=1,sNy
238 DO I=1,sNx
239 cg2d_Ax(I,J) =
240 & ( aW2d(I ,J )*cg2d_x(I-1,J )+aW2d(I+1,J )*cg2d_x(I+1,J )
241 & +aS2d(I ,J )*cg2d_x(I ,J-1)+aS2d(I ,J+1)*cg2d_x(I ,J+1)
242 & -aW2d(I ,J )*cg2d_x(I ,J )-aW2d(I+1,J )*cg2d_x(I ,J )
243 & -aS2d(I ,J )*cg2d_x(I ,J )-aS2d(I ,J+1)*cg2d_x(I ,J )
244 & )
245 cg2d_r(I,J) = cg2d_b(I,J)-cg2d_Ax(I,J)
246 ENDDO
247 ENDDO
248 CALL EXCH_XY_R8(cg2d_Ax )
249 CALL EXCH_XY_R8(cg2d_r )
250
251 END

  ViewVC Help
Powered by ViewVC 1.1.22