1 |
C $Header: /u/gcmpack/MITgcm/pkg/streamice/streamice_cg_solve_petsc.F,v 1.5 2015/06/15 14:34:38 dgoldberg Exp $ |
2 |
C $Name: $ |
3 |
|
4 |
#include "CPP_OPTIONS.h" |
5 |
|
6 |
C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----| |
7 |
|
8 |
CBOP |
9 |
SUBROUTINE CG3D_PETSC( |
10 |
U cg3d_x, ! solution vector |
11 |
U cg3d_b, ! rhs |
12 |
I tolerance, |
13 |
U maxIter, |
14 |
I myIter, |
15 |
I myThid ) |
16 |
|
17 |
C /============================================================\ |
18 |
C | SUBROUTINE | |
19 |
C | o | |
20 |
C |============================================================| |
21 |
C | | |
22 |
C \============================================================/ |
23 |
IMPLICIT NONE |
24 |
|
25 |
#include "SIZE.h" |
26 |
#include "EEPARAMS.h" |
27 |
#include "PARAMS.h" |
28 |
#include "CG3D.h" |
29 |
|
30 |
#ifdef ALLOW_PETSC |
31 |
#include "finclude/petsc.h" |
32 |
#include "CG3D_PETSC.h" |
33 |
! UNCOMMENT IF V3.0 |
34 |
!#include "finclude/petscvec.h" |
35 |
!#include "finclude/petscmat.h" |
36 |
!#include "finclude/petscksp.h" |
37 |
!#include "finclude/petscpc.h" |
38 |
#endif |
39 |
C === Global variables === |
40 |
|
41 |
|
42 |
C !INPUT/OUTPUT ARGUMENTS |
43 |
C cg_Uin, cg_Vin - input and output velocities |
44 |
C cg_Bu, cg_Bv - driving stress |
45 |
INTEGER myThid |
46 |
INTEGER maxiter |
47 |
INTEGER myIter |
48 |
_RL tolerance |
49 |
_RL cg3d_x (1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy) |
50 |
_RL cg3d_b (1-OLx:sNx+OLx,1-OLy:sNy+OLy,Nr,nSx,nSy) |
51 |
|
52 |
|
53 |
C LOCAL VARIABLES |
54 |
INTEGER i, j, bi, bj, cg_halo, conv_flag |
55 |
INTEGER iter, col_iter, k |
56 |
_RL dot_p1, dot_p2, alpha_k, beta_k, resid, resid_0 |
57 |
_RL dot_p1_tile (nSx,nSy) |
58 |
_RL dot_p2_tile (nSx,nSy) |
59 |
CHARACTER*(MAX_LEN_MBUF) msgBuf |
60 |
! LOGICAL create_mat, destroy_mat |
61 |
|
62 |
|
63 |
#ifdef ALLOW_PETSC |
64 |
INTEGER indices(Nr*(snx*nsx*sny*nsy)) |
65 |
INTEGER cg3d_dofs_cum_sum (0:nPx*nPy*MAX_CG3D_PETSC_CPUINVERT-1), |
66 |
& idx(1) |
67 |
_RL rhs_values(Nr*(snx*nsx*sny*nsy)) |
68 |
_RL solution_values(Nr*(snx*nsx*sny*nsy)) |
69 |
_RL mat_values (7,1), mat_val_return(1) |
70 |
INTEGER indices_col(7) |
71 |
INTEGER local_dofs, global_dofs, dof_index, dof_index_col |
72 |
INTEGER local_offset |
73 |
INTEGER iters |
74 |
INTEGER color, rank |
75 |
PC subpc |
76 |
KSP subksp(1) |
77 |
Vec rhs |
78 |
Vec solution |
79 |
PetscErrorCode ierr |
80 |
#ifdef ALLOW_USE_MPI |
81 |
integer mpiRC, mpiMyWid |
82 |
#endif |
83 |
#endif |
84 |
|
85 |
|
86 |
! print *, "GOT HERE CG3D", myByLo(mythid), myByHi(myThid), |
87 |
! & myBxLo(mythid), myBxHi(myThid) |
88 |
! RETURN |
89 |
|
90 |
#ifdef ALLOW_PETSC |
91 |
|
92 |
IF ((myIter.eq.1) .OR. (.not.cg3d_petsc_reuse_mat)) THEN |
93 |
cg3d_need2create_mat = .TRUE. |
94 |
ELSE |
95 |
cg3d_need2create_mat = .FALSE. |
96 |
ENDIF |
97 |
|
98 |
IF ((myIter.eq.nTimeSteps) .OR. (.not.cg3d_petsc_reuse_mat)) THEN |
99 |
cg3d_need2destroy_mat = .TRUE. |
100 |
ELSE |
101 |
cg3d_need2destroy_mat = .FALSE. |
102 |
ENDIF |
103 |
|
104 |
|
105 |
#ifdef ALLOW_USE_MPI |
106 |
|
107 |
|
108 |
CALL MPI_COMM_RANK( MPI_COMM_WORLD, mpiMyWId, mpiRC ) |
109 |
local_dofs = cg3d_dofs_process (mpiMyWid) |
110 |
global_dofs = 0 |
111 |
|
112 |
|
113 |
cg3d_dofs_cum_sum(0) = 0 |
114 |
|
115 |
DO i=0,nPx*nPy*MAX_CG3D_PETSC_CPUINVERT-1 |
116 |
IF (i.le.cg3d_petsc_cpuInVert) THEN |
117 |
global_dofs = global_dofs + cg3d_dofs_process (i) |
118 |
if (i.ge.1) then |
119 |
cg3d_dofs_cum_sum(i) = cg3d_dofs_cum_sum(i-1)+ |
120 |
& cg3d_dofs_process(i-1) |
121 |
endif |
122 |
ENDIF |
123 |
ENDDO |
124 |
|
125 |
local_offset = cg3d_dofs_cum_sum(mpimywid) |
126 |
|
127 |
#else |
128 |
|
129 |
local_dofs = cg3d_dofs_process (0) |
130 |
global_dofs = local_dofs |
131 |
local_offset = 0 |
132 |
|
133 |
#endif |
134 |
|
135 |
|
136 |
!---------------------- |
137 |
|
138 |
|
139 |
call VecCreate(PETSC_COMM_WORLD, rhs, ierr) |
140 |
call VecSetSizes(rhs, local_dofs, global_dofs, ierr) |
141 |
call VecSetType(rhs, VECMPI, ierr) |
142 |
|
143 |
call VecCreate(PETSC_COMM_WORLD, solution, ierr) |
144 |
call VecSetSizes(solution, local_dofs, global_dofs, ierr) |
145 |
call VecSetType(solution, VECMPI, ierr) |
146 |
|
147 |
do i=1,local_dofs |
148 |
indices(i) = i-1 + local_offset |
149 |
end do |
150 |
do i=1,Nr*nSx*nSy*sNx*sNy |
151 |
rhs_values (i) = 0. _d 0 |
152 |
solution_values (i) = 0. _d 0 |
153 |
enddo |
154 |
|
155 |
! gather rhs and initial guess values to populate petsc vectors |
156 |
|
157 |
DO bj = myByLo(myThid), myByHi(myThid) |
158 |
DO bi = myBxLo(myThid), myBxHi(myThid) |
159 |
DO j=1,sNy |
160 |
DO i=1,sNx |
161 |
DO k=1,Nr |
162 |
|
163 |
color = INT(cg3d_petsc_color(i,j,k,bi,bj)) |
164 |
rank = cg3d_color_rank (color) |
165 |
dof_index = INT(cg3d_petsc_dofs(i,j,k,bi,bj)) |
166 |
& - local_offset |
167 |
|
168 |
if (dof_index.ge.0 .and. rank.eq.mpimywid) THEN |
169 |
|
170 |
rhs_values(dof_index+1) = cg3d_b(i,j,k,bi,bj) |
171 |
solution_values(dof_index+1) = cg3d_x(i,j,k,bi,bj) |
172 |
|
173 |
endif |
174 |
|
175 |
ENDDO |
176 |
ENDDO |
177 |
ENDDO |
178 |
ENDDO |
179 |
ENDDO |
180 |
|
181 |
call VecSetValues(rhs, local_dofs, indices, rhs_values, |
182 |
& INSERT_VALUES, ierr) |
183 |
call VecAssemblyBegin(rhs, ierr) |
184 |
call VecAssemblyEnd(rhs, ierr) |
185 |
|
186 |
|
187 |
call VecSetValues(solution, local_dofs, indices, |
188 |
& solution_values, INSERT_VALUES, ierr) |
189 |
call VecAssemblyBegin(solution, ierr) |
190 |
call VecAssemblyEnd(solution, ierr) |
191 |
|
192 |
|
193 |
|
194 |
if (cg3d_need2create_mat) then |
195 |
CALL TIMER_START ('CG3D_PETSC_MATCREATE',myThid) |
196 |
|
197 |
! IF USING v3.0 THEN |
198 |
! call MatCreateMPIAIJ (PETSC_COMM_WORLD, |
199 |
call MatCreateAIJ (PETSC_COMM_WORLD, |
200 |
& local_dofs, local_dofs, |
201 |
& global_dofs, global_dofs, |
202 |
& 7, PETSC_NULL_INTEGER, |
203 |
& 7, PETSC_NULL_INTEGER, |
204 |
& matrix_petsc_cg3d, ierr) |
205 |
|
206 |
|
207 |
! populate petsc matrix |
208 |
|
209 |
DO bj = myByLo(myThid), myByHi(myThid) |
210 |
DO bi = myBxLo(myThid), myBxHi(myThid) |
211 |
DO j=1,sNy |
212 |
DO i=1,sNx |
213 |
DO k=1,Nr |
214 |
|
215 |
dof_index = INT(cg3d_petsc_dofs(i,j,k,bi,bj)) |
216 |
! & - local_offset |
217 |
color = INT(cg3d_petsc_color(i,j,k,bi,bj)) |
218 |
rank = cg3d_color_rank (color) |
219 |
|
220 |
IF (dof_index .ge. 0 .and. rank.eq.mpimywid) THEN |
221 |
|
222 |
DO col_iter=1,7 |
223 |
indices_col(col_iter) = 0 |
224 |
mat_values(col_iter,1) = 0. _d 0 |
225 |
ENDDO |
226 |
col_iter = 0 |
227 |
|
228 |
! ASSUME THE STENCIL |
229 |
! ac3d (i,j,k) --> 1 |
230 |
! aw3d (i,j,k) --> 2 |
231 |
! as3d (i,j,k) --> 3 |
232 |
! av3d (i,j,k) --> 4 |
233 |
! aw3d (i+1,j,k) --> 5 |
234 |
! as3d (i,j+1,k) --> 6 |
235 |
! av3d (i,j,k+1) --> 7 |
236 |
|
237 |
dof_index_col = cg3d_petsc_dofs(i,j,k,bi,bj) |
238 |
if (dof_index_col.ge.0) then |
239 |
col_iter = col_iter + 1 |
240 |
mat_values(col_iter,1) = ac3d(i,j,k,bi,bj) |
241 |
indices_col(col_iter) = dof_index_col |
242 |
endif |
243 |
|
244 |
dof_index_col = cg3d_petsc_dofs(i-1,j,k,bi,bj) |
245 |
if (dof_index_col.ge.0) then |
246 |
col_iter = col_iter + 1 |
247 |
mat_values(col_iter,1) = aw3d(i,j,k,bi,bj) |
248 |
indices_col(col_iter) = dof_index_col |
249 |
endif |
250 |
|
251 |
dof_index_col = cg3d_petsc_dofs(i,j-1,k,bi,bj) |
252 |
if (dof_index_col.ge.0) then |
253 |
col_iter = col_iter + 1 |
254 |
mat_values(col_iter,1) = as3d(i,j,k,bi,bj) |
255 |
indices_col(col_iter) = dof_index_col |
256 |
endif |
257 |
|
258 |
if (k.gt.1) then |
259 |
dof_index_col = cg3d_petsc_dofs(i,j,k-1,bi,bj) |
260 |
if (dof_index_col.ge.0) then |
261 |
col_iter = col_iter + 1 |
262 |
mat_values(col_iter,1) = av3d(i,j,k,bi,bj) |
263 |
indices_col(col_iter) = dof_index_col |
264 |
endif |
265 |
endif |
266 |
|
267 |
dof_index_col = cg3d_petsc_dofs(i+1,j,k,bi,bj) |
268 |
if (dof_index_col.ge.0) then |
269 |
col_iter = col_iter + 1 |
270 |
mat_values(col_iter,1) = aw3d(i+1,j,k,bi,bj) |
271 |
indices_col(col_iter) = dof_index_col |
272 |
endif |
273 |
|
274 |
dof_index_col = cg3d_petsc_dofs(i,j+1,k,bi,bj) |
275 |
if (dof_index_col.ge.0) then |
276 |
col_iter = col_iter + 1 |
277 |
mat_values(col_iter,1) = as3d(i,j+1,k,bi,bj) |
278 |
indices_col(col_iter) = dof_index_col |
279 |
endif |
280 |
|
281 |
if (k.lt.Nr) then |
282 |
dof_index_col = cg3d_petsc_dofs(i,j,k+1,bi,bj) |
283 |
if (dof_index_col.ge.0) then |
284 |
col_iter = col_iter + 1 |
285 |
mat_values(col_iter,1) = av3d(i,j,k+1,bi,bj) |
286 |
indices_col(col_iter) = dof_index_col |
287 |
endif |
288 |
endif |
289 |
|
290 |
|
291 |
call matSetValues (matrix_petsc_cg3d, 1, dof_index, col_iter, |
292 |
& indices_col, |
293 |
& mat_values,INSERT_VALUES,ierr) |
294 |
|
295 |
|
296 |
ENDIF |
297 |
|
298 |
|
299 |
ENDDO |
300 |
ENDDO |
301 |
ENDDO |
302 |
ENDDO |
303 |
ENDDO |
304 |
|
305 |
call MatAssemblyBegin(matrix_petsc_cg3d,MAT_FINAL_ASSEMBLY,ierr) |
306 |
call MatAssemblyEnd(matrix_petsc_cg3d,MAT_FINAL_ASSEMBLY,ierr) |
307 |
|
308 |
|
309 |
call KSPCreate(PETSC_COMM_WORLD, ksp_cg3d, ierr) |
310 |
call KSPSetOperators(ksp_cg3d, |
311 |
& matrix_petsc_cg3d, matrix_petsc_cg3d, |
312 |
& DIFFERENT_NONZERO_PATTERN, ierr) |
313 |
|
314 |
IF (CG3D_PETSC_PRECOND_TYPE.eq.'MUMPS') then |
315 |
call KSPSetType(ksp_cg3d,KSPPREONLY,ierr) |
316 |
ELSE |
317 |
SELECT CASE (CG3D_PETSC_SOLVER_TYPE) |
318 |
CASE ('CG') |
319 |
PRINT *, "PETSC SOLVER: SELECTED CG" |
320 |
call KSPSetType(ksp_cg3d, KSPCG, ierr) |
321 |
CASE ('GMRES') |
322 |
PRINT *, "PETSC SOLVER: SELECTED GMRES" |
323 |
call KSPSetType(ksp_cg3d, KSPGMRES, ierr) |
324 |
CASE ('BICG') |
325 |
PRINT *, "PETSC SOLVER: SELECTED BICG" |
326 |
call KSPSetType(ksp_cg3d, KSPBICG, ierr) |
327 |
CASE DEFAULT |
328 |
PRINT *, "PETSC SOLVER: SELECTED DEFAULT" |
329 |
call KSPSetType(ksp_cg3d, KSPCG, ierr) |
330 |
END SELECT |
331 |
ENDIF |
332 |
|
333 |
call KSPGetPC(ksp_cg3d, pc_cg3d, ierr) |
334 |
call KSPSetTolerances(ksp_cg3d,tolerance, |
335 |
& PETSC_DEFAULT_DOUBLE_PRECISION, |
336 |
& PETSC_DEFAULT_DOUBLE_PRECISION, |
337 |
& maxiter,ierr) |
338 |
|
339 |
SELECT CASE (CG3D_PETSC_PRECOND_TYPE) |
340 |
CASE ('BLOCKJACOBI') |
341 |
PRINT *, "PETSC PRECOND: SELECTED BJACOBI" |
342 |
call PCSetType(pc_cg3d, PCBJACOBI, ierr) |
343 |
call kspsetup (ksp_cg3d, ierr) |
344 |
call PCBJacobiGetSubKSP (pc_cg3d,PETSC_NULL_INTEGER, |
345 |
& PETSC_NULL_INTEGER, |
346 |
& subksp,ierr); |
347 |
call KSPGetPC (subksp, subpc, ierr) |
348 |
call PCSetType (subpc, PCILU, ierr) |
349 |
! call PCFactorSetLevels(subpc,0,ierr) |
350 |
CASE ('JACOBI') |
351 |
PRINT *, "PETSC PRECOND: SELECTED JACOBI" |
352 |
call PCSetType(pc_cg3d, PCJACOBI, ierr) |
353 |
CASE ('ILU') |
354 |
PRINT *, "PETSC PRECOND: SELECTED ILU" |
355 |
call PCSetType(pc_cg3d, PCILU, ierr) |
356 |
|
357 |
CASE ('GAMG') |
358 |
PRINT *, "PETSC PRECOND: SELECTED GAMG" |
359 |
call PCSetType(pc_cg3d, PCGAMG, ierr) |
360 |
call PCGAMGSetCoarseEqLim(pc_cg3d,10000,ierr) |
361 |
call PCGAMGSetSymGraph(pc_cg3d, PETSC_TRUE,ierr) |
362 |
call PCGAMGSetNSmooths(pc_cg3d, 0,ierr) |
363 |
call PCGAMGSetThreshold(pc_cg3d, .001,ierr) |
364 |
! call PCGAMGSetReuseProl(pc,PETSC_FALSE,ierr) |
365 |
call kspsetup (ksp_cg3d, ierr) |
366 |
|
367 |
CASE ('MUMPS') |
368 |
PRINT *, "PETSC PRECOND: SELECTED MUMPS" |
369 |
call PCSetType(pc_cg3d,PCLU,ierr) |
370 |
call PCFactorSetMatSolverPackage(pc_cg3d,MATSOLVERMUMPS,ierr) |
371 |
call PCFactorSetUpMatSolverPackage(pc_cg3d,ierr) |
372 |
call PCFactorGetMatrix(pc_cg3d,mumpsFac_cg3d,ierr) |
373 |
call MatMumpsSetIcntl(mumpsfac_cg3d,7,2,ierr) |
374 |
call MatMumpsSetIcntl(mumpsfac_cg3d,24,1,ierr) |
375 |
! call MatMumpsSetCntl(mumpsfac,3,2.e-6,ierr) |
376 |
call kspsetup (ksp_cg3d, ierr) |
377 |
|
378 |
CASE DEFAULT |
379 |
PRINT *, "PETSC PRECOND: SELECTED DEFAULT" |
380 |
call PCSetType(pc_cg3d, PCBJACOBI, ierr) |
381 |
END SELECT |
382 |
|
383 |
|
384 |
CALL TIMER_STOP ('CG3D_PETSC_MATCREATE',myThid) |
385 |
endif ! need3create_mat |
386 |
|
387 |
|
388 |
CALL TIMER_START ('CG3D_PETSC_SOLVE',myThid) |
389 |
|
390 |
call KSPSolve(ksp_cg3d, rhs, solution, ierr) |
391 |
|
392 |
|
393 |
CALL TIMER_STOP ('CG3D_PETSC_SOLVE',myThid) |
394 |
|
395 |
call KSPGetIterationNumber(ksp_cg3d,iters,ierr) |
396 |
|
397 |
maxIter = iters |
398 |
|
399 |
call VecGetValues(solution,local_dofs,indices, |
400 |
& solution_values,ierr) |
401 |
|
402 |
DO bj = myByLo(myThid), myByHi(myThid) |
403 |
DO bi = myBxLo(myThid), myBxHi(myThid) |
404 |
DO j=1,sNy |
405 |
DO i=1,sNx |
406 |
DO k=1,Nr |
407 |
|
408 |
dof_index = INT(cg3d_petsc_dofs(i,j,k,bi,bj)) |
409 |
& - local_offset |
410 |
color = INT(cg3d_petsc_color(i,j,k,bi,bj)) |
411 |
rank = cg3d_color_rank (color) |
412 |
if (dof_index.ge.0.and.rank.eq.mpimywid) THEN |
413 |
cg3d_x(i,j,k,bi,bj) = solution_values(dof_index+1) |
414 |
endif |
415 |
|
416 |
ENDDO |
417 |
ENDDO |
418 |
ENDDO |
419 |
ENDDO |
420 |
ENDDO |
421 |
|
422 |
if (cg3d_need2destroy_mat) then |
423 |
call KSPDestroy (ksp_cg3d, ierr) |
424 |
call MatDestroy (matrix_petsc_cg3d, ierr) |
425 |
endif |
426 |
call VecDestroy (rhs, ierr) |
427 |
call VecDestroy (solution, ierr) |
428 |
|
429 |
|
430 |
|
431 |
|
432 |
#endif |
433 |
|
434 |
|
435 |
|
436 |
RETURN |
437 |
END |