itd/code/seaice_fgmres.F

C $Header: /u/gcmpack/MITgcm/pkg/seaice/seaice_fgmres.F,v 1.2 2012/10/21 04:34:07 heimbach Exp $
C $Name:  $

#include "SEAICE_OPTIONS.h"

C--   File seaice_fgmres.F: seaice fgmres dynamical (linear) solver S/R:
C--   Contents
C--   o SEAICE_FGMRES_DRIVER
C--   o SEAICE_MAP2VEC
C--   o SEAICE_FGMRES
C--   o SCALPROD

CBOP
C     !ROUTINE: SEAICE_FGMRES_DRIVER
C     !INTERFACE:

      SUBROUTINE SEAICE_FGMRES_DRIVER(
     I     uIceRes, vIceRes, 
     U     duIce, dvIce, 
     U     iCode, 
     I     FGMRESeps,  
     I     newtonIter, krylovIter, myTime, myIter, myThid )

C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE SEAICE_FGMRES_DRIVER
C     | o driver routine for fgmres
C     | o does the conversion between 2D fields and 1D vector
C     |   back and forth
C     *==========================================================*
C     | written by Martin Losch, Oct 2012
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE

C     === Global variables ===
#include "SIZE.h"
#include "EEPARAMS.h"
#include "SEAICE_SIZE.h"
#include "SEAICE_PARAMS.h"

C     !INPUT/OUTPUT PARAMETERS:
C     === Routine arguments ===
C     myTime :: Simulation time
C     myIter :: Simulation timestep number
C     myThid :: my Thread Id. number
C     newtonIter :: current iterate of Newton iteration
C     krylovIter :: current iterate of Newton iteration
C     iCode  :: FGMRES parameter to determine next step
      _RL     myTime
      INTEGER myIter
      INTEGER myThid
      INTEGER newtonIter
      INTEGER krylovIter
      INTEGER iCode
C     FGMRESeps :: tolerance for FGMRES
      _RL FGMRESeps
C     du/vIce   :: solution vector
      _RL duIce(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL dvIce(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
C     u/vIceRes :: residual F(u)
      _RL uIceRes(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL vIceRes(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)

#if ( defined (SEAICE_CGRID) && \
      defined (SEAICE_ALLOW_JFNK) && \
      defined (SEAICE_ALLOW_DYNAMICS) )
C     Local variables:
C     k :: loop indices
      INTEGER k
C     FGMRES parameters
C     n  :: size of the input vector(s)
C     im :: size of Krylov space
C     ifgmres :: interation counter
C     iout :: control output of fgmres
      INTEGER n
      PARAMETER ( n  = 2*sNx*sNy*nSx*nSy )
      INTEGER im
      PARAMETER ( im = 50 )
      INTEGER ifgmres, iout
C     work arrays
      _RL rhs(n), sol(n)
      _RL vv(n,im+1), w(n,im)
      _RL wk1(n), wk2(n)
C     need to store some of the fgmres parameters and fields so that
C     they are not forgotten between Krylov iterations
      COMMON /FGMRES_I/ ifgmres
      COMMON /FGMRES_RL/ sol, rhs, vv, w
CEOP

C     iout=1 give a little bit of output
      iout=1

C     For now, let only the master thread do all the work
C     - copy from 2D arrays to 1D-vector
C     - perform fgmres step (including global sums)
C     - copy from 1D-vector to 2D arrays
C     not sure if this works properly

      _BEGIN_MASTER ( myThid )
      IF ( iCode .EQ. 0 ) THEN
C     first guess is zero because it is a correction
C     wk2 needs to reset for iCode = 0, because it contains
C     remains of the previous Krylov iteration
       DO k=1,n
        sol(k) = 0. _d 0
        wk2(k) = 0. _d 0
       ENDDO
      ELSEIF ( iCode .EQ. 3 ) THEN
       CALL SEAICE_MAP2VEC(n,uIceRes,vIceRes,rhs,.TRUE.,myThid)
C     change sign because we are solving J*u = -F
       DO k=1,n
        rhs(k) = -rhs(k)
       ENDDO
      ELSE
C     map preconditioner results or Jacobian times vector, 
C     stored in du/vIce to wk2
       CALL SEAICE_MAP2VEC(n,duIce,dvIce,wk2,.TRUE.,myThid)
      ENDIF
C     
      CALL SEAICE_FGMRES (n,im,rhs,sol,ifgmres,vv,w,wk1,wk2, 
     &     FGMRESeps,SEAICEkrylovIterMax,
     &     iout,icode,krylovIter,myThid) 
C     
      IF ( iCode .EQ. 0 ) THEN
C     map sol(ution) vector to du/vIce
       CALL SEAICE_MAP2VEC(n,duIce,dvIce,sol,.FALSE.,myThid)
      ELSE
C     map work vector to du/vIce to either compute a preconditioner
C     solution (wk1=rhs) or a Jacobian times wk1
       CALL SEAICE_MAP2VEC(n,duIce,dvIce,wk1,.FALSE.,myThid)
      ENDIF
      _END_MASTER ( myThid )
        
C     Fill overlaps in updated fields
      CALL EXCH_UV_XY_RL( duIce, dvIce,.TRUE.,myThid)

      RETURN
      END

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: SEAICE_MAP2VEC
C     !INTERFACE:

      SUBROUTINE SEAICE_MAP2VEC(
     I     n, 
     O     xfld2d, yfld2d, 
     U     vector, 
     I     map2vec, myThid )

C     !DESCRIPTION: \bv
C     *==========================================================*
C     | SUBROUTINE SEAICE_MAP2VEC
C     | o maps 2 2D-fields to vector and back
C     *==========================================================*
C     | written by Martin Losch, Oct 2012
C     *==========================================================*
C     \ev

C     !USES:
      IMPLICIT NONE

C     === Global variables ===
#include "SIZE.h"
#include "EEPARAMS.h"
C     === Routine arguments ===
      INTEGER n
      LOGICAL map2vec
      INTEGER myThid
      _RL xfld2d (1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL yfld2d (1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
      _RL vector (n)
C     === local variables ===
      INTEGER I, J, bi, bj
      INTEGER ii, jj, ib, jb, m
CEOP
      
      m = n/2
      IF ( map2vec ) THEN
       DO bj=myByLo(myThid),myByHi(myThid)
        jb = nSx*sNy*sNx*(bj-1)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         ib = jb + sNy*sNx*(bi-1)
         DO J=1,sNy
          jj = ib + sNx*(J-1)
          DO I=1,sNx
           ii = jj + I
           vector(ii)   = xfld2d(I,J,bi,bj)
           vector(ii+m) = yfld2d(I,J,bi,bj)
          ENDDO
         ENDDO
        ENDDO
       ENDDO       
      ELSE
       DO bj=myByLo(myThid),myByHi(myThid)
        jb = nSx*sNy*sNx*(bj-1)
        DO bi=myBxLo(myThid),myBxHi(myThid)
         ib = jb + sNy*sNx*(bi-1)
         DO J=1,sNy
          jj = ib + sNx*(J-1)
          DO I=1,sNx
           ii = jj + I
           xfld2d(I,J,bi,bj) = vector(ii)
           yfld2d(I,J,bi,bj) = vector(ii+m)
          ENDDO
         ENDDO
        ENDDO
       ENDDO       
      ENDIF

      RETURN
      END

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: SEAICE_FGMRES
C     !INTERFACE:

      SUBROUTINE SEAICE_FGMRES (n,im,rhs,sol,i,vv,w,wk1, wk2, 
     &     eps,maxits,iout,icode,its,myThid) 

C-----------------------------------------------------------------------
C mlosch Oct 2012: modified the routine further to be compliant with
C MITgcm standards:
C f90 -> F
C !-comment -> C-comment
C double precision -> _RL
C implicit none
C 
C jfl Dec 1st 2006. We modified the routine so that it is double precison.
C Here are the modifications:
C 1) implicit real (a-h,o-z) becomes implicit real*8 (a-h,o-z) 
C 2) real bocomes real*8
C 3) subroutine scopy.f has been changed for dcopy.f
C 4) subroutine saxpy.f has been changed for daxpy.f
C 5) function sdot.f has been changed for ddot.f
C 6) 1e-08 becomes 1d-08
C
C Be careful with the dcopy, daxpy and ddot code...there is a slight 
C difference with the single precision versions (scopy, saxpy and sdot).
C In the single precision versions, the array are declared sightly differently.
C It is written for single precision:
C
C modified 12/3/93, array(1) declarations changed to array(*)
C-----------------------------------------------------------------------

      implicit none
CML   implicit double precision (a-h,o-z) !jfl modification
      integer myThid
      integer n, im, maxits, iout, icode
      _RL rhs(*), sol(*), vv(n,im+1), w(n,im)
      _RL wk1(n), wk2(n), eps
C-----------------------------------------------------------------------
C flexible GMRES routine. This is a version of GMRES which allows a 
C a variable preconditioner. Implemented with a reverse communication 
C protocole for flexibility -
C DISTRIBUTED VERSION (USES DISTDOT FOR DDOT) 
C explicit (exact) residual norms for restarts  
C written by Y. Saad, modified by A. Malevsky, version February 1, 1995
C-----------------------------------------------------------------------
C This Is A Reverse Communication Implementation. 
C------------------------------------------------- 
C USAGE: (see also comments for icode below). FGMRES
C should be put in a loop and the loop should be active for as
C long as icode is not equal to 0. On return fgmres will
C    1) either be requesting the new preconditioned vector applied
C       to wk1 in case icode.eq.1 (result should be put in wk2) 
C    2) or be requesting the product of A applied to the vector wk1
C       in case icode.eq.2 (result should be put in wk2) 
C    3) or be terminated in case icode .eq. 0. 
C on entry always set icode = 0. So icode should be set back to zero
C upon convergence.
C-----------------------------------------------------------------------
C Here is a typical way of running fgmres: 
C
C      icode = 0
C 1    continue
C      call fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits,iout,icode)
C
C      if (icode .eq. 1) then
C         call  precon(n, wk1, wk2)    <--- user variable preconditioning
C         goto 1
C      else if (icode .ge. 2) then
C         call  matvec (n,wk1, wk2)    <--- user matrix vector product. 
C         goto 1
C      else 
C         ----- done ---- 
C         .........
C-----------------------------------------------------------------------
C list of parameters 
C------------------- 
C
C n     == integer. the dimension of the problem
C im    == size of Krylov subspace:  should not exceed 50 in this
C          version (can be reset in code. looking at comment below)
C rhs   == vector of length n containing the right hand side
C sol   == initial guess on input, approximate solution on output
C vv    == work space of size n x (im+1)
C w     == work space of length n x im 
C wk1,
C wk2,  == two work vectors of length n each used for the reverse
C          communication protocole. When on return (icode .ne. 1)
C          the user should call fgmres again with wk2 = precon * wk1
C          and icode untouched. When icode.eq.1 then it means that
C          convergence has taken place.
C          
C eps   == tolerance for stopping criterion. process is stopped
C          as soon as ( ||.|| is the euclidean norm):
C          || current residual||/||initial residual|| <= eps
C
C maxits== maximum number of iterations allowed
C
C iout  == output unit number number for printing intermediate results
C          if (iout .le. 0) no statistics are printed.
C 
C icode = integer. indicator for the reverse communication protocole.
C         ON ENTRY : icode should be set to icode = 0.
C         ON RETURN: 
C       * icode .eq. 1 value means that fgmres has not finished
C         and that it is requesting a preconditioned vector before
C         continuing. The user must compute M**(-1) wk1, where M is
C         the preconditioing  matrix (may vary at each call) and wk1 is
C         the vector as provided by fgmres upun return, and put the 
C         result in wk2. Then fgmres must be called again without
C         changing any other argument. 
C       * icode .eq. 2 value means that fgmres has not finished
C         and that it is requesting a matrix vector product before
C         continuing. The user must compute  A * wk1, where A is the
C         coefficient  matrix and wk1 is the vector provided by 
C         upon return. The result of the operation is to be put in
C         the vector wk2. Then fgmres must be called again without
C         changing any other argument. 
C       * icode .eq. 0 means that fgmres has finished and sol contains 
C         the approximate solution.
C         comment: typically fgmres must be implemented in a loop
C         with fgmres being called as long icode is returned with 
C         a value .ne. 0. 
C-----------------------------------------------------------------------
C     local variables -- !jfl modif
      integer imax 
      parameter ( imax = 50 )
      _RL hh(4*imax+1,4*imax),c(4*imax),s(4*imax)
      _RL rs(4*imax+1),t,ro
C-------------------------------------------------------------
C     arnoldi size should not exceed 50 in this version..
C-------------------------------------------------------------
      integer i, its, i1, ii, j, jj, k, k1!, n1
      _RL r0, gam, epsmac, eps1

CEOP
      save
      data epsmac/1.d-16/
C     
C     computed goto 
C     
      if ( im .gt. imax ) stop 'size of krylov space > 50'
      goto (100,200,300,11) icode +1
 100  continue
CML      n1 = n + 1
      its = 0
C-------------------------------------------------------------
C     **  outer loop starts here..
C--------------compute initial residual vector --------------
C 10   continue
CML      call dcopy (n, sol, 1, wk1, 1) !jfl modification
      do k=1,n
       wk1(k)=sol(k)
      enddo
      icode = 3
      return
 11   continue
      do j=1,n
         vv(j,1) = rhs(j) - wk2(j) 
      enddo
CML 20   ro = ddot(n, vv, 1, vv,1) !jfl modification
 20   call scalprod(n, vv, vv, ro, myThid)
      ro = sqrt(ro)
      if (ro .eq. 0.0d0) goto 999 
      t = 1.0d0/ ro 
      do j=1, n
         vv(j,1) = vv(j,1)*t 
      enddo
      if (its .eq. 0) eps1=eps
      if (its .eq. 0) r0 = ro
      if (iout .gt. 0) write(*, 199) its, ro!&
C           print *,'chau',its, ro !write(iout, 199) its, ro
C     
C     initialize 1-st term  of rhs of hessenberg system..
C     
      rs(1) = ro
      i = 0
 4    i=i+1
      its = its + 1
      i1 = i + 1
      do k=1, n
         wk1(k) = vv(k,i) 
      enddo
C     
C     return
C     
      icode = 1

      return
 200  continue
      do k=1, n
         w(k,i) = wk2(k) 
      enddo
C     
C     call matvec operation
C     
      icode = 2
CML      call dcopy(n, wk2, 1, wk1, 1) !jfl modification
      do k=1,n
       wk1(k)=wk2(k)
      enddo
C
C     return
C     
      return
 300  continue
C     
C     first call to ope corresponds to intialization goto back to 11.
C     
C      if (icode .eq. 3) goto 11
CML      call  dcopy (n, wk2, 1, vv(1,i1), 1) !jfl modification
      do k=1,n
       vv(k,i1)=wk2(k)
      enddo
C     
C     modified gram - schmidt...
C     
      do j=1, i
CML         t = ddot(n, vv(1,j), 1, vv(1,i1), 1) !jfl modification
         call scalprod(n, vv(1,j), vv(1,i1), t, myThid)
         hh(j,i) = t
CML         call daxpy(n, -t, vv(1,j), 1, vv(1,i1), 1) !jfl modification
CML      enddo
CML      do j=1, i
CML         t = hh(j,i)
         do k=1,n
          vv(k,i1) = vv(k,i1) - t*vv(k,j)
         enddo
      enddo
CML      t = sqrt(ddot(n, vv(1,i1), 1, vv(1,i1), 1)) !jfl modification
      call scalprod(n, vv(1,i1), vv(1,i1), t, myThid)
      t = sqrt(t)
      hh(i1,i) = t
      if (t .eq. 0.0d0) goto 58
      t = 1.0d0 / t
      do k=1,n
         vv(k,i1) = vv(k,i1)*t
      enddo
C     
C     done with modified gram schimd and arnoldi step. 
C     now  update factorization of hh
C     
 58   if (i .eq. 1) goto 121
C     
C     perfrom previous transformations  on i-th column of h
C     
      do k=2,i
         k1 = k-1
         t = hh(k1,i)
         hh(k1,i) = c(k1)*t + s(k1)*hh(k,i)
         hh(k,i) = -s(k1)*t + c(k1)*hh(k,i)
      enddo
 121  gam = sqrt(hh(i,i)**2 + hh(i1,i)**2)
      if (gam .eq. 0.0d0) gam = epsmac
C-----------#determine next plane rotation  #-------------------
      c(i) = hh(i,i)/gam
      s(i) = hh(i1,i)/gam
      rs(i1) = -s(i)*rs(i)
      rs(i) =  c(i)*rs(i)
C     
C     determine res. norm. and test for convergence-
C     
      hh(i,i) = c(i)*hh(i,i) + s(i)*hh(i1,i)
      ro = abs(rs(i1))
      if (iout .gt. 0) write(*, 199) its, ro
      if (i .lt. im .and. (ro .gt. eps1))  goto 4
C     
C     now compute solution. first solve upper triangular system.
C     
      rs(i) = rs(i)/hh(i,i)
      do ii=2,i
         k=i-ii+1
         k1 = k+1
         t=rs(k)
         do j=k1,i
            t = t-hh(k,j)*rs(j)
         enddo
         rs(k) = t/hh(k,k)
      enddo
C     
C     done with back substitution..
C     now form linear combination to get solution
C     
      do j=1, i
       t = rs(j)
C         call daxpy(n, t, w(1,j), 1, sol,1) !jfl modification
       do k=1,n
        sol(k) = sol(k) + t*w(k,j)
       enddo
      enddo
C     
C     test for return 
C     
      print *, 'ml-fgmres: its, maxits: ', its, maxits, ro, '<', eps1
      if (ro .le. eps1 .or. its .ge. maxits) goto 999
C     
C     else compute residual vector and continue..
C     
C       goto 10

      do j=1,i
        jj = i1-j+1
        rs(jj-1) = -s(jj-1)*rs(jj)
        rs(jj) = c(jj-1)*rs(jj)
      enddo
      do j=1,i1
        t = rs(j)
        if (j .eq. 1)  t = t-1.0d0
CML        call daxpy (n, t, vv(1,j), 1,  vv, 1)
        do k=1,n
         vv(k,1) = vv(k,1) + t*vv(k,j)
        enddo
      enddo
C     
C     restart outer loop.
C     
      goto 20
 999  icode = 0

 199  format('   -- fgmres its =', i4, ' res. norm =', d26.16)
C     
      return 
C-----end-of-fgmres----------------------------------------------------- 
C-----------------------------------------------------------------------
      end

C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
CBOP
C     !ROUTINE: SCALPROD
C     !INTERFACE:

      subroutine scalprod(n,dx,dy,t,myThid)

C
C     forms the dot product of two vectors.
C     uses unrolled loops for increments equal to one.
C     jack dongarra, linpack, 3/11/78.
C     ML: code stolen from BLAS and adapted for parallel applications
C
      implicit none
#include "SIZE.h"
#include "EEPARAMS.h"
#include "EESUPPORT.h"
      integer myThid
      _RL dx(n),dy(n)
      real*8 dtemp
      real*8 t
      integer i,m,mp1,n
#ifdef ALLOW_USE_MPI
      INTEGER mpiRC
#endif   /* ALLOW_USE_MPI */
CEOP
C
      m = mod(n,5)
      dtemp = 0. _d 0
      t     = 0. _d 0
      if( m .eq. 0 ) go to 40
      do i = 1,m
       dtemp = dtemp + dx(i)*dy(i)
      enddo
      if( n .lt. 5 ) go to 60
   40 mp1 = m + 1
      do i = mp1,n,5
       dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + 
     &      dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + 
     &      dx(i + 4)*dy(i + 4)
      enddo
   60 continue
C     sum over all processors
#ifdef ALLOW_USE_MPI
      t = dtemp
      IF ( usingMPI ) THEN
       CALL MPI_Allreduce(t,dtemp,1,MPI_DOUBLE_PRECISION,MPI_SUM,
     &                   MPI_COMM_MODEL,mpiRC)
      ENDIF
#endif /* ALLOW_USE_MPI */
      t = dtemp
      
CML      return
CML      end
CML
CML      subroutine daxpy(n,da,dx,incx,dy,incy)
CMLC                                                                                                                                                
CMLC     constant times a vector plus a vector.                
CMLC     uses unrolled loops for increments equal to one.      
CMLC     jack dongarra, linpack, 3/11/78.                      
CMLC                                                           
CML      _RL dx(n),dy(n),da
CML      integer i,incx,incy,ix,iy,m,mp1,n
CMLC                                                           
CML      if(n.le.0)return
CML      if (da .eq. 0.0d0) return
CML      if(incx.eq.1.and.incy.eq.1)go to 20
CMLC                                                           
CMLC        code for unequal increments or equal increments    
CMLC          not equal to 1                                   
CMLC                                                           
CML      ix = 1
CML      iy = 1
CML      if(incx.lt.0)ix = (-n+1)*incx + 1
CML      if(incy.lt.0)iy = (-n+1)*incy + 1
CML      do 10 i = 1,n
CML        dy(iy) = dy(iy) + da*dx(ix)
CML        ix = ix + incx
CML        iy = iy + incy
CML   10 continue
CML      return
CMLC                                                           
CMLC        code for both increments equal to 1                
CMLC                                                           
CMLC                                                           
CMLC        clean-up loop                                      
CMLC                                                           
CML   20 m = mod(n,4)
CML      if( m .eq. 0 ) go to 40
CML      do 30 i = 1,m
CML        dy(i) = dy(i) + da*dx(i)
CML   30 continue
CML      if( n .lt. 4 ) return
CML   40 mp1 = m + 1
CML      do 50 i = mp1,n,4
CML        dy(i) = dy(i) + da*dx(i)
CML        dy(i + 1) = dy(i + 1) + da*dx(i + 1)
CML        dy(i + 2) = dy(i + 2) + da*dx(i + 2)
CML        dy(i + 3) = dy(i + 3) + da*dx(i + 3)
CML   50 continue
CML      return
CML      end
CML
CML      subroutine  dcopy(n,dx,incx,dy,incy)
CMLC                                                           
CMLC     copies a vector, x, to a vector, y.                   
CMLC     uses unrolled loops for increments equal to one.      
CMLC     jack dongarra, linpack, 3/11/78.                      
CMLC                                                           
CML      _RL dx(n),dy(n)
CML      integer i,incx,incy,ix,iy,m,mp1,n
CMLC                                                           
CML      if(n.le.0)return
CML      if(incx.eq.1.and.incy.eq.1)go to 20
CMLC                                                           
CMLC        code for unequal increments or equal increments    
CMLC          not equal to 1                                   
CMLC                                                           
CML      ix = 1
CML      iy = 1
CML      if(incx.lt.0)ix = (-n+1)*incx + 1
CML      if(incy.lt.0)iy = (-n+1)*incy + 1
CML      do 10 i = 1,n
CML        dy(iy) = dx(ix)
CML        ix = ix + incx
CML        iy = iy + incy
CML   10 continue
CML      return
CMLC                                                           
CMLC        code for both increments equal to 1                
CMLC                                                           
CMLC                                                           
CMLC        clean-up loop                                      
CMLC                                                           
CML   20 m = mod(n,7)
CML      if( m .eq. 0 ) go to 40
CML      do 30 i = 1,m
CML        dy(i) = dx(i)
CML   30 continue
CML      if( n .lt. 7 ) return
CML   40 mp1 = m + 1
CML      do 50 i = mp1,n,7
CML        dy(i) = dx(i)
CML        dy(i + 1) = dx(i + 1)
CML        dy(i + 2) = dx(i + 2)
CML        dy(i + 3) = dx(i + 3)
CML        dy(i + 4) = dx(i + 4)
CML        dy(i + 5) = dx(i + 5)
CML        dy(i + 6) = dx(i + 6)
CML   50 continue

#endif /* SEAICE_ALLOW_DYNAMICS and SEAICE_CGRID and SEAICE_ALLOW_JFNK */

      RETURN
      END
1	torge	1.1	C $Header: /u/gcmpack/MITgcm/pkg/seaice/seaice_fgmres.F,v 1.2 2012/10/21 04:34:07 heimbach Exp $
2			C $Name: $
3
4			#include "SEAICE_OPTIONS.h"
5
6			C-- File seaice_fgmres.F: seaice fgmres dynamical (linear) solver S/R:
7			C-- Contents
8			C-- o SEAICE_FGMRES_DRIVER
9			C-- o SEAICE_MAP2VEC
10			C-- o SEAICE_FGMRES
11			C-- o SCALPROD
12
13			CBOP
14			C !ROUTINE: SEAICE_FGMRES_DRIVER
15			C !INTERFACE:
16
17			SUBROUTINE SEAICE_FGMRES_DRIVER(
18			I uIceRes, vIceRes,
19			U duIce, dvIce,
20			U iCode,
21			I FGMRESeps,
22			I newtonIter, krylovIter, myTime, myIter, myThid )
23
24			C !DESCRIPTION: \bv
25			C ==========================================================
26			C \| SUBROUTINE SEAICE_FGMRES_DRIVER
27			C \| o driver routine for fgmres
28			C \| o does the conversion between 2D fields and 1D vector
29			C \| back and forth
30			C ==========================================================
31			C \| written by Martin Losch, Oct 2012
32			C ==========================================================
33			C \ev
34
35			C !USES:
36			IMPLICIT NONE
37
38			C === Global variables ===
39			#include "SIZE.h"
40			#include "EEPARAMS.h"
41			#include "SEAICE_SIZE.h"
42			#include "SEAICE_PARAMS.h"
43
44			C !INPUT/OUTPUT PARAMETERS:
45			C === Routine arguments ===
46			C myTime :: Simulation time
47			C myIter :: Simulation timestep number
48			C myThid :: my Thread Id. number
49			C newtonIter :: current iterate of Newton iteration
50			C krylovIter :: current iterate of Newton iteration
51			C iCode :: FGMRES parameter to determine next step
52			_RL myTime
53			INTEGER myIter
54			INTEGER myThid
55			INTEGER newtonIter
56			INTEGER krylovIter
57			INTEGER iCode
58			C FGMRESeps :: tolerance for FGMRES
59			_RL FGMRESeps
60			C du/vIce :: solution vector
61			_RL duIce(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
62			_RL dvIce(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
63			C u/vIceRes :: residual F(u)
64			_RL uIceRes(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
65			_RL vIceRes(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
66
67			#if ( defined (SEAICE_CGRID) && \
68			defined (SEAICE_ALLOW_JFNK) && \
69			defined (SEAICE_ALLOW_DYNAMICS) )
70			C Local variables:
71			C k :: loop indices
72			INTEGER k
73			C FGMRES parameters
74			C n :: size of the input vector(s)
75			C im :: size of Krylov space
76			C ifgmres :: interation counter
77			C iout :: control output of fgmres
78			INTEGER n
79			PARAMETER ( n = 2sNxsNynSxnSy )
80			INTEGER im
81			PARAMETER ( im = 50 )
82			INTEGER ifgmres, iout
83			C work arrays
84			_RL rhs(n), sol(n)
85			_RL vv(n,im+1), w(n,im)
86			_RL wk1(n), wk2(n)
87			C need to store some of the fgmres parameters and fields so that
88			C they are not forgotten between Krylov iterations
89			COMMON /FGMRES_I/ ifgmres
90			COMMON /FGMRES_RL/ sol, rhs, vv, w
91			CEOP
92
93			C iout=1 give a little bit of output
94			iout=1
95
96			C For now, let only the master thread do all the work
97			C - copy from 2D arrays to 1D-vector
98			C - perform fgmres step (including global sums)
99			C - copy from 1D-vector to 2D arrays
100			C not sure if this works properly
101
102			_BEGIN_MASTER ( myThid )
103			IF ( iCode .EQ. 0 ) THEN
104			C first guess is zero because it is a correction
105			C wk2 needs to reset for iCode = 0, because it contains
106			C remains of the previous Krylov iteration
107			DO k=1,n
108			sol(k) = 0. _d 0
109			wk2(k) = 0. _d 0
110			ENDDO
111			ELSEIF ( iCode .EQ. 3 ) THEN
112			CALL SEAICE_MAP2VEC(n,uIceRes,vIceRes,rhs,.TRUE.,myThid)
113			C change sign because we are solving J*u = -F
114			DO k=1,n
115			rhs(k) = -rhs(k)
116			ENDDO
117			ELSE
118			C map preconditioner results or Jacobian times vector,
119			C stored in du/vIce to wk2
120			CALL SEAICE_MAP2VEC(n,duIce,dvIce,wk2,.TRUE.,myThid)
121			ENDIF
122			C
123			CALL SEAICE_FGMRES (n,im,rhs,sol,ifgmres,vv,w,wk1,wk2,
124			& FGMRESeps,SEAICEkrylovIterMax,
125			& iout,icode,krylovIter,myThid)
126			C
127			IF ( iCode .EQ. 0 ) THEN
128			C map sol(ution) vector to du/vIce
129			CALL SEAICE_MAP2VEC(n,duIce,dvIce,sol,.FALSE.,myThid)
130			ELSE
131			C map work vector to du/vIce to either compute a preconditioner
132			C solution (wk1=rhs) or a Jacobian times wk1
133			CALL SEAICE_MAP2VEC(n,duIce,dvIce,wk1,.FALSE.,myThid)
134			ENDIF
135			_END_MASTER ( myThid )
136
137			C Fill overlaps in updated fields
138			CALL EXCH_UV_XY_RL( duIce, dvIce,.TRUE.,myThid)
139
140			RETURN
141			END
142
143			C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-\|--+----\|
144			CBOP
145			C !ROUTINE: SEAICE_MAP2VEC
146			C !INTERFACE:
147
148			SUBROUTINE SEAICE_MAP2VEC(
149			I n,
150			O xfld2d, yfld2d,
151			U vector,
152			I map2vec, myThid )
153
154			C !DESCRIPTION: \bv
155			C ==========================================================
156			C \| SUBROUTINE SEAICE_MAP2VEC
157			C \| o maps 2 2D-fields to vector and back
158			C ==========================================================
159			C \| written by Martin Losch, Oct 2012
160			C ==========================================================
161			C \ev
162
163			C !USES:
164			IMPLICIT NONE
165
166			C === Global variables ===
167			#include "SIZE.h"
168			#include "EEPARAMS.h"
169			C === Routine arguments ===
170			INTEGER n
171			LOGICAL map2vec
172			INTEGER myThid
173			_RL xfld2d (1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
174			_RL yfld2d (1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy)
175			_RL vector (n)
176			C === local variables ===
177			INTEGER I, J, bi, bj
178			INTEGER ii, jj, ib, jb, m
179			CEOP
180
181			m = n/2
182			IF ( map2vec ) THEN
183			DO bj=myByLo(myThid),myByHi(myThid)
184			jb = nSxsNysNx*(bj-1)
185			DO bi=myBxLo(myThid),myBxHi(myThid)
186			ib = jb + sNysNx(bi-1)
187			DO J=1,sNy
188			jj = ib + sNx*(J-1)
189			DO I=1,sNx
190			ii = jj + I
191			vector(ii) = xfld2d(I,J,bi,bj)
192			vector(ii+m) = yfld2d(I,J,bi,bj)
193			ENDDO
194			ENDDO
195			ENDDO
196			ENDDO
197			ELSE
198			DO bj=myByLo(myThid),myByHi(myThid)
199			jb = nSxsNysNx*(bj-1)
200			DO bi=myBxLo(myThid),myBxHi(myThid)
201			ib = jb + sNysNx(bi-1)
202			DO J=1,sNy
203			jj = ib + sNx*(J-1)
204			DO I=1,sNx
205			ii = jj + I
206			xfld2d(I,J,bi,bj) = vector(ii)
207			yfld2d(I,J,bi,bj) = vector(ii+m)
208			ENDDO
209			ENDDO
210			ENDDO
211			ENDDO
212			ENDIF
213
214			RETURN
215			END
216
217			C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-\|--+----\|
218			CBOP
219			C !ROUTINE: SEAICE_FGMRES
220			C !INTERFACE:
221
222			SUBROUTINE SEAICE_FGMRES (n,im,rhs,sol,i,vv,w,wk1, wk2,
223			& eps,maxits,iout,icode,its,myThid)
224
225			C-----------------------------------------------------------------------
226			C mlosch Oct 2012: modified the routine further to be compliant with
227			C MITgcm standards:
228			C f90 -> F
229			C !-comment -> C-comment
230			C double precision -> _RL
231			C implicit none
232			C
233			C jfl Dec 1st 2006. We modified the routine so that it is double precison.
234			C Here are the modifications:
235			C 1) implicit real (a-h,o-z) becomes implicit real*8 (a-h,o-z)
236			C 2) real bocomes real*8
237			C 3) subroutine scopy.f has been changed for dcopy.f
238			C 4) subroutine saxpy.f has been changed for daxpy.f
239			C 5) function sdot.f has been changed for ddot.f
240			C 6) 1e-08 becomes 1d-08
241			C
242			C Be careful with the dcopy, daxpy and ddot code...there is a slight
243			C difference with the single precision versions (scopy, saxpy and sdot).
244			C In the single precision versions, the array are declared sightly differently.
245			C It is written for single precision:
246			C
247			C modified 12/3/93, array(1) declarations changed to array(*)
248			C-----------------------------------------------------------------------
249
250			implicit none
251			CML implicit double precision (a-h,o-z) !jfl modification
252			integer myThid
253			integer n, im, maxits, iout, icode
254			_RL rhs(), sol(), vv(n,im+1), w(n,im)
255			_RL wk1(n), wk2(n), eps
256			C-----------------------------------------------------------------------
257			C flexible GMRES routine. This is a version of GMRES which allows a
258			C a variable preconditioner. Implemented with a reverse communication
259			C protocole for flexibility -
260			C DISTRIBUTED VERSION (USES DISTDOT FOR DDOT)
261			C explicit (exact) residual norms for restarts
262			C written by Y. Saad, modified by A. Malevsky, version February 1, 1995
263			C-----------------------------------------------------------------------
264			C This Is A Reverse Communication Implementation.
265			C-------------------------------------------------
266			C USAGE: (see also comments for icode below). FGMRES
267			C should be put in a loop and the loop should be active for as
268			C long as icode is not equal to 0. On return fgmres will
269			C 1) either be requesting the new preconditioned vector applied
270			C to wk1 in case icode.eq.1 (result should be put in wk2)
271			C 2) or be requesting the product of A applied to the vector wk1
272			C in case icode.eq.2 (result should be put in wk2)
273			C 3) or be terminated in case icode .eq. 0.
274			C on entry always set icode = 0. So icode should be set back to zero
275			C upon convergence.
276			C-----------------------------------------------------------------------
277			C Here is a typical way of running fgmres:
278			C
279			C icode = 0
280			C 1 continue
281			C call fgmres (n,im,rhs,sol,i,vv,w,wk1, wk2,eps,maxits,iout,icode)
282			C
283			C if (icode .eq. 1) then
284			C call precon(n, wk1, wk2) <--- user variable preconditioning
285			C goto 1
286			C else if (icode .ge. 2) then
287			C call matvec (n,wk1, wk2) <--- user matrix vector product.
288			C goto 1
289			C else
290			C ----- done ----
291			C .........
292			C-----------------------------------------------------------------------
293			C list of parameters
294			C-------------------
295			C
296			C n == integer. the dimension of the problem
297			C im == size of Krylov subspace: should not exceed 50 in this
298			C version (can be reset in code. looking at comment below)
299			C rhs == vector of length n containing the right hand side
300			C sol == initial guess on input, approximate solution on output
301			C vv == work space of size n x (im+1)
302			C w == work space of length n x im
303			C wk1,
304			C wk2, == two work vectors of length n each used for the reverse
305			C communication protocole. When on return (icode .ne. 1)
306			C the user should call fgmres again with wk2 = precon * wk1
307			C and icode untouched. When icode.eq.1 then it means that
308			C convergence has taken place.
309			C
310			C eps == tolerance for stopping criterion. process is stopped
311			C as soon as ( \|\|.\|\| is the euclidean norm):
312			C \|\| current residual\|\|/\|\|initial residual\|\| <= eps
313			C
314			C maxits== maximum number of iterations allowed
315			C
316			C iout == output unit number number for printing intermediate results
317			C if (iout .le. 0) no statistics are printed.
318			C
319			C icode = integer. indicator for the reverse communication protocole.
320			C ON ENTRY : icode should be set to icode = 0.
321			C ON RETURN:
322			C * icode .eq. 1 value means that fgmres has not finished
323			C and that it is requesting a preconditioned vector before
324			C continuing. The user must compute M**(-1) wk1, where M is
325			C the preconditioing matrix (may vary at each call) and wk1 is
326			C the vector as provided by fgmres upun return, and put the
327			C result in wk2. Then fgmres must be called again without
328			C changing any other argument.
329			C * icode .eq. 2 value means that fgmres has not finished
330			C and that it is requesting a matrix vector product before
331			C continuing. The user must compute A * wk1, where A is the
332			C coefficient matrix and wk1 is the vector provided by
333			C upon return. The result of the operation is to be put in
334			C the vector wk2. Then fgmres must be called again without
335			C changing any other argument.
336			C * icode .eq. 0 means that fgmres has finished and sol contains
337			C the approximate solution.
338			C comment: typically fgmres must be implemented in a loop
339			C with fgmres being called as long icode is returned with
340			C a value .ne. 0.
341			C-----------------------------------------------------------------------
342			C local variables -- !jfl modif
343			integer imax
344			parameter ( imax = 50 )
345			_RL hh(4imax+1,4imax),c(4imax),s(4imax)
346			_RL rs(4*imax+1),t,ro
347			C-------------------------------------------------------------
348			C arnoldi size should not exceed 50 in this version..
349			C-------------------------------------------------------------
350			integer i, its, i1, ii, j, jj, k, k1!, n1
351			_RL r0, gam, epsmac, eps1
352
353			CEOP
354			save
355			data epsmac/1.d-16/
356			C
357			C computed goto
358			C
359			if ( im .gt. imax ) stop 'size of krylov space > 50'
360			goto (100,200,300,11) icode +1
361			100 continue
362			CML n1 = n + 1
363			its = 0
364			C-------------------------------------------------------------
365			C ** outer loop starts here..
366			C--------------compute initial residual vector --------------
367			C 10 continue
368			CML call dcopy (n, sol, 1, wk1, 1) !jfl modification
369			do k=1,n
370			wk1(k)=sol(k)
371			enddo
372			icode = 3
373			return
374			11 continue
375			do j=1,n
376			vv(j,1) = rhs(j) - wk2(j)
377			enddo
378			CML 20 ro = ddot(n, vv, 1, vv,1) !jfl modification
379			20 call scalprod(n, vv, vv, ro, myThid)
380			ro = sqrt(ro)
381			if (ro .eq. 0.0d0) goto 999
382			t = 1.0d0/ ro
383			do j=1, n
384			vv(j,1) = vv(j,1)*t
385			enddo
386			if (its .eq. 0) eps1=eps
387			if (its .eq. 0) r0 = ro
388			if (iout .gt. 0) write(*, 199) its, ro!&
389			C print *,'chau',its, ro !write(iout, 199) its, ro
390			C
391			C initialize 1-st term of rhs of hessenberg system..
392			C
393			rs(1) = ro
394			i = 0
395			4 i=i+1
396			its = its + 1
397			i1 = i + 1
398			do k=1, n
399			wk1(k) = vv(k,i)
400			enddo
401			C
402			C return
403			C
404			icode = 1
405
406			return
407			200 continue
408			do k=1, n
409			w(k,i) = wk2(k)
410			enddo
411			C
412			C call matvec operation
413			C
414			icode = 2
415			CML call dcopy(n, wk2, 1, wk1, 1) !jfl modification
416			do k=1,n
417			wk1(k)=wk2(k)
418			enddo
419			C
420			C return
421			C
422			return
423			300 continue
424			C
425			C first call to ope corresponds to intialization goto back to 11.
426			C
427			C if (icode .eq. 3) goto 11
428			CML call dcopy (n, wk2, 1, vv(1,i1), 1) !jfl modification
429			do k=1,n
430			vv(k,i1)=wk2(k)
431			enddo
432			C
433			C modified gram - schmidt...
434			C
435			do j=1, i
436			CML t = ddot(n, vv(1,j), 1, vv(1,i1), 1) !jfl modification
437			call scalprod(n, vv(1,j), vv(1,i1), t, myThid)
438			hh(j,i) = t
439			CML call daxpy(n, -t, vv(1,j), 1, vv(1,i1), 1) !jfl modification
440			CML enddo
441			CML do j=1, i
442			CML t = hh(j,i)
443			do k=1,n
444			vv(k,i1) = vv(k,i1) - t*vv(k,j)
445			enddo
446			enddo
447			CML t = sqrt(ddot(n, vv(1,i1), 1, vv(1,i1), 1)) !jfl modification
448			call scalprod(n, vv(1,i1), vv(1,i1), t, myThid)
449			t = sqrt(t)
450			hh(i1,i) = t
451			if (t .eq. 0.0d0) goto 58
452			t = 1.0d0 / t
453			do k=1,n
454			vv(k,i1) = vv(k,i1)*t
455			enddo
456			C
457			C done with modified gram schimd and arnoldi step.
458			C now update factorization of hh
459			C
460			58 if (i .eq. 1) goto 121
461			C
462			C perfrom previous transformations on i-th column of h
463			C
464			do k=2,i
465			k1 = k-1
466			t = hh(k1,i)
467			hh(k1,i) = c(k1)t + s(k1)hh(k,i)
468			hh(k,i) = -s(k1)t + c(k1)hh(k,i)
469			enddo
470			121 gam = sqrt(hh(i,i)2 + hh(i1,i)2)
471			if (gam .eq. 0.0d0) gam = epsmac
472			C-----------#determine next plane rotation #-------------------
473			c(i) = hh(i,i)/gam
474			s(i) = hh(i1,i)/gam
475			rs(i1) = -s(i)*rs(i)
476			rs(i) = c(i)*rs(i)
477			C
478			C determine res. norm. and test for convergence-
479			C
480			hh(i,i) = c(i)hh(i,i) + s(i)hh(i1,i)
481			ro = abs(rs(i1))
482			if (iout .gt. 0) write(*, 199) its, ro
483			if (i .lt. im .and. (ro .gt. eps1)) goto 4
484			C
485			C now compute solution. first solve upper triangular system.
486			C
487			rs(i) = rs(i)/hh(i,i)
488			do ii=2,i
489			k=i-ii+1
490			k1 = k+1
491			t=rs(k)
492			do j=k1,i
493			t = t-hh(k,j)*rs(j)
494			enddo
495			rs(k) = t/hh(k,k)
496			enddo
497			C
498			C done with back substitution..
499			C now form linear combination to get solution
500			C
501			do j=1, i
502			t = rs(j)
503			C call daxpy(n, t, w(1,j), 1, sol,1) !jfl modification
504			do k=1,n
505			sol(k) = sol(k) + t*w(k,j)
506			enddo
507			enddo
508			C
509			C test for return
510			C
511			print *, 'ml-fgmres: its, maxits: ', its, maxits, ro, '<', eps1
512			if (ro .le. eps1 .or. its .ge. maxits) goto 999
513			C
514			C else compute residual vector and continue..
515			C
516			C goto 10
517
518			do j=1,i
519			jj = i1-j+1
520			rs(jj-1) = -s(jj-1)*rs(jj)
521			rs(jj) = c(jj-1)*rs(jj)
522			enddo
523			do j=1,i1
524			t = rs(j)
525			if (j .eq. 1) t = t-1.0d0
526			CML call daxpy (n, t, vv(1,j), 1, vv, 1)
527			do k=1,n
528			vv(k,1) = vv(k,1) + t*vv(k,j)
529			enddo
530			enddo
531			C
532			C restart outer loop.
533			C
534			goto 20
535			999 icode = 0
536
537			199 format(' -- fgmres its =', i4, ' res. norm =', d26.16)
538			C
539			return
540			C-----end-of-fgmres-----------------------------------------------------
541			C-----------------------------------------------------------------------
542			end
543
544			C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-\|--+----\|
545			CBOP
546			C !ROUTINE: SCALPROD
547			C !INTERFACE:
548
549			subroutine scalprod(n,dx,dy,t,myThid)
550
551			C
552			C forms the dot product of two vectors.
553			C uses unrolled loops for increments equal to one.
554			C jack dongarra, linpack, 3/11/78.
555			C ML: code stolen from BLAS and adapted for parallel applications
556			C
557			implicit none
558			#include "SIZE.h"
559			#include "EEPARAMS.h"
560			#include "EESUPPORT.h"
561			integer myThid
562			_RL dx(n),dy(n)
563			real*8 dtemp
564			real*8 t
565			integer i,m,mp1,n
566			#ifdef ALLOW_USE_MPI
567			INTEGER mpiRC
568			#endif /* ALLOW_USE_MPI */
569			CEOP
570			C
571			m = mod(n,5)
572			dtemp = 0. _d 0
573			t = 0. _d 0
574			if( m .eq. 0 ) go to 40
575			do i = 1,m
576			dtemp = dtemp + dx(i)*dy(i)
577			enddo
578			if( n .lt. 5 ) go to 60
579			40 mp1 = m + 1
580			do i = mp1,n,5
581			dtemp = dtemp + dx(i)dy(i) + dx(i + 1)dy(i + 1) +
582			& dx(i + 2)dy(i + 2) + dx(i + 3)dy(i + 3) +
583			& dx(i + 4)*dy(i + 4)
584			enddo
585			60 continue
586			C sum over all processors
587			#ifdef ALLOW_USE_MPI
588			t = dtemp
589			IF ( usingMPI ) THEN
590			CALL MPI_Allreduce(t,dtemp,1,MPI_DOUBLE_PRECISION,MPI_SUM,
591			& MPI_COMM_MODEL,mpiRC)
592			ENDIF
593			#endif /* ALLOW_USE_MPI */
594			t = dtemp
595
596			CML return
597			CML end
598			CML
599			CML subroutine daxpy(n,da,dx,incx,dy,incy)
600			CMLC
601			CMLC constant times a vector plus a vector.
602			CMLC uses unrolled loops for increments equal to one.
603			CMLC jack dongarra, linpack, 3/11/78.
604			CMLC
605			CML _RL dx(n),dy(n),da
606			CML integer i,incx,incy,ix,iy,m,mp1,n
607			CMLC
608			CML if(n.le.0)return
609			CML if (da .eq. 0.0d0) return
610			CML if(incx.eq.1.and.incy.eq.1)go to 20
611			CMLC
612			CMLC code for unequal increments or equal increments
613			CMLC not equal to 1
614			CMLC
615			CML ix = 1
616			CML iy = 1
617			CML if(incx.lt.0)ix = (-n+1)*incx + 1
618			CML if(incy.lt.0)iy = (-n+1)*incy + 1
619			CML do 10 i = 1,n
620			CML dy(iy) = dy(iy) + da*dx(ix)
621			CML ix = ix + incx
622			CML iy = iy + incy
623			CML 10 continue
624			CML return
625			CMLC
626			CMLC code for both increments equal to 1
627			CMLC
628			CMLC
629			CMLC clean-up loop
630			CMLC
631			CML 20 m = mod(n,4)
632			CML if( m .eq. 0 ) go to 40
633			CML do 30 i = 1,m
634			CML dy(i) = dy(i) + da*dx(i)
635			CML 30 continue
636			CML if( n .lt. 4 ) return
637			CML 40 mp1 = m + 1
638			CML do 50 i = mp1,n,4
639			CML dy(i) = dy(i) + da*dx(i)
640			CML dy(i + 1) = dy(i + 1) + da*dx(i + 1)
641			CML dy(i + 2) = dy(i + 2) + da*dx(i + 2)
642			CML dy(i + 3) = dy(i + 3) + da*dx(i + 3)
643			CML 50 continue
644			CML return
645			CML end
646			CML
647			CML subroutine dcopy(n,dx,incx,dy,incy)
648			CMLC
649			CMLC copies a vector, x, to a vector, y.
650			CMLC uses unrolled loops for increments equal to one.
651			CMLC jack dongarra, linpack, 3/11/78.
652			CMLC
653			CML _RL dx(n),dy(n)
654			CML integer i,incx,incy,ix,iy,m,mp1,n
655			CMLC
656			CML if(n.le.0)return
657			CML if(incx.eq.1.and.incy.eq.1)go to 20
658			CMLC
659			CMLC code for unequal increments or equal increments
660			CMLC not equal to 1
661			CMLC
662			CML ix = 1
663			CML iy = 1
664			CML if(incx.lt.0)ix = (-n+1)*incx + 1
665			CML if(incy.lt.0)iy = (-n+1)*incy + 1
666			CML do 10 i = 1,n
667			CML dy(iy) = dx(ix)
668			CML ix = ix + incx
669			CML iy = iy + incy
670			CML 10 continue
671			CML return
672			CMLC
673			CMLC code for both increments equal to 1
674			CMLC
675			CMLC
676			CMLC clean-up loop
677			CMLC
678			CML 20 m = mod(n,7)
679			CML if( m .eq. 0 ) go to 40
680			CML do 30 i = 1,m
681			CML dy(i) = dx(i)
682			CML 30 continue
683			CML if( n .lt. 7 ) return
684			CML 40 mp1 = m + 1
685			CML do 50 i = mp1,n,7
686			CML dy(i) = dx(i)
687			CML dy(i + 1) = dx(i + 1)
688			CML dy(i + 2) = dx(i + 2)
689			CML dy(i + 3) = dx(i + 3)
690			CML dy(i + 4) = dx(i + 4)
691			CML dy(i + 5) = dx(i + 5)
692			CML dy(i + 6) = dx(i + 6)
693			CML 50 continue
694
695			#endif /* SEAICE_ALLOW_DYNAMICS and SEAICE_CGRID and SEAICE_ALLOW_JFNK */
696
697			RETURN
698			END