--- MITgcm/pkg/exf/exf_interp.F 2006/07/01 03:20:33 1.16 +++ MITgcm/pkg/exf/exf_interp.F 2008/01/24 08:29:51 1.23 @@ -1,4 +1,8 @@ +C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/pkg/exf/exf_interp.F,v 1.23 2008/01/24 08:29:51 mlosch Exp $ +C $Name: $ + #include "EXF_OPTIONS.h" + CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC C Flux Coupler using C C Bilinear interpolation of forcing fields C @@ -9,27 +13,34 @@ C C CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC - real*8 function lagran(i,x,a,sp) + _RL FUNCTION LAGRAN(i,x,a,sp) - INTEGER i,k,sp + INTEGER i _RS x - real*8 a(4) - real*8 numer,denom - - numer = 1.D0 - denom = 1.D0 + _RL a(4) + INTEGER sp +C- local variables: + INTEGER k + _RL numer,denom + + numer = 1. _d 0 + denom = 1. _d 0 + +#ifdef TARGET_NEC_SX +!CDIR UNROLL=8 +#endif /* TARGET_NEC_SX */ do k=1,sp - if ( k .ne. i) then + if ( k .ne. i) then denom = denom*(a(i) - a(k)) numer = numer*(x - a(k)) - endif + endif enddo lagran = numer/denom - return - end + RETURN + END SUBROUTINE exf_interp( @@ -43,18 +54,18 @@ implicit none -C infile = name of the input file (direct access binary) -C filePrec = file precicision (currently not used, assumes real*4) -C arrout = output arrays (different for each processor) -C irecord = record number in global file -C xG,yG = coordinates for output grid -C lon_0, lat_0 = lon and lat of sw corner of global input grid -C lon_inc = scalar x-grid increment -C lat_inc = vector y-grid increments -C nx_in, ny_in = input x-grid and y-grid size -C method = 1,11,21 for bilinear; 2,12,22 for bicubic -C 1,2 for tracer; 11,12 for U; 21,22 for V -C mythid = thread id +C infile (string) :: name of the binary input file (direct access) +C filePrec (integer) :: number of bits per word in file (32 or 64) +C arrout ( _RL ) :: output array +C irecord (integer) :: record number to read +C xG,yG :: coordinates for output grid to interpolate to +C lon_0, lat_0 :: lon and lat of sw corner of global input grid +C lon_inc :: scalar x-grid increment +C 
lat_inc :: vector y-grid increments +C nx_in,ny_in (integer) :: size in x & y direction of input file to read +C method :: 1,11,21 for bilinear; 2,12,22 for bicubic +C :: 1,2 for tracer; 11,12 for U; 21,22 for V +C myThid (integer) :: My Thread Id number C #include "SIZE.h" @@ -65,24 +76,33 @@ character*(*) infile integer filePrec, irecord, nx_in, ny_in _RL arrayout(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) - _RS xG_in (1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) + _RS xG_in (1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RS yG (1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) _RL lon_0, lon_inc _RL lat_0, lat_inc(ny_in-1) integer method, mythid +C functions + external lagran + _RL lagran + C local variables integer e_ind(snx,sny),w_ind(snx,sny) integer n_ind(snx,sny),s_ind(snx,sny) - real*8 px_ind(4), py_ind(4), ew_val(4) - external lagran - real*8 lagran - real*4 arrayin(-1:nx_in+2 , -1:ny_in+2) - real*8 x_in (-1:nx_in+2), y_in(-1:ny_in+2) - real*8 ninety PARAMETER ( ninety = 90. ) + _RL px_ind(4), py_ind(4), ew_val(4) + _RL arrayin(-1:nx_in+2 , -1:ny_in+2) + _RL NorthValue + _RL x_in (-1:nx_in+2), y_in(-1:ny_in+2) integer i, j, k, l, js, bi, bj, sp, interp_unit +#ifdef TARGET_NEC_SX + integer ic, ii, icnt + integer inx(snx*sny,2) + _RL ew_val1, ew_val2, ew_val3, ew_val4 +#endif _RS xG(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) - _RS threeSixtyRS, NorthValue + _RL ninety + PARAMETER ( ninety = 90. ) + _RS threeSixtyRS PARAMETER ( threeSixtyRS = 360. 
) C put xG in interval [ lon_0 , lon_0+360 [ @@ -102,39 +122,40 @@ I infile, filePrec, O arrayin, I irecord, nx_in, ny_in, mythid) - _BARRIER -C _BEGIN_MASTER( myThid ) - -C setup input grid - do i=-1,nx_in+2 - x_in(i) = lon_0 + (i-1)*lon_inc - enddo +C setup input longitude grid + do i=-1,nx_in+2 + x_in(i) = lon_0 + (i-1)*lon_inc + enddo - y_in(0) = lat_0 - lat_inc(1) - y_in(-1)= lat_0 - 2.*lat_inc(1) - y_in(1) = lat_0 - do j=2,ny_in - y_in(j) = y_in(j-1) + lat_inc(j-1) - enddo -c y_in(ny_in+1) = y_in(ny_in) + lat_inc(ny_in-1) -c y_in(ny_in+2) = y_in(ny_in) + 2.*lat_inc(ny_in-1) - y_in(ny_in+1) = min( y_in(ny_in) + lat_inc(ny_in-1), ninety ) - y_in(ny_in+2) = min( y_in(ny_in) + 2.*lat_inc(ny_in-1), ninety ) +C setup input latitude grid + y_in(0) = lat_0 - lat_inc(1) + y_in(-1)= lat_0 - 2.*lat_inc(1) + y_in(1) = lat_0 + do j=2,ny_in + y_in(j) = y_in(j-1) + lat_inc(j-1) + enddo + do j=ny_in+1,ny_in+2 + if (y_in(j-1).eq.ninety) then + y_in(j) = 2 * ninety - y_in(j-2) + else + y_in(j) = min( y_in(j-1)+lat_inc(ny_in-1), ninety ) + endif + enddo C enlarge boundary - do j=1,ny_in - arrayin(0,j) = arrayin(nx_in,j) - arrayin(-1,j) = arrayin(nx_in-1,j) - arrayin(nx_in+1,j) = arrayin(1,j) - arrayin(nx_in+2,j) = arrayin(2,j) - enddo - do i=-1,nx_in+2 - arrayin(i,0) = arrayin(i,1) - arrayin(i,-1) = arrayin(i,1) - arrayin(i,ny_in+1) = arrayin(i,ny_in) - arrayin(i,ny_in+2) = arrayin(i,ny_in) - enddo + do j=1,ny_in + arrayin(0,j) = arrayin(nx_in,j) + arrayin(-1,j) = arrayin(nx_in-1,j) + arrayin(nx_in+1,j) = arrayin(1,j) + arrayin(nx_in+2,j) = arrayin(2,j) + enddo + do i=-1,nx_in+2 + arrayin(i,0) = arrayin(i,1) + arrayin(i,-1) = arrayin(i,1) + arrayin(i,ny_in+1) = arrayin(i,ny_in) + arrayin(i,ny_in+2) = arrayin(i,ny_in) + enddo C For tracer (method=1,2) set to northernmost zonal-mean value C at 90N to avoid sharp zonal gradients near the Pole. 
@@ -142,41 +163,25 @@ C gradient at North Pole C For V (method=21,22) set to northernmost zonal value at 90N, C as is already done above in order to allow cross-Pole Arctic flow - if (y_in(ny_in+1).eq.ninety) then + do j=ny_in,ny_in+2 + if (y_in(j).eq.ninety) then if (method.eq.1 .or. method.eq.2) then - NorthValue = 0 + NorthValue = 0. do i=1,nx_in - NorthValue = NorthValue + arrayin(i,ny_in) + NorthValue = NorthValue + arrayin(i,j) enddo NorthValue = NorthValue / nx_in do i=-1,nx_in+2 - arrayin(i,ny_in+1) = NorthValue + arrayin(i,j) = NorthValue enddo elseif (method.eq.11 .or. method.eq.12) then do i=-1,nx_in+2 - arrayin(i,ny_in+1) = 0 - enddo - endif - endif - if (y_in(ny_in+2).eq.ninety) then - if (method.eq.1 .or. method.eq.2) then - NorthValue = 0 - do i=1,nx_in - NorthValue = NorthValue + arrayin(i,ny_in) - enddo - NorthValue = NorthValue / nx_in - do i=-1,nx_in+2 - arrayin(i,ny_in+2) = NorthValue - enddo - elseif (method.eq.11 .or. method.eq.12) then - do i=-1,nx_in+2 - arrayin(i,ny_in+2) = 0 + arrayin(i,j) = 0. enddo endif endif + enddo -C _END_MASTER( myThid ) - do bj = mybylo(mythid), mybyhi(mythid) do bi = mybxlo(mythid), mybxhi(mythid) @@ -203,7 +208,7 @@ endif #endif /* ALLOW_DEBUG */ -C compute interpolation indices +C compute interpolation indices do i=1,snx do j=1,sny if (xG(i,j,bi,bj)-x_in(1) .ge. 0.) then @@ -212,6 +217,13 @@ w_ind(i,j) = int((xG(i,j,bi,bj)-x_in(1))/lon_inc) endif e_ind(i,j) = w_ind(i,j) + 1 + enddo + enddo +#ifndef TARGET_NEC_SX +C use the original and more readable variant of the algorithm that +C has unvectorizable while-loops for each (i,j) + do i=1,snx + do j=1,sny js = ny_in*.5 do while (yG(i,j,bi,bj) .lt. 
y_in(js)) js = (js - 1)*.5 @@ -220,7 +232,63 @@ js = js + 1 enddo s_ind(i,j) = js - n_ind(i,j) = js + 1 + enddo + enddo +#else /* TARGET_NEC_SX defined */ +C this variant vectorizes more efficiently than the original one because +C it moves the while loops out of the i,j loops (loop pushing) but +C it is ugly and incomprehensible + icnt = 0 + do j=1,sny + do i=1,snx + s_ind(i,j) = ny_in*.5 + icnt = icnt+1 + inx(icnt,1) = i + inx(icnt,2) = j + enddo + enddo + do while (icnt .gt. 0) + ii = 0 +!CDIR NODEP + do ic=1,icnt + i = inx(ic,1) + j = inx(ic,2) + if (yG(i,j,bi,bj) .lt. y_in(s_ind(i,j))) then + s_ind(i,j) = (s_ind(i,j) - 1)*.5 + ii = ii+1 + inx(ii,1) = i + inx(ii,2) = j + endif + enddo + icnt = ii + enddo + icnt = 0 + do j=1,sny + do i=1,snx + icnt = icnt+1 + inx(icnt,1) = i + inx(icnt,2) = j + enddo + enddo + do while (icnt .gt. 0) + ii = 0 +!CDIR NODEP + do ic=1,icnt + i = inx(ic,1) + j = inx(ic,2) + if (yG(i,j,bi,bj) .ge. y_in(s_ind(i,j)+1)) then + s_ind(i,j) = s_ind(i,j) + 1 + ii = ii+1 + inx(ii,1) = i + inx(ii,2) = j + endif + enddo + icnt = ii + enddo +#endif /* TARGET_NEC_SX defined */ + do i=1,snx + do j=1,sny + n_ind(i,j) = s_ind(i,j) + 1 enddo enddo @@ -235,6 +303,7 @@ px_ind(l+1) = x_in(w_ind(i,j)+l) py_ind(l+1) = y_in(s_ind(i,j)+l) enddo +#ifndef TARGET_NEC_SX do k=1,2 ew_val(k) = arrayin(w_ind(i,j),s_ind(i,j)+k-1) & *lagran(1,xG(i,j,bi,bj),px_ind,sp) @@ -243,6 +312,19 @@ arrayout(i,j,bi,bj)=arrayout(i,j,bi,bj) & +ew_val(k)*lagran(k,yG(i,j,bi,bj),py_ind,sp) enddo +#else + ew_val1 = arrayin(w_ind(i,j),s_ind(i,j)+1-1) + & *lagran(1,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j),s_ind(i,j)+1-1) + & *lagran(2,xG(i,j,bi,bj),px_ind,sp) + ew_val2 = arrayin(w_ind(i,j),s_ind(i,j)+2-1) + & *lagran(1,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j),s_ind(i,j)+2-1) + & *lagran(2,xG(i,j,bi,bj),px_ind,sp) + arrayout(i,j,bi,bj)= + & +ew_val1*lagran(1,yG(i,j,bi,bj),py_ind,sp) + & +ew_val2*lagran(2,yG(i,j,bi,bj),py_ind,sp) +#endif /* TARGET_NEC_SX defined */ enddo 
enddo elseif (method .eq. 2 .or. method.eq.12 .or. method.eq.22) then @@ -256,6 +338,7 @@ px_ind(l+2) = x_in(w_ind(i,j)+l) py_ind(l+2) = y_in(s_ind(i,j)+l) enddo +#ifndef TARGET_NEC_SX do k=1,4 ew_val(k) = & arrayin(w_ind(i,j)-1,s_ind(i,j)+k-2) @@ -263,12 +346,55 @@ & +arrayin(w_ind(i,j) ,s_ind(i,j)+k-2) & *lagran(2,xG(i,j,bi,bj),px_ind,sp) & +arrayin(e_ind(i,j) ,s_ind(i,j)+k-2) - & *lagran(3,xG(i,j,bi,bj),px_ind,sp) + & *lagran(3,xG(i,j,bi,bj),px_ind,sp) & +arrayin(e_ind(i,j)+1,s_ind(i,j)+k-2) & *lagran(4,xG(i,j,bi,bj),px_ind,sp) - arrayout(i,j,bi,bj)=arrayout(i,j,bi,bj) + arrayout(i,j,bi,bj)=arrayout(i,j,bi,bj) & +ew_val(k)*lagran(k,yG(i,j,bi,bj),py_ind,sp) enddo +#else + ew_val1 = + & arrayin(w_ind(i,j)-1,s_ind(i,j)+1-2) + & *lagran(1,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(w_ind(i,j) ,s_ind(i,j)+1-2) + & *lagran(2,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j) ,s_ind(i,j)+1-2) + & *lagran(3,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j)+1,s_ind(i,j)+1-2) + & *lagran(4,xG(i,j,bi,bj),px_ind,sp) + ew_val2 = + & arrayin(w_ind(i,j)-1,s_ind(i,j)+2-2) + & *lagran(1,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(w_ind(i,j) ,s_ind(i,j)+2-2) + & *lagran(2,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j) ,s_ind(i,j)+2-2) + & *lagran(3,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j)+1,s_ind(i,j)+2-2) + & *lagran(4,xG(i,j,bi,bj),px_ind,sp) + ew_val3 = + & arrayin(w_ind(i,j)-1,s_ind(i,j)+3-2) + & *lagran(1,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(w_ind(i,j) ,s_ind(i,j)+3-2) + & *lagran(2,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j) ,s_ind(i,j)+3-2) + & *lagran(3,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j)+1,s_ind(i,j)+3-2) + & *lagran(4,xG(i,j,bi,bj),px_ind,sp) + ew_val4 = + & arrayin(w_ind(i,j)-1,s_ind(i,j)+4-2) + & *lagran(1,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(w_ind(i,j) ,s_ind(i,j)+4-2) + & *lagran(2,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j) ,s_ind(i,j)+4-2) + & *lagran(3,xG(i,j,bi,bj),px_ind,sp) + & +arrayin(e_ind(i,j)+1,s_ind(i,j)+4-2) + & 
*lagran(4,xG(i,j,bi,bj),px_ind,sp) + arrayout(i,j,bi,bj)= + & +ew_val1*lagran(1,yG(i,j,bi,bj),py_ind,sp) + & +ew_val2*lagran(2,yG(i,j,bi,bj),py_ind,sp) + & +ew_val3*lagran(3,yG(i,j,bi,bj),py_ind,sp) + & +ew_val4*lagran(4,yG(i,j,bi,bj),py_ind,sp) +#endif /* TARGET_NEC_SX defined */ enddo enddo else @@ -277,4 +403,5 @@ enddo enddo + RETURN END