| 1 | #! /bin/sh | 
| 2 |  | 
| 3 | # $Header: /u/gcmpack/MITgcm_contrib/jmc_script/run_cpl_test.aces,v 1.2 2011/08/01 23:52:33 jmc Exp $ | 
| 4 | # $Name:  $ | 
| 5 |  | 
#- default settings:
#  Npr  : total number of MPI processes used for the run
#  NpOc : number of those processes given to the ocean executable
Npr=3
NpOc=1
#Npr=25; NpOc=12;

#  multi-threading flags (stay empty unless option '-mth' is given):
#  MTH = global flag, MTHo / MTHa = flag applied to the ocn / atm component
MTH=
MTHo=
MTHa=

#  additional option(s) passed to genmake2
GMKopt='-ieee'
#GMKopt=

#  number of the first rank_N directory of the ocean (rnkO) and of the
#  atmosphere (rnkA); the atmosphere comes right after the NpOc ocean ranks
rnkO=1
rnkA=$(( rnkO + NpOc ))
| 15 |  | 
#- parse options:
#  An optional leading '-mth' requests a multi-threaded (-omp) build and run.
#  The flag is only passed on to a component (ocn, atm) whose input directory
#  provides an 'eedata.mth' file.
#  "$1" is quoted so that an empty or multi-word first argument cannot break
#  the test.
if [ $# -ge 1 ] && [ "$1" = '-mth' ] ; then
  MTH='-omp' ; shift
  if test -f input_ocn/eedata.mth ; then MTHo=$MTH ; fi
  if test -f input_atm/eedata.mth ; then MTHa=$MTH ; fi
fi

#- exactly one argument (the step number) must remain; otherwise print the
#  usage and stop (exit status is the one of the last echo, as before).
if [ $# -ne 1 ] ; then
  echo "Usage: $(basename "$0") [opt] step"
  echo ' => test coupled set-up on ACES cluster'
  echo 'opt = -mth : compile and run (if eedata.mth) 2-threads for ocn & atm'
  echo ' step = 0 : clean all directories'
  echo ' step = 1 : compile the 3 executables (cpl,ocn,atm)'
  echo ' step = 2 : copy input files and dir(s)'
  echo " step = 3 : run with $Npr mpi processes"
  echo ' step = 4 : check the results'
  echo ' step = 5 : remove output files in rank_0,1,2 dir.'
  exit
fi

#- kpr = requested step ; dir = directory the script was started from
kpr=$1
dir=$(pwd)
| 37 |  | 
| 38 | #============================================================================ | 
| 39 |  | 
#- only works from a PBS batch session with 3 (or more) nodes on the ACES cluster
#  (tested on ao)  ( qsub -I -l nodes=3   -or-
#                    qsub -I -l nodes=3:ppn=2  )
# needs to be run on a compute node,
#  with the correct module loaded (i.e., one of these 3):
| 45 | # > module load mpich/gnu | 
| 46 | #   export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-gcc/include' | 
| 47 | # > module load mpich/intel netcdf/3.6.1/icc | 
| 48 | #   export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-intel/include' | 
| 49 | # > module load mpich/pgi | 
| 50 | #   export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-pgi/include' | 
| 51 | #   export NETCDF_ROOT='/usr/local/pkg/pgi/pgi-5.2/linux86/5.2' | 
| 52 |  | 
#- refuse to continue unless $PBS_NODEFILE is set, readable and lists at
#  least $Npr entries (exit status 8 in all three cases).
if test -z "$PBS_NODEFILE" ; then
  echo ' $PBS_NODEFILE not defined'
  echo "for now, works only from a (at least) $Npr nodes PBS batch session"
  exit 8
fi
#  without this check an unreadable file would leave nprc empty and the
#  numeric comparison below would only produce a shell error.
if test ! -r "$PBS_NODEFILE" ; then
  echo " cannot read \$PBS_NODEFILE = $PBS_NODEFILE"
  echo "for now, works only from a (at least) $Npr nodes PBS batch session"
  exit 8
fi
#  $(( )) strips the blanks that some 'wc' implementations put before the count
nprc=$(( $(wc -l < "$PBS_NODEFILE") ))
if [ "$nprc" -lt "$Npr" ] ; then
  echo ' Nb of nodes from $PBS_NODEFILE =' "$nprc"
  echo "for now, works only from a (at least) $Npr nodes PBS batch session"
  exit 8
fi
umask 0022
| 66 |  | 
| 67 | #============================================================================ | 
| 68 |  | 
#- step 0: clean all directories

# clean_all
#   Remove, relative to the current directory: the process-group file, the
#   run log and the comp_res.* reports, the TTT.* build logs, and every
#   rank_N directory or link (N = 0..29).
#   Then run 'make Clean' in each build directory that has a Makefile.
#   The comp_res.* names are spelled out because brace expansion is not
#   available in a POSIX sh.
clean_all() {
  rm -f pr_group std_outp \
        comp_res.ocn comp_res.atm comp_res.land comp_res.sice
  rm -f build_???/TTT.*make.* build_???/TTT.mkdepend.*
  /bin/rm -r -f rank_? rank_1? rank_2?
  for _ca_bld in build_cpl build_ocn build_atm ; do
    if test -f "$_ca_bld/Makefile" ; then
      # sub-shell: the caller's working directory is never changed and
      # make is only started if the cd succeeded
      ( cd "$_ca_bld" && make Clean )
    fi
  done
}

if test "$kpr" = 0 ; then
  clean_all
fi
#- step 5: remove output files in the rank_N directories

# clean_output
#   Remove the process-group file, the run log and the comp_res.* reports
#   from the current directory, the coupler log from rank_0, and the
#   *.data, *.meta, STD???.0000 and UV-*.0001.clog files from rank_$rnkO
#   and rank_$rnkA.
#   Uses: rnkO, rnkA (numbers of the ocean / atmosphere rank directories).
#   Files are addressed with their directory prefix instead of 'cd dir ;
#   rm -f *.data', so a failing cd can never delete files one level up.
clean_output() {
  echo 'remove output files in rank_0,1,2 dir.'
  rm -f pr_group std_outp \
        comp_res.ocn comp_res.atm comp_res.land comp_res.sice
  if test -d rank_0 ; then rm -f rank_0/Coupler.0000.clog ; fi
  for _co_rank in "$rnkO" "$rnkA" ; do
    if test -d "rank_$_co_rank" ; then
      rm -f "rank_$_co_rank"/*.data "rank_$_co_rank"/*.meta \
            "rank_$_co_rank"/STD???.0000 "rank_$_co_rank"/UV-*.0001.clog
    fi
  done
}

if test "$kpr" = 5 ; then
  clean_output
fi
| 88 |  | 
#- step 1: compile the 3 executables (cpl,ocn,atm)

# optfile_for_compiler NAME
#   Print the genmake2 optfile that goes with compiler NAME (gnu, intel or
#   pgi); return 1 without output for any other name.
optfile_for_compiler() {
  case $1 in
    gnu)   echo "../../tools/build_options/linux_ia32_g77" ;;
    intel) echo "../../tools/build_options/linux_ia32_ifort" ;;
    pgi)   echo "../../tools/build_options/linux_ia32_pgf77" ;;
    *)     return 1 ;;
  esac
}

# build_component TAG LABEL NTAIL EXTRA_OPTS
#   Run genmake2, 'make depend' and 'make' inside directory build_TAG and
#   show the end of each log (TTT.genmake.PID, TTT.mkdepend.PID, TTT.make.PID).
#     TAG        : cpl, ocn or atm
#     LABEL      : name printed in the '==== compile' banner
#     NTAIL      : number of lines shown from the 'make depend' log
#     EXTRA_OPTS : additional genmake2 option(s), may be empty
#   Uses: OPTFILE, GMKopt, dir. Comes back to $dir when done.
#   Exits with status 7 if a directory cannot be entered, so that genmake2
#   and make are never run in the wrong place.
build_component() {
  _bc_tag=$1 ; _bc_label=$2 ; _bc_ntail=$3 ; _bc_opts=$4

  echo "==== compile $_bc_label:"
  cd "build_$_bc_tag" \
    || { echo "ERROR: cannot cd to build_$_bc_tag" ; exit 7 ; }

  echo " --- genmake2 ($_bc_tag):"
  # $_bc_opts and $GMKopt are left unquoted on purpose: an empty value must
  # disappear instead of becoming an empty argument.
  ../../../tools/genmake2 -of "$OPTFILE" -mpi $_bc_opts $GMKopt >  "TTT.genmake.$$"
  tail -n 5 "TTT.genmake.$$"

  echo " --- make depend ($_bc_tag):"
  make depend > "TTT.mkdepend.$$"
  tail -n "$_bc_ntail" "TTT.mkdepend.$$"

  echo " --- make ($_bc_tag):"
  make > "TTT.make.$$" 2>&1
  tail -n 10 "TTT.make.$$"

  echo ' '
  cd "$dir" || { echo "ERROR: cannot cd back to $dir" ; exit 7 ; }
}

if test "$kpr" = 1 ; then

  #- find the optfile that corresponds to the currently loaded mpich module
  . /etc/profile.d/modules.sh
  # both stdout and stderr of 'module list' are captured
  # (POSIX spelling of the csh-style '>&' redirection)
  module list -t > tmp_compiler 2>&1
  compiler=$(sed -n 's/mpich.*\///p' tmp_compiler)
  if OPTFILE=$(optfile_for_compiler "$compiler") ; then
    rm -f tmp_compiler
  else
    echo 'ERROR: cannot use compiler:' $compiler
    echo "" ; cat tmp_compiler ; exit 7
  fi
  echo " Using optfile: $OPTFILE  (compiler=$compiler) $MTH"
  #- a relative optfile path is given relative to $dir, but genmake2 is
  #  started one level below, inside build_* : prepend '../'
  case $OPTFILE in
    /*) ;;
    *)  OPTFILE="../$OPTFILE" ;;
  esac
  #---

  build_component cpl coupler  5 ''
  build_component ocn OGCM    10 "$MTHo"
  build_component atm AGCM    10 "$MTHa"

  ls -l build_???/mitgcmuv

fi
| 151 |  | 
#- step 2: create the rank_N run directories and link the input files

# link_input_dir INPUT RANK MTHFLAG
#   Create directory rank_RANK and fill it with symbolic links to every
#   file of ../INPUT. Run ./prepare_run there if such an executable got
#   linked. If MTHFLAG is not empty, eedata.mth takes the place of eedata.
#   Returns 1 if the directory cannot be created or entered; everything
#   after the mkdir runs in a sub-shell, so the caller's working directory
#   is never changed and nothing is linked or moved in the wrong place.
link_input_dir() {
  echo 'Link files from dir:' "$1" '->' "rank_$2"
  mkdir "rank_$2" || return 1
  (
    cd "rank_$2" || exit 1
    ln -s ../"$1"/* .
    if test -x prepare_run ; then ./prepare_run ; fi
    if test "x$3" != x ; then
      echo " MTH run: mv -f eedata.mth eedata"
      if test -h eedata ; then rm -f eedata ; fi
      mv -f eedata.mth eedata
    fi
    exit 0
  )
}

# setup_run_dirs
#   Remove any existing rank_N directory or link (N = 0..29), then build:
#     rank_0                      with links to the files of input_cpl
#     rank_$rnkO                  from input_ocn (see link_input_dir)
#     rank_($rnkO+1) .. rank_$NpOc      as links to rank_$rnkO
#     rank_$rnkA                  from input_atm (see link_input_dir)
#     rank_($rnkA+1) .. rank_($Npr-1)   as links to rank_$rnkA
#   Uses: Npr, NpOc, rnkO, rnkA, MTHo, MTHa.
#   Returns 1 as soon as one of the directories cannot be set up.
setup_run_dirs() {
  echo 'rm dir:' rank_? rank_1? rank_2?
  /bin/rm -r -f rank_? rank_1? rank_2?

  echo 'Link files from dir:' input_cpl '->' rank_0
  mkdir rank_0 || return 1
  ( cd rank_0 && ln -s ../input_cpl/* . )

  link_input_dir input_ocn "$rnkO" "$MTHo" || return 1
  _sr_n=$(( rnkO + 1 ))
  while [ "$_sr_n" -le "$NpOc" ] ; do
    ln -s "rank_$rnkO" "rank_$_sr_n"
    _sr_n=$(( _sr_n + 1 ))
  done

  link_input_dir input_atm "$rnkA" "$MTHa" || return 1
  _sr_n=$(( rnkA + 1 ))
  while [ "$_sr_n" -lt "$Npr" ] ; do
    ln -s "rank_$rnkA" "rank_$_sr_n"
    _sr_n=$(( _sr_n + 1 ))
  done
}

if test "$kpr" = 2 ; then
  setup_run_dirs || exit 1
fi
| 197 |  | 
#- step 3: run the coupled set-up with $Npr mpi processes

# write_pr_group HOST...
#   (Re)create file 'pr_group' in the current directory with one line
#   "HOST N EXECUTABLE" per HOST given: the first line is for the coupler
#   (N=0, build_cpl), the next $NpOc lines for the ocean (N=1, build_ocn)
#   and all remaining lines for the atmosphere (N=1, build_atm).
#   Uses: dir, NpOc.
write_pr_group() {
  _pg_count=0 ; _pg_nn=0 ; _pg_comp=cpl
  rm -f pr_group ; touch pr_group
  for _pg_host in "$@" ; do
    echo "$_pg_host" "$_pg_nn" "$dir/build_$_pg_comp/mitgcmuv" >> pr_group
    _pg_count=$(( _pg_count + 1 ))
    if [ "$_pg_count" -le "$NpOc" ] ; then _pg_comp=ocn ; else _pg_comp=atm ; fi
    _pg_nn=1
  done
}

# mpirun_flavor PATH
#   Print which way of starting the job goes with the mpirun found at PATH:
#   'mpich2' if PATH contains 'mpich2', else 'mpich-mx' if it contains
#   'mpich-mx', else 'mpich1'.
mpirun_flavor() {
  case $1 in
    *mpich2*)   echo mpich2 ;;
    *mpich-mx*) echo mpich-mx ;;
    *)          echo mpich1 ;;
  esac
}

if test "$kpr" = 3 ; then
  ROOTDIR=$dir
# rm -f rank_?/pickup*.ckptA.00?.00?.??ta
  echo "$ROOTDIR"

  #--- running on the same node (alternative, not active: the list built
  #    from the node file below always replaced it):
  #list='' ; nc=0; xx=`hostname`
  #while [ $nc -lt $Npr ] ; do list="$list $xx" ; nc=`expr $nc + 1` ; done
  #-- On darwin cluster node (from qrsh session):
  #JOB_ID=`qstat | sed -n '3,$ p' | grep " $USER " | awk '{print $1}'`
  #NODEFILE="/tmp/$JOB_ID.1.darwin/machines"
  #echo " JOB_ID = '$JOB_ID' ; NODEFILE = '$NODEFILE'"
  #-- On ACES cluster (in PBS batch job):
  NODEFILE=$PBS_NODEFILE
  #--- running on different nodes:
  #    take $Npr distinct hosts if the node file has that many, otherwise
  #    simply its first $Npr lines
  ls -l "$NODEFILE"
  nprc=$(( $(uniq "$NODEFILE" | wc -l) ))
  if [ "$nprc" -ge "$Npr" ] ; then
    list=$(uniq "$NODEFILE" | head -n "$Npr")
  else
    list=$(head -n "$Npr" "$NODEFILE")
  fi

  # $list is left unquoted on purpose: one argument per host name
  write_pr_group $list

  NpAt=$(( Npr - 1 - NpOc ))
  RunOpt="-np 1 ./build_cpl/mitgcmuv"
  RunOpt="$RunOpt : -np $NpOc ./build_ocn/mitgcmuv"
  RunOpt="$RunOpt : -np $NpAt ./build_atm/mitgcmuv"

  cd "$ROOTDIR" || { echo "ERROR: cannot cd to $ROOTDIR" ; exit 1 ; }
  if test "x$MTH" != x ; then
    export OMP_NUM_THREADS=2 ; export KMP_STACKSIZE=400m
    # printf instead of the non-portable 'echo -n'
    if test "x$MTHo" != x ; then
      printf '%s' " run OCN ($MTHo) with $OMP_NUM_THREADS threads ;"
    fi
    if test "x$MTHa" != x ; then
      printf '%s' " run ATM ($MTHa) with $OMP_NUM_THREADS threads ;"
    fi
    echo ""
  fi

  # The former 'test $mpich2 == 0' relied on '==', which is not a POSIX
  # 'test' operator; a 'case' on the flavor name works in any sh.
  case $(mpirun_flavor "$(command -v mpirun)") in
    mpich2)
      #- with Hydra mpich2 (on baudelaire):
      echo "execute 'mpirun $RunOpt' :"
      # $RunOpt unquoted on purpose: it holds the whole argument list
      mpirun $RunOpt  > std_outp 2>&1
      ;;
    mpich-mx)
      #- with mpich-mx (on beagle):
      echo "execute 'mpirun -pg pr_group -v ./build_cpl/mitgcmuv' :"
      mpirun -pg pr_group -v ./build_cpl/mitgcmuv > std_outp 2>&1
      ;;
    *)
# /usr/local/pkg/mpi/mpi-1.2.4..8a-gm-1.5/pgi/bin/mpirun.ch_gm -pg pr_group -wd $ROOTDIR --gm-kill 5 -v  ./build_cpl/mitgcmuv > std_outp 2>&1
      #- with mpich-1 (on danton, ACES):
      echo "execute 'mpirun -p4pg pr_group -v ./build_cpl/mitgcmuv' :"
      mpirun -p4pg pr_group -v ./build_cpl/mitgcmuv > std_outp 2>&1
      ;;
  esac
  tail -n 20 std_outp
  ls -l "rank_$rnkO"/pickup*.ckptA.001.001.data
  ls -l "rank_$rnkA"/pickup*.ckptA.001.001.data

fi
| 272 |  | 
#- step 4: check the results

# check_results [COMP_RES_CMD]
#   Compare the ocean and atmosphere standard output of the run with the
#   reference files in directory 'results', using COMP_RES_CMD (default:
#   $HOME/bin/comp_res). After each call, comp_res.log is renamed to
#   comp_res.ocn, comp_res.atm, comp_res.land or comp_res.sice.
#   The atmosphere output is compared three times: without extra argument,
#   then with 'L' and with 'I' (presumably selecting the land and sea-ice
#   diagnostics, as the messages suggest -- see the comp_res tool itself).
#   Uses: rnkO, rnkA.
#   Returns 1, before touching any file, if COMP_RES_CMD cannot be run:
#   otherwise the 'mv' below would present a left-over comp_res.log as a
#   fresh result.
check_results() {
  CompRes=${1:-$HOME/bin/comp_res}
  if ! command -v "$CompRes" > /dev/null 2>&1 ; then
    echo "ERROR: cannot execute comparison tool: $CompRes"
    return 1
  fi

  if test -f "rank_$rnkO/STDOUT.0000" ; then
    echo '==> check Ocean output:'
    "$CompRes" "rank_$rnkO/STDOUT.0000" results/ocnSTDOUT.0000
    mv -f comp_res.log comp_res.ocn
    echo ' '
  else
    echo "No Ocean output file in rank_$rnkO"
  fi

  if test -f "rank_$rnkA/STDOUT.0000" ; then
    echo '==> check Atmos output:'
    "$CompRes" "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000
    mv -f comp_res.log comp_res.atm
    echo '==> check Land output:'
    "$CompRes" "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000 L
    mv -f comp_res.log comp_res.land
    echo '==> check thSIce output:'
    "$CompRes" "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000 I
    mv -f comp_res.log comp_res.sice
    echo ' '
  else
    echo "No Atmos output file in rank_$rnkA"
  fi
}

if test "$kpr" = 4 ; then
  check_results || exit 1
fi
| 299 |  | 
| 300 | exit 0 |