#! /bin/sh
#
# $Header: /u/gcmpack/MITgcm_contrib/jmc_script/run_cpl_test.aces,v 1.2 2011/08/01 23:52:33 jmc Exp $
# $Name: $
#
# Driver to test a coupled set-up (coupler + ocean + atmos executables),
# one step per invocation; run without argument to get the list of steps.

#- default:
#  Npr  = total number of MPI processes ; NpOc = number of ocean processes
Npr=3 ; NpOc=1
#Npr=25; NpOc=12;
#  MTH = multi-threading option (empty = off) ; MTHo, MTHa = same, but only
#        set for the ocean / atmos when the matching eedata.mth file exists
MTH=
MTHo=
MTHa=
#  GMKopt = additional option(s) passed to genmake2
GMKopt='-ieee'
#GMKopt=
#  rnkO, rnkA = rank-directory number used for the ocean, for the atmos
rnkO=1 ; rnkA=$(( rnkO + NpOc ))

#- parse options:
#  ("$1" is quoted so that an empty or multi-word first argument is just
#   "not -mth" instead of a 'test' syntax error)
if [ $# -ge 1 ] && [ "$1" = '-mth' ] ; then
  MTH='-omp' ; shift
  if test -f input_ocn/eedata.mth ; then MTHo=$MTH ; fi
  if test -f input_atm/eedata.mth ; then MTHa=$MTH ; fi
fi
#- exactly one argument (the step number) must remain:
if [ $# -ne 1 ] ; then
  echo "Usage:$(basename "$0") [opt] step"
  echo ' => test coupled set-up on ACES cluster'
  echo 'opt = -mth : compile and run (if eedata.mth) 2-threads for ocn & atm'
  echo ' step = 0 : clean all directories'
  echo ' step = 1 : compile the 3 executables (cpl,ocn,atm)'
  echo ' step = 2 : copy input files and dir(s)'
  echo " step = 3 : run with $Npr mpi processes"
  echo ' step = 4 : check the results'
  echo ' step = 5 : remove output files in rank_0,1,2 dir.'
# plain "exit": returns the status of the last echo, as it always did
  exit
fi
#  kpr = selected step ; dir = directory the script is started from
kpr=$1
dir=$(pwd)

#============================================================================

#- only works from a 3 (or more) nodes PBS batch session on ACES cluster
#  (tested on ao) ( qsub -I -l nodes=3 -or-
#                   qsub -I -l nodes=3:ppn=2 )
#  need to be on one computer node,
#  and with the correct module loaded (i.e, one of the 3):
#  > module load mpich/gnu
#    export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-gcc/include'
#  > module load mpich/intel netcdf/3.6.1/icc
#    export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-intel/include'
#  > module load mpich/pgi
#    export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-pgi/include'
#    export NETCDF_ROOT='/usr/local/pkg/pgi/pgi-5.2/linux86/5.2'

#- stop (exit status 8) unless $PBS_NODEFILE is set and lists at least
#  $Npr lines
if test -z "$PBS_NODEFILE" ; then
  echo ' $PBS_NODEFILE not defined'
  echo "for now, works only from a (at least) $Npr nodes PBS bach session"
  exit 8
fi
#  number of lines in the node file ; awk strips any padding added by wc
nprc=$(wc -l < "$PBS_NODEFILE" | awk '{print $1}')
#  an unreadable node file leaves nprc empty: count it as 0 node
if [ "${nprc:-0}" -lt "$Npr" ] ; then
  echo ' Nb of nodes from $PBS_NODEFILE =' $nprc
  echo "for now, works only from a (at least) $Npr nodes PBS bach session"
  exit 8
fi
umask 0022

#============================================================================

#- step 0 : clean all directories
if [ "$kpr" = 0 ] ; then
# explicit file list: brace expansion is not available in a POSIX /bin/sh
  rm -f pr_group std_outp \
        comp_res.ocn comp_res.atm comp_res.land comp_res.sice
  rm -f build_???/TTT.*make.* build_???/TTT.mkdepend.*
  /bin/rm -r -f rank_? rank_1? rank_2?
# "make Clean" in each build dir that has a Makefile ; done in a sub-shell so
# that a failing "cd" can neither run make, nor move us, in the wrong place
  for comp in cpl ocn atm ; do
    if test -f "build_$comp/Makefile" ; then
      ( cd "build_$comp" && make Clean )
    fi
  done
fi
#- remove the model output files (*.data, *.meta, STD???.0000 and
#  UV-*.0001.clog) from directory $1 ; does nothing if $1 is not a directory
clean_rank_output()
{
  if test -d "$1" ; then
#   sub-shell + "&&": the rm only runs once we really are inside $1
    ( cd "$1" && rm -f *.data *.meta STD???.0000 UV-*.0001.clog )
  fi
}

#- step 5 : remove output files in rank_0,1,2 dir.
if [ "$kpr" = 5 ] ; then
  echo 'remove output files in rank_0,1,2 dir.'
# explicit file list: brace expansion is not available in a POSIX /bin/sh
  rm -f pr_group std_outp \
        comp_res.ocn comp_res.atm comp_res.land comp_res.sice
  if test -d rank_0 ; then ( cd rank_0 && rm -f Coupler.0000.clog ) ; fi
  clean_rank_output "rank_$rnkO"
  clean_rank_output "rank_$rnkA"
fi

#- build one component in directory build_$1: genmake2, make depend, make ;
#  the output of each stage goes to TTT.<stage>.$$ in the build dir and only
#  its last lines are shown.
#  arguments: $1 = component tag (cpl, ocn or atm)
#             $2 = component name printed in the title line
#             $3 = number of lines of "make depend" output to show
#             $4... = additional genmake2 option(s), put after "-mpi"
#  uses: $OPTFILE, $GMKopt, $dir
#  returns: 1 if cannot "cd" to the build dir (nothing is run in that case)
build_component()
{
  bc_tag=$1 ; bc_name=$2 ; bc_ndep=$3 ; shift 3
  echo "==== compile $bc_name:"
  cd "build_$bc_tag" || return 1
  echo " --- genmake2 ($bc_tag):"
# $GMKopt is left unquoted on purpose: may be empty or hold several options
  ../../../tools/genmake2 -of "$OPTFILE" -mpi "$@" $GMKopt > TTT.genmake.$$
  tail -n 5 TTT.genmake.$$
  echo " --- make depend ($bc_tag):"
  make depend > TTT.mkdepend.$$
  tail -n "$bc_ndep" TTT.mkdepend.$$
  echo " --- make ($bc_tag):"
  make > TTT.make.$$ 2>&1
  tail -n 10 TTT.make.$$
  echo ' ' ; cd "$dir"
}

#- step 1 : compile the 3 executables (cpl,ocn,atm)
if [ "$kpr" = 1 ] ; then

#- find the optfile that corresponds to the currently loaded mpich module
  . /etc/profile.d/modules.sh
# keep both stdout & stderr of "module list" (POSIX form of the former ">&")
  module list -t > tmp_compiler 2>&1
# keep what follows the last "/" on the line(s) that contain "mpich"
  compiler=$(sed -n 's/mpich.*\///p' tmp_compiler)
  case $compiler in
   'gnu')   OPTFILE="../../tools/build_options/linux_ia32_g77" ;;
   'intel') OPTFILE="../../tools/build_options/linux_ia32_ifort" ;;
   'pgi')   OPTFILE="../../tools/build_options/linux_ia32_pgf77" ;;
       *)   echo 'ERROR: cannot use compiler:' $compiler ;
            echo ""; cat tmp_compiler ; exit 7 ;;
  esac
  rm -f tmp_compiler
  echo " Using optfile: $OPTFILE (compiler=$compiler) $MTH"
# a relative optfile path needs one more "../" to be used from a build dir
  case $OPTFILE in
    /*) ;;
     *) OPTFILE="../$OPTFILE" ;;
  esac
#---

# $MTHo & $MTHa are left unquoted on purpose: an empty value adds no argument
  build_component cpl coupler  5        || exit 1
  build_component ocn OGCM    10 $MTHo  || exit 1
  build_component atm AGCM    10 $MTHa  || exit 1

  ls -l build_???/mitgcmuv

fi

#- create directory rank_$1 and fill it with links to all files of ../$2 ;
#  run ./prepare_run there if there is an executable one ; when $3 is not
#  empty (multi-threaded run), use eedata.mth in place of eedata.
#  arguments: $1 = rank number ; $2 = input dir name ; $3 = multi-thread opt
#  uses: $dir (to come back to when done)
#  returns: 1 if cannot "cd" to the new dir (no link is made in that case)
setup_rank_dir()
{
  sr_rank=$1 ; sr_input=$2 ; sr_mth=$3
  echo 'Link files from dir:' "$sr_input" '->' "rank_$sr_rank"
  mkdir "rank_$sr_rank"
  cd "rank_$sr_rank" || return 1
  ln -s ../"$sr_input"/* .
  if test -x prepare_run ; then ./prepare_run ; fi
  if test "x$sr_mth" != x ; then
    echo " MTH run: mv -f eedata.mth eedata"
    if test -h eedata ; then rm -f eedata ; fi
    mv -f eedata.mth eedata
  fi
  cd "$dir"
}

#- step 2 : copy input files and dir(s)
if [ "$kpr" = 2 ] ; then
  echo 'rm dir:' rank_? rank_1? rank_2?
  /bin/rm -r -f rank_? rank_1? rank_2?
  echo 'Link files from dir:' input_cpl '->' rank_0
  mkdir rank_0
# "&&": never make the links in the current dir if the "cd" fails
  ( cd rank_0 && ln -s ../input_cpl/* . )

#- ocean: one real dir (rank_$rnkO) ; the other ocean ranks are links to it
  setup_rank_dir "$rnkO" input_ocn "$MTHo" || exit 1
  n=$(( rnkO + 1 ))
# ocean ranks stop just before $rnkA (= $rnkO + $NpOc) ; with rnkO=1 this is
# the same as the former "-le $NpOc"
  while [ "$n" -lt "$rnkA" ] ; do
    ln -s "rank_$rnkO" "rank_$n"
    n=$(( n + 1 ))
  done

#- atmos: one real dir (rank_$rnkA) ; the remaining ranks are links to it
  setup_rank_dir "$rnkA" input_atm "$MTHa" || exit 1
  n=$(( rnkA + 1 ))
  while [ "$n" -lt "$Npr" ] ; do
    ln -s "rank_$rnkA" "rank_$n"
    n=$(( n + 1 ))
  done

fi

#- print on stdout one process-group line per host given as argument:
#    <host> <n> <executable>
#  with n = 0 on the first line and n = 1 on all the others ; the 1rst host
#  gets the coupler executable, the next $NpOc hosts the ocean one and all
#  the remaining hosts the atmos one.
#  uses: $NpOc, $dir
gen_pr_group()
{
  pg_count=0 ; pg_n=0 ; pg_comp=cpl
  for pg_host in "$@" ; do
    echo "$pg_host $pg_n $dir/build_$pg_comp/mitgcmuv"
    pg_count=$(( pg_count + 1 ))
    if [ "$pg_count" -le "$NpOc" ] ; then pg_comp=ocn ; else pg_comp=atm ; fi
    pg_n=1
  done
}

#- step 3 : run with $Npr mpi processes
if [ "$kpr" = 3 ] ; then
  ROOTDIR=$dir
# rm -f rank_?/pickup*.ckptA.00?.00?.??ta
  echo "$ROOTDIR"

#--- running on the same node (not used: list is always set from $NODEFILE):
  #list='' ; nc=0; xx=`hostname`
  #while [ $nc -lt $Npr ] ; do list="$list $xx" ; nc=`expr $nc + 1` ; done
#-- On darwin cluster node (from qrsh session):
  #JOB_ID=`qstat | sed -n '3,$ p' | grep " $USER " | awk '{print $1}'`
  #NODEFILE="/tmp/$JOB_ID.1.darwin/machines"
  #echo " JOB_ID = '$JOB_ID' ; NODEFILE = '$NODEFILE'"
#-- On ACES cluster (in PBS batch job):
  NODEFILE=$PBS_NODEFILE
#--- running on different nodes:
  ls -l "$NODEFILE"
# number of lines left in $NODEFILE once adjacent duplicates are merged
  nprc=$(uniq "$NODEFILE" | awk 'END { print NR }')
  if [ "$nprc" -ge "$Npr" ] ; then
#   enough distinct entries: take the first $Npr of them
    list=$(uniq "$NODEFILE" | head -n "$Npr")
  else
#   not enough: take the first $Npr lines, repeated entries included
    list=$(head -n "$Npr" "$NODEFILE")
  fi

#- process-group file, used by the mpich-mx and mpich-1 "mpirun" below ;
#  $list is left unquoted on purpose: one argument per host
  rm -f pr_group
  gen_pr_group $list > pr_group
#- same layout as "mpirun" arguments (used with mpich2):
  NpAt=$(( Npr - 1 - NpOc ))
  RunOpt="-np 1 ./build_cpl/mitgcmuv"
  RunOpt="$RunOpt : -np $NpOc ./build_ocn/mitgcmuv"
  RunOpt="$RunOpt : -np $NpAt ./build_atm/mitgcmuv"

  cd "$ROOTDIR" || exit 1
  if test "x$MTH" != x ; then
    export OMP_NUM_THREADS=2 ; export KMP_STACKSIZE=400m
#   printf '%s' = portable way to print without the final newline
    if test "x$MTHo" != x ; then
      printf '%s' " run OCN ($MTHo) with $OMP_NUM_THREADS threads ;"
    fi
    if test "x$MTHa" != x ; then
      printf '%s' " run ATM ($MTHa) with $OMP_NUM_THREADS threads ;"
    fi
    echo ""
  fi
#- select the launch command from the path of "mpirun" ; as before, a match
#  on 'mpich2' wins over a match on 'mpich-mx' ("case" replaces the former
#  "test ... == 0", which is not a valid comparison for a POSIX test)
  mpich=$(command -v mpirun)
  case $mpich in
   *mpich2*)
  #- with Hydra mpich2 (on baudelaire):
    echo "execute 'mpirun $RunOpt' :"
#   $RunOpt is left unquoted on purpose: holds the full argument list
    mpirun $RunOpt > std_outp 2>&1
    ;;
   *mpich-mx*)
  #- with mpich-mx (on beagle):
    echo "execute 'mpirun -pg pr_group -v ./build_cpl/mitgcmuv' :"
    mpirun -pg pr_group -v ./build_cpl/mitgcmuv > std_outp 2>&1
    ;;
   *)
  # /usr/local/pkg/mpi/mpi-1.2.4..8a-gm-1.5/pgi/bin/mpirun.ch_gm -pg pr_group -wd $ROOTDIR --gm-kill 5 -v ./build_cpl/mitgcmuv > std_outp 2>&1
  #- with mpich-1 (on danton, ACES):
    echo "execute 'mpirun -p4pg pr_group -v ./build_cpl/mitgcmuv' :"
    mpirun -p4pg pr_group -v ./build_cpl/mitgcmuv > std_outp 2>&1
    ;;
  esac
  tail -n 20 std_outp
  ls -l "rank_$rnkO"/pickup*.ckptA.001.001.data
  ls -l "rank_$rnkA"/pickup*.ckptA.001.001.data

fi

#- step 4 : check the results
#  compare the STDOUT.0000 of the ocean & atmos rank dir with the reference
#  output in dir "results", using $CompRes ; the comp_res.log left by each
#  comparison is saved as comp_res.{ocn,atm,land,sice}
if [ "$kpr" = 4 ] ; then
  CompRes="$HOME/bin/comp_res"

  if test -f "rank_$rnkO/STDOUT.0000" ; then
    echo '==> check Ocean output:'
    "$CompRes" "rank_$rnkO/STDOUT.0000" results/ocnSTDOUT.0000
    mv -f comp_res.log comp_res.ocn
    echo ' '
  else
    echo "No Ocean output file in rank_$rnkO"
  fi

  if test -f "rank_$rnkA/STDOUT.0000" ; then
#   same 2 files are compared 3 times: without, then with "L", then with "I"
#   as last argument
    echo '==> check Atmos output:'
    "$CompRes" "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000
    mv -f comp_res.log comp_res.atm
    echo '==> check Land output:'
    "$CompRes" "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000 L
    mv -f comp_res.log comp_res.land
    echo '==> check thSIce output:'
    "$CompRes" "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000 I
    mv -f comp_res.log comp_res.sice
    echo ' '
  else
    echo "No Atmos output file in rank_$rnkA"
  fi

fi

exit 0