#! /bin/sh

# $Header: /u/gcmpack/MITgcm_contrib/jmc_script/run_cpl_test.aces,v 1.2 2011/08/01 23:52:33 jmc Exp $
# $Name: $

#- Drive a coupled (coupler + ocean + atmosphere) test, one step per call.
#  Run without argument to get the list of steps.

#- default:
#  Npr  = total number of MPI processes ; NpOc = number of ocean processes
#  (step 3 runs 1 coupler + NpOc ocean + Npr-1-NpOc atmosphere processes)
Npr=3 ; NpOc=1 ;
#Npr=25; NpOc=12;
#  MTH, MTHo, MTHa = multi-threaded option (global, ocean, atmos); stay empty
#  unless option '-mth' is given (and eedata.mth exists for ocean / atmos)
MTH=
MTHo=
MTHa=
#  GMKopt = additional option passed to genmake2
GMKopt='-ieee'
#GMKopt=
#  rnkO, rnkA = number (in dir name rank_N) of the first ocean & atmos process
rnkO=1 ; rnkA=$(( $rnkO + $NpOc ))

#- parse options:
#  "$1" is quoted so that an empty or multi-words argument does not break 'test'
if [ $# -ge 1 ] && [ "$1" = '-mth' ] ; then
  MTH='-omp' ; shift
  if test -f input_ocn/eedata.mth ; then MTHo=$MTH ; fi
  if test -f input_atm/eedata.mth ; then MTHa=$MTH ; fi
fi
if [ $# -ne 1 ]
then
  echo 'Usage:'"$(basename "$0")"' [opt] step'
  echo ' => test coupled set-up on ACES cluster'
  echo 'opt = -mth : compile and run (if eedata.mth) 2-threads for ocn & atm'
  echo ' step = 0 : clean all directories'
  echo ' step = 1 : compile the 3 executables (cpl,ocn,atm)'
  echo ' step = 2 : copy input files and dir(s)'
  echo " step = 3 : run with $Npr mpi processes"
  echo ' step = 4 : check the results'
  echo ' step = 5 : remove output files in rank_0,1,2 dir.'
  exit
fi
#- kpr = selected step ; dir = directory the script is started from
kpr=$1
dir=$(pwd)

#============================================================================

#- only works from a 3 (or more) nodes PBS batch session on ACES cluster
#  (tested on ao) ( qsub -I -l nodes=3 -or-
#                   qsub -I -l nodes=3:ppn=2 )
#  need to be on one computer node,
#  and with the correct module loaded (i.e, one of the 3):
#  > module load mpich/gnu
#    export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-gcc/include'
#  > module load mpich/intel netcdf/3.6.1/icc
#    export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-intel/include'
#  > module load mpich/pgi
#    export MPI_INC_DIR='/usr/local/pkg/mpich/mpich-pgi/include'
#    export NETCDF_ROOT='/usr/local/pkg/pgi/pgi-5.2/linux86/5.2'

#- stop (exit status 8) unless $PBS_NODEFILE is set, readable and lists
#  at least $Npr entries (1 entry per line)
if test -z "$PBS_NODEFILE"
then
  echo ' $PBS_NODEFILE not defined'
  echo "for now, works only from a (at least) $Npr nodes PBS bach session"
  exit 8
elif test ! -r "$PBS_NODEFILE"
then
  #- without this check, an unreadable file gives an empty count and the
  #  numerical test below just fails with an error message, without stopping
  echo " cannot read \$PBS_NODEFILE = $PBS_NODEFILE" >&2
  exit 8
else
  #- awk strips the padding that some 'wc' put in front of the count
  nprc=$(wc -l < "$PBS_NODEFILE" | awk '{print $1}')
  if [ "$nprc" -lt "$Npr" ] ; then
    echo ' Nb of nodes from $PBS_NODEFILE =' $nprc
    echo "for now, works only from a (at least) $Npr nodes PBS bach session"
    exit 8
  fi
fi
umask 0022

#============================================================================

#- step 0: remove run log files, the rank_N dirs and clean the 3 build dirs
if test "$kpr" = 0
then
  #- explicit file list: brace expansion is not available in a POSIX sh
  rm -f pr_group std_outp comp_res.ocn comp_res.atm comp_res.land comp_res.sice
  rm -f build_???/TTT.*make.* build_???/TTT.mkdepend.*
  /bin/rm -r -f rank_? rank_1? rank_2?
  for bld in build_cpl build_ocn build_atm ; do
    #- in a sub-shell: "make Clean" is skipped if cd fails and the current
    #  directory of the script is never changed
    if test -f "$bld/Makefile" ; then ( cd "$bld" && make Clean ) ; fi
  done
fi
#- step 5: remove output files, keep the rank_N dirs and the linked input files

# purge_rank_output <dir>
#   remove model output files (*.data *.meta STD???.0000 UV-*.0001.clog) from
#   directory <dir> ; do nothing if <dir> is not a directory.
#   Done in a sub-shell and only after a successful cd, so that files of the
#   current directory are never removed by mistake.
purge_rank_output()
{
  test -d "$1" || return 0
  ( cd "$1" && rm -f *.data *.meta STD???.0000 UV-*.0001.clog )
}

if test "$kpr" = 5
then
  echo 'remove output files in rank_0,1,2 dir.'
  #- explicit file list: brace expansion is not available in a POSIX sh
  rm -f pr_group std_outp comp_res.ocn comp_res.atm comp_res.land comp_res.sice
  rm -f rank_0/Coupler.0000.clog
  purge_rank_output "rank_$rnkO"
  purge_rank_output "rank_$rnkA"
fi

#- step 1: compile the 3 executables (coupler, ocean, atmosphere)

# optfile_of <compiler>
#   print the genmake2 optfile that goes with the flavour (gnu, intel or pgi)
#   of the loaded mpich module ; return 1 (and print nothing) for any other.
optfile_of()
{
  case "$1" in
    gnu)   echo "../../tools/build_options/linux_ia32_g77" ;;
    intel) echo "../../tools/build_options/linux_ia32_ifort" ;;
    pgi)   echo "../../tools/build_options/linux_ia32_pgf77" ;;
    *)     return 1 ;;
  esac
}

# build_component <tag> <title> <ntail> [genmake2 option ...]
#   in dir build_<tag>: run genmake2 (with optfile $OPTFILE, -mpi and the given
#   additional options), "make depend" and "make" ; output of each stage goes
#   to a TTT.*.<pid> file and only its end is printed (last <ntail> lines for
#   the "make depend" stage).
#   Done in a sub-shell: nothing is run if cd fails (return status is then 1)
#   and the current directory of the script is not changed.
build_component()
{
  bc_tag=$1 ; bc_title=$2 ; bc_ntail=$3 ; shift 3
  echo "==== compile $bc_title:"
  (
    cd "build_$bc_tag" || exit 1
    echo " --- genmake2 ($bc_tag):"
    ../../../tools/genmake2 -of "$OPTFILE" -mpi "$@" > TTT.genmake.$$
    tail -n 5 TTT.genmake.$$
    echo " --- make depend ($bc_tag):"
    make depend > TTT.mkdepend.$$
    tail -n "$bc_ntail" TTT.mkdepend.$$
    echo " --- make ($bc_tag):"
    make > TTT.make.$$ 2>&1
    tail -n 10 TTT.make.$$
    echo ' '
  )
}

if test "$kpr" = 1
then

#- find the optfile that corresponds to the currently loaded mpich module
  . /etc/profile.d/modules.sh
  #- POSIX form of the redirection (">&" is not understood by every sh)
  module list -t > tmp_compiler 2>&1
  compiler=$(sed -n 's/mpich.*\///p' tmp_compiler)
  if OPTFILE=$(optfile_of "$compiler") ; then
    rm -f tmp_compiler
  else
    echo 'ERROR: cannot use compiler:' $compiler
    echo "" ; cat tmp_compiler
    rm -f tmp_compiler
    exit 7
  fi
  echo " Using optfile: $OPTFILE (compiler=$compiler) $MTH"
  #- a relative path needs 1 more level, since genmake2 runs from a build dir
  case "$OPTFILE" in
    /*) ;;
    *)  OPTFILE="../$OPTFILE" ;;
  esac
#---

  #- options are left un-quoted on purpose: an empty one must vanish
  build_component cpl coupler  5 $GMKopt
  build_component ocn OGCM    10 $MTHo $GMKopt
  build_component atm AGCM    10 $MTHa $GMKopt

  ls -l build_???/mitgcmuv

fi

#- step 2: (re-)create the rank_N run directories and link the input files

# link_input_dir <rank_dir> <input_dir> <mth_opt>
#   create directory <rank_dir>, link all files from ../<input_dir> into it,
#   run ./prepare_run if there is an executable one and, if <mth_opt> is not
#   empty, replace eedata with eedata.mth .
#   Done in a sub-shell: the current directory of the script is not changed ;
#   nothing is linked (and status 1 is returned) if <rank_dir> cannot be
#   created or entered.
link_input_dir()
{
  (
    mkdir "$1" || exit 1
    cd "$1" || exit 1
    ln -s ../"$2"/* .
    if test -x prepare_run ; then ./prepare_run ; fi
    if test "x$3" != x ; then
      echo " MTH run: mv -f eedata.mth eedata"
      if test -h eedata ; then rm -f eedata ; fi
      mv -f eedata.mth eedata
    fi
  )
}

if test "$kpr" = 2
then
  echo 'rm dir:' rank_? rank_1? rank_2?
  /bin/rm -r -f rank_? rank_1? rank_2?
  echo 'Link files from dir:' input_cpl '->' rank_0
  mkdir rank_0 && ( cd rank_0 && ln -s ../input_cpl/* . )

  echo 'Link files from dir:' input_ocn '->' rank_$rnkO
  link_input_dir "rank_$rnkO" input_ocn "$MTHo"
  #- other ocean processes (up to rank NpOc) share the dir of the first one
  n=$(( $rnkO + 1 ))
  while [ "$n" -le "$NpOc" ] ; do
    ln -s "rank_$rnkO" "rank_$n"
    n=$(( $n + 1 ))
  done

  echo 'Link files from dir:' input_atm '->' rank_$rnkA
  link_input_dir "rank_$rnkA" input_atm "$MTHa"
  #- other atmos processes (up to rank Npr-1) share the dir of the first one
  n=$(( $rnkA + 1 ))
  while [ "$n" -lt "$Npr" ] ; do
    ln -s "rank_$rnkA" "rank_$n"
    n=$(( $n + 1 ))
  done

fi

#- step 3: run the coupled set-up with $Npr mpi processes
#          (1 coupler + NpOc ocean + the remaining ones for atmosphere)

# make_pr_group <n_ocn> <root_dir> [host ...]
#   print 1 line per host: "<host> <k> <root_dir>/build_<cmp>/mitgcmuv"
#   where <cmp> is cpl for the 1rst host, ocn for the next <n_ocn> hosts and
#   atm for all the others ; <k> is 0 for the 1rst host and 1 for the others.
make_pr_group()
{
  pg_nOc=$1 ; pg_root=$2 ; shift 2
  pg_cnt=0 ; pg_k=0 ; pg_cmp=cpl
  for pg_host in "$@"
  do
    echo "$pg_host $pg_k $pg_root/build_$pg_cmp/mitgcmuv"
    pg_cnt=$(( $pg_cnt + 1 ))
    if [ "$pg_cnt" -le "$pg_nOc" ] ; then pg_cmp=ocn ; else pg_cmp=atm ; fi
    pg_k=1
  done
}

if test "$kpr" = 3
then
  ROOTDIR=$dir
# rm -f rank_?/pickup*.ckptA.00?.00?.??ta
  echo "$ROOTDIR"

#--- running on the same node (not active: list is always set from NODEFILE):
# list='' ; nc=0; xx=`hostname`
# while [ $nc -lt $Npr ] ; do list="$list $xx" ; nc=`expr $nc + 1` ; done
#-- On darwin cluster node (from qrsh session):
#JOB_ID=`qstat | sed -n '3,$ p' | grep " $USER " | awk '{print $1}'`
#NODEFILE="/tmp/$JOB_ID.1.darwin/machines"
#echo " JOB_ID = '$JOB_ID' ; NODEFILE = '$NODEFILE'"
#-- On ACES cluster (in PBS batch job):
  NODEFILE=$PBS_NODEFILE
#--- running on different nodes:
  ls -l "$NODEFILE"
  #- use distinct hosts if there are enough of them ; otherwise just take
  #  the first $Npr entries (with repeated hosts)
  nprc=$(uniq "$NODEFILE" | wc -l | awk '{print $1}')
  if [ "$nprc" -ge "$Npr" ] ; then
    list=$(uniq "$NODEFILE" | head -n "$Npr")
  else
    list=$(head -n "$Npr" "$NODEFILE")
  fi

  #- $list is left un-quoted on purpose (1 host name per word)
  make_pr_group "$NpOc" "$dir" $list > pr_group

  NpAt=$(( $Npr - 1 - $NpOc ))
  RunOpt="-np 1 ./build_cpl/mitgcmuv"
  RunOpt="$RunOpt : -np $NpOc ./build_ocn/mitgcmuv"
  RunOpt="$RunOpt : -np $NpAt ./build_atm/mitgcmuv"

  cd "$ROOTDIR" || exit 1
  if test "x$MTH" != x ; then
    export OMP_NUM_THREADS=2 ; export KMP_STACKSIZE=400m
    #- printf instead of the non-portable "echo -n"
    if test "x$MTHo" != x ; then
      printf '%s' " run OCN ($MTHo) with $OMP_NUM_THREADS threads ;"
    fi
    if test "x$MTHa" != x ; then
      printf '%s' " run ATM ($MTHa) with $OMP_NUM_THREADS threads ;"
    fi
    echo ""
  fi
  #- select the way to launch from the path of mpirun (mpich2 is tested first,
  #  as before). A "case" replaces "test ... == 0", which is not POSIX: with a
  #  strict sh both tests were failing and the last branch was always taken.
  mpich=$(command -v mpirun)
  case "$mpich" in
    *mpich2*)
    #- with Hydra mpich2 (on baudelaire):
      echo "execute 'mpirun $RunOpt' :"
      #- $RunOpt is left un-quoted on purpose (list of arguments)
      mpirun $RunOpt > std_outp 2>&1
      ;;
    *mpich-mx*)
    #- with mpich-mx (on beagle):
      echo "execute 'mpirun -pg pr_group -v ./build_cpl/mitgcmuv' :"
      mpirun -pg pr_group -v ./build_cpl/mitgcmuv > std_outp 2>&1
      ;;
    *)
    # /usr/local/pkg/mpi/mpi-1.2.4..8a-gm-1.5/pgi/bin/mpirun.ch_gm -pg pr_group -wd $ROOTDIR --gm-kill 5 -v ./build_cpl/mitgcmuv > std_outp 2>&1
    #- with mpich-1 (on danton, ACES):
      echo "execute 'mpirun -p4pg pr_group -v ./build_cpl/mitgcmuv' :"
      mpirun -p4pg pr_group -v ./build_cpl/mitgcmuv > std_outp 2>&1
      ;;
  esac
  tail -n 20 std_outp
  ls -l "rank_$rnkO"/pickup*.ckptA.001.001.data
  ls -l "rank_$rnkA"/pickup*.ckptA.001.001.data

fi

#- step 4: compare the run output with the reference output

#  CompRes = command used to compare 2 standard-output files
CompRes="$HOME/bin/comp_res"

# compare_output <label> <suffix> <arg> [<arg> ...]
#   print the header "==> check <label> output:", run $CompRes with all the
#   remaining arguments, then rename the file comp_res.log it is expected to
#   leave as comp_res.<suffix> (return status is the one of this 'mv').
compare_output()
{
  co_label=$1 ; co_sfx=$2 ; shift 2
  echo "==> check $co_label output:"
  "$CompRes" "$@"
  mv -f comp_res.log "comp_res.$co_sfx"
}

if test "$kpr" = 4
then
  #- stop here rather than getting "not found" + 'mv' errors for each check
  if test ! -x "$CompRes" ; then
    echo "ERROR: cannot execute $CompRes" >&2
    exit 1
  fi

  if test -f "rank_$rnkO/STDOUT.0000"
  then
    compare_output Ocean ocn "rank_$rnkO/STDOUT.0000" results/ocnSTDOUT.0000
    echo ' '
  else echo "No Ocean output file in rank_$rnkO" ; fi

  #- Atmos, Land and thSIce are all checked from the same atmos output file ;
  #  the last argument (L or I) is passed as is to $CompRes
  if test -f "rank_$rnkA/STDOUT.0000"
  then
    compare_output Atmos  atm  "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000
    compare_output Land   land "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000 L
    compare_output thSIce sice "rank_$rnkA/STDOUT.0000" results/atmSTDOUT.0000 I
    echo ' '
  else echo "No Atmos output file in rank_$rnkA" ; fi

fi

#- normal end (also reached when the step is none of 0,1,2,3,4,5)
exit 0