/[MITgcm]/MITgcm_contrib/test_scripts/engaging/test_engag_ifc_mpi
ViewVC logotype

Annotation of /MITgcm_contrib/test_scripts/engaging/test_engag_ifc_mpi

Parent Directory | Revision Log | View Revision Graph


Revision 1.23 - (hide annotations) (download)
Thu Oct 19 14:43:00 2023 UTC (20 months, 4 weeks ago) by jmc
Branch: MAIN
Changes since 1.22: +9 -3 lines
check testreport exit value before running restart test

1 jmc 1.1 #!/bin/bash
2 jmc 1.9 #SBATCH -J ifcMpi_tst
3 jmc 1.1 #SBATCH -p sched_mit_hill
4 jmc 1.21 #SBATCH -t 10:00:00
5 jmc 1.1 #SBATCH --mem-per-cpu 4000
6     #SBATCH -N 2
7     #SBATCH --tasks-per-node 4
8 jmc 1.23 # #SBATCH -x node122
9     # #SBATCH -x node[051,052,065,066]
10 jmc 1.9 #SBATCH -e /home/jm_c/test_engaging/output/ifcMpi_tst.stderr
11     #SBATCH -o /home/jm_c/test_engaging/output/ifcMpi_tst.stdout
12 jmc 1.15 #SBATCH --no-requeue
13 jmc 1.1
14 jmc 1.23 # $Header: /u/gcmpack/MITgcm_contrib/test_scripts/engaging/test_engag_ifc_mpi,v 1.22 2023/02/19 15:02:22 jmc Exp $
15 jmc 1.1 # $Name: $
16    
#- Job set-up: environment, directories, repository names and testreport
#  options used by all the following sections of this script.
if [ -f /etc/profile.d/modules.sh ] ; then
  . /etc/profile.d/modules.sh
fi
# Note: added "ulimit -s unlimited" in file "~/.bashrc"
# to pass big test (the 2 fizhi-cs-* test & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: $(hostname)"
headNode='eofe8'

dNam='engaging'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
SEND="ssh $headNode $SavD/mpack"
TST_DIR="/pool001/jm_c/test_$dNam"
#- scratch file receiving the 'git clone' error output: created by mktemp so
#  that its name cannot be guessed or pre-created by another user of /tmp,
#  and removed when the script exits. The former "/tmp/<script>.<pid>" name
#  is only used as a fall-back if mktemp itself fails.
scriptName=$(basename -- "$0")
tmpFil=$(mktemp "/tmp/${scriptName}.XXXXXX") || tmpFil="/tmp/${scriptName}.$$"
trap 'rm -f -- "$tmpFil"' EXIT
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- defaults first, then the settings specific to this test (suffix 'ifcMpi')
dblTr=0 ; typ='' ; addExp='' ; skipExp=''
sfx='ifcMpi'; dblTr=1
addExp='global_oce_cs32 global_oce_llc90'
module add slurm
module add gcc
module add engaging/intel/2013.1.046
#export MPI_INC_DIR="$MPI_HOME/include"
OPTFILE="../tools/build_options/linux_amd64_ifort+impi"
mpiCMD="mpirun -env I_MPI_DEBUG 2 -n TR_NPROC ./mitgcmuv" #- currently not used
options="$typ -MPI 8"
#options="-j 4 $options"
#export OMP_NUM_THREADS=2
#export KMP_SLAVE_STACK_SIZE=400m
#options="$options -gsl"
#export GSL_IEEE_MODE=double-precision,mask-underflow,mask-denormalized
ulimit -s unlimited
#- need this to get "staf" & "do_make_syntax.sh":
export PATH="$PATH:$HOME/bin"

gcmDIR="MITgcm_$sfx"
#- add "-ur4 -match 5" when (day of the month) modulo 3 is 1 ; "10#" forces
#  base 10 so that days "08" and "09" are not rejected as octal numbers
dAlt=$(date +%d) ; dAlt=$(( 10#$dAlt % 3 ))
if [ "$dAlt" -eq 1 ] ; then options="$options -ur4 -match 5" ; fi
options="$options -devel"
#- strip leading blanks from the list of experiments to skip
if test "x$skipExp" != x ; then skipExp=$(echo $skipExp | sed 's/^ *//') ; fi

#- checkOut: 1 = update the existing clone (a later section switches to 2,
#  a fresh clone, when no clone is found)
checkOut=1 ; #options="$options -do"
#options="$options -nc" ; checkOut=1 ; dblTr=0
#options="$options -q" ; checkOut=0 ; dblTr=0
70 jmc 1.1
#- Move into the test directory (it must already exist: it is not created
#  here) and print a summary of the job environment.
#  TST_DIR is quoted: an empty value must fail the test instead of turning
#  it into the always-true one-argument form "test -d".
if test -d "$TST_DIR" ; then
  echo "start from TST_DIR='$TST_DIR' at: $(date)"
else
  #if test ! -d $TST_DIR ; then mkdir $TST_DIR ; fi
  #if test ! -d $TST_DIR ; then
  # echo "ERROR: Can't create directory \"$TST_DIR\""
  # exit 1
  #fi
  #echo "start in new dir TST_DIR='$TST_DIR' at: "`date`
  echo "ERROR: missing directory \"$TST_DIR\""
  exit 1
fi
#- all following sections use paths relative to TST_DIR (including "rm -rf"):
#  stop here if the directory cannot be entered
cd "$TST_DIR" || { echo "ERROR: cannot cd to \"$TST_DIR\"" ; exit 1 ; }
pwd
df .

NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo "Start job $THEDATE"
echo "NSLOTS = $NSLOTS"
echo '======= NODELIST ==============================================================='
#- quoted: a list such as "node[051-052]" is also a valid file-name pattern
echo "$SLURM_NODELIST"
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
module list 2>&1
echo '================================================================================'
100    
#-
MPI_MFile="${OUTP}/mf_${sfx}" #- currently not used
#mpiCMD="mpirun -hostfile TR_MFILE -n TR_NPROC ./mitgcmuv"
#- make the testreport MPI_MFILE:
#  split the SLURM node list into words: the first '[', the first ',' and the
#  first '-' become a blank and the first ']' is dropped, e.g. "node[051-052]"
#  gives "node 051 052". The list is quoted so that its brackets are never
#  taken as a file-name pattern by the shell.
listNODES=$(printf '%s\n' "$SLURM_NODELIST" \
  | sed -e 's/\[/ /' -e 's/\]//' -e 's/,/ /' -e 's/-/ /')
#- write the node names 4 times: in each pass the first word is the common
#  prefix and each following word is appended to it, one name per line
#  (with 2 listed nodes this gives an 8 line file)
/bin/rm -f "$MPI_MFile" ; touch "$MPI_MFile"
for nc in 1 2 3 4 ; do
  pfx=''
  for nd in $listNODES ; do
    if test "x$pfx" = x ; then
      pfx=$nd
    else
      echo "${pfx}${nd}" >> "$MPI_MFile"
    fi
  done
done
115    
#- check for disk space: relative space (99%) or absolute (10.G):
#  dsp = used fraction (in %) of the file system holding the current dir,
#  taken from the 5th column of the last line of "df -P ."
dsp=$(df -P . | tail -1 | awk '{ sub(/%$/, "", $5) ; print $5 }')
case "$dsp" in
  ''|*[!0-9]*)
    #- as before, an unreadable value does not stop the job (but say so
    #  instead of letting the numeric test below fail on it)
    echo "WARNING: cannot get disk usage from 'df' (got: '$dsp') => continue"
    dsp=0 ;;
esac
if [ "$dsp" -gt 99 ] ; then
  #dsp=`df -P . | tail -1 | awk '{print $4}'`
  #if [ $dsp -le 100000000 ] ; then
  echo 'Not enough space on this disk => do not run testreport.'
  df .
  #- report the failure to the scheduler (a bare "exit" would return the
  #  status of the "df" just above, i.e. success)
  exit 1
fi
125 jmc 1.10
#- An update (checkOut=1) needs the clone(s) to be already there: when the
#  git config file of the main clone, or of the additional-experiments clone
#  (only looked for if addExp is not empty), is missing, switch to a fresh
#  clone (checkOut=2).
if test $checkOut -eq 1 ; then
  if ! test -e $gcmDIR/.git/config ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if test -n "$addExp" && ! test -e $gcmDIR/$git_other/.git/config ; then
    echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
    checkOut=2
  fi
fi
#- checkOut=1: clean the output of the previous run in $gcmDIR/verification,
#  then update the existing clone(s) with "git pull".
#  Exits with status 1 if a "git pull" fails.
if [ "$checkOut" -eq 1 ] ; then
  echo "cleaning output from $gcmDIR/verification :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f "$gcmDIR"/verification/??_${dNam}-${sfx}_????????_?.tar.gz
  #- "|| exit 1" only leaves the subshell: nothing is archived or removed
  #  from the wrong directory if the cd fails
  ( cd "$gcmDIR/verification" || exit 1
    #- an unmatched pattern stays as is and is skipped by "test -d"
    for dd in ??_${dNam}-${sfx}_????????_? ; do
      if test -d "$dd" ; then
        tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 \
          && gzip "${dd}.tar" && /bin/rm -rf "$dd"
        retVal=$?
        if test "x$retVal" != x0 ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '$(hostname)' (return val=$retVal) but continue"
        fi
      fi
    done )
  echo "clean tst_2+2 + testreport output (+ Makefile_syntax files)"
  ( cd "$gcmDIR/verification" || exit 1
    ../tools/do_tst_2+2 -clean
    #- $typ is left unquoted on purpose: empty means "no option"
    ./testreport $typ -clean
    rm -f */build/Makefile_syntax
    rm -f */build/port_rand.i */build/ptracers_set_iolabel.i )
  if test "x$addExp" != x ; then
    ( cd "$gcmDIR/verification" || exit 1
      #- collect the symbolic links of this directory with "test -L"
      #  (instead of picking a column out of the "ls -o" listing)
      links=()
      for entry in * ; do
        if test -L "$entry" ; then links+=("$entry") ; fi
      done
      echo " + remove local links: ${links[*]}"
      if [ "${#links[@]}" -gt 0 ] ; then /bin/rm -f -- "${links[@]}" ; fi
    )
  fi
  echo "Update $git_code code in dir $gcmDIR :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
    #- explicit status: a bare "exit" would return the status of the "echo"
    exit 1
  fi
  echo " and checkout master:"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if test "x$addExp" != x ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master:"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi
188 jmc 1.10
#- checkOut=2: remove any previous working copy and make fresh clone(s);
#  the error output of "git clone" goes to $tmpFil and is only printed when
#  the clone fails. Exits with status 1 if a clone fails.
if [ "$checkOut" -eq 2 ] ; then
  if test -e "$gcmDIR" ; then
    echo -n "Removing working copy: $gcmDIR ..."
    #- ":?" aborts rather than running "rm -rf" on an empty name
    rm -rf -- "${gcmDIR:?}"
    echo " done"
  fi
  echo "Make a clone of $git_code from repo: $git_repo ..."
  git clone "$git_repo/${git_code}.git" "$gcmDIR" 2> "$tmpFil"
  retVal=$?
  if test $retVal = 0 ; then
    echo ' done' ; rm -f "$tmpFil"
  else
    echo " Error: 'git clone' returned: $retVal"
    cat "$tmpFil" ; rm -f "$tmpFil"
    #- explicit status: a bare "exit" would return the status of the "rm"
    exit 1
  fi
  if test "x$addExp" != x ; then
    echo "Make a clone of $git_other from repo: $git_repo ..."
    #- the additional experiments are cloned inside the main clone
    ( cd "$gcmDIR" && git clone "$git_repo/${git_other}.git" 2> "$tmpFil" )
    retVal=$?
    if test $retVal = 0 ; then
      echo ' done' ; rm -f "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil" ; rm -f "$tmpFil"
      exit 1
    fi
  fi
  if test -d "$gcmDIR/verification" ; then
    #- give group access to the new clone ; "-exec ... {} +" copes with any
    #  file name and runs nothing when find returns no entry
    /usr/bin/find "$gcmDIR" -type d -exec chmod g+rxs {} +
    /usr/bin/find "$gcmDIR" -type f -exec chmod g+r {} +
  fi
fi
222    
#- change dir to $gcmDIR/verification + add link for additional experiments:
#  each experiment of $addExp found in ../$git_other is linked in here, with
#  its input directories linked from "other_input".
#  Exits with status 1 if $gcmDIR/verification is missing or cannot be entered.
if test -e "$gcmDIR/verification" ; then
  if [ "$checkOut" -lt 2 ] ; then
    echo " dir $gcmDIR/verification exist" ; fi
  cd "$gcmDIR/verification" \
    || { echo "cannot cd to: $gcmDIR/verification => exit" ; exit 1 ; }
  for exp2add in $addExp ; do
    #- remove a previous copy or link ; "test -L" also catches a dangling
    #  link, which "test -r/-e" do not see and which would make "ln -s" fail
    if test -e "$exp2add" || test -L "$exp2add" ; then
      /bin/rm -rf "$exp2add"
    fi
    if test -d "../$git_other/$exp2add" ; then
      echo " add $exp2add link from $git_other"
      ln -s "../$git_other/$exp2add" .
      if test "$exp2add" = 'global_oce_cs32' ; then
        echo " link dir 'other_input/core2_cnyf' in here"
        #- "|| exit 1" only leaves the subshell: no link is removed or made
        #  in the wrong directory if the cd fails
        ( cd "../${git_other}/${exp2add}" || exit 1
          if test -L core2_cnyf ; then /bin/rm -f core2_cnyf ; fi
          ln -s ../../../other_input/core2_cnyf . )
      fi
      if test "$exp2add" = 'global_oce_llc90' ; then
        echo " link dir 'other_input/gael_oce_llc90_input' to 'input_fields'"
        ( cd "../${git_other}/${exp2add}" || exit 1
          if test -L input_fields ; then /bin/rm -f input_fields ; fi
          ln -s ../../../other_input/gael_oce_llc90_input input_fields
          echo " link dirs: 'core2_cnyf' & 'global_oce_input_fields/*' in input_verifs"
          if test ! -e input_verifs ; then mkdir input_verifs ; fi
          #- never run the "rm -f" below outside of input_verifs
          ( cd input_verifs || exit 1
            /bin/rm -f ./*
            ln -s ../../../../other_input/core2_cnyf .
            ln -s ../../../../other_input/global_oce_input_fields/* . )
        )
      fi
    else
      echo " missing dir: $git_other/$exp2add"
      continue
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  #- explicit status: a bare "exit" would return the status of the "echo"
  exit 1
fi
260    
#- Run the tests from the current directory ; each command line is echoed
#  before being executed. When dblTr is 1, steps 0 and 1 come first and "-q"
#  is added to the options between them.
outDir=${dNam}-$sfx

if test $dblTr -eq 1 ; then
  echo ''
  #- 0) just make all module header ( *__genmod.mod files) using modified Makefile
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 4 -nc -repl_mk do_make_syntax.sh -obj -dd
  ./testreport $options -of $OPTFILE -skd "$skipExp" \
    -j 4 -nc -repl_mk do_make_syntax.sh -obj -dd
  options="$options -q"

  echo ''
  #- 1) just compile ("-nr"), using "-j 4" to speed up
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 4 -nr -odir $outDir
  ./testreport $options -of $OPTFILE -skd "$skipExp" \
    -j 4 -nr -odir $outDir
  #- number of lines of tr_out.txt starting with 'Y . N N '
  nFc=$(grep -c '^Y . N N ' tr_out.txt)
  echo " <= fail to compile $nFc experiments"
fi

echo ''
#- 2) run and report results ; also finish to compile those who failed with "-j"
#echo ./testreport $options -of $OPTFILE -command \'$mpiCMD\' -mf $MPI_MFile \
echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
  -odir $outDir -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
#./testreport $options -of $OPTFILE -command "$mpiCMD" -mf $MPI_MFile \
./testreport $options -of $OPTFILE -skd "$skipExp" \
  -odir $outDir -send "$SEND" -sd $SavD -a jm_c@mitgcm.org
retVal=$?

#- the restart test is only run when testreport returned 0
if test "x$retVal" = x0 ; then
  echo ''
  #- 3) test restart and report results
  #echo ../tools/do_tst_2+2 -mpi -exe \'$mpiCMD\' -mf $MPI_MFile \
  echo ../tools/do_tst_2+2 -mpi \
    -o $outDir -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  #../tools/do_tst_2+2 -mpi -exe "$mpiCMD" -mf $MPI_MFile \
  ../tools/do_tst_2+2 -mpi \
    -o $outDir -send "$SEND" -sd $SavD -a jm_c@mitgcm.org
else
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
fi

  ViewVC Help
Powered by ViewVC 1.1.22