/[MITgcm]/MITgcm_contrib/test_scripts/svante/test_svante_ifc_mpi
ViewVC logotype

Annotation of /MITgcm_contrib/test_scripts/svante/test_svante_ifc_mpi

Parent Directory | Revision Log | View Revision Graph


Revision 1.21 - (hide annotations) (download)
Thu Oct 19 14:38:38 2023 UTC (20 months, 4 weeks ago) by jmc
Branch: MAIN
Changes since 1.20: +7 -2 lines
check testreport exit value before running restart test

#!/bin/bash
#SBATCH -J ifcMpi_tst
#SBATCH -p edr
#SBATCH -t 23:30:00
#SBATCH --mem-per-cpu 4000
#SBATCH -N 1
#SBATCH --tasks-per-node 6
# #SBATCH -x curly,larry,moe,shemp
#SBATCH -e /home/jm_c/test_svante/output/ifcMpi_tst.stderr
#SBATCH -o /home/jm_c/test_svante/output/ifcMpi_tst.stdout
#SBATCH --no-requeue

# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/svante/test_svante_ifc_mpi,v 1.20 2023/02/19 14:56:32 jmc Exp $
# $Name: $

#- Slurm batch job: runs the MITgcm "testreport" verification suite (and the
#  "do_tst_2+2" restart test) with the ifort optfile and MPI on 6 tasks
#  (same count as "--tasks-per-node" above).
#  The "#SBATCH" lines must stay ahead of the first executable command.

#- make the "module" command available in this non-login batch shell:
if test -f /etc/profile.d/modules.sh ; then . /etc/profile.d/modules.sh ; fi
if test -f /etc/profile.d/zz_modules.sh ; then . /etc/profile.d/zz_modules.sh ; fi
# Note: added "ulimit -s unlimited" in file "~/.bashrc"
# to pass big test (the 2 fizhi-cs-* test & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: $(hostname)"

#- host reached by ssh to run the "mpack" mailer (see SEND):
headNode='svante-login'

dNam='svante'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
SEND="ssh $headNode $SavD/mpack"
#TST_DISK="/net/fs09/d1/jm_c"
TST_DISK="/scratch/jm_c"
#TST_DIR="$TST_DISK/test_${dNam}"
TST_DIR="test_${dNam}"
#- where local copy of code is (need to be consistent with "test_submit_svante"):
#srcDIR='.'
srcDIR=$HERE
srcCode="MITgcm_today"

#- following lines are not used here:
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- test set-up: dblTr=1 enables the separate pre-compile passes,
#  addExp/skipExp are space-separated experiment lists (both empty here).
dblTr=0 ; typ='' ; addExp='' ; skipExp=''
sfx='ifcMpi'; dblTr=1
module add intel/2021.4.0
module add openmpi
#module add netcdf
OPTFILE="../tools/build_options/linux_amd64_ifort"
export GENERIC='on' # <-- to prevent the use of "-xHost" option
#- needed for DIVA with MPI:
#export MPI_INC_DIR=$INC_MPI
options="$typ -MPI 6"
#- need this to get "staf" & "do_make_syntax.sh":
export PATH="$PATH:$HOME/bin"

gcmDIR="MITgcm_$sfx"
#- alternate build flavour on a 3-day cycle: "-fast" when (day-of-month % 3)
#  is 1, "-devel" otherwise. "10#" forces base 10 since "date +%d" is
#  zero-padded ("08", "09" would be rejected as octal numbers).
dAlt=$(date +%d) ; dAlt=$(( 10#$dAlt % 3 ))
if [ "$dAlt" -eq 1 ] ; then options="$options -fast"
else options="$options -devel" ; fi
#- strip leading blanks (unquoted on purpose: also squeezes inner blanks)
if test "x$skipExp" != x ; then skipExp=$(echo $skipExp | sed 's/^ *//') ; fi

#- checkOut=2 : start from a fresh copy of the code ; =1 : update the existing
#  clone with "git pull" ; other values: use the code in place as it is.
checkOut=2 ; #options="$options -do"
#options="$options -nc" ; checkOut=1 ; dblTr=0
#options="$options -q"  ; checkOut=0 ; dblTr=0

#- Move to the scratch disk and into the working dir $TST_DIR (created if
#  missing). Every later step (removing/copying $gcmDIR) uses relative paths,
#  so stop right here if either "cd" fails instead of working in the wrong place.
echo "cd $TST_DISK ; pwd (x1)"
if ! cd "$TST_DISK" ; then
  echo "Error: unable to cd to: $TST_DISK --> Exit"
  exit 1
fi
pwd ; ls -l
#- give the file-system a second chance before deciding the dir is missing
if test ! -d "$TST_DIR" ; then sleep 5 ; pwd ; ls -l ; fi
if test ! -d "$TST_DIR" ; then
  echo -n "Creating a working dir: $TST_DIR ..."
  mkdir "$TST_DIR"
  retVal=$?
  if test "x$retVal" != x0 ; then
    #- mkdir can fail because the dir showed up in the meantime: not an error
    if test ! -d "$TST_DIR" ; then
      echo " FAIL"
      echo "Error: unable to make dir: $TST_DIR (err=$retVal ) from $TST_DISK --> Exit"
      exit 1
    else echo " FAIL but dir now exists ! -> continue" ; fi
  else echo " done" ; fi
fi
echo "start from DIR='$TST_DISK/$TST_DIR' at: $(date)"
if ! cd "$TST_DIR" ; then
  echo "Error: unable to cd to: $TST_DISK/$TST_DIR --> Exit"
  exit 1
fi
pwd

#- Log the job context (Slurm variables, OS release, loaded modules).
NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo "Start job $THEDATE"
echo "NSLOTS = $NSLOTS"
echo '======= NODELIST ==============================================================='
echo "$SLURM_NODELIST"
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
module list 2>&1
echo '================================================================================'

#- check for disk space: relative space (99%) or absolute (10.G):
#  dsp = "Use%" column of the current file-system, without the "%" sign
dsp=$(df -P . | tail -1 | awk '{print $5}' | sed 's/%$//')
if [ "${dsp:-0}" -gt 99 ] ; then
#dsp=`df -P . | tail -1 | awk '{print $4}'`
#if [ $dsp -le 100000000 ] ; then
  echo 'Not enough space on this disk => do not run testreport.'
  df .
  #- report the failure (a bare "exit" would return the status of "df", i.e. 0)
  exit 1
fi

#- checkOut=1 : update the existing clone in place. This needs the git
#  metadata to be there; if it is not, switch to a fresh copy (checkOut=2).
if [ "$checkOut" -eq 1 ] ; then
  if test ! -e "$gcmDIR/.git/config" ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if test "x$addExp" != x ; then
    if test ! -e "$gcmDIR/$git_other/.git/config" ; then
      echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
      checkOut=2
    fi
  fi
fi
if [ "$checkOut" -eq 1 ] ; then
  echo "cleaning output from $gcmDIR/verification :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f "$gcmDIR"/verification/??_"${dNam}-${sfx}"_????????_?.tar.gz
  ( cd "$gcmDIR/verification" || exit 1
    #- an unmatched pattern stays literal and is skipped by "test -d"
    for dd in ??_"${dNam}-${sfx}"_????????_? ; do
      if test -d "$dd" ; then
        tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 && gzip "${dd}.tar" && /bin/rm -rf "$dd"
        retVal=$?
        if test "x$retVal" != x0 ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '$(hostname)' (return val=$retVal) but continue"
        fi
      fi
    done )
  echo "clean tst_2+2 + testreport output (+ Makefile_syntax files)"
  #- "cd ... &&" : never run a clean-up command from the wrong directory
  ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
  #- $typ is left unquoted so that an empty value adds no argument
  ( cd "$gcmDIR/verification" && ./testreport $typ -clean )
  ( cd "$gcmDIR/verification" && rm -f */build/Makefile_syntax )
  ( cd "$gcmDIR/verification" && rm -f */build/port_rand.i */build/ptracers_set_iolabel.i )
  if test "x$addExp" != x ; then
    ( cd "$gcmDIR/verification" || exit 1
      #- collect the symbolic links of this dir (no parsing of "ls" output)
      links=()
      for ff in * ; do
        if test -L "$ff" ; then links+=("$ff") ; fi
      done
      echo " + remove local links: ${links[*]}"
      /bin/rm -f -- "${links[@]}"
    )
  fi
  echo "Update $git_code code in dir $gcmDIR :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
    exit 1
  fi
  echo " and checkout master:"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if test "x$addExp" != x ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master:"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi

#- Print the date stamp held in file $1 (its first blank-separated field)
#  when it is made of digits only; print 0 when the file is missing, empty
#  or holds anything else, so that the caller keeps waiting instead of
#  tripping over a non-numeric value in its "-gt" comparison.
read_upd_date() {
  local stamp=''
  if test -f "$1" ; then read -r stamp _ < "$1" ; fi
  case "$stamp" in
    ''|*[!0-9]*) stamp=0 ;;
  esac
  echo "$stamp"
}

#- checkOut=2 : drop the old working copy and start from a copy of
#  $srcDIR/$srcCode, once the stamp file $srcDIR/updated_code carries
#  today's date (YYYYMMDD) or later.
if [ "$checkOut" -eq 2 ] ; then
  if test -e "$gcmDIR" ; then
    echo -n "Removing working copy: $gcmDIR ..."
    rm -rf -- "${gcmDIR:?}"
    echo " done"
  fi
  # make a local copy (instead of making a new clone):
  today=$(date +%Y%m%d)
  nCount=0; updFile="${srcDIR}/updated_code"
  updDate=$(read_upd_date "$updFile")
  #- poll once a minute, give up after 40 tries
  while [ "$today" -gt "$updDate" ] ; do
    nCount=$(( nCount + 1 ))
    if [ "$nCount" -gt 40 ] ; then
      echo " waiting too long (nCount=$nCount) for updated code"
      echo " today=$today , updDate=$updDate "
      ls -l "$updFile"
      exit 1
    fi
    sleep 60
    updDate=$(read_upd_date "$updFile")
  done
  ls -l "$updFile"
  echo " waited nCount=$nCount for updated code ($updDate) to copy"
  if test -d "$srcDIR/$srcCode" ; then
    echo -n "Make local copy of dir '$srcDIR/$srcCode' to: $gcmDIR ..."
    if cp -pra "$srcDIR/$srcCode" "$gcmDIR" ; then
      echo " done"
    else
      echo " FAIL"
      echo "Error: copy of '$srcDIR/$srcCode' to $gcmDIR failed => exit"
      exit 1
    fi
  else echo " dir: $srcDIR/$srcCode missing => exit" ; exit 1 ; fi
fi

#- change dir to $gcmDIR/verification + add link for additional experiments:
#  each name listed in $addExp gets a symbolic link to ../$git_other/<name> ;
#  two experiments also need links to input data under "other_input".
if test -e "$gcmDIR/verification" ; then
  if [ "$checkOut" -lt 2 ] ; then
    echo " dir $gcmDIR/verification exist" ; fi
  if ! cd "$gcmDIR/verification" ; then
    echo "unable to cd to: $gcmDIR/verification => exit"
    exit 1
  fi
  #- $addExp is a space-separated list: word-splitting is wanted here
  for exp2add in $addExp ; do
    #- clear any previous entry, including a dangling link ("-e" is false for it)
    if test -e "$exp2add" || test -L "$exp2add" ; then /bin/rm -rf "$exp2add" ; fi
    if test -d "../$git_other/$exp2add" ; then
      echo " add $exp2add link from $git_other"
      ln -s "../$git_other/$exp2add" .
      if test "$exp2add" = 'global_oce_cs32' ; then
        echo " link dir 'other_input/core2_cnyf' in here"
        ( cd "../${git_other}/${exp2add}" || exit 1
          test -L core2_cnyf && /bin/rm -f core2_cnyf
          ln -s ../../../other_input/core2_cnyf . )
      fi
      if test "$exp2add" = 'global_oce_llc90' ; then
        echo " link dir 'other_input/gael_oce_llc90_input' to 'input_fields'"
        ( cd "../${git_other}/${exp2add}" || exit 1
          test -L input_fields && /bin/rm -f input_fields
          ln -s ../../../other_input/gael_oce_llc90_input input_fields
          echo " link dirs: 'core2_cnyf' & 'global_oce_input_fields/*' in input_verifs"
          test ! -e input_verifs && mkdir input_verifs
          #- leave the sub-shell if "cd" fails: "rm -f ./*" must only
          #  ever run inside input_verifs
          ( cd input_verifs || exit 1
            /bin/rm -f ./*
            ln -s ../../../../other_input/core2_cnyf .
            ln -s ../../../../other_input/global_oce_input_fields/* . )
        )
      fi
    else
      echo " missing dir: $git_other/$exp2add"
      continue
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  exit 1
fi

#- Run the tests from $gcmDIR/verification. Each command is echoed first so
#  that the log shows what was run. $options is expanded unquoted on purpose
#  (it holds several testreport flags); single-word arguments are quoted.
if [ "$dblTr" -eq 1 ] ; then
  if [ "$dAlt" -ne 1 ] ; then
    echo ''
    #- 0) just make all module header ( *__genmod.mod files) using modified Makefile
    echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
      -j 4 -repl_mk do_make_syntax.sh -obj -dd
    ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
      -j 4 -repl_mk do_make_syntax.sh -obj -dd 2>&1
    options="$options -q"
  fi

  echo ''
  #- 1) just compile ("-nr"), using "-j 4" to speed up
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 4 -nr -odir ${dNam}-$sfx
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -j 4 -nr -odir "${dNam}-$sfx"
  #- count the lines of tr_out.txt that start with "Y . N N "
  nFc=$(grep -c '^Y . N N ' tr_out.txt)
  echo " <= fail to compile $nFc experiments"
  if [ "$dAlt" -eq 1 ] ; then options="$options -q" ; fi
fi

echo ''
#- 2) run and report results ; also finish to compile those who failed with "-j"
echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
  -odir ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
./testreport $options -of "$OPTFILE" -skd "$skipExp" \
  -odir "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
retVal=$?

#- the restart test is only run when testreport returned 0
if test "x$retVal" != x0 ; then
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
else
  echo ''
  #- 3) test restart and report results
  echo ../tools/do_tst_2+2 -mpi \
    -o ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ../tools/do_tst_2+2 -mpi \
    -o "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
fi

  ViewVC Help
Powered by ViewVC 1.1.22