/[MITgcm]/MITgcm_contrib/test_scripts/engaging/test_engag_op64_mpi
ViewVC logotype

Annotation of /MITgcm_contrib/test_scripts/engaging/test_engag_op64_mpi

Parent Directory | Revision Log | View Revision Graph


Revision 1.4 - (hide annotations) (download)
Thu Oct 19 14:43:00 2023 UTC (20 months, 4 weeks ago) by jmc
Branch: MAIN
Changes since 1.3: +8 -1 lines
check testreport exit value before running restart test

#!/bin/bash
#SBATCH -J o64Mpi_tst
#SBATCH -p sched_mit_hill
#SBATCH -t 06:00:00
#SBATCH --mem-per-cpu 4000
#SBATCH -n 6
#SBATCH -N 2
#SBATCH -x node122
# #SBATCH -x node[360,365]
#SBATCH -e /home/jm_c/test_engaging/output/o64Mpi_tst.stderr
#SBATCH -o /home/jm_c/test_engaging/output/o64Mpi_tst.stdout
#SBATCH --no-requeue

# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/engaging/test_engag_op64_mpi,v 1.3 2023/02/19 15:02:22 jmc Exp $
# $Name: $
#
# Purpose: Slurm batch job that updates (or freshly clones) the MITgcm
#  source tree, runs the verification suite ("testreport") with MPI and,
#  if testreport returns 0, runs the restart test ("do_tst_2+2").
# Note: the "#SBATCH" directives above must stay at the very start of their
#  line and ahead of the first command, otherwise sbatch ignores them.

#- source the modules init file when present (presumably what defines the
#  "module" command used below -- TODO confirm on the target cluster)
if test -f /etc/profile.d/modules.sh ; then . /etc/profile.d/modules.sh ; fi
# Note: "ulimit -s unlimited" was also added to file "~/.bashrc", in order
#  to pass the big tests (the 2 fizhi-cs-* tests & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: $(hostname)"
headNode='eofe8'

#- locations: HERE (scripts), OUTP (job output), SavD (dir given to "-sd"),
#  TST_DIR (where the code is cloned and tested)
dNam='engaging'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
#- command used to send the results: runs "mpack" on the head node via ssh
SEND="ssh $headNode $SavD/mpack"
TST_DIR="/pool001/jm_c/test_$dNam"
#- scratch file that receives the stderr of "git clone"
tmpFil="/tmp/${0##*/}.$$"
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- dblTr=1 : run testreport twice (1rst: compile only ; 2nd: run + report)
dblTr=0 ; typ='' ; addExp='' ; skipExp=''
sfx='o64Mpi'; dblTr=1
#- currently, no NetCDF => no pkg/profiles
#skipExp="$skipExp global_oce_biogeo_bling"
module add open64
module add mvapich2/open64/64/2.0b
#- NOTE(review): MPI_HOME is not set in this script; presumably it comes
#  from the mvapich2 module loaded just above -- confirm
export MPI_INC_DIR="$MPI_HOME/include"
OPTFILE="../tools/build_options/linux_amd64_open64"
#options="-j 2 -devel -gsl"
options="$typ -MPI 6"
#export OMP_NUM_THREADS=2
#export OMP_SLAVE_STACK_SIZE=400m
#export GSL_IEEE_MODE=double-precision,mask-underflow,mask-denormalized
ulimit -s unlimited
#- need this to get "staf":
#export PATH="$PATH:$HOME/bin"

gcmDIR="MITgcm_$sfx"
#- alternate the optimisation level with the day of the month: "-fast" when
#  (day % 3) == 1, "-devel" otherwise. The "10#" prefix forces base 10 so
#  that days "08" and "09" are not rejected as invalid octal numbers.
dAlt=$(date +%d) ; dAlt=$(( 10#$dAlt % 3 ))
if [ "$dAlt" -eq 1 ] ; then options="$options -fast"
else options="$options -devel" ; fi
#- $skipExp is left unquoted on purpose (a space-separated list of names)
if test "x$skipExp" != x ; then skipExp=$(echo $skipExp | sed 's/^ *//') ; fi

#- checkOut=1 : update the existing clone ; =2 : make a fresh clone
checkOut=1 ; #options="$options -do"
#options="$options -nc" ; checkOut=1
#options="$options -q"  ; checkOut=0 ; dblTr=0

#- the test directory must already exist: it is not created here.
#  $TST_DIR is quoted so that an empty value is rejected by "test -d"
#  (unquoted, "test -d" with no operand would succeed).
if test -d "$TST_DIR" ; then
  echo "start from TST_DIR='$TST_DIR' at: $(date)"
else
  #if test ! -d $TST_DIR ; then mkdir $TST_DIR ; fi
  #if test ! -d $TST_DIR ; then
  #  echo "ERROR: Can't create directory \"$TST_DIR\""
  #  exit 1
  #fi
  #echo "start in new dir TST_DIR='$TST_DIR' at: "`date`
  echo "ERROR: missing directory \"$TST_DIR\""
  exit 1
fi
#- stop if we cannot get there: all later steps use relative paths (and
#  some of them remove files)
cd "$TST_DIR" || { echo "ERROR: cannot cd to \"$TST_DIR\"" ; exit 1 ; }
pwd
df .

#- print a summary of the Slurm job: start time, number of tasks, node list,
#  OS release, SLURM_* environment variables and loaded modules.
NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo "Start job $THEDATE"
echo "NSLOTS = $NSLOTS"
echo '======= NODELIST ==============================================================='
echo "$SLURM_NODELIST"
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
#- "module list" output is redirected from stderr to stdout
module list 2>&1
echo '================================================================================'

#- check for disk space: relative space (99%) or absolute (10.G):
#  dsp = "Use%" column of "df -P ." (last line), without the trailing "%"
dsp=$(df -P . | tail -1 | awk '{print $5}' | sed 's/%$//')
case "$dsp" in
  ''|*[!0-9]*)
    #- not a number: say so explicitly and carry on (the check is skipped)
    echo "WARNING: cannot get disk usage from 'df' (got: '$dsp') => skip this check"
    ;;
  *)
    if [ "$dsp" -gt 99 ] ; then
    #dsp=`df -P . | tail -1 | awk '{print $4}'`
    #if [ $dsp -le 100000000 ] ; then
      echo 'Not enough space on this disk => do not run testreport.'
      df .
      #- non-zero status, so that the job is not reported as successful
      exit 1
    fi
    ;;
esac
#- when an update was requested (checkOut=1), check that the git working
#  copy (and, if extra experiments are requested through $addExp, the
#  $git_other working copy) is there; if not, switch to a fresh clone
#  (checkOut=2).
if [ "$checkOut" -eq 1 ] ; then
  if test ! -e "$gcmDIR/.git/config" ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if test "x$addExp" != x ; then
    if test ! -e "$gcmDIR/$git_other/.git/config" ; then
      echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
      checkOut=2
    fi
  fi
fi
#- checkOut=1 : clean the output of the previous run, then update the
#  existing clone(s) with "git pull" + "git checkout master -- ."
#  Every "cd" inside a sub-shell is chained with "&&" / "|| exit" so that
#  nothing is cleaned or updated from the wrong directory if "cd" fails.
if [ "$checkOut" -eq 1 ] ; then
  echo "cleaning output from $gcmDIR/verification :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f "$gcmDIR"/verification/??_"${dNam}-${sfx}"_????????_?.tar.gz
  ( cd "$gcmDIR/verification" || exit 1
    #- a pattern without match is left as is and then skipped by "test -d"
    for dd in ??_"${dNam}-${sfx}"_????????_? ; do
      if test -d "$dd" ; then
        tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 && gzip "${dd}.tar" && /bin/rm -rf "$dd"
        retVal=$?
        if test "x$retVal" != x0 ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '$(hostname)' (return val=$retVal) but continue"
        fi
      fi
    done )
  echo "clean tst_2+2 + testreport output"
  ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
  #- $typ is left unquoted on purpose (empty, or a list of options)
  ( cd "$gcmDIR/verification" && ./testreport $typ -clean )
  if test "x$addExp" != x ; then
    ( cd "$gcmDIR/verification" || exit 1
      #- collect the symbolic links of this directory (no parsing of "ls")
      links=()
      for ff in * ; do
        if test -L "$ff" ; then links+=("$ff") ; fi
      done
      echo " + remove local links: ${links[*]}"
      if [ "${#links[@]}" -gt 0 ] ; then /bin/rm -f -- "${links[@]}" ; fi
    )
  fi
  echo "Update $git_code code in dir $gcmDIR :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
    exit 1
  fi
  echo " and checkout master:"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if test "x$addExp" != x ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master:"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi

#- checkOut=2 : remove any previous working copy and make a fresh clone of
#  $git_code (plus $git_other inside it when $addExp is not empty); the
#  stderr of "git clone" goes to $tmpFil and is only shown on failure.
if [ "$checkOut" -eq 2 ] ; then
  if test -e "$gcmDIR" ; then
    echo -n "Removing working copy: $gcmDIR ..."
    #- ":?" aborts instead of running "rm -rf" on an empty path
    rm -rf -- "${gcmDIR:?}"
    echo " done"
  fi
  echo "Make a clone of $git_code from repo: $git_repo ..."
  git clone "$git_repo/${git_code}.git" "$gcmDIR" 2> "$tmpFil"
  retVal=$?
  if test $retVal = 0 ; then
    echo ' done' ; rm -f "$tmpFil"
  else
    echo " Error: 'git clone' returned: $retVal"
    cat "$tmpFil" ; rm -f "$tmpFil"
    exit 1
  fi
  if test "x$addExp" != x ; then
    echo "Make a clone of $git_other from repo: $git_repo ..."
    #- redirect the whole sub-shell, so that a failing "cd" is logged too
    ( cd "$gcmDIR" && git clone "$git_repo/${git_other}.git" ) 2> "$tmpFil"
    retVal=$?
    if test $retVal = 0 ; then
      echo ' done' ; rm -f "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil" ; rm -f "$tmpFil"
      exit 1
    fi
  fi
  if test -d "$gcmDIR/verification" ; then
    #- give group read access (+ search & set-group-ID on directories);
    #  "-exec ... {} +" is safe with any file name and runs nothing when
    #  there is no match (unlike "find | xargs")
    /usr/bin/find "$gcmDIR" -type d -exec chmod g+rxs {} +
    /usr/bin/find "$gcmDIR" -type f -exec chmod g+r {} +
  fi
fi

#- change dir to $gcmDIR/verification + add link for additional experiments:
#  each name in $addExp becomes a symbolic link to ../$git_other/<name>
#  (any previous, readable entry with that name is removed first).
if test -e "$gcmDIR/verification" ; then
  if [ "$checkOut" -lt 2 ] ; then
    echo " dir $gcmDIR/verification exist" ; fi
  #- the tests below are started with relative paths: stop if "cd" fails
  cd "$gcmDIR/verification" || { echo "cannot cd to: $gcmDIR/verification => exit" ; exit 1 ; }
  #- $addExp is left unquoted on purpose (a space-separated list of names)
  for exp2add in $addExp ; do
    test -r "$exp2add" && /bin/rm -rf "$exp2add"
    if test -d "../$git_other/$exp2add" ; then
      echo " add $exp2add link from $git_other"
      ln -s "../$git_other/$exp2add" .
    else
      echo " missing dir: $git_other/$exp2add"
      continue
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  exit 1
fi

#- Run the tests from the current directory. Each command is echoed before
#  it is executed.
#  $options stays unquoted on purpose (a list of separate options), whereas
#  $skipExp and $SEND are each passed as one single argument.
if [ "$dblTr" -eq 1 ] ; then
  echo ''
  #- 1) just compile ("-nr"), using "-j 2" to speed up
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 2 -nr -odir ${dNam}-$sfx
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -j 2 -nr -odir "${dNam}-$sfx"
  #- add "-q" for the second pass
  options="$options -q"
fi

echo ''
#- 2) run and report results ; also finish compiling those that failed with "-j"
echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
  -odir ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
./testreport $options -of "$OPTFILE" -skd "$skipExp" \
  -odir "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
retVal=$?

#- the restart test is only run when testreport returned 0
if test "x$retVal" != x0 ; then
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
else
  echo ''
  #- 3) test restart and report results
  echo ../tools/do_tst_2+2 -mpi \
    -o ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ../tools/do_tst_2+2 -mpi \
    -o "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
fi

#- disabled: log which processors ran the tutorial_tracer_adjsens test
# exit 0
# echo -n "-- SLURM_TASKS_PER_NODE= $SLURM_TASKS_PER_NODE ; " >> ${OUTP}/tracer_adj.log
# date >> ${OUTP}/tracer_adj.log
# grep 'My Processor Name' tutorial_tracer_adjsens/run/STDOUT.000? \
#   | sed 's/tutorial_tracer_adjsens\// /' >> ${OUTP}/tracer_adj.log
# grep 'My Processor Name' tutorial_tracer_adjsens/tr_run.som81/STDOUT.000? \
#   | sed 's/tutorial_tracer_adjsens\// /' >> ${OUTP}/tracer_adj.log

  ViewVC Help
Powered by ViewVC 1.1.22