/[MITgcm]/MITgcm_contrib/test_scripts/engaging/test_engag_op64_mpi
ViewVC logotype

Contents of /MITgcm_contrib/test_scripts/engaging/test_engag_op64_mpi

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.6 - (show annotations) (download)
Sat Dec 30 16:55:12 2023 UTC (18 months, 2 weeks ago) by jmc
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +6 -6 lines
print time few more times

#!/bin/bash
#SBATCH -J o64Mpi_tst
#SBATCH -p sched_mit_hill
#SBATCH -t 06:00:00
#SBATCH --mem-per-cpu 4000
#SBATCH -n 6
#SBATCH -N 2
#SBATCH -x node122
# #SBATCH -x node[360,365]
#SBATCH -e /home/jm_c/test_engaging/output/o64Mpi_tst.stderr
#SBATCH -o /home/jm_c/test_engaging/output/o64Mpi_tst.stdout
#SBATCH --no-requeue

# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/engaging/test_engag_op64_mpi,v 1.5 2023/11/06 14:23:01 jmc Exp $
# $Name:  $

#- SLURM batch job: updates (or clones) an MITgcm working copy, then runs
#  "testreport" and the "do_tst_2+2" restart test with the Open64 + MPI setup.

#- load the "module" shell function when this node provides it
if test -f /etc/profile.d/modules.sh ; then . /etc/profile.d/modules.sh ; fi
# Note: added "ulimit -s unlimited" in file "~/.bashrc"
#       to pass big test (the 2 fizhi-cs-* test & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: $(hostname)"

#- Site settings: head node used to send results, working directories,
#  scratch file and source repositories.
headNode='eofe8'

dNam='engaging'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
#- command handed to "testreport" and "do_tst_2+2" through their "-send" option
SEND="ssh $headNode $SavD/mpack"
TST_DIR="/pool001/jm_c/test_$dNam"
#- scratch file that receives the stderr of "git clone"; created with an
#  unpredictable name (fall back to the former "<name>.<pid>" form if mktemp
#  is not usable) and removed when the script exits
tmpFil=$(mktemp "/tmp/$(basename "$0").XXXXXX") || tmpFil="/tmp/$(basename "$0").$$"
trap 'rm -f "$tmpFil"' EXIT
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm';     git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- Settings of this particular test: compiler + MPI modules, optfile,
#  "testreport" options and name of the working copy.
dblTr=0 ; typ='' ; addExp='' ; skipExp=''
sfx='o64Mpi'; dblTr=1
#- currently, no NetCDF => no pkg/profiles
#skipExp="$skipExp global_oce_biogeo_bling"
 module add open64
 module add mvapich2/open64/64/2.0b
 export MPI_INC_DIR="$MPI_HOME/include"
 OPTFILE="../tools/build_options/linux_amd64_open64"
#options="-j 2 -devel -gsl"
 options="$typ -MPI 6"
#export OMP_NUM_THREADS=2
#export OMP_SLAVE_STACK_SIZE=400m
#export GSL_IEEE_MODE=double-precision,mask-underflow,mask-denormalized
 ulimit -s unlimited
#- need this to get "staf":
#export PATH="$PATH:$HOME/bin"

gcmDIR="MITgcm_$sfx"
#- use "-fast" when (day of the month) modulo 3 is 1, "-devel" otherwise;
#  "10#" forces base 10 so that "08" and "09" are not taken as octal
dAlt=$(date +%d) ; dAlt=$(( 10#$dAlt % 3 ))
if [ "$dAlt" -eq 1 ] ; then options="$options -fast"
                       else options="$options -devel" ; fi
#- strip leading blanks from the list of experiments to skip
#  ($skipExp is left unquoted on purpose: this also squeezes inner blanks)
if test "x$skipExp" != x ; then skipExp=$(echo $skipExp | sed 's/^ *//') ; fi

checkOut=1 ; #options="$options -do"
#options="$options -nc" ; checkOut=1
#options="$options -q"  ; checkOut=0 ; dblTr=0
# dblTr=-1  #- skip testreport completely (only run "do_tst_2+2")

#- The test area must already exist (it is not created here): report the
#  start time, or stop with an error, then move into it and show disk usage.
if test -d "$TST_DIR" ; then
  echo "start from TST_DIR='$TST_DIR' at: $(date)"
else
# if test ! -d $TST_DIR ; then mkdir $TST_DIR ; fi
# if test ! -d $TST_DIR ; then
#   echo "ERROR: Can't create directory \"$TST_DIR\""
#   exit 1
# fi
# echo "start in new dir TST_DIR='$TST_DIR' at: "`date`
  echo "ERROR: missing directory \"$TST_DIR\""
  exit 1
fi
#- everything below uses relative paths: do not go on from the wrong place
cd "$TST_DIR" || exit 1
pwd
df .

#- Log the context of this job: start time, number of tasks, node list,
#  OS release, SLURM environment variables and loaded modules.
NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo "Start job $THEDATE"
echo "NSLOTS = $NSLOTS"
echo '======= NODELIST ==============================================================='
#- quoted: a bracketed node range must be printed as is, not glob-expanded
echo "$SLURM_NODELIST"
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
module list 2>&1
echo '================================================================================'

#- check for disk space: relative space (99%) or absolute (10.G):
#  take the 5th column (capacity) of the last "df -P" line, without its "%"
dsp=$(df -P . | awk '{ use = $5 } END { sub(/%$/, "", use) ; print use }')
case "$dsp" in
  ''|*[!0-9]*)
    #- usage could not be read: report it and carry on instead of letting
    #  the numeric test below fail on a non-integer
    echo "WARNING: could not read disk usage from 'df -P .' (got '$dsp')" >&2
    dsp=0 ;;
esac
if [ "$dsp" -gt 99 ] ; then
#dsp=`df -P . | tail -1 | awk '{print $4}'`
#if [ $dsp -le 100000000 ] ; then
  echo 'Not enough space on this disk => do not run testreport.'
  df .
  #- nothing was tested: end the job with a failure status
  exit 1
fi
#- An update of an existing working copy was requested (checkOut=1): switch
#  to a fresh clone (checkOut=2) if the git working copy of the main code, or
#  of the additional experiments when some are requested, is not there.
if [ "$checkOut" -eq 1 ] ; then
  if test ! -e "$gcmDIR/.git/config" ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if test "x$addExp" != x ; then
    if test ! -e "$gcmDIR/$git_other/.git/config" ; then
      echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
      checkOut=2
    fi
  fi
fi
#- Update an existing working copy (checkOut=1): archive and remove the output
#  of previous runs, clean the verification experiments, then "git pull" and
#  restore the tracked files from branch master.
if [ "$checkOut" -eq 1 ] ; then
  echo "cleaning output from $gcmDIR/verification @ $(date +"%H:%M:%S") :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f "$gcmDIR"/verification/??_"${dNam}-${sfx}"_????????_?.tar.gz
  #- a failed "cd" ends the sub-shell, so nothing is removed from another dir
  ( cd "$gcmDIR/verification" || exit 1
    #- an unmatched pattern stays literal and is skipped by "test -d"
    for dd in ??_"${dNam}-${sfx}"_????????_?
    do
      if test -d "$dd" ; then
        tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 && gzip "${dd}.tar" && /bin/rm -rf "$dd"
        retVal=$?
        if test "x$retVal" != x0 ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '$(hostname)' (return val=$retVal) but continue"
        fi
      fi
    done )
  echo "clean tst_2+2 + testreport output"
  ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
  #- $typ is left unquoted on purpose: it holds zero or more options
  ( cd "$gcmDIR/verification" && ./testreport $typ -clean )
  if test "x$addExp" != x ; then
    ( cd "$gcmDIR/verification" || exit 1
      #- symbolic links are the "ls -o" lines starting with "l" (name = 8th field)
      listD=$(ls -o | grep '^l' | awk '{print $8}' 2> /dev/null)
      echo " + remove local links: $listD"
      #- $listD is left unquoted on purpose: it is a list of names
      /bin/rm -f $listD
    )
  fi
  echo "Update $git_code code in dir $gcmDIR @ $(date +"%H:%M:%S") :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
    #- nothing was tested: end the job with a failure status
    exit 1
  fi
  echo " and checkout master @ $(date +"%H:%M:%S") :"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if test "x$addExp" != x ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other @ $(date +"%H:%M:%S") :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master @ $(date +"%H:%M:%S") :"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi

#- Fresh clone (checkOut=2): remove any previous working copy, clone the main
#  code (and the additional experiments when some are requested), then give
#  group read access to the new tree.
if [ "$checkOut" -eq 2 ] ; then
  if test -e "$gcmDIR" ; then
    echo -n "Removing working copy: $gcmDIR ..."
    rm -rf "$gcmDIR"
    echo " done"
  fi
  echo "Make a clone of $git_code from repo: $git_repo ..."
  #- stderr is kept in $tmpFil and only shown if the clone fails
  git clone "$git_repo/${git_code}.git" "$gcmDIR" 2> "$tmpFil"
  retVal=$?
  if test $retVal = 0 ; then
    echo ' done' ; rm -f "$tmpFil"
  else
    echo " Error: 'git clone' returned: $retVal"
    cat "$tmpFil" ; rm -f "$tmpFil"
    #- nothing was tested: end the job with a failure status
    exit 1
  fi
  if test "x$addExp" != x ; then
    echo "Make a clone of $git_other from repo: $git_repo ..."
    ( cd "$gcmDIR" && git clone "$git_repo/${git_other}.git" 2> "$tmpFil" )
    retVal=$?
    if test $retVal = 0 ; then
      echo ' done' ; rm -f "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil" ; rm -f "$tmpFil"
      exit 1
    fi
  fi
  if test -d "$gcmDIR/verification" ; then
    #- "-exec ... {} +" hands the names straight to chmod, whatever they contain
    /usr/bin/find "$gcmDIR" -type d -exec chmod g+rxs {} +
    /usr/bin/find "$gcmDIR" -type f -exec chmod g+r {} +
  fi
fi

#- change dir to $gcmDIR/verification + add link for additional experiments:
if test -e "$gcmDIR/verification" ; then
  if [ "$checkOut" -lt 2 ] ; then
    echo " dir $gcmDIR/verification exist" ; fi
  #- "testreport" and "do_tst_2+2" are called below with relative paths
  cd "$gcmDIR/verification" || exit 1
  for exp2add in $addExp ; do
    #- drop a previous copy or link, even a dangling or unreadable one,
    #  so that "ln -s" below does not fail on an existing name
    if test -e "$exp2add" || test -L "$exp2add" ; then
      /bin/rm -rf "$exp2add"
    fi
    if test -d "../$git_other/$exp2add" ; then
      echo " add $exp2add link from $git_other"
      ln -s "../$git_other/$exp2add" .
    else
      echo " missing dir: $git_other/$exp2add"
      continue
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  #- nothing was tested: end the job with a failure status
  exit 1
fi

#- First pass, only when dblTr=1: print the command, build all experiments
#  without running them, then add "-q" to the options used by the next pass.
if [ "$dblTr" -eq 1 ] ; then
  echo ''
  #- 1) just compile ("-nr"), using "-j 2" to speed up
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -j 2 -nr -odir ${dNam}-$sfx
  #- $options is left unquoted on purpose: it holds several options
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -j 2 -nr -odir "${dNam}-$sfx"
  options="$options -q"
fi

#- Main pass, unless dblTr is negative: print the command, run "testreport"
#  and keep its return value in retVal (0 when this pass is skipped).
if [ "$dblTr" -ge 0 ] ; then
  echo ''
  #- 2) run and report results ; also finish to compile those who failed with "-j"
  echo ./testreport $options -of $OPTFILE -skd \'$skipExp\' \
    -odir ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  #- $options is left unquoted on purpose: it holds several options
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -odir "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
  retVal=$?
else retVal=0 ; fi

#- Restart test: only run when retVal (set by the "testreport" pass) is 0.
if test "x$retVal" != x0 ; then
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
else
  echo ''
  #- 3) test restart and report results
  echo ../tools/do_tst_2+2 -mpi \
    -o ${dNam}-$sfx -send \'$SEND\' -sd $SavD -a jm_c@mitgcm.org
  ../tools/do_tst_2+2 -mpi \
    -o "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
fi

# exit 0
# echo -n "-- SLURM_TASKS_PER_NODE= $SLURM_TASKS_PER_NODE ; " >> ${OUTP}/tracer_adj.log
# date >> ${OUTP}/tracer_adj.log
# grep 'My Processor Name' tutorial_tracer_adjsens/run/STDOUT.000? \
# | sed 's/tutorial_tracer_adjsens\// /' >> ${OUTP}/tracer_adj.log
# grep 'My Processor Name' tutorial_tracer_adjsens/tr_run.som81/STDOUT.000? \
# | sed 's/tutorial_tracer_adjsens\// /' >> ${OUTP}/tracer_adj.log

  ViewVC Help
Powered by ViewVC 1.1.22