/[MITgcm]/MITgcm_contrib/test_scripts/svante/test_svante_pgi_mpi
ViewVC logotype

Annotation of /MITgcm_contrib/test_scripts/svante/test_svante_pgi_mpi

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.23 - (hide annotations) (download)
Mon Nov 6 13:35:01 2023 UTC (20 months, 1 week ago) by jmc
Branch: MAIN
CVS Tags: HEAD
Changes since 1.22: +3 -1 lines
Add new script to make tar file of output-dir results and
save this backup file after testreport and restart test.

1 jmc 1.1 #!/bin/bash
2 jmc 1.21 #SBATCH -J pgiMpi_tst
3 jmc 1.5 #SBATCH -p fdr
4 jmc 1.14 #SBATCH -t 23:30:00
5 jmc 1.5 #SBATCH --mem-per-cpu 4000
6     #SBATCH -N 1
7     #SBATCH --tasks-per-node 6
8 jmc 1.20 # #SBATCH -x curly,larry,moe,shemp
9 jmc 1.21 #SBATCH -e /home/jm_c/test_svante/output/pgiMpi_tst.stderr
10     #SBATCH -o /home/jm_c/test_svante/output/pgiMpi_tst.stdout
11 jmc 1.7 #SBATCH --no-requeue
12 jmc 1.1
13 jmc 1.23 # $Header: /u/gcmpack/MITgcm_contrib/test_scripts/svante/test_svante_pgi_mpi,v 1.22 2023/10/19 14:38:38 jmc Exp $
14 jmc 1.1 # $Name: $
15    
#- load the definition of the "module" command, if this system provides it:
if [[ -f /etc/profile.d/modules.sh ]] ; then . /etc/profile.d/modules.sh ; fi
if [[ -f /etc/profile.d/zz_modules.sh ]] ; then . /etc/profile.d/zz_modules.sh ; fi
# Note: added "ulimit -s unlimited" in file "~/.bashrc"
#       to pass big test (the 2 fizhi-cs-* test & adjoint tests) with MPI

umask 0022
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
echo " running on: $(hostname)"
#- host that is reached through ssh to run the "mpack" command (see $SEND):
headNode='svante-login'

dNam='svante'
HERE="$HOME/test_${dNam}"
OUTP="$HERE/output"; SavD="$HERE/send"
SEND="ssh $headNode $SavD/mpack"
#TST_DISK="/net/fs09/d1/jm_c"
TST_DISK="/scratch/jm_c"
#TST_DIR="$TST_DISK/test_${dNam}"
#- working dir name, relative to $TST_DISK:
TST_DIR="test_${dNam}"
#- where local copy of code is (need to be consistent with "test_submit_svante"):
#srcDIR='.'
srcDIR=$HERE
srcCode="MITgcm_today"

#- following lines are not used here:
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'
#- which GitHub repository to use and how to access it:
git_repo='MITgcm'; git_code='MITgcm' ; git_other='verification_other'
#git_repo='altMITgcm'; #git_code='MITgcm66h'
#--
git_repo="https://github.com/$git_repo"
#git_repo="git://github.com/$git_repo"
#git_repo="git@github.com:$git_repo"

#- settings specific to this test (compiler, MPI, list of experiments):
dblTr=0 ; typ='' ; addExp='' ; skipExp=''
sfx='pgiMpi'
module add pgi/16.9
module add openmpi
module add netcdf
OPTFILE="../tools/build_options/linux_amd64_pgf77"
#- needed for DIVA with MPI:
#export MPI_INC_DIR="/home/software/pgi/16.9/linux86-64/2016/mpi/openmpi-1.10.2/include"
options="$typ -MPI 6"
#- need this to get "staf":
#export PATH="$PATH:$HOME/bin"

gcmDIR="MITgcm_$sfx"
#- alternate between "-fast" and "-devel" according to day of the month
#  modulo 3 ; force base 10 since "08" & "09" are not valid octal numbers
dAlt=$(date +%d) ; dAlt=$(( 10#$dAlt % 3 ))
if [[ "$dAlt" -eq 1 ]] ; then options="$options -fast"
else options="$options -devel" ; fi
#- remove leading blanks from the list of experiments to skip:
if [[ -n "$skipExp" ]] ; then
  skipExp=$(printf '%s\n' "$skipExp" | sed 's/^ *//')
fi

#- checkOut=2 : start from a fresh copy of the code ; =1 : update existing one
checkOut=2 ; #options="$options -do"
#options="$options -nc" ; checkOut=1
#options="$options -q" ; checkOut=0

#- move to the test disk and then into the working dir $TST_DIR (made here
#  if missing). Stop if any "cd" fails: everything that follows (including
#  "rm -rf $gcmDIR") uses paths relative to the current dir.
echo "cd $TST_DISK ; pwd (x1)"
if ! cd "$TST_DISK" ; then
  echo "Error: unable to cd to: $TST_DISK --> Exit"
  exit 1
fi
pwd ; ls -l
#- working dir not seen: wait 5 s and list again before a 2nd check
if [[ ! -d "$TST_DIR" ]] ; then sleep 5 ; pwd ; ls -l ; fi
if [[ ! -d "$TST_DIR" ]] ; then
  echo -n "Creating a working dir: $TST_DIR ..."
  mkdir "$TST_DIR"
  retVal=$?
  if [[ "$retVal" -eq 0 ]] ; then
    echo " done"
  elif [[ -d "$TST_DIR" ]] ; then
    #- mkdir failed but the dir is there now: go on
    echo " FAIL but dir now exists ! -> continue"
  else
    echo " FAIL"
    echo "Error: unable to make dir: $TST_DIR (err=$retVal ) from $TST_DISK --> Exit"
    exit 1
  fi
fi
echo "start from DIR='$TST_DISK/$TST_DIR' at: $(date)"
if ! cd "$TST_DIR" ; then
  echo "Error: unable to cd to: $TST_DISK/$TST_DIR --> Exit"
  exit 1
fi
pwd

#- print a summary of this job (date, nb of tasks, nodes, SLURM env, modules)
#  to the job standard output.
NSLOTS=$SLURM_NTASKS
THEDATE=$(date)
echo '********************************************************************************'
echo "Start job $THEDATE"
echo "NSLOTS = $NSLOTS"
echo '======= NODELIST ==============================================================='
#- quoted: a node list can contain "[...]" that the shell would try to expand
echo "$SLURM_NODELIST"
cat /etc/redhat-release
echo '======= env ===================================================================='
env | grep SLURM
echo '======= modules ================================================================'
module list 2>&1
echo '================================================================================'

#- check for disk space: relative space (stop when usage is above 99%) ;
#  the alternative test on absolute space (10.G) is kept commented out.
#  $dsp is the "Capacity" column (5th one) of the last line of "df -P .",
#  without its trailing "%".
dsp=$(df -P . | awk '{ use = $5 } END { sub(/%$/, "", use) ; print use }')
if ! [[ "$dsp" =~ ^[0-9]+$ ]] ; then
  #- no usable number from "df": say so and go on (instead of a "[" error)
  echo "Warning: unable to get disk usage from 'df' (got: '$dsp') => skip this check"
elif [ "$dsp" -gt 99 ] ; then
#dsp=`df -P . | tail -1 | awk '{print $4}'`
#if [ $dsp -le 100000000 ] ; then
  echo 'Not enough space on this disk => do not run testreport.'
  df .
  exit 1
fi

#- checkOut=1: re-use the git clone found in $gcmDIR ; but switch to
#  checkOut=2 (fresh copy) if the expected ".git/config" file is missing.
if [[ "$checkOut" -eq 1 ]] ; then
  if [[ ! -e "$gcmDIR/.git/config" ]] ; then
    echo "no file: $gcmDIR/.git/config => try to download a fresh clone"
    checkOut=2
  fi
  if [[ -n "$addExp" && ! -e "$gcmDIR/$git_other/.git/config" ]] ; then
    echo "no file: $gcmDIR/$git_other/.git/config => try a fresh clone"
    checkOut=2
  fi
fi
if [[ "$checkOut" -eq 1 ]] ; then
  echo "cleaning output from $gcmDIR/verification :"
  #- remove previous output tar files and tar & remove previous output-dir
  /bin/rm -f "$gcmDIR"/verification/??_"${dNam}-${sfx}"_????????_?.tar.gz
  #- each subshell leaves at once if "cd" fails, so that nothing gets
  #  archived or removed from the wrong dir
  ( cd "$gcmDIR/verification" || exit 1
    for dd in ??_"${dNam}-${sfx}"_????????_? ; do
      #- (a pattern without any match stays as is and fails the "-d" test)
      if [[ -d "$dd" ]] ; then
        tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 \
          && gzip "${dd}.tar" && /bin/rm -rf "$dd"
        retVal=$?
        if [[ "$retVal" -ne 0 ]] ; then
          echo "ERROR in tar+gzip prev outp-dir: $dd"
          echo " on '$(hostname)' (return val=$retVal) but continue"
        fi
      fi
    done )
  echo "clean tst_2+2 + testreport output"
  ( cd "$gcmDIR/verification" && ../tools/do_tst_2+2 -clean )
  #- $typ is left unquoted: empty here, it must not become an empty argument
  ( cd "$gcmDIR/verification" && ./testreport $typ -clean )
  if [[ -n "$addExp" ]] ; then
    ( cd "$gcmDIR/verification" || exit 1
      #- collect the symbolic links of this dir (no parsing of "ls" output)
      links=()
      for ff in * ; do
        if [[ -L "$ff" ]] ; then links+=("$ff") ; fi
      done
      echo " + remove local links: ${links[*]}"
      if [[ "${#links[@]}" -gt 0 ]] ; then /bin/rm -f -- "${links[@]}" ; fi
    )
  fi
  echo "Update $git_code code in dir $gcmDIR :"
  ( cd "$gcmDIR" && git pull ) 2>&1
  retVal=$?
  if [[ "$retVal" -ne 0 ]] ; then
    echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
    exit 1
  fi
  echo " and checkout master:"
  ( cd "$gcmDIR" && git checkout master -- . ) 2>&1
  if [[ -n "$addExp" ]] ; then
    echo "Update $git_other code in dir $gcmDIR/$git_other :"
    ( cd "$gcmDIR/$git_other" && git pull ) 2>&1
    retVal=$?
    if [[ "$retVal" -ne 0 ]] ; then
      echo "git pull on '$(hostname)' fail (return val=$retVal) => exit"
      exit 1
    fi
    echo " and checkout master:"
    ( cd "$gcmDIR/$git_other" && git checkout master -- . ) 2>&1
  fi
fi

#- print the date stamp (1rst word of the 1rst line) stored in file $1 ;
#  print 0 if the file is missing or if this is not a plain number, so that
#  the caller always gets something that can be compared as an integer.
get_upd_date() {
  local stamp='' rest=''
  if [[ -f "$1" ]] ; then read -r stamp rest < "$1" ; fi
  if [[ "$stamp" =~ ^[0-9]+$ ]] ; then echo "$stamp" ; else echo 0 ; fi
}

#- checkOut=2: remove any previous working copy $gcmDIR and replace it with
#  a copy of $srcDIR/$srcCode, once file $srcDIR/updated_code shows
#  today's date (or later).
if [[ "$checkOut" -eq 2 ]] ; then
  if [[ -e "$gcmDIR" ]] ; then
    echo -n "Removing working copy: $gcmDIR ..."
    rm -rf -- "$gcmDIR"
    echo " done"
  fi
# make a local copy (instead of making a new clone):
  today=$(date +%Y%m%d)
  nCount=0; updFile="${srcDIR}/updated_code"
  updDate=$(get_upd_date "$updFile")
  #- check every 60 s, and give up after 40 checks:
  while [ "$today" -gt "$updDate" ] ; do
    nCount=$(( nCount + 1 ))
    if [[ "$nCount" -gt 40 ]] ; then
      echo " waiting too long (nCount=$nCount) for updated code"
      echo " today=$today , updDate=$updDate "
      ls -l "$updFile"
      exit 1
    fi
    sleep 60
    updDate=$(get_upd_date "$updFile")
  done
  ls -l "$updFile"
  echo " waited nCount=$nCount for updated code ($updDate) to copy"
  if [[ -d "$srcDIR/$srcCode" ]] ; then
    echo -n "Make local copy of dir '$srcDIR/$srcCode' to: $gcmDIR ..."
    cp -pra "$srcDIR/$srcCode" "$gcmDIR"
    retVal=$?
    if [[ "$retVal" -eq 0 ]] ; then
      echo " done"
    else
      #- report the problem instead of a misleading " done"
      echo " FAIL (cp return val=$retVal) but continue"
    fi
  else echo " dir: $srcDIR/$srcCode missing => exit" ; exit 1 ; fi
fi

#- change dir to $gcmDIR/verification + add link for additional experiments:
#  each experiment of list $addExp found in ../$git_other is linked in here,
#  with few more links to "other_input" dirs for 2 of them.
if [[ -e "$gcmDIR/verification" ]] ; then
  if [[ "$checkOut" -lt 2 ]] ; then
    echo " dir $gcmDIR/verification exist" ; fi
  if ! cd "$gcmDIR/verification" ; then
    echo "unable to cd to: $gcmDIR/verification => exit"
    exit 1
  fi
  #- $addExp is a blank-separated list: left unquoted to get 1 word per exp.
  for exp2add in $addExp ; do
    #- remove previous link or dir (also a link that points to nothing)
    if [[ -e "$exp2add" || -L "$exp2add" ]] ; then /bin/rm -rf "$exp2add" ; fi
    if [[ -d "../$git_other/$exp2add" ]] ; then
      echo " add $exp2add link from $git_other"
      ln -s "../$git_other/$exp2add" .
      case "$exp2add" in
        global_oce_cs32)
          echo " link dir 'other_input/core2_cnyf' in here"
          ( cd "../${git_other}/${exp2add}" || exit 1
            if [[ -L core2_cnyf ]] ; then /bin/rm -f core2_cnyf ; fi
            ln -s ../../../other_input/core2_cnyf . )
          ;;
        global_oce_llc90)
          echo " link dir 'other_input/gael_oce_llc90_input' to 'input_fields'"
          ( cd "../${git_other}/${exp2add}" || exit 1
            if [[ -L input_fields ]] ; then /bin/rm -f input_fields ; fi
            ln -s ../../../other_input/gael_oce_llc90_input input_fields
            echo " link dirs: 'core2_cnyf' & 'global_oce_input_fields/*' in input_verifs"
            if [[ ! -e input_verifs ]] ; then mkdir input_verifs ; fi
            #- leave if "cd" fails: "rm -f *" must only run inside input_verifs
            ( cd input_verifs || exit 1
              /bin/rm -f -- *
              ln -s ../../../../other_input/core2_cnyf .
              ln -s ../../../../other_input/global_oce_input_fields/* . )
          )
          ;;
      esac
    else
      echo " missing dir: $git_other/$exp2add"
    fi
  done
else
  echo "no dir: $gcmDIR/verification => exit"
  exit 1
fi

#- if dblTr=1, call testreport a 1rst time just to compile ; then add "-q"
#  to the options used for the next call.
#  $options holds several flags: left unquoted to get 1 word per flag.
if [[ "$dblTr" -eq 1 ]] ; then
  echo ''
  #- 1) just compile ("-nr"), using "-j 4" to speed up
  echo ./testreport $options -of "$OPTFILE" -skd "'$skipExp'" \
    -j 4 -nr -odir "${dNam}-$sfx"
  ./testreport $options -of "$OPTFILE" -skd "$skipExp" \
    -j 4 -nr -odir "${dNam}-$sfx"
  options="$options -q"
fi

echo ''
#- 2) run and report results ; also finish to compile those who failed with "-j"
#  $options holds several flags: left unquoted to get 1 word per flag.
echo ./testreport $options -of "$OPTFILE" -skd "'$skipExp'" \
  -odir "${dNam}-$sfx" -send "'$SEND'" -sd "$SavD" -a jm_c@mitgcm.org
./testreport $options -of "$OPTFILE" -skd "$skipExp" \
  -odir "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
#- save testreport status before calling the backup script
retVal=$?
"$HERE/${dNam}/backup_outp" "tr_${dNam}-${sfx}" "$OUTP/backup"

if [[ "$retVal" -ne 0 ]] ; then
  echo "<== testreport returned retVal=${retVal}, expecting 0"
  echo " -> skip restart test 'do_tst_2+2'"
else
  echo ''
  #- 3) test restart and report results
  echo ../tools/do_tst_2+2 -mpi \
    -o "${dNam}-$sfx" -send "'$SEND'" -sd "$SavD" -a jm_c@mitgcm.org
  ../tools/do_tst_2+2 -mpi \
    -o "${dNam}-$sfx" -send "$SEND" -sd "$SavD" -a jm_c@mitgcm.org
  "$HERE/${dNam}/backup_outp" "rs_${dNam}-${sfx}" "$OUTP/backup"
fi

  ViewVC Help
Powered by ViewVC 1.1.22