/[MITgcm]/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport
ViewVC logotype

Contents of /MITgcm_contrib/test_scripts/sxace/mitgcmtestreport

Parent Directory | Revision Log | View Revision Graph


Revision 1.5 - (show annotations) (download)
Mon Mar 23 12:17:57 2020 UTC (5 years, 3 months ago) by mlosch
Branch: MAIN
Changes since 1.4: +16 -7 lines
make git sequence more robust and have git point to my local version

#!/bin/bash
# Script for running testreport on stan1.awi.de.
# The testreport run is split into 3 steps:
#   1/ compiling on the head node (tx7.awi.de), with the -norun option
#   2/ running on a compute node (using PBS qsub), with the -runonly option
#   3/ evaluating the results on the head node, with the -runonly option
#
# Notes:
# - Step 2 leads to many error messages, because the OS on the compute
#   nodes does not have the appropriate shell tools. Modifying the
#   -runonly option to skip the evaluation step would be nice but is not
#   necessary; you will just have to live with the error messages.
# - Step 3 assumes that all experiments have been run successfully, i.e.
#   that the output files are up-to-date. If they are not, testreport will
#   try to run the SX-ACE code on the tx7 frontend, which will inevitably
#   fail and produce more errors. A flag that skips everything but the
#   evaluation step could avoid this.
# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport,v 1.4 2018/02/08 15:50:19 mlosch Exp $
# $Name: $

# For some reason the module command is not available in a bash script on
# this computer, so it has to be created here. One option is to define it
# by hand:
#module () { eval `/usr/bin/modulecmd bash $*` ; }
# The alternative used here is to source the script that contains all
# relevant definitions.
source /usr/share/Modules/init/bash
#module use --append /sx8/user2/awisoft/modulefiles
# Load compilers and libraries without pinning a version; specific compiler
# versions were previously selected with:
#module load sxf90/460
#module load sxc++/094
for sx_module in sxf90 sxc++ sxmpi sxnetcdf; do
  module load "$sx_module"
done
#
# Make sure that we have qsub and qstat.
#export PATH=${PATH}:/usr/bin/nqsII
source /etc/profile.d/nec.sh
#
# ---------------------------------------------------------------------------
# Configuration. These variables are read by the rest of the script.
# ---------------------------------------------------------------------------
VENDOR=sxf90
RUNIT="runit_${VENDOR}"
# directory the script was started from; the job script is written here
HERE=$(pwd)
# command handed to testreport via -command
# NOTE(review): TR_NPROC is presumably a placeholder that testreport
# replaces with the number of processes -- confirm against testreport
EXE='mpirun -np TR_NPROC ./mitgcmuv'
NPROCS=2
MPI="-MPI ${NPROCS}"
# log file that collects the output of all testreport invocations
MYOUTPUT="${HOME}/testreport_${VENDOR}"
# PBS output file. An earlier assignment (OUTFILE=$HOME/out_${VENDOR}) was
# immediately overridden by this relative name, so only the effective value
# is kept.
OUTFILE="out_${VENDOR}"
JOBNAME=test_ace
JOBSCRIPT="job_${VENDOR}"
# Extra testreport option to restrict the set of experiments, e.g.
#   selectexperiment='-t exp2'
# Left empty, no selection option is passed.
selectexperiment=''
# download code into this directory
TDIR="/ace/user/mlosch/tmp_${VENDOR}"
gcmDIR=MITgcm
git_repo='MITgcm'
git_code='MITgcm'

OPTFILE=../tools/build_options/SUPER-UX_SX-ACE_sxf90_awi
#OPTFILE=/home/ace/mlosch/MITgcm/tools/build_options/SUPER-UX_SX-ACE_sxf90_awi

# Base testreport command line; it is expanded unquoted later on so that it
# splits into the command and its options.
RUNTESTREPORT="./testreport $MPI -of=${OPTFILE} $selectexperiment -small_f"
#
# create batch script
#
# The here-document delimiter is unquoted, so $JOBNAME, $NPROCS, $OUTFILE,
# $RUNTESTREPORT, $EXE and $MYOUTPUT are expanded now, while the script is
# being written. Only \${PBS_O_WORKDIR} is escaped so that it is expanded
# when the job itself runs. The generated job aborts if it cannot change to
# that directory instead of running testreport somewhere else.
cat > "$HERE/$JOBSCRIPT" << EOF
#PBS -q ace-r # job queue
#PBS -N $JOBNAME # give the job a name
#PBS -l cpunum_job=$NPROCS # cpus per node
#PBS -l elapstim_req=2:00:00
#PBS -l cputim_job=2:00:00 # time limit
#PBS -l memsz_job=32gb # max accumulated memory, we need this much because of many netcdf files
#PBS -j o # join i/o
#PBS -S /bin/sh
#PBS -o $OUTFILE # o Where to write output
#

cd "\${PBS_O_WORKDIR}" || exit 1
$RUNTESTREPORT -runonly -command "$EXE" >> "$MYOUTPUT" 2>&1

EOF

# clean up old testreport output

#######################################
# Remove a file or directory tree left over from a previous run.
# Arguments: $1 - path to remove; an empty or non-existent path is ignored
# Returns:   0 when there was nothing to remove, otherwise the status of rm
#######################################
remove_if_exists() {
  local target=$1
  # Quoting matters here: an unquoted empty value would turn the existence
  # test into the always-true "[ -e ]".
  if [ -n "$target" ] && [ -e "$target" ]; then
    rm -rf -- "$target"
  fi
}

remove_if_exists "$MYOUTPUT"
# OUTFILE may be a relative name; it is then resolved against the current
# working directory.
remove_if_exists "$OUTFILE"

# checkOut determines how much checking out is being done
# checkOut = 3: new clone from GitHub and make a new copy
# checkOut = 2: update (git pull) existing repo and make a new copy
# checkOut = 1: skip update
# checkOut = 0: use existing test code (if available, otherwise switch to 2)

checkOut=2

gitcmd=$HOME/git/git
# stderr of "git clone" is captured here and only shown if the clone fails
tmpFil=$TDIR/error.out
if [ "$checkOut" -le 1 ]; then
  if [ -e "$TDIR/$gcmDIR/doc" ]; then
    echo "$TDIR/$gcmDIR/doc" 'exist'
  else
    echo -n "$TDIR/$gcmDIR" 'missing ; '
    checkOut=2
    echo "will make a new copy ( checkOut=$checkOut )"
  fi
fi

if [ "$checkOut" -ge 2 ]; then
  #---- cleaning:
  # Create the working directory if necessary and stop if it cannot be
  # entered; otherwise the git commands below would operate on whatever
  # directory the script was started from.
  if ! mkdir -p -- "$TDIR" || ! cd -- "$TDIR"; then
    echo " Error: cannot enter working directory '$TDIR'"
    exit 2
  fi

  #---- Make a new clone or update existing one:
  if [ -e "$gcmDIR/.git/config" ]; then
    echo "${gcmDIR}/.git/config exist"
  else
    echo -n "${gcmDIR}/.git/config missing, "
    checkOut=3
    echo "will get new clone ( checkOut=$checkOut )"
  fi
  if [ "$checkOut" -eq 3 ]; then
    echo -n "Removing old clone: $TDIR/${gcmDIR} ..."
    # ":?" aborts instead of expanding to "/" if either variable is empty
    rm -rf -- "${TDIR:?}/${gcmDIR:?}"
    echo " done"
    echo -n "Make a new clone of $git_code from repo: $git_repo ..."
    "$gitcmd" clone "https://github.com/$git_repo/${git_code}.git" "$gcmDIR" 2> "$tmpFil"
    retVal=$?
    if [ "$retVal" -eq 0 ]; then
      echo ' --> done!'
      rm -f -- "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat -- "$tmpFil"
      rm -f -- "$tmpFil"
      exit 2
    fi
  else
    echo "Updating current clone ( $git_code ) ..."
    # Chain with && so that a failed cd or checkout is reported instead of
    # being masked by the exit status of a later command.
    ( cd -- "$gcmDIR" && "$gitcmd" checkout master && "$gitcmd" pull )
    retVal=$?
    if [ "$retVal" -eq 0 ]; then
      echo ' --> done!'
    else
      echo " Error: 'git checkout master && git pull' returned: $retVal"
      echo " Error: 'git checkout master && git pull' returned: $retVal" \
        | mail -s "Git-error on Stan" Martin.Losch@awi.de
      exit 2
    fi
  fi
else
  if ! cd -- "$TDIR"; then
    echo " Error: cannot enter working directory '$TDIR'"
    exit 2
  fi
fi

# Step 1: compile all experiments on the head node (-norun).
# Use $gcmDIR, the directory the code was checked out into, and stop if the
# verification directory is not there: testreport is started with a
# relative path and must not be run from anywhere else.
if ! cd -- "$TDIR/$gcmDIR/verification"; then
  echo "cannot change to $TDIR/$gcmDIR/verification"
  exit 1
fi

# make sure that we do not use the cross compiler for testreport
unset CC
# to make sure that we do use the cross compiler for testreport instead:
#export CC=sxcc

# $RUNTESTREPORT is deliberately unquoted: it holds the command together
# with its options and has to be split into words. This call truncates
# $MYOUTPUT; later steps append to it.
if ! $RUNTESTREPORT -j 8 -norun > "$MYOUTPUT" 2>&1; then
  echo "something wrong with testreport"
  echo "keeping the working directory"
#else
#  echo "check restarts"
#  echo ../tools/do_tst_2+2 -mpi -exe \"$HERE/$RUNIT\" -a NONE
#  ../tools/do_tst_2+2 -mpi -exe $HERE/$RUNIT -a NONE
# everything OK: delete working directory
#  rm -rf $TDIR
fi

# Record the job script that is about to be submitted in the testreport
# log, framed by banner lines.
if [ ! -e "$MYOUTPUT" ]; then
  touch "$MYOUTPUT"
fi

banner="***********************************************************"
{
  echo " "
  echo "$banner"
  echo "Submitting this job script:"
  echo "$banner"
  cat "$HERE/$JOBSCRIPT"
  echo "$banner"
  echo "end of job script"
  echo "$banner"
  echo " "
} >> "$MYOUTPUT"

# now submit the job that actually runs all the experiments in one go
echo "qsub $HERE/$JOBSCRIPT"
if ! qsub "$HERE/$JOBSCRIPT"; then
  # Keep going as before, but make the failure visible: without a queued
  # job the wait loop below ends immediately.
  echo "warning: qsub $HERE/$JOBSCRIPT failed" >&2
fi
# Keep looking for the job in the job queues and wait until it has
# disappeared. The queue is polled before each progress message, so no
# "waiting" line is printed for a job that has already left the queue.
while jobruns=$(qstat -n -u mlosch | grep "$JOBNAME"); [ -n "$jobruns" ]; do
  # ${jobruns%% *} keeps the text up to the first blank of the qstat match
  echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
  currentexp=$(grep Experiment "$MYOUTPUT" | tail -1)
  echo "currently running $currentexp"
  sleep 200
done

# after running the experiments on the compute node run testreport
# for a third time to evaluate results on the head node again
{
  echo " "
  echo "now run testreport for a final time to evaluate results:"
  echo "$RUNTESTREPORT -match 10 -runonly"
} >> "$MYOUTPUT"
# $RUNTESTREPORT is deliberately unquoted so that it splits into the command
# and its options.
# NOTE(review): -a presumably names the address(es) testreport mails the
# results to -- confirm against testreport. Earlier variants of this call:
#$RUNTESTREPORT -match 10 -runonly >> $MYOUTPUT 2>&1
#  -a "jm_c@mitgcm.org, Martin.Losch@awi.de" >> $MYOUTPUT 2>&1
$RUNTESTREPORT -match 10 -runonly \
  -a "jm_c@mitgcm.org" >> "$MYOUTPUT" 2>&1

echo "end of mitgcmtestreport"

  ViewVC Help
Powered by ViewVC 1.1.22