/[MITgcm]/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport
ViewVC logotype

Contents of /MITgcm_contrib/test_scripts/sxace/mitgcmtestreport

Parent Directory | Revision Log | View Revision Graph


Revision 1.4 - (show annotations) (download)
Thu Feb 8 15:50:19 2018 UTC (7 years, 5 months ago) by mlosch
Branch: MAIN
Changes since 1.3: +58 -17 lines
first attempt of running testreport from git repository on stan

#!/bin/bash
# Script for running the MITgcm testreport on stan1.awi.de
# - split the testreport into 3 steps:
# 1/ compiling on head node (tx7.awi.de), with -norun option
# 2/ running on compute node (using PBS qsub), with -runonly option
# 3/ evaluating result on head node with -runonly option
#
# Notes:
# - step 2 leads to many error messages, because the OS on the compute
#   nodes does not have the appropriate shell tools; modifying the
#   runonly option to skip the evaluation step would be nice but not
#   necessary; you'll just have to live with the error messages
# - step 3 assumes that all experiments have been run successfully, i.e.
#   that the output files are up-to-date.
#   If not, testreport will try to run the sx ace-code on the tx7 frontend
#   which will fail inevitably and produce more errors; maybe we can
#   have a flag that skips everything but the evaluation step to avoid this
# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport,v 1.3 2018/01/09 10:11:43 mlosch Exp $
# $Name: $

# Print a message on stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# for some reason the module command is not available in a bash script on
# this computer so we have to create it here
#module () { eval `/usr/bin/modulecmd bash $*` ; }
# alternatively we can source this script that contains all relevant
# definitions
source /usr/share/Modules/init/bash
#module use --append /sx8/user2/awisoft/modulefiles
# load latest compilers:
#module load sxf90/460
#module load sxc++/094
module load sxf90
module load sxc++
module load sxmpi
module load sxnetcdf
#
# make sure that we have qsub and qstat
#export PATH=${PATH}:/usr/bin/nqsII
source /etc/profile.d/nec.sh
#
VENDOR=sxf90
# RUNIT is only referenced by the commented-out restart check further down
RUNIT="runit_${VENDOR}"
# directory the script was started from; the job script is written here
HERE=$(pwd)
# command handed to testreport via -command
# NOTE(review): TR_NPROC is presumably a placeholder that testreport replaces
# with the number of processes -- confirm against the testreport documentation
EXE='mpirun -np TR_NPROC ./mitgcmuv'
NPROCS=2
MPI="-MPI $NPROCS"
# log file that collects the output of all three testreport steps
MYOUTPUT="$HOME/testreport_${VENDOR}"
# file name given to "#PBS -o"; deliberately a relative path (an earlier
# assignment of $HOME/out_${VENDOR} was immediately overridden by this value
# and has been dropped)
OUTFILE="out_${VENDOR}"
JOBNAME=test_ace
# user whose jobs are listed with qstat while waiting for the batch job
JOBUSER=mlosch
JOBSCRIPT="job_${VENDOR}"
# restrict testreport to a single experiment by enabling this line instead:
#selectexperiment='-t exp2'
selectexperiment=''
# download code into this directory
TDIR="/ace/user/mlosch/tmp_${VENDOR}"
gcmDIR=MITgcm
git_repo='MITgcm'
git_code='MITgcm'

OPTFILE=../tools/build_options/SUPER-UX_SX-ACE_sxf90_awi
#OPTFILE=/home/ace/mlosch/MITgcm/tools/build_options/SUPER-UX_SX-ACE_sxf90_awi

# The testreport command line is kept as ONE string on purpose: it is pasted
# verbatim into the job script below and is word-split by the shell wherever
# it is executed directly.
RUNTESTREPORT="./testreport $MPI -of=${OPTFILE} $selectexperiment -small_f"
#
# create batch script
# (unquoted EOF: all variables are expanded now, only PBS_O_WORKDIR is
#  escaped so that it is evaluated when the job runs)
#
cat << EOF > "$HERE/$JOBSCRIPT"
#PBS -q ace-r # job queue
#PBS -N $JOBNAME # give the job a name
#PBS -l cpunum_job=$NPROCS # cpus per node
#PBS -l elapstim_req=2:00:00
#PBS -l cputim_job=2:00:00 # time limit
#PBS -l memsz_job=32gb # max accumulated memory, we need this much because of many netcdf files
#PBS -j o # join i/o
#PBS -S /bin/sh
#PBS -o $OUTFILE # o Where to write output
#

cd \${PBS_O_WORKDIR}
$RUNTESTREPORT -runonly -command "$EXE" >> $MYOUTPUT 2>&1

EOF

# clean up old testreport output
if [[ -e "$MYOUTPUT" ]]; then
  rm -rf -- "$MYOUTPUT"
fi
if [[ -e "$OUTFILE" ]]; then
  rm -r -- "$OUTFILE"
fi

# checkOut determines how much checking out is being done
# checkOut = 3: new clone from GitHub and make a new copy
# checkOut = 2: update (git pull) existing repo and make a new copy
# checkOut = 1: skip update
# checkOut = 0: use existing test code (if available otherwise switch to 1)

checkOut=2

# stderr of 'git clone' is captured here and shown only if the clone fails
tmpFil="$TDIR/error.out"
if (( checkOut <= 1 )); then
  if [[ -e "$TDIR/${gcmDIR}/doc" ]]; then
    echo "$TDIR/${gcmDIR}/doc" 'exist'
  else
    echo -n "$TDIR/${gcmDIR}" 'missing ; '
    checkOut=2
    echo "will make a new copy ( checkOut=$checkOut )"
  fi
fi

if (( checkOut >= 2 )); then
  #---- cleaning:
  # create the download directory on first use and refuse to continue if we
  # cannot enter it, otherwise git would operate in the wrong directory
  mkdir -p -- "$TDIR" || die "Error: cannot create directory $TDIR"
  cd "$TDIR" || die "Error: cannot change to directory $TDIR"

  #---- Make a new clone or update existing one:
  if [[ -e "${gcmDIR}/.git/config" ]]; then
    echo "${gcmDIR}/.git/config exist"
  else
    echo -n "${gcmDIR}/.git/config missing, "
    checkOut=3
    echo "will get new clone ( checkOut=$checkOut )"
  fi
  if (( checkOut == 3 )); then
    echo -n "Removing old clone: $TDIR/${gcmDIR} ..."
    # ${var:?} aborts instead of expanding to "/" should a variable be empty
    rm -rf -- "${TDIR:?}/${gcmDIR:?}"
    echo " done"
    echo -n "Make a new clone of $git_code from repo: $git_repo ..."
    git clone "https://github.com/$git_repo/${git_code}.git" "${gcmDIR}" 2> "$tmpFil"
    retVal=$?
    if (( retVal == 0 )); then
      echo ' --> done!'
      rm -f -- "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil"
      rm -f -- "$tmpFil"
      exit 2
    fi
  else
    echo "Updating current clone ( $git_code ) ..."
    # run in a subshell so that the working directory of the script is kept;
    # only report success if every step succeeded
    if ( cd "${gcmDIR}" && git checkout master && git pull ); then
      echo ' --> done!'
    else
      echo " Warning: updating $git_code failed, continuing with the existing clone" >&2
    fi
  fi
else
  cd "$TDIR" || die "Error: cannot change to directory $TDIR"
fi

cd "$TDIR/${gcmDIR}/verification" \
  || die "Error: cannot change to directory $TDIR/${gcmDIR}/verification"

# make sure that we do not use the cross compiler for testreport
unset CC
# make sure that we do use the cross compiler for testreport
#export CC=sxcc

# step 1: compile all experiments on the head node
# shellcheck disable=SC2086 # RUNTESTREPORT must be word-split into options
if ! $RUNTESTREPORT -j 8 -norun > "$MYOUTPUT" 2>&1; then
  echo "something wrong with testreport"
  echo "keeping the working directory"
#else
# echo "check restarts"
# echo ../tools/do_tst_2+2 -mpi -exe \"$HERE/$RUNIT\" -a NONE
# ../tools/do_tst_2+2 -mpi -exe $HERE/$RUNIT -a NONE
# everything OK: delete working directory
# rm -rf $TDIR
fi

if [[ ! -e "$MYOUTPUT" ]]; then
  touch "$MYOUTPUT"
fi

# append a copy of the job script to the log file
{
  echo " "
  echo "***********************************************************"
  echo "Submitting this job script:"
  echo "***********************************************************"
  cat "$HERE/$JOBSCRIPT"
  echo "***********************************************************"
  echo "end of job script"
  echo "***********************************************************"
  echo " "
} >> "$MYOUTPUT"

# step 2: now submit the job that actually runs all the experiments in one go
echo "qsub $HERE/$JOBSCRIPT"
qsub "$HERE/$JOBSCRIPT"
# keep looking for the job in the job queues and wait until it has disappeared;
# grep succeeds only while a line containing the job name is listed, so the
# loop ends as soon as the job is gone and never reports an empty job id
while jobruns=$(qstat -n -u "$JOBUSER" | grep -- "$JOBNAME"); do
  echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
  currentexp=$(grep Experiment "$MYOUTPUT" | tail -1)
  echo "currently running $currentexp"
  sleep 200
done

# step 3: after running the experiments on the compute node run testreport
# for a third time to evaluate results on the head node again
{
  echo " "
  echo "now run testreport for a final time to evaluate results:"
  echo "$RUNTESTREPORT -match 10 -runonly"
} >> "$MYOUTPUT"
#$RUNTESTREPORT -match 10 -runonly >> $MYOUTPUT 2>&1
# shellcheck disable=SC2086 # RUNTESTREPORT must be word-split into options
$RUNTESTREPORT -match 10 -runonly \
  -a "jm_c@mitgcm.org" >> "$MYOUTPUT" 2>&1
# -a "jm_c@mitgcm.org, Martin.Losch@awi.de" >> $MYOUTPUT 2>&1

echo "end of mitgcmtestreport"

  ViewVC Help
Powered by ViewVC 1.1.22