/[MITgcm]/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport
ViewVC logotype

Annotation of /MITgcm_contrib/test_scripts/sxace/mitgcmtestreport

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.5 - (hide annotations) (download)
Mon Mar 23 12:17:57 2020 UTC (5 years, 3 months ago) by mlosch
Branch: MAIN
Changes since 1.4: +16 -7 lines
make git sequence more robust and have git point to my local version

1 mlosch 1.1 #!/bin/bash
2     # new script for running testreport on stan1.awi.de
3     # - split the testreport into 3 steps:
4     # 1/ compiling on head node (tx7.awi.de), with -norun option
5     # 2/ running on compute node (using PBS qsub), with -runonly option
6     # 3/ evaluating result on head node with -runonly option
7 mlosch 1.5 #
8     # Notes:
9 mlosch 1.1 # - step 2 leads to many error messages, because the OS on the compute
10     # nodes does not have the appropriate shell tools, modifying the
11     # runonly option to skip the evaluation step would be nice but not
12     # necessary; you'll just have to live with the error messages
13     # - step 3 assumes that all experiments have been run successfully, i.e.
14     # that the output files are up-to-date.
15     # if not, testreport will try to run the sx ace-code on the tx7 frontend
16     # which will fail inevitably and produce more errors, maybe we can
17     # have a flag that skips everything but the evaluation step to avoid this
18 mlosch 1.5 # $Header: /u/gcmpack/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport,v 1.4 2018/02/08 15:50:19 mlosch Exp $
19 mlosch 1.1 # $Name: $
20    
21     # for some reason the module command is not available in a bash script on
22     # this computer so we have to create it here
23     #module () { eval `/usr/bin/modulecmd bash $*` ; }
24     # alternatively we can source this script that contains all relevant
25     # definitions
26     source /usr/share/Modules/init/bash
27     #module use --append /sx8/user2/awisoft/modulefiles
28     # load latest compilers:
29     #module load sxf90/460
30     #module load sxc++/094
31     module load sxf90
32     module load sxc++
33     module load sxmpi
34     module load sxnetcdf
35     #
36     # make sure that we have qsub and qstat
37     #export PATH=${PATH}:/usr/bin/nqsII
38     source /etc/profile.d/nec.sh
39     #
40     VENDOR=sxf90
41     RUNIT="runit_"$VENDOR
42     HERE=`pwd`
43     EXE='mpirun -np TR_NPROC ./mitgcmuv'
44     NPROCS=2
45     MPI="-MPI $NPROCS"
46     OUTFILE=$HOME/out_${VENDOR}
47     MYOUTPUT=$HOME/testreport_${VENDOR}
48     OUTFILE=out_${VENDOR}
49     JOBNAME=test_ace
50     JOBSCRIPT=job_${VENDOR}
51     selectexperiment='-t exp2'
52     selectexperiment=''
53     # download code into this directory
54 mlosch 1.2 TDIR=/ace/user/mlosch/tmp_$VENDOR
55 mlosch 1.4 gcmDIR=MITgcm
56     git_repo='MITgcm'
57     git_code='MITgcm'
58 mlosch 1.1
59     OPTFILE=../tools/build_options/SUPER-UX_SX-ACE_sxf90_awi
60 mlosch 1.2 #OPTFILE=/home/ace/mlosch/MITgcm/tools/build_options/SUPER-UX_SX-ACE_sxf90_awi
61 mlosch 1.1
62     RUNTESTREPORT="./testreport $MPI -of=${OPTFILE} $selectexperiment -small_f"
63     #
64     # create batch script
65     #
66     cat << EOF > $HERE/$JOBSCRIPT
67 mlosch 1.2 #PBS -q ace-r # job queue
68 mlosch 1.1 #PBS -N $JOBNAME # give the job a name
69     #PBS -l cpunum_job=$NPROCS # cpus per node
70 mlosch 1.2 #PBS -l elapstim_req=2:00:00
71 mlosch 1.1 #PBS -l cputim_job=2:00:00 # time limit
72     #PBS -l memsz_job=32gb # max accumulated memory, we need this much because of many netcdf files
73     #PBS -j o # join i/o
74     #PBS -S /bin/sh
75     #PBS -o $OUTFILE # o Where to write output
76     #
77    
78     cd \${PBS_O_WORKDIR}
79     $RUNTESTREPORT -runonly -command "$EXE" >> $MYOUTPUT 2>&1
80    
81     EOF
82    
83     # clean up old testreport output
84     if [ -e $MYOUTPUT ]; then
85     rm -rf $MYOUTPUT
86     fi
87     if [ -e $OUTFILE ]; then
88     rm -r $OUTFILE
89     fi
90 mlosch 1.4
91     # checkOut determines how much checking out is being done
92     # checkOut = 3: new clone from GitHub and make a new copy
93     # checkOut = 2: update (git pull) existing repo and make a new copy
94     # checkOut = 1: skip update
95     # checkOut = 0: use existing test code (if available, otherwise switch to 2)
96    
97     checkOut=2
98    
99 mlosch 1.5 gitcmd=$HOME/git/git
100 mlosch 1.4 tmpFil=$TDIR/error.out
101     if [ $checkOut -le 1 ] ; then
102     if test -e $TDIR/${gcmDIR}/doc ; then
103     echo $TDIR/${gcmDIR}/doc 'exist'
104     else
105     echo -n $TDIR/${gcmDIR} 'missing ; '
106     checkOut=2
107     echo "will make a new copy ( checkOut=$checkOut )"
108     fi
109     fi
110    
111     if [ $checkOut -ge 2 ] ; then
112     #---- enter the download directory; abort if it is missing, because all relative paths below depend on it:
113     cd $TDIR || exit 2
114    
115     #---- Make a new clone or update existing one (a missing .git/config forces a fresh clone):
116     if test -e ${gcmDIR}/.git/config ; then
117     echo "${gcmDIR}/.git/config exist"
118     else
119     echo -n "${gcmDIR}/.git/config missing, "
120     checkOut=3
121     echo "will get new clone ( checkOut=$checkOut )"
122     fi
123     if [ $checkOut -eq 3 ] ; then
124     echo -n "Removing old clone: $TDIR/${gcmDIR} ..."
125     test -e $TDIR/${gcmDIR} && rm -rf $TDIR/${gcmDIR}
126     echo " done"
127     echo -n "Make a new clone of $git_code from repo: $git_repo ..."
128 mlosch 1.5 ${gitcmd} clone https://github.com/$git_repo/${git_code}.git ${gcmDIR} 2> $tmpFil
129 mlosch 1.4 retVal=$?
130     if test $retVal = 0 ; then
131     echo ' --> done!'
132     rm -f $tmpFil
133     else
134     echo " Error: 'git clone' returned: $retVal"
135     cat $tmpFil
136     rm -f $tmpFil
137     exit 2
138 mlosch 1.1 fi
139 mlosch 1.4 else
140     echo "Updating current clone ( $git_code ) ..."
141 mlosch 1.5 ( cd ${gcmDIR} && ${gitcmd} checkout master && ${gitcmd} pull ) # '&&' so a failed cd/checkout is reported and git never runs outside the clone
142     retVal=$?
143     if test $retVal = 0 ; then
144     echo ' --> done!'
145     else
146     echo " Error: 'git pull' returned: $retVal"
147     echo " Error: 'git pull' returned: $retVal" \
148     | mail -s "Git-error on Stan" Martin.Losch@awi.de
149     exit 2
150     fi
151 mlosch 1.4 fi
152 mlosch 1.1 else
153 mlosch 1.4 cd $TDIR || exit 2
154 mlosch 1.1 fi
155    
156     cd $TDIR/${gcmDIR}/verification || exit 2 # use the configured clone name; do not run testreport from the wrong directory
157    
158     # make sure that we do not use the cross compiler for testreport
159     unset CC
160     # alternatively, make sure that we do use the cross compiler for testreport
161     #export CC=sxcc
162    
163     $RUNTESTREPORT -j 8 -norun > $MYOUTPUT 2>&1
164    
165     if [ "$?" != "0" ]
166     then
167     echo "something wrong with testreport"
168     echo "keeping the working directory"
169     #else
170     # echo "check restarts"
171     # echo ../tools/do_tst_2+2 -mpi -exe \"$HERE/$RUNIT\" -a NONE
172     # ../tools/do_tst_2+2 -mpi -exe $HERE/$RUNIT -a NONE
173     # everything OK: delete working directory
174     # rm -rf $TDIR
175     fi
176    
177     if [ ! -e $MYOUTPUT ]
178     then
179     touch $MYOUTPUT
180     fi
181    
182     echo " " >> $MYOUTPUT
183     echo "***********************************************************" >> $MYOUTPUT
184     echo "Submitting this job script:" >> $MYOUTPUT
185     echo "***********************************************************" >> $MYOUTPUT
186     cat $HERE/$JOBSCRIPT >> $MYOUTPUT
187     echo "***********************************************************" >> $MYOUTPUT
188     echo "end of job script" >> $MYOUTPUT
189     echo "***********************************************************" >> $MYOUTPUT
190     echo " " >> $MYOUTPUT
191    
192     # now submit the job that actually runs all the experiments in one go
193 mlosch 1.4 echo "qsub $HERE/$JOBSCRIPT"
194 mlosch 1.1 qsub $HERE/$JOBSCRIPT
195     # keep looking for the job in the job queues and wait until it has disappeared
196     jobruns=`qstat -n -u mlosch | grep "$JOBNAME"`
197     while [ "${jobruns}"x != x ]
198 mlosch 1.5 do
199 mlosch 1.1 sleep 200
200     jobruns=`qstat -n -u mlosch | grep "$JOBNAME"`
201     echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
202     currentexp=`grep Experiment $MYOUTPUT | tail -1`
203     echo "currently running $currentexp"
204     done
205    
206     # after running the experiments on the compute node run testreport
207     # for a third time to evaluate results on the head node again
208     echo " " >> $MYOUTPUT
209     echo "now run testreport for a final time to evaluate results:" >> $MYOUTPUT
210     echo "$RUNTESTREPORT -match 10 -runonly" >> $MYOUTPUT
211     #$RUNTESTREPORT -match 10 -runonly >> $MYOUTPUT 2>&1
212     $RUNTESTREPORT -match 10 -runonly \
213 mlosch 1.3 -a "jm_c@mitgcm.org" >> $MYOUTPUT 2>&1
214     # -a "jm_c@mitgcm.org, Martin.Losch@awi.de" >> $MYOUTPUT 2>&1
215 mlosch 1.1
216     echo "end of mitgcmtestreport"

  ViewVC Help
Powered by ViewVC 1.1.22