/[MITgcm]/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport
ViewVC logotype

Annotation of /MITgcm_contrib/test_scripts/sxace/mitgcmtestreport

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.6 - (hide annotations) (download)
Tue Mar 24 07:40:41 2020 UTC (5 years, 3 months ago) by mlosch
Branch: MAIN
CVS Tags: HEAD
Changes since 1.5: +27 -24 lines
some tweaking, load new git module and use this git, add braces to MYOUTPUT

#!/bin/bash
# new script for running testreport on stan1.awi.de
# - split the testreport into 3 steps:
#   1/ compiling on head node (tx7.awi.de), with -norun option
#   2/ running on compute node (using PBS qsub), with -runonly option
#   3/ evaluating result on head node with -runonly option
#
# Notes:
# - step 2 leads to many error messages, because the OS on the compute
#   nodes does not have the appropriate shell tools, modifying the
#   runonly option to skip the evaluation step would be nice but not
#   necessary; you'll just have to live with the error messages
# - step 3 assumes that all experiments have been run successfully, i.e.
#   that the output files are up-to-date.
#   if not, testreport will try to run the sx ace-code on the tx7 frontend
#   which will fail inevitably and produce more errors, maybe we can
#   have a flag that skips everything but the evaluation step to avoid this
# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport,v 1.5 2020/03/23 12:17:57 mlosch Exp $
# $Name: $

# for some reason the module command is not available in a bash script on
# this computer so we have to create it here
#module () { eval `/usr/bin/modulecmd bash $*` ; }
# alternatively we can source this script that contains all relevant
# definitions
source /usr/share/Modules/init/bash
#module use --append /sx8/user2/awisoft/modulefiles
# load latest compilers:
#module load sxf90/460
#module load sxc++/094
module load sxf90
module load sxc++
module load sxmpi
module load sxnetcdf
#
module load git
#
# make sure that we have qsub and qstat
#export PATH=${PATH}:/usr/bin/nqsII
source /etc/profile.d/nec.sh
#
# ---- configuration of this test run ----
VENDOR=sxf90
RUNIT="runit_${VENDOR}"
# directory the script was started from; the job script is written here
HERE=$(pwd)
# single-quoted on purpose: TR_NPROC is handed on literally as part of the
# -command argument of testreport
EXE='mpirun -np TR_NPROC ./mitgcmuv'
NPROCS=2
MPI="-MPI $NPROCS"
# alternative location for the batch output file (disabled; the relative
# name assigned below is the one that is actually used):
#OUTFILE=$HOME/out_${VENDOR}
# file that collects the output of all testreport invocations
MYOUTPUT=$HOME/testreport_${VENDOR}
OUTFILE=out_${VENDOR}
JOBNAME=test_ace
JOBSCRIPT=job_${VENDOR}
# to restrict the run to a single experiment use e.g.:
#selectexperiment='-t exp2'
selectexperiment=''
# download code into this directory
TDIR=/ace/user/mlosch/tmp_$VENDOR
gcmDIR=MITgcm
git_repo='MITgcm'
git_code='MITgcm'

OPTFILE=../tools/build_options/SUPER-UX_SX-ACE_sxf90_awi
#OPTFILE=/home/ace/mlosch/MITgcm/tools/build_options/SUPER-UX_SX-ACE_sxf90_awi

# common part of every testreport call; it is expanded unquoted later on so
# that it splits into command and arguments
RUNTESTREPORT="./testreport $MPI -of=${OPTFILE} $selectexperiment -small_f"
#
# create batch script
#
# The here-document delimiter is unquoted, so every $VARIABLE below is
# expanded now, while the job script is written; only \${PBS_O_WORKDIR} is
# escaped and therefore left for the batch job to expand.
cat << EOF > "$HERE/$JOBSCRIPT"
#PBS -q ace-r # job queue
#PBS -N $JOBNAME # give the job a name
#PBS -l cpunum_job=$NPROCS # cpus per node
#PBS -l elapstim_req=2:00:00
#PBS -l cputim_job=2:00:00 # time limit
#PBS -l memsz_job=32gb # max accumulated memory, we need this much because of many netcdf files
#PBS -j o # join i/o
#PBS -S /bin/sh
#PBS -o $OUTFILE # o Where to write output
#

cd \${PBS_O_WORKDIR}
$RUNTESTREPORT -runonly -command "$EXE" >> ${MYOUTPUT} 2>&1

EOF

# clean up old testreport output
# (paths are quoted and preceded by "--" so that unusual names cannot be
# split into several words or mistaken for options of rm)
if [ -e "${MYOUTPUT}" ]; then
  rm -rf -- "${MYOUTPUT}"
fi
if [ -e "${OUTFILE}" ]; then
  rm -r -- "${OUTFILE}"
fi

# checkOut determines how much checking out is being done
# checkOut = 3: new clone from GitHub and make a new copy
# checkOut = 2: update (git pull) existing repo and make a new copy
# checkOut = 1: skip update
# checkOut = 0: use existing test code (if available otherwise switch to 2)

checkOut=2

#gitcmd=$HOME/git/git
gitcmd=git
# stderr of 'git clone' is collected in this file
tmpFil=$TDIR/error.out
# For the "no update" modes (0 and 1) verify that a source tree is present;
# if it is not, fall back to checkOut=2 so that the code is fetched.
if [ "$checkOut" -le 1 ]; then
  if test -e "$TDIR/${gcmDIR}/doc"; then
    echo "$TDIR/${gcmDIR}/doc exist"
  else
    echo -n "$TDIR/${gcmDIR} missing ; "
    checkOut=2
    echo "will make a new copy ( checkOut=$checkOut )"
  fi
fi

# Fetch or refresh the source tree below $TDIR.
# Reads:  checkOut, TDIR, gcmDIR, gitcmd, git_repo, git_code, tmpFil
# Writes: checkOut (raised to 3 when no usable clone is found), retVal
# Exits with status 2 when $TDIR cannot be entered or git fails.
if [ "$checkOut" -ge 2 ]; then
  #---- cleaning:
  # everything below uses paths relative to $TDIR, so stop if we cannot
  # get there instead of cloning into the wrong directory
  cd "$TDIR" || { echo " Error: cannot change directory to $TDIR" ; exit 2 ; }

  #---- Make a new clone or update existing one:
  if test -e "${gcmDIR}/.git/config"; then
    echo "${gcmDIR}/.git/config exist"
  else
    echo -n "${gcmDIR}/.git/config 'missing, "
    checkOut=3
    echo "will get new clone ( checkOut=$checkOut )"
  fi
  if [ "$checkOut" -eq 3 ]; then
    echo -n "Removing old clone: $TDIR/${gcmDIR} ..."
    # ":?" aborts the script rather than expanding to "/" if a variable is empty
    rm -rf -- "${TDIR:?}/${gcmDIR:?}"
    echo " done"
    echo -n "Make a new clone of $git_code from repo: $git_repo ..."
    ${gitcmd} clone "https://github.com/$git_repo/${git_code}.git" "${gcmDIR}" 2> "$tmpFil"
    retVal=$?
    if test $retVal = 0; then
      echo ' --> done!'
      rm -f -- "$tmpFil"
    else
      echo " Error: 'git clone' returned: $retVal"
      cat "$tmpFil"
      rm -f -- "$tmpFil"
      exit 2
    fi
  else
    echo "Updating current clone ( $git_code ) ..."
    # chained with && so that a failing cd or checkout is reported through
    # retVal and 'git pull' is not run in the wrong directory or branch
    ( cd "${gcmDIR}" && ${gitcmd} checkout master && ${gitcmd} pull )
    retVal=$?
    if test $retVal = 0; then
      echo ' --> done!'
    else
      echo " Error: 'git pull' returned: $retVal"
      echo " Error: 'git pull' returned: $retVal" \
        | mail -s "Git-error on Stan" Martin.Losch@awi.de
      exit 2
    fi
  fi
else
  cd "$TDIR" || { echo " Error: cannot change directory to $TDIR" ; exit 2 ; }
fi

# Step 1: build all experiments on the head node (testreport -norun).
# Use ${gcmDIR} (the directory the code was cloned into) rather than a
# hard-coded name, and stop if the verification directory is missing:
# ./testreport and the relative OPTFILE path only make sense from there.
cd "$TDIR/${gcmDIR}/verification" \
  || { echo " Error: cannot change directory to $TDIR/${gcmDIR}/verification" ; exit 2 ; }

# make sure that we do not use the cross compiler for testreport
unset CC
# make sure that we do use the cross compiler for testreport
#export CC=sxcc

# $RUNTESTREPORT is deliberately unquoted: it holds the command and its
# arguments in one string and has to be split into words here.
if ! $RUNTESTREPORT -j 8 -norun > "${MYOUTPUT}" 2>&1; then
  echo "something wrong with testreport"
  echo "keeping the working directory"
#else
# echo "check restarts"
# echo ../tools/do_tst_2+2 -mpi -exe \"$HERE/$RUNIT\" -a NONE
# ../tools/do_tst_2+2 -mpi -exe $HERE/$RUNIT -a NONE
# everything OK: delete working directory
# rm -rf $TDIR
fi

# Append a copy of the job script to the testreport output. The ">>"
# redirection creates ${MYOUTPUT} if it does not exist yet, so no separate
# "touch" is needed.
{
  echo " "
  echo "***********************************************************"
  echo "Submitting this job script:"
  echo "***********************************************************"
  cat "$HERE/$JOBSCRIPT"
  echo "***********************************************************"
  echo "end of job script"
  echo "***********************************************************"
  echo " "
} >> "${MYOUTPUT}"

# now submit the job that actually runs all the experiments in one go
echo "qsub $HERE/$JOBSCRIPT"
# report a failed submission; the script still continues as before
qsub "$HERE/$JOBSCRIPT" \
  || echo "warning: qsub returned a non-zero exit status" >&2
# keep looking for the job in the job queues and wait until it has disappeared
# NOTE(review): the queue is queried for user "mlosch" only; adjust the
# user name if the script is run from another account.
jobruns=$(qstat -n -u mlosch | grep -- "$JOBNAME")
while [ -n "${jobruns}" ]; do
  sleep 200
  jobruns=$(qstat -n -u mlosch | grep -- "$JOBNAME")
  # ${jobruns%% *} keeps the text before the first blank of the qstat line
  echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
  # show progress: last "Experiment" line found in the output so far
  currentexp=$(grep Experiment "${MYOUTPUT}" | tail -1)
  echo "currently running $currentexp"
done

# after running the experiments on the compute node run testreport
# for a third time to evaluate results on the head node again
{
  echo " "
  echo "now run testreport for a final time to evaluate results:"
  echo "$RUNTESTREPORT -match 10 -runonly"
} >> "${MYOUTPUT}"
#$RUNTESTREPORT -match 10 -runonly >> ${MYOUTPUT} 2>&1
# $RUNTESTREPORT is deliberately unquoted so that it splits into the
# command and its arguments.
$RUNTESTREPORT -match 10 -runonly \
  -a "jm_c@mitgcm.org" >> "${MYOUTPUT}" 2>&1
# -a "jm_c@mitgcm.org, Martin.Losch@awi.de" >> ${MYOUTPUT} 2>&1

echo "end of mitgcmtestreport"

  ViewVC Help
Powered by ViewVC 1.1.22