/[MITgcm]/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport
ViewVC logotype

Annotation of /MITgcm_contrib/test_scripts/sxace/mitgcmtestreport

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.4 - (hide annotations) (download)
Thu Feb 8 15:50:19 2018 UTC (7 years, 5 months ago) by mlosch
Branch: MAIN
Changes since 1.3: +58 -17 lines
first attempt of running testreport from git repository on stan

1 mlosch 1.1 #!/bin/bash
2     # new script for running testreport on stan1.awi.de
3     # - split the testreport into 3 steps:
4     # 1/ compiling on head node (tx7.awi.de), with -norun option
5     # 2/ running on compute node (using PBS qsub), with -runonly option
6     # 3/ evaluating result on head node with -runonly option
7     #
8     # Notes:
9     # - step 2 leads to many error messages, because the OS on the compute
10     # nodes does not have the appropriate shell tools, modifying the
11     # runonly option to skip the evaluation step would be nice but not
12     # necessary; you'll just have to live with the error messages
13     # - step 3 assumes that all experiments have been run successfully, i.e.
14     # that the output files are up-to-date.
15     # if not, testreport will try to run the sx ace-code on the tx7 frontend
16     # which will fail inevitably and produce more errors, maybe we can
17     # have a flag that skips everything but the evaluation step to avoid this
18 mlosch 1.4 # $Header: /u/gcmpack/MITgcm_contrib/test_scripts/sxace/mitgcmtestreport,v 1.3 2018/01/09 10:11:43 mlosch Exp $
19 mlosch 1.1 # $Name: $
20    
21     # for some reason the module command is not available in a bash script on
22     # this computer so we have to create it here
23     #module () { eval `/usr/bin/modulecmd bash $*` ; }
24     # alternatively we can source this script that contains all relevant
25     # definitions
26     source /usr/share/Modules/init/bash
27     #module use --append /sx8/user2/awisoft/modulefiles
28     # load latest compilers:
29     #module load sxf90/460
30     #module load sxc++/094
31     module load sxf90
32     module load sxc++
33     module load sxmpi
34     module load sxnetcdf
35     #
36     # make sure that we have qsub and qstat
37     #export PATH=${PATH}:/usr/bin/nqsII
38     source /etc/profile.d/nec.sh
39     #
40     VENDOR=sxf90
41     RUNIT="runit_"$VENDOR
42     HERE=`pwd` # directory the script is started from; the batch script is written here
43     EXE='mpirun -np TR_NPROC ./mitgcmuv' # TR_NPROC is presumably substituted by testreport - TODO confirm
44     NPROCS=2
45     MPI="-MPI $NPROCS"
46     OUTFILE=$HOME/out_${VENDOR} # overridden two lines below
47     MYOUTPUT=$HOME/testreport_${VENDOR}
48     OUTFILE=out_${VENDOR} # relative path; replaces the $HOME-based value set above
49     JOBNAME=test_ace
50     JOBSCRIPT=job_${VENDOR}
51     selectexperiment='-t exp2' # overridden on the next line
52     selectexperiment='' # empty: no experiment selection is passed to testreport
53     # download code into this directory
54 mlosch 1.2 TDIR=/ace/user/mlosch/tmp_$VENDOR
55 mlosch 1.4 gcmDIR=MITgcm
56     git_repo='MITgcm'
57     git_code='MITgcm'
58 mlosch 1.1
59     OPTFILE=../tools/build_options/SUPER-UX_SX-ACE_sxf90_awi
60 mlosch 1.2 #OPTFILE=/home/ace/mlosch/MITgcm/tools/build_options/SUPER-UX_SX-ACE_sxf90_awi
61 mlosch 1.1
62     RUNTESTREPORT="./testreport $MPI -of=${OPTFILE} $selectexperiment -small_f"
63     # create batch script: the EOF delimiter is unquoted, so $VARIABLES are
64     # expanded now, while the file is written; only the escaped \${PBS_O_WORKDIR}
65     # is left for the job to expand at run time
66     cat << EOF > $HERE/$JOBSCRIPT
67 mlosch 1.2 #PBS -q ace-r # job queue
68 mlosch 1.1 #PBS -N $JOBNAME # give the job a name
69     #PBS -l cpunum_job=$NPROCS # cpus per node
70 mlosch 1.2 #PBS -l elapstim_req=2:00:00
71 mlosch 1.1 #PBS -l cputim_job=2:00:00 # time limit
72     #PBS -l memsz_job=32gb # max accumulated memory, we need this much because of many netcdf files
73     #PBS -j o # join i/o
74     #PBS -S /bin/sh
75     #PBS -o $OUTFILE # o Where to write output
76     #
77    
78     cd \${PBS_O_WORKDIR}
79     $RUNTESTREPORT -runonly -command "$EXE" >> $MYOUTPUT 2>&1
80    
81     EOF
82    
83     # clean up old testreport output and the old batch job output file
84     if [ -e $MYOUTPUT ]; then
85     rm -rf $MYOUTPUT
86     fi
87     if [ -e $OUTFILE ]; then
88     rm -r $OUTFILE
89     fi
90 mlosch 1.4
91     # checkOut determines how much checking out is being done
92     # checkOut = 3: new clone from GitHub and make a new copy
93     # checkOut = 2: update (git pull) existing repo and make a new copy
94     # checkOut = 1: skip update
95     # checkOut = 0: treated like 1 in this script; if the copy is missing, switch to 2
96    
97     checkOut=2
98    
99     tmpFil=$TDIR/error.out
100     if [ $checkOut -le 1 ] ; then
101     if test -e $TDIR/${gcmDIR}/doc ; then
102     echo $TDIR/${gcmDIR}/doc 'exist'
103     else
104     echo -n $TDIR/${gcmDIR} 'missing ; '
105     checkOut=2
106     echo "will make a new copy ( checkOut=$checkOut )"
107     fi
108     fi
109    
110     if [ $checkOut -ge 2 ] ; then
111     #---- go to the download directory; abort instead of running git in the wrong place:
112     cd $TDIR || { echo "cannot cd to $TDIR" ; exit 1 ; }
113    
114     #---- Make a new clone or update existing one:
115     if test -e ${gcmDIR}/.git/config ; then
116     echo "${gcmDIR}/.git/config exist"
117     else
118     echo -n "${gcmDIR}/.git/config missing, "
119     checkOut=3
120     echo "will get new clone ( checkOut=$checkOut )"
121     fi
122     if [ $checkOut -eq 3 ] ; then
123     echo -n "Removing old clone: $TDIR/${gcmDIR} ..."
124     test -e $TDIR/${gcmDIR} && rm -rf $TDIR/${gcmDIR}
125     echo " done"
126     echo -n "Make a new clone of $git_code from repo: $git_repo ..."
127     git clone https://github.com/$git_repo/${git_code}.git ${gcmDIR} 2> $tmpFil
128     retVal=$?
129     if test $retVal = 0 ; then
130     echo ' --> done!'
131     rm -f $tmpFil
132     else
133     echo " Error: 'git clone' returned: $retVal"
134     cat $tmpFil
135     rm -f $tmpFil
136     exit 2
137 mlosch 1.1 fi
138 mlosch 1.4 else
139     echo "Updating current clone ( $git_code ) ..."
140     ( cd ${gcmDIR} && git checkout master && git pull ) # only run git inside the clone
141     echo ' --> done!'
142     fi
143 mlosch 1.1 else
144 mlosch 1.4 cd $TDIR || { echo "cannot cd to $TDIR" ; exit 1 ; }
145 mlosch 1.1 fi
146     # ./testreport and the relative OPTFILE path are resolved from this directory, so stop if the cd fails
147     cd $TDIR/${gcmDIR}/verification || { echo "cannot cd to $TDIR/${gcmDIR}/verification" ; exit 1 ; }
148    
149     # make sure that we do not use the cross compiler for testreport
150     unset CC
151     # alternative: make sure that we do use the cross compiler for testreport
152     #export CC=sxcc
153     # step 1/ compile on the head node only (-norun); this starts a fresh $MYOUTPUT
154     $RUNTESTREPORT -j 8 -norun > $MYOUTPUT 2>&1
155    
156     if [ "$?" != "0" ]
157     then
158     echo "something wrong with testreport"
159     echo "keeping the working directory"
160     #else
161     # echo "check restarts"
162     # echo ../tools/do_tst_2+2 -mpi -exe \"$HERE/$RUNIT\" -a NONE
163     # ../tools/do_tst_2+2 -mpi -exe $HERE/$RUNIT -a NONE
164     # everything OK: delete working directory
165     # rm -rf $TDIR
166     fi
167    
168     if [ ! -e $MYOUTPUT ]
169     then
170     touch $MYOUTPUT
171     fi
172     # append a copy of the generated job script to the report file for reference
173     echo " " >> $MYOUTPUT
174     echo "***********************************************************" >> $MYOUTPUT
175     echo "Submitting this job script:" >> $MYOUTPUT
176     echo "***********************************************************" >> $MYOUTPUT
177     cat $HERE/$JOBSCRIPT >> $MYOUTPUT
178     echo "***********************************************************" >> $MYOUTPUT
179     echo "end of job script" >> $MYOUTPUT
180     echo "***********************************************************" >> $MYOUTPUT
181     echo " " >> $MYOUTPUT
182    
183     # now submit the job that actually runs all the experiments in one go;
184 mlosch 1.4 echo "qsub $HERE/$JOBSCRIPT"
185 mlosch 1.1 qsub $HERE/$JOBSCRIPT || { echo "qsub failed, no results to evaluate" ; exit 1 ; }
186     # keep looking for the job in the job queues and wait until it has disappeared
187     jobruns=`qstat -n -u ${USER:-mlosch} | grep "$JOBNAME"`
188     while [ "${jobruns}"x != x ]
189     do
190     sleep 200
191     jobruns=`qstat -n -u ${USER:-mlosch} | grep "$JOBNAME"`
192     echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
193     currentexp=`grep Experiment $MYOUTPUT | tail -1`
194     echo "currently running $currentexp"
195     done
196    
197     # after running the experiments on the compute node run testreport
198     # for a third time to evaluate results on the head node again
199     echo " " >> $MYOUTPUT
200     echo "now run testreport for a final time to evaluate results:" >> $MYOUTPUT
201     echo "$RUNTESTREPORT -match 10 -runonly" >> $MYOUTPUT
202     #$RUNTESTREPORT -match 10 -runonly >> $MYOUTPUT 2>&1
203     $RUNTESTREPORT -match 10 -runonly \
204 mlosch 1.3 -a "jm_c@mitgcm.org" >> $MYOUTPUT 2>&1
205     # -a "jm_c@mitgcm.org, Martin.Losch@awi.de" >> $MYOUTPUT 2>&1
206 mlosch 1.1
207     echo "end of mitgcmtestreport"

  ViewVC Help
Powered by ViewVC 1.1.22