#!/bin/bash
#
# Script for running the MITgcm testreport on sx8.awi.de.
# The testreport is split into 3 steps:
#   1/ compiling on the head node (tx7.awi.de), with the -norun option
#   2/ running on a compute node (using PBS qsub), with the -runonly option
#   3/ evaluating the results on the head node, with the -runonly option
#
# Notes:
# - step 2 leads to many error messages, because the OS on the compute
#   nodes does not have the appropriate shell tools; modifying the
#   -runonly option to skip the evaluation step would be nice but is not
#   necessary; you'll just have to live with the error messages
# - step 3 assumes that all experiments have been run successfully, i.e.
#   that the output files are up-to-date.
#   If not, testreport will try to run the sx8-code on the tx7 frontend,
#   which will fail inevitably and produce more errors; maybe we can
#   have a flag that skips everything but the evaluation step to avoid this
#
# $Header: /home/ubuntu/mnt/e9_copy/MITgcm_contrib/test_scripts/sx8/Attic/mitgcmtestreport_split,v 1.5 2015/03/02 09:13:18 mlosch Exp $
# $Name:  $

# For some reason the module command is not available in a bash script on
# this computer, so we have to create it here.
# Arguments: passed through unchanged to modulecmd; the shell code that
# modulecmd prints is evaluated in the current shell.
module() {
  eval "$(/usr/bin/modulecmd bash "$@")"
}

# load latest compiler:
module load sxf90/460

VENDOR=sxf90
RUNIT="runit_${VENDOR}"   # only referenced by the commented-out restart check
HERE=$(pwd)
# passed verbatim to testreport's -command option inside the job script
EXE='mpirun -np TR_NPROC ./mitgcmuv'
NPROCS=2
MPI="-MPI $NPROCS"
MYOUTPUT=/home/sx8/mlosch/testreport_${VENDOR}
# An absolute path used to be assigned first and was immediately
# overwritten; only the relative name below ever took effect.
#OUTFILE=/home/sx8/mlosch/out_${VENDOR}
OUTFILE=out_${VENDOR}
JOBNAME=testsx8
JOBSCRIPT=job_${VENDOR}
#selectexperiment='-t exp2'
selectexperiment=''
# download code into this directory
TDIR=/sx8/scr/mlosch/tmp_$VENDOR

OPTFILE=../tools/build_options/SUPER-UX_SX-8_sxf90_awi
# Kept as a plain string on purpose: it is pasted into the job script below
# and expanded UNQUOTED when executed, so that it word-splits into the
# command and its options (an empty $selectexperiment simply vanishes).
RUNTESTREPORT="./testreport $MPI -of=${OPTFILE} $selectexperiment"

# 1 = wipe $TDIR and check out a fresh copy (what this script always did),
# anything else = run "cvs update" in the current directory instead.
fresh_checkout=1

#
# create batch script
#
# The here-document delimiter is unquoted, so all $VARIABLES are expanded
# now, while the job script is written; only \${PBS_O_WORKDIR} is escaped
# and therefore left for the job itself to expand.
cat << EOF > "$HERE/$JOBSCRIPT"
#PBS -q sx8-r # job queue not neccesary so far
#PBS -N $JOBNAME # give the job a name
#PBS -l cpunum_job=$NPROCS # cpus per node
#PBS -l cputim_job=2:00:00 # time limit
#PBS -l memsz_job=32gb # max accumulated memory, we need this much because of many netcdf files
#PBS -j o # join i/o
#PBS -S /bin/sh
#PBS -o $OUTFILE # o Where to write output
#
cd \${PBS_O_WORKDIR}
$RUNTESTREPORT -runonly -command "$EXE" >> $MYOUTPUT 2>&1
EOF

# clean up old testreport output
if [ -e "$MYOUTPUT" ]; then
  rm -rf -- "$MYOUTPUT"
fi
if [ -e "$OUTFILE" ]; then
  rm -r -- "$OUTFILE"
fi

# The cvs exit status is tested directly. The former "[ $status > 0 ]" used
# the csh variable $status (unset in bash) and a ">" that the shell treats
# as a redirection, so the test was always false and only created a file
# named "0".
if [ "$fresh_checkout" = 1 ]; then
  # create directory and download code
  if [ -e "$TDIR" ]; then
    rm -rf -- "${TDIR:?}"
  fi
  # Stop here if the scratch directory is unusable; otherwise the checkout
  # below would land in whatever directory we happen to be in.
  if ! mkdir "$TDIR" || ! cd "$TDIR"; then
    echo "cannot create or enter $TDIR" >&2
    exit 1
  fi
  # cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack co MITgcm_verif_basic > cvs_co.log 2>&1
  if ! cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack co MITgcm > cvs_co.log 2>&1; then
    cat cvs_co.log
  fi
else
  # NOTE(review): this runs in the current directory, not in an existing
  # checkout below $TDIR -- confirm that this is intended before enabling.
  if ! cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q update >> cvs_co.log 2>&1; then
    cat cvs_co.log
  fi
fi

if ! cd "$TDIR/MITgcm/verification"; then
  echo "cannot enter $TDIR/MITgcm/verification" >&2
  exit 1
fi

# make sure that we do not use the cross compiler for testreport
unset CC
# make sure that we do use the cross compiler for testreport
#export CC=sxcc

# step 1: compile only
# shellcheck disable=SC2086 # $RUNTESTREPORT must word-split into arguments
if ! $RUNTESTREPORT -j 8 -norun > "$MYOUTPUT" 2>&1; then
  echo "something wrong with testreport"
  echo "keeping the working directory"
#else
#  echo "check restarts"
#  echo ../tools/do_tst_2+2 -mpi -exe \"$HERE/$RUNIT\" -a NONE
#  ../tools/do_tst_2+2 -mpi -exe $HERE/$RUNIT -a NONE
#  everything OK: delete working directory
#  rm -rf $TDIR
fi

if [ ! -e "$MYOUTPUT" ]; then
  touch "$MYOUTPUT"
fi

# append a copy of the job script to the report
separator="***********************************************************"
{
  echo " "
  echo "$separator"
  echo "Submitting this job script:"
  echo "$separator"
  cat "$HERE/$JOBSCRIPT"
  echo "$separator"
  echo "end of job script"
  echo "$separator"
  echo " "
} >> "$MYOUTPUT"

# step 2: now submit the job that actually runs all the experiments in one go
if ! qsub "$HERE/$JOBSCRIPT"; then
  # Only warn: the script has always carried on to the evaluation step.
  echo "warning: qsub failed, the experiments have probably not been run" >&2
fi

# keep looking for the job in the job queues and wait until it has disappeared
jobruns=$(qstat -n -u mlosch | grep "$JOBNAME")
while [ -n "$jobruns" ]; do
  sleep 20
  jobruns=$(qstat -n -u mlosch | grep "$JOBNAME")
  # ${jobruns%% *} keeps the text before the first blank of the qstat line
  echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
  currentexp=$(grep Experiment "$MYOUTPUT" | tail -1)
  echo "currently running $currentexp"
done

# step 3: after running the experiments on the compute node run testreport
# for a third time to evaluate results on the head node again
{
  echo " "
  echo "now run testreport for a final time to evaluate results:"
  echo "$RUNTESTREPORT -match 10 -runonly"
} >> "$MYOUTPUT"

#$RUNTESTREPORT -match 10 -runonly >> $MYOUTPUT 2>&1
# shellcheck disable=SC2086 # $RUNTESTREPORT must word-split into arguments
$RUNTESTREPORT -match 10 -runonly \
  -a "jmc@mitgcm.org, Martin.Losch@awi.de" >> "$MYOUTPUT" 2>&1

echo "end of mitgcmtestreport_split"