/[MITgcm]/MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray
ViewVC logotype

Diff of /MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray

Parent Directory | Revision Log | View Revision Graph | View Patch

revision 1.1 by mlosch, Thu Jun 2 12:51:56 2016 UTC | revision 1.7 by mlosch, Mon Mar 13 09:31:51 2017 UTC
# Line 1  Line 1 
1  #!/bin/bash -e  #!/bin/bash -e
2  # new script for running testreport on ollie.awi.de  # new script for running testreport on ollie.awi.de
3  # - split the testreport into 2 steps:  # - compile and run on compute nodes
4  # 1/ compiling on head node, with -norun option  # - use ssh to call mpack command from the head node ollie0
 # 2/ running on compute node (using SBATCH), with -runonly option  
5  #$Header$  #$Header$
6  #$Name$  #$Name$
7    
# Line 11  source /usr/Modules/3.2.10/init/bash Line 10  source /usr/Modules/3.2.10/init/bash
10  source /etc/profile.d/cray_pe.sh  source /etc/profile.d/cray_pe.sh
11  source /etc/profile.d/AWI.sh  source /etc/profile.d/AWI.sh
12  #  #
13    
14  module purge  module purge
 module load slurm  
15  module load craype-broadwell  module load craype-broadwell
16  module load PrgEnv-cray/1.0.0  module load PrgEnv-cray
17  module unload cudatoolkit  # to me this looks like a terrible hack, but that is what we are supposed
18  module swap mvapich2_cce cray-impi  # to use now
19  module load intel/impi-5.1.3  module unload craype-network-infiniband cudatoolkit mvapich2_cce
20  export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}:  module load craype-network-opa
21    module load mvapich2_cce
22    module load netcdf
23    #export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}:
24    # not sure why I have set these paths here again
25    export MPI_ROOT=$(dirname $(dirname `which mpicc`))
26    # $(dirname `echo $LD_LIBRARY_PATH  | awk -F: '{print $1}'`)
27    export MPI_INC_DIR=${MPI_ROOT}/include
28    
29    # there is no slurm module anymore and this is the current recommendation to
30    # have sbatch in your path (rather than running /etc/profile.d/slurm.sh)
31    export PATH=${PATH}:/global/opt/slurm/default/bin
32    
33  dNam='ollie'  dNam='ollie'
34  TST_DIR="/work/ollie/mlosch/test_$dNam"  TST_DIR="/work/ollie/mlosch/test_$dNam"
# Line 26  echo "start from TST_DIR='$TST_DIR' at: Line 36  echo "start from TST_DIR='$TST_DIR' at:
36    
37  umask 0022  umask 0022
38    
39  sfx='ftn'  sfx='cray'
40  RUNIT="runit_"$sfx  RUNIT="runit_"$sfx
41    
42  OPTFILE=../tools/build_options/linux_ia64_cray_ollie  OPTFILE=../tools/build_options/linux_ia64_${sfx}_ollie
43  options="-MPI 6"  options="-MPI 6"
44  options="$options -odir $dNam"  options="$options -odir ${dNam}-c"
 options="$options -devel"  
 #options="$options -j 2"  
45  #options="$options -j 6"  #options="$options -j 6"
46  #options="$options -t global_ocean.90x40x15"  #options="$options -t global_ocean.cs32x15"
47    
48  #EXE='mpiexec.hydra -bootstrap slurm -n TR_NPROC ./mitgcmuv'  #EXE='mpiexec.hydra -bootstrap slurm -n TR_NPROC ./mitgcmuv'
49  EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'  EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'
50    
# Line 51  if [ -e $HERE ]; then Line 60  if [ -e $HERE ]; then
60  else  else
61      mkdir $HERE      mkdir $HERE
62  fi  fi
63  OUTFILE=$HERE/testreport.out  OUTFILE=$HERE/slurm_${sfx}.out
64  MYOUTPUT=$HERE/out_$sfx  MYOUTPUT=$HERE/out_$sfx
65  if [ -e $MYOUTPUT ]; then  if [ -e $MYOUTPUT ]; then
66    rm -rf $MYOUTPUT    rm -rf $MYOUTPUT
# Line 60  if [ -e $OUTFILE ]; then Line 69  if [ -e $OUTFILE ]; then
69    rm -r $OUTFILE    rm -r $OUTFILE
70  fi  fi
71  gcmDIR="MITgcm_$sfx"  gcmDIR="MITgcm_$sfx"
 #gcmDIR="MITgcm"  
72  checkOut=1  checkOut=1
73  if [ $checkOut -eq 1 ] ; then  if [ $checkOut -eq 1 ] ; then
74   if test -d $gcmDIR/CVS ; then   if test -d $gcmDIR/CVS ; then
# Line 90  fi Line 98  fi
98    
99  cd ${gcmDIR}/verification  cd ${gcmDIR}/verification
100    
101  runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\""  cwd=\`pwd\`
102    SENDCMD="ssh ollie0 ${TST_DIR}/${gcmDIR}/tools/mpack-1.6/mpack"
103    runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\" -send \"${SENDCMD}\" -sd ${cwd}"
104    
105  if [ ! -e $MYOUTPUT ]  if [ ! -e $MYOUTPUT ]
106      then      then
107      touch $MYOUTPUT      touch $MYOUTPUT
108  fi  fi
109    
110  echo "running testreport like this:"  # echo "running testreport like this:"
111  echo ${runtestreport} -norun  # echo ${runtestreport} -norun
112  eval "${runtestreport} -norun >> $MYOUTPUT 2>&1"  # eval "${runtestreport} -norun >> $MYOUTPUT 2>&1"
113    
114  # create batch script  # create batch script
115  #  #
116  JOBNAME=tstoll  JOBNAME=tst$sfx
117  echo "creating batch script $HERE/$RUNIT"  echo "creating batch script $HERE/$RUNIT"
118  cat << EOF >| $HERE/$RUNIT  cat << EOF >| $HERE/$RUNIT
119  #!/bin/bash -x  #!/bin/bash
120  #SBATCH --job-name=${JOBNAME}  #SBATCH --job-name=${JOBNAME}
121  #SBATCH  -o ${OUTFILE}  #SBATCH  -o ${OUTFILE}
122  #SBATCH --time=3:00:00  #SBATCH --time=12:00:00
123  #SBATCH --ntasks=6  #SBATCH --ntasks=6
 #SBATCH  -p smp  
 #SBATCH --qos=short  
124    
125  ulimit -s 1048576  # still need this to be able to run a few experiments:
126    # cfc_example, fizhi, tutorial_deep_convection
127    ulimit -s unlimited
128    
129    # # for debugging
130    # export FLEXLM_DIAGNOSTICS=2
131    # export FNP_IP_ENV=1
132    # export LM_A_CONN_TIMEOUT=99
133    
134  cd \${SLURM_SUBMIT_DIR}  cd \${SLURM_SUBMIT_DIR}
135    
136  echo "running testreport like this:"  cwd=`pwd`
137  echo "${runtestreport} -runonly"  echo "running testreport like this in \${cwd}:"
138  #${runtestreport} -runonly -match 10 -a 'Martin.Losch@awi.de' >> $MYOUTPUT  echo "${runtestreport} -devel -match 10"
139  ${runtestreport} -runonly -match 10 -a 'jmc@mitcm.org' >> $MYOUTPUT  ${runtestreport} -devel -match 10 -a jmc@mitgcm.org >> $MYOUTPUT 2>&1
140    
141    echo "running testreport like this in \${cwd}:"
142    echo "./testreport -clean"
143    ./testreport -clean
144    
145    echo "running testreport like this in \${cwd}:"
146    echo "${runtestreport} -fast -match 10"
147    ${runtestreport} -fast -match 10 -a jmc@mitgcm.org >> $MYOUTPUT 2>&1
148    
149  EOF  EOF
150    
# Line 139  echo " " >> $MYOUTPUT Line 162  echo " " >> $MYOUTPUT
162    
163  sbatch $HERE/$RUNIT  sbatch $HERE/$RUNIT
164    
165  # keep looking for the job in the job queues and wait until has disappeared  # # keep looking for the job in the job queues and wait until it has disappeared
166  jobruns=somedummy  # jobruns=somedummy
167  while [ "${jobruns}"x != x ]  # while [ "${jobruns}"x != x ]
168  do  # do
169    sleep 20  #   sleep 200
170    jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`  #   jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`
171    echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"  #   echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
172    currentexp=`grep Experiment $MYOUTPUT | tail -1`  #   currentexp=`grep Experiment $MYOUTPUT | tail -1`
173    echo "currently running $currentexp"  #   echo "currently running $currentexp"
174  done  # done
175    
176    # # workaround for mailing the stuff
177    # echo "mail the stuff"
178    
 # # after running the experiments send email to jmc (cannot be done from  
 # # the compute node, yet)  
179  # MPACKCMD=../tools/mpack-1.6/mpack  # MPACKCMD=../tools/mpack-1.6/mpack
180  # fn=`ls -dtr tr_ollie* | grep -v tar.gz | tail -1`  # fn=`ls -dtr tr_$dNam* | grep -v tar.gz | tail -1`
181  # echo "fn $fn"  # echo "fname ${fn}"
182  # tar cf - $fn | gzip > "${fn}.tar.gz"  # tar cf - $fn | gzip > "${fn}.tar.gz"
183  # ../tools/mpack-1.6/mpack -s MITgcm-test -m 3555000 ${fn}.tar.gz mlosch@awi.de  # $MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jmc@mitgcm.org
184  # sleep 2  # sleep 2
185  # \rm -f ${fn}.tar.gz  # rm -rf "${fn}.tar.gz"
 # ../tools/mpack-1.6/mpack -s MITgcm-test -m 3555000 ${fn}.tar.gz jmc@mitgcm.org  
   
 # after running the experiments on the compute node run testreport  
 # for a third time to evaluate results on the head node again  
 # echo " " >> $MYOUTPUT  
 # echo "now run testreport for a final time to evaluate results:" >> $MYOUTPUT  
 # echo "$RUNTESTREPORT -match 10 -runonly" >> $MYOUTPUT  
 # #$RUNTESTREPORT -match 10 -runonly >> $MYOUTPUT 2>&1  
 # $RUNTESTREPORT -match 10 -runonly \  
 #     -a "jmc@mitgcm.org" >> $MYOUTPUT 2>&1  
 # #   -a "jmc@mitgcm.org, Martin.Losch@awi.de" >> $MYOUTPUT 2>&1  
   
 # workaround for mailing the stuff  
 echo "mail the stuff"  
 # 1. set name of remote host where to do the mpack command  
 # 2. pack directory into an archive and compress it  
 # 3. copy gzipped archive to remote host  
 # 4. on the remote host execute the mpack command, that send the email  
 # 5. wait a little, just to be sure everything is done  
 # 6. remove archives  
 MPACKCMD=\${HOME}/bin/mpack  
 rmhost=rayl4.awi.de  
 pwd  
 fn=`ls -dtr tr_ollie* | grep -v tar.gz | tail -1`  
 echo "fname ${fn}"  
 tar cf - $fn | gzip > "${fn}.tar.gz"  
 eval "scp ${fn}.tar.gz ${rmhost}:"  
 #ssh -Y $rmhost "$MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz mlosch@awi.de"  
 ssh -Y $rmhost "$MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jmc@mitgcm.org"  
 sleep 2  
 ssh -Y $rmhost "rm -f ${fn}.tar.gz"  
 rm -rf "${fn}.tar.gz"  
 # end workaround for mailing the stuff  
186    
187  echo "end of mitgcmtestreport"  echo "end of mitgcmtestreport"
188    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.7

  ViewVC Help
Powered by ViewVC 1.1.22