/[MITgcm]/MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray
ViewVC logotype

Diff of /MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray

Parent Directory | Revision Log | View Revision Graph | View Patch

revision 1.1 by mlosch, Thu Jun 2 12:51:56 2016 UTC revision 1.8 by mlosch, Tue Apr 3 12:58:37 2018 UTC
# Line 1  Line 1 
1  #!/bin/bash -e  #!/bin/bash -e
2  # new script for running testreport on ollie.awi.de  # new script for running testreport on ollie.awi.de
3  # - split the testreport into 2 steps:  # - compile and run on compute nodes
4  # 1/ compiling on head node, with -norun option  # - use ssh to call mpack command from the head node ollie0
 # 2/ running on compute node (using SBATCH), with -runonly option  
5  #$Header$  #$Header$
6  #$Name$  #$Name$
7    
# Line 12  source /etc/profile.d/cray_pe.sh Line 11  source /etc/profile.d/cray_pe.sh
11  source /etc/profile.d/AWI.sh  source /etc/profile.d/AWI.sh
12  #  #
13  module purge  module purge
 module load slurm  
14  module load craype-broadwell  module load craype-broadwell
15  module load PrgEnv-cray/1.0.0  module load PrgEnv-cray/1.0.2_test
16  module unload cudatoolkit  # module load PrgEnv-cray
17  module swap mvapich2_cce cray-impi  # to me this looks like a terrible hack, but that is what we are supposed
18  module load intel/impi-5.1.3  # to use now
19  export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}:  # module unload craype-network-infiniband cudatoolkit mvapich2_cce
20    # module load craype-network-opa
21    # module load mvapich2_cce
22    module load netcdf
23    #export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}:
24    # not sure why I have to set these paths here again
25    export MPI_ROOT=$(dirname $(dirname `which mpicc`))
26    # $(dirname `echo $LD_LIBRARY_PATH  | awk -F: '{print $1}'`)
27    export MPI_INC_DIR=${MPI_ROOT}/include
28    
29    # there is no slurm module anymore and this is the current recommendation to
30    # have sbatch in your path (rather than running /etc/profile.d/slurm.sh)
31    export PATH=${PATH}:/global/opt/slurm/default/bin
32    
33  dNam='ollie'  dNam='ollie'
34  TST_DIR="/work/ollie/mlosch/test_$dNam"  TST_DIR="/work/ollie/mlosch/test_$dNam"
# Line 26  echo "start from TST_DIR='$TST_DIR' at: Line 36  echo "start from TST_DIR='$TST_DIR' at:
36    
37  umask 0022  umask 0022
38    
39  sfx='ftn'  sfx='cray'
40  RUNIT="runit_"$sfx  RUNIT="runit_"$sfx
41    
42  OPTFILE=../tools/build_options/linux_ia64_cray_ollie  OPTFILE=../tools/build_options/linux_ia64_${sfx}_ollie
43  options="-MPI 6"  options="-MPI 6"
44  options="$options -odir $dNam"  options="$options -odir ${dNam}-c"
45  options="$options -devel"  options="$options -j 6"
46  #options="$options -j 2"  #options="$options -t global_ocean.cs32x15"
47  #options="$options -j 6"  
48  #options="$options -t global_ocean.90x40x15"  #EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'
49  #EXE='mpiexec.hydra -bootstrap slurm -n TR_NPROC ./mitgcmuv'  #EXE='srun -n TR_NPROC ./mitgcmuv'
50  EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'  EXE='srun -n TR_NPROC --cpu_bind=cores ./mitgcmuv'
51    
52  if [ -e $TST_DIR ]; then  if [ -e $TST_DIR ]; then
53      echo "$TST_DIR exists"      echo "$TST_DIR exists"
# Line 51  if [ -e $HERE ]; then Line 61  if [ -e $HERE ]; then
61  else  else
62      mkdir $HERE      mkdir $HERE
63  fi  fi
64  OUTFILE=$HERE/testreport.out  OUTFILE=$HERE/slurm_${sfx}.out
65  MYOUTPUT=$HERE/out_$sfx  MYOUTPUT=$HERE/out_$sfx
66  if [ -e $MYOUTPUT ]; then  if [ -e $MYOUTPUT ]; then
67    rm -rf $MYOUTPUT    rm -rf $MYOUTPUT
# Line 59  fi Line 69  fi
69  if [ -e $OUTFILE ]; then  if [ -e $OUTFILE ]; then
70    rm -r $OUTFILE    rm -r $OUTFILE
71  fi  fi
72  gcmDIR="MITgcm_$sfx"  gcmDIR="MITgcm_${sfx}"
73  #gcmDIR="MITgcm"  git_repo='MITgcm'
74  checkOut=1  git_code='MITgcm'
75  if [ $checkOut -eq 1 ] ; then  
76   if test -d $gcmDIR/CVS ; then  # checkOut determines how much checking out is being done
77    /bin/rm -rf $gcmDIR/verification/??_${dNam}_????????_?  # checkOut = 3: new clone from GitHub and make a new copy
78    ( cd $gcmDIR/verification ; ../tools/do_tst_2+2 -clean )  # checkOut = 2: update (git pull) existing repo and make a new copy
79    echo "cvs update of dir $gcmDIR :"  # checkOut = 1: skip update
80    ( cd $gcmDIR ; cvs -q -d :pserver:cvsanon@mitgcm.org:/u/gcmpack update -P -d ) 2>&1  # checkOut = 0: use existing test code (if available otherwise switch to 1)
81    RETVAL=$?  
82    if test "x$RETVAL" != x0 ; then  checkOut=2
83     echo "cvs update on '"`hostname`"' failed (return val=$RETVAL) => exit"  tdir=${TST_DIR}
84     exit  today=`date +%Y%m%d`
85    TODAY=`date +%d`
86    #tmpFil="/tmp/"`basename $0`".$$"
87    tmpFil=$TST_DIR/error.out
88    
89    if [ $checkOut -le 1 ] ; then
90      if test -e $TST_DIR/${gcmDIR}/doc ; then
91        echo $TST_DIR/${gcmDIR}/doc 'exist'
92      else
93        echo -n $TST_DIR/${gcmDIR} 'missing ; '
94        checkOut=2
95        echo "will make a new copy ( checkOut=$checkOut )"
96    fi    fi
97   else  fi
98    echo "no dir: $gcmDIR/CVS => try a fresh check-out"  
99    checkOut=2  if [ $checkOut -ge 2 ] ; then
100   fi    #---- cleaning:
101  fi    cd $TST_DIR
102  if [ $checkOut -eq 2 ] ; then  
103    if test -e $gcmDIR ; then    #---- Make a new clone or update existing one:
104     rm -rf $gcmDIR    if test -e ${gcmDIR}/.git/config ; then
105        echo "${gcmDIR}/.git/config exist"
106      else
107        echo -n "${gcmDIR}/.git/config 'missing "
108        checkOut=3
109        echo "will get new clone ( checkOut=$checkOut )"
110      fi
111      if [ $checkOut -eq 3 ] ; then
112        echo -n "Removing old clone: $TST_DIR/${gcmDIR} ..."
113        test -e $TST_DIR/${gcmDIR}  &&  rm -rf $TST_DIR/${gcmDIR}
114        echo "  done"
115        echo -n "Make a new clone of $git_code from repo: $git_repo ..."
116        git clone https://github.com/$git_repo/${git_code}.git ${gcmDIR} 2> $tmpFil
117        retVal=$?
118        if test $retVal = 0 ; then
119           echo ' --> done!'
120           rm -f $tmpFil
121        else
122           echo " Error: 'git clone' returned: $retVal"
123           cat $tmpFil
124           rm -f $tmpFil
125           exit 2
126        fi
127      else
128        echo "Updating current clone ( $git_code ) ..."
129        ( cd ${gcmDIR}; git checkout master ; git pull )
130        echo ' --> done!'
131    fi    fi
132  # cvs co MITgcm  else
133    cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack co -P -d $gcmDIR MITgcm > /dev/null 2>&1    cd $TST_DIR
   /usr/bin/find $gcmDIR -type d | xargs chmod g+rxs  
   /usr/bin/find $gcmDIR -type f | xargs chmod g+r  
134  fi  fi
135    
136  cd ${gcmDIR}/verification  cd ${TST_DIR}/${gcmDIR}/verification
137    
138  runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\""  cwd=\`pwd\`
139    SENDCMD="ssh ollie0 ${TST_DIR}/${gcmDIR}/tools/mpack-1.6/mpack"
140    runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\" -send \"${SENDCMD}\" -sd ${cwd}"
141    emailaddress="-a jm_c@mitgcm.org"
142    
143  if [ ! -e $MYOUTPUT ]  if [ ! -e $MYOUTPUT ]
144      then      then
145      touch $MYOUTPUT      touch $MYOUTPUT
146  fi  fi
147    
148  echo "running testreport like this:"  # echo "running testreport like this:"
149  echo ${runtestreport} -norun  # echo ${runtestreport} -norun
150  eval "${runtestreport} -norun >> $MYOUTPUT 2>&1"  # eval "${runtestreport} -norun >> $MYOUTPUT 2>&1"
151    
152  # create batch script  # create batch script
153  #  #
154  JOBNAME=tstoll  JOBNAME=tst$sfx
155  echo "creating batch script $HERE/$RUNIT"  echo "creating batch script $HERE/$RUNIT"
156  cat << EOF >| $HERE/$RUNIT  cat << EOF >| $HERE/$RUNIT
157  #!/bin/bash -x  #!/bin/bash
158  #SBATCH --job-name=${JOBNAME}  #SBATCH --job-name=${JOBNAME}
159  #SBATCH  -o ${OUTFILE}  #SBATCH  -o ${OUTFILE}
160  #SBATCH --time=3:00:00  #SBATCH --time=12:00:00
161  #SBATCH --ntasks=6  #SBATCH --ntasks=6
 #SBATCH  -p smp  
 #SBATCH --qos=short  
162    
163  ulimit -s 1048576  # still need this to be able to run a few experiments:
164    # cfc_example, fizhi, tutorial_deep_convection
165    ulimit -s unlimited
166    
167    # binds OpenMP task to given cores
168    export OMP_PROC_BIND=TRUE
169    
170    # for debugging
171    # export FLEXLM_DIAGNOSTICS=2
172    # export FNP_IP_ENV=1
173    # export LM_A_CONN_TIMEOUT=99
174    
175  cd \${SLURM_SUBMIT_DIR}  cd \${SLURM_SUBMIT_DIR}
176    
177  echo "running testreport like this:"  cwd=`pwd`
178  echo "${runtestreport} -runonly"  echo "running testreport like this in \${cwd}:"
179  #${runtestreport} -runonly -match 10 -a 'Martin.Losch@awi.de' >> $MYOUTPUT  echo "${runtestreport} -devel -match 10"
180  ${runtestreport} -runonly -match 10 -a 'jmc@mitcm.org' >> $MYOUTPUT  ${runtestreport} -devel -match 10 ${emailaddress} >> $MYOUTPUT 2>&1
181    
182    echo "running testreport like this in \${cwd}:"
183    echo "./testreport -clean"
184    ./testreport -clean
185    
186    echo "running testreport like this in \${cwd}:"
187    echo "${runtestreport} -fast -match 10"
188    ${runtestreport} -fast -match 10 ${emailaddress} >> $MYOUTPUT 2>&1
189    
190  EOF  EOF
191    
# Line 137  echo "end of job script" >> $MYOUTPUT Line 201  echo "end of job script" >> $MYOUTPUT
201  echo "***********************************************************" >> $MYOUTPUT  echo "***********************************************************" >> $MYOUTPUT
202  echo " " >> $MYOUTPUT  echo " " >> $MYOUTPUT
203    
204    echo "sbatch $HERE/$RUNIT"
205  sbatch $HERE/$RUNIT  sbatch $HERE/$RUNIT
206    
207  # keep looking for the job in the job queues and wait until has disappeared  # # keep looking for the job in the job queues and wait until it has disappeared
208  jobruns=somedummy  # jobruns=somedummy
209  while [ "${jobruns}"x != x ]  # while [ "${jobruns}"x != x ]
210  do  # do
211    sleep 20  #   sleep 200
212    jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`  #   jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`
213    echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"  #   echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
214    currentexp=`grep Experiment $MYOUTPUT | tail -1`  #   currentexp=`grep Experiment $MYOUTPUT | tail -1`
215    echo "currently running $currentexp"  #   echo "currently running $currentexp"
216  done  # done
217    
218    # # workaround for mailing the stuff
219    # echo "mail the stuff"
220    
 # # after running the experiments send email to jmc (cannot be done from  
 # # the compute node, yet)  
221  # MPACKCMD=../tools/mpack-1.6/mpack  # MPACKCMD=../tools/mpack-1.6/mpack
222  # fn=`ls -dtr tr_ollie* | grep -v tar.gz | tail -1`  # fn=`ls -dtr tr_$dNam* | grep -v tar.gz | tail -1`
223  # echo "fn $fn"  # echo "fname ${fn}"
224  # tar cf - $fn | gzip > "${fn}.tar.gz"  # tar cf - $fn | gzip > "${fn}.tar.gz"
225  # ../tools/mpack-1.6/mpack -s MITgcm-test -m 3555000 ${fn}.tar.gz mlosch@awi.de  # $MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jm_c@mitgcm.org
226  # sleep 2  # sleep 2
227  # \rm -f ${fn}.tar.gz  # rm -rf "${fn}.tar.gz"
 # ../tools/mpack-1.6/mpack -s MITgcm-test -m 3555000 ${fn}.tar.gz jmc@mitgcm.org  
   
 # after running the experiments on the compute node run testreport  
 # for a third time to evaluate results on the head node again  
 # echo " " >> $MYOUTPUT  
 # echo "now run testreport for a final time to evaluate results:" >> $MYOUTPUT  
 # echo "$RUNTESTREPORT -match 10 -runonly" >> $MYOUTPUT  
 # #$RUNTESTREPORT -match 10 -runonly >> $MYOUTPUT 2>&1  
 # $RUNTESTREPORT -match 10 -runonly \  
 #     -a "jmc@mitgcm.org" >> $MYOUTPUT 2>&1  
 # #   -a "jmc@mitgcm.org, Martin.Losch@awi.de" >> $MYOUTPUT 2>&1  
   
 # workaround for mailing the stuff  
 echo "mail the stuff"  
 # 1. set name of remote host where to do the mpack command  
 # 2. pack directory into an archive and compress it  
 # 3. copy gzipped archive to remote host  
 # 4. on the remote host execute the mpack command, that send the email  
 # 5. wait a little, just to be sure everything is done  
 # 6. remove archives  
 MPACKCMD=\${HOME}/bin/mpack  
 rmhost=rayl4.awi.de  
 pwd  
 fn=`ls -dtr tr_ollie* | grep -v tar.gz | tail -1`  
 echo "fname ${fn}"  
 tar cf - $fn | gzip > "${fn}.tar.gz"  
 eval "scp ${fn}.tar.gz ${rmhost}:"  
 #ssh -Y $rmhost "$MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz mlosch@awi.de"  
 ssh -Y $rmhost "$MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jmc@mitgcm.org"  
 sleep 2  
 ssh -Y $rmhost "rm -f ${fn}.tar.gz"  
 rm -rf "${fn}.tar.gz"  
 # end workaround for mailing the stuff  
228    
229  echo "end of mitgcmtestreport"  echo "end of mitgcmtestreport"
230    

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.8

  ViewVC Help
Powered by ViewVC 1.1.22