/[MITgcm]/MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray
ViewVC logotype

Diff of /MITgcm_contrib/test_scripts/ollie/mitgcmtestreport_cray

Parent Directory | Revision Log | View Revision Graph | View Patch

revision 1.2 by mlosch, Fri Jun 10 07:55:29 2016 UTC | revision 1.10 by mlosch, Wed Jun 10 13:58:05 2020 UTC
# Line 1  Line 1 
1  #!/bin/bash -e  #!/bin/bash -e
2  # new script for running testreport on ollie.awi.de  # new script for running testreport on ollie.awi.de
3  # - split the testreport into 2 steps:  # - compile and run on compute nodes
4  # 1/ compiling on head node, with -norun option  # - use ssh to call mpack command from the head node ollie0
 # 2/ running on compute node (using SBATCH), with -runonly option  
5  #$Header$  #$Header$
6  #$Name$  #$Name$
7    
8  # needed for cron-job  # needed for cron-job
9  source /usr/Modules/3.2.10/init/bash  #source /usr/Modules/current/init/bash
10  source /etc/profile.d/cray_pe.sh  source /etc/profile.d/cray_pe.sh
11  source /etc/profile.d/AWI.sh  # this seems to be enough to make the module cmd work
12    source /etc/profile.d/modules.sh
13  #  #
14  module purge  module purge
15  module load slurm  module load PrgEnv-cray
16  module load craype-broadwell  module load netcdf
17  module load PrgEnv-cray/1.0.0  # set the netcdf root directory here, because the definitions always
18  module unload cudatoolkit  # change with different "default" netcdf modules
19  module swap mvapich2_cce cray-impi  export NETCDF_ROOT=`nc-config --prefix`
20  module load intel/impi-5.1.3  # not sure why I have to set these paths here again
21  export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}:  export MPI_ROOT=$(dirname $(dirname `which mpicc`))
22    # $(dirname `echo $LD_LIBRARY_PATH  | awk -F: '{print $1}'`)
23    export MPI_INC_DIR=${MPI_ROOT}/include
24    
25    # there is no slurm module anymore and this is the current recommendation to
26    # have sbatch in your path (rather than running /etc/profile.d./slurm.sh)
27    export PATH=${PATH}:/global/opt/slurm/default/bin
28    
29  dNam='ollie'  dNam='ollie'
30  TST_DIR="/work/ollie/mlosch/test_$dNam"  TST_DIR="/work/ollie/mlosch/test_$dNam"
# Line 32  RUNIT="runit_"$sfx Line 38  RUNIT="runit_"$sfx
38  OPTFILE=../tools/build_options/linux_ia64_${sfx}_ollie  OPTFILE=../tools/build_options/linux_ia64_${sfx}_ollie
39  options="-MPI 6"  options="-MPI 6"
40  options="$options -odir ${dNam}-c"  options="$options -odir ${dNam}-c"
41  options="$options -devel"  options="$options -j 6"
42  #options="$options -j 2"  #options="$options -t global_ocean.cs32x15"
43  #options="$options -j 6"  
44  #options="$options -t global_ocean.cs32x15 -q"  #EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'
45  #EXE='mpiexec.hydra -bootstrap slurm -n TR_NPROC ./mitgcmuv'  #EXE='srun -n TR_NPROC ./mitgcmuv'
46  EXE='srun --mpi=pmi2 -n TR_NPROC ./mitgcmuv'  EXE='srun -n TR_NPROC --cpu_bind=cores ./mitgcmuv'
47    
48  if [ -e $TST_DIR ]; then  if [ -e $TST_DIR ]; then
49      echo "$TST_DIR exists"      echo "$TST_DIR exists"
50  else  else
51      mkdir $TST_DIR      mkdir $TST_DIR
52  fi  fi
53  cd $TST_DIR  cd $TST_DIR
54  HERE=$TST_DIR/output  HERE=$TST_DIR/output
55  if [ -e $HERE ]; then  if [ -e $HERE ]; then
56      echo "$HERE"      echo "$HERE"
57  else  else
58      mkdir $HERE      mkdir $HERE
59  fi  fi
60  OUTFILE=$HERE/slurm_${sfx}.out  OUTFILE=$HERE/slurm_${sfx}.out
# Line 59  fi Line 65  fi
65  if [ -e $OUTFILE ]; then  if [ -e $OUTFILE ]; then
66    rm -r $OUTFILE    rm -r $OUTFILE
67  fi  fi
68  gcmDIR="MITgcm_$sfx"  gcmDIR="MITgcm_${sfx}"
69  #gcmDIR="MITgcm"  git_repo='MITgcm'
70  checkOut=1  git_code='MITgcm'
71  if [ $checkOut -eq 1 ] ; then  
72   if test -d $gcmDIR/CVS ; then  # checkOut determines how much checking out is being done
73    /bin/rm -rf $gcmDIR/verification/??_${dNam}_????????_?  # checkOut = 3: new clone from GitHub and make a new copy
74    ( cd $gcmDIR/verification ; ../tools/do_tst_2+2 -clean )  # checkOut = 2: update (git pull) existing repo and make a new copy
75    echo "cvs update of dir $gcmDIR :"  # checkOut = 1: skip update
76    ( cd $gcmDIR ; cvs -q -d :pserver:cvsanon@mitgcm.org:/u/gcmpack update -P -d ) 2>&1  # checkOut = 0: use existing test code (if available otherwise switch to 1)
77    RETVAL=$?  
78    if test "x$RETVAL" != x0 ; then  checkOut=2
79     echo "cvs update on '"`hostname`"' failed (return val=$RETVAL) => exit"  tdir=${TST_DIR}
80     exit  today=`date +%Y%m%d`
81    TODAY=`date +%d`
82    #tmpFil="/tmp/"`basename $0`".$$"
83    tmpFil=$TST_DIR/error.out
84    
85    if [ $checkOut -le 1 ] ; then
86      if test -e $TST_DIR/${gcmDIR}/doc ; then
87        echo $TST_DIR/${gcmDIR}/doc 'exist'
88      else
89        echo -n "$TST_DIR/${gcmDIR} missing ; "
90        checkOut=2
91        echo "will make a new copy ( checkOut=$checkOut )"
92    fi    fi
93   else  fi
94    echo "no dir: $gcmDIR/CVS => try a fresh check-out"  
95    checkOut=2  if [ $checkOut -ge 2 ] ; then
96   fi    #---- cleaning:
97  fi    cd $TST_DIR
98  if [ $checkOut -eq 2 ] ; then  
99    if test -e $gcmDIR ; then    #---- Make a new clone or update existing one:
100     rm -rf $gcmDIR    if test -e ${gcmDIR}/.git/config ; then
101        echo "${gcmDIR}/.git/config exist"
102      else
103        echo -n "${gcmDIR}/.git/config missing ; "
104        checkOut=3
105        echo "will get new clone ( checkOut=$checkOut )"
106    fi    fi
107  # cvs co MITgcm    if [ $checkOut -eq 3 ] ; then
108    cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack co -P -d $gcmDIR MITgcm > /dev/null 2>&1      echo -n "Removing old clone: $TST_DIR/${gcmDIR} ..."
109    /usr/bin/find $gcmDIR -type d | xargs chmod g+rxs      test -e $TST_DIR/${gcmDIR}  &&  rm -rf $TST_DIR/${gcmDIR}
110    /usr/bin/find $gcmDIR -type f | xargs chmod g+r      echo "  done"
111        echo -n "Make a new clone of $git_code from repo: $git_repo ..."
112        git clone https://github.com/$git_repo/${git_code}.git ${gcmDIR} 2> $tmpFil
113        retVal=$?
114        if test $retVal = 0 ; then
115           echo ' --> done!'
116           rm -f $tmpFil
117        else
118           echo " Error: 'git clone' returned: $retVal"
119           cat $tmpFil
120           rm -f $tmpFil
121           exit 2
122        fi
123      else
124    #    echo "clean tst_2+2 + testreport output"
125        ( cd $gcmDIR/verification ; ../tools/do_tst_2+2 -clean )
126        ( cd $gcmDIR/verification ; ./testreport -clean )
127        echo "Updating current clone ( $git_code ) ..."
128        ( cd ${gcmDIR}; git checkout master ; git pull ; git ls-files -d | xargs git checkout -- )
129        echo ' --> done!'
130      fi
131    else
132      cd $TST_DIR
133  fi  fi
134    
135  cd ${gcmDIR}/verification  cd ${TST_DIR}/${gcmDIR}/verification
136    
137  runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\""  cwd=\`pwd\`
138    SENDCMD="ssh ollie0 ${TST_DIR}/${gcmDIR}/tools/mpack-1.6/mpack"
139    runtestreport="./testreport $options -of $OPTFILE -command \"${EXE}\" -send \"${SENDCMD}\" -sd ${cwd}"
140    emailaddress="-a jm_c@mitgcm.org"
141    testrestart="../tools/do_tst_2+2 -mpi -exe \"${EXE}\" -o ${dNam}-c -send \"${SENDCMD}\" -sd ${cwd}"
142    
143  if [ ! -e $MYOUTPUT ]  if [ ! -e $MYOUTPUT ]
144      then      then
# Line 106  fi Line 154  fi
154  JOBNAME=tst$sfx  JOBNAME=tst$sfx
155  echo "creating batch script $HERE/$RUNIT"  echo "creating batch script $HERE/$RUNIT"
156  cat << EOF >| $HERE/$RUNIT  cat << EOF >| $HERE/$RUNIT
157  #!/bin/bash -x  #!/bin/bash
158  #SBATCH --job-name=${JOBNAME}  #SBATCH --job-name=${JOBNAME}
159  #SBATCH  -o ${OUTFILE}  #SBATCH  -o ${OUTFILE}
160  #SBATCH --time=6:00:00  #SBATCH --time=12:00:00
161  #SBATCH --ntasks=6  #SBATCH --ntasks=6
 #SBATCH  -p smp  
162    
163  ulimit -s 1048576  # still need this to be able to run a few experiments:
164    # cfc_example, fizhi, tutorial_deep_convection
165    ulimit -s unlimited
166    
167    # binds OpenMP task to given cores
168    export OMP_PROC_BIND=TRUE
169    
170    # for debugging
171    # export FLEXLM_DIAGNOSTICS=2
172    # export FNP_IP_ENV=1
173    # export LM_A_CONN_TIMEOUT=99
174    
175  cd \${SLURM_SUBMIT_DIR}  cd \${SLURM_SUBMIT_DIR}
176    
177  echo "running testreport like this:"  cwd=`pwd`
178  echo "${runtestreport} -runonly"  echo "running testreport like this in \${cwd}:"
179  #${runtestreport} -runonly -match 10 -a 'Martin.Losch@awi.de' >> $MYOUTPUT  echo "${runtestreport} -devel -match 10"
180  #${runtestreport} -runonly -match 10 -a 'jmc@mitcm.org' >> $MYOUTPUT  ${runtestreport} -devel -match 10 ${emailaddress} >> $MYOUTPUT 2>&1
181  ${runtestreport} -match 10 >> $MYOUTPUT  
182    echo "running testreport like this in \${cwd}:"
183    echo "./testreport -clean"
184    ./testreport -clean
185    
186    # Hack,hack,hack to avoid running dome:
187    rm -f dome/results/output.txt
188    echo "running testreport like this in \${cwd}:"
189    echo "${runtestreport} -fast -match 10"
190    ${runtestreport} -fast -match 10 ${emailaddress} >> $MYOUTPUT 2>&1
191    
192    echo "running restart test like this in \${cwd}:"
193    echo "${testrestart}"
194    ${testrestart} ${emailaddress} >> $MYOUTPUT 2>&1
195    
196  EOF  EOF
197    
# Line 137  echo "end of job script" >> $MYOUTPUT Line 207  echo "end of job script" >> $MYOUTPUT
207  echo "***********************************************************" >> $MYOUTPUT  echo "***********************************************************" >> $MYOUTPUT
208  echo " " >> $MYOUTPUT  echo " " >> $MYOUTPUT
209    
210    echo "sbatch $HERE/$RUNIT"
211  sbatch $HERE/$RUNIT  sbatch $HERE/$RUNIT
212    
213  # keep looking for the job in the job queues and wait until has disappeared  # # keep looking for the job in the job queues and wait until it has disappeared
214  jobruns=somedummy  # jobruns=somedummy
215  while [ "${jobruns}"x != x ]  # while [ "${jobruns}"x != x ]
216  do  # do
217    sleep 200  #   sleep 200
218    jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`  #   jobruns=`squeue --noheader -u mlosch | grep "$JOBNAME" | awk '{print $1}'`
219    echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"  #   echo "waiting for job ${jobruns%% *} ($JOBNAME) to complete"
220    currentexp=`grep Experiment $MYOUTPUT | tail -1`  #   currentexp=`grep Experiment $MYOUTPUT | tail -1`
221    echo "currently running $currentexp"  #   echo "currently running $currentexp"
222  done  # done
223    
224  # workaround for mailing the stuff  # # workaround for mailing the stuff
225  echo "mail the stuff"  # echo "mail the stuff"
226    
227  MPACKCMD=../tools/mpack-1.6/mpack  # MPACKCMD=../tools/mpack-1.6/mpack
228  fn=`ls -dtr tr_$dNam* | grep -v tar.gz | tail -1`  # fn=`ls -dtr tr_$dNam* | grep -v tar.gz | tail -1`
229  echo "fname ${fn}"  # echo "fname ${fn}"
230  tar cf - $fn | gzip > "${fn}.tar.gz"  # tar cf - $fn | gzip > "${fn}.tar.gz"
231  $MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jmc@mitgcm.org  # $MPACKCMD -s MITgcm-test -m 3555000 ${fn}.tar.gz jm_c@mitgcm.org
232  sleep 2  # sleep 2
233  rm -rf "${fn}.tar.gz"  # rm -rf "${fn}.tar.gz"
234    
235  echo "end of mitgcmtestreport"  echo "end of mitgcmtestreport"
   
   
   
   
   

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.10

  ViewVC Help
Powered by ViewVC 1.1.22