#! /usr/bin/env bash

# $Header: /u/gcmpack/MITgcm_contrib/test_scripts/svante/test_comp_pgiAdm.sh,v 1.4 2017/01/16 14:20:00 jmc Exp $

# Test script for MITgcm to run on head-node of svante cluster (svante-login.mit.edu)
#  to just generate source code (*.f) including TAF output src code.

#-- host name, batch-system commands and directory layout
headNode=$(hostname -s)
#QSUB="qsub"
#QSTAT="qstat -u $USER"
#dNam=$headNode
QSUB="/usr/bin/sbatch"
#QSTAT="/usr/bin/qstat -u $USER"
QLIST="/usr/bin/squeue -u $USER"
dNam='svante'
HERE="$HOME/test_${dNam}"

SUB_DIR="$HERE/$dNam"
OUT_DIR="$HERE/output"
TST_DISK="/net/fs09/d0/jm_c"
TST_DIR="$TST_DISK/test_${dNam}"
#SUB_DIR="$HERE/temp"

dInWeek=$(date +%a)
TODAY=$(date +%d)

#- main options
sfx='pgiAdm'; typ='-adm'
addExp=''

logPfix="test_comp_$sfx"
BATCH_SCRIPT="run_tst_${sfx}.slurm"
#- job name ($JOB) & output-file name ( $JOB.std??? ) must match
#  definition within $BATCH_SCRIPT slurm script
JOB="tst_$sfx"
#- squeue truncates the job name to its first 8 characters, so keep a
#  shortened copy of $JOB to search for in the squeue listing
sJob=$(printf "%8.8s" "$JOB")

#-------------------------------
# checkOut=2 : download new code ;
#         =1 : update code       (if no existing code -> switch to 2)
#         =0 : use existing code (if no existing code -> switch to 2)
checkOut=1
# NOTE(review): 'option' is set here but the flags handed to testreport are
#  accumulated in 'options' (below); confirm which name is intended.
option=

#option="-nc" ; checkOut=1
#option="-q"  ; checkOut=0

#- pick the testreport flag from the day of the month (modulo 3):
#  remainder 1 -> "-fast", otherwise -> "-devel".
#  "10#" forces base 10 so that days "08" and "09" are not read as octal.
dAlt=$(date +%d) ; dAlt=$(( 10#$dAlt % 3 ))
if [ "$dAlt" -eq 1 ] ; then options="$options -fast"
                       else options="$options -devel" ; fi

#- defaults
umask 0022
if [ -d ~/bin ]; then export PATH=$PATH:~/bin ; fi
#- to get case insensitive "ls" (and order of tested experiments)
export LC_ALL="en_US.UTF-8"
#  Turn off stack limit for FIZHI & AD-tests
ulimit -s unlimited

#- source the "modules" initialisation scripts when they are present
if test -f /etc/profile.d/modules.sh    ; then . /etc/profile.d/modules.sh    ; fi
if test -f /etc/profile.d/zz_modules.sh ; then . /etc/profile.d/zz_modules.sh ; fi

#- method to access CVS:
cmdCVS='cvs -d :pserver:cvsanon@mitgcm.org:/u/gcmpack -q'

#-- clean up old log files and start a new one:
LOG_FIL="$OUT_DIR/$logPfix.$(date +%m%d).log"
#- everything below uses paths relative to $OUT_DIR: stop here if we cannot
#  get there rather than removing/rotating files in some other directory
if ! cd "$OUT_DIR" ; then
  echo "Error: unable to cd to dir: $OUT_DIR --> Exit" >&2
  exit 1
fi

rm -f "$logPfix".*.log_bak
#- keep one backup of a log file already written today (same month+day name)
if test -f "$LOG_FIL" ; then mv -f "$LOG_FIL" "${LOG_FIL}_bak" ; fi
echo -n '-- Starting: ' | tee -a "$LOG_FIL"
date | tee -a "$LOG_FIL"

#- n = number of log files in excess of 10 ; if any, remove the n oldest
n=$(( $(ls "$logPfix".*.log | wc -l) - 10 ))
if test "$n" -gt 0 ; then
  echo ' remove old log files:' | tee -a "$LOG_FIL"
  ls -lt "$logPfix".*.log | tail -n "$n" | tee -a "$LOG_FIL"
  ls -t  "$logPfix".*.log | tail -n "$n" | xargs rm -f
fi

#- load standard modules:
module add slurm

#- load specific modules & set ENV variables:
module add pgi/16.9
module add openmpi
module add netcdf
OPTFILE="../tools/build_options/linux_amd64_pgf77"
MPI=6
#- needed for DIVA with MPI:
export MPI_INC_DIR="/home/software/pgi/16.9/linux86-64/2016/mpi/openmpi-1.10.2/include"

#- record the list of loaded modules in the log file
echo '======= modules =======================================' | tee -a "$LOG_FIL"
module list 2>&1 | tee -a "$LOG_FIL"
echo '=======================================================' | tee -a "$LOG_FIL"

#- refresh day-of-week and day-of-month (both were already set near the top
#  of the script); $TODAY is compared with the current day further down
dInWeek=$(date +%a)
TODAY=$(date +%d)
#tst_list='gads gadm gfo+rs gmpi gmth gmp2+rs ifc pgi'
#if test "x$dInWeek" = xSun ; then tst_list="$tst_list tlm oad" ; fi
tst_list='pgiAdm'

echo "cd $TST_DISK ; pwd (x2)" | tee -a "$LOG_FIL"
#- "cd" must not be part of a pipeline: each pipeline member runs in its own
#  subshell, so the directory change would be lost. Send its error message
#  (if any) to the log file instead, and report the failure.
cd "$TST_DISK" 2>> "$LOG_FIL" \
  || echo "Warning: unable to cd to dir: $TST_DISK" | tee -a "$LOG_FIL"
pwd | tee -a "$LOG_FIL"
if test ! -d "$TST_DIR" ; then
  echo -n "Creating a working dir: $TST_DIR ..." | tee -a "$LOG_FIL"
  #/bin/rm -rf $TST_DIR
  mkdir "$TST_DIR"
  retVal=$?
  if test "x$retVal" != x0 ; then
    echo "Error: unable to make dir: $TST_DIR (err=$retVal ) --> Exit" | tee -a "$LOG_FIL"
    exit 1
  fi
fi
#- the rest of the script uses paths relative to $TST_DIR (including
#  "rm -rf"), so do not continue from any other directory
if ! cd "$TST_DIR" ; then
  echo "Error: unable to cd to dir: $TST_DIR --> Exit" | tee -a "$LOG_FIL"
  exit 1
fi
pwd | tee -a "$LOG_FIL"

#------------------------------------------------------------------------

echo "================================================================"
gcmDIR="MITgcm_$sfx"

#- check day and time: give up (exit 2) if the day has changed since $TODAY
#  was recorded, or if it is 18:00 or later
curDay=$(date +%d) ; curHour=$(date +%H)
if [ "$curDay" -ne "$TODAY" ] ; then
  date ; echo "day is over => skip test $sfx" | tee -a "$LOG_FIL"
  exit 2
fi
if [ "$curHour" -ge 18 ] ; then
  date ; echo "too late to run test $sfx" | tee -a "$LOG_FIL"
  exit 2
fi
#- check for unfinished jobs
#  ($QLIST is left unquoted on purpose: it holds a command plus arguments)
#job_exist=`$QSTAT | grep $JOB | wc -l`
job_exist=$($QLIST | grep -c "$sJob")
if test "x$job_exist" != x0 ; then
  echo "$BATCH_SCRIPT" | tee -a "$LOG_FIL"
  echo "job '$JOB' still in queue:" | tee -a "$LOG_FIL"
  #$QSTAT | grep $JOB | tee -a $LOG_FIL
  $QLIST | grep "$sJob" | tee -a "$LOG_FIL"
  echo " => skip this test" | tee -a "$LOG_FIL"
  exit 2
fi
#-- move previous output file
#   ("&&": only list files if the cd succeeded, never those of another dir)
outList=$( cd "$OUT_DIR" && ls "$JOB".std??? 2> /dev/null )
if test "x$outList" != x ; then
  echo -n " moving job $JOB old output files:" | tee -a "$LOG_FIL"
  if test -d "$OUT_DIR/prev" ; then
    for xx in $outList ; do
      pp="$OUT_DIR/prev/$xx" ; echo -n " $xx" | tee -a "$LOG_FIL"
      #- rotate the copies kept in prev: .sav -> .old , current -> .sav
      test -f "$pp.sav" && mv -f "$pp.sav" "$pp.old"
      test -f "$pp"     && mv -f "$pp"     "$pp.sav"
      chmod a+r "$OUT_DIR/$xx" ; mv -f "$OUT_DIR/$xx" "$OUT_DIR/prev"
    done
    echo " to dir ./prev" | tee -a "$LOG_FIL"
  else
    echo " <-- missing dir $OUT_DIR/prev" | tee -a "$LOG_FIL"
  fi
else echo " no old output files from job '$JOB'" | tee -a "$LOG_FIL"
fi
if test -d prev ; then
  #-- save previous summary: tr_out.txt* tst_2+2_out.txt
  oldS=$( ( cd "${gcmDIR}/verification" && ls t*_out.txt* ) 2> /dev/null )
  for xx in $oldS ; do
    #- ss = 6th field of the "ls -l --time-style=iso" listing of the file
    #ss=`/bin/ls -l ${gcmDIR}/verification/$xx | awk '{print $6 $7}'`
    ss=$(/bin/ls -l --time-style=iso "${gcmDIR}/verification/$xx" | awk '{print $6}')
    #- yy = name of the copy: the ".txt" part is replaced using $sfx
    yy=$(echo "$xx" | sed -e "s/\.txt.old/.$sfx.c/" \
                          -e "s/2_out.txt/2.$sfx./" -e "s/\.txt/.$sfx.r/")
    cp -p "${gcmDIR}/verification/$xx" "prev/${yy}$ss"
  done
fi

#- clean and update code
if [ "$checkOut" -eq 1 ] ; then
  if test -d "$gcmDIR/CVS" ; then
    echo "cleaning output from $gcmDIR/verification :" | tee -a "$LOG_FIL"
    #- remove previous output tar files and tar & remove previous output-dir
    /bin/rm -f "$gcmDIR"/verification/??_"${dNam}"*_????????_?.tar.gz
    #  (leave the subshell if the cd fails: the "rm -rf" below must never
    #   run from another directory)
    ( cd "$gcmDIR/verification" || exit
      listD=$(ls -1 -d tr_"${headNode}"_????????_? ??_"${dNam}-${sfx}"_????????_? 2> /dev/null)
      for dd in $listD
      do
        if test -d "$dd" ; then
          tar -cf "${dd}.tar" "$dd" > /dev/null 2>&1 && gzip "${dd}.tar" && /bin/rm -rf "$dd"
          retVal=$?
          if test "x$retVal" != x0 ; then
            echo "ERROR in tar+gzip prev outp-dir: $dd"
            echo " on '$(hostname)' (return val=$retVal) but continue"
          fi
        fi
      done )
#   ( cd $gcmDIR/verification ; ../tools/do_tst_2+2 -clean ) >> $LOG_FIL 2>&1
    ( cd "$gcmDIR/verification" && ./testreport $typ -clean ) >> "$LOG_FIL" 2>&1
    echo "cvs update of dir $gcmDIR :" | tee -a "$LOG_FIL"
    #  ($cmdCVS is left unquoted on purpose: it holds a command plus arguments)
    ( cd "$gcmDIR" && $cmdCVS update -P -d ) >> "$LOG_FIL" 2>&1
    retVal=$?
    if test "x$retVal" != x0 ; then
      echo "cvs update on '$(hostname)' fail (return val=$retVal) => exit" | tee -a "$LOG_FIL"
      exit 3
    fi
  else
    echo "no dir: $gcmDIR/CVS => try a fresh check-out" | tee -a "$LOG_FIL"
    checkOut=2
  fi
fi
#- download new code
if [ "$checkOut" -eq 2 ] ; then
  #  (":?" aborts instead of running "rm -rf" with an empty path)
  test -e "$gcmDIR" && rm -rf "${gcmDIR:?}"
  echo -n "Downloading the MITgcm code using: $cmdCVS ..." | tee -a "$LOG_FIL"
  #  ($cmdCVS is left unquoted on purpose: it holds a command plus arguments)
  $cmdCVS co -P -d "$gcmDIR" MITgcm > /dev/null
  retVal=$?
  #- same policy as for a failing "cvs update": stop with exit code 3
  if test "x$retVal" != x0 ; then
    echo " fail (return val=$retVal) => exit" | tee -a "$LOG_FIL"
    exit 3
  fi
  echo " done" | tee -a "$LOG_FIL"
  for exp2add in $addExp ; do
    echo " add dir: $exp2add (from Contrib:verification_other)"| tee -a "$LOG_FIL"
    ( cd "$gcmDIR/verification" && $cmdCVS co -P -d "$exp2add" \
         "MITgcm_contrib/verification_other/$exp2add" > /dev/null 2>&1 )
  done
  #- group permissions: read+search+setgid on dirs, read on files
  #  ("-exec ... +" copes with any file name, unlike "find | xargs")
  /usr/bin/find "$gcmDIR" -type d -exec chmod g+rxs {} +
  /usr/bin/find "$gcmDIR" -type f -exec chmod g+r {} +
fi
#---------------------------------------------------
#-- set the testreport command:
comm="./testreport $typ"

#-- run the testreport command:
echo -n "Running testreport using" | tee -a "$LOG_FIL"
#  (an empty $MPI is treated like 0: no "-MPI" argument is added)
if test "x${MPI:-0}" != x0 ; then comm="$comm -MPI $MPI" ; fi
if test "x$options" != x ; then comm="$comm $options" ; fi
if test "x$OPTFILE" != x ; then
  comm="$comm -of=$OPTFILE"
fi
echo " option '-src' (only fortran source-files):" | tee -a "$LOG_FIL"
comm="$comm -src"
echo " \"eval $comm\"" | tee -a "$LOG_FIL"
echo "======================"
#- run from the verification dir, only if the cd succeeds; all output
#  (including a cd error message) is appended to the log file
( cd "$gcmDIR/verification" && eval "$comm" ) >> "$LOG_FIL" 2>&1
#- print the part of the log between these two marker lines
#sed -n "/^An email /,/^======== End of testreport / p" $LOG_FIL
sed -n "/^No results email was sent/,/^======== End of testreport / p" "$LOG_FIL"
echo "" | tee -a "$LOG_FIL"

#-- submit SLURM script to run
#   ($QSUB and $QLIST are left unquoted on purpose: each holds a command,
#    possibly followed by arguments)
if test -e "$SUB_DIR/$BATCH_SCRIPT" ; then
  echo " submit SLURM bach script '$SUB_DIR/$BATCH_SCRIPT'" | tee -a "$LOG_FIL"
  $QSUB "$SUB_DIR/$BATCH_SCRIPT" | tee -a "$LOG_FIL"
  echo " job '$JOB' in queue:" | tee -a "$LOG_FIL"
  #$QSTAT | grep $JOB | tee -a $LOG_FIL
  $QLIST | grep "$sJob" | tee -a "$LOG_FIL"
else
  #- nothing to submit: just report it (there is no enclosing loop here,
  #  so no "continue")
  echo " no SLURM script '$SUB_DIR/$BATCH_SCRIPT' to submit" | tee -a "$LOG_FIL"
fi
