| 1 |
jmc |
1.1 |
#!/bin/csh |
| 2 |
|
|
# |
| 3 |
|
|
# Example PBS script to run a job on the myrinet-3 cluster. |
| 4 |
|
|
# The lines beginning #PBS set various queuing parameters. |
| 5 |
|
|
# |
| 6 |
|
|
# o -N Job Name |
| 7 |
|
|
#PBS -N oce_1 |
| 8 |
|
|
# |
| 9 |
|
|
# |
| 10 |
|
|
# o -l resource lists that control where job goes |
| 11 |
|
|
# here we ask for 4 nodes, each with the attribute "p4". |
| 12 |
|
|
#PBS -l nodes=4:p4 |
| 13 |
|
|
# |
| 14 |
|
|
# o Where to write output |
| 15 |
|
|
# #PBS -e TTT.err |
| 16 |
|
|
|
| 17 |
|
|
# #PBS -o TTT.out |
| 18 |
|
|
|
| 19 |
|
|
# |
| 20 |
|
|
# o Export all my environment variables to the job |
| 21 |
|
|
#PBS -V |
| 22 |
|
|
# |
| 23 |
|
|
|

echo 'start'
#-------------------
# Run configuration:
#   namPBS     : job name (same value as the -N directive in the header);
#                used below to build the names of the PBS output files
#   runDir     : directory the job changes into and runs the executable from
#   r2nDir     : directory of the optional 2nd run (checked further down)
#   executable : program started with mpirun from runDir
#   nit1mn     : number of iterations in one month
#   endRun     : month number at which the whole run is considered finished
#   partmn     : number of months covered by this job
set namPBS     = oce_1
set runDir     = $SC6/oce_glob/run_1
set r2nDir     = $SC6/oce_glob/run00
set executable = mitgcmuv.ifc+mpi
set nit1mn     = 1800
set endRun     = 100
set partmn     = 25
#-------------------
# echo 'run in dir :' $runDir
# prc2 keeps the value N unless a 2nd job is started further down.
set prc2 = 'N'
cd $runDir

# Starting iteration: value of nIter0 in the parameter file data, with the
# leading " nIter0=" and the trailing comma removed.
set its = `grep -E 'nIter0' data | sed -e 's/ nIter0=//' -e 's/,$//'`
# Stop right away unless exactly one value was found; otherwise the expr
# calls below would produce garbage that ends up in move_TTT_files.
if ( $#its != 1 ) then
  echo 'cannot find a single nIter0 value in file data :' $its
  exit 1
endif

# Month bookkeeping. expr is used (not the csh @ arithmetic) so that a
# zero-padded iteration number is still read as decimal.
#   nms / nme : first / last month of this job
#   nMs / nMe : same, zero-padded to 3 digits (used in file names)
#   ite       : iteration number reached at the end of this job
set nms = `expr $its / $nit1mn`
set nMs = `printf "%3.3i\n" $nms`
set nme = `expr $nms + $partmn`
set nMe = `printf "%3.3i\n" $nme`
set ite = `expr $nme \* $nit1mn`

# Strip the spool directory and the .cg01 suffix from the node-file path;
# what is left is presumably the PBS job number -- TODO confirm on the cluster.
set numPBS = `echo $PBS_NODEFILE | sed -e 's/\/usr\/spool\/PBS\/aux\///g' -e 's/\.cg01//g'`

# Queue up the commands that rename the PBS output and error files of this
# job to TTT.out.<month> and TTT.err.<month>. The touch makes sure the file
# exists before it is appended to.
touch move_TTT_files
echo mv $namPBS.o$numPBS TTT.out.$nMe >> move_TTT_files
echo mv $namPBS.e$numPBS TTT.err.$nMe >> move_TTT_files

#----------------------------------------------
# Show which nodes were allocated to this job.
echo $PBS_NODEFILE
cat $PBS_NODEFILE
echo 'The list above shows the nodes this job has exclusive access to.'
echo 'The list can be found in the file named in the variable $PBS_NODEFILE'

# Start a fresh run_here log: date, node list, job number, host and run dir.
set thisHost = `hostname`
rm -f run_here
( date ; cat $PBS_NODEFILE ; echo 'PBS job Number:' $numPBS ; echo on $thisHost run $executable 'in dir:' $runDir ) > run_here

# Nothing left to do once the starting month has reached endRun.
if ( $nms >= $endRun ) then
  echo 'Run already finished : month' $nms
  exit 9
endif

# Announce the segment on stdout (zero-padded months) and record it in
# run_here (plain month numbers).
echo on $thisHost run $executable 'in dir:' `pwd`
echo 'start at month=' $nMs '(it=' $its '), run until mn=' $nMe '(it=' $ite ')'
echo 'start at month=' $nms '(it=' $its '), run until mn=' $nme '(it=' $ite ')'>> run_here

#-------------------
# Optional 2nd job: when the marker file IamReady exists in r2nDir, start
# r2nDir/runscript through rsh on the host listed on the 2nd line of the PBS
# node file. It runs in the background and its stdout+stderr are written to
# r2nDir/TTT.out2.<month>.
if ( -f $r2nDir/IamReady ) then
  set prc2 = `sed -n '2 p' $PBS_NODEFILE`
  if ( "$prc2" == "" ) then
    # No 2nd line in the node file: rsh would take the script path as the
    # host name, so skip the 2nd job and restore the N placeholder.
    set prc2 = 'N'
    echo 'only one node in PBS_NODEFILE => no 2nd run.'
  else
    echo send 2nd job on $prc2
    rsh $prc2 $r2nDir/runscript >& $r2nDir/TTT.out2.$nMe &
  endif
else
  # Report the path that was actually tested.
  echo "no file $r2nDir/IamReady => no 2nd run."
endif
#-------------------

#cp -p run_here std_outp
#echo ' ' >> std_outp
# ./$executable >>& std_outp

# Copy the node list to mf (the machine file given to mpirun) and start one
# MPI process per line of that file.
cat $PBS_NODEFILE > mf
set ncpus = `wc -l < mf`
/usr/local/pkg/mpi/mpi-1.2.4..8a-gm-1.5/intel/bin/mpirun.ch_gm -machinefile mf --gm-kill 5 -v -np $ncpus ./$executable
# Must directly follow mpirun. $status works in both csh and tcsh.
set out = $status
echo 'end with status' $out

# The mpirun status above is only reported. Success is decided by the
# presence of the pickup file for the last iteration of this job
# (10-digit zero-padded iteration number, tile 001.001), presumably written
# by the executable at the end of a complete run.
set iTe = `printf "%10.10i\n" $ite`
if ( -f pickup.$iTe.001.001.data ) then
  set out = 0
else
  set out = 1
endif

echo 'end with status' $out

# A missing pickup file means the job failed: report and leave with that status.
if ( $out != 0 ) then
  echo 'bad return status => STOP here'
  exit $out
endif

# Successful job: build std_outp.<month> from the run_here log followed by
# the standard output of process 0000, then move all STDOUT/STDERR files
# into temp (NOTE(review): temp is expected to be an existing directory).
#mv std_outp std_outp.$nMe
cp -p run_here std_outp.$nMe
echo ' ' >> std_outp.$nMe
cat STDOUT.0000 >> std_outp.$nMe
mv -f STDOUT.000? STDERR.000? temp
#- prepare new submission :
# Advance nIter0 in the parameter file data to the iteration just reached.
sed "s/ nIter0=$its/ nIter0=$ite/" data > TTT.tmp
mv TTT.tmp data

# Submit the next job unless the last month has been reached.
if ( $nme < $endRun ) then
  qsub runScript
else
  echo 'Run finished : month' $nme 'done !'
endif
exit $out