#!/bin/csh
#
# Example PBS script to run a job on the myrinet-3 cluster.
# Lines beginning with "#PBS" set various queuing parameters; to csh itself
# they are ordinary comments.
#
#   o -N Job Name
#     (the same name is repeated in "set namPBS" further down; keep both in sync)
#PBS -N oce_1
#
#
#   o -l resource lists that control where job goes
#     here we ask for 4 nodes, each with the attribute "p4".
#PBS -l nodes=4:p4
#
#   o Where to write output
#     (both directives below are commented out with an extra leading "# ")
# #PBS -e TTT.err

# #PBS -o TTT.out

#
#   o Export all my environment variables to the job
#PBS -V
#

echo 'start'
#-------------------
# Run settings:
#   namPBS     : job name, same value as the "#PBS -N" directive
#   runDir     : directory the model runs in (the script cd's there below)
#   r2nDir     : directory of the optional second run
#   executable : model binary, started from runDir through mpirun
#   nit1mn     : iterations per month (nIter0 / nit1mn is printed as the month)
#   endRun     : month number at which the run counts as finished
#   partmn     : months added to the start month to get this job's end month
set namPBS = oce_1
set runDir = $SC6/oce_glob/run_1
set r2nDir = $SC6/oce_glob/run00
set executable = mitgcmuv.ifc+mpi
set nit1mn = 1800
set endRun = 100
set partmn = 25
#-------------------
# echo 'run in dir :' $runDir
# prc2 keeps this placeholder value unless a second job is launched later on
set prc2 = 'N'
cd $runDir

# Starting iteration: take the line of file "data" that mentions nIter0,
# strip the leading ' nIter0=' and the trailing comma.
set its = `egrep 'nIter0' data | sed -e 's/ nIter0=//' -e 's/,$//'`

# Start month, end month and end iteration of this job.
set nms = `expr $its / $nit1mn`
set nme = `expr $nms + $partmn`
set ite = `expr $nme \* $nit1mn`

# Zero-padded, 3-digit versions of the two month numbers (used in file names).
set nMs = `printf "%3.3i\n" $nms`
set nMe = `printf "%3.3i\n" $nme`

# Job number: $PBS_NODEFILE with the spool directory and the ".cg01" suffix
# removed.
set numPBS = `echo $PBS_NODEFILE | sed -e 's/\/usr\/spool\/PBS\/aux\///g' -e 's/\.cg01//g'`

# Append to move_TTT_files the two "mv" commands that rename this job's
# <name>.o<number> / <name>.e<number> files to TTT.out.<month> / TTT.err.<month>.
# The commands are only recorded here, not executed.
touch move_TTT_files
echo mv $namPBS.o$numPBS TTT.out.$nMe >> move_TTT_files
echo mv $namPBS.e$numPBS TTT.err.$nMe >> move_TTT_files

#----------------------------------------------
# Show the node list in the job output.
echo $PBS_NODEFILE
cat $PBS_NODEFILE
echo 'The list above shows the nodes this job has exclusive access to.'
echo 'The list can be found in the file named in the variable $PBS_NODEFILE'

# Start a fresh run_here file: date, node list, job number, host/executable.
rm -f run_here
date > run_here
cat $PBS_NODEFILE >> run_here
echo 'PBS job Number:' $numPBS >> run_here
echo on `hostname` run $executable 'in dir:' $runDir >> run_here

# Nothing left to do once the start month has reached endRun.
if ( $nms >= $endRun ) then
  echo 'Run already finished : month' $nms
  exit 9
endif

# Announce the segment: padded months on stdout, plain months in run_here.
echo on `hostname` run $executable 'in dir:' `pwd`
echo 'start at month=' $nMs '(it=' $its '), run until mn=' $nMe '(it=' $ite ')'
echo 'start at month=' $nms '(it=' $its '), run until mn=' $nme '(it=' $ite ')'>> run_here

#-------------------
# Optional second run: only when the marker file IamReady exists in r2nDir.
# It is started in the background, through rsh, on the host named on the
# second line of the PBS node file; its output goes to TTT.out2.<month>.
if ( ! -f $r2nDir/IamReady ) then
  echo 'no file dir_run_2/IamReady => no 2nd run.'
else
  set prc2 = `sed -n '2 p' $PBS_NODEFILE`
  echo send 2nd job on $prc2
  rsh $prc2 $r2nDir/runscript >& $r2nDir/TTT.out2.$nMe &
endif
#-------------------

#cp -p run_here std_outp
#echo ' ' >> std_outp
# ./$executable >>& std_outp

# Copy the PBS node list to the machine file "mf" and start one MPI process
# per line of that file.
cat $PBS_NODEFILE > mf
set ncpus = ( `wc -l mf | awk '{print $1}'` )
/usr/local/pkg/mpi/mpi-1.2.4..8a-gm-1.5/intel/bin/mpirun.ch_gm -machinefile mf --gm-kill 5 -v -np $ncpus ./$executable
# Exit status of mpirun. "$status" is understood by every csh; the "$?"
# spelling is a tcsh-only synonym. Nothing may run between mpirun and this
# line, or the status would be that of the intervening command.
set out = $status
echo 'end with status' $out

# Success is judged by the pickup file written for the end iteration
# (10-digit, zero-padded), not by the mpirun exit status: out is reset here
# to 0 if that file exists and to 1 otherwise.
set iTe = `printf "%10.10i\n" $ite`
set out = 1
if ( -f pickup.$iTe.001.001.data ) set out = 0

echo 'end with status' $out

if ( $out == 0 ) then
  #mv std_outp std_outp.$nMe
  # Archive: run_here + blank line + STDOUT.0000 become std_outp.<month>,
  # then the per-process STDOUT/STDERR files are moved to "temp".
  cp -p run_here std_outp.$nMe
  echo ' ' >> std_outp.$nMe
  cat STDOUT.0000 >> std_outp.$nMe
  mv -f STDOUT.000? STDERR.000? temp
  #- prepare new submission :
  #  replace the old start iteration in "data" by this job's end iteration
  sed "s/ nIter0=$its/ nIter0=$ite/" data > TTT.tmp
  mv TTT.tmp data
else
  echo 'bad return status => STOP here'
endif

# After a successful segment: resubmit the script unless the end month has
# reached endRun. The script always exits with $out.
if ( $out == 0 ) then
  if ( $nme < $endRun ) then
    qsub runScript
  else
    echo 'Run finished : month' $nme 'done !'
  endif
endif
exit $out