#!/bin/bash

# $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/aces_test_mp2_mth,v 1.2 2010/01/24 18:17:35 jmc Exp $
# $Name: $

# Watchdog for defunct (zombie) processes of the MPI launcher.
#
# Every $longChk seconds, look for processes owned by $uNam whose "ps" line
# mentions $pNam and is flagged '<defunct>'.  If any are found, wait another
# $shortChk seconds; for each one that is still defunct, record it in $pLog
# and send SIGKILL to its parent process.
# The loop ends once the file "stop_check_$sfx" exists in the current
# directory (it is only tested at the top of each cycle).
#
# No 'set -e' / 'pipefail' here: grep legitimately returns non-zero when
# nothing matches, and that must not abort the watchdog.
set -u

sfx='mp2'
#EXE="mpiexec -pernode -comm pmi -np $NCPU ./mitgcmuv"
longChk=300                 # seconds between two scans
shortChk=60                 # grace period before acting on a detected zombie
pNam='mpiexec'              # process name to look for
uNam=${USER:-$(id -un)}     # fall back to id(1) when USER is unset (e.g. cron)
HERE=$(pwd)
pLog="kill_$sfx.log"        # permanent log of every kill attempt
stopFile="stop_check_$sfx"  # create this file to stop the watchdog

# Print the "ps" lines of processes matching both the user and process name.
list_user_procs() {
  ps wauxx | grep -F -- "$uNam" | grep -F -- "$pNam"
}

# Print the number of matching processes currently flagged '<defunct>'.
count_zombies() {
  list_user_procs | grep -c -F -- '<defunct>'
}

# Re-check one PID and, if it is still a zombie, log it and kill its parent.
# Arguments: $1 - PID that was reported as defunct before the grace period
check_and_kill_parent() {
  local zPid=$1
  local stillZ ppZ out

  stillZ=$(ps -f -p "$zPid" | grep -F -- "$pNam" | grep -F -- '<defunct>' \
    | awk '{print $2}')
  if [ "x$stillZ" != "x$zPid" ]; then
    echo " proc: $zPid no more Zombie at: $(date)"
    return 0
  fi

  #-- report to permanent log file
  {
    echo '--------------------'
    date
    uname -a
    ps -f -p "$zPid"
  } >> "$pLog"
  #--
  ppZ=$(ps -f -p "$zPid" | grep -F -- "$pNam" | awk '{print $3}')
  case "$ppZ" in
    '' | *[!0-9]* | 0 | 1)
      # The zombie may have been reaped since the check above (empty PPID),
      # and "kill -9 0" would signal this script's own process group.
      echo " no valid parent proc for $zPid (got '$ppZ'), skip kill at: $(date)" \
        | tee -a "$pLog"
      ;;
    *)
      echo " try to kill parent proc: $ppZ at: $(date)" | tee -a "$pLog"
      kill -9 "$ppZ"
      out=$?
      echo " return code: $out" | tee -a "$pLog"
      ;;
  esac
  list_user_procs | tee -a "$pLog"
  echo '--------------------' >> "$pLog"
}

echo "start $0 from $HERE at: $(date) by user: $uNam"
#uNam='jmc' ; HERE='/home/jmc/test_ACES/output' ; cd $HERE

while [ ! -f "$stopFile" ]; do
  sleep "$longChk"
  # check for defunct proc
  nZ=$(count_zombies)
  if [ "$nZ" -ge 1 ]; then
    echo "found $nZ $pNam zombie processes at: $(date)"
    listZ=$(list_user_procs | grep -F -- '<defunct>' | awk '{print $2}')
    sleep "$shortChk"
    # Word splitting is intended: listZ is a whitespace-separated PID list.
    for p1Z in $listZ; do
      check_and_kill_parent "$p1Z"
    done
    nZ=$(count_zombies)
    echo " $nZ $pNam zombie process remain at: $(date)"
  fi
done
ls -l "$stopFile"
exit