/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/check_aces_mp2
ViewVC logotype

Contents of /MITgcm/tools/example_scripts/ACESgrid/check_aces_mp2

Parent Directory | Revision Log | View Revision Graph


Revision 1.4 - (show annotations) (download)
Thu Mar 22 20:05:27 2012 UTC (12 years, 1 month ago) by jmc
Branch: MAIN
CVS Tags: checkpoint64y, checkpoint64x, checkpoint64z, checkpoint64o, checkpoint63r, checkpoint64q, checkpoint64p, checkpoint64s, checkpoint64r, checkpoint64u, checkpoint64t, checkpoint64w, checkpoint64v, checkpoint66g, checkpoint66f, checkpoint66e, checkpoint66d, checkpoint66c, checkpoint66b, checkpoint66a, checkpoint64n, checkpoint64a, checkpoint64c, checkpoint64b, checkpoint64e, checkpoint64d, checkpoint64g, checkpoint64f, checkpoint63m, checkpoint63q, checkpoint63l, checkpoint65z, checkpoint65x, checkpoint65y, checkpoint63n, checkpoint65r, checkpoint65s, checkpoint65p, checkpoint65q, checkpoint65v, checkpoint65w, checkpoint65t, checkpoint65u, checkpoint65j, checkpoint65k, checkpoint65h, checkpoint65i, checkpoint65n, checkpoint65o, checkpoint65l, checkpoint65m, checkpoint65b, checkpoint65c, checkpoint65a, checkpoint65f, checkpoint65g, checkpoint65d, checkpoint65e, checkpoint64i, checkpoint63o, checkpoint63p, checkpoint64h, checkpoint63s, checkpoint64k, checkpoint64, checkpoint65, checkpoint64j, checkpoint64m, checkpoint64l
Changes since 1.3: +10 -7 lines
improve printed messages

#!/bin/bash

# $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/check_aces_mp2,v 1.3 2011/11/23 09:38:43 jmc Exp $
# $Name: $

# Zombie watchdog for "$pNam" processes owned by the current user.
#
# Runs from the directory it is started in and loops until a file named
# "stop_check_$sfx" shows up there.  On every pass it:
#   1. counts the user's $pNam processes that `ps` flags as '<defunct>';
#   2. if there are some, remembers their PIDs, waits $shortChk seconds and
#      re-checks each one (a zombie that got reaped meanwhile is skipped);
#   3. for every zombie still present, appends a report to $pLog and sends
#      SIGKILL to each non-defunct $pNam process that has the same parent
#      as the zombie;
#   4. sleeps $longChk seconds before the next pass.
# Progress goes to stdout; kill reports are also appended to $pLog.
# To stop the watchdog:  touch stop_check_$sfx

sfx='mp2'
#EXE="mpiexec -pernode -comm pmi -np $NCPU ./mitgcmuv"
longChk=300     # seconds between two scans for zombie processes
shortChk=60     # grace period (seconds) before a zombie is acted upon
pNam='mitgcmuv' # pattern looked for in the `ps -f` listing
uNam=$USER
HERE=$(pwd)
pLog="kill_$sfx.log"
stopFile="stop_check_$sfx"

# Print the `ps -f` lines of user $uNam that match $pNam.
list_procs() {
  ps -f -u "$uNam" | grep -- "$pNam"
}

# Print the number of those lines that are flagged '<defunct>'.
count_zombies() {
  list_procs | grep -c '<defunct>'
}

echo "start $0 +from dir: $HERE +by user: $uNam"
echo " on: $(hostname) +at: $(date)"
#uNam='jmc' ; HERE='/home/jmc/test_ACES/output' ; cd $HERE

while [[ ! -f "$stopFile" ]]; do
  # check for defunct proc
  nZ=$(count_zombies)
  if (( nZ >= 1 )); then
    echo "===> found $nZ $pNam zombie processes at: $(date)"
    listZ=$(list_procs | grep '<defunct>' | awk '{print $2}')
    # Same PIDs as one comma-separated pidlist, so that `ps -p` receives
    # them as a single unambiguous argument.
    zPids=${listZ//$'\n'/,}
    sleep "$shortChk"
    # Word-splitting of $listZ is intended here: one PID per word.
    for p1Z in $listZ; do
      # Re-check: is this PID still reported as defunct after the grace period?
      p2Z=$(ps -f -p "$p1Z" | grep '<defunct>' | awk '{print $2}')
      if [[ "$p2Z" != "$p1Z" ]]; then
        echo " proc: $p1Z no more Zombie at: $(date)"
        continue
      fi

      #-- report to permanent log file
      { date ; uname -a ; } >> "$pLog"
      ps -f -p "$zPids" | tee -a "$pLog"
      # ps -f -p $p1Z | tee -a $pLog
      ppZ=$(ps -f -p "$p1Z" | grep -- "$pNam" | awk '{print $3}')
      if [[ -z "$ppZ" ]]; then
        # The zombie vanished between the re-check and the parent lookup;
        # without a parent PID there is nothing to compare against.
        echo "==> pZ=$p1Z : parent proc not found, nothing to kill" | tee -a "$pLog"
        echo '--------------------' | tee -a "$pLog"
        continue
      fi

      #--- version-1 : try to kill parent of Zombie proc
      # echo " try to kill parent proc: $ppZ at:" `date` | tee -a $pLog
      # kill -9 $ppZ
      # out=$?
      # echo " return code: $out" | tee -a $pLog
      #--- version-2 : try to kill other pNam child proc from same parent
      echo "==> list of $pNam proc at: $(date)"
      list_procs
      # Snapshot "pid ppid" of every non-defunct $pNam process *before* any
      # kill is issued, so the loop below works on a fixed list.
      listP=$(list_procs | grep -v '<defunct>' | awk '{print $2, $3}')
      echo "==> pZ=$p1Z : try to kill proc from same parent=$ppZ" | tee -a "$pLog"
      ps -f -p "$ppZ"
      while read -r pc pp; do
        if [[ "$pp" == "$ppZ" ]]; then
          ps -f -p "$pc" | tee -a "$pLog"
          echo " killing proc: $pc" | tee -a "$pLog"
          # SIGKILL straight away -- presumably siblings of a zombie rank are
          # hung and would ignore a softer signal (TODO confirm).
          kill -9 "$pc"
          out=$?
          echo " return code: $out" | tee -a "$pLog"
        fi
      done <<< "$listP"
      #---
      echo "==> list of remaining $pNam proc:" | tee -a "$pLog"
      list_procs | tee -a "$pLog"
      echo '--------------------' | tee -a "$pLog"
    done
    nZ=$(count_zombies)
    echo " --> $nZ $pNam zombie process remain at: $(date)"
  fi
  sleep "$longChk"
done
# Show the stop file that ended the loop; the script exits with ls's status.
ls -l "$stopFile"
exit

  ViewVC Help
Powered by ViewVC 1.1.22