/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/check_aces_mp2
ViewVC logotype

Annotation of /MITgcm/tools/example_scripts/ACESgrid/check_aces_mp2

Parent Directory | Revision Log | View Revision Graph


Revision 1.4 - (hide annotations) (download)
Thu Mar 22 20:05:27 2012 UTC (12 years, 2 months ago) by jmc
Branch: MAIN
CVS Tags: checkpoint64y, checkpoint64x, checkpoint64z, checkpoint64o, checkpoint63r, checkpoint64q, checkpoint64p, checkpoint64s, checkpoint64r, checkpoint64u, checkpoint64t, checkpoint64w, checkpoint64v, checkpoint66g, checkpoint66f, checkpoint66e, checkpoint66d, checkpoint66c, checkpoint66b, checkpoint66a, checkpoint64n, checkpoint64a, checkpoint64c, checkpoint64b, checkpoint64e, checkpoint64d, checkpoint64g, checkpoint64f, checkpoint63m, checkpoint63q, checkpoint63l, checkpoint65z, checkpoint65x, checkpoint65y, checkpoint63n, checkpoint65r, checkpoint65s, checkpoint65p, checkpoint65q, checkpoint65v, checkpoint65w, checkpoint65t, checkpoint65u, checkpoint65j, checkpoint65k, checkpoint65h, checkpoint65i, checkpoint65n, checkpoint65o, checkpoint65l, checkpoint65m, checkpoint65b, checkpoint65c, checkpoint65a, checkpoint65f, checkpoint65g, checkpoint65d, checkpoint65e, checkpoint64i, checkpoint63o, checkpoint63p, checkpoint64h, checkpoint63s, checkpoint64k, checkpoint64, checkpoint65, checkpoint64j, checkpoint64m, checkpoint64l
Changes since 1.3: +10 -7 lines
improve printed messages

#!/bin/bash

# $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/check_aces_mp2,v 1.3 2011/11/23 09:38:43 jmc Exp $
# $Name: $

# Watchdog for zombie ("<defunct>") $pNam processes owned by one user.
#
# Every $longChk seconds the process table is scanned for defunct $pNam
# processes. When some are found, the script waits $shortChk seconds and
# re-checks each of them; for every one that is still defunct, all
# non-defunct $pNam processes sharing the same parent PID are killed and the
# action is recorded in $pLog (in the current directory).
# The loop stops as soon as a file named stop_check_$sfx exists in the
# current directory.

sfx='mp2'
#EXE="mpiexec -pernode -comm pmi -np $NCPU ./mitgcmuv"
longChk=300
shortChk=60
pNam='mitgcmuv'
# USER can be unset in batch/cron environments; fall back to the effective user.
uNam=${USER:-$(id -un)}
HERE=$(pwd)
pLog="kill_$sfx.log"

# Print the "ps -f" lines of every process of $uNam whose line mentions $pNam.
list_procs() {
  ps -f -u "$uNam" | grep -F -- "$pNam"
}

# Print the "ps -f" lines of the defunct $pNam processes of $uNam.
list_zombies() {
  list_procs | grep -F '<defunct>'
}

# Print the number of defunct $pNam processes of $uNam.
count_zombies() {
  list_procs | grep -c -F '<defunct>'
}

echo "start $0 +from dir: $HERE +by user: $uNam"
echo " on: $(hostname) +at: $(date)"
#uNam='jmc' ; HERE='/home/jmc/test_ACES/output' ; cd $HERE

while [[ ! -f "stop_check_$sfx" ]]; do
  # check for defunct proc
  nZ=$(count_zombies)
  if (( nZ >= 1 )); then
    echo "===> found $nZ $pNam zombie processes at: $(date)"
    # column 2 of "ps -f" output is the PID
    listZ=$(list_zombies | awk '{print $2}')
    # same PIDs as a single comma-separated argument for "ps -p"
    pidsZ=${listZ//$'\n'/,}
    # give the zombies a chance to be reaped before acting
    sleep "$shortChk"
    for p1Z in $listZ; do
      p2Z=$(ps -f -p "$p1Z" | grep -F '<defunct>' | awk '{print $2}')
      if [[ "$p2Z" == "$p1Z" ]]; then
        #-- report to permanent log file
        { date; uname -a; } >> "$pLog"
        ps -f -p "$pidsZ" | tee -a "$pLog"
        # ps -f -p $p1Z | tee -a $pLog
        # column 3 of "ps -f" output is the parent PID
        ppZ=$(ps -f -p "$p1Z" | grep -F -- "$pNam" | awk '{print $3}')
        if [[ -z "$ppZ" ]]; then
          # the zombie went away between the two "ps" calls: nothing to kill
          echo " proc: $p1Z gone before its parent could be found" | tee -a "$pLog"
          echo '--------------------' | tee -a "$pLog"
          continue
        fi
        #--- version-1 : try to kill parent of Zombie proc
        # echo " try to kill parent proc: $ppZ at:" `date` | tee -a $pLog
        # kill -9 $ppZ
        # out=$?
        # echo " return code: $out" | tee -a $pLog
        #--- version-2 : try to kill other pNam child proc from same parent
        echo "==> list of $pNam proc at: $(date)"
        list_procs
        # snapshot of "PID PPID" pairs of the non-defunct $pNam processes,
        # taken before anything is killed
        listP=$(list_procs | grep -v -F '<defunct>' | awk '{print $2, $3}')
        echo "==> pZ=$p1Z : try to kill proc from same parent=$ppZ" | tee -a "$pLog"
        ps -f -p "$ppZ"
        while read -r pc pp; do
          if [[ "$pp" == "$ppZ" ]]; then
            ps -f -p "$pc" | tee -a "$pLog"
            echo " killing proc: $pc" | tee -a "$pLog"
            # NOTE(review): SIGKILL is sent without trying SIGTERM first;
            # presumably the siblings of a zombie are hung -- confirm.
            kill -9 "$pc"
            out=$?
            echo " return code: $out" | tee -a "$pLog"
          fi
        done <<< "$listP"
        #---
        echo "==> list of remaining $pNam proc:" | tee -a "$pLog"
        list_procs | tee -a "$pLog"
        echo '--------------------' | tee -a "$pLog"
      else
        echo " proc: $p1Z no more Zombie at: $(date)"
      fi
    done
    nZ=$(count_zombies)
    echo " --> $nZ $pNam zombie process remain at: $(date)"
  fi
  sleep "$longChk"
done
ls -l "stop_check_$sfx"
exit

  ViewVC Help
Powered by ViewVC 1.1.22