/[MITgcm]/MITgcm/tools/example_scripts/ACESgrid/aces_check_mp2
ViewVC logotype

Contents of /MITgcm/tools/example_scripts/ACESgrid/aces_check_mp2

Parent Directory | Revision Log | View Revision Graph


Revision 1.4 - (show annotations) (download)
Wed May 17 17:58:54 2017 UTC (7 years ago) by jmc
Branch: MAIN
CVS Tags: HEAD
Changes since 1.3: +1 -1 lines
FILE REMOVED
remove testing scripts for aces-grid (login) & old geo cluster (both retired)

#!/bin/bash

# $Header: /u/gcmpack/MITgcm/tools/example_scripts/ACESgrid/aces_check_mp2,v 1.3 2011/11/23 09:38:43 jmc Exp $
# $Name: $

# Watchdog for defunct (zombie) "$pNam" processes owned by the current user.
#
# Every $longChk seconds the process table is scanned for "$pNam" entries
# flagged '<defunct>'.  When some are found, the script waits $shortChk
# seconds, re-checks each one, and for those that are still defunct it
# records the details in $pLog and sends SIGKILL to the zombie's parent.
#
# The loop ends as soon as a file named "stop_check_$sfx" exists in the
# directory the script was started from (checked once per iteration).
#
# Output: progress messages on stdout; kill attempts appended to $pLog.

sfx='mp2'
#EXE="mpiexec -pernode -comm pmi -np $NCPU ./mitgcmuv"
longChk=300    # seconds between two scans of the process table
shortChk=60    # grace period before a zombie is confirmed and acted upon
pNam='mpiexec'
# USER may be unset in batch/cron environments; an empty name would break
# every "ps -u" call below, so fall back to the effective user name.
uNam=${USER:-$(id -un)}
HERE=$(pwd)
pLog="kill_$sfx.log"

# timestamp: print the current date with runs of blanks collapsed to one,
# which is the form this script has always written to its output and log.
timestamp() {
  # shellcheck disable=SC2046  # word-splitting is what collapses the blanks
  echo $(date)
}

# count_zombies: print the number of defunct $pNam processes of user $uNam.
count_zombies() {
  ps -f -u "$uNam" | grep -- "$pNam" | grep -c '<defunct>'
}

# list_zombie_pids: print the PID (2nd column of "ps -f") of each defunct
# $pNam process of user $uNam, one per line.
list_zombie_pids() {
  ps -f -u "$uNam" | grep -- "$pNam" | grep '<defunct>' | awk '{print $2}'
}

# kill_zombie_parent: log a confirmed zombie and try to kill its parent.
# Arguments: $1 - PID of the defunct process
# Outputs:   messages on stdout, duplicated (appended) into $pLog
kill_zombie_parent() {
  local p1Z=$1
  local ppZ out

  #-- report to permanent log file
  date >> "$pLog"
  uname -a >> "$pLog"
  ps -f -p "$p1Z" | tee -a "$pLog"
  # parent PID is the 3rd column of "ps -f"
  ppZ=$(ps -f -p "$p1Z" | grep -- "$pNam" | awk '{print $3}')

  #--- version-1 : try to kill parent of Zombie proc
  echo " try to kill parent proc: $ppZ at: $(timestamp)" | tee -a "$pLog"
  # The zombie may have been reaped since the check above (empty lookup),
  # or may already belong to init (PID 1): never call kill in those cases.
  if [[ "$ppZ" =~ ^[0-9]+$ ]] && (( 10#$ppZ > 1 )); then
    kill -9 "$ppZ"
    out=$?
  else
    echo " skip kill: no valid parent pid for proc: $p1Z" | tee -a "$pLog"
    out=1
  fi
  echo " return code: $out" | tee -a "$pLog"

  #--- version-2 : try to kill other pNam child proc from same parent
  # echo " list of $pNam proc at:" `date`
  # ps -f -u $uNam | grep $pNam
  # listP=`ps -f -u $uNam | grep $pNam | awk '{print $2 "p" $3}'`
  # echo " try to kill proc from same parent=$ppZ" | tee -a $pLog
  # ps -f -p $ppZ
  # #echo " listP='$listP'"
  # for xx in $listP
  # do
  #   pc=`echo $xx | sed 's/p/ /' | awk '{print $1}'`
  #   pp=`echo $xx | sed 's/p/ /' | awk '{print $2}'`
  #   #echo " xx='$xx' ; child=$pc ; parent=$pp"
  #   if test "x$pp" = "x$ppZ" -a "x$pc" != "x$p1Z" ; then
  #     ps -f -p $pc | tee -a $pLog
  #     echo " killing proc: $pc" | tee -a $pLog
  #     kill -9 $pc
  #     out=$?
  #     echo " return code: $out" | tee -a $pLog
  #   fi
  # done
  #---

  echo " list of remaining $pNam proc:" | tee -a "$pLog"
  ps -f -u "$uNam" | grep -- "$pNam" | tee -a "$pLog"
  echo '--------------------' | tee -a "$pLog"
}

echo "start $0 +from dir: $HERE +by user: $uNam"
echo " on: $(hostname) +at: $(timestamp)"
#uNam='jmc' ; HERE='/home/jmc/test_ACES/output' ; cd $HERE

while [[ ! -f "stop_check_$sfx" ]]; do
  # check for defunct proc
  nZ=$(count_zombies)
  if (( nZ >= 1 )); then
    echo "===> found $nZ $pNam zombie processes at: $(timestamp)"
    listZ=$(list_zombie_pids)
    # give the parents a chance to reap their children by themselves
    sleep "$shortChk"
    # shellcheck disable=SC2086  # listZ is a whitespace-separated PID list
    for p1Z in $listZ; do
      # still defunct after the grace period ?
      p2Z=$(ps -f -p "$p1Z" | grep '<defunct>' | awk '{print $2}')
      if [[ "$p2Z" == "$p1Z" ]]; then
        kill_zombie_parent "$p1Z"
      else
        echo " proc: $p1Z no more Zombie at: $(timestamp)"
      fi
    done
    nZ=$(count_zombies)
    echo " --> $nZ $pNam zombie process remain at: $(timestamp)"
  fi
  sleep "$longChk"
done
ls -l "stop_check_$sfx"
exit

  ViewVC Help
Powered by ViewVC 1.1.22