/[MITgcm]/MITgcm/eesupp/src/check_threads.F
ViewVC logotype

Contents of /MITgcm/eesupp/src/check_threads.F

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.16 - (show annotations) (download)
Fri Mar 30 14:18:07 2012 UTC (12 years, 1 month ago) by jmc
Branch: MAIN
CVS Tags: checkpoint65z, checkpoint65x, checkpoint65y, checkpoint65r, checkpoint65s, checkpoint65p, checkpoint65q, checkpoint65v, checkpoint65w, checkpoint65t, checkpoint65u, checkpoint65j, checkpoint65k, checkpoint65h, checkpoint65i, checkpoint65n, checkpoint65l, checkpoint65m, checkpoint65b, checkpoint65c, checkpoint65a, checkpoint65f, checkpoint65g, checkpoint65d, checkpoint65e, checkpoint64, checkpoint65, checkpoint66g, checkpoint66f, checkpoint66e, checkpoint66d, checkpoint66c, checkpoint66b, checkpoint66a, checkpoint66o, checkpoint66n, checkpoint66m, checkpoint66l, checkpoint66k, checkpoint66j, checkpoint66i, checkpoint66h, checkpoint63p, checkpoint63q, checkpoint63r, checkpoint63s, checkpoint63l, checkpoint63m, checkpoint63n, checkpoint63o, checkpoint65o, checkpoint64y, checkpoint64x, checkpoint64z, checkpoint64q, checkpoint64p, checkpoint64s, checkpoint64r, checkpoint64u, checkpoint64t, checkpoint64w, checkpoint64v, checkpoint64i, checkpoint64h, checkpoint64k, checkpoint64j, checkpoint64m, checkpoint64l, checkpoint64o, checkpoint64n, checkpoint64a, checkpoint64c, checkpoint64b, checkpoint64e, checkpoint64d, checkpoint64g, checkpoint64f, HEAD
Changes since 1.15: +59 -12 lines
- specific code if using OpenMP threading; improve checking of # of threads
  across processors (e.g., different env OMP_NUM_THREADS setting);
  do not stop but set eeBootError=T.

1 C $Header: /u/gcmpack/MITgcm/eesupp/src/check_threads.F,v 1.15 2011/09/01 14:55:24 jmc Exp $
2 C $Name: $
3
4 #include "CPP_EEOPTIONS.h"
5
6 CBOP
7 C !ROUTINE: CHECK_THREADS
8
9 C !INTERFACE:
10 SUBROUTINE CHECK_THREADS( myThid )
11
12 C !DESCRIPTION:
13 C *==========================================================
14 C | SUBROUTINE CHECK\_THREADS
15 C | o Check that all the threads we need did indeed start.
16 C *==========================================================
17 C | This routine is called during the initialisation phase
18 C | to check whether all the threads have started.
19 C | It is invoked by every thread and if any thread finds an
20 C | error it should set its error flag.
21 C | Notes:
22 C | Different mechanisms may be required on different
23 C | platforms to actually perform the check. For example as
24 C | coded here each thread checks for a semaphore set by the
25 C | other threads to see if they are running.
26 C | It is also possible for a system to schedule threads
27 C | sequentially, unless some system call is made to yield
28 C | the process. This routine would detect this situation too
29 C | and allow a programmer to modify this routine and the
30 C | barrier code to allow threads to be scheduled more
31 C | appropriately.
32 C *==========================================================
33
34 C !USES:
35 IMPLICIT NONE
36 C == Global variables ==
37 #include "SIZE.h"
38 #include "EEPARAMS.h"
39 #include "EESUPPORT.h"
40
41 C !INPUT PARAMETERS:
42 C == Routine arguments ==
43 C myThid :: My thread number
44 INTEGER myThid
45
46 C !FUNCTIONS:
47 #ifdef USE_OMP_THREADING
48 INTEGER OMP_GET_NUM_THREADS
49 EXTERNAL OMP_GET_NUM_THREADS
50 #endif
51
52 C !LOCAL VARIABLES:
53 C == Local variables ==
54 C I :: Loop counter
55 C numberThreadRunning :: Count of number of threads this thread
56 C thinks are running.
57 C nChecks :: Number of times checked for all threads. After so
58 C many checks give up and report an error.
59 C msgBuf :: Informational/error message buffer
60 INTEGER nChecks
61 CHARACTER*(MAX_LEN_MBUF) msgBuf
62 #ifdef USE_OMP_THREADING
63 #ifdef ALLOW_USE_MPI
64 INTEGER myErr, mpiRC
65 #endif
66 #else /* USE_OMP_THREADING */
67 INTEGER I, numberThreadsRunning
68 #endif /* USE_OMP_THREADING */
69 CEOP
70
71 C---+----1----+----2----+----3----+----4----+----5----+----6----+----7-|--+----|
72
73 #ifdef USE_OMP_THREADING
74 C-- Check early-on that number of threads match
75
76 IF ( OMP_GET_NUM_THREADS() .NE. nThreads ) THEN
77 C- This process has problems in multi-threads setting (detected by
78 C all pseudo-threads); note: cannot use any Barrier in this context
79
80 WRITE(msgBuf,'(2A,I6)') 'CHECK_THREADS:',
81 & ' from "eedata", nThreads=', nThreads
82 CALL PRINT_ERROR( msgBuf, myThid )
83 WRITE(msgBuf,'(2A,I6)') ' not equal to ',
84 & 'Env.Var. OMP_NUM_THREADS=', OMP_GET_NUM_THREADS()
85 CALL PRINT_ERROR( msgBuf, myThid )
86 thError(myThid) = .TRUE.
87 eeBootError = .TRUE.
88 IF ( myThid.EQ.1 ) THEN
89 C- one pseudo-thread (thId=1) export the error to other MPI processes
90 nChecks = 1
91 #ifdef ALLOW_USE_MPI
92 IF ( usingMPI ) THEN
93 myErr = nChecks
94 CALL MPI_Allreduce( myErr,nChecks,1,MPI_INTEGER,
95 & MPI_SUM,MPI_COMM_MODEL,mpiRC )
96 ENDIF
97 #endif /* ALLOW_USE_MPI */
98 ENDIF
99
100 ELSE
101 C- this process has a working multi-threads setting
102
103 threadIsRunning(myThid) = .TRUE.
104 IF ( myThid.EQ.1 ) THEN
105 C- master collects error from other MPI processes
106 nChecks = 0
107 #ifdef ALLOW_USE_MPI
108 IF ( usingMPI ) THEN
109 myErr = nChecks
110 CALL MPI_Allreduce( myErr,nChecks,1,MPI_INTEGER,
111 & MPI_SUM,MPI_COMM_MODEL,mpiRC )
112 ENDIF
113 #endif /* ALLOW_USE_MPI */
114 IF ( nChecks.NE.0 ) THEN
115 WRITE(msgBuf,'(A,I5,A)') 'CHECK_THREADS:', nChecks,
116 & ' error(s) from other Processes'
117 CALL PRINT_ERROR( msgBuf, myThid )
118 eeBootError = .TRUE.
119 ENDIF
120 ENDIF
121 C- ensure all threads leave with updated eeBootError (shared) value
122 C$OMP BARRIER
123
124 ENDIF
125
126 #else /* ndef USE_OMP_THREADING */
127
128 threadIsRunning(myThid) = .TRUE.
129 nChecks = 0
130
131 10 CONTINUE
132 numberThreadsRunning = 0
133 nChecks = nChecks + 1
134 DO I = 1, nThreads
135 IF ( threadIsRunning(I) )
136 & numberThreadsRunning = numberThreadsRunning+1
137 ENDDO
138 IF ( nChecks .GT. 10 ) THEN
139 thError(myThid) = .TRUE.
140 eeBootError = .TRUE.
141 WRITE(msgBuf,'(A,I5,A,I5,A)')
142 & 'CHECK_THREADS: Only ',numberThreadsRunning,
143 & ' thread(s), ',nThreads,' are needed for this config!'
144 CALL PRINT_ERROR( msgBuf, myThid )
145 C-- Not enough threads are running so halt the program.
146 C I did not want this here but it is the only place I have found that
147 C KAP deadlocks if it there are fewer threads than iterations in a BLOCKED(1)
148 C loop. The deadlock appears to be in the routine mppjoin which never
149 C returns. I tried putting the STOP in main or breaking out of the loop in main
150 C but this causes KAP to insert a call to mppjoin - which then deadlocks!
151 IF ( myThid .EQ. 1 ) THEN
152 STOP 'ABNORMAL END: S/R CHECK_THREADS'
153 ENDIF
154 GOTO 11
155 ENDIF
156 IF ( numberThreadsRunning .NE. nThreads ) THEN
157 #ifdef HAVE_SYSTEM
158 CALL SYSTEM('sleep 1')
159 #endif
160 GOTO 10
161 ENDIF
162 11 CONTINUE
163
164 #endif /* ndef USE_OMP_THREADING */
165
166 C-- check barrier synchronization: 1rst (initial) call.
167 IF ( .NOT. eeBootError ) THEN
168 CALL BAR_CHECK( 1, myThid )
169 ENDIF
170
171 RETURN
172 END

  ViewVC Help
Powered by ViewVC 1.1.22