C $Header: /u/gcmpack/MITgcm/eesupp/inc/CPP_EEMACROS.h,v 1.23 2010/08/12 21:38:58 jmc Exp $
C $Name: $

CBOP
C     !ROUTINE: CPP_EEMACROS.h
C     !INTERFACE:
C     include "CPP_EEMACROS.h"
C     !DESCRIPTION:
C     *==========================================================*
C     | CPP_EEMACROS.h
C     *==========================================================*
C     | C preprocessor "execution environment" supporting
C     | macros. Use this file to define macros that simplify the
C     | execution environment in which a model runs - as opposed
C     | to the dynamical problem the model solves.
C     *==========================================================*
CEOP

C--   Include guard: everything up to the final #endif of this file
C     is processed only while _CPP_EEMACROS_H_ is still undefined.
#ifndef _CPP_EEMACROS_H_
#define _CPP_EEMACROS_H_

C     In general the following convention applies:
C     ALLOW  - indicates a feature will be included but it may
C     CAN      have a run-time flag to allow it to be switched
C              on and off.
C              If ALLOW or CAN directives are "undef'd" this generally
C              means that the feature will not be available i.e. it
C              will not be included in the compiled code and so no
C              run-time option to use the feature will be available.
C
C     ALWAYS - indicates the choice will be fixed at compile time
C              so no run-time option will be present

C--   Flag used to indicate which flavour of multi-threading
C     compiler directives to use. Only set one of these.
C     USE_SOLARIS_THREADING  - Takes directives for SUN Workshop
C                              compiler.
C     USE_KAP_THREADING      - Takes directives for Kuck and
C                              Associates multi-threading compiler
C                              ( used on Digital platforms ).
C     USE_IRIX_THREADING     - Takes directives for SGI MIPS
C                              Pro Fortran compiler.
C     USE_EXEMPLAR_THREADING - Takes directives for HP SPP series
C                              compiler.
C     USE_C90_THREADING      - Takes directives for CRAY/SGI C90
C                              system F90 compiler.
C     USE_OMP_THREADING      - Not set in this file; when it is
C                              already defined it only switches on
C                              the generic threading flag below.
C     Each TARGET_xxx platform flag selects the matching flavour and
C     every flavour also defines the generic USING_THREADS flag.
#ifdef TARGET_SUN
#define USE_SOLARIS_THREADING
#define USING_THREADS
#endif

#ifdef TARGET_DEC
#define USE_KAP_THREADING
#define USING_THREADS
#endif

#ifdef TARGET_SGI
#define USE_IRIX_THREADING
#define USING_THREADS
#endif

#ifdef TARGET_HP
#define USE_EXEMPLAR_THREADING
#define USING_THREADS
#endif

#ifdef TARGET_CRAY_VECTOR
#define USE_C90_THREADING
#define USING_THREADS
#endif

#ifdef USE_OMP_THREADING
#define USING_THREADS
#endif

C--   Define the mapping for the _BARRIER macro
C     On some systems low-level hardware support can be accessed through
C     compiler directives here.
C     The expansion passes the variable myThid, so that name has to be
C     in scope wherever the macro is used.
#define _BARRIER CALL BARRIER(myThid)

C--   Define the mapping for the BEGIN_CRIT() and END_CRIT() macros.
C     On some systems we simply execute this section only using the
C     master thread i.e. it is not really a critical section. We can
C     do this because we do not use critical sections in any critical
C     sections of our code!
C     Both macros simply forward their argument to the corresponding
C     MASTER section macro.
#define _BEGIN_CRIT(a) _BEGIN_MASTER(a)
#define _END_CRIT(a) _END_MASTER(a)

C--   Define the mapping for the BEGIN_MASTER_SECTION() and
C     END_MASTER_SECTION() macros. These are generally implemented by
C     simply choosing a particular thread to be "the master" and have
C     it alone execute the BEGIN_MASTER..., END_MASTER.. sections.
C     Here the enclosed section runs only when the macro argument
C     equals 1.

#define _BEGIN_MASTER(a) IF ( a .EQ. 1 ) THEN
#define _END_MASTER(a) ENDIF
CcnhDebugStarts
C      Alternate form to the above macros that increments (decrements) a counter each
C      time a MASTER section is entered (exited). This counter can then be checked in barrier
C      to try and detect calls to BARRIER within single threaded sections.
C      Using these macros requires two changes to Makefile - these changes are written
C      below.
C      1 - add a filter to the CPP command to kill off commented _MASTER lines
C      2 - add a filter to the CPP output that converts the string N EWLINE to an actual newline.
C      The N EWLINE needs to be changed to have no space when this macro and Makefile changes
C      are used. It is in here with a space to stop it getting parsed by the CPP stage in these
C      comments.
C      #define _BEGIN_MASTER(a) IF ( a .EQ. 1 ) THEN N EWLINE CALL BARRIER_MS(a)
C      #define _END_MASTER(a) CALL BARRIER_MU(a) N EWLINE ENDIF
C      'CPP = cat $< | $(TOOLSDIR)/set64bitConst.sh | grep -v '^[cC].*_MASTER' | cpp -traditional -P'
C      .F.f:
C      $(CPP) $(DEFINES) $(INCLUDES) | sed 's/N EWLINE/\n/' > $@
CcnhDebugEnds

C--   Control storage of floating point operands
C     On many systems it improves performance only to use
C     8-byte precision for time stepped variables.
C     Constant in time terms ( geometric factors etc.. )
C     can use 4-byte precision, reducing memory utilisation and
C     boosting performance because of a smaller working
C     set size. However, on vector CRAY systems this degrades
C     performance.
C-    Note: global_sum/max macros were used to switch to JAM routines (obsolete);
C     in addition, since only the R4 & R8 S/R are coded, GLOBAL RS & RL macros
C     make it possible to call the corresponding R4 or R8 S/R.
C     With REAL4_IS_SLOW defined, the RS type and its global sum/max and
C     MPI type macros all map to the 8-byte variants; otherwise they map
C     to the 4-byte ones. The old R4 macro names are only provided when
C     USE_OLD_MACROS_R4R8toRSRL is defined and follow the RS mapping.
#ifdef REAL4_IS_SLOW
#define _RS Real*8
#define RS_IS_REAL8
#define _GLOBAL_SUM_RS(a,b) CALL GLOBAL_SUM_R8 ( a, b)
#define _GLOBAL_MAX_RS(a,b) CALL GLOBAL_MAX_R8 ( a, b )
#define _MPI_TYPE_RS MPI_DOUBLE_PRECISION
#ifdef USE_OLD_MACROS_R4R8toRSRL
#define _GLOBAL_SUM_R4(a,b) CALL GLOBAL_SUM_R8 ( a, b )
#define _GLOBAL_MAX_R4(a,b) CALL GLOBAL_MAX_R8 ( a, b )
#endif
#else
#define _RS Real*4
#define RS_IS_REAL4
#define _GLOBAL_SUM_RS(a,b) CALL GLOBAL_SUM_R4 ( a, b )
#define _GLOBAL_MAX_RS(a,b) CALL GLOBAL_MAX_R4 ( a, b )
#define _MPI_TYPE_RS MPI_REAL
#ifdef USE_OLD_MACROS_R4R8toRSRL
cph Needed for some backward compatibility with broken packages
#define _GLOBAL_SUM_R4(a,b) CALL GLOBAL_SUM_R4 ( a, b )
#define _GLOBAL_MAX_R4(a,b) CALL GLOBAL_MAX_R4 ( a, b )
#endif
#endif

C--   The RL type is always 8-byte: its declaration, global sum/max
C     and MPI type macros all map to the R8 / double precision
C     variants. The old R8 macro names are only provided when
C     USE_OLD_MACROS_R4R8toRSRL is defined.
#define _RL Real*8
#define RL_IS_REAL8
#define _GLOBAL_SUM_RL(a,b) CALL GLOBAL_SUM_R8 ( a, b )
#define _GLOBAL_MAX_RL(a,b) CALL GLOBAL_MAX_R8 ( a, b )
#ifdef USE_OLD_MACROS_R4R8toRSRL
cph Needed for some backward compatibility with broken packages
#define _GLOBAL_SUM_R8(a,b) CALL GLOBAL_SUM_R8 ( a, b )
#define _GLOBAL_MAX_R8(a,b) CALL GLOBAL_MAX_R8 ( a, b )
#endif
#define _MPI_TYPE_RL MPI_DOUBLE_PRECISION

C--   Explicit 4-byte and 8-byte types and their MPI datatypes.
C     For the SGI, AIX and LAM targets the 8-byte MPI datatype is
C     MPI_DOUBLE_PRECISION; for all other targets it is MPI_REAL8.
#define _MPI_TYPE_R4 MPI_REAL
#if (defined (TARGET_SGI) || defined (TARGET_AIX) || defined (TARGET_LAM))
#define _MPI_TYPE_R8 MPI_DOUBLE_PRECISION
#else
#define _MPI_TYPE_R8 MPI_REAL8
#endif
#define _R4 Real*4
#define _R8 Real*8

C-    Note: a) exch macros were used to switch to JAM routines (obsolete)
C           b) exch R4 & R8 macros are not practically used ; if needed,
C              will directly call the corresponding S/R.
C     Each exchange macro expands to a call of the S/R with the same
C     name. The old R4 / R8 names are only provided when
C     USE_OLD_MACROS_R4R8toRSRL is defined and map to the RS / RL S/R.
#define _EXCH_XY_RS(a,b) CALL EXCH_XY_RS ( a, b )
#define _EXCH_XY_RL(a,b) CALL EXCH_XY_RL ( a, b )
#define _EXCH_XYZ_RS(a,b) CALL EXCH_XYZ_RS ( a, b )
#define _EXCH_XYZ_RL(a,b) CALL EXCH_XYZ_RL ( a, b )
#ifdef USE_OLD_MACROS_R4R8toRSRL
cph Needed for some backward compatibility with broken packages
#define _EXCH_XY_R4(a,b) CALL EXCH_XY_RS ( a, b )
#define _EXCH_XY_R8(a,b) CALL EXCH_XY_RL ( a, b )
#define _EXCH_XYZ_R4(a,b) CALL EXCH_XYZ_RS ( a, b )
#define _EXCH_XYZ_R8(a,b) CALL EXCH_XYZ_RL ( a, b )
#endif

C--   Control use of JAM routines for Arctic network (no longer supported)
C     These invoke optimized versions of "exchange" and "sum" that
C     utilize the programmable aspect of Arctic cards.
CXXX No longer supported ; started to remove JAM routines.
CXXX #ifdef LETS_MAKE_JAM
CXXX #define _GLOBAL_SUM_RS(a,b) CALL GLOBAL_SUM_R8_JAM ( a, b)
CXXX #define _GLOBAL_SUM_RL(a,b) CALL GLOBAL_SUM_R8_JAM ( a, b )
CXXX #define _EXCH_XY_RS(a,b) CALL EXCH_XY_R8_JAM ( a, b )
CXXX #define _EXCH_XY_RL(a,b) CALL EXCH_XY_R8_JAM ( a, b )
CXXX #define _EXCH_XYZ_RS(a,b) CALL EXCH_XYZ_R8_JAM ( a, b )
CXXX #define _EXCH_XYZ_RL(a,b) CALL EXCH_XYZ_R8_JAM ( a, b )
CXXX #endif

C--   Control use of "double" precision constants.
C     Use D0 where it means REAL*8 but not where it means REAL*16
C     When REAL_D0_IS_16BYTES is set, D0 is defined as an empty macro,
C     so the preprocessor replaces it with nothing.
#ifdef REAL_D0_IS_16BYTES
#define D0
#endif

C--   Substitute for 1.D variables
C     Sun compilers do not use 8-byte precision for literals
C     unless .Dnn is specified. CRAY vector machines use 16-byte
C     precision when they see .Dnn which runs very slowly!
C     With REAL_D0_IS_16BYTES set the macro argument is passed through
C     unchanged; otherwise it is wrapped in a DFLOAT call.
#ifdef REAL_D0_IS_16BYTES
#define _F64( a ) a
#else
#define _F64( a ) DFLOAT( a )
#endif

#endif /* _CPP_EEMACROS_H_ */