--- MITgcm/eesupp/inc/CPP_EEOPTIONS.h 1998/10/28 03:11:33 1.10 +++ MITgcm/eesupp/inc/CPP_EEOPTIONS.h 2017/08/09 15:18:49 1.42 @@ -1,8 +1,15 @@ -C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/eesupp/inc/CPP_EEOPTIONS.h,v 1.10 1998/10/28 03:11:33 cnh Exp $ +C $Header: /home/ubuntu/mnt/e9_copy/MITgcm/eesupp/inc/CPP_EEOPTIONS.h,v 1.42 2017/08/09 15:18:49 mlosch Exp $ +C $Name: $ + +CBOP +C !ROUTINE: CPP_EEOPTIONS.h +C !INTERFACE: +C include "CPP_EEOPTIONS.h" C -C /==========================================================\ -C | CPP_EEOPTIONS.h | -C |==========================================================| +C !DESCRIPTION: +C *==========================================================* +C | CPP\_EEOPTIONS.h | +C *==========================================================* C | C preprocessor "execution environment" supporting | C | flags. Use this file to set flags controlling the | C | execution environment in which a model runs - as opposed | @@ -21,7 +28,8 @@ C | once an experimental configuration has been | C | identified, rebuild the code with the appropriate | C | options set at compile time. | -C \==========================================================/ +C *==========================================================* +CEOP #ifndef _CPP_EEOPTIONS_H_ #define _CPP_EEOPTIONS_H_ @@ -38,69 +46,69 @@ C ALWAYS - indicates the choice will be fixed at compile time C so no run-time option will be present -C Flag used to indicate whether Fortran formatted write +C=== Macro related options === +C-- Control storage of floating point operands +C On many systems it improves performance only to use +C 8-byte precision for time stepped variables. +C Constant in time terms ( geometric factors etc.. ) +C can use 4-byte precision, reducing memory utilisation and +C boosting performance because of a smaller working set size. +C However, on vector CRAY systems this degrades performance. +C Enable to switch REAL4_IS_SLOW from genmake2 (with LET_RS_BE_REAL4): +#ifdef LET_RS_BE_REAL4 +#undef REAL4_IS_SLOW +#else /* LET_RS_BE_REAL4 */ +#define REAL4_IS_SLOW +#endif /* LET_RS_BE_REAL4 */ + +C-- Control use of "double" precision constants. +C Use D0 where it means REAL*8 but not where it means REAL*16 +#define D0 d0 + +C-- Enable some old macro conventions for backward compatibility +#undef USE_OLD_MACROS_R4R8toRSRL + +C=== IO related options === +C-- Flag used to indicate whether Fortran formatted write C and read are threadsafe. On SGI the routines can be thread C safe, on Sun it is not possible - if you are unsure then C undef this option. -#undef FMTFTN_IO_THREADSAFE +#undef FMTFTN_IO_THREAD_SAFE -C Flag used to indicate which flavour of multi-threading -C compiler directives to use. Only set one of these. -C USE_SOLARIS_THREADING - Takes directives for SUN Workshop -C compiler. -C USE_KAP_THREADING - Takes directives for Kuck and -C Associates multi-threading compiler -C ( used on Digital platforms ). -C USE_IRIX_THREADING - Takes directives for SGI MIPS -C Pro Fortran compiler. -C USE_EXEMPLAR_THREADING - Takes directives for HP SPP series -C compiler. -C USE_C90_THREADING - Takes directives for CRAY/SGI C90 -C system F90 compiler. -#ifdef TARGET_SUN -#define USE_SOLARIS_THREADING -#endif - -#ifdef TARGET_DEC -#define USE_KAP_THREADING -#endif - -#ifdef TARGET_SGI -#define USE_IRIX_THREADING -#endif - -#ifdef TARGET_HP -#define USE_EXEMPLAR_THREADING -#endif - -#ifdef TARGET_CRAY_VECTOR -#define USE_C90_THREADING -#endif - -C-- Define the mapping for the _BARRIER macro -C On some systems low-level hardware support can be accessed through -C compiler directives here. -#define _BARRIER CALL BARRIER(myThid) - -C-- Define the mapping for the BEGIN_CRIT() and END_CRIT() macros. -C On some systems we simply execute this section only using the -C master thread i.e. its not really a critical section. We can -C do this because we do not use critical sections in any critical -C sections of our code! -#define _BEGIN_CRIT(a) _BEGIN_MASTER(a) -#define _END_CRIT(a) _END_MASTER(a) - -C-- Define the mapping for the BEGIN_MASTER_SECTION() and -C END_MASTER_SECTION() macros. These are generally implemented by -C simply choosing a particular thread to be "the master" and have -C it alone execute the BEGIN_MASTER..., END_MASTER.. sections. -#define _BEGIN_MASTER(a) IF ( a .EQ. 1 ) THEN -#define _END_MASTER(a) ENDIF +C-- Flag used to indicate whether Binary write to Local file (i.e., +C a different file for each tile) and read are thread-safe. +#undef LOCBIN_IO_THREAD_SAFE + +C-- Flag to turn off the writing of error message to ioUnit zero +#undef DISABLE_WRITE_TO_UNIT_ZERO + +C-- Alternative formulation of BYTESWAP, faster than +C compiler flag -byteswapio on the Altix. +#undef FAST_BYTESWAP + +C-- Flag to turn on old default of opening scratch files with the +C STATUS='SCRATCH' option. This method, while perfectly FORTRAN-standard, +C caused filename conflicts on some multi-node/multi-processor platforms +C in the past and has been replace by something (hopefully) more robust. +#undef USE_FORTRAN_SCRATCH_FILES + +C-- Flag defined for eeboot_minimal.F, eeset_parms.F and open_copy_data_file.F +C to write STDOUT, STDERR and scratch files from process 0 only. +C WARNING: to use only when absolutely confident that the setup is working +C since any message (error/warning/print) from any proc <> 0 will be lost. +#undef SINGLE_DISK_IO + +C=== MPI, EXCH and GLOBAL_SUM related options === +C-- Flag turns off MPI_SEND ready_to_receive polling in the +C gather_* subroutines to speed up integrations. +#undef DISABLE_MPI_READY_TO_RECEIVE C-- Control MPI based parallel processing -#undef ALLOW_USE_MPI -#undef ALWAYS_USE_MPI - +CXXX We no longer select the use of MPI via this file (CPP_EEOPTIONS.h) +CXXX To use MPI, use an appropriate genmake2 options file or use +CXXX genmake2 -mpi . +CXXX #undef ALLOW_USE_MPI + C-- Control use of communication that might overlap computation. C Under MPI selects/deselects "non-blocking" sends and receives. #define ALLOW_ASYNC_COMMUNICATION @@ -111,54 +119,8 @@ #define ALLOW_SYNC_COMMUNICATION #undef ALWAYS_USE_SYNC_COMMUNICATION -C-- Control storage of floating point operands -C On many systems it improves performance only to use -C 8-byte precision for time stepped variables. -C Constant in time terms ( geometric factors etc.. ) -C can use 4-byte precision, reducing memory utilisation and -C boosting performance because of a smaller working -C set size. However, on vector CRAY systems this degrades -C performance. -#define REAL4_IS_SLOW - -#ifdef REAL4_IS_SLOW -#define real Real*8 -#define REAL Real*8 -#define _RS Real*8 -#define _RL Real*8 -#define RS_IS_REAL8 -#define _EXCH_XY_R4(a,b) CALL EXCH_XY_R8 ( a, b ) -#define _EXCH_XYZ_R4(a,b) CALL EXCH_XYZ_R8 ( a, b ) -#define _GLOBAL_SUM_R4(a,b,c) CALL GLOBAL_SUM_R8( a, b , c) -#define _GLOBAL_MAX_R4(a,b,c) CALL GLOBAL_MAX_R8( a, b , c) -#endif - -#ifndef REAL4_IS_SLOW -#define real Real*4 -#define REAL Real*8 -#define _RS Real*4 -#define _RL Real*8 -#define RS_IS_REAL4 -#define _EXCH_XY_R4(a,b) CALL EXCH_XY_R4 ( a, b ) -#define _EXCH_XYZ_R4(a,b) CALL EXCH_XYZ_R4 ( a, b ) -#define _GLOBAL_SUM_R4(a,b,c) CALL GLOBAL_SUM_R4( a, b , c) -#define _GLOBAL_MAX_R4(a,b,c) CALL GLOBAL_MAX_R4( a, b , c) -#endif - -#define _EXCH_XY_R8(a,b) CALL EXCH_XY_R8 ( a, b ) -#define _EXCH_XYZ_R8(a,b) CALL EXCH_XYZ_R8 ( a, b ) -#define _GLOBAL_SUM_R8(a,b,c) CALL GLOBAL_SUM_R8( a, b , c) -#define _GLOBAL_MAX_R8(a,b,c) CALL GLOBAL_MAX_R8( a, b , c) - -C-- Control use of "double" precision constants. -C Use D0 where it means REAL*8 but not where it means REAL*16 -#define D0 d0 -#ifdef REAL_D0_IS_16BYTES -#define D0 -#endif - C-- Control XY periodicity in processor to grid mappings -C Note: Model code does not need to know whether a domain is +C Note: Model code does not need to know whether a domain is C periodic because it has overlap regions for every box. C Model assume that these values have been C filled in some way. @@ -167,17 +129,32 @@ #define CAN_PREVENT_X_PERIODICITY #define CAN_PREVENT_Y_PERIODICITY -C-- Substitue for 1.D variables -C Sun compilers do not use 8-byte precision for literals -C unless .Dnn is specified. CRAY vector machines use 16-byte -C precision when they see .Dnn which runs very slowly! -#ifdef REAL_D0_IS_16BYTES -#define _d -#define _F64( a ) a -#endif -#ifndef REAL_D0_IS_16BYTES -#define _d D -#define _F64( a ) DFLOAT( a ) -#endif +C-- disconnect tiles (no exchange between tiles, just fill-in edges +C assuming locally periodic subdomain) +#undef DISCONNECTED_TILES + +C-- Always cumulate tile local-sum in the same order by applying MPI allreduce +C to array of tiles ; can get slower with large number of tiles (big set-up) +#define GLOBAL_SUM_ORDER_TILES + +C-- Alternative way of doing global sum without MPI allreduce call +C but instead, explicit MPI send & recv calls. Expected to be slower. +#undef GLOBAL_SUM_SEND_RECV + +C-- Alternative way of doing global sum on a single CPU +C to eliminate tiling-dependent roundoff errors. Note: This is slow. +#undef CG2D_SINGLECPU_SUM + +C=== Other options (to add/remove pieces of code) === +C-- Flag to turn on checking for errors from all threads and procs +C (calling S/R STOP_IF_ERROR) before stopping. +#define USE_ERROR_STOP + +C-- Control use of communication with other component: +C allow to import and export from/to Coupler interface. +#undef COMPONENT_MODULE #endif /* _CPP_EEOPTIONS_H_ */ + +#include "CPP_EEMACROS.h" +