1 |
#!/bin/bash |
#!/bin/bash |
2 |
# |
# |
3 |
# composed and tested by ce107 on ross/weddell |
# $Header$ |
4 |
# for more speed on Core2 processors replace -xW with -xT |
# $Name$ |
5 |
# for more speed on Pentium4 based EM64T processors replace -xW with -xP |
# |
6 |
|
|
7 |
|
# Composed and tested by ce107 on ross/weddell (Opteron system) |
8 |
|
# Should work fine on EM64T and other AMD64 compatible Intel systems |
9 |
|
# a) Processor specific flags: |
10 |
|
# 1) for more speed on Core2 processors replace -xW with -xT |
11 |
|
# 2) for more speed on Pentium4 based EM64T processors replace -xW with -xP |
12 |
|
# b) For more speed, provided your data size doesn't exceed 2GB you can |
13 |
|
# remove -fPIC which carries a performance penalty of 2-6%. |
14 |
|
# c) Provided that the libraries you link to are compiled with -fPIC this |
15 |
|
# optfile should work. |
16 |
|
# d) You can replace -fPIC with -mcmodel=medium which may perform faster |
17 |
|
# than -fPIC and still support data sizes over 2GB per process but all |
18 |
|
# the libraries you link to must be compiled with -fPIC or -mcmodel=medium |
19 |
|
# e) Changed from -O3 to -O2 to avoid buggy Intel v.10 compilers. Speed |
20 |
|
# impact appears to be minimal. |
21 |
|
|
22 |
|
#------- |
23 |
|
# To run with OpenMP, the environment variable OMP_NUM_THREADS must be set |
24 |
|
# and, generally, the stack size needs to be increased as well: |
25 |
|
# - sh,bash: |
26 |
|
# > export OMP_NUM_THREADS=2 |
27 |
|
# > export KMP_STACKSIZE=400m |
28 |
|
# - csh,tcsh: |
29 |
|
# > setenv OMP_NUM_THREADS 2 |
30 |
|
# > setenv KMP_STACKSIZE 400m |
31 |
|
#------- |
32 |
|
|
33 |
FC=ifort |
FC=ifort |
34 |
F90C=ifort |
F90C=ifort |
35 |
CC=icc |
CC=icc |
36 |
LINK='mpif90 -i-static -no-ipo' |
LINK='ifort -i-dynamic -no-ipo' |
37 |
|
|
38 |
DEFINES='-DWORDLENGTH=4' |
DEFINES='-DWORDLENGTH=4' |
39 |
CPP='cpp -traditional -P' |
CPP='cpp -traditional -P' |
40 |
F90FIXEDFORMAT='-fixed -Tf' |
F90FIXEDFORMAT='-fixed -Tf' |
41 |
|
EXTENDED_SRC_FLAG='-132' |
42 |
|
OMPFLAG='-openmp' |
43 |
|
|
44 |
NOOPTFLAGS='-O0 -g -m64 -fPIC' |
NOOPTFLAGS='-O0 -g -m64 -fPIC' |
45 |
NOOPTFILES='' |
NOOPTFILES='' |
49 |
LIBS='' |
LIBS='' |
50 |
|
|
51 |
if test "x$DEVEL" != x ; then |
if test "x$DEVEL" != x ; then |
52 |
FFLAGS='-132 -r8 -i4 -w95 -W0 -WB -convert big_endian -assume byterecl -fPIC -O0 -g -noalign -fpstkchk -check all -fpe0 -traceback -ftrapuv -fpmodel except -warn all' |
FFLAGS="$FFLAGS -w95 -W0 -WB -convert big_endian -assume byterecl -fPIC -O0 -g -noalign -fpstkchk -check all -fpe0 -traceback -ftrapuv -fpmodel except -warn all" |
53 |
else |
else |
54 |
FFLAGS='-132 -r8 -i4 -w95 -W0 -WB -convert big_endian -assume byterecl -fPIC' |
FFLAGS="$FFLAGS -w95 -W0 -WB -convert big_endian -assume byterecl -fPIC" |
55 |
fi |
fi |
56 |
|
#- might want to use '-r8' for fizhi pkg: |
57 |
|
#FFLAGS="$FFLAGS -r8" |
58 |
|
|
59 |
# Note that the -mp switch is for ieee "maintain precision" and is |
# Note that the -mp switch is for ieee "maintain precision" and is |
60 |
# roughly equivalent to -ieee |
# roughly equivalent to -ieee |
61 |
if test "x$IEEE" = x ; then |
if test "x$IEEE" = x ; then |
62 |
# No need for IEEE-754 |
# No need for IEEE-754 |
63 |
FOPTIM='-O3 -align -xW -ip' |
FOPTIM='-O2 -align -xW -ip' |
64 |
else |
else |
65 |
# Try to follow IEEE-754 |
# Try to follow IEEE-754 |
66 |
FOPTIM='-O2 -align -xW -ip -mp' |
FOPTIM='-O2 -align -xW -ip -mp' |
67 |
fi |
fi |
68 |
F90FLAGS=$FFLAGS |
F90FLAGS=$FFLAGS |
69 |
F90OPTIM=$FOPTIM |
F90OPTIM=$FOPTIM |
70 |
CFLAGS='-O3 -ip -fPIC' |
CFLAGS='-O2 -ip -fPIC' |
71 |
|
|
72 |
if [ "x$NETCDF_ROOT" != x ] ; then |
if [ "x$NETCDF_ROOT" != x ] ; then |
73 |
INCLUDEDIRS="${NETCDF_ROOT}/include" |
INCLUDEDIRS="${NETCDF_ROOT}/include" |