1 |
Format for raw output files from MITgcmUV |
2 |
========================================= |
3 |
|
4 |
Introduction |
5 |
------------ |
6 |
When running in parallel mode with multiple processes the MITgcmUV |
7 |
model operates as N separate programs, each responsible for its "local" |
8 |
region of the "total" model domain. Synchronisation and sharing of data between |
9 |
these processes is done explicitly by calls to data exchange and |
10 |
barrier routines. Consequently there is no single program that has |
11 |
a view of the whole model domain as the code is running. Any simple |
12 |
I/O can only operate on the local region of the model domain - I/O |
13 |
operations to and from datasets that represent the total domain need |
14 |
to address the multiple process behavior explicitly. |
15 |
Under MITgcmUV there is a set of I/O support routines that mask the |
16 |
details of this process and enable end-users to read and write datasets |
17 |
in a straight-forward manner. The routines use the following design |
18 |
strategy: |
19 |
o Input datasets are for the total domain |
20 |
o Output datasets are for the local domain |
21 |
o A separate program "joinds" is provided which joins a set of |
22 |
local domain datasets together to form a total model domain dataset. |
23 |
|
24 |
MITgcmUV IO support routines |
25 |
---------------------------- |
26 |
o SUBROUTINE READ_FLD_XY_RS( pref, suff, fld, time, thid ) |
27 |
_RS fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) |
28 |
|
29 |
o SUBROUTINE READ_FLD_XY_RL( pref, suff, fld, time, thid ) |
30 |
_RL fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) |
31 |
|
32 |
o SUBROUTINE READ_FLD_XYZ_RS( pref, suff, fld, time, thid ) |
33 |
_RS fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nZ,nSx,nSy) |
34 |
|
35 |
o SUBROUTINE READ_FLD_XYZ_RL( pref, suff, fld, time, thid ) |
36 |
_RL fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nZ,nSx,nSy) |
37 |
|
38 |
o SUBROUTINE WRITE_FLD_XY_RS( pref, suff, fld, time, thid ) |
39 |
_RS fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) |
40 |
|
41 |
o SUBROUTINE WRITE_FLD_XY_RL( pref, suff, fld, time, thid ) |
42 |
_RL fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nSx,nSy) |
43 |
|
44 |
o SUBROUTINE WRITE_FLD_XYZ_RS( pref, suff, fld, time, thid ) |
45 |
_RS fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nZ,nSx,nSy) |
46 |
|
47 |
o SUBROUTINE WRITE_FLD_XYZ_RL( pref, suff, fld, time, thid ) |
48 |
_RL fld(1-OLx:sNx+OLx,1-OLy:sNy+OLy,nZ,nSx,nSy) |
49 |
|
50 |
all routines |
51 |
CHARACTER*(*) pref |
52 |
CHARACTER*(*) suff |
53 |
INTEGER time |
54 |
INTEGER thid |
55 |
|
56 |
macros |
57 |
_RS -> REAL*4 or REAL*8 |
58 |
_RL -> REAL*8 |
59 |
|
60 |
pref - String used in prefix part of file name. |
61 |
Examples 'theta.' = temperature |
62 |
from 'uVel.' = zonal velocity |
63 |
MITgcmUV 'vVel.' = meridional velocity |
64 |
'salt.' = salinity |
65 |
suff - String used in suffix part of file name. |
66 |
Examples '0000000100' = iteration number |
67 |
from 'ckptA' = checkpoint file |
68 |
MITgcmUV |
69 |
fld - Two or three dimensional REAL*4 or REAL*8 array. |
70 |
Examples theta = temperature field |
71 |
from cg2d_x = surface elevation field |
72 |
MITgcmUV |
73 |
time - Time level in the calling subroutine |
74 |
thid - Thread id of the calling subroutine |
75 |
|
76 |
Dataset format |
77 |
-------------- |
78 |
Datasets are written using the standard Fortran 77 sequential binary |
79 |
file format. The Fortran IO statements in the model code do not specify any |
80 |
particular format, however, compile and run-time flags are used on some platforms. |
81 |
On DEC platforms by default the IO form is set to big-endian with a compile time |
82 |
flag. On CRAY platforms a runtime flag is normally used to select IEEE |
83 |
representation. The Fortran 77 sequential binary file format is |
84 |
4 byte header |
85 |
data |
86 |
4 byte terminator |
87 |
The header and terminator are unsigned integers which give the length |
88 |
of the data section in bytes. This format is standard over all UNIX |
89 |
platforms. In Fortran this style of file is generated by code of the |
90 |
form |
91 |
|
92 |
REAL A(dim1, dim2, ..... ) |
93 |
OPEN(unitnumber,FILE=filename,FORM='UNFORMATTED') |
94 |
WRITE(unitnumber) A |
95 |
END |
96 |
|
97 |
The data is sequenced in the standard Fortran convention of the left-most |
98 |
index varying fastest. This convention holds for datasets of any dimension: |
99 |
one-dimensional, two-dimensional, three-dimensional and four-dimensional or |
100 |
more datasets are all written this way. |
101 |
|
102 |
Multiprocess support |
103 |
-------------------- |
104 |
The format described above is used for multi-process simulations. In this |
105 |
case the data is written to separate files with each process writing data that |
106 |
is local to it. To support this approach a file naming convention is used and a second |
107 |
file of "meta" information accompanies the data. The naming convention |
108 |
is used to avoid duplicate names and to make it easy to identify sets of |
109 |
files that together represent the total domain data. The meta file contains |
110 |
information about the extent of the sub-domain within each file. |
111 |
The naming convention used is |
112 |
PREF.SUFF.pPNUMBER.tTNUMBER.data |
113 |
PREF.SUFF.pPNUMBER.tTNUMBER.meta |
114 |
|
115 |
where |
116 |
PREF - Is a field identifying the data within the file. For |
117 |
temperature PREF is T, for zonal velocity PREF is U etc... |
118 |
SUFF - Is a field identifying the "instance" of the data within the |
119 |
file. The instance is typically the time level. In general |
120 |
the instance will be a model timestep number. |
121 |
PNUMBER - Is a process number used to identify which process of |
122 |
a multi-process run generated this data. The number ranges |
123 |
from 0 to (number of processors)-1. |
124 |
TNUMBER - Is a thread number used to identify which thread of a |
125 |
multi-threaded run generated this data. The number ranges |
126 |
from 0 to (number of threads)-1. |
127 |
|
128 |
the .data suffix identifies the file containing the actual data. |
129 |
the .meta suffix identifies the file containing textual information |
130 |
indicating the extent of the domain written to the .data file. |
131 |
|
132 |
.meta file Format |
133 |
----------------- |
134 |
This file contains a set of parameters that are specified using the |
135 |
generic parameter specification format used in GCMPACK software. This |
136 |
format consists of a sequence of assignments and comments |
137 |
Assignments have the form |
138 |
keyword =[ val-list ]; |
139 |
|
140 |
where |
141 |
keyword is a text string |
142 |
val-list is a sequence of one or more fields separated by commas |
143 |
|
144 |
Comments are preceded by // or # characters or contained in |
145 |
/* */ pairs. |
146 |
The keywords contained in a .meta file are |
147 |
id - This is a numeric identifier. It can be used to |
148 |
verify consistency over a set of .meta files. |
149 |
nDims - This is a single integer indicating the dimensionality |
150 |
of the data in the .data file. |
151 |
dimList - This is a sequence of triplets. There is one triplet for |
152 |
each dimension and the triplets are ordered in the same |
153 |
way as the dimensions. Each triplet is made of three integers. |
154 |
The first integer gives the domain extent globally for |
155 |
the associated dimension. |
156 |
The second integer gives the low coordinate for the values |
157 |
within .data file for the associated dimension. |
158 |
The third integer gives the high coordinate for the values |
159 |
within .data file for the associated dimension. |
160 |
Thus for a .data file containing the north-west quadrant of |
161 |
a global domain of size 90 x 40 the .meta might read |
162 |
nDims = [ 2 ]; |
163 |
dimList = [ 90, 46, 90, 40, 1, 20]; |
164 |
For a global domain of size 90 x 40 x 33 the .meta file |
165 |
would read |
166 |
nDims = [ 3 ]; |
167 |
dimList = [ 90, 46, 90, 40, 1, 20, 33, 1, 33]; |
168 |
|
169 |
|
170 |
|
171 |
Example matlab program to join files |
172 |
------------------------------------ |
173 |
The following matlab script joins together a collection of files that |
174 |
were written in split form. The files to join are indicated by a user |
175 |
defined PREF.SUFF pair. e.g. T.0000002800. The script uses the UNIX |
176 |
ls command to find all files starting with T.0000002800 and then |
177 |
scans the .meta files to extract the dimensions. It then merges all |
178 |
the sections together to form a complete representation of the global |
179 |
dataset. |
180 |
>> function [AA] = rdmeta(fname,varargin) |
181 |
>> % |
182 |
>> % Read MITgcmUV Meta/Data files |
183 |
>> % |
184 |
>> % A = RDMETA(FNAME) reads data described by meta/data file format. |
185 |
>> % FNAME is a string containing the "head" of the file names. |
186 |
>> % |
187 |
>> % eg. To load the meta-data files |
188 |
>> % T.0000002880.p0000.t0000.meta, T.0000002880.p0000.t0000.data |
189 |
>> % T.0000002880.p0001.t0000.meta, T.0000002880.p0001.t0000.data |
190 |
>> % T.0000002880.p0002.t0000.meta, T.0000002880.p0002.t0000.data |
191 |
>> % T.0000002880.p0003.t0000.meta, T.0000002880.p0003.t0000.data |
192 |
>> % use |
193 |
>> % >> A=rdmeta('T.0000002880'); |
194 |
>> % |
195 |
>> % A = RDMETA(FNAME,MACHINEFORMAT) allows the machine format to be specified |
196 |
>> % where MACHINEFORMAT is one of the following strings: |
197 |
>> % |
198 |
>> % 'native' or 'n' - local machine format - the default |
199 |
>> % 'ieee-le' or 'l' - IEEE floating point with little-endian |
200 |
>> % byte ordering |
201 |
>> % 'ieee-be' or 'b' - IEEE floating point with big-endian |
202 |
>> % byte ordering |
203 |
>> % 'vaxd' or 'd' - VAX D floating point and VAX ordering |
204 |
>> % 'vaxg' or 'g' - VAX G floating point and VAX ordering |
205 |
>> % 'cray' or 'c' - Cray floating point with big-endian |
206 |
>> % byte ordering |
207 |
>> % 'ieee-le.l64' or 'a' - IEEE floating point with little-endian |
208 |
>> % byte ordering and 64 bit long data type |
209 |
>> % 'ieee-be.l64' or 's' - IEEE floating point with big-endian byte |
210 |
>> % ordering and 64 bit long data type. |
211 |
>> % |
212 |
>> |
213 |
>> % Default options |
214 |
>> ieee='n'; |
215 |
>> |
216 |
>> % Check optional arguments |
217 |
>> args=char(varargin); |
218 |
>> while (size(args,1) > 0) |
219 |
>> if deblank(args(1,:)) == 'n' | deblank(args(1,:)) == 'native' |
220 |
>> ieee='n'; |
221 |
>> elseif deblank(args(1,:)) == 'l' | deblank(args(1,:)) == 'ieee-le' |
222 |
>> ieee='l'; |
223 |
>> elseif deblank(args(1,:)) == 'b' | deblank(args(1,:)) == 'ieee-be' |
224 |
>> ieee='b'; |
225 |
>> elseif deblank(args(1,:)) == 'c' | deblank(args(1,:)) == 'cray' |
226 |
>> ieee='c'; |
227 |
>> elseif deblank(args(1,:)) == 'a' | deblank(args(1,:)) == 'ieee-le.l64' |
228 |
>> ieee='a'; |
229 |
>> elseif deblank(args(1,:)) == 's' | deblank(args(1,:)) == 'ieee-be.l64' |
230 |
>> ieee='s'; |
231 |
>> else |
232 |
>> sprintf(['Optional argument ' args(1,:) ' is unknown']) |
233 |
>> return |
234 |
>> end |
235 |
>> args=args(2:end,:); |
236 |
>> end |
237 |
>> |
238 |
>> % Match name of all meta-files |
239 |
>> eval(['ls ' fname '*.meta;']); |
240 |
>> allfiles=ans; |
241 |
>> |
242 |
>> % Beginning and end of strings |
243 |
>> Iend=findstr(allfiles,'.meta')+4; |
244 |
>> Ibeg=[1 Iend(1:end-1)+2]; |
245 |
>> |
246 |
>> % Loop through allfiles |
247 |
>> for j=1:prod(size(Ibeg)), |
248 |
>> |
249 |
>> % Read meta- and data-file |
250 |
>> [A,N] = localrdmeta(allfiles(Ibeg(j):Iend(j)),ieee); |
251 |
>> |
252 |
>> bdims=N(1,:); |
253 |
>> r0=N(2,:); |
254 |
>> rN=N(3,:); |
255 |
>> ndims=prod(size(bdims)); |
256 |
>> if (ndims == 1) |
257 |
>> AA(r0(1):rN(1))=A; |
258 |
>> elseif (ndims == 2) |
259 |
>> AA(r0(1):rN(1),r0(2):rN(2))=A; |
260 |
>> elseif (ndims == 3) |
261 |
>> AA(r0(1):rN(1),r0(2):rN(2),r0(3):rN(3))=A; |
262 |
>> elseif (ndims == 4) |
263 |
>> AA(r0(1):rN(1),r0(2):rN(2),r0(3):rN(3),r0(4):rN(4))=A; |
264 |
>> else |
265 |
>> sprintf('Dimension of data set is larger than currently coded. Sorry!') |
266 |
>> return |
267 |
>> end |
268 |
>> |
269 |
>> end |
270 |
>> |
271 |
>> %------------------------------------------------------------------------------- |
272 |
>> |
273 |
>> function [A,N] = localrdmeta(fname,ieee) |
274 |
>> |
275 |
>> mname=fname; |
276 |
>> dname=strrep(mname,'.meta','.data'); |
277 |
>> |
278 |
>> % Read and interpret Meta file |
279 |
>> fid = fopen(mname,'r'); |
280 |
>> if (fid == -1) |
281 |
>> sprintf(['Fila e' mname ' could not be opened']) |
282 |
>> return |
283 |
>> end |
284 |
>> |
285 |
>> % Scan each line of the Meta file |
286 |
>> allstr=' '; |
287 |
>> keepgoing = 1; |
288 |
>> while keepgoing > 0, |
289 |
>> line = fgetl(fid); |
290 |
>> if (line == -1) |
291 |
>> keepgoing=-1; |
292 |
>> else |
293 |
>> % Strip out "(PID.TID *.*)" by finding first ")" |
294 |
>> ind=findstr([line ')'],')'); line=line(ind(1)+1:end); |
295 |
>> % Remove comments of form // |
296 |
>> line=[line ' //']; ind=findstr(line,'//'); line=line(1:ind(1)-1); |
297 |
>> % Add to total string |
298 |
>> allstr=[allstr line]; |
299 |
>> end |
300 |
>> end |
301 |
>> |
302 |
>> % Close meta file |
303 |
>> fclose(fid); |
304 |
>> |
305 |
>> % Strip out comments of form /* ... */ |
306 |
>> ind1=findstr(allstr,'/*'); ind2=findstr(allstr,'*/'); |
307 |
>> if size(ind1) ~= size(ind2) |
308 |
>> sprintf('The /* ... */ comments are not properly paired') |
309 |
>> return |
310 |
>> end |
311 |
>> while size(ind1,2) > 0 |
312 |
>> allstr=[allstr(1:ind1(1)-1) allstr(ind2(1)+3:end)]; |
313 |
>> ind1=findstr(allstr,'/*'); ind2=findstr(allstr,'*/'); |
314 |
>> end |
315 |
>> |
316 |
>> eval(lower(allstr)); |
317 |
>> |
318 |
>> N=reshape( dimlist , 3 , prod(size(dimlist))/3 ); |
319 |
>> |
320 |
>> A=allstr; |
321 |
>> % Open data file |
322 |
>> fid=fopen(dname,'r',ieee); |
323 |
>> |
324 |
>> % Read record size in bytes |
325 |
>> recsz=fread(fid,1,'uint32'); |
326 |
>> ldims=N(3,:)-N(2,:)+1; |
327 |
>> numels=prod(ldims); |
328 |
>> |
329 |
>> rat=recsz/numels; |
330 |
>> if rat == 4 |
331 |
>> A=fread(fid,numels,'real*4'); |
332 |
>> elseif rat == 8 |
333 |
>> A=fread(fid,numels,'real*8'); |
334 |
>> else |
335 |
>> sprintf('Ratio between record size and size in meta-file inconsistent') |
336 |
>> sprintf(' Implied size in meta-file = %d', numels ) |
337 |
>> sprintf(' Record size in data-file = %d', recsz ) |
338 |
>> return |
339 |
>> end |
340 |
>> |
341 |
>> erecsz=fread(fid,1,'uint32'); |
342 |
>> if erecsz ~= recsz |
343 |
>> sprintf('WARNING: Record sizes at beginning and end of file are inconsistent') |
344 |
>> end |
345 |
>> |
346 |
>> fclose(fid); |
347 |
>> |
348 |
>> A=reshape(A,ldims); |
349 |
>> |