1 |
#!/usr/local/bin/perl -w |
2 |
|
3 |
# MITgcmUV DataSet joining utility. |
4 |
# Tested with perl 4.0 and newer. |
5 |
# Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25} |
6 |
# Zhangfan XING, xing@pacific.jpl.nasa.gov |
7 |
# Adapted to work with MDS I/O format by adcroft@mit.edu, 5/7/1999 |
8 |
# |
9 |
# LOGS: |
10 |
# 980707, version 0.0.1, basically works |
11 |
# 980721, version 0.2.0, proper handling of data file's header and terminator |
12 |
# for diff bytesex. |
13 |
# 990507, HACK'd by AJA. Needs to be properly merged with the original joinds |
14 |
|
15 |
#------ |
16 |
# usage |
17 |
#------ |
18 |
sub usage { |
19 |
print STDERR |
20 |
"\nUsage:$0 [-Ddir0 -Ddir1 ...] " . |
21 |
"prefix suffix [(little-endian|big-endian)]\n"; |
22 |
print STDERR "\nMITgcmUV DataSet joining utility, version 0.3.0\n"; |
23 |
print STDERR |
24 |
"Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n"; |
25 |
print STDERR "Report problem to xing\@pacific.jpl.nasa.gov\n\n"; |
26 |
exit 1; |
27 |
} |
28 |
|
29 |
#------------------------------ |
30 |
# product of a list of integers |
31 |
#------------------------------ |
32 |
sub listprod { |
33 |
local ($product) = 1; |
34 |
local ($x); |
35 |
foreach $x (@_) { |
36 |
$product *= $x; |
37 |
} |
38 |
$product; |
39 |
} |
40 |
|
41 |
#---------------- |
42 |
# @list1 + @list2 |
43 |
#---------------- |
44 |
sub lists_add { |
45 |
local (*l1,*l2) = @_; |
46 |
($#l1 == $#l2) || return undef; |
47 |
|
48 |
local (@l); |
49 |
for (local($i)=0;$i<=$#l1;$i++) { |
50 |
$l[$i]=$l1[$i]+$l2[$i]; |
51 |
} |
52 |
@l; |
53 |
} |
54 |
|
55 |
#------------- |
56 |
# pos to index |
57 |
# 0-based. |
58 |
#------------- |
59 |
sub pos2index { |
60 |
|
61 |
local ($pos,@dim) = @_; |
62 |
local ($rightmost) = pop(@dim); |
63 |
|
64 |
local (@index,$d); |
65 |
foreach $d (@dim) { |
66 |
push(@index,$pos%$d); |
67 |
$pos = int($pos/$d); |
68 |
} |
69 |
|
70 |
# self-guarding |
71 |
unless ($rightmost > $pos) { |
72 |
return undef; |
73 |
} |
74 |
|
75 |
push(@index,$pos); |
76 |
@index; |
77 |
} |
78 |
|
79 |
#------------- |
80 |
# index to pos |
81 |
# 0-based. |
82 |
#------------- |
83 |
sub index2pos { |
84 |
local (*index,*dim) = @_; |
85 |
|
86 |
return undef unless ($#index == $#dim); |
87 |
|
88 |
local ($pos) = $index[$#index]; |
89 |
for (local($i)=$#dim;$i>0;$i--) { |
90 |
$pos = $pos * $dim[$i-1] + $index[$i-1]; |
91 |
} |
92 |
$pos; |
93 |
} |
94 |
|
95 |
#------------------------- |
96 |
# check machine's bytesex. |
97 |
# returns "little-endian" or "big-endian" |
98 |
# or dies if unable to figure out |
99 |
#------------------------- |
100 |
sub mach_bytesex { |
101 |
|
102 |
local ($foo) = pack("s2",1,2); |
103 |
if ($foo eq "\1\0\2\0") { |
104 |
return "little-endian"; |
105 |
} elsif ($foo eq "\0\1\0\2") { |
106 |
return "big-endian"; |
107 |
} else { |
108 |
die "Your machine has a strange bytesex.\n". |
109 |
"Email your platform info to xing\@pacific.jpl.nasa.gov\n"; |
110 |
} |
111 |
} |
112 |
|
113 |
#-------------------------------------------------- |
114 |
# check bytesex of a fortran unformatted data file |
115 |
# current machine's bytesex is used as a reference. |
116 |
# returns: one of "little-endian", "big-endian", "undecidable" and "unknown" |
117 |
#-------------------------------------------------- |
118 |
sub file_bytesex { |
119 |
|
120 |
# only if this platform's bytesex is either big- or little-endian |
121 |
# otherwise dies. Hope this won't happen. |
122 |
local($mach_bytesex) = &mach_bytesex(); |
123 |
|
124 |
local ($file) = shift; |
125 |
local (*FILE); |
126 |
|
127 |
open(FILE,$file) || die "$file: $!\n"; |
128 |
|
129 |
local(@fstat) = stat(FILE); |
130 |
local ($size) = $fstat[7] - 8; # total data size in bytes |
131 |
|
132 |
local($hdr,$tmr) = ("",""); |
133 |
read(FILE,$hdr,4); |
134 |
seek(FILE,-4,2); |
135 |
read(FILE,$tmr,4); |
136 |
close(FILE); |
137 |
|
138 |
# this part checks for self-consistency of Fortran unformatted file |
139 |
($hdr eq $tmr) || die "$file: not a Fortran unformatted data file.\n"; |
140 |
|
141 |
local ($ori) = unpack("I",$hdr); |
142 |
local ($rev) = unpack("I",join("",reverse(split(//,$hdr)))); |
143 |
|
144 |
($ori != $size && $rev != $size) && |
145 |
return "unknown"; |
146 |
|
147 |
($ori == $size && $rev == $size) && |
148 |
return "undecidable"; |
149 |
|
150 |
local ($opposite) = ($mach_bytesex eq "little-endian") ? |
151 |
"big-endian" : "little-endian"; |
152 |
|
153 |
return ($ori == $size) ? $mach_bytesex : $opposite; |
154 |
|
155 |
} |
156 |
|
157 |
#-------------------------------- |
158 |
# check meta info for one dataset |
159 |
#-------------------------------- |
160 |
|
161 |
sub check_meta { |
162 |
|
163 |
local ($ds,$dir) = @_; |
164 |
local ($fmeta) = "$dir/$ds.meta"; |
165 |
|
166 |
#~~~~~~~~~~~~~~~~ |
167 |
# check meta info |
168 |
#~~~~~~~~~~~~~~~~ |
169 |
|
170 |
undef $/; # read to the end of file |
171 |
open(MFILE,"<$fmeta") || die "$fmeta: $!\n"; |
172 |
$_=<MFILE>; |
173 |
close(MFILE); |
174 |
$/ = "\n"; # never mess up |
175 |
|
176 |
s/\([^)]*\)//g; #rm (.*) |
177 |
s/\/\/[^\n]*\n//g; #rm comment lines |
178 |
s/\/\*.*\*\///g; #rm inline comments |
179 |
s/\s+//g; #rm white spaces |
180 |
/nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];nrecords=\[(.+)\];timeStepNumber=\[(.+)\];/ |
181 |
|| die "$fmeta: meta file format error\n"; |
182 |
local ($nDims_,$dimList_,$format_,$nrecords_,$timeStepNumber_) = ($1,$2,$3,$4,$5); |
183 |
|
184 |
# check Identifier |
185 |
(defined $timeStepNumber) || ($timeStepNumber = $timeStepNumber_); |
186 |
($timeStepNumber eq $timeStepNumber_) || |
187 |
die "$fmeta: timeStepNumber $timeStepNumber_ inconsistent with other dataset\n"; |
188 |
|
189 |
# check Number of dimensions |
190 |
(defined $nDims) || ($nDims = $nDims_); |
191 |
($nDims eq $nDims_) || |
192 |
die "$fmeta: nDims $nDims_ inconsistent with other dataset\n"; |
193 |
|
194 |
# check Field format |
195 |
(defined $format) || ($format = $format_); |
196 |
($format eq $format_) || |
197 |
die "$fmeta: format $format_ inconsistent with other dataset\n"; |
198 |
|
199 |
# check dimList |
200 |
# calc dimesions and leading index of this subset |
201 |
local (@dimList_) = split(/,/,$dimList_); |
202 |
|
203 |
($nDims_*3 == $#dimList_+1) || |
204 |
die "$fmeta: nDims and dimList conflicting\n"; |
205 |
|
206 |
local (@Dim,@dim,@Index0) = (); |
207 |
for (local($i)=0;$i<$nDims_;$i++) { |
208 |
push(@Dim,$dimList_[$i*3]); |
209 |
push(@dim,$dimList_[$i*3+2]-$dimList_[$i*3+1]+1); |
210 |
push(@Index0,$dimList_[$i*3+1]-1); |
211 |
} |
212 |
local ($Dim_) = join(",",@Dim); |
213 |
local ($dim_) = join(",",@dim); |
214 |
|
215 |
(defined $Dim) || ($Dim = $Dim_); |
216 |
($Dim eq $Dim_) || |
217 |
die "$fmeta: dimList Global inconsistent with other dataset\n"; |
218 |
|
219 |
(defined $dim) || ($dim = $dim_); |
220 |
($dim eq $dim_) || |
221 |
die "$fmeta: dimList Local inconsistent with other dataset\n"; |
222 |
|
223 |
$ds_Index0{$ds} = join(",", @Index0); |
224 |
|
225 |
# print STDOUT "Okay $fmeta\n"; |
226 |
} |
227 |
|
228 |
#------------------------------- |
229 |
# check completeness of datasets |
230 |
# need to be more sophisticated |
231 |
#------------------------------- |
232 |
sub check_entirety { |
233 |
|
234 |
local (*Dim,*dim,*ds_Index0) = @_; |
235 |
|
236 |
local ($N) = &listprod(@Dim); |
237 |
local ($n) = &listprod(@dim); |
238 |
($N) || return 0; # against null dimension |
239 |
($n) || return 0; # against null dimension |
240 |
($N%$n) && return 0; # $N/$n must be a whole number |
241 |
|
242 |
local (@ds) = keys %ds_Index0; |
243 |
($#ds+1 == $N/$n) || return 0; # Num of datasets must match subdomain |
244 |
|
245 |
1; |
246 |
} |
247 |
|
248 |
#------------------ |
249 |
# merge one dataset |
250 |
# assume @Dim, @dim and $bytes existing |
251 |
# assume $Byte_Reorder existing |
252 |
#------------------ |
253 |
sub merge_data { |
254 |
|
255 |
local ($ds,$dir,*Index0) = @_; |
256 |
local ($fdata) = "$dir/$ds.data"; |
257 |
|
258 |
# data size of one subset in bytes as told by meta info |
259 |
local ($size) = &listprod(@dim) * $bytes; |
260 |
|
261 |
open(DFILE, "<$fdata") || die "$fdata: $!\n"; |
262 |
|
263 |
local ($raw) = ""; |
264 |
#aja sysread(DFILE,$raw,4); |
265 |
# Swap header if bytesex is diff from machine's |
266 |
local ($hdr); |
267 |
if ($Byte_Reorder) { |
268 |
$hdr = unpack("I",join("",reverse(split(//,$raw)))); |
269 |
} else { |
270 |
$hdr = unpack("I",$raw); |
271 |
} |
272 |
|
273 |
#aja ($size == $hdr) || |
274 |
#aja die "$fdata: $hdr bytes inconsistent with meta info\n"; |
275 |
|
276 |
print STDOUT "$ds.data: $size bytes, okay, "; |
277 |
|
278 |
# seek(DFILE,4,0); # rewind back to the beginning of data |
279 |
|
280 |
local ($data) = ""; # old perl (< 4.0) needs this to |
281 |
sysread(DFILE,$data,$size); # avoid warning by sysread() |
282 |
local ($len_chunk) = $dim[0] * $bytes; |
283 |
local ($num_chunk) = $size/$len_chunk; |
284 |
|
285 |
local ($pos,@index,$Pos,@Index); |
286 |
for (local($i)=0;$i<$num_chunk;$i++) { |
287 |
$pos = $i * $dim[0]; |
288 |
@index = &pos2index($pos,@dim); |
289 |
@Index = &lists_add(*index,*Index0); |
290 |
$Pos = &index2pos(*Index,*Dim); |
291 |
#aja seek(FILE,$Pos*$bytes+4,0); |
292 |
seek(FILE,$Pos*$bytes,0); |
293 |
syswrite(FILE,$data,$len_chunk,$pos*$bytes); |
294 |
} |
295 |
|
296 |
close(DFILE); |
297 |
|
298 |
print STDOUT "merged from $dir\n"; |
299 |
} |
300 |
|
301 |
#============ |
302 |
# main script |
303 |
#============ |
304 |
|
305 |
#------------ |
306 |
# parse @ARGV |
307 |
#............ |
308 |
|
309 |
($#ARGV >= 1) || &usage(); |
310 |
|
311 |
undef @dirs; |
312 |
while (1) { |
313 |
$x = shift(@ARGV); |
314 |
unless ($x =~ /^-D(.+)$/) { |
315 |
unshift(@ARGV,$x); |
316 |
last; |
317 |
} |
318 |
push(@dirs,$1); |
319 |
} |
320 |
(@dirs) || push(@dirs,"."); |
321 |
# @dirs is not empty after this line. |
322 |
#print STDOUT join(" ",@dirs), "\n"; |
323 |
|
324 |
($#ARGV >= 1) || &usage(); |
325 |
|
326 |
# data set prefix and suffix |
327 |
$pref = shift(@ARGV); |
328 |
$suff = shift(@ARGV); |
329 |
|
330 |
($#ARGV >= 1) && &usage(); |
331 |
undef $forced_bytesex; |
332 |
if (@ARGV) { |
333 |
$forced_bytesex = shift(@ARGV); |
334 |
$forced_bytesex =~ /^(little|big)-endian$/ || &usage(); |
335 |
} |
336 |
#print STDOUT $forced_bytesex, "\n"; |
337 |
|
338 |
#-------------------------- |
339 |
# obtain a list of datasets |
340 |
#.......................... |
341 |
|
342 |
# %ds_dir is a hash to store the directory that a dataset is in. |
343 |
# After this step, it is assured that, for a dataset $ds, |
344 |
# both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}. |
345 |
|
346 |
%ds_dir = (); |
347 |
foreach $dir (@dirs) { |
348 |
opendir(DIR, $dir) || die "$dir: $!\n"; |
349 |
@fmeta = grep(/^$pref\.$suff\.\d+\.\d+\.meta$/, readdir(DIR)); |
350 |
closedir(DIR); |
351 |
foreach $fmeta (@fmeta) { |
352 |
$ds = $fmeta; $ds =~ s/\.meta$//g; |
353 |
(defined $ds_dir{$ds}) && |
354 |
die "$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n"; |
355 |
(-f "$dir/$ds.data") || die "In $dir, $ds.data missing\n"; |
356 |
$ds_dir{$ds} = $dir; |
357 |
} |
358 |
} |
359 |
|
360 |
@ds = sort(keys %ds_dir); # list of datasets |
361 |
(@ds) || die "No dataset found.\n"; |
362 |
print STDOUT "There are ", $#ds+1, " datasets.\n"; |
363 |
|
364 |
#--------------------------------- |
365 |
# check meta info for all datasets |
366 |
#................................. |
367 |
|
368 |
undef $timeStepNumber; |
369 |
undef $nDims; |
370 |
undef $format; |
371 |
|
372 |
undef $Dim; |
373 |
undef $dim; |
374 |
undef %ds_Index0; |
375 |
|
376 |
#.............................................. |
377 |
# check each meta file and set some global vars |
378 |
|
379 |
foreach $ds (@ds) { |
380 |
&check_meta($ds,$ds_dir{$ds}); |
381 |
} |
382 |
print STDOUT "All existing meta files are self- and mutually consistent.\n"; |
383 |
|
384 |
#print join(" ",$timeStepNumber,$nDims,$format,$Dim,$dim), "\n"; |
385 |
#foreach $ds (@ds) { |
386 |
# $dir = $ds_dir{$ds}; |
387 |
# $Index0 = $ds_Index0{$ds}; |
388 |
# print "$ds\n"; |
389 |
# print "$Index0\n"; |
390 |
#} |
391 |
|
392 |
@Dim = split(/,/,$Dim); |
393 |
@dim = split(/,/,$dim); |
394 |
|
395 |
#................................ |
396 |
# check meta info in its entirety |
397 |
|
398 |
&check_entirety(*Dim,*dim,*ds_Index0) || |
399 |
die "Datasets are not complete!\n"; |
400 |
|
401 |
print STDOUT "Datasets are complete.\n"; |
402 |
|
403 |
#........... |
404 |
# set $bytes |
405 |
|
406 |
if ($format eq "float32") { |
407 |
$bytes = 4; |
408 |
} elsif ($format eq "float64") { |
409 |
$bytes = 8 |
410 |
} else { |
411 |
die "format '$format' unknown\n"; |
412 |
} |
413 |
|
414 |
#--------------------------- |
415 |
# check and merge data files |
416 |
#........................... |
417 |
|
418 |
#........................ |
419 |
# check machine's bytesex |
420 |
# it dies if neither little- nor big-endian. |
421 |
|
422 |
$Mach_Bytesex = &mach_bytesex(); |
423 |
print STDOUT "Current machine's endianness: $Mach_Bytesex\n"; |
424 |
|
425 |
#................... |
426 |
# check file bytesex and resolve related issues |
427 |
#aja undef $File_Bytesex; |
428 |
#aja foreach $ds (@ds) { |
429 |
#aja $fdata = "$ds.data"; |
430 |
#aja $file_bytesex = &file_bytesex($ds_dir{$ds}."/$fdata"); |
431 |
#aja ($file_bytesex eq "unknown") && |
432 |
#aja die "$fdata: endianness is neither little- nor big-endian.\n"; |
433 |
#aja print STDOUT "$fdata: $file_bytesex\n"; |
434 |
#aja unless ($File_Bytesex) { |
435 |
#aja $File_Bytesex = $file_bytesex; |
436 |
#aja } else { |
437 |
#aja ($File_Bytesex eq $file_bytesex) || |
438 |
#aja die "Data files are mutually inconsistent in endianness\n"; |
439 |
#aja } |
440 |
#aja } |
441 |
$File_Bytesex = 'big-endian'; |
442 |
|
443 |
#------------------ |
444 |
# set $Byte_Reorder, which controls swapping of bytes in |
445 |
# header and terminator of Fortran unformatted data files. |
446 |
#aja $Byte_Reorder = 0; |
447 |
$Byte_Reorder = 0; |
448 |
|
449 |
# if machine and data file have the same bytesex, no need for swapping |
450 |
#aja ($File_Bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0); |
451 |
|
452 |
# if we can't determine bytesex of data file, need forced one from @ARGV. |
453 |
if ($File_Bytesex eq "undecidable") { |
454 |
# if no forced bytesex available, dies. |
455 |
($forced_bytesex) || |
456 |
die "Endianness of data files is undecidable, " . |
457 |
"you have to give one at command line.\n"; |
458 |
($forced_bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0); |
459 |
print STDOUT "Endianness of data files is undecidable.\n"; |
460 |
print STDOUT "Data file header/tail will be treated as "; |
461 |
print STDOUT "$forced_bytesex as you have instructed.\n"; |
462 |
# otherwise |
463 |
} else { |
464 |
# give a warining, if swapping is needed. |
465 |
($Byte_Reorder) && |
466 |
print STDOUT |
467 |
"Please note: data files have different bytesex than machine!\n"; |
468 |
} |
469 |
|
470 |
#................ |
471 |
# merge data sets |
472 |
|
473 |
$Size = &listprod(@Dim) * $bytes; |
474 |
|
475 |
$fout = "$pref.$suff.data"; |
476 |
|
477 |
open(FILE, ">$fout") || die "$fout: $!\n"; |
478 |
|
479 |
# prepare header and teminator. Do byte reordering if necessary |
480 |
$HdrTmr = pack("I",$Size); |
481 |
($Byte_Reorder) && ($HdrTmr = join("",reverse(split(//,$HdrTmr)))); |
482 |
|
483 |
# write 4 byte header |
484 |
#aja syswrite(FILE,$HdrTmr,4); |
485 |
|
486 |
# merge each dataset |
487 |
foreach $ds (@ds) { |
488 |
$dir = $ds_dir{$ds}; |
489 |
@Index0 = split(/,/,$ds_Index0{$ds}); |
490 |
&merge_data($ds,$dir,*Index0); |
491 |
} |
492 |
|
493 |
# write 4 byte terminator |
494 |
#aja seek(FILE,$Size+4,0); |
495 |
#aja syswrite(FILE,$HdrTmr,4); |
496 |
|
497 |
close(FILE); |
498 |
|
499 |
print STDOUT "Global data (" . |
500 |
join("x",@Dim) . |
501 |
") is in ./$fout (endianness is $File_Bytesex).\n"; |
502 |
|
503 |
exit 0; |