1 |
#!/usr/local/bin/perl -w |
2 |
|
3 |
# MITgcmUV dataset joining utility. |
4 |
# Tested with perl 4.0 and newer. |
5 |
# Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25} |
6 |
# Zhangfan XING, xing@pacific.jpl.nasa.gov |
7 |
# |
8 |
# LOGS: |
9 |
# 980707, version 0.0.1, basically works |
10 |
# 980721, version 0.2.0, proper handling of data file's header and terminator |
11 |
# for diff bytesex. |
12 |
|
13 |
#------ |
14 |
# usage |
15 |
#------ |
16 |
sub usage { |
17 |
print STDERR |
18 |
"\nUsage:$0 [-Ddir0 -Ddir1 ...] " . |
19 |
"prefix suffix [(little-endian|big-endian)]\n"; |
20 |
print STDERR "\nMITgcmUV dataset joining utility, version 0.2.0\n"; |
21 |
print STDERR |
22 |
"Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n"; |
23 |
print STDERR "Report problem to xing\@pacific.jpl.nasa.gov\n\n"; |
24 |
exit 1; |
25 |
} |
26 |
|
27 |
#------------------------------ |
28 |
# product of a list of integers |
29 |
#------------------------------ |
30 |
sub listprod { |
31 |
local ($product) = 1; |
32 |
local ($x); |
33 |
foreach $x (@_) { |
34 |
$product *= $x; |
35 |
} |
36 |
$product; |
37 |
} |
38 |
|
39 |
#---------------- |
40 |
# @list1 + @list2 |
41 |
#---------------- |
42 |
sub lists_add { |
43 |
local (*l1,*l2) = @_; |
44 |
($#l1 == $#l2) || return undef; |
45 |
|
46 |
local (@l); |
47 |
for (local($i)=0;$i<=$#l1;$i++) { |
48 |
$l[$i]=$l1[$i]+$l2[$i]; |
49 |
} |
50 |
@l; |
51 |
} |
52 |
|
53 |
#------------- |
54 |
# pos to index |
55 |
# 0-based. |
56 |
#------------- |
57 |
sub pos2index { |
58 |
|
59 |
local ($pos,@dim) = @_; |
60 |
local ($rightmost) = pop(@dim); |
61 |
|
62 |
local (@index,$d); |
63 |
foreach $d (@dim) { |
64 |
push(@index,$pos%$d); |
65 |
$pos = int($pos/$d); |
66 |
} |
67 |
|
68 |
# self-guarding |
69 |
unless ($rightmost > $pos) { |
70 |
return undef; |
71 |
} |
72 |
|
73 |
push(@index,$pos); |
74 |
@index; |
75 |
} |
76 |
|
77 |
#------------- |
78 |
# index to pos |
79 |
# 0-based. |
80 |
#------------- |
81 |
sub index2pos { |
82 |
local (*index,*dim) = @_; |
83 |
|
84 |
return undef unless ($#index == $#dim); |
85 |
|
86 |
local ($pos) = $index[$#index]; |
87 |
for (local($i)=$#dim;$i>0;$i--) { |
88 |
$pos = $pos * $dim[$i-1] + $index[$i-1]; |
89 |
} |
90 |
$pos; |
91 |
} |
92 |
|
93 |
#------------------------- |
94 |
# check machine's bytesex. |
95 |
# returns "little-endian" or "big-endian" |
96 |
# or dies if unable to figure out |
97 |
#------------------------- |
98 |
sub mach_bytesex { |
99 |
|
100 |
local ($foo) = pack("s2",1,2); |
101 |
if ($foo eq "\1\0\2\0") { |
102 |
return "little-endian"; |
103 |
} elsif ($foo eq "\0\1\0\2") { |
104 |
return "big-endian"; |
105 |
} else { |
106 |
die "Your machine has a strange bytesex.\n". |
107 |
"Email your platform info to xing\@pacific.jpl.nasa.gov\n"; |
108 |
} |
109 |
} |
110 |
|
111 |
#-------------------------------------------------- |
112 |
# check bytesex of a fortran unformatted data file |
113 |
# current machine's bytesex is used as a reference. |
114 |
# returns: one of "little-endian", "big-endian", "undecidable" and "unknown" |
115 |
#-------------------------------------------------- |
116 |
sub file_bytesex { |
117 |
|
118 |
# only if this platform's bytesex is either big- or little-endian |
119 |
# otherwise dies. Hope this won't happen. |
120 |
local($mach_bytesex) = &mach_bytesex(); |
121 |
|
122 |
local ($file) = shift; |
123 |
local (*FILE); |
124 |
|
125 |
open(FILE,$file) || die "$file: $!\n"; |
126 |
|
127 |
local(@fstat) = stat(FILE); |
128 |
local ($size) = $fstat[7] - 8; # total data size in bytes |
129 |
|
130 |
local($hdr,$tmr) = ("",""); |
131 |
read(FILE,$hdr,4); |
132 |
seek(FILE,-4,2); |
133 |
read(FILE,$tmr,4); |
134 |
close(FILE); |
135 |
|
136 |
# this part checks for self-consistency of Fortran unformatted file |
137 |
($hdr eq $tmr) || die "$file: not a Fortran unformatted data file.\n"; |
138 |
|
139 |
local ($ori) = unpack("I",$hdr); |
140 |
local ($rev) = unpack("I",join("",reverse(split(//,$hdr)))); |
141 |
|
142 |
($ori != $size && $rev != $size) && |
143 |
return "unknown"; |
144 |
|
145 |
($ori == $size && $rev == $size) && |
146 |
return "undecidable"; |
147 |
|
148 |
local ($opposite) = ($mach_bytesex eq "little-endian") ? |
149 |
"big-endian" : "little-endian"; |
150 |
|
151 |
return ($ori == $size) ? $mach_bytesex : $opposite; |
152 |
|
153 |
} |
154 |
|
155 |
#-------------------------------- |
156 |
# check meta info for one dataset |
157 |
#-------------------------------- |
158 |
|
159 |
sub check_meta { |
160 |
|
161 |
local ($ds,$dir) = @_; |
162 |
local ($fmeta) = "$dir/$ds.meta"; |
163 |
|
164 |
#~~~~~~~~~~~~~~~~ |
165 |
# check meta info |
166 |
#~~~~~~~~~~~~~~~~ |
167 |
|
168 |
undef $/; # read to the end of file |
169 |
open(MFILE,"<$fmeta") || die "$fmeta: $!\n"; |
170 |
$_=<MFILE>; |
171 |
close(MFILE); |
172 |
$/ = "\n"; # never mess up |
173 |
|
174 |
s/\([^)]*\)//g; #rm (.*) |
175 |
s/\/\/[^\n]*\n//g; #rm comment lines |
176 |
s/\/\*.*\*\///g; #rm inline comments |
177 |
s/\s+//g; #rm white spaces |
178 |
/id=\[(.+)\];nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];/ |
179 |
|| die "$fmeta: meta file format error\n"; |
180 |
local ($id_,$nDims_,$dimList_,$format_) = ($1,$2,$3,$4); |
181 |
|
182 |
# check Identifier |
183 |
(defined $id) || ($id = $id_); |
184 |
($id eq $id_) || |
185 |
die "$fmeta: id $id_ inconsistent with other dataset\n"; |
186 |
|
187 |
# check Number of dimensions |
188 |
(defined $nDims) || ($nDims = $nDims_); |
189 |
($nDims eq $nDims_) || |
190 |
die "$fmeta: nDims $nDims_ inconsistent with other dataset\n"; |
191 |
|
192 |
# check Field format |
193 |
(defined $format) || ($format = $format_); |
194 |
($format eq $format_) || |
195 |
die "$fmeta: format $format_ inconsistent with other dataset\n"; |
196 |
|
197 |
# check dimList |
198 |
# calc dimesions and leading index of this subset |
199 |
local (@dimList_) = split(/,/,$dimList_); |
200 |
|
201 |
($nDims_*3 == $#dimList_+1) || |
202 |
die "$fmeta: nDims and dimList conflicting\n"; |
203 |
|
204 |
local (@Dim,@dim,@Index0) = (); |
205 |
for (local($i)=0;$i<$nDims_;$i++) { |
206 |
push(@Dim,$dimList_[$i*3]); |
207 |
push(@dim,$dimList_[$i*3+2]-$dimList_[$i*3+1]+1); |
208 |
push(@Index0,$dimList_[$i*3+1]-1); |
209 |
} |
210 |
local ($Dim_) = join(",",@Dim); |
211 |
local ($dim_) = join(",",@dim); |
212 |
|
213 |
(defined $Dim) || ($Dim = $Dim_); |
214 |
($Dim eq $Dim_) || |
215 |
die "$fmeta: dimList Global inconsistent with other dataset\n"; |
216 |
|
217 |
(defined $dim) || ($dim = $dim_); |
218 |
($dim eq $dim_) || |
219 |
die "$fmeta: dimList Local inconsistent with other dataset\n"; |
220 |
|
221 |
$ds_Index0{$ds} = join(",", @Index0); |
222 |
|
223 |
# print STDOUT "Okay $fmeta\n"; |
224 |
} |
225 |
|
226 |
#------------------------------- |
227 |
# check completeness of datasets |
228 |
# need to be more sophisticated |
229 |
#------------------------------- |
230 |
sub check_entirety { |
231 |
|
232 |
local (*Dim,*dim,*ds_Index0) = @_; |
233 |
|
234 |
local ($N) = &listprod(@Dim); |
235 |
local ($n) = &listprod(@dim); |
236 |
($N) || return 0; # against null dimension |
237 |
($n) || return 0; # against null dimension |
238 |
($N%$n) && return 0; # $N/$n must be a whole number |
239 |
|
240 |
local (@ds) = keys %ds_Index0; |
241 |
($#ds+1 == $N/$n) || return 0; # Num of datasets must match subdomain |
242 |
|
243 |
1; |
244 |
} |
245 |
|
246 |
#------------------ |
247 |
# merge one dataset |
248 |
# assume @Dim, @dim and $bytes existing |
249 |
# assume $Byte_Reorder existing |
250 |
#------------------ |
251 |
sub merge_data { |
252 |
|
253 |
local ($ds,$dir,*Index0) = @_; |
254 |
local ($fdata) = "$dir/$ds.data"; |
255 |
|
256 |
# data size of one subset in bytes as told by meta info |
257 |
local ($size) = &listprod(@dim) * $bytes; |
258 |
|
259 |
open(DFILE, "<$fdata") || die "$fdata: $!\n"; |
260 |
|
261 |
local ($raw) = ""; |
262 |
sysread(DFILE,$raw,4); |
263 |
# Swap header if bytesex is diff from machine's |
264 |
local ($hdr); |
265 |
if ($Byte_Reorder) { |
266 |
$hdr = unpack("I",join("",reverse(split(//,$raw)))); |
267 |
} else { |
268 |
$hdr = unpack("I",$raw); |
269 |
} |
270 |
|
271 |
($size == $hdr) || |
272 |
die "$fdata: $hdr bytes inconsistent with meta info\n"; |
273 |
|
274 |
print STDOUT "$ds.data: $size bytes, okay, "; |
275 |
|
276 |
# seek(DFILE,4,0); # rewind back to the beginning of data |
277 |
|
278 |
local ($data) = ""; # old perl (< 4.0) needs this to |
279 |
sysread(DFILE,$data,$size); # avoid warning by sysread() |
280 |
local ($len_chunk) = $dim[0] * $bytes; |
281 |
local ($num_chunk) = $size/$len_chunk; |
282 |
|
283 |
local ($pos,@index,$Pos,@Index); |
284 |
for (local($i)=0;$i<$num_chunk;$i++) { |
285 |
$pos = $i * $dim[0]; |
286 |
@index = &pos2index($pos,@dim); |
287 |
@Index = &lists_add(*index,*Index0); |
288 |
$Pos = &index2pos(*Index,*Dim); |
289 |
seek(FILE,$Pos*$bytes+4,0); |
290 |
syswrite(FILE,$data,$len_chunk,$pos*$bytes); |
291 |
} |
292 |
|
293 |
close(DFILE); |
294 |
|
295 |
print STDOUT "merged from $dir\n"; |
296 |
} |
297 |
|
298 |
#============ |
299 |
# main script |
300 |
#============ |
301 |
|
302 |
#------------ |
303 |
# parse @ARGV |
304 |
#............ |
305 |
|
306 |
($#ARGV >= 1) || &usage(); |
307 |
|
308 |
undef @dirs; |
309 |
while (1) { |
310 |
$x = shift(@ARGV); |
311 |
unless ($x =~ /^-D(.+)$/) { |
312 |
unshift(@ARGV,$x); |
313 |
last; |
314 |
} |
315 |
push(@dirs,$1); |
316 |
} |
317 |
(@dirs) || push(@dirs,"."); |
318 |
# @dirs is not empty after this line. |
319 |
#print STDOUT join(" ",@dirs), "\n"; |
320 |
|
321 |
($#ARGV >= 1) || &usage(); |
322 |
|
323 |
# data set prefix and suffix |
324 |
$pref = shift(@ARGV); |
325 |
$suff = shift(@ARGV); |
326 |
|
327 |
($#ARGV >= 1) && &usage(); |
328 |
undef $forced_bytesex; |
329 |
if (@ARGV) { |
330 |
$forced_bytesex = shift(@ARGV); |
331 |
$forced_bytesex =~ /^(little|big)-endian$/ || &usage(); |
332 |
} |
333 |
#print STDOUT $forced_bytesex, "\n"; |
334 |
|
335 |
#-------------------------- |
336 |
# obtain a list of datasets |
337 |
#.......................... |
338 |
|
339 |
# %ds_dir is a hash to store the directory that a dataset is in. |
340 |
# After this step, it is assured that, for a dataset $ds, |
341 |
# both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}. |
342 |
|
343 |
%ds_dir = (); |
344 |
foreach $dir (@dirs) { |
345 |
opendir(DIR, $dir) || die "$dir: $!\n"; |
346 |
@fmeta = grep(/^$pref\.$suff\.p\d+\.t\d+\.meta$/, readdir(DIR)); |
347 |
closedir(DIR); |
348 |
foreach $fmeta (@fmeta) { |
349 |
$ds = $fmeta; $ds =~ s/\.meta$//g; |
350 |
(defined $ds_dir{$ds}) && |
351 |
die "$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n"; |
352 |
(-f "$dir/$ds.data") || die "In $dir, $ds.data missing\n"; |
353 |
$ds_dir{$ds} = $dir; |
354 |
} |
355 |
} |
356 |
|
357 |
@ds = sort(keys %ds_dir); # list of datasets |
358 |
(@ds) || die "No dataset found.\n"; |
359 |
print STDOUT "There are ", $#ds+1, " datasets.\n"; |
360 |
|
361 |
#--------------------------------- |
362 |
# check meta info for all datasets |
363 |
#................................. |
364 |
|
365 |
undef $id; |
366 |
undef $nDims; |
367 |
undef $format; |
368 |
|
369 |
undef $Dim; |
370 |
undef $dim; |
371 |
undef %ds_Index0; |
372 |
|
373 |
#.............................................. |
374 |
# check each meta file and set some global vars |
375 |
|
376 |
foreach $ds (@ds) { |
377 |
&check_meta($ds,$ds_dir{$ds}); |
378 |
} |
379 |
print STDOUT "All existing meta files are self- and mutually consistent.\n"; |
380 |
|
381 |
#print join(" ",$id,$nDims,$format,$Dim,$dim), "\n"; |
382 |
#foreach $ds (@ds) { |
383 |
# $dir = $ds_dir{$ds}; |
384 |
# $Index0 = $ds_Index0{$ds}; |
385 |
# print "$ds\n"; |
386 |
# print "$Index0\n"; |
387 |
#} |
388 |
|
389 |
@Dim = split(/,/,$Dim); |
390 |
@dim = split(/,/,$dim); |
391 |
|
392 |
#................................ |
393 |
# check meta info in its entirety |
394 |
|
395 |
&check_entirety(*Dim,*dim,*ds_Index0) || |
396 |
die "Datasets are not complete!\n"; |
397 |
|
398 |
print STDOUT "Datasets are complete.\n"; |
399 |
|
400 |
#........... |
401 |
# set $bytes |
402 |
|
403 |
if ($format eq "float32") { |
404 |
$bytes = 4; |
405 |
} elsif ($format eq "float64") { |
406 |
$bytes = 8 |
407 |
} else { |
408 |
die "format '$format' unknown\n"; |
409 |
} |
410 |
|
411 |
#--------------------------- |
412 |
# check and merge data files |
413 |
#........................... |
414 |
|
415 |
#........................ |
416 |
# check machine's bytesex |
417 |
# it dies if neither little- nor big-endian. |
418 |
|
419 |
$Mach_Bytesex = &mach_bytesex(); |
420 |
print STDOUT "Current machine's endianness: $Mach_Bytesex\n"; |
421 |
|
422 |
#................... |
423 |
# check file bytesex and resolve realted issues |
424 |
undef $File_Bytesex; |
425 |
foreach $ds (@ds) { |
426 |
$fdata = "$ds.data"; |
427 |
$file_bytesex = &file_bytesex($ds_dir{$ds}."/$fdata"); |
428 |
($file_bytesex eq "unknown") && |
429 |
die "$fdata: endianness is neither little- nor big-endian.\n"; |
430 |
print STDOUT "$fdata: $file_bytesex\n"; |
431 |
unless ($File_Bytesex) { |
432 |
$File_Bytesex = $file_bytesex; |
433 |
} else { |
434 |
($File_Bytesex eq $file_bytesex) || |
435 |
die "Data files are mutually inconsistent in endianness\n"; |
436 |
} |
437 |
} |
438 |
|
439 |
#------------------ |
440 |
# set $Byte_Reorder, which controls swapping of bytes in |
441 |
# header and terminator of Fortran unformatted data files. |
442 |
$Byte_Reorder = 1; |
443 |
|
444 |
# if machine and data file have the same bytesex, no need for swapping |
445 |
($File_Bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0); |
446 |
|
447 |
# if we can't determine bytesex of data file, need forced one from @ARGV. |
448 |
if ($File_Bytesex eq "undecidable") { |
449 |
# if no forced bytesex available, dies. |
450 |
($forced_bytesex) || |
451 |
die "Endianness of data files is undecidable, " . |
452 |
"you have to give one at command line.\n"; |
453 |
($forced_bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0); |
454 |
print STDOUT "Endianness of data files is undecidable.\n"; |
455 |
print STDOUT "Data file header/tail will be treated as "; |
456 |
print STDOUT "$forced_bytesex as you have instructed.\n"; |
457 |
# otherwise |
458 |
} else { |
459 |
# give a warining, if swapping is needed. |
460 |
($Byte_Reorder) && |
461 |
print STDOUT |
462 |
"Please note: data files have different bytesex than machine!\n"; |
463 |
} |
464 |
|
465 |
#................ |
466 |
# merge data sets |
467 |
|
468 |
$Size = &listprod(@Dim) * $bytes; |
469 |
|
470 |
$fout = "$pref.$suff.data"; |
471 |
|
472 |
open(FILE, ">$fout") || die "$fout: $!\n"; |
473 |
|
474 |
# prepare header and teminator. Do byte reordering if necessary |
475 |
$HdrTmr = pack("I",$Size); |
476 |
($Byte_Reorder) && ($HdrTmr = join("",reverse(split(//,$HdrTmr)))); |
477 |
|
478 |
# write 4 byte header |
479 |
syswrite(FILE,$HdrTmr,4); |
480 |
|
481 |
# merge each dataset |
482 |
foreach $ds (@ds) { |
483 |
$dir = $ds_dir{$ds}; |
484 |
@Index0 = split(/,/,$ds_Index0{$ds}); |
485 |
&merge_data($ds,$dir,*Index0); |
486 |
} |
487 |
|
488 |
# write 4 byte terminator |
489 |
seek(FILE,$Size+4,0); |
490 |
syswrite(FILE,$HdrTmr,4); |
491 |
|
492 |
close(FILE); |
493 |
|
494 |
print STDOUT "Global data (" . |
495 |
join("x",@Dim) . |
496 |
") is in ./$fout (endianness is $File_Bytesex).\n"; |
497 |
|
498 |
exit 0; |