| 1 |
#!/usr/local/bin/perl -w |
| 2 |
|
| 3 |
# MITgcmUV dataset joining utility. |
| 4 |
# Tested with perl 4.0 and newer. |
| 5 |
# Tested on Linux 2.0.27/I486, Irix 6.2/{IP22,IP25} |
| 6 |
# Zhangfan XING, xing@pacific.jpl.nasa.gov |
| 7 |
# |
| 8 |
# LOGS: |
| 9 |
# 980707, version 0.0.1, basically works |
| 10 |
# 980721, version 0.2.0, proper handling of data file's header and terminator |
| 11 |
# for diff bytesex. |
| 12 |
|
| 13 |
#------ |
| 14 |
# usage |
| 15 |
#------ |
| 16 |
sub usage { |
| 17 |
print STDERR |
| 18 |
"\nUsage:$0 [-Ddir0 -Ddir1 ...] " . |
| 19 |
"prefix suffix [(little-endian|big-endian)]\n"; |
| 20 |
print STDERR "\nMITgcmUV dataset joining utility, version 0.2.0\n"; |
| 21 |
print STDERR |
| 22 |
"Check http://escher.jpl.nasa.gov:2000/tools/ for newer version.\n"; |
| 23 |
print STDERR "Report problem to xing\@pacific.jpl.nasa.gov\n\n"; |
| 24 |
exit 1; |
| 25 |
} |
| 26 |
|
| 27 |
#------------------------------ |
| 28 |
# product of a list of integers |
| 29 |
#------------------------------ |
| 30 |
sub listprod { |
| 31 |
local ($product) = 1; |
| 32 |
local ($x); |
| 33 |
foreach $x (@_) { |
| 34 |
$product *= $x; |
| 35 |
} |
| 36 |
$product; |
| 37 |
} |
| 38 |
|
| 39 |
#---------------- |
| 40 |
# @list1 + @list2 |
| 41 |
#---------------- |
| 42 |
sub lists_add { |
| 43 |
local (*l1,*l2) = @_; |
| 44 |
($#l1 == $#l2) || return undef; |
| 45 |
|
| 46 |
local (@l); |
| 47 |
for (local($i)=0;$i<=$#l1;$i++) { |
| 48 |
$l[$i]=$l1[$i]+$l2[$i]; |
| 49 |
} |
| 50 |
@l; |
| 51 |
} |
| 52 |
|
| 53 |
#------------- |
| 54 |
# pos to index |
| 55 |
# 0-based. |
| 56 |
#------------- |
| 57 |
sub pos2index { |
| 58 |
|
| 59 |
local ($pos,@dim) = @_; |
| 60 |
local ($rightmost) = pop(@dim); |
| 61 |
|
| 62 |
local (@index,$d); |
| 63 |
foreach $d (@dim) { |
| 64 |
push(@index,$pos%$d); |
| 65 |
$pos = int($pos/$d); |
| 66 |
} |
| 67 |
|
| 68 |
# self-guarding |
| 69 |
unless ($rightmost > $pos) { |
| 70 |
return undef; |
| 71 |
} |
| 72 |
|
| 73 |
push(@index,$pos); |
| 74 |
@index; |
| 75 |
} |
| 76 |
|
| 77 |
#------------- |
| 78 |
# index to pos |
| 79 |
# 0-based. |
| 80 |
#------------- |
| 81 |
sub index2pos { |
| 82 |
local (*index,*dim) = @_; |
| 83 |
|
| 84 |
return undef unless ($#index == $#dim); |
| 85 |
|
| 86 |
local ($pos) = $index[$#index]; |
| 87 |
for (local($i)=$#dim;$i>0;$i--) { |
| 88 |
$pos = $pos * $dim[$i-1] + $index[$i-1]; |
| 89 |
} |
| 90 |
$pos; |
| 91 |
} |
| 92 |
|
| 93 |
#------------------------- |
| 94 |
# check machine's bytesex. |
| 95 |
# returns "little-endian" or "big-endian" |
| 96 |
# or dies if unable to figure out |
| 97 |
#------------------------- |
| 98 |
sub mach_bytesex { |
| 99 |
|
| 100 |
local ($foo) = pack("s2",1,2); |
| 101 |
if ($foo eq "\1\0\2\0") { |
| 102 |
return "little-endian"; |
| 103 |
} elsif ($foo eq "\0\1\0\2") { |
| 104 |
return "big-endian"; |
| 105 |
} else { |
| 106 |
die "Your machine has a strange bytesex.\n". |
| 107 |
"Email your platform info to xing\@pacific.jpl.nasa.gov\n"; |
| 108 |
} |
| 109 |
} |
| 110 |
|
| 111 |
#-------------------------------------------------- |
| 112 |
# check bytesex of a fortran unformatted data file |
| 113 |
# current machine's bytesex is used as a reference. |
| 114 |
# returns: one of "little-endian", "big-endian", "undecidable" and "unknown" |
| 115 |
#-------------------------------------------------- |
| 116 |
sub file_bytesex { |
| 117 |
|
| 118 |
# only if this platform's bytesex is either big- or little-endian |
| 119 |
# otherwise dies. Hope this won't happen. |
| 120 |
local($mach_bytesex) = &mach_bytesex(); |
| 121 |
|
| 122 |
local ($file) = shift; |
| 123 |
local (*FILE); |
| 124 |
|
| 125 |
open(FILE,$file) || die "$file: $!\n"; |
| 126 |
|
| 127 |
local(@fstat) = stat(FILE); |
| 128 |
local ($size) = $fstat[7] - 8; # total data size in bytes |
| 129 |
|
| 130 |
local($hdr,$tmr) = ("",""); |
| 131 |
read(FILE,$hdr,4); |
| 132 |
seek(FILE,-4,2); |
| 133 |
read(FILE,$tmr,4); |
| 134 |
close(FILE); |
| 135 |
|
| 136 |
# this part checks for self-consistency of Fortran unformatted file |
| 137 |
($hdr eq $tmr) || die "$file: not a Fortran unformatted data file.\n"; |
| 138 |
|
| 139 |
local ($ori) = unpack("I",$hdr); |
| 140 |
local ($rev) = unpack("I",join("",reverse(split(//,$hdr)))); |
| 141 |
|
| 142 |
($ori != $size && $rev != $size) && |
| 143 |
return "unknown"; |
| 144 |
|
| 145 |
($ori == $size && $rev == $size) && |
| 146 |
return "undecidable"; |
| 147 |
|
| 148 |
local ($opposite) = ($mach_bytesex eq "little-endian") ? |
| 149 |
"big-endian" : "little-endian"; |
| 150 |
|
| 151 |
return ($ori == $size) ? $mach_bytesex : $opposite; |
| 152 |
|
| 153 |
} |
| 154 |
|
| 155 |
#-------------------------------- |
| 156 |
# check meta info for one dataset |
| 157 |
#-------------------------------- |
| 158 |
|
| 159 |
sub check_meta { |
| 160 |
|
| 161 |
local ($ds,$dir) = @_; |
| 162 |
local ($fmeta) = "$dir/$ds.meta"; |
| 163 |
|
| 164 |
#~~~~~~~~~~~~~~~~ |
| 165 |
# check meta info |
| 166 |
#~~~~~~~~~~~~~~~~ |
| 167 |
|
| 168 |
undef $/; # read to the end of file |
| 169 |
open(MFILE,"<$fmeta") || die "$fmeta: $!\n"; |
| 170 |
$_=<MFILE>; |
| 171 |
close(MFILE); |
| 172 |
$/ = "\n"; # never mess up |
| 173 |
|
| 174 |
s/\([^)]*\)//g; #rm (.*) |
| 175 |
s/\/\/[^\n]*\n//g; #rm comment lines |
| 176 |
s/\/\*.*\*\///g; #rm inline comments |
| 177 |
s/\s+//g; #rm white spaces |
| 178 |
/id=\[(.+)\];nDims=\[(.+)\];dimList=\[(.+)\];format=\['(.+)'\];/ |
| 179 |
|| die "$fmeta: meta file format error\n"; |
| 180 |
local ($id_,$nDims_,$dimList_,$format_) = ($1,$2,$3,$4); |
| 181 |
|
| 182 |
# check Identifier |
| 183 |
(defined $id) || ($id = $id_); |
| 184 |
($id eq $id_) || |
| 185 |
die "$fmeta: id $id_ inconsistent with other dataset\n"; |
| 186 |
|
| 187 |
# check Number of dimensions |
| 188 |
(defined $nDims) || ($nDims = $nDims_); |
| 189 |
($nDims eq $nDims_) || |
| 190 |
die "$fmeta: nDims $nDims_ inconsistent with other dataset\n"; |
| 191 |
|
| 192 |
# check Field format |
| 193 |
(defined $format) || ($format = $format_); |
| 194 |
($format eq $format_) || |
| 195 |
die "$fmeta: format $format_ inconsistent with other dataset\n"; |
| 196 |
|
| 197 |
# check dimList |
| 198 |
# calc dimesions and leading index of this subset |
| 199 |
local (@dimList_) = split(/,/,$dimList_); |
| 200 |
|
| 201 |
($nDims_*3 == $#dimList_+1) || |
| 202 |
die "$fmeta: nDims and dimList conflicting\n"; |
| 203 |
|
| 204 |
local (@Dim,@dim,@Index0) = (); |
| 205 |
for (local($i)=0;$i<$nDims_;$i++) { |
| 206 |
push(@Dim,$dimList_[$i*3]); |
| 207 |
push(@dim,$dimList_[$i*3+2]-$dimList_[$i*3+1]+1); |
| 208 |
push(@Index0,$dimList_[$i*3+1]-1); |
| 209 |
} |
| 210 |
local ($Dim_) = join(",",@Dim); |
| 211 |
local ($dim_) = join(",",@dim); |
| 212 |
|
| 213 |
(defined $Dim) || ($Dim = $Dim_); |
| 214 |
($Dim eq $Dim_) || |
| 215 |
die "$fmeta: dimList Global inconsistent with other dataset\n"; |
| 216 |
|
| 217 |
(defined $dim) || ($dim = $dim_); |
| 218 |
($dim eq $dim_) || |
| 219 |
die "$fmeta: dimList Local inconsistent with other dataset\n"; |
| 220 |
|
| 221 |
$ds_Index0{$ds} = join(",", @Index0); |
| 222 |
|
| 223 |
# print STDOUT "Okay $fmeta\n"; |
| 224 |
} |
| 225 |
|
| 226 |
#------------------------------- |
| 227 |
# check completeness of datasets |
| 228 |
# need to be more sophisticated |
| 229 |
#------------------------------- |
| 230 |
sub check_entirety { |
| 231 |
|
| 232 |
local (*Dim,*dim,*ds_Index0) = @_; |
| 233 |
|
| 234 |
local ($N) = &listprod(@Dim); |
| 235 |
local ($n) = &listprod(@dim); |
| 236 |
($N) || return 0; # against null dimension |
| 237 |
($n) || return 0; # against null dimension |
| 238 |
($N%$n) && return 0; # $N/$n must be a whole number |
| 239 |
|
| 240 |
local (@ds) = keys %ds_Index0; |
| 241 |
($#ds+1 == $N/$n) || return 0; # Num of datasets must match subdomain |
| 242 |
|
| 243 |
1; |
| 244 |
} |
| 245 |
|
| 246 |
#------------------ |
| 247 |
# merge one dataset |
| 248 |
# assume @Dim, @dim and $bytes existing |
| 249 |
# assume $Byte_Reorder existing |
| 250 |
#------------------ |
| 251 |
sub merge_data { |
| 252 |
|
| 253 |
local ($ds,$dir,*Index0) = @_; |
| 254 |
local ($fdata) = "$dir/$ds.data"; |
| 255 |
|
| 256 |
# data size of one subset in bytes as told by meta info |
| 257 |
local ($size) = &listprod(@dim) * $bytes; |
| 258 |
|
| 259 |
open(DFILE, "<$fdata") || die "$fdata: $!\n"; |
| 260 |
|
| 261 |
local ($raw) = ""; |
| 262 |
sysread(DFILE,$raw,4); |
| 263 |
# Swap header if bytesex is diff from machine's |
| 264 |
local ($hdr); |
| 265 |
if ($Byte_Reorder) { |
| 266 |
$hdr = unpack("I",join("",reverse(split(//,$raw)))); |
| 267 |
} else { |
| 268 |
$hdr = unpack("I",$raw); |
| 269 |
} |
| 270 |
|
| 271 |
($size == $hdr) || |
| 272 |
die "$fdata: $hdr bytes inconsistent with meta info\n"; |
| 273 |
|
| 274 |
print STDOUT "$ds.data: $size bytes, okay, "; |
| 275 |
|
| 276 |
# seek(DFILE,4,0); # rewind back to the beginning of data |
| 277 |
|
| 278 |
local ($data) = ""; # old perl (< 4.0) needs this to |
| 279 |
sysread(DFILE,$data,$size); # avoid warning by sysread() |
| 280 |
local ($len_chunk) = $dim[0] * $bytes; |
| 281 |
local ($num_chunk) = $size/$len_chunk; |
| 282 |
|
| 283 |
local ($pos,@index,$Pos,@Index); |
| 284 |
for (local($i)=0;$i<$num_chunk;$i++) { |
| 285 |
$pos = $i * $dim[0]; |
| 286 |
@index = &pos2index($pos,@dim); |
| 287 |
@Index = &lists_add(*index,*Index0); |
| 288 |
$Pos = &index2pos(*Index,*Dim); |
| 289 |
seek(FILE,$Pos*$bytes+4,0); |
| 290 |
syswrite(FILE,$data,$len_chunk,$pos*$bytes); |
| 291 |
} |
| 292 |
|
| 293 |
close(DFILE); |
| 294 |
|
| 295 |
print STDOUT "merged from $dir\n"; |
| 296 |
} |
| 297 |
|
| 298 |
#============ |
| 299 |
# main script |
| 300 |
#============ |
| 301 |
|
| 302 |
#------------ |
| 303 |
# parse @ARGV |
| 304 |
#............ |
| 305 |
|
| 306 |
($#ARGV >= 1) || &usage(); |
| 307 |
|
| 308 |
undef @dirs; |
| 309 |
while (1) { |
| 310 |
$x = shift(@ARGV); |
| 311 |
unless ($x =~ /^-D(.+)$/) { |
| 312 |
unshift(@ARGV,$x); |
| 313 |
last; |
| 314 |
} |
| 315 |
push(@dirs,$1); |
| 316 |
} |
| 317 |
(@dirs) || push(@dirs,"."); |
| 318 |
# @dirs is not empty after this line. |
| 319 |
#print STDOUT join(" ",@dirs), "\n"; |
| 320 |
|
| 321 |
($#ARGV >= 1) || &usage(); |
| 322 |
|
| 323 |
# data set prefix and suffix |
| 324 |
$pref = shift(@ARGV); |
| 325 |
$suff = shift(@ARGV); |
| 326 |
|
| 327 |
($#ARGV >= 1) && &usage(); |
| 328 |
undef $forced_bytesex; |
| 329 |
if (@ARGV) { |
| 330 |
$forced_bytesex = shift(@ARGV); |
| 331 |
$forced_bytesex =~ /^(little|big)-endian$/ || &usage(); |
| 332 |
} |
| 333 |
#print STDOUT $forced_bytesex, "\n"; |
| 334 |
|
| 335 |
#-------------------------- |
| 336 |
# obtain a list of datasets |
| 337 |
#.......................... |
| 338 |
|
| 339 |
# %ds_dir is a hash to store the directory that a dataset is in. |
| 340 |
# After this step, it is assured that, for a dataset $ds, |
| 341 |
# both $ds.meta and $ds.data exist in a unique dir $ds_dir{$ds}. |
| 342 |
|
| 343 |
%ds_dir = (); |
| 344 |
foreach $dir (@dirs) { |
| 345 |
opendir(DIR, $dir) || die "$dir: $!\n"; |
| 346 |
@fmeta = grep(/^$pref\.$suff\.p\d+\.t\d+\.meta$/, readdir(DIR)); |
| 347 |
closedir(DIR); |
| 348 |
foreach $fmeta (@fmeta) { |
| 349 |
$ds = $fmeta; $ds =~ s/\.meta$//g; |
| 350 |
(defined $ds_dir{$ds}) && |
| 351 |
die "$fmeta appears in two dirs: $ds_dir{$ds} & $dir\n"; |
| 352 |
(-f "$dir/$ds.data") || die "In $dir, $ds.data missing\n"; |
| 353 |
$ds_dir{$ds} = $dir; |
| 354 |
} |
| 355 |
} |
| 356 |
|
| 357 |
@ds = sort(keys %ds_dir); # list of datasets |
| 358 |
(@ds) || die "No dataset found.\n"; |
| 359 |
print STDOUT "There are ", $#ds+1, " datasets.\n"; |
| 360 |
|
| 361 |
#--------------------------------- |
| 362 |
# check meta info for all datasets |
| 363 |
#................................. |
| 364 |
|
| 365 |
undef $id; |
| 366 |
undef $nDims; |
| 367 |
undef $format; |
| 368 |
|
| 369 |
undef $Dim; |
| 370 |
undef $dim; |
| 371 |
undef %ds_Index0; |
| 372 |
|
| 373 |
#.............................................. |
| 374 |
# check each meta file and set some global vars |
| 375 |
|
| 376 |
foreach $ds (@ds) { |
| 377 |
&check_meta($ds,$ds_dir{$ds}); |
| 378 |
} |
| 379 |
print STDOUT "All existing meta files are self- and mutually consistent.\n"; |
| 380 |
|
| 381 |
#print join(" ",$id,$nDims,$format,$Dim,$dim), "\n"; |
| 382 |
#foreach $ds (@ds) { |
| 383 |
# $dir = $ds_dir{$ds}; |
| 384 |
# $Index0 = $ds_Index0{$ds}; |
| 385 |
# print "$ds\n"; |
| 386 |
# print "$Index0\n"; |
| 387 |
#} |
| 388 |
|
| 389 |
@Dim = split(/,/,$Dim); |
| 390 |
@dim = split(/,/,$dim); |
| 391 |
|
| 392 |
#................................ |
| 393 |
# check meta info in its entirety |
| 394 |
|
| 395 |
&check_entirety(*Dim,*dim,*ds_Index0) || |
| 396 |
die "Datasets are not complete!\n"; |
| 397 |
|
| 398 |
print STDOUT "Datasets are complete.\n"; |
| 399 |
|
| 400 |
#........... |
| 401 |
# set $bytes |
| 402 |
|
| 403 |
if ($format eq "float32") { |
| 404 |
$bytes = 4; |
| 405 |
} elsif ($format eq "float64") { |
| 406 |
$bytes = 8 |
| 407 |
} else { |
| 408 |
die "format '$format' unknown\n"; |
| 409 |
} |
| 410 |
|
| 411 |
#--------------------------- |
| 412 |
# check and merge data files |
| 413 |
#........................... |
| 414 |
|
| 415 |
#........................ |
| 416 |
# check machine's bytesex |
| 417 |
# it dies if neither little- nor big-endian. |
| 418 |
|
| 419 |
$Mach_Bytesex = &mach_bytesex(); |
| 420 |
print STDOUT "Current machine's endianness: $Mach_Bytesex\n"; |
| 421 |
|
| 422 |
#................... |
| 423 |
# check file bytesex and resolve realted issues |
| 424 |
undef $File_Bytesex; |
| 425 |
foreach $ds (@ds) { |
| 426 |
$fdata = "$ds.data"; |
| 427 |
$file_bytesex = &file_bytesex($ds_dir{$ds}."/$fdata"); |
| 428 |
($file_bytesex eq "unknown") && |
| 429 |
die "$fdata: endianness is neither little- nor big-endian.\n"; |
| 430 |
print STDOUT "$fdata: $file_bytesex\n"; |
| 431 |
unless ($File_Bytesex) { |
| 432 |
$File_Bytesex = $file_bytesex; |
| 433 |
} else { |
| 434 |
($File_Bytesex eq $file_bytesex) || |
| 435 |
die "Data files are mutually inconsistent in endianness\n"; |
| 436 |
} |
| 437 |
} |
| 438 |
|
| 439 |
#------------------ |
| 440 |
# set $Byte_Reorder, which controls swapping of bytes in |
| 441 |
# header and terminator of Fortran unformatted data files. |
| 442 |
$Byte_Reorder = 1; |
| 443 |
|
| 444 |
# if machine and data file have the same bytesex, no need for swapping |
| 445 |
($File_Bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0); |
| 446 |
|
| 447 |
# if we can't determine bytesex of data file, need forced one from @ARGV. |
| 448 |
if ($File_Bytesex eq "undecidable") { |
| 449 |
# if no forced bytesex available, dies. |
| 450 |
($forced_bytesex) || |
| 451 |
die "Endianness of data files is undecidable, " . |
| 452 |
"you have to give one at command line.\n"; |
| 453 |
($forced_bytesex eq $Mach_Bytesex) && ($Byte_Reorder = 0); |
| 454 |
print STDOUT "Endianness of data files is undecidable.\n"; |
| 455 |
print STDOUT "Data file header/tail will be treated as "; |
| 456 |
print STDOUT "$forced_bytesex as you have instructed.\n"; |
| 457 |
# otherwise |
| 458 |
} else { |
| 459 |
# give a warining, if swapping is needed. |
| 460 |
($Byte_Reorder) && |
| 461 |
print STDOUT |
| 462 |
"Please note: data files have different bytesex than machine!\n"; |
| 463 |
} |
| 464 |
|
| 465 |
#................ |
| 466 |
# merge data sets |
| 467 |
|
| 468 |
$Size = &listprod(@Dim) * $bytes; |
| 469 |
|
| 470 |
$fout = "$pref.$suff.data"; |
| 471 |
|
| 472 |
open(FILE, ">$fout") || die "$fout: $!\n"; |
| 473 |
|
| 474 |
# prepare header and teminator. Do byte reordering if necessary |
| 475 |
$HdrTmr = pack("I",$Size); |
| 476 |
($Byte_Reorder) && ($HdrTmr = join("",reverse(split(//,$HdrTmr)))); |
| 477 |
|
| 478 |
# write 4 byte header |
| 479 |
syswrite(FILE,$HdrTmr,4); |
| 480 |
|
| 481 |
# merge each dataset |
| 482 |
foreach $ds (@ds) { |
| 483 |
$dir = $ds_dir{$ds}; |
| 484 |
@Index0 = split(/,/,$ds_Index0{$ds}); |
| 485 |
&merge_data($ds,$dir,*Index0); |
| 486 |
} |
| 487 |
|
| 488 |
# write 4 byte terminator |
| 489 |
seek(FILE,$Size+4,0); |
| 490 |
syswrite(FILE,$HdrTmr,4); |
| 491 |
|
| 492 |
close(FILE); |
| 493 |
|
| 494 |
print STDOUT "Global data (" . |
| 495 |
join("x",@Dim) . |
| 496 |
") is in ./$fout (endianness is $File_Bytesex).\n"; |
| 497 |
|
| 498 |
exit 0; |