/[MITgcm]/MITgcm_contrib/llc_hires/llc_90/code-async/readtile_mpiio.c
ViewVC logotype

Annotation of /MITgcm_contrib/llc_hires/llc_90/code-async/readtile_mpiio.c

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.2 - (hide annotations) (download)
Tue Oct 3 02:33:27 2017 UTC (7 years, 10 months ago) by dimitri
Branch: MAIN
Changes since 1.1: +3 -3 lines
File MIME type: text/plain
This code and instructions were tested to work on pleiades with checkpoint65v
There is something not quite right about checkpoint65v dumpfreq output.
For example, 2D output files are 421080 instead of 421200 long.

1 dimitri 1.1 //
2     // MPI IO for MITgcm
3     //
4    
5     #include <stdio.h>
6     #include <string.h>
7     #include <assert.h>
8     #include <mpi.h>
9    
10    
11     // lat-lon-cap decomposition has 13 square facets
12     // facetElements1D is typically 1080, or 2160, or 4320
13    
14    
15    
16     //////////////////////////////////////////////////////////////
17     // These values filled in during "initSizesAndTypes()"
18     MPI_Datatype fieldElementalTypeMPI;
19     size_t sizeofFieldElementalType;
20    
21     long int tileSizeX;
22     long int tileSizeY;
23     long int xGhosts;
24     long int yGhosts;
25    
26     long int facetElements1D;
27     long int facetBytes2D;
28     long int facetTilesInX;
29     long int facetTilesInY;
30     long int tilesPerFacet;
31    
32     long int fieldZlevelSizeInBytes;
33     long int tileZlevelSizeInBytes;
34     long int ghostedTileZlevelSizeInBytes;
35    
36     // The first 7 facets all use the same style of layout; here we call
37     // them "section1". The last 6 facets share a layout style, but this
38     // style is different than section1. Here, we call them "section2".
39     MPI_Datatype section1_ioShape2D, section2_ioShape2D;
40     MPI_Datatype tileShape2D, ghostedTileShape2D;
41     MPI_Info ioHints;
42     //////////////////////////////////////////////////////////////
43    
44    
45    
46     int
47     getSizeOfMPIType(MPI_Datatype mpi_type)
48     {
49     switch (mpi_type) {
50    
51     case MPI_INT: case MPI_FLOAT: case MPI_REAL4:
52     return 4;
53     break;
54    
55     case MPI_LONG_INT: case MPI_DOUBLE: case MPI_REAL8:
56     return 8;
57     break;
58    
59     default:
60     assert(("unexpected mpi elemental type", 0));
61     break;
62    
63     }
64     return -1;
65     }
66    
67    
68    
69     void
70     createMPItypes(void)
71     {
72     // Create a type with the "shape" of a section1, 2D tile
73     MPI_Type_vector(tileSizeY, tileSizeX, facetElements1D,
74     fieldElementalTypeMPI, &section1_ioShape2D);
75     MPI_Type_commit(&section1_ioShape2D);
76    
77     // Create a type with the "shape" of a section2, 2D tile
78     MPI_Type_vector(tileSizeY, tileSizeX, 3*facetElements1D,
79     fieldElementalTypeMPI, &section2_ioShape2D);
80     MPI_Type_commit(&section2_ioShape2D);
81    
82    
83     // Create a type that describes a 2D tile in memory
84     MPI_Type_vector(tileSizeY, tileSizeX, tileSizeX,
85     fieldElementalTypeMPI, &tileShape2D);
86     MPI_Type_commit(&tileShape2D);
87    
88     // Create a type that describes a 2D tile in memory with ghost-cells.
89     int fullDims[2] = {tileSizeX + 2*xGhosts, tileSizeY + 2*yGhosts};
90     int tileDims[2] = {tileSizeX, tileSizeY};
91     int startElements[2] = {xGhosts, yGhosts};
92     MPI_Type_create_subarray(2, fullDims, tileDims, startElements,
93     MPI_ORDER_FORTRAN, fieldElementalTypeMPI, &ghostedTileShape2D);
94     MPI_Type_commit(&ghostedTileShape2D);
95    
96    
97     // Set up some possible hints
98     MPI_Info_create(&ioHints);
99     MPI_Info_set(ioHints, "collective_buffering", "true");
100     char blockSize[64];
101     sprintf(blockSize, "%ld", (((long)facetElements1D * 3) *
102     tileSizeY) * sizeofFieldElementalType);
103     MPI_Info_set(ioHints, "cb_block_size", blockSize);
104     }
105    
106    
107    
108     // Somehow we acquire this info at runtime
109     void
110     initSizesAndTypes(void)
111     {
112     /////////////////////////////////////////////
113     // Fundamental values
114     fieldElementalTypeMPI = MPI_DOUBLE;
115 dimitri 1.2 facetElements1D = 90;
116     tileSizeX = 30;
117     tileSizeY = 30;
118 dimitri 1.1 xGhosts = 8;
119     yGhosts = 8;
120     /////////////////////////////////////////////
121    
122     // Derived values
123     sizeofFieldElementalType = getSizeOfMPIType(fieldElementalTypeMPI);
124     long int facetElements2D = ((facetElements1D) * (facetElements1D));
125     facetBytes2D = (facetElements2D * sizeofFieldElementalType);
126     fieldZlevelSizeInBytes = (13*(facetBytes2D));
127    
128     facetTilesInX = ((facetElements1D)/(tileSizeX));
129     facetTilesInY = ((facetElements1D)/(tileSizeY));
130     tilesPerFacet = ((facetTilesInX)*(facetTilesInY));
131    
132     tileZlevelSizeInBytes = tileSizeX * tileSizeY * sizeofFieldElementalType;
133     ghostedTileZlevelSizeInBytes = (tileSizeX + 2*xGhosts) *
134     (tileSizeY + 2*yGhosts) *
135     sizeofFieldElementalType;
136    
137     // Create the specialized type definitions
138     createMPItypes();
139     }
140    
141    
142    
143     void
144     tileIO(
145     MPI_Comm comm,
146     char *filename,
147     MPI_Offset tileOffsetInFile,
148     MPI_Datatype tileLayoutInFile,
149     void *tileBuf,
150     MPI_Datatype tileLayoutInMemory,
151     int writeFlag)
152     {
153     int fileFlags;
154     MPI_File fh;
155     int (*MPI_IO)();
156    
157     int res,count;
158     MPI_Status status;
159    
160     if (writeFlag) {
161     fileFlags = MPI_MODE_WRONLY | MPI_MODE_CREATE;
162     MPI_IO = MPI_File_write_all;
163     } else {
164     fileFlags = MPI_MODE_RDONLY;
165     MPI_IO = MPI_File_read_all;
166     }
167    
168     //printf("filename is %s\n",filename);
169    
170     MPI_File_open(comm, filename, fileFlags, ioHints, &fh);
171     MPI_File_set_view(fh, tileOffsetInFile, fieldElementalTypeMPI,
172     tileLayoutInFile, "native", ioHints);
173    
174    
175     // MPI_IO(fh, tileBuf, 1, tileLayoutInMemory, MPI_STATUS_IGNORE);
176     res = MPI_IO(fh, tileBuf, 1, tileLayoutInMemory, &status);
177    
178     MPI_Get_count(&status,tileLayoutInFile,&count);
179    
180     //fprintf(stderr,"MPI: %d %d\n",res,count);
181    
182     MPI_File_close(&fh);
183     }
184    
185    
186    
187     // N.B.: tileID is 1-based, not 0-based
188     inline int
189     isInSection1(int tileID)
190     { return (tileID <= (7 * tilesPerFacet)); }
191    
192    
193     // N.B.: tileID is 1-based, not 0-based
194     long int
195     tileOffsetInField(int tileID)
196     {
197     return isInSection1(tileID) ?
198     ((tileID -= 1),
199     (((long)tileID / facetTilesInX) * tileZlevelSizeInBytes * facetTilesInX) +
200     (((long)tileID % facetTilesInX) * tileSizeX * sizeofFieldElementalType))
201     :
202     ((tileID -= 1 + (7 * tilesPerFacet)),
203     (7 * facetBytes2D) +
204     ((tileID / (3*facetTilesInX)) * tileZlevelSizeInBytes * 3*facetTilesInX) +
205     ((tileID % (3*facetTilesInX)) * tileSizeX * sizeofFieldElementalType));
206     }
207    
208    
209    
210     void
211     readField(
212     MPI_Comm appComm,
213     char *filename,
214     MPI_Offset fieldOffsetInFile,
215     void *tileBuf,
216     int tileID,
217     int zLevels)
218     {
219     int writeFlag = 0;
220     MPI_Comm sectionComm = MPI_COMM_NULL;
221     MPI_Datatype section1_ioShape, section2_ioShape;
222     MPI_Datatype inMemoryShape, tileShape, ghostedTileShape;
223    
224     int inSection1 = isInSection1(tileID);
225     MPI_Offset tileOffsetInFile = fieldOffsetInFile + tileOffsetInField(tileID);
226    
227    
228     // Create a type with the "shape" of a tile in memory,
229     // with the given number of z-levels.
230     MPI_Type_hvector(zLevels, 1, tileZlevelSizeInBytes,
231     tileShape2D, &tileShape);
232     MPI_Type_commit(&tileShape);
233    
234     // Create a type with the "shape" of a tile in memory,
235     // with ghost-cells, with the given number of z-levels.
236     MPI_Type_hvector(zLevels, 1, ghostedTileZlevelSizeInBytes,
237     ghostedTileShape2D, &ghostedTileShape);
238     MPI_Type_commit(&ghostedTileShape);
239    
240    
241     // choose the i/o type
242     inMemoryShape = tileShape;
243    
244    
245     // Split between section1 tiles and section2 tiles.
246     // If a rank has been assigned multiple tiles, it is possible that
247     // some of those tiles are in section1, and some are in section2.
248     // So we have to dynamically do the comm_split each time because we
249     // cannot absolutely guarentee that each rank will always be on the
250     // same side of the split every time.
251     MPI_Comm_split(appComm, inSection1, 0, &sectionComm);
252    
253     memset(tileBuf,-1,tileZlevelSizeInBytes*zLevels);
254    
255     if (inSection1) {
256    
257     // Create a type with the "shape" of a section1 tile
258     // in the file, with the given number of z-levels.
259     MPI_Type_hvector(zLevels, 1, fieldZlevelSizeInBytes,
260     section1_ioShape2D, &section1_ioShape);
261     MPI_Type_commit(&section1_ioShape);
262    
263     //printf("section 1: %d -> %ld (%ld + %ld)\n",tileID,tileOffsetInFile,fieldOffsetInFile,tileOffsetInField(tileID));
264    
265     // Do the i/o
266     tileIO(sectionComm, filename, tileOffsetInFile, section1_ioShape,
267     tileBuf, inMemoryShape, writeFlag);
268     // I believe (?) this is needed to ensure consistency when writting
269     if (writeFlag) MPI_Barrier(appComm);
270    
271     MPI_Type_free(&section1_ioShape);
272    
273     } else {
274    
275     // Create a type with the "shape" of a section2 tile
276     // in the file, with the given number of z-levels.
277     MPI_Type_hvector(zLevels, 1, fieldZlevelSizeInBytes,
278     section2_ioShape2D, &section2_ioShape);
279     MPI_Type_commit(&section2_ioShape);
280    
281     //printf("section 2: %d -> %ld (%ld + %ld)\n",tileID,tileOffsetInFile,fieldOffsetInFile,tileOffsetInField(tileID));
282    
283     // Do the i/o
284     // I believe (?) this is needed to ensure consistency when writting
285     if (writeFlag) MPI_Barrier(appComm);
286     tileIO(sectionComm, filename, tileOffsetInFile, section2_ioShape,
287     tileBuf, inMemoryShape, writeFlag);
288    
289     MPI_Type_free(&section2_ioShape);
290     }
291    
292     /*
293     if (tileID==315){
294     int i;
295     printf("field offset: %ld ",fieldOffsetInFile);
296     printf("tile offset: %ld ",tileOffsetInField(tileID));
297     printf("zlevels: %d ",zLevels);
298     for (i=0;i<10;++i)
299     printf("%f ",((double*)tileBuf)[i]);
300     printf("\n");
301     }
302     */
303    
304     // Clean up
305     MPI_Type_free(&tileShape);
306     MPI_Type_free(&ghostedTileShape);
307     MPI_Comm_free(&sectionComm);
308     }
309    
310    
311    
312     // Fortran interface
313     // This uses the "usual" method for passing Fortran strings:
314     // the string length is passed, by value, as an extra "hidden" argument
315     // after the end of the normal argument list. So for example, this
316     // routine would be invoked on the Fortran side like this:
317     // call readField(comm, filename, offset, tilebuf, tileid, zlevels)
318     // This method of passing FOrtran strings is NOT defined by the Fortran
319     // standard, but it is the method of choice for many compilers, including
320     // gcc (GNU/Linux), and icc (Intel).
321     //
322     // PLEASE NOTE that the "offset" field is of type "MPI_Offset", which
323     // is synonymous with the Fortran type "integer(kind=MPI_OFFSET_KIND)".
324     // This will typically be integer*8. But in particular it is almost
325     // certainly NOT of type "default integer", which means in particular
326     // that you CANNOT simply pass a constant (e.g. "0") as the argument,
327     // since that type will be of the wrong size.
328     void
329     readfield_(
330     MPI_Fint *fortranAppComm,
331     char *fortranFilename,
332     int *fieldOffsetInFileInPencils,
333     void *tileBuf,
334     int *tileID,
335     int *zLevels,
336     int filenameLength)
337     {
338     int i;
339     char namebuf[filenameLength+1];
340     char *filename = namebuf;
341    
342     MPI_Offset fieldOffsetInFile = *fieldOffsetInFileInPencils * tileSizeX * sizeofFieldElementalType;
343    
344     // Translate the MPI communicator from a Fortran-style handle
345     // into a C-style handle.
346     MPI_Comm appComm = MPI_Comm_f2c(*fortranAppComm);
347    
348     // Translate the Fortran-style string into a C-style string
349     //memset(filename, ' ', filenameLength));
350     strncpy(filename, fortranFilename, filenameLength);
351     for (i = filenameLength; (i > 0) && (' ' == filename[i-1]); --i) ;
352     filename[i] = '\0';
353     //while(' ' == *filename) ++filename;
354     assert(strlen(filename) > 0);
355    
356     //fprintf(stderr,"%d ::%s:: %d %ld \n",appComm,filename,filenameLength,fieldOffsetInFile);
357    
358     // Make the translated call
359     readField(appComm, filename, fieldOffsetInFile, tileBuf, *tileID, *zLevels);
360     }
361    
362    
363    
364     // For testing
365     void initsizesandtypes_(void) {initSizesAndTypes();}
366    
367    
368    
369    
370    
371     /////////////////////////////////////////////////////////////
372     // Test case
373     #if 0
374    
375     #define FILENAME "./dataFile"
376     long int fieldOffsetInFile = 0;
377    
378     void
379     doIO(MPI_Comm appComm)
380     {
381     int sizeZ = 3;
382     int tile1[sizeZ][tileSizeY][tileSizeX];
383     int tile2[sizeZ][tileSizeY][tileSizeX];
384     int ghostedTile[sizeZ][tileSizeY + 2*yGhosts][tileSizeX + 2*xGhosts];
385     int tileID;
386     int i,j,k;
387    
388     int appCommSize, appCommRank;
389     MPI_Comm_size(appComm, &appCommSize);
390     MPI_Comm_rank(appComm, &appCommRank);
391    
392     assert((facetTilesInX * tileSizeX) == facetElements1D);
393     assert((facetTilesInY * tileSizeY) == facetElements1D);
394    
395     // Ignore the dry tiles ("holes") for the moment
396     if (facetTilesInX * facetTilesInY * 13 != appCommSize) {
397     if (0 == appCommRank) {
398     printf("Unexpected number of ranks: is %d, expected %ld\n",
399     appCommSize, facetTilesInX * facetTilesInY * 13);
400     }
401     }
402     tileID = appCommRank + 1;
403    
404     #if 0
405     // Fill tile1 with distinguished values
406     for (k = 0; k < sizeZ; ++k) {
407     for (j = 0; j < (tileSizeY + 2*yGhosts); ++j) {
408     for (i = 0; i < (tileSizeX + 2*xGhosts); ++i) {
409     ghostedTile[k][j][i] = -appCommRank;
410     }
411     }
412     }
413     for (k = 0; k < sizeZ; ++k) {
414     for (j = 0; j < tileSizeY; ++j) {
415     for (i = 0; i < tileSizeX; ++i) {
416     tile1[k][j][i] = appCommRank;
417     ghostedTile[k][j+yGhosts][i+xGhosts] = appCommRank;
418     }
419     }
420     }
421     #endif
422    
423    
424    
425     if (0 == appCommRank) system("/bin/echo -n 'begin io: ' ; date ");
426     readField(appComm, FILENAME, 0, ghostedTile, tileID, sizeZ);
427     if (0 == appCommRank) system("/bin/echo -n 'half: ' ; date ");
428     readField(appComm, FILENAME, sizeZ*fieldZlevelSizeInBytes,
429     ghostedTile, tileID, sizeZ);
430    
431     #if 1
432     for (k = 0; k < sizeZ; ++k) {
433     for (j = 0; j < tileSizeY; ++j) {
434     for (i = 0; i < tileSizeX; ++i) {
435     int value = ghostedTile[k][j+yGhosts][i+xGhosts];
436     if (value != appCommRank) {
437     printf("Fail: %d %d %d: %d %d\n", k,j,i, value, appCommRank);
438     exit(1);
439     }
440     }
441     }
442     }
443     if (0 == appCommRank) printf("Verification complete\n");
444     #endif
445    
446     MPI_Barrier(appComm);
447     if (0 == appCommRank) system("/bin/echo -n 'finish: ' ; date ");
448    
449     MPI_Finalize();
450     }
451    
452    
453    
454     int
455     main(int argc, char *argv[])
456     {
457     MPI_Comm appComm = MPI_COMM_NULL;
458    
459     MPI_Init(&argc, &argv);
460     MPI_Comm_dup(MPI_COMM_WORLD, &appComm);
461    
462     initSizesAndTypes();
463     doIO(appComm);
464    
465     MPI_Finalize();
466     return 0;
467     }
468     #endif
469    

  ViewVC Help
Powered by ViewVC 1.1.22