Actual source code: hdf5io.c

  1: #include <petsc/private/viewerhdf5impl.h>
  2: #include <petsclayouthdf5.h>

  4: struct _n_HDF5ReadCtx {
  5:   const char *name;
  6:   hid_t       file, group, dataset, dataspace;
  7:   int         lenInd, bsInd, complexInd, rdim;
  8:   hsize_t    *dims;
  9:   PetscBool   complexVal, dim2;

 11:   // Needed for compression
 12:   PetscInt  runs;
 13:   PetscInt *cind;
 14: };
 15: typedef struct _n_HDF5ReadCtx *HDF5ReadCtx;

 17: PetscErrorCode PetscViewerHDF5CheckTimestepping_Internal(PetscViewer viewer, const char name[])
 18: {
 19:   PetscViewer_HDF5 *hdf5         = (PetscViewer_HDF5 *)viewer->data;
 20:   PetscBool         timestepping = PETSC_FALSE;

 22:   PetscFunctionBegin;
 23:   PetscCall(PetscViewerHDF5ReadAttribute(viewer, name, "timestepping", PETSC_BOOL, &hdf5->defTimestepping, &timestepping));
 24:   if (timestepping != hdf5->timestepping) {
 25:     char *group;

 27:     PetscCall(PetscViewerHDF5GetGroup(viewer, NULL, &group));
 28:     SETERRQ(PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Dataset %s/%s stored with timesteps? %s Timestepping pushed? %s", group, name, PetscBools[timestepping], PetscBools[hdf5->timestepping]);
 29:   }
 30:   PetscFunctionReturn(PETSC_SUCCESS);
 31: }

 33: static PetscErrorCode PetscViewerHDF5ReadInitialize_Private(PetscViewer viewer, const char name[], HDF5ReadCtx *ctx)
 34: {
 35:   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
 36:   HDF5ReadCtx       h    = NULL;

 38:   PetscFunctionBegin;
 39:   PetscCall(PetscViewerHDF5CheckTimestepping_Internal(viewer, name));
 40:   PetscCall(PetscNew(&h));
 41:   h->name = name;
 42:   PetscCall(PetscViewerHDF5OpenGroup(viewer, NULL, &h->file, &h->group));
 43:   PetscCallHDF5Return(h->dataset, H5Dopen2, (h->group, name, H5P_DEFAULT));
 44:   PetscCallHDF5Return(h->dataspace, H5Dget_space, (h->dataset));
 45:   PetscCall(PetscViewerHDF5ReadAttribute(viewer, name, "complex", PETSC_BOOL, &h->complexVal, &h->complexVal));
 46:   if (!hdf5->horizontal) {
 47:     /* MATLAB stores column vectors horizontally */
 48:     PetscCall(PetscViewerHDF5HasAttribute(viewer, name, "MATLAB_class", &hdf5->horizontal));
 49:   }
 50:   h->runs = 0;
 51:   h->cind = NULL;
 52:   *ctx    = h;
 53:   PetscFunctionReturn(PETSC_SUCCESS);
 54: }

 56: static PetscErrorCode PetscViewerHDF5ReadFinalize_Private(PetscViewer viewer, HDF5ReadCtx *ctx)
 57: {
 58:   HDF5ReadCtx h;

 60:   PetscFunctionBegin;
 61:   h = *ctx;
 62:   PetscCallHDF5(H5Gclose, (h->group));
 63:   PetscCallHDF5(H5Sclose, (h->dataspace));
 64:   PetscCallHDF5(H5Dclose, (h->dataset));
 65:   PetscCall(PetscFree((*ctx)->dims));
 66:   PetscCall(PetscFree((*ctx)->cind));
 67:   PetscCall(PetscFree(*ctx));
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: // Need forward declaration because we have a cyclic call chain
 72: static PetscErrorCode PetscViewerHDF5Load_Internal(PetscViewer, const char[], PetscBool, PetscLayout, hid_t, void **);

 74: static PetscErrorCode PetscViewerHDF5ReadSizes_Private(PetscViewer viewer, HDF5ReadCtx ctx, PetscBool uncompress, PetscBool setup, PetscLayout *map_)
 75: {
 76:   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
 77:   PetscInt          bs, N;
 78:   PetscLayout       map;
 79:   PetscBool         compressed;

 81:   PetscFunctionBegin;
 82:   if (!*map_) PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)viewer), map_));
 83:   map = *map_;

 85:   PetscCall(PetscViewerHDF5HasAttribute(viewer, ctx->name, "compressed", &compressed));
 86:   if (compressed && uncompress) {
 87:     hid_t           inttype;
 88:     PetscLayout     cmap;
 89:     PetscInt       *lcind, N = 0;
 90:     PetscMPIInt    *counts, *displs, size, n;
 91:     const PetscInt *range;
 92:     MPI_Comm        comm;

 94: #if defined(PETSC_USE_64BIT_INDICES)
 95:     inttype = H5T_NATIVE_LLONG;
 96: #else
 97:     inttype = H5T_NATIVE_INT;
 98: #endif
 99:     PetscCall(PetscObjectGetComm((PetscObject)viewer, &comm));
100:     PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)viewer), &cmap));
101:     cmap->bs = 3;
102:     PetscCall(PetscViewerHDF5Load_Internal(viewer, ctx->name, PETSC_FALSE, cmap, inttype, (void **)&lcind));
103:     PetscCheck(!(cmap->n % 3), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Compressed IS must have an even number of entries, not %" PetscInt_FMT, cmap->n);
104:     for (PetscInt i = 0; i < cmap->n / 3; ++i) N += lcind[i * 3 + 0];
105:     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N, 1, MPIU_INT, MPIU_SUM, comm));
106:     ctx->runs = cmap->N / 3;
107:     PetscCall(PetscMalloc1(cmap->N, &ctx->cind));
108:     PetscCallMPI(MPI_Comm_size(comm, &size));
109:     PetscCall(PetscLayoutGetRanges(cmap, &range));
110:     PetscCall(PetscMalloc2(size, &counts, size, &displs));
111:     for (PetscInt r = 0; r < size; ++r) {
112:       PetscCall(PetscMPIIntCast(range[r + 1] - range[r], &counts[r]));
113:       PetscCall(PetscMPIIntCast(range[r], &displs[r]));
114:     }
115:     PetscCall(PetscMPIIntCast(cmap->n, &n));
116:     PetscCallMPI(MPI_Allgatherv(lcind, n, MPIU_INT, ctx->cind, counts, displs, MPIU_INT, comm));
117:     PetscCall(PetscFree2(counts, displs));
118:     PetscCall(PetscFree(lcind));
119:     PetscCall(PetscLayoutDestroy(&cmap));

121:     ctx->dim2   = PETSC_FALSE;
122:     ctx->rdim   = 1;
123:     ctx->lenInd = 0;
124:     PetscCall(PetscMalloc1(ctx->rdim, &ctx->dims));
125:     ctx->dims[0] = N;
126:     bs           = 1;
127:     goto layout;
128:   }

130:   /* Get actual number of dimensions in dataset */
131:   PetscCallHDF5Return(ctx->rdim, H5Sget_simple_extent_dims, (ctx->dataspace, NULL, NULL));
132:   PetscCall(PetscMalloc1(ctx->rdim, &ctx->dims));
133:   PetscCallHDF5Return(ctx->rdim, H5Sget_simple_extent_dims, (ctx->dataspace, ctx->dims, NULL));

135:   /*
136:      Dimensions are in this order:
137:      [0]        timesteps (optional)
138:      [lenInd]   entries (numbers or blocks)
139:      ...
140:      [bsInd]    entries of blocks (optional)
141:      [bsInd+1]  real & imaginary part (optional)
142:       = rdim-1
143:    */

145:   /* Get entries dimension index */
146:   ctx->lenInd = 0;
147:   if (hdf5->timestepping) ++ctx->lenInd;

149:   /* Get block dimension index */
150:   if (ctx->complexVal) {
151:     ctx->bsInd      = ctx->rdim - 2;
152:     ctx->complexInd = ctx->rdim - 1;
153:   } else {
154:     ctx->bsInd      = ctx->rdim - 1;
155:     ctx->complexInd = -1;
156:   }
157:   PetscCheck(ctx->lenInd <= ctx->bsInd, PetscObjectComm((PetscObject)viewer), PETSC_ERR_PLIB, "Calculated block dimension index = %d < %d = length dimension index.", ctx->bsInd, ctx->lenInd);
158:   PetscCheck(ctx->bsInd <= ctx->rdim - 1, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Calculated block dimension index = %d > %d = total number of dimensions - 1.", ctx->bsInd, ctx->rdim - 1);
159:   PetscCheck(!ctx->complexVal || ctx->dims[ctx->complexInd] == 2, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Complex numbers must have exactly 2 parts (%" PRIuHSIZE ")", ctx->dims[ctx->complexInd]);

161:   if (hdf5->horizontal) {
162:     /* support horizontal 1D arrays (MATLAB vectors) - swap meaning of blocks and entries */
163:     int t       = ctx->lenInd;
164:     ctx->lenInd = ctx->bsInd;
165:     ctx->bsInd  = t;
166:   }

168:   /* Get block size */
169:   ctx->dim2 = PETSC_FALSE;
170:   if (ctx->lenInd == ctx->bsInd) {
171:     bs = 1; /* support vectors stored as 1D array */
172:   } else {
173:     bs = (PetscInt)ctx->dims[ctx->bsInd];
174:     if (bs == 1) ctx->dim2 = PETSC_TRUE; /* vector with blocksize of 1, still stored as 2D array */
175:   }

177: layout:
178:   /* Get global size */
179:   PetscCall(PetscIntCast(bs * ctx->dims[ctx->lenInd], &N));

181:   /* Set global size, blocksize and type if not yet set */
182:   if (map->bs < 0) {
183:     PetscCall(PetscLayoutSetBlockSize(map, bs));
184:   } else PetscCheck(map->bs == bs, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Block size of array in file is %" PetscInt_FMT ", not %" PetscInt_FMT " as expected", bs, map->bs);
185:   if (map->N < 0) {
186:     PetscCall(PetscLayoutSetSize(map, N));
187:   } else PetscCheck(map->N == N, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Global size of array %s in file is %" PetscInt_FMT ", not %" PetscInt_FMT " as expected", ctx->name, N, map->N);
188:   if (setup) PetscCall(PetscLayoutSetUp(map));
189:   PetscFunctionReturn(PETSC_SUCCESS);
190: }

192: static PetscErrorCode PetscViewerHDF5ReadSelectHyperslab_Private(PetscViewer viewer, HDF5ReadCtx ctx, PetscLayout map, hid_t *memspace)
193: {
194:   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
195:   hsize_t          *count, *offset;
196:   PetscInt          bs, n, low;
197:   int               i;

199:   PetscFunctionBegin;
200:   /* Compute local size and ownership range */
201:   PetscCall(PetscLayoutSetUp(map));
202:   PetscCall(PetscLayoutGetBlockSize(map, &bs));
203:   PetscCall(PetscLayoutGetLocalSize(map, &n));
204:   PetscCall(PetscLayoutGetRange(map, &low, NULL));

206:   /* Each process defines a dataset and reads it from the hyperslab in the file */
207:   PetscCall(PetscMalloc2(ctx->rdim, &count, ctx->rdim, &offset));
208:   for (i = 0; i < ctx->rdim; i++) {
209:     /* By default, select all entries with no offset */
210:     offset[i] = 0;
211:     count[i]  = ctx->dims[i];
212:   }
213:   if (hdf5->timestepping) {
214:     count[0]  = 1;
215:     offset[0] = hdf5->timestep;
216:   }
217:   {
218:     PetscCall(PetscHDF5IntCast(n / bs, &count[ctx->lenInd]));
219:     PetscCall(PetscHDF5IntCast(low / bs, &offset[ctx->lenInd]));
220:   }
221:   PetscCallHDF5Return(*memspace, H5Screate_simple, (ctx->rdim, count, NULL));
222:   PetscCallHDF5(H5Sselect_hyperslab, (ctx->dataspace, H5S_SELECT_SET, offset, NULL, count, NULL));
223:   PetscCall(PetscFree2(count, offset));
224:   PetscFunctionReturn(PETSC_SUCCESS);
225: }

227: static PetscErrorCode PetscViewerHDF5ReadArray_Private(PetscViewer viewer, HDF5ReadCtx h, hid_t datatype, hid_t memspace, void *arr)
228: {
229:   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;

231:   PetscFunctionBegin;
232:   PetscCallHDF5(H5Dread, (h->dataset, datatype, memspace, h->dataspace, hdf5->dxpl_id, arr));
233:   PetscFunctionReturn(PETSC_SUCCESS);
234: }

236: static PetscErrorCode PetscViewerHDF5Load_Internal(PetscViewer viewer, const char name[], PetscBool uncompress, PetscLayout map, hid_t datatype, void **newarr)
237: {
238:   PetscBool   has;
239:   char       *group;
240:   HDF5ReadCtx h        = NULL;
241:   hid_t       memspace = 0;
242:   size_t      unitsize;
243:   void       *arr;

245:   PetscFunctionBegin;
246:   PetscCall(PetscViewerHDF5GetGroup(viewer, NULL, &group));
247:   PetscCall(PetscViewerHDF5HasDataset(viewer, name, &has));
248:   PetscCheck(has, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Object (dataset) \"%s\" not stored in group %s", name, group);
249:   PetscCall(PetscViewerHDF5ReadInitialize_Private(viewer, name, &h));
250: #if defined(PETSC_USE_COMPLEX)
251:   if (!h->complexVal) {
252:     H5T_class_t clazz = H5Tget_class(datatype);
253:     PetscCheck(clazz != H5T_FLOAT, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as real but PETSc is configured for complex scalars. The conversion is not yet implemented. Configure with --with-scalar-type=real to read this dataset", group ? group : "", name);
254:   }
255: #else
256:   PetscCheck(!h->complexVal, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Dataset %s/%s is marked as complex but PETSc is configured for real scalars. Configure with --with-scalar-type=complex to read this dataset", group, name);
257: #endif

259:   PetscCall(PetscViewerHDF5ReadSizes_Private(viewer, h, uncompress, PETSC_TRUE, &map));
260:   PetscCall(PetscViewerHDF5ReadSelectHyperslab_Private(viewer, h, map, &memspace));

262:   if (h->runs && uncompress) {
263:     PetscInt *ind;

265:     PetscCall(PetscInfo(viewer, "Read compressed object with name %s of size %" PetscInt_FMT ":%" PetscInt_FMT "\n", name, map->n, map->N));
266:     // Each process stores the whole compression, so skip any leading parts
267:     PetscCall(PetscMalloc1(map->n, &ind));
268:     for (PetscInt i = 0, off = 0; i < h->runs; ++i) {
269:       for (PetscInt j = 0, inc = 0; j < h->cind[i * 3 + 0]; ++j, ++off, inc += h->cind[i * 3 + 1]) {
270:         if (off >= map->rend) {
271:           i = h->runs;
272:           break;
273:         }
274:         if (off >= map->rstart) ind[off - map->rstart] = h->cind[i * 3 + 2] + inc;
275:       }
276:     }
277:     *newarr = ind;
278:     goto cleanup;
279:   }

281:   unitsize = H5Tget_size(datatype);
282:   if (h->complexVal) unitsize *= 2;
283:   /* unitsize is size_t i.e. always unsigned, so the negative check is pointless? */
284:   PetscCheck(unitsize > 0 && unitsize <= PetscMax(sizeof(PetscInt), sizeof(PetscScalar)), PETSC_COMM_SELF, PETSC_ERR_LIB, "Sanity check failed: HDF5 function H5Tget_size(datatype) returned suspicious value %zu", unitsize);
285:   PetscCall(PetscMalloc(map->n * unitsize, &arr));

287:   PetscCall(PetscViewerHDF5ReadArray_Private(viewer, h, datatype, memspace, arr));
288:   *newarr = arr;

290: cleanup:
291:   PetscCallHDF5(H5Sclose, (memspace));
292:   PetscCall(PetscViewerHDF5ReadFinalize_Private(viewer, &h));
293:   PetscCall(PetscFree(group));
294:   PetscFunctionReturn(PETSC_SUCCESS);
295: }

297: /*@C
298:   PetscViewerHDF5Load - Read a raw array from the `PETSCVIEWERHDF5` dataset in parallel

300:   Collective; No Fortran Support

302:   Input Parameters:
303: + viewer   - The `PETSCVIEWERHDF5` viewer
304: . name     - The dataset name
305: - datatype - The HDF5 datatype of the items in the dataset

307:   Input/Output Parameter:
308: . map - The layout which specifies array partitioning, on output the
309:              set up layout (with global size and blocksize according to dataset)

311:   Output Parameter:
312: . newarr - The partitioned array, a memory image of the given dataset

314:   Level: developer

316:   Notes:
317:   This is intended mainly for internal use; users should use higher level routines such as `ISLoad()`, `VecLoad()`, `DMLoad()`.

319:   The array is partitioned according to the given `PetscLayout` which is converted to an HDF5 hyperslab.

321:   This name is relative to the current group returned by `PetscViewerHDF5OpenGroup()`.

323: .seealso: `PetscViewer`, `PETSCVIEWERHDF5`, `PetscViewerHDF5Open()`, `PetscViewerHDF5PushGroup()`, `PetscViewerHDF5OpenGroup()`, `PetscViewerHDF5ReadSizes()`,
324:           `VecLoad()`, `ISLoad()`, `PetscLayout`
325: @*/
326: PetscErrorCode PetscViewerHDF5Load(PetscViewer viewer, const char name[], PetscLayout map, hid_t datatype, void **newarr)
327: {
328:   PetscFunctionBegin;
329:   PetscCall(PetscViewerHDF5Load_Internal(viewer, name, PETSC_TRUE, map, datatype, newarr));
330:   PetscFunctionReturn(PETSC_SUCCESS);
331: }

333: /*@
334:   PetscViewerHDF5ReadSizes - Read block size and global size of a `Vec` or `IS` stored in an HDF5 file.

336:   Input Parameters:
337: + viewer - The `PETSCVIEWERHDF5` viewer
338: - name   - The dataset name

340:   Output Parameters:
341: + bs - block size
342: - N  - global size

344:   Level: advanced

346:   Notes:
347:   The dataset is stored as an HDF5 dataspace with 1-4 dimensions in the order
348:   1) # timesteps (optional), 2) # blocks, 3) # elements per block (optional), 4) real and imaginary part (only for complex).

350:   The dataset can be stored as a 2D dataspace even if its blocksize is 1; see `PetscViewerHDF5SetBaseDimension2()`.

352: .seealso: `PetscViewer`, `PETSCVIEWERHDF5`, `PetscViewerHDF5Open()`, `VecLoad()`, `ISLoad()`, `VecGetSize()`, `ISGetSize()`, `PetscViewerHDF5SetBaseDimension2()`
353: @*/
354: PetscErrorCode PetscViewerHDF5ReadSizes(PetscViewer viewer, const char name[], PetscInt *bs, PetscInt *N)
355: {
356:   HDF5ReadCtx h   = NULL;
357:   PetscLayout map = NULL;

359:   PetscFunctionBegin;
361:   PetscCall(PetscViewerHDF5ReadInitialize_Private(viewer, name, &h));
362:   PetscCall(PetscViewerHDF5ReadSizes_Private(viewer, h, PETSC_TRUE, PETSC_FALSE, &map));
363:   PetscCall(PetscViewerHDF5ReadFinalize_Private(viewer, &h));
364:   if (bs) *bs = map->bs;
365:   if (N) *N = map->N;
366:   PetscCall(PetscLayoutDestroy(&map));
367:   PetscFunctionReturn(PETSC_SUCCESS);
368: }