Actual source code: plexsfc.c

  1: #include <petsc/private/dmpleximpl.h>
  2: #include <petscsf.h>
  3: #include <petsc/private/hashset.h>

  5: typedef uint64_t ZCode;

  7: PETSC_HASH_SET(ZSet, ZCode, PetscHash_UInt64, PetscHashEqual)

  9: typedef struct {
 10:   PetscInt i, j, k;
 11: } Ijk;

 13: typedef struct {
 14:   Ijk         eextent;
 15:   Ijk         vextent;
 16:   PetscMPIInt comm_size;
 17:   ZCode      *zstarts;
 18: } ZLayout;

 20: static unsigned ZCodeSplit1(ZCode z)
 21: {
 22:   z = ((z & 01001001001001001) | ((z >> 2) & 02002002002002002) | ((z >> 4) & 04004004004004004));
 23:   z = (z | (z >> 6) | (z >> 12)) & 0000000777000000777;
 24:   z = (z | (z >> 18)) & 0777777;
 25:   return (unsigned)z;
 26: }

 28: static ZCode ZEncode1(unsigned t)
 29: {
 30:   ZCode z = t;
 31:   z       = (z | (z << 18)) & 0777000000777;
 32:   z       = (z | (z << 6) | (z << 12)) & 07007007007007007;
 33:   z       = (z | (z << 2) | (z << 4)) & 0111111111111111111;
 34:   return z;
 35: }

 37: static Ijk ZCodeSplit(ZCode z)
 38: {
 39:   Ijk c;
 40:   c.i = ZCodeSplit1(z >> 2);
 41:   c.j = ZCodeSplit1(z >> 1);
 42:   c.k = ZCodeSplit1(z >> 0);
 43:   return c;
 44: }

 46: static ZCode ZEncode(Ijk c)
 47: {
 48:   ZCode z = (ZEncode1((unsigned int)c.i) << 2) | (ZEncode1((unsigned int)c.j) << 1) | ZEncode1((unsigned int)c.k);
 49:   return z;
 50: }

 52: static PetscBool IjkActive(Ijk extent, Ijk l)
 53: {
 54:   if (l.i < extent.i && l.j < extent.j && l.k < extent.k) return PETSC_TRUE;
 55:   return PETSC_FALSE;
 56: }

 58: // If z is not the base of an octet (last 3 bits 0), return 0.
 59: //
 60: // If z is the base of an octet, we recursively grow to the biggest structured octet. This is typically useful when a z
 61: // is outside the domain and we wish to skip a (possibly recursively large) octet to find our next interesting point.
 62: static ZCode ZStepOct(ZCode z)
 63: {
 64:   if (PetscUnlikely(z == 0)) return 0; // Infinite loop below if z == 0
 65:   ZCode step = 07;
 66:   for (; (z & step) == 0; step = (step << 3) | 07) { }
 67:   return step >> 3;
 68: }

 70: // Since element/vertex box extents are typically not equal powers of 2, Z codes that lie within the domain are not contiguous.
 71: static PetscErrorCode ZLayoutCreate(PetscMPIInt size, const PetscInt eextent[3], const PetscInt vextent[3], ZLayout *layout)
 72: {
 73:   PetscFunctionBegin;
 74:   layout->eextent.i = eextent[0];
 75:   layout->eextent.j = eextent[1];
 76:   layout->eextent.k = eextent[2];
 77:   layout->vextent.i = vextent[0];
 78:   layout->vextent.j = vextent[1];
 79:   layout->vextent.k = vextent[2];
 80:   layout->comm_size = size;
 81:   layout->zstarts   = NULL;
 82:   PetscCall(PetscMalloc1(size + 1, &layout->zstarts));

 84:   PetscInt total_elems = eextent[0] * eextent[1] * eextent[2];
 85:   ZCode    z           = 0;
 86:   layout->zstarts[0]   = 0;
 87:   // This loop traverses all vertices in the global domain, so is worth making fast. We use ZStepBound
 88:   for (PetscMPIInt r = 0; r < size; r++) {
 89:     PetscInt elems_needed = (total_elems / size) + (total_elems % size > r), count;
 90:     for (count = 0; count < elems_needed; z++) {
 91:       ZCode skip = ZStepOct(z); // optimistically attempt a longer step
 92:       for (ZCode s = skip;; s >>= 3) {
 93:         Ijk trial = ZCodeSplit(z + s);
 94:         if (IjkActive(layout->eextent, trial)) {
 95:           while (count + s + 1 > (ZCode)elems_needed) s >>= 3; // Shrink the octet
 96:           count += s + 1;
 97:           z += s;
 98:           break;
 99:         }
100:         if (s == 0) { // the whole skip octet is inactive
101:           z += skip;
102:           break;
103:         }
104:       }
105:     }
106:     // Pick up any extra vertices in the Z ordering before the next rank's first owned element.
107:     //
108:     // This leads to poorly balanced vertices when eextent is a power of 2, since all the fringe vertices end up
109:     // on the last rank. A possible solution is to balance the Z-order vertices independently from the cells, which will
110:     // result in a lot of element closures being remote. We could finish marking boundary conditions, then do a round of
111:     // vertex ownership smoothing (which would reorder and redistribute vertices without touching element distribution).
112:     // Another would be to have an analytic ownership criteria for vertices in the fringe veextent - eextent. This would
113:     // complicate the job of identifying an owner and its offset.
114:     //
115:     // The current recommended approach is to let `-dm_distribute 1` (default) resolve vertex ownership. This is
116:     // *mandatory* with isoperiodicity (except in special cases) to remove standed vertices from local spaces. Here's
117:     // the issue:
118:     //
119:     // Consider this partition on rank 0 (left) and rank 1.
120:     //
121:     //    4 --------  5 -- 14 --10 -- 21 --11
122:     //                |          |          |
123:     // 7 -- 16 --  8  |          |          |
124:     // |           |  3 -------  7 -------  9
125:     // |           |             |          |
126:     // 4 --------  6 ------ 10   |          |
127:     // |           |         |   6 -- 16 -- 8
128:     // |           |         |
129:     // 3 ---11---  5 --18--  9
130:     //
131:     // The periodic face SF looks like
132:     // [0] Number of roots=21, leaves=1, remote ranks=1
133:     // [0] 16 <- (0,11)
134:     // [1] Number of roots=22, leaves=2, remote ranks=2
135:     // [1] 14 <- (0,18)
136:     // [1] 21 <- (1,16)
137:     //
138:     // In handling face (0,16), rank 0 learns that (0,7) and (0,8) map to (0,3) and (0,5) respectively, thus we won't use
139:     // the point SF links to (1,4) and (1,5). Rank 1 learns about the periodic mapping of (1,5) while handling face
140:     // (1,14), but never learns that vertex (1,4) has been mapped to (0,3) by face (0,16).
141:     //
142:     // We can relatively easily inform vertex (1,4) of this mapping, but it stays in rank 1's local space despite not
143:     // being in the closure and thus not being contributed to. This would be mostly harmless except that some viewer
144:     // routines expect all local points to be somehow significant. It is not easy to analytically remove the (1,4)
145:     // vertex because the point SF and isoperiodic face SF would need to be updated to account for removal of the
146:     // stranded vertices.
147:     for (; z <= ZEncode(layout->vextent); z++) {
148:       Ijk loc = ZCodeSplit(z);
149:       if (IjkActive(layout->eextent, loc)) break;
150:       z += ZStepOct(z);
151:     }
152:     layout->zstarts[r + 1] = z;
153:   }
154:   layout->zstarts[size] = ZEncode(layout->vextent);
155:   PetscFunctionReturn(PETSC_SUCCESS);
156: }

158: static PetscInt ZLayoutElementsOnRank(const ZLayout *layout, PetscMPIInt rank)
159: {
160:   PetscInt remote_elem = 0;
161:   for (ZCode rz = layout->zstarts[rank]; rz < layout->zstarts[rank + 1]; rz++) {
162:     Ijk loc = ZCodeSplit(rz);
163:     if (IjkActive(layout->eextent, loc)) remote_elem++;
164:     else rz += ZStepOct(rz);
165:   }
166:   return remote_elem;
167: }

169: static PetscInt ZCodeFind(ZCode key, PetscInt n, const ZCode X[])
170: {
171:   PetscInt lo = 0, hi = n;

173:   if (n == 0) return -1;
174:   while (hi - lo > 1) {
175:     PetscInt mid = lo + (hi - lo) / 2;
176:     if (key < X[mid]) hi = mid;
177:     else lo = mid;
178:   }
179:   return key == X[lo] ? lo : -(lo + (key > X[lo]) + 1);
180: }

182: static PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(DM dm, const ZLayout *layout, const ZCode *vert_z, PetscSegBuffer per_faces[3], const PetscReal *lower, const PetscReal *upper, const DMBoundaryType *periodicity, PetscSegBuffer donor_face_closure[3], PetscSegBuffer my_donor_faces[3])
183: {
184:   MPI_Comm    comm;
185:   PetscInt    dim, vStart, vEnd;
186:   PetscMPIInt size;
187:   PetscSF     face_sfs[3];
188:   PetscScalar transforms[3][4][4] = {{{0}}};

190:   PetscFunctionBegin;
191:   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
192:   PetscCallMPI(MPI_Comm_size(comm, &size));
193:   PetscCall(DMGetDimension(dm, &dim));
194:   const PetscInt csize = PetscPowInt(2, dim - 1);
195:   PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));

197:   PetscInt num_directions = 0;
198:   for (PetscInt direction = 0; direction < dim; direction++) {
199:     PetscCount   num_faces;
200:     PetscInt    *faces;
201:     ZCode       *donor_verts, *donor_minz;
202:     PetscSFNode *leaf;
203:     PetscCount   num_multiroots = 0;
204:     PetscInt     pStart, pEnd;
205:     PetscBool    sorted;
206:     PetscInt     inum_faces;

208:     if (periodicity[direction] != DM_BOUNDARY_PERIODIC) continue;
209:     PetscCall(PetscSegBufferGetSize(per_faces[direction], &num_faces));
210:     PetscCall(PetscSegBufferExtractInPlace(per_faces[direction], &faces));
211:     PetscCall(PetscSegBufferExtractInPlace(donor_face_closure[direction], &donor_verts));
212:     PetscCall(PetscMalloc1(num_faces, &donor_minz));
213:     PetscCall(PetscMalloc1(num_faces, &leaf));
214:     for (PetscCount i = 0; i < num_faces; i++) {
215:       ZCode minz = donor_verts[i * csize];

217:       for (PetscInt j = 1; j < csize; j++) minz = PetscMin(minz, donor_verts[i * csize + j]);
218:       donor_minz[i] = minz;
219:     }
220:     PetscCall(PetscIntCast(num_faces, &inum_faces));
221:     PetscCall(PetscSortedInt64(inum_faces, (const PetscInt64 *)donor_minz, &sorted));
222:     // If a donor vertex were chosen to broker multiple faces, we would have a logic error.
223:     // Checking for sorting is a cheap check that there are no duplicates.
224:     PetscCheck(sorted, PETSC_COMM_SELF, PETSC_ERR_PLIB, "minz not sorted; possible duplicates not checked");
225:     for (PetscCount i = 0; i < num_faces;) {
226:       ZCode       z           = donor_minz[i];
227:       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout->zstarts), remote_count = 0;

229:       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
230:       // Process all the vertices on this rank
231:       for (ZCode rz = layout->zstarts[remote_rank]; rz < layout->zstarts[remote_rank + 1]; rz++) {
232:         Ijk loc = ZCodeSplit(rz);

234:         if (rz == z) {
235:           leaf[i].rank  = remote_rank;
236:           leaf[i].index = remote_count;
237:           i++;
238:           if (i == num_faces) break;
239:           z = donor_minz[i];
240:         }
241:         if (IjkActive(layout->vextent, loc)) remote_count++;
242:       }
243:     }
244:     PetscCall(PetscFree(donor_minz));
245:     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &face_sfs[num_directions]));
246:     PetscCall(PetscSFSetGraph(face_sfs[num_directions], vEnd - vStart, inum_faces, NULL, PETSC_USE_POINTER, leaf, PETSC_USE_POINTER));
247:     const PetscInt *my_donor_degree;
248:     PetscCall(PetscSFComputeDegreeBegin(face_sfs[num_directions], &my_donor_degree));
249:     PetscCall(PetscSFComputeDegreeEnd(face_sfs[num_directions], &my_donor_degree));

251:     for (PetscInt i = 0; i < vEnd - vStart; i++) {
252:       num_multiroots += my_donor_degree[i];
253:       if (my_donor_degree[i] == 0) continue;
254:       PetscAssert(my_donor_degree[i] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex has multiple faces");
255:     }
256:     PetscInt  *my_donors, *donor_indices, *my_donor_indices;
257:     PetscCount num_my_donors;

259:     PetscCall(PetscSegBufferGetSize(my_donor_faces[direction], &num_my_donors));
260:     PetscCheck(num_my_donors == num_multiroots, PETSC_COMM_SELF, PETSC_ERR_SUP, "Donor request does not match expected donors");
261:     PetscCall(PetscSegBufferExtractInPlace(my_donor_faces[direction], &my_donors));
262:     PetscCall(PetscMalloc1(vEnd - vStart, &my_donor_indices));
263:     for (PetscCount i = 0; i < num_my_donors; i++) {
264:       PetscInt f = my_donors[i];
265:       PetscInt num_points, *points = NULL, minv = PETSC_INT_MAX;

267:       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
268:       for (PetscInt j = 0; j < num_points; j++) {
269:         PetscInt p = points[2 * j];
270:         if (p < vStart || vEnd <= p) continue;
271:         minv = PetscMin(minv, p);
272:       }
273:       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &num_points, &points));
274:       PetscAssert(my_donor_degree[minv - vStart] == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Local vertex not requested");
275:       my_donor_indices[minv - vStart] = f;
276:     }
277:     PetscCall(PetscMalloc1(num_faces, &donor_indices));
278:     PetscCall(PetscSFBcastBegin(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
279:     PetscCall(PetscSFBcastEnd(face_sfs[num_directions], MPIU_INT, my_donor_indices, donor_indices, MPI_REPLACE));
280:     PetscCall(PetscFree(my_donor_indices));
281:     // Modify our leafs so they point to donor faces instead of donor minz. Additionally, give them indices as faces.
282:     for (PetscCount i = 0; i < num_faces; i++) leaf[i].index = donor_indices[i];
283:     PetscCall(PetscFree(donor_indices));
284:     PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
285:     PetscCall(PetscSFSetGraph(face_sfs[num_directions], pEnd - pStart, inum_faces, faces, PETSC_COPY_VALUES, leaf, PETSC_OWN_POINTER));
286:     {
287:       char face_sf_name[PETSC_MAX_PATH_LEN];
288:       PetscCall(PetscSNPrintf(face_sf_name, sizeof face_sf_name, "Z-order Isoperiodic Faces #%" PetscInt_FMT, num_directions));
289:       PetscCall(PetscObjectSetName((PetscObject)face_sfs[num_directions], face_sf_name));
290:     }

292:     transforms[num_directions][0][0]         = 1;
293:     transforms[num_directions][1][1]         = 1;
294:     transforms[num_directions][2][2]         = 1;
295:     transforms[num_directions][3][3]         = 1;
296:     transforms[num_directions][direction][3] = upper[direction] - lower[direction];
297:     num_directions++;
298:   }

300:   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, num_directions, face_sfs));
301:   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, num_directions, (PetscScalar *)transforms));

303:   for (PetscInt i = 0; i < num_directions; i++) PetscCall(PetscSFDestroy(&face_sfs[i]));
304:   PetscFunctionReturn(PETSC_SUCCESS);
305: }

307: // This is a DMGlobalToLocalHook that applies the affine offsets. When extended for rotated periodicity, it'll need to
308: // apply a rotatonal transform and similar operations will be needed for fields (e.g., to rotate a velocity vector).
309: // We use this crude approach here so we don't have to write new GPU kernels yet.
310: static PetscErrorCode DMCoordAddPeriodicOffsets_Private(DM dm, Vec g, InsertMode mode, Vec l, void *ctx)
311: {
312:   PetscFunctionBegin;
313:   // These `VecScatter`s should be merged to improve efficiency; the scatters cannot be overlapped.
314:   for (PetscInt i = 0; i < dm->periodic.num_affines; i++) {
315:     PetscCall(VecScatterBegin(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
316:     PetscCall(VecScatterEnd(dm->periodic.affine_to_local[i], dm->periodic.affine[i], l, ADD_VALUES, SCATTER_FORWARD));
317:   }
318:   PetscFunctionReturn(PETSC_SUCCESS);
319: }

321: // Start with an SF for a positive depth (e.g., faces) and create a new SF for matched closure. The caller must ensure
322: // that both the donor (root) face and the periodic (leaf) face have consistent orientation, meaning that their closures
323: // are isomorphic. It may be useful/necessary for this restriction to be loosened.
324: //
325: // Output Arguments:
326: //
327: // + closure_sf - augmented point SF (see `DMGetPointSF()`) that includes the faces and all points in its closure. This
328: //   can be used to create a global section and section SF.
329: // - is_points - array of index sets for just the points in the closure of `face_sf`. These may be used to apply an affine
330: //   transformation to periodic dofs; see DMPeriodicCoordinateSetUp_Internal().
331: //
332: static PetscErrorCode DMPlexCreateIsoperiodicPointSF_Private(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs, PetscSF *closure_sf, IS **is_points)
333: {
334:   MPI_Comm           comm;
335:   PetscMPIInt        rank;
336:   PetscSF            point_sf;
337:   PetscInt           nroots, nleaves;
338:   const PetscInt    *filocal;
339:   const PetscSFNode *firemote;

341:   PetscFunctionBegin;
342:   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
343:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
344:   PetscCall(DMGetPointSF(dm, &point_sf)); // Point SF has remote points
345:   PetscCall(PetscMalloc1(num_face_sfs, is_points));

347:   for (PetscInt f = 0; f < num_face_sfs; f++) {
348:     PetscSF   face_sf = face_sfs[f];
349:     PetscInt *rootdata, *leafdata;

351:     PetscCall(PetscSFGetGraph(face_sf, &nroots, &nleaves, &filocal, &firemote));
352:     PetscCall(PetscCalloc2(2 * nroots, &rootdata, 2 * nroots, &leafdata));
353:     for (PetscInt i = 0; i < nleaves; i++) {
354:       PetscInt point = filocal[i], cl_size, *closure = NULL;
355:       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
356:       leafdata[point] = cl_size - 1;
357:       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
358:     }
359:     PetscCall(PetscSFReduceBegin(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));
360:     PetscCall(PetscSFReduceEnd(face_sf, MPIU_INT, leafdata, rootdata + nroots, MPIU_SUM));

362:     PetscInt root_offset = 0;
363:     for (PetscInt p = 0; p < nroots; p++) {
364:       const PetscInt *donor_dof = rootdata + nroots;
365:       if (donor_dof[p] == 0) {
366:         rootdata[2 * p]     = -1;
367:         rootdata[2 * p + 1] = -1;
368:         continue;
369:       }
370:       PetscInt  cl_size;
371:       PetscInt *closure = NULL;
372:       PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
373:       // cl_size - 1 = points not including self
374:       PetscAssert(donor_dof[p] == cl_size - 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Reduced leaf cone sizes do not match root cone sizes");
375:       rootdata[2 * p]     = root_offset;
376:       rootdata[2 * p + 1] = cl_size - 1;
377:       root_offset += cl_size - 1;
378:       PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
379:     }
380:     PetscCall(PetscSFBcastBegin(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
381:     PetscCall(PetscSFBcastEnd(face_sf, MPIU_2INT, rootdata, leafdata, MPI_REPLACE));
382:     // Count how many leaves we need to communicate the closures
383:     PetscInt leaf_offset = 0;
384:     for (PetscInt i = 0; i < nleaves; i++) {
385:       PetscInt point = filocal[i];
386:       if (leafdata[2 * point + 1] < 0) continue;
387:       leaf_offset += leafdata[2 * point + 1];
388:     }

390:     PetscSFNode *closure_leaf;
391:     PetscCall(PetscMalloc1(leaf_offset, &closure_leaf));
392:     leaf_offset = 0;
393:     for (PetscInt i = 0; i < nleaves; i++) {
394:       PetscInt point   = filocal[i];
395:       PetscInt cl_size = leafdata[2 * point + 1];
396:       if (cl_size < 0) continue;
397:       for (PetscInt j = 0; j < cl_size; j++) {
398:         closure_leaf[leaf_offset].rank  = firemote[i].rank;
399:         closure_leaf[leaf_offset].index = leafdata[2 * point] + j;
400:         leaf_offset++;
401:       }
402:     }

404:     PetscSF sf_closure;
405:     PetscCall(PetscSFCreate(comm, &sf_closure));
406:     PetscCall(PetscSFSetGraph(sf_closure, root_offset, leaf_offset, NULL, PETSC_USE_POINTER, closure_leaf, PETSC_OWN_POINTER));

408:     PetscSFNode *leaf_donor_closure;
409:     { // Pack root buffer with owner for every point in the root cones
410:       PetscSFNode       *donor_closure;
411:       const PetscInt    *pilocal;
412:       const PetscSFNode *piremote;
413:       PetscInt           npoints;

415:       PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, &pilocal, &piremote));
416:       PetscCall(PetscCalloc1(root_offset, &donor_closure));
417:       root_offset = 0;
418:       for (PetscInt p = 0; p < nroots; p++) {
419:         if (rootdata[2 * p] < 0) continue;
420:         PetscInt  cl_size;
421:         PetscInt *closure = NULL;
422:         PetscCall(DMPlexGetTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
423:         for (PetscInt j = 1; j < cl_size; j++) {
424:           PetscInt c = closure[2 * j];
425:           if (pilocal) {
426:             PetscInt found = -1;
427:             if (npoints > 0) PetscCall(PetscFindInt(c, npoints, pilocal, &found));
428:             if (found >= 0) {
429:               donor_closure[root_offset++] = piremote[found];
430:               continue;
431:             }
432:           }
433:           // we own c
434:           donor_closure[root_offset].rank  = rank;
435:           donor_closure[root_offset].index = c;
436:           root_offset++;
437:         }
438:         PetscCall(DMPlexRestoreTransitiveClosure(dm, p, PETSC_TRUE, &cl_size, &closure));
439:       }

441:       PetscCall(PetscMalloc1(leaf_offset, &leaf_donor_closure));
442:       PetscCall(PetscSFBcastBegin(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
443:       PetscCall(PetscSFBcastEnd(sf_closure, MPIU_SF_NODE, donor_closure, leaf_donor_closure, MPI_REPLACE));
444:       PetscCall(PetscSFDestroy(&sf_closure));
445:       PetscCall(PetscFree(donor_closure));
446:     }

448:     PetscSFNode *new_iremote;
449:     PetscCall(PetscCalloc1(nroots, &new_iremote));
450:     for (PetscInt i = 0; i < nroots; i++) new_iremote[i].rank = -1;
451:     // Walk leaves and match vertices
452:     leaf_offset = 0;
453:     for (PetscInt i = 0; i < nleaves; i++) {
454:       PetscInt  point   = filocal[i], cl_size;
455:       PetscInt *closure = NULL;
456:       PetscCall(DMPlexGetTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
457:       for (PetscInt j = 1; j < cl_size; j++) { // TODO: should we send donor edge orientations so we can flip for consistency?
458:         PetscInt    c  = closure[2 * j];
459:         PetscSFNode lc = leaf_donor_closure[leaf_offset];
460:         // printf("[%d] face %d.%d: %d ?-- (%d,%d)\n", rank, point, j, c, lc.rank, lc.index);
461:         if (new_iremote[c].rank == -1) {
462:           new_iremote[c] = lc;
463:         } else PetscCheck(new_iremote[c].rank == lc.rank && new_iremote[c].index == lc.index, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mismatched cone ordering between faces");
464:         leaf_offset++;
465:       }
466:       PetscCall(DMPlexRestoreTransitiveClosure(dm, point, PETSC_TRUE, &cl_size, &closure));
467:     }
468:     PetscCall(PetscFree(leaf_donor_closure));

470:     // Include face points in closure SF
471:     for (PetscInt i = 0; i < nleaves; i++) new_iremote[filocal[i]] = firemote[i];
472:     // consolidate leaves
473:     PetscInt num_new_leaves = 0;
474:     for (PetscInt i = 0; i < nroots; i++) {
475:       if (new_iremote[i].rank == -1) continue;
476:       new_iremote[num_new_leaves] = new_iremote[i];
477:       leafdata[num_new_leaves]    = i;
478:       num_new_leaves++;
479:     }
480:     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, num_new_leaves, leafdata, PETSC_COPY_VALUES, &(*is_points)[f]));

482:     PetscSF csf;
483:     PetscCall(PetscSFCreate(comm, &csf));
484:     PetscCall(PetscSFSetGraph(csf, nroots, num_new_leaves, leafdata, PETSC_COPY_VALUES, new_iremote, PETSC_COPY_VALUES));
485:     PetscCall(PetscFree(new_iremote)); // copy and delete because new_iremote is longer than it needs to be
486:     PetscCall(PetscFree2(rootdata, leafdata));

488:     PetscInt npoints;
489:     PetscCall(PetscSFGetGraph(point_sf, NULL, &npoints, NULL, NULL));
490:     if (npoints < 0) { // empty point_sf
491:       *closure_sf = csf;
492:     } else {
493:       PetscCall(PetscSFMerge(point_sf, csf, closure_sf));
494:       PetscCall(PetscSFDestroy(&csf));
495:     }
496:     if (f > 0) PetscCall(PetscSFDestroy(&point_sf)); // Only destroy if point_sf is from previous calls to PetscSFMerge
497:     point_sf = *closure_sf;                          // Use combined point + isoperiodic SF to define point ownership for further face_sf
498:   }
499:   PetscCall(PetscObjectSetName((PetscObject)*closure_sf, "Composed Periodic Points"));
500:   PetscFunctionReturn(PETSC_SUCCESS);
501: }

503: static PetscErrorCode DMGetIsoperiodicPointSF_Plex(DM dm, PetscSF *sf)
504: {
505:   DM_Plex *plex = (DM_Plex *)dm->data;

507:   PetscFunctionBegin;
508:   if (!plex->periodic.composed_sf) PetscCall(DMPlexCreateIsoperiodicPointSF_Private(dm, plex->periodic.num_face_sfs, plex->periodic.face_sfs, &plex->periodic.composed_sf, &plex->periodic.periodic_points));
509:   if (sf) *sf = plex->periodic.composed_sf;
510:   PetscFunctionReturn(PETSC_SUCCESS);
511: }

513: PetscErrorCode DMPlexMigrateIsoperiodicFaceSF_Internal(DM old_dm, DM dm, PetscSF sf_migration)
514: {
515:   DM_Plex    *plex = (DM_Plex *)old_dm->data;
516:   PetscSF     sf_point, *new_face_sfs;
517:   PetscMPIInt rank;

519:   PetscFunctionBegin;
520:   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
521:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
522:   PetscCall(DMGetPointSF(dm, &sf_point));
523:   PetscCall(PetscMalloc1(plex->periodic.num_face_sfs, &new_face_sfs));

525:   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
526:     PetscInt           old_npoints, new_npoints, old_nleaf, new_nleaf, point_nleaf;
527:     PetscSFNode       *new_leafdata, *rootdata, *leafdata;
528:     const PetscInt    *old_local, *point_local;
529:     const PetscSFNode *old_remote, *point_remote;

531:     PetscCall(PetscSFGetGraph(plex->periodic.face_sfs[f], &old_npoints, &old_nleaf, &old_local, &old_remote));
532:     PetscCall(PetscSFGetGraph(sf_migration, NULL, &new_nleaf, NULL, NULL));
533:     PetscCall(PetscSFGetGraph(sf_point, &new_npoints, &point_nleaf, &point_local, &point_remote));
534:     PetscAssert(new_nleaf == new_npoints, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected migration leaf space to match new point root space");
535:     PetscCall(PetscMalloc3(old_npoints, &rootdata, old_npoints, &leafdata, new_npoints, &new_leafdata));

537:     // Fill new_leafdata with new owners of all points
538:     for (PetscInt i = 0; i < new_npoints; i++) {
539:       new_leafdata[i].rank  = rank;
540:       new_leafdata[i].index = i;
541:     }
542:     for (PetscInt i = 0; i < point_nleaf; i++) {
543:       PetscInt j      = point_local[i];
544:       new_leafdata[j] = point_remote[i];
545:     }
546:     // REPLACE is okay because every leaf agrees about the new owners
547:     PetscCall(PetscSFReduceBegin(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
548:     PetscCall(PetscSFReduceEnd(sf_migration, MPIU_SF_NODE, new_leafdata, rootdata, MPI_REPLACE));
549:     // rootdata now contains the new owners

551:     // Send to leaves of old space
552:     for (PetscInt i = 0; i < old_npoints; i++) {
553:       leafdata[i].rank  = -1;
554:       leafdata[i].index = -1;
555:     }
556:     PetscCall(PetscSFBcastBegin(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));
557:     PetscCall(PetscSFBcastEnd(plex->periodic.face_sfs[f], MPIU_SF_NODE, rootdata, leafdata, MPI_REPLACE));

559:     // Send to new leaf space
560:     PetscCall(PetscSFBcastBegin(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));
561:     PetscCall(PetscSFBcastEnd(sf_migration, MPIU_SF_NODE, leafdata, new_leafdata, MPI_REPLACE));

563:     PetscInt     nface = 0, *new_local;
564:     PetscSFNode *new_remote;
565:     for (PetscInt i = 0; i < new_npoints; i++) nface += (new_leafdata[i].rank >= 0);
566:     PetscCall(PetscMalloc1(nface, &new_local));
567:     PetscCall(PetscMalloc1(nface, &new_remote));
568:     nface = 0;
569:     for (PetscInt i = 0; i < new_npoints; i++) {
570:       if (new_leafdata[i].rank == -1) continue;
571:       new_local[nface]  = i;
572:       new_remote[nface] = new_leafdata[i];
573:       nface++;
574:     }
575:     PetscCall(PetscFree3(rootdata, leafdata, new_leafdata));
576:     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &new_face_sfs[f]));
577:     PetscCall(PetscSFSetGraph(new_face_sfs[f], new_npoints, nface, new_local, PETSC_OWN_POINTER, new_remote, PETSC_OWN_POINTER));
578:     {
579:       char new_face_sf_name[PETSC_MAX_PATH_LEN];
580:       PetscCall(PetscSNPrintf(new_face_sf_name, sizeof new_face_sf_name, "Migrated Isoperiodic Faces #%" PetscInt_FMT, f));
581:       PetscCall(PetscObjectSetName((PetscObject)new_face_sfs[f], new_face_sf_name));
582:     }
583:   }

585:   PetscCall(DMPlexSetIsoperiodicFaceSF(dm, plex->periodic.num_face_sfs, new_face_sfs));
586:   PetscCall(DMPlexSetIsoperiodicFaceTransform(dm, plex->periodic.num_face_sfs, (PetscScalar *)plex->periodic.transform));
587:   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) PetscCall(PetscSFDestroy(&new_face_sfs[f]));
588:   PetscCall(PetscFree(new_face_sfs));
589:   PetscFunctionReturn(PETSC_SUCCESS);
590: }

592: PetscErrorCode DMPeriodicCoordinateSetUp_Internal(DM dm)
593: {
594:   DM_Plex   *plex = (DM_Plex *)dm->data;
595:   PetscCount count;
596:   IS         isdof;
597:   PetscInt   dim;

599:   PetscFunctionBegin;
600:   if (!plex->periodic.face_sfs) PetscFunctionReturn(PETSC_SUCCESS);
601:   PetscCall(DMGetIsoperiodicPointSF_Plex(dm, NULL));
602:   PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));

604:   PetscCall(DMGetDimension(dm, &dim));
605:   dm->periodic.num_affines = plex->periodic.num_face_sfs;
606:   PetscCall(PetscMalloc2(dm->periodic.num_affines, &dm->periodic.affine_to_local, dm->periodic.num_affines, &dm->periodic.affine));

608:   for (PetscInt f = 0; f < plex->periodic.num_face_sfs; f++) {
609:     PetscInt        npoints, vsize, isize;
610:     const PetscInt *points;
611:     IS              is = plex->periodic.periodic_points[f];
612:     PetscSegBuffer  seg;
613:     PetscSection    section;
614:     PetscInt       *ind;
615:     Vec             L, P;
616:     VecType         vec_type;
617:     VecScatter      scatter;
618:     PetscScalar    *x;

620:     PetscCall(DMGetLocalSection(dm, &section));
621:     PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 32, &seg));
622:     PetscCall(ISGetSize(is, &npoints));
623:     PetscCall(ISGetIndices(is, &points));
624:     for (PetscInt i = 0; i < npoints; i++) {
625:       PetscInt point = points[i], off, dof;

627:       PetscCall(PetscSectionGetOffset(section, point, &off));
628:       PetscCall(PetscSectionGetDof(section, point, &dof));
629:       PetscAssert(dof % dim == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected dof %" PetscInt_FMT " not divisible by dimension %" PetscInt_FMT, dof, dim);
630:       for (PetscInt j = 0; j < dof / dim; j++) {
631:         PetscInt *slot;

633:         PetscCall(PetscSegBufferGetInts(seg, 1, &slot));
634:         *slot = off / dim + j;
635:       }
636:     }
637:     PetscCall(PetscSegBufferGetSize(seg, &count));
638:     PetscCall(PetscSegBufferExtractAlloc(seg, &ind));
639:     PetscCall(PetscSegBufferDestroy(&seg));
640:     PetscCall(PetscIntCast(count, &isize));
641:     PetscCall(ISCreateBlock(PETSC_COMM_SELF, dim, isize, ind, PETSC_OWN_POINTER, &isdof));

643:     PetscCall(PetscIntCast(count * dim, &vsize));
644:     PetscCall(DMGetLocalVector(dm, &L));
645:     PetscCall(VecCreate(PETSC_COMM_SELF, &P));
646:     PetscCall(VecSetSizes(P, vsize, vsize));
647:     PetscCall(VecGetType(L, &vec_type));
648:     PetscCall(VecSetType(P, vec_type));
649:     PetscCall(VecScatterCreate(P, NULL, L, isdof, &scatter));
650:     PetscCall(DMRestoreLocalVector(dm, &L));
651:     PetscCall(ISDestroy(&isdof));

653:     PetscCall(VecGetArrayWrite(P, &x));
654:     for (PetscCount i = 0; i < count; i++) {
655:       for (PetscInt j = 0; j < dim; j++) x[i * dim + j] = plex->periodic.transform[f][j][3];
656:     }
657:     PetscCall(VecRestoreArrayWrite(P, &x));

659:     dm->periodic.affine_to_local[f] = scatter;
660:     dm->periodic.affine[f]          = P;
661:   }
662:   PetscCall(DMGlobalToLocalHookAdd(dm, NULL, DMCoordAddPeriodicOffsets_Private, NULL));
663:   PetscFunctionReturn(PETSC_SUCCESS);
664: }

666: // We'll just orient all the edges, though only periodic boundary edges need orientation
667: static PetscErrorCode DMPlexOrientPositiveEdges_Private(DM dm)
668: {
669:   PetscInt dim, eStart, eEnd;

671:   PetscFunctionBegin;
672:   PetscCall(DMGetDimension(dm, &dim));
673:   if (dim < 3) PetscFunctionReturn(PETSC_SUCCESS); // not necessary
674:   PetscCall(DMPlexGetDepthStratum(dm, 1, &eStart, &eEnd));
675:   for (PetscInt e = eStart; e < eEnd; e++) {
676:     const PetscInt *cone;
677:     PetscCall(DMPlexGetCone(dm, e, &cone));
678:     if (cone[0] > cone[1]) PetscCall(DMPlexOrientPoint(dm, e, -1));
679:   }
680:   PetscFunctionReturn(PETSC_SUCCESS);
681: }

683: PetscErrorCode DMPlexCreateBoxMesh_Tensor_SFC_Internal(DM dm, PetscInt dim, const PetscInt faces[], const PetscReal lower[], const PetscReal upper[], const DMBoundaryType periodicity[], PetscBool interpolate)
684: {
685:   PetscInt  eextent[3] = {1, 1, 1}, vextent[3] = {1, 1, 1};
686:   const Ijk closure_1[] = {
687:     {0, 0, 0},
688:     {1, 0, 0},
689:   };
690:   const Ijk closure_2[] = {
691:     {0, 0, 0},
692:     {1, 0, 0},
693:     {1, 1, 0},
694:     {0, 1, 0},
695:   };
696:   const Ijk closure_3[] = {
697:     {0, 0, 0},
698:     {0, 1, 0},
699:     {1, 1, 0},
700:     {1, 0, 0},
701:     {0, 0, 1},
702:     {1, 0, 1},
703:     {1, 1, 1},
704:     {0, 1, 1},
705:   };
706:   const Ijk *const closure_dim[] = {NULL, closure_1, closure_2, closure_3};
707:   // This must be kept consistent with DMPlexCreateCubeMesh_Internal
708:   const PetscInt        face_marker_1[]   = {1, 2};
709:   const PetscInt        face_marker_2[]   = {4, 2, 1, 3};
710:   const PetscInt        face_marker_3[]   = {6, 5, 3, 4, 1, 2};
711:   const PetscInt *const face_marker_dim[] = {NULL, face_marker_1, face_marker_2, face_marker_3};
712:   // Orient faces so the normal is in the positive axis and the first vertex is the one closest to zero.
713:   // These orientations can be determined by examining cones of a reference quad and hex element.
714:   const PetscInt        face_orient_1[]   = {0, 0};
715:   const PetscInt        face_orient_2[]   = {-1, 0, 0, -1};
716:   const PetscInt        face_orient_3[]   = {-2, 0, -2, 1, -2, 0};
717:   const PetscInt *const face_orient_dim[] = {NULL, face_orient_1, face_orient_2, face_orient_3};

719:   PetscFunctionBegin;
720:   PetscAssertPointer(dm, 1);
722:   PetscCall(DMSetDimension(dm, dim));
723:   PetscMPIInt rank, size;
724:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
725:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
726:   for (PetscInt i = 0; i < dim; i++) {
727:     eextent[i] = faces[i];
728:     vextent[i] = faces[i] + 1;
729:   }
730:   ZLayout layout;
731:   PetscCall(ZLayoutCreate(size, eextent, vextent, &layout));
732:   PetscZSet vset; // set of all vertices in the closure of the owned elements
733:   PetscCall(PetscZSetCreate(&vset));
734:   PetscInt local_elems = 0;
735:   for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
736:     Ijk loc = ZCodeSplit(z);
737:     if (IjkActive(layout.vextent, loc)) PetscCall(PetscZSetAdd(vset, z));
738:     else {
739:       z += ZStepOct(z);
740:       continue;
741:     }
742:     if (IjkActive(layout.eextent, loc)) {
743:       local_elems++;
744:       // Add all neighboring vertices to set
745:       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
746:         Ijk   inc  = closure_dim[dim][n];
747:         Ijk   nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
748:         ZCode v    = ZEncode(nloc);
749:         PetscCall(PetscZSetAdd(vset, v));
750:       }
751:     }
752:   }
753:   PetscInt local_verts, off = 0;
754:   ZCode   *vert_z;
755:   PetscCall(PetscZSetGetSize(vset, &local_verts));
756:   PetscCall(PetscMalloc1(local_verts, &vert_z));
757:   PetscCall(PetscZSetGetElems(vset, &off, vert_z));
758:   PetscCall(PetscZSetDestroy(&vset));
759:   // ZCode is unsigned for bitwise convenience, but highest bit should never be set, so can interpret as signed
760:   PetscCall(PetscSortInt64(local_verts, (PetscInt64 *)vert_z));

762:   PetscCall(DMPlexSetChart(dm, 0, local_elems + local_verts));
763:   for (PetscInt e = 0; e < local_elems; e++) PetscCall(DMPlexSetConeSize(dm, e, PetscPowInt(2, dim)));
764:   PetscCall(DMSetUp(dm));
765:   {
766:     PetscInt e = 0;
767:     for (ZCode z = layout.zstarts[rank]; z < layout.zstarts[rank + 1]; z++) {
768:       Ijk loc = ZCodeSplit(z);
769:       if (!IjkActive(layout.eextent, loc)) {
770:         z += ZStepOct(z);
771:         continue;
772:       }
773:       PetscInt cone[8], orient[8] = {0};
774:       for (PetscInt n = 0; n < PetscPowInt(2, dim); n++) {
775:         Ijk      inc  = closure_dim[dim][n];
776:         Ijk      nloc = {loc.i + inc.i, loc.j + inc.j, loc.k + inc.k};
777:         ZCode    v    = ZEncode(nloc);
778:         PetscInt ci   = ZCodeFind(v, local_verts, vert_z);
779:         PetscAssert(ci >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Did not find neighbor vertex in set");
780:         cone[n] = local_elems + ci;
781:       }
782:       PetscCall(DMPlexSetCone(dm, e, cone));
783:       PetscCall(DMPlexSetConeOrientation(dm, e, orient));
784:       e++;
785:     }
786:   }

788:   PetscCall(DMPlexSymmetrize(dm));
789:   PetscCall(DMPlexStratify(dm));

791:   { // Create point SF
792:     PetscSF sf;
793:     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &sf));
794:     PetscInt owned_verts = ZCodeFind(layout.zstarts[rank + 1], local_verts, vert_z);
795:     if (owned_verts < 0) owned_verts = -(owned_verts + 1); // We don't care whether the key was found
796:     PetscInt     num_ghosts = local_verts - owned_verts;   // Due to sorting, owned vertices always come first
797:     PetscInt    *local_ghosts;
798:     PetscSFNode *ghosts;
799:     PetscCall(PetscMalloc1(num_ghosts, &local_ghosts));
800:     PetscCall(PetscMalloc1(num_ghosts, &ghosts));
801:     for (PetscInt i = 0; i < num_ghosts;) {
802:       ZCode       z           = vert_z[owned_verts + i];
803:       PetscMPIInt remote_rank = (PetscMPIInt)ZCodeFind(z, size + 1, layout.zstarts), remote_count = 0;
804:       if (remote_rank < 0) remote_rank = -(remote_rank + 1) - 1;
805:       // We have a new remote rank; find all the ghost indices (which are contiguous in vert_z)

807:       // Count the elements on remote_rank
808:       PetscInt remote_elem = ZLayoutElementsOnRank(&layout, remote_rank);

810:       // Traverse vertices and make ghost links
811:       for (ZCode rz = layout.zstarts[remote_rank]; rz < layout.zstarts[remote_rank + 1]; rz++) {
812:         Ijk loc = ZCodeSplit(rz);
813:         if (rz == z) {
814:           local_ghosts[i] = local_elems + owned_verts + i;
815:           ghosts[i].rank  = remote_rank;
816:           ghosts[i].index = remote_elem + remote_count;
817:           i++;
818:           if (i == num_ghosts) break;
819:           z = vert_z[owned_verts + i];
820:         }
821:         if (IjkActive(layout.vextent, loc)) remote_count++;
822:         else rz += ZStepOct(rz);
823:       }
824:     }
825:     PetscCall(PetscSFSetGraph(sf, local_elems + local_verts, num_ghosts, local_ghosts, PETSC_OWN_POINTER, ghosts, PETSC_OWN_POINTER));
826:     PetscCall(PetscObjectSetName((PetscObject)sf, "SFC Point SF"));
827:     PetscCall(DMSetPointSF(dm, sf));
828:     PetscCall(PetscSFDestroy(&sf));
829:   }
830:   {
831:     Vec          coordinates;
832:     PetscScalar *coords;
833:     PetscSection coord_section;
834:     PetscInt     coord_size;
835:     PetscCall(DMGetCoordinateSection(dm, &coord_section));
836:     PetscCall(PetscSectionSetNumFields(coord_section, 1));
837:     PetscCall(PetscSectionSetFieldComponents(coord_section, 0, dim));
838:     PetscCall(PetscSectionSetChart(coord_section, local_elems, local_elems + local_verts));
839:     for (PetscInt v = 0; v < local_verts; v++) {
840:       PetscInt point = local_elems + v;
841:       PetscCall(PetscSectionSetDof(coord_section, point, dim));
842:       PetscCall(PetscSectionSetFieldDof(coord_section, point, 0, dim));
843:     }
844:     PetscCall(PetscSectionSetUp(coord_section));
845:     PetscCall(PetscSectionGetStorageSize(coord_section, &coord_size));
846:     PetscCall(VecCreate(PETSC_COMM_SELF, &coordinates));
847:     PetscCall(PetscObjectSetName((PetscObject)coordinates, "coordinates"));
848:     PetscCall(VecSetSizes(coordinates, coord_size, PETSC_DETERMINE));
849:     PetscCall(VecSetBlockSize(coordinates, dim));
850:     PetscCall(VecSetType(coordinates, VECSTANDARD));
851:     PetscCall(VecGetArray(coordinates, &coords));
852:     for (PetscInt v = 0; v < local_verts; v++) {
853:       Ijk loc             = ZCodeSplit(vert_z[v]);
854:       coords[v * dim + 0] = lower[0] + loc.i * (upper[0] - lower[0]) / layout.eextent.i;
855:       if (dim > 1) coords[v * dim + 1] = lower[1] + loc.j * (upper[1] - lower[1]) / layout.eextent.j;
856:       if (dim > 2) coords[v * dim + 2] = lower[2] + loc.k * (upper[2] - lower[2]) / layout.eextent.k;
857:     }
858:     PetscCall(VecRestoreArray(coordinates, &coords));
859:     PetscCall(DMSetCoordinatesLocal(dm, coordinates));
860:     PetscCall(VecDestroy(&coordinates));
861:   }
862:   if (interpolate) {
863:     PetscCall(DMPlexInterpolateInPlace_Internal(dm));
864:     // It's currently necessary to orient the donor and periodic edges consistently. An easy way to ensure that is ot
865:     // give all edges positive orientation. Since vertices are created in Z-order, all ranks will agree about the
866:     // ordering cone[0] < cone[1]. This is overkill and it would be nice to remove this preparation and make
867:     // DMPlexCreateIsoperiodicClosureSF_Private() more resilient, so it fixes any inconsistent orientations. That might
868:     // be needed in a general CGNS reader, for example.
869:     PetscCall(DMPlexOrientPositiveEdges_Private(dm));

871:     DMLabel label;
872:     PetscCall(DMCreateLabel(dm, "Face Sets"));
873:     PetscCall(DMGetLabel(dm, "Face Sets", &label));
874:     PetscSegBuffer per_faces[3], donor_face_closure[3], my_donor_faces[3];
875:     for (PetscInt i = 0; i < 3; i++) {
876:       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &per_faces[i]));
877:       PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 64, &my_donor_faces[i]));
878:       PetscCall(PetscSegBufferCreate(sizeof(ZCode), 64 * PetscPowInt(2, dim), &donor_face_closure[i]));
879:     }
880:     PetscInt fStart, fEnd, vStart, vEnd;
881:     PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
882:     PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
883:     for (PetscInt f = fStart; f < fEnd; f++) {
884:       PetscInt npoints, *points = NULL, num_fverts = 0, fverts[8];
885:       PetscCall(DMPlexGetTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
886:       PetscInt bc_count[6] = {0};
887:       for (PetscInt i = 0; i < npoints; i++) {
888:         PetscInt p = points[2 * i];
889:         if (p < vStart || vEnd <= p) continue;
890:         fverts[num_fverts++] = p;
891:         Ijk loc              = ZCodeSplit(vert_z[p - vStart]);
892:         // Convention here matches DMPlexCreateCubeMesh_Internal
893:         bc_count[0] += loc.i == 0;
894:         bc_count[1] += loc.i == layout.vextent.i - 1;
895:         bc_count[2] += loc.j == 0;
896:         bc_count[3] += loc.j == layout.vextent.j - 1;
897:         bc_count[4] += loc.k == 0;
898:         bc_count[5] += loc.k == layout.vextent.k - 1;
899:       }
900:       PetscCall(DMPlexRestoreTransitiveClosure(dm, f, PETSC_TRUE, &npoints, &points));
901:       for (PetscInt bc = 0, bc_match = 0; bc < 2 * dim; bc++) {
902:         if (bc_count[bc] == PetscPowInt(2, dim - 1)) {
903:           PetscCall(DMPlexOrientPoint(dm, f, face_orient_dim[dim][bc]));
904:           if (periodicity[bc / 2] == DM_BOUNDARY_PERIODIC) {
905:             PetscInt *put;
906:             if (bc % 2 == 0) { // donor face; no label
907:               PetscCall(PetscSegBufferGet(my_donor_faces[bc / 2], 1, &put));
908:               *put = f;
909:             } else { // periodic face
910:               PetscCall(PetscSegBufferGet(per_faces[bc / 2], 1, &put));
911:               *put = f;
912:               ZCode *zput;
913:               PetscCall(PetscSegBufferGet(donor_face_closure[bc / 2], num_fverts, &zput));
914:               for (PetscInt i = 0; i < num_fverts; i++) {
915:                 Ijk loc = ZCodeSplit(vert_z[fverts[i] - vStart]);
916:                 switch (bc / 2) {
917:                 case 0:
918:                   loc.i = 0;
919:                   break;
920:                 case 1:
921:                   loc.j = 0;
922:                   break;
923:                 case 2:
924:                   loc.k = 0;
925:                   break;
926:                 }
927:                 *zput++ = ZEncode(loc);
928:               }
929:             }
930:             continue;
931:           }
932:           PetscAssert(bc_match == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Face matches multiple face sets");
933:           PetscCall(DMLabelSetValue(label, f, face_marker_dim[dim][bc]));
934:           bc_match++;
935:         }
936:       }
937:     }
938:     // Ensure that the Coordinate DM has our new boundary labels
939:     DM cdm;
940:     PetscCall(DMGetCoordinateDM(dm, &cdm));
941:     PetscCall(DMCopyLabels(dm, cdm, PETSC_COPY_VALUES, PETSC_FALSE, DM_COPY_LABELS_FAIL));
942:     if (periodicity[0] == DM_BOUNDARY_PERIODIC || (dim > 1 && periodicity[1] == DM_BOUNDARY_PERIODIC) || (dim > 2 && periodicity[2] == DM_BOUNDARY_PERIODIC)) {
943:       PetscCall(DMPlexCreateBoxMesh_Tensor_SFC_Periodicity_Private(dm, &layout, vert_z, per_faces, lower, upper, periodicity, donor_face_closure, my_donor_faces));
944:     }
945:     for (PetscInt i = 0; i < 3; i++) {
946:       PetscCall(PetscSegBufferDestroy(&per_faces[i]));
947:       PetscCall(PetscSegBufferDestroy(&donor_face_closure[i]));
948:       PetscCall(PetscSegBufferDestroy(&my_donor_faces[i]));
949:     }
950:   }
951:   PetscCall(PetscFree(layout.zstarts));
952:   PetscCall(PetscFree(vert_z));
953:   PetscFunctionReturn(PETSC_SUCCESS);
954: }

956: /*@
957:   DMPlexSetIsoperiodicFaceSF - Express periodicity from an existing mesh

959:   Logically Collective

961:   Input Parameters:
962: + dm           - The `DMPLEX` on which to set periodicity
963: . num_face_sfs - Number of `PetscSF`s in `face_sfs`
964: - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.

966:   Level: advanced

968:   Note:
969:   One can use `-dm_plex_shape zbox` to use this mode of periodicity, wherein the periodic points are distinct both globally
970:   and locally, but are paired when creating a global dof space.

972: .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexGetIsoperiodicFaceSF()`
973: @*/
974: PetscErrorCode DMPlexSetIsoperiodicFaceSF(DM dm, PetscInt num_face_sfs, PetscSF *face_sfs)
975: {
976:   DM_Plex *plex = (DM_Plex *)dm->data;

978:   PetscFunctionBegin;
980:   if (face_sfs) PetscCall(PetscObjectComposeFunction((PetscObject)dm, "DMGetIsoperiodicPointSF_C", DMGetIsoperiodicPointSF_Plex));
981:   if (face_sfs == plex->periodic.face_sfs && num_face_sfs == plex->periodic.num_face_sfs) PetscFunctionReturn(PETSC_SUCCESS);

983:   for (PetscInt i = 0; i < num_face_sfs; i++) PetscCall(PetscObjectReference((PetscObject)face_sfs[i]));

985:   if (plex->periodic.num_face_sfs > 0) {
986:     for (PetscInt i = 0; i < plex->periodic.num_face_sfs; i++) PetscCall(PetscSFDestroy(&plex->periodic.face_sfs[i]));
987:     PetscCall(PetscFree(plex->periodic.face_sfs));
988:   }

990:   plex->periodic.num_face_sfs = num_face_sfs;
991:   PetscCall(PetscCalloc1(num_face_sfs, &plex->periodic.face_sfs));
992:   for (PetscInt i = 0; i < num_face_sfs; i++) plex->periodic.face_sfs[i] = face_sfs[i];

994:   DM cdm = dm->coordinates[0].dm; // Can't DMGetCoordinateDM because it automatically creates one
995:   if (cdm) {
996:     PetscCall(DMPlexSetIsoperiodicFaceSF(cdm, num_face_sfs, face_sfs));
997:     if (face_sfs) cdm->periodic.setup = DMPeriodicCoordinateSetUp_Internal;
998:   }
999:   PetscFunctionReturn(PETSC_SUCCESS);
1000: }

1002: /*@C
1003:   DMPlexGetIsoperiodicFaceSF - Obtain periodicity for a mesh

1005:   Logically Collective

1007:   Input Parameter:
1008: . dm - The `DMPLEX` for which to obtain periodic relation

1010:   Output Parameters:
1011: + num_face_sfs - Number of `PetscSF`s in the array
1012: - face_sfs     - Array of `PetscSF` in which roots are (owned) donor faces and leaves are faces that must be matched to a (possibly remote) donor face.

1014:   Level: advanced

1016: .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1017: @*/
1018: PetscErrorCode DMPlexGetIsoperiodicFaceSF(DM dm, PetscInt *num_face_sfs, const PetscSF **face_sfs)
1019: {
1020:   DM_Plex *plex = (DM_Plex *)dm->data;

1022:   PetscFunctionBegin;
1024:   *face_sfs     = plex->periodic.face_sfs;
1025:   *num_face_sfs = plex->periodic.num_face_sfs;
1026:   PetscFunctionReturn(PETSC_SUCCESS);
1027: }

1029: /*@C
1030:   DMPlexSetIsoperiodicFaceTransform - set geometric transform from donor to periodic points

1032:   Logically Collective

1034:   Input Parameters:
1035: + dm - `DMPLEX` that has been configured with `DMPlexSetIsoperiodicFaceSF()`
1036: . n  - Number of transforms in array
1037: - t  - Array of 4x4 affine transformation basis.

1039:   Level: advanced

1041:   Notes:
1042:   Affine transforms are 4x4 matrices in which the leading 3x3 block expresses a rotation (or identity for no rotation),
1043:   the last column contains a translation vector, and the bottom row is all zero except the last entry, which must always
1044:   be 1. This representation is common in geometric modeling and allows affine transformations to be composed using
1045:   simple matrix multiplication.

1047:   Although the interface accepts a general affine transform, only affine translation is supported at present.

1049:   Developer Notes:
1050:   This interface should be replaced by making BasisTransform public, expanding it to support affine representations, and
1051:   adding GPU implementations to apply the G2L/L2G transforms.

1053: .seealso: [](ch_unstructured), `DMPLEX`, `DMGetGlobalSection()`, `DMPlexSetIsoperiodicFaceSF()`
1054: @*/
1055: PetscErrorCode DMPlexSetIsoperiodicFaceTransform(DM dm, PetscInt n, const PetscScalar t[])
1056: {
1057:   DM_Plex *plex = (DM_Plex *)dm->data;

1059:   PetscFunctionBegin;
1061:   PetscCheck(n == plex->periodic.num_face_sfs, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Number of transforms (%" PetscInt_FMT ") must equal number of isoperiodc face SFs (%" PetscInt_FMT ")", n, plex->periodic.num_face_sfs);

1063:   PetscCall(PetscMalloc1(n, &plex->periodic.transform));
1064:   for (PetscInt i = 0; i < n; i++) {
1065:     for (PetscInt j = 0; j < 4; j++) {
1066:       for (PetscInt k = 0; k < 4; k++) {
1067:         PetscCheck(j != k || t[i * 16 + j * 4 + k] == 1., PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Rotated transforms not supported");
1068:         plex->periodic.transform[i][j][k] = t[i * 16 + j * 4 + k];
1069:       }
1070:     }
1071:   }
1072:   PetscFunctionReturn(PETSC_SUCCESS);
1073: }