Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petsc/private/hashmapi.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7: #include <petscdevice_cuda.h>
  8: #endif

 10: #if defined(PETSC_HAVE_HIP)
 11:   #include <hip/hip_runtime.h>
 12: #endif

 14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 15: extern void PetscSFCheckGraphSet(PetscSF, int);
 16: #else
 17:   #if defined(PETSC_USE_DEBUG)
 18:     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
 19:   #else
 20:     #define PetscSFCheckGraphSet(sf, arg) \
 21:       do { \
 22:       } while (0)
 23:   #endif
 24: #endif

 26: const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
 27: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

 29: /*@
 30:   PetscSFCreate - create a star forest communication context

 32:   Collective

 34:   Input Parameter:
 35: . comm - communicator on which the star forest will operate

 37:   Output Parameter:
 38: . sf - new star forest context

 40:   Options Database Keys:
 41: + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
 42: . -sf_type window                - Use MPI-3 one-sided window for communication
 43: . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
 44: - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

 46:   Level: intermediate

 48:   Note:
 49:   When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
 50:   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
 51:   `PetscSF`s are optimized and have better performance than the general `PetscSF`s.

 53: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType()`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
 54: @*/
 55: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
 56: {
 57:   PetscSF b;

 59:   PetscFunctionBegin;
 60:   PetscAssertPointer(sf, 2);
 61:   PetscCall(PetscSFInitializePackage());

 63:   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
 64:   b->nroots    = -1;
 65:   b->nleaves   = -1;
 66:   b->minleaf   = PETSC_INT_MAX;
 67:   b->maxleaf   = PETSC_INT_MIN;
 68:   b->nranks    = -1;
 69:   b->rankorder = PETSC_TRUE;
 70:   b->ingroup   = MPI_GROUP_NULL;
 71:   b->outgroup  = MPI_GROUP_NULL;
 72:   b->graphset  = PETSC_FALSE;
 73: #if defined(PETSC_HAVE_DEVICE)
 74:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 75:   b->use_stream_aware_mpi = PETSC_FALSE;
 76:   b->unknown_input_stream = PETSC_FALSE;
 77:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
 78:   b->backend = PETSCSF_BACKEND_KOKKOS;
 79:   #elif defined(PETSC_HAVE_CUDA)
 80:   b->backend = PETSCSF_BACKEND_CUDA;
 81:   #elif defined(PETSC_HAVE_HIP)
 82:   b->backend = PETSCSF_BACKEND_HIP;
 83:   #endif

 85:   #if defined(PETSC_HAVE_NVSHMEM)
 86:   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 87:   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 88:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
 89:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
 90:   #endif
 91: #endif
 92:   b->vscat.from_n = -1;
 93:   b->vscat.to_n   = -1;
 94:   b->vscat.unit   = MPIU_SCALAR;
 95:   *sf             = b;
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }
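
/*
   A minimal usage sketch of the create/configure/destroy cycle, assuming an existing MPI
   communicator `comm`; the graph itself would be supplied later with PetscSFSetGraph() or
   PetscSFSetGraphWithPattern():

     PetscSF sf;

     PetscCall(PetscSFCreate(comm, &sf));         // comm: assumed communicator
     PetscCall(PetscSFSetFromOptions(sf));        // honors -sf_type, -sf_backend, ...
     // ... set the graph, then communicate with PetscSFBcastBegin()/End() etc. ...
     PetscCall(PetscSFDestroy(&sf));
*/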

 99: /*@
100:   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

102:   Collective

104:   Input Parameter:
105: . sf - star forest

107:   Level: advanced

109: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
110: @*/
111: PetscErrorCode PetscSFReset(PetscSF sf)
112: {
113:   PetscFunctionBegin;
115:   PetscTryTypeMethod(sf, Reset);
116:   PetscCall(PetscSFDestroy(&sf->rankssf));

118:   sf->nroots   = -1;
119:   sf->nleaves  = -1;
120:   sf->minleaf  = PETSC_INT_MAX;
121:   sf->maxleaf  = PETSC_INT_MIN;
122:   sf->mine     = NULL;
123:   sf->remote   = NULL;
124:   sf->graphset = PETSC_FALSE;
125:   PetscCall(PetscFree(sf->mine_alloc));
126:   PetscCall(PetscFree(sf->remote_alloc));
127:   sf->nranks = -1;
128:   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
129:   sf->degreeknown = PETSC_FALSE;
130:   PetscCall(PetscFree(sf->degree));
131:   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
132:   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));

134:   if (sf->multi) sf->multi->multi = NULL;
135:   PetscCall(PetscSFDestroy(&sf->multi));

137:   PetscCall(PetscLayoutDestroy(&sf->map));

139: #if defined(PETSC_HAVE_DEVICE)
140:   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
141: #endif

143:   sf->setupcalled = PETSC_FALSE;
144:   PetscFunctionReturn(PETSC_SUCCESS);
145: }

147: /*@
148:   PetscSFSetType - Set the `PetscSF` communication implementation

150:   Collective

152:   Input Parameters:
153: + sf   - the `PetscSF` context
154: - type - a known method
155: .vb
156:     PETSCSFWINDOW - MPI-2/3 one-sided
157:     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158: .ve

160:   Options Database Key:
161: . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods

163:   Level: intermediate

165:   Note:
166:   See `PetscSFType` for possible values

168: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169: @*/
170: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171: {
172:   PetscBool match;
173:   PetscErrorCode (*r)(PetscSF);

175:   PetscFunctionBegin;
177:   PetscAssertPointer(type, 2);

179:   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180:   if (match) PetscFunctionReturn(PETSC_SUCCESS);

182:   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183:   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184:   /* Destroy the previous PetscSF implementation context */
185:   PetscTryTypeMethod(sf, Destroy);
186:   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187:   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188:   PetscCall((*r)(sf));
189:   PetscFunctionReturn(PETSC_SUCCESS);
190: }
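
/*
   A small sketch of selecting the communication implementation programmatically instead of via
   -sf_type (assuming `sf` was created with PetscSFCreate()):

     PetscCall(PetscSFSetType(sf, PETSCSFWINDOW)); // or PETSCSFBASIC, PETSCSFNEIGHBOR, ...
*/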

192: /*@
193:   PetscSFGetType - Get the `PetscSF` communication implementation

195:   Not Collective

197:   Input Parameter:
198: . sf - the `PetscSF` context

200:   Output Parameter:
201: . type - the `PetscSF` type name

203:   Level: intermediate

205: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
206: @*/
207: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208: {
209:   PetscFunctionBegin;
211:   PetscAssertPointer(type, 2);
212:   *type = ((PetscObject)sf)->type_name;
213:   PetscFunctionReturn(PETSC_SUCCESS);
214: }

216: /*@
217:   PetscSFDestroy - destroy a star forest

219:   Collective

221:   Input Parameter:
222: . sf - address of star forest

224:   Level: intermediate

226: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
227: @*/
228: PetscErrorCode PetscSFDestroy(PetscSF *sf)
229: {
230:   PetscFunctionBegin;
231:   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233:   if (--((PetscObject)*sf)->refct > 0) {
234:     *sf = NULL;
235:     PetscFunctionReturn(PETSC_SUCCESS);
236:   }
237:   PetscCall(PetscSFReset(*sf));
238:   PetscTryTypeMethod(*sf, Destroy);
239:   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
240:   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242:   if ((*sf)->use_stream_aware_mpi) {
243:     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244:     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245:   }
246: #endif
247:   PetscCall(PetscHeaderDestroy(sf));
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252: {
253:   PetscInt           i, nleaves;
254:   PetscMPIInt        size;
255:   const PetscInt    *ilocal;
256:   const PetscSFNode *iremote;

258:   PetscFunctionBegin;
259:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260:   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262:   for (i = 0; i < nleaves; i++) {
263:     const PetscInt rank   = iremote[i].rank;
264:     const PetscInt remote = iremote[i].index;
265:     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266:     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267:     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268:     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269:   }
270:   PetscFunctionReturn(PETSC_SUCCESS);
271: }

273: /*@
274:   PetscSFSetUp - set up communication structures for a `PetscSF`; after this is done the `PetscSF` may be used to perform communication

276:   Collective

278:   Input Parameter:
279: . sf - star forest communication object

281:   Level: beginner

283: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
284: @*/
285: PetscErrorCode PetscSFSetUp(PetscSF sf)
286: {
287:   PetscFunctionBegin;
289:   PetscSFCheckGraphSet(sf, 1);
290:   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
291:   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
292:   PetscCall(PetscSFCheckGraphValid_Private(sf));
293:   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294:   PetscTryTypeMethod(sf, SetUp);
295: #if defined(PETSC_HAVE_CUDA)
296:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
297:     sf->ops->Malloc = PetscSFMalloc_CUDA;
298:     sf->ops->Free   = PetscSFFree_CUDA;
299:   }
300: #endif
301: #if defined(PETSC_HAVE_HIP)
302:   if (sf->backend == PETSCSF_BACKEND_HIP) {
303:     sf->ops->Malloc = PetscSFMalloc_HIP;
304:     sf->ops->Free   = PetscSFFree_HIP;
305:   }
306: #endif

308: #if defined(PETSC_HAVE_KOKKOS)
309:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
310:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
311:     sf->ops->Free   = PetscSFFree_Kokkos;
312:   }
313: #endif
314:   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
315:   sf->setupcalled = PETSC_TRUE;
316:   PetscFunctionReturn(PETSC_SUCCESS);
317: }
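
/*
   A sketch of the surrounding call sequence, assuming `sf` already has a graph, `rootdata` holds
   one PetscInt per local root, and `leafdata` is addressable at every local leaf location:

     PetscCall(PetscSFSetUp(sf)); // explicit here; the communication routines also set up the SF on first use
     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
*/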

319: /*@
320:   PetscSFSetFromOptions - set `PetscSF` options using the options database

322:   Logically Collective

324:   Input Parameter:
325: . sf - star forest

327:   Options Database Keys:
328: + -sf_type                      - implementation type, see `PetscSFSetType()`
329: . -sf_rank_order                - sort composite points for gathers and scatters in MPI rank order; otherwise gathers are non-deterministic
330: . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
331:                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
332:                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
333: . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
334:                                   If true, this option only works with `-use_gpu_aware_mpi 1`.

336: - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has three backends: cuda, hip and kokkos.
337:                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
338:                                   the only available backend is kokkos.

340:   Level: intermediate

342: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
343: @*/
344: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345: {
346:   PetscSFType deft;
347:   char        type[256];
348:   PetscBool   flg;

350:   PetscFunctionBegin;
352:   PetscObjectOptionsBegin((PetscObject)sf);
353:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
354:   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
355:   PetscCall(PetscSFSetType(sf, flg ? type : deft));
356:   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357:   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
358: #if defined(PETSC_HAVE_DEVICE)
359:   {
360:     char      backendstr[32] = {0};
361:     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
362:     /* Change the defaults set in PetscSFCreate() with command line options */
363:     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
364:     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
365:     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
366:     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
367:     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
368:     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
369:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
370:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
371:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
372:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
373:     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
374:   #elif defined(PETSC_HAVE_KOKKOS)
375:     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376:   #endif

378:   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379:     if (sf->use_stream_aware_mpi) {
380:       MPI_Info info;

382:       PetscCallMPI(MPI_Info_create(&info));
383:       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384:       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385:       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386:       PetscCallMPI(MPI_Info_free(&info));
387:       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388:     }
389:   #endif
390:   }
391: #endif
392:   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393:   PetscOptionsEnd();
394:   PetscFunctionReturn(PETSC_SUCCESS);
395: }
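
/*
   A sketch of configuring an SF from the options database; the option names are the ones handled
   above, and the command-line values shown are only illustrative (`comm` is an assumed communicator):

     PetscSF sf;

     PetscCall(PetscSFCreate(comm, &sf));
     PetscCall(PetscSFSetFromOptions(sf));
     // then run with, e.g.:  ./app -sf_type neighbor -sf_backend kokkos -sf_monitor
*/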

397: /*@
398:   PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order

400:   Logically Collective

402:   Input Parameters:
403: + sf  - star forest
404: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)

406:   Level: advanced

408: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
409: @*/
410: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411: {
412:   PetscFunctionBegin;
415:   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
416:   sf->rankorder = flg;
417:   PetscFunctionReturn(PETSC_SUCCESS);
418: }

420: /*@
421:   PetscSFSetGraph - Set a parallel star forest

423:   Collective

425:   Input Parameters:
426: + sf         - star forest
427: . nroots     - number of root vertices on the current MPI process (these are possible targets for other processes to attach leaves)
428: . nleaves    - number of leaf vertices on the current MPI process, each of these references a root on any process
429: . ilocal     - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
430: . localmode  - copy mode for `ilocal`
431: . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves` (locations must be >= 0, enforced during setup in debug mode)
432: - remotemode - copy mode for `iremote`

434:   Level: intermediate

436:   Notes:
437:   Leaf indices in `ilocal` must be unique; otherwise an error occurs.

439:   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
440:   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
441:   PETSc might modify the respective array;
442:   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
443:   If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

445:   `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.

447:   Fortran Notes:
448:   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

450:   Developer Notes:
451:   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
452:   This also makes it easy to compare the leaf sets of two `PetscSF`s.

454: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()`
455: @*/
456: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457: {
458:   PetscBool unique, contiguous;

460:   PetscFunctionBegin;
462:   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
463:   if (nleaves > 0) PetscAssertPointer(iremote, 6);
464:   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
465:   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
466:   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
467:    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
468:   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
469:   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

471:   if (sf->nroots >= 0) { /* Reset only if graph already set */
472:     PetscCall(PetscSFReset(sf));
473:   }

475:   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
476:   if (PetscDefined(USE_DEBUG)) {
477:     PetscMPIInt size;

479:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480:     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
481:   }

483:   sf->nroots  = nroots;
484:   sf->nleaves = nleaves;

486:   if (localmode == PETSC_COPY_VALUES && ilocal) {
487:     PetscInt *tlocal = NULL;

489:     PetscCall(PetscMalloc1(nleaves, &tlocal));
490:     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491:     ilocal = tlocal;
492:   }
493:   if (remotemode == PETSC_COPY_VALUES) {
494:     PetscSFNode *tremote = NULL;

496:     PetscCall(PetscMalloc1(nleaves, &tremote));
497:     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498:     iremote = tremote;
499:   }

501:   if (nleaves && ilocal) {
502:     PetscSFNode work;

504:     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
505:     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506:     unique = PetscNot(unique);
507:     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508:     sf->minleaf = ilocal[0];
509:     sf->maxleaf = ilocal[nleaves - 1];
510:     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
511:   } else {
512:     sf->minleaf = 0;
513:     sf->maxleaf = nleaves - 1;
514:     unique      = PETSC_TRUE;
515:     contiguous  = PETSC_TRUE;
516:   }

518:   if (contiguous) {
519:     if (localmode == PETSC_USE_POINTER) {
520:       ilocal = NULL;
521:     } else {
522:       PetscCall(PetscFree(ilocal));
523:     }
524:   }
525:   sf->mine = ilocal;
526:   if (localmode == PETSC_USE_POINTER) {
527:     sf->mine_alloc = NULL;
528:   } else {
529:     sf->mine_alloc = ilocal;
530:   }
531:   if (PetscDefined(USE_DEBUG)) {
532:     PetscMPIInt size;

534:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535:     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
536:   }
537:   sf->remote = iremote;
538:   if (remotemode == PETSC_USE_POINTER) {
539:     sf->remote_alloc = NULL;
540:   } else {
541:     sf->remote_alloc = iremote;
542:   }
543:   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
544:   sf->graphset = PETSC_TRUE;
545:   PetscFunctionReturn(PETSC_SUCCESS);
546: }
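
/*
   A worked sketch of PetscSFSetGraph(): each MPI process owns one root and has one leaf that
   references root 0 on the next process, wrapping around. `comm` is an assumed communicator.

     PetscMPIInt rank, size;
     PetscSFNode remote;
     PetscSF     sf;

     PetscCallMPI(MPI_Comm_rank(comm, &rank));
     PetscCallMPI(MPI_Comm_size(comm, &size));
     remote.rank  = (rank + 1) % size; // MPI rank owning the referenced root
     remote.index = 0;                 // root 0 on that process
     PetscCall(PetscSFCreate(comm, &sf));
     // NULL ilocal means the single leaf lives at location 0 of the leafdata buffer
     PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
     PetscCall(PetscSFSetUp(sf));
*/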

548: /*@
549:   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

551:   Collective

553:   Input Parameters:
554: + sf      - The `PetscSF`
555: . map     - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

558:   Level: intermediate

560:   Notes:
561:   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
562:   `n` and `N` are the local and global sizes of `root` respectively.

564:   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` pair on it copies `root` to
565:   sequential vectors `leaves` on all MPI processes.

567:   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` pair on it copies `root` to a
568:   sequential vector `leaves` on MPI rank 0.

570:   With `PETSCSF_PATTERN_ALLTOALL`, `map` is not used. Suppose NP is the size of `sf`'s communicator. The routine
571:   creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
572:   of rank j, for 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. This does
573:   not mean one cannot send multiple items; one can create a new MPI datatype for the multiple data
574:   items with `MPI_Type_contiguous()` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.

576: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
577:  @*/
578: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
579: {
580:   MPI_Comm    comm;
581:   PetscInt    n, N, res[2];
582:   PetscMPIInt rank, size;
583:   PetscSFType type;

585:   PetscFunctionBegin;
588:   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
589:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
590:   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
591:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
592:   PetscCallMPI(MPI_Comm_size(comm, &size));

594:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
595:     PetscInt sizei = size;

597:     type = PETSCSFALLTOALL;
598:     PetscCall(PetscLayoutCreate(comm, &sf->map));
599:     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
600:     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
601:     PetscCall(PetscLayoutSetUp(sf->map));
602:   } else {
603:     PetscCall(PetscLayoutGetLocalSize(map, &n));
604:     PetscCall(PetscLayoutGetSize(map, &N));
605:     res[0] = n;
606:     res[1] = -n;
607:     /* Check if n is the same over all ranks so that we can optimize it */
608:     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
609:     if (res[0] == -res[1]) { /* same n */
610:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
611:     } else {
612:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
613:     }
614:     PetscCall(PetscLayoutReference(map, &sf->map));
615:   }
616:   PetscCall(PetscSFSetType(sf, type));

618:   sf->pattern = pattern;
619:   sf->mine    = NULL; /* Contiguous */

621:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
622:      Also set other easy stuff.
623:    */
624:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
625:     sf->nleaves = N;
626:     sf->nroots  = n;
627:     sf->nranks  = size;
628:     sf->minleaf = 0;
629:     sf->maxleaf = N - 1;
630:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
631:     sf->nleaves = rank ? 0 : N;
632:     sf->nroots  = n;
633:     sf->nranks  = rank ? 0 : size;
634:     sf->minleaf = 0;
635:     sf->maxleaf = rank ? -1 : N - 1;
636:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
637:     sf->nleaves = size;
638:     sf->nroots  = size;
639:     sf->nranks  = size;
640:     sf->minleaf = 0;
641:     sf->maxleaf = size - 1;
642:   }
643:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
644:   sf->graphset = PETSC_TRUE;
645:   PetscFunctionReturn(PETSC_SUCCESS);
646: }
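
/*
   A sketch of the ALLGATHER pattern: every process contributes its `n` local roots and, after a
   broadcast, holds all `N` of them as leaves. `comm` and `n` are assumed inputs.

     PetscLayout map;
     PetscSF     sf;

     PetscCall(PetscLayoutCreate(comm, &map));
     PetscCall(PetscLayoutSetLocalSize(map, n)); // n: assumed local root count
     PetscCall(PetscLayoutSetUp(map));
     PetscCall(PetscSFCreate(comm, &sf));
     PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
     // PetscSFBcastBegin()/End() on sf now copies the n roots of every process into an
     // N-long leaf array on each process
     PetscCall(PetscLayoutDestroy(&map)); // sf keeps its own reference to the layout
*/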

648: /*@
649:   PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map

651:   Collective

653:   Input Parameter:
654: . sf - star forest to invert

656:   Output Parameter:
657: . isf - inverse of `sf`

659:   Level: advanced

661:   Notes:
662:   All roots must have degree 1.

664:   The local space may be a permutation, but cannot be sparse.

666: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
667: @*/
668: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
669: {
670:   PetscMPIInt     rank;
671:   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
672:   const PetscInt *ilocal;
673:   PetscSFNode    *roots, *leaves;

675:   PetscFunctionBegin;
677:   PetscSFCheckGraphSet(sf, 1);
678:   PetscAssertPointer(isf, 2);

680:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
681:   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

683:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
684:   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
685:   for (i = 0; i < maxlocal; i++) {
686:     leaves[i].rank  = rank;
687:     leaves[i].index = i;
688:   }
689:   for (i = 0; i < nroots; i++) {
690:     roots[i].rank  = -1;
691:     roots[i].index = -1;
692:   }
693:   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
694:   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));

696:   /* Check whether our leaves are sparse */
697:   for (i = 0, count = 0; i < nroots; i++)
698:     if (roots[i].rank >= 0) count++;
699:   if (count == nroots) newilocal = NULL;
700:   else { // Index for sparse leaves and compact "roots" array (which is to become our leaves).
701:     PetscCall(PetscMalloc1(count, &newilocal));
702:     for (i = 0, count = 0; i < nroots; i++) {
703:       if (roots[i].rank >= 0) {
704:         newilocal[count]   = i;
705:         roots[count].rank  = roots[i].rank;
706:         roots[count].index = roots[i].index;
707:         count++;
708:       }
709:     }
710:   }

712:   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
713:   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
714:   PetscCall(PetscFree2(roots, leaves));
715:   PetscFunctionReturn(PETSC_SUCCESS);
716: }
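
/*
   A small sketch: invert a one-to-one SF so that data can be moved from the former leaf space back
   to the former root space by broadcasting on `isf` (assuming every root of `sf` has degree 1):

     PetscSF isf;

     PetscCall(PetscSFCreateInverseSF(sf, &isf));
     // roots of sf become leaves of isf and vice versa
     PetscCall(PetscSFDestroy(&isf));
*/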

718: /*@
719:   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

721:   Collective

723:   Input Parameters:
724: + sf  - communication object to duplicate
725: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

727:   Output Parameter:
728: . newsf - new communication object

730:   Level: beginner

732: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
733: @*/
734: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
735: {
736:   PetscSFType  type;
737:   MPI_Datatype dtype = MPIU_SCALAR;

739:   PetscFunctionBegin;
742:   PetscAssertPointer(newsf, 3);
743:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
744:   PetscCall(PetscSFGetType(sf, &type));
745:   if (type) PetscCall(PetscSFSetType(*newsf, type));
746:   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
747:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
748:     PetscSFCheckGraphSet(sf, 1);
749:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
750:       PetscInt           nroots, nleaves;
751:       const PetscInt    *ilocal;
752:       const PetscSFNode *iremote;
753:       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
754:       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
755:     } else {
756:       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
757:     }
758:   }
759:   /* Since oldtype is committed, so is newtype, according to MPI */
760:   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
761:   (*newsf)->vscat.bs     = sf->vscat.bs;
762:   (*newsf)->vscat.unit   = dtype;
763:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
764:   (*newsf)->vscat.from_n = sf->vscat.from_n;
765:   /* Do not copy lsf. Build it on demand since it is rarely used */

767: #if defined(PETSC_HAVE_DEVICE)
768:   (*newsf)->backend              = sf->backend;
769:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
770:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
771:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
772: #endif
773:   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
774:   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
775:   PetscFunctionReturn(PETSC_SUCCESS);
776: }
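
/*
   A small sketch of duplication, assuming `sf` already has a graph (required for
   PETSCSF_DUPLICATE_GRAPH):

     PetscSF newsf;

     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &newsf)); // copies configuration and graph
     PetscCall(PetscSFDestroy(&newsf));
*/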

778: /*@C
779:   PetscSFGetGraph - Get the graph specifying a parallel star forest

781:   Not Collective

783:   Input Parameter:
784: . sf - star forest

786:   Output Parameters:
787: + nroots  - number of root vertices on the current process (these are possible targets for other MPI processes to attach leaves)
788: . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
789: . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
790: - iremote - remote locations of root vertices for each leaf on the current process

792:   Level: intermediate

794:   Notes:
795:   The graph is not required to have been set; if it has not, `nroots` is returned as -1

797:   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`

799:   Fortran Note:
800:   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed

802: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
803: @*/
804: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
805: {
806:   PetscFunctionBegin;
808:   if (sf->ops->GetGraph) {
809:     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
810:   } else {
811:     if (nroots) *nroots = sf->nroots;
812:     if (nleaves) *nleaves = sf->nleaves;
813:     if (ilocal) *ilocal = sf->mine;
814:     if (iremote) *iremote = sf->remote;
815:   }
816:   PetscFunctionReturn(PETSC_SUCCESS);
817: }
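
/*
   A sketch of traversing the graph returned by PetscSFGetGraph(), including the NULL-ilocal
   convention for contiguous leaf storage (assuming `sf` has its graph set):

     PetscInt           nroots, nleaves;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
     for (PetscInt i = 0; i < nleaves; i++) {
       const PetscInt leaf = ilocal ? ilocal[i] : i;
       // leafdata[leaf] corresponds to root iremote[i].index on MPI process iremote[i].rank
     }
*/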

819: /*@
820:   PetscSFGetLeafRange - Get the active leaf ranges

822:   Not Collective

824:   Input Parameter:
825: . sf - star forest

827:   Output Parameters:
828: + minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
829: - maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.

831:   Level: developer

833: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
834: @*/
835: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
836: {
837:   PetscFunctionBegin;
839:   PetscSFCheckGraphSet(sf, 1);
840:   if (minleaf) *minleaf = sf->minleaf;
841:   if (maxleaf) *maxleaf = sf->maxleaf;
842:   PetscFunctionReturn(PETSC_SUCCESS);
843: }
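
/*
   A sketch of the common use of the leaf range: sizing a buffer that is indexed directly by leaf
   location, mirroring the maxlocal computations elsewhere in this file (assuming `sf` has its graph set):

     PetscInt     minleaf, maxleaf;
     PetscScalar *leafmem;

     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
     PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafmem)); // an empty leaf range yields 0 entries
     // ... address leaf data as leafmem[leaf - minleaf] ...
     PetscCall(PetscFree(leafmem));
*/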

845: /*@
846:   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

848:   Collective

850:   Input Parameters:
851: + A    - the star forest
852: . obj  - Optional object that provides the prefix for the option names
853: - name - command line option

855:   Level: intermediate

857:   Note:
858:   See `PetscObjectViewFromOptions()` for the possible `PetscViewer` and `PetscViewerFormat` values

860: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
861: @*/
862: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
863: {
864:   PetscFunctionBegin;
866:   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
867:   PetscFunctionReturn(PETSC_SUCCESS);
868: }

870: /*@
871:   PetscSFView - view a star forest

873:   Collective

875:   Input Parameters:
876: + sf     - star forest
877: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

879:   Level: beginner

881: .seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
882: @*/
883: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
884: {
885:   PetscBool         isascii;
886:   PetscViewerFormat format;

888:   PetscFunctionBegin;
890:   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
892:   PetscCheckSameComm(sf, 1, viewer, 2);
893:   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
894:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
895:   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
896:     PetscMPIInt rank;
897:     PetscInt    j;

899:     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
900:     PetscCall(PetscViewerASCIIPushTab(viewer));
901:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
902:       if (!sf->graphset) {
903:         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
904:         PetscCall(PetscViewerASCIIPopTab(viewer));
905:         PetscFunctionReturn(PETSC_SUCCESS);
906:       }
907:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
908:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
909:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
910:       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
911:       PetscCall(PetscViewerFlush(viewer));
912:       PetscCall(PetscViewerGetFormat(viewer, &format));
913:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
914:         PetscMPIInt *tmpranks, *perm;

916:         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
917:         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
918:         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
919:         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
920:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
921:         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
922:           PetscMPIInt i = perm[ii];

924:           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
925:           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
926:         }
927:         PetscCall(PetscFree2(tmpranks, perm));
928:       }
929:       PetscCall(PetscViewerFlush(viewer));
930:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
931:     }
932:     PetscCall(PetscViewerASCIIPopTab(viewer));
933:   }
934:   PetscTryTypeMethod(sf, View, viewer);
935:   PetscFunctionReturn(PETSC_SUCCESS);
936: }

938: /*@C
939:   PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process

941:   Not Collective

943:   Input Parameter:
944: . sf - star forest

946:   Output Parameters:
947: + nranks  - number of MPI processes referenced by local part
948: . ranks   - [`nranks`] array of MPI ranks
949: . roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
950: . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
951: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`

953:   Level: developer

955: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
956: @*/
957: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
958: {
959:   PetscFunctionBegin;
961:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
962:   if (sf->ops->GetRootRanks) {
963:     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
964:   } else {
965:     /* The generic implementation */
966:     if (nranks) *nranks = sf->nranks;
967:     if (ranks) *ranks = sf->ranks;
968:     if (roffset) *roffset = sf->roffset;
969:     if (rmine) *rmine = sf->rmine;
970:     if (rremote) *rremote = sf->rremote;
971:   }
972:   PetscFunctionReturn(PETSC_SUCCESS);
973: }
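
/*
   A sketch of walking the per-rank arrays returned by PetscSFGetRootRanks() (assuming `sf` has been
   set up):

     PetscMPIInt        nranks;
     const PetscMPIInt *ranks;
     const PetscInt    *roffset, *rmine, *rremote;

     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
     for (PetscMPIInt r = 0; r < nranks; r++) {
       for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
         // local leaf rmine[j] on this process references root rremote[j] on MPI process ranks[r]
       }
     }
*/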

975: /*@C
976:   PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process

978:   Not Collective

980:   Input Parameter:
981: . sf - star forest

983:   Output Parameters:
984: + niranks  - number of leaf MPI processes referencing roots on this process
985: . iranks   - [`niranks`] array of MPI ranks
986: . ioffset  - [`niranks`+1] offset in `irootloc` for each MPI process
987: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process

989:   Level: developer

991: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
992: @*/
993: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
994: {
995:   PetscFunctionBegin;
997:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
998:   if (sf->ops->GetLeafRanks) {
999:     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1000:   } else {
1001:     PetscSFType type;
1002:     PetscCall(PetscSFGetType(sf, &type));
1003:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1004:   }
1005:   PetscFunctionReturn(PETSC_SUCCESS);
1006: }

1008: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1009: {
1010:   PetscInt i;
1011:   for (i = 0; i < n; i++) {
1012:     if (needle == list[i]) return PETSC_TRUE;
1013:   }
1014:   return PETSC_FALSE;
1015: }

1017: /*@C
1018:   PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.

1020:   Collective

1022:   Input Parameters:
1023: + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1024: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

1026:   Level: developer

1028: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
1029: @*/
1030: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1031: {
1032:   PetscHMapI    table;
1033:   PetscHashIter pos;
1034:   PetscMPIInt   size, groupsize, *groupranks, *ranks;
1035:   PetscInt     *rcount;
1036:   PetscInt      irank, sfnrank, ranksi;
1037:   PetscMPIInt   i, orank = -1;

1039:   PetscFunctionBegin;
1041:   PetscSFCheckGraphSet(sf, 1);
1042:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1043:   PetscCall(PetscHMapICreateWithSize(10, &table));
1044:   for (i = 0; i < sf->nleaves; i++) {
1045:     /* Log 1-based rank */
1046:     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1047:   }
1048:   PetscCall(PetscHMapIGetSize(table, &sfnrank));
1049:   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1050:   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1051:   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1052:   PetscHashIterBegin(table, pos);
1053:   for (i = 0; i < sf->nranks; i++) {
1054:     PetscHashIterGetKey(table, pos, ranksi);
1055:     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1056:     PetscHashIterGetVal(table, pos, rcount[i]);
1057:     PetscHashIterNext(table, pos);
1058:     ranks[i]--; /* Convert back to 0-based */
1059:   }
1060:   PetscCall(PetscHMapIDestroy(&table));

1062:   /* We expect that dgroup is reliably "small" while nranks could be large */
1063:   {
1064:     MPI_Group    group = MPI_GROUP_NULL;
1065:     PetscMPIInt *dgroupranks;

1067:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1068:     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1069:     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1070:     PetscCall(PetscMalloc1(groupsize, &groupranks));
1071:     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1072:     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1073:     PetscCallMPI(MPI_Group_free(&group));
1074:     PetscCall(PetscFree(dgroupranks));
1075:   }

1077:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1078:   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1079:     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1080:       if (InList(ranks[i], groupsize, groupranks)) break;
1081:     }
1082:     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1083:       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1084:     }
1085:     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1086:       PetscMPIInt tmprank;
1087:       PetscInt    tmpcount;

1089:       tmprank             = ranks[i];
1090:       tmpcount            = rcount[i];
1091:       ranks[i]            = ranks[sf->ndranks];
1092:       rcount[i]           = rcount[sf->ndranks];
1093:       ranks[sf->ndranks]  = tmprank;
1094:       rcount[sf->ndranks] = tmpcount;
1095:       sf->ndranks++;
1096:     }
1097:   }
1098:   PetscCall(PetscFree(groupranks));
1099:   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1100:   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1101:   sf->roffset[0] = 0;
1102:   for (i = 0; i < sf->nranks; i++) {
1103:     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1104:     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1105:     rcount[i]          = 0;
1106:   }
1107:   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1108:     /* short circuit */
1109:     if (orank != sf->remote[i].rank) {
1110:       /* Search for index of iremote[i].rank in sf->ranks */
1111:       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1112:       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1113:       if (irank < 0) {
1114:         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1115:         if (irank >= 0) irank += sf->ndranks;
1116:       }
1117:     }
1118:     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
1119:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1120:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1121:     rcount[irank]++;
1122:   }
1123:   PetscCall(PetscFree2(rcount, ranks));
1124:   PetscFunctionReturn(PETSC_SUCCESS);
1125: }

1127: /*@C
1128:   PetscSFGetGroups - gets incoming and outgoing process groups

1130:   Collective

1132:   Input Parameter:
1133: . sf - star forest

1135:   Output Parameters:
1136: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1137: - outgoing - group of destination processes for outgoing edges (roots that I reference)

1139:   Level: developer

1141: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1142: @*/
1143: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1144: {
1145:   MPI_Group group = MPI_GROUP_NULL;

1147:   PetscFunctionBegin;
1149:   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1150:   if (sf->ingroup == MPI_GROUP_NULL) {
1151:     PetscInt        i;
1152:     const PetscInt *indegree;
1153:     PetscMPIInt     rank, *outranks, *inranks, indegree0;
1154:     PetscSFNode    *remote;
1155:     PetscSF         bgcount;

1157:     /* Compute the number of incoming ranks */
1158:     PetscCall(PetscMalloc1(sf->nranks, &remote));
1159:     for (i = 0; i < sf->nranks; i++) {
1160:       remote[i].rank  = sf->ranks[i];
1161:       remote[i].index = 0;
1162:     }
1163:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1164:     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1165:     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1166:     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1167:     /* Enumerate the incoming ranks */
1168:     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1169:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1170:     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1171:     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1172:     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1173:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1174:     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1175:     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1176:     PetscCallMPI(MPI_Group_free(&group));
1177:     PetscCall(PetscFree2(inranks, outranks));
1178:     PetscCall(PetscSFDestroy(&bgcount));
1179:   }
1180:   *incoming = sf->ingroup;

1182:   if (sf->outgroup == MPI_GROUP_NULL) {
1183:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1184:     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1185:     PetscCallMPI(MPI_Group_free(&group));
1186:   }
1187:   *outgoing = sf->outgroup;
1188:   PetscFunctionReturn(PETSC_SUCCESS);
1189: }

1191: /*@
1192:   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks

1194:   Collective

1196:   Input Parameter:
1197: . sf - star forest

1199:   Output Parameter:
1200: . rsf - the star forest with a single root per MPI process to perform communications

1202:   Level: developer

1204: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1205: @*/
1206: PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1207: {
1208:   PetscFunctionBegin;
1210:   PetscAssertPointer(rsf, 2);
1211:   if (!sf->rankssf) {
1212:     PetscSFNode       *rremotes;
1213:     const PetscMPIInt *ranks;
1214:     PetscMPIInt        nranks;

1216:     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1217:     PetscCall(PetscMalloc1(nranks, &rremotes));
1218:     for (PetscInt i = 0; i < nranks; i++) {
1219:       rremotes[i].rank  = ranks[i];
1220:       rremotes[i].index = 0;
1221:     }
1222:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1223:     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1224:   }
1225:   *rsf = sf->rankssf;
1226:   PetscFunctionReturn(PETSC_SUCCESS);
1227: }

1229: /*@
1230:   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

1232:   Collective

1234:   Input Parameter:
1235: . sf - star forest that may contain roots with 0 or with more than 1 vertex

1237:   Output Parameter:
1238: . multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)

1240:   Level: developer

1242:   Note:
1243:   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1244:   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1245:   edge, it is a candidate for future optimization that might involve its removal.

1247: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1248: @*/
1249: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1250: {
1251:   PetscFunctionBegin;
1253:   PetscAssertPointer(multi, 2);
1254:   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1255:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1256:     *multi           = sf->multi;
1257:     sf->multi->multi = sf->multi;
1258:     PetscFunctionReturn(PETSC_SUCCESS);
1259:   }
1260:   if (!sf->multi) {
1261:     const PetscInt *indegree;
1262:     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1263:     PetscSFNode    *remote;
1264:     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1265:     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1266:     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1267:     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1268:     inoffset[0] = 0;
1269:     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1270:     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1271:     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1272:     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1273:     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1274:     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1275:       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1276:     }
1277:     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1278:     for (i = 0; i < sf->nleaves; i++) {
1279:       remote[i].rank  = sf->remote[i].rank;
1280:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1281:     }
1282:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1283:     sf->multi->multi = sf->multi;
1284:     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1285:     if (sf->rankorder) { /* Sort the ranks */
1286:       PetscMPIInt  rank;
1287:       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1288:       PetscSFNode *newremote;
1289:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1290:       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1291:       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1292:       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1293:       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1294:       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1295:       /* Sort the incoming ranks at each vertex, build the inverse map */
1296:       for (i = 0; i < sf->nroots; i++) {
1297:         PetscInt j;
1298:         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1299:         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1300:         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1301:       }
1302:       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1303:       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1304:       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1305:       for (i = 0; i < sf->nleaves; i++) {
1306:         newremote[i].rank  = sf->remote[i].rank;
1307:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1308:       }
1309:       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1310:       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1311:     }
1312:     PetscCall(PetscFree3(inoffset, outones, outoffset));
1313:   }
1314:   *multi = sf->multi;
1315:   PetscFunctionReturn(PETSC_SUCCESS);
1316: }

1318: /*@C
1319:   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

1321:   Collective

1323:   Input Parameters:
1324: + sf        - original star forest
1325: . nselected - number of selected roots on this MPI process
1326: - selected  - indices of the selected roots on this MPI process

1328:   Output Parameter:
1329: . esf - new star forest

1331:   Level: advanced

1333:   Note:
1334:   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1335:   be done by calling `PetscSFGetGraph()`.
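
  Example:
  A minimal sketch (error checking elided); assumes `sf` already has its graph set and `sel` is a placeholder array of locally selected root indices.
.vb
  PetscSF         esf;
  PetscInt        nselected = 2, sel[2] = {0, 3}, nleaves;
  const PetscInt *ilocal;

  PetscSFCreateEmbeddedRootSF(sf, nselected, sel, &esf);
  PetscSFGetGraph(esf, NULL, &nleaves, &ilocal, NULL); // indices of the leaves still participating
  // ... use esf ...
  PetscSFDestroy(&esf);
.ve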

1337: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1338: @*/
1339: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
1340: {
1341:   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1342:   const PetscInt    *ilocal;
1343:   signed char       *rootdata, *leafdata, *leafmem;
1344:   const PetscSFNode *iremote;
1345:   PetscSFNode       *new_iremote;
1346:   MPI_Comm           comm;

1348:   PetscFunctionBegin;
1350:   PetscSFCheckGraphSet(sf, 1);
1351:   if (nselected) PetscAssertPointer(selected, 3);
1352:   PetscAssertPointer(esf, 4);

1354:   PetscCall(PetscSFSetUp(sf));
1355:   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1356:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1357:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

1359:   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1360:     PetscBool dups;
1361:     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1362:     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1363:     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1364:   }

1366:   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1367:   else {
1368:     /* A generic version of creating embedded sf */
1369:     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1370:     maxlocal = maxleaf - minleaf + 1;
1371:     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1372:     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1373:     /* Tag selected roots and bcast to leaves */
1374:     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1375:     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1376:     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

1378:     /* Build esf with leaves that are still connected */
1379:     esf_nleaves = 0;
1380:     for (i = 0; i < nleaves; i++) {
1381:       j = ilocal ? ilocal[i] : i;
1382:       /* esf_nleaves += leafdata[j] should work in theory, but it failed due to SFWindow bugs
1383:          in PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1384:       */
1385:       esf_nleaves += (leafdata[j] ? 1 : 0);
1386:     }
1387:     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1388:     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1389:     for (i = n = 0; i < nleaves; i++) {
1390:       j = ilocal ? ilocal[i] : i;
1391:       if (leafdata[j]) {
1392:         new_ilocal[n]        = j;
1393:         new_iremote[n].rank  = iremote[i].rank;
1394:         new_iremote[n].index = iremote[i].index;
1395:         ++n;
1396:       }
1397:     }
1398:     PetscCall(PetscSFCreate(comm, esf));
1399:     PetscCall(PetscSFSetFromOptions(*esf));
1400:     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1401:     PetscCall(PetscFree2(rootdata, leafmem));
1402:   }
1403:   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1404:   PetscFunctionReturn(PETSC_SUCCESS);
1405: }

1407: /*@C
1408:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices

1410:   Collective

1412:   Input Parameters:
1413: + sf        - original star forest
1414: . nselected - number of selected leaves on this MPI process
1415: - selected  - indices of the selected leaves on this MPI process

1417:   Output Parameter:
1418: . newsf - new star forest

1420:   Level: advanced
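
  Example:
  A minimal sketch (error checking elided); `sel` is a placeholder array of locally selected leaf indices.
.vb
  PetscSF  lsf;
  PetscInt sel[2] = {1, 2};

  PetscSFCreateEmbeddedLeafSF(sf, 2, sel, &lsf);
  // ... use lsf ...
  PetscSFDestroy(&lsf);
.ve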

1422: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1423: @*/
1424: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf)
1425: {
1426:   const PetscSFNode *iremote;
1427:   PetscSFNode       *new_iremote;
1428:   const PetscInt    *ilocal;
1429:   PetscInt           i, nroots, *leaves, *new_ilocal;
1430:   MPI_Comm           comm;

1432:   PetscFunctionBegin;
1434:   PetscSFCheckGraphSet(sf, 1);
1435:   if (nselected) PetscAssertPointer(selected, 3);
1436:   PetscAssertPointer(newsf, 4);

1438:   /* Uniq selected[] and put results in leaves[] */
1439:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1440:   PetscCall(PetscMalloc1(nselected, &leaves));
1441:   PetscCall(PetscArraycpy(leaves, selected, nselected));
1442:   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1443:   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);

1445:   /* Optimize the routine only when sf is set up, so we can reuse sf's communication pattern */
1446:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1447:   else {
1448:     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1449:     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1450:     PetscCall(PetscMalloc1(nselected, &new_iremote));
1451:     for (i = 0; i < nselected; ++i) {
1452:       const PetscInt l     = leaves[i];
1453:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1454:       new_iremote[i].rank  = iremote[l].rank;
1455:       new_iremote[i].index = iremote[l].index;
1456:     }
1457:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1458:     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1459:   }
1460:   PetscCall(PetscFree(leaves));
1461:   PetscFunctionReturn(PETSC_SUCCESS);
1462: }

1464: /*@C
1465:   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`

1467:   Collective

1469:   Input Parameters:
1470: + sf       - star forest on which to communicate
1471: . unit     - data type associated with each node
1472: . rootdata - buffer to broadcast
1473: - op       - operation to use for reduction

1475:   Output Parameter:
1476: . leafdata - buffer to be reduced with values from each leaf's respective root

1478:   Level: intermediate

1480:   Note:
1481:   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1482:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory type, you should
1483:   use `PetscSFBcastWithMemTypeBegin()` instead.
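
  Example:
  A typical begin/end pairing (error checking elided); `rootdata` and `leafdata` are placeholder `PetscInt` arrays sized by the local numbers of roots and leaves.
.vb
  PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
  // unrelated local work can overlap the communication here
  PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
.ve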

1485: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1486: @*/
1487: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1488: {
1489:   PetscMemType rootmtype, leafmtype;

1491:   PetscFunctionBegin;
1493:   PetscCall(PetscSFSetUp(sf));
1494:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1495:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1496:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1497:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1498:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1499:   PetscFunctionReturn(PETSC_SUCCESS);
1500: }

1502: /*@C
1503:   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1504:   to `PetscSFBcastEnd()`

1506:   Collective

1508:   Input Parameters:
1509: + sf        - star forest on which to communicate
1510: . unit      - data type associated with each node
1511: . rootmtype - memory type of `rootdata`
1512: . rootdata  - buffer to broadcast
1513: . leafmtype - memory type of `leafdata`
1514: - op        - operation to use for reduction

1516:   Output Parameter:
1517: . leafdata - buffer to be reduced with values from each leaf's respective root

1519:   Level: intermediate
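
  Example:
  A sketch (error checking elided) that skips the memory-type query when both buffers are known to be in host memory.
.vb
  PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE);
  PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
.ve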

1521: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1522: @*/
1523: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1524: {
1525:   PetscFunctionBegin;
1527:   PetscCall(PetscSFSetUp(sf));
1528:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1529:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1530:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1531:   PetscFunctionReturn(PETSC_SUCCESS);
1532: }

1534: /*@C
1535:   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

1537:   Collective

1539:   Input Parameters:
1540: + sf       - star forest
1541: . unit     - data type
1542: . rootdata - buffer to broadcast
1543: - op       - operation to use for reduction

1545:   Output Parameter:
1546: . leafdata - buffer to be reduced with values from each leaf's respective root

1548:   Level: intermediate

1550: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1551: @*/
1552: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1553: {
1554:   PetscFunctionBegin;
1556:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1557:   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1558:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1559:   PetscFunctionReturn(PETSC_SUCCESS);
1560: }

1562: /*@C
1563:   PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()`

1565:   Collective

1567:   Input Parameters:
1568: + sf       - star forest
1569: . unit     - data type
1570: . leafdata - values to reduce (communicate)
1571: - op       - reduction operation

1573:   Output Parameter:
1574: . rootdata - result of reduction of values (`leafdata`) from all leaves to each root

1576:   Level: intermediate

1578:   Note:
1579:   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1580:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory type, you should
1581:   use `PetscSFReduceWithMemTypeBegin()` instead.
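
  Example:
  A sketch (error checking elided) summing leaf contributions into roots; `leafdata` and `rootdata` are placeholder `PetscInt` arrays.
.vb
  PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM);
  PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM);
.ve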

1583: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype`
1584: @*/
1585: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1586: {
1587:   PetscMemType rootmtype, leafmtype;

1589:   PetscFunctionBegin;
1591:   PetscCall(PetscSFSetUp(sf));
1592:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1593:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1594:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1595:   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1596:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1597:   PetscFunctionReturn(PETSC_SUCCESS);
1598: }

1600: /*@C
1601:   PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()`

1603:   Collective

1605:   Input Parameters:
1606: + sf        - star forest
1607: . unit      - data type
1608: . leafmtype - memory type of `leafdata`
1609: . leafdata  - values to reduce
1610: . rootmtype - memory type of `rootdata`
1611: - op        - reduction operation

1613:   Output Parameter:
1614: . rootdata - result of reduction of values from all leaves of each root

1616:   Level: intermediate

1618: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1619: @*/
1620: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1621: {
1622:   PetscFunctionBegin;
1624:   PetscCall(PetscSFSetUp(sf));
1625:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1626:   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1627:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1628:   PetscFunctionReturn(PETSC_SUCCESS);
1629: }

1631: /*@C
1632:   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`

1634:   Collective

1636:   Input Parameters:
1637: + sf       - star forest
1638: . unit     - data type
1639: . leafdata - values to reduce
1640: - op       - reduction operation

1642:   Output Parameter:
1643: . rootdata - result of reduction of values from all leaves of each root

1645:   Level: intermediate

1647: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1648: @*/
1649: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1650: {
1651:   PetscFunctionBegin;
1653:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1654:   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1655:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1656:   PetscFunctionReturn(PETSC_SUCCESS);
1657: }

1659: /*@C
1660:   PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates them atomically by applying an operation using `leafdata`,
1661:   to be completed with `PetscSFFetchAndOpEnd()`

1663:   Collective

1665:   Input Parameters:
1666: + sf       - star forest
1667: . unit     - data type
1668: . leafdata - leaf values to use in reduction
1669: - op       - operation to use for reduction

1671:   Output Parameters:
1672: + rootdata   - root values to be updated, input state is seen by first process to perform an update
1673: - leafupdate - state at each leaf's respective root immediately prior to atomic update

1675:   Level: advanced

1677:   Note:
1678:   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1679:   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1680:   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1681:   integers.
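
  Example:
  A sketch (error checking elided) that assigns each leaf a unique slot at its root, similar to the offset computation used when building the multi-`PetscSF`; `rootcounts`, `ones`, and `leafoffsets` are placeholder `PetscInt` arrays with `ones[i] = 1`.
.vb
  PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcounts, ones, leafoffsets, MPI_SUM);
  PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcounts, ones, leafoffsets, MPI_SUM);
  // leafoffsets[i] now holds the value of rootcounts at the leaf's root just before that leaf's increment
.ve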

1683: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1684: @*/
1685: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1686: {
1687:   PetscMemType rootmtype, leafmtype, leafupdatemtype;

1689:   PetscFunctionBegin;
1691:   PetscCall(PetscSFSetUp(sf));
1692:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1693:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1694:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1695:   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1696:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1697:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1698:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1699:   PetscFunctionReturn(PETSC_SUCCESS);
1700: }

1702: /*@C
1703:   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1704:   applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()`

1706:   Collective

1708:   Input Parameters:
1709: + sf              - star forest
1710: . unit            - data type
1711: . rootmtype       - memory type of `rootdata`
1712: . leafmtype       - memory type of `leafdata`
1713: . leafdata        - leaf values to use in reduction
1714: . leafupdatemtype - memory type of `leafupdate`
1715: - op              - operation to use for reduction

1717:   Output Parameters:
1718: + rootdata   - root values to be updated, input state is seen by first process to perform an update
1719: - leafupdate - state at each leaf's respective root immediately prior to atomic update

1721:   Level: advanced

1723:   Note:
1724:   See `PetscSFFetchAndOpBegin()` for more details.

1726: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1727: @*/
1728: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1729: {
1730:   PetscFunctionBegin;
1732:   PetscCall(PetscSFSetUp(sf));
1733:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1734:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1735:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1736:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1737:   PetscFunctionReturn(PETSC_SUCCESS);
1738: }

1740: /*@C
1741:   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1742:   to fetch values from roots and update atomically by applying operation using `leafdata`

1744:   Collective

1746:   Input Parameters:
1747: + sf       - star forest
1748: . unit     - data type
1749: . leafdata - leaf values to use in reduction
1750: - op       - operation to use for reduction

1752:   Output Parameters:
1753: + rootdata   - root values to be updated, input state is seen by first process to perform an update
1754: - leafupdate - state at each leaf's respective root immediately prior to atomic update

1756:   Level: advanced

1758: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1759: @*/
1760: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1761: {
1762:   PetscFunctionBegin;
1764:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1765:   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1766:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1767:   PetscFunctionReturn(PETSC_SUCCESS);
1768: }

1770: /*@C
1771:   PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

1773:   Collective

1775:   Input Parameter:
1776: . sf - star forest

1778:   Output Parameter:
1779: . degree - degree (the number of leaves) of each root vertex

1781:   Level: advanced

1783:   Note:
1784:   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
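
  Example:
  A sketch (error checking elided); `degree` is valid only after the matching `PetscSFComputeDegreeEnd()`.
.vb
  const PetscInt *degree;

  PetscSFComputeDegreeBegin(sf, &degree);
  PetscSFComputeDegreeEnd(sf, &degree);
  // degree[i] now holds the number of leaves attached to root i; do not free it
.ve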

1786: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1787: @*/
1788: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1789: {
1790:   PetscFunctionBegin;
1792:   PetscSFCheckGraphSet(sf, 1);
1793:   PetscAssertPointer(degree, 2);
1794:   if (!sf->degreeknown) {
1795:     PetscInt i, nroots = sf->nroots, maxlocal;
1796:     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1797:     maxlocal = sf->maxleaf - sf->minleaf + 1;
1798:     PetscCall(PetscMalloc1(nroots, &sf->degree));
1799:     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1800:     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1801:     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1802:     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1803:   }
1804:   *degree = NULL;
1805:   PetscFunctionReturn(PETSC_SUCCESS);
1806: }

1808: /*@C
1809:   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

1811:   Collective

1813:   Input Parameter:
1814: . sf - star forest

1816:   Output Parameter:
1817: . degree - degree of each root vertex

1819:   Level: developer

1821:   Note:
1822:   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

1824: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1825: @*/
1826: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1827: {
1828:   PetscFunctionBegin;
1830:   PetscSFCheckGraphSet(sf, 1);
1831:   PetscAssertPointer(degree, 2);
1832:   if (!sf->degreeknown) {
1833:     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1834:     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1835:     PetscCall(PetscFree(sf->degreetmp));
1836:     sf->degreeknown = PETSC_TRUE;
1837:   }
1838:   *degree = sf->degree;
1839:   PetscFunctionReturn(PETSC_SUCCESS);
1840: }

1842: /*@C
1843:   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1844:   Each multi-root is assigned the index of the corresponding original root.

1846:   Collective

1848:   Input Parameters:
1849: + sf     - star forest
1850: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()`

1852:   Output Parameters:
1853: + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1854: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

1856:   Level: developer

1858:   Note:
1859:   The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed.
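
  Example:
  A sketch (error checking elided) combining the degree computation with this routine.
.vb
  const PetscInt *degree;
  PetscInt       *morig, nmroots;

  PetscSFComputeDegreeBegin(sf, &degree);
  PetscSFComputeDegreeEnd(sf, &degree);
  PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig);
  // morig[k] is the original root index of multi-root k
  PetscFree(morig);
.ve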

1861: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1862: @*/
1863: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1864: {
1865:   PetscSF  msf;
1866:   PetscInt k = 0, nroots, nmroots;

1868:   PetscFunctionBegin;
1870:   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1871:   if (nroots) PetscAssertPointer(degree, 2);
1872:   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1873:   PetscAssertPointer(multiRootsOrigNumbering, 4);
1874:   PetscCall(PetscSFGetMultiSF(sf, &msf));
1875:   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1876:   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1877:   for (PetscInt i = 0; i < nroots; i++) {
1878:     if (!degree[i]) continue;
1879:     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1880:   }
1881:   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1882:   if (nMultiRoots) *nMultiRoots = nmroots;
1883:   PetscFunctionReturn(PETSC_SUCCESS);
1884: }

1886: /*@C
1887:   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

1889:   Collective

1891:   Input Parameters:
1892: + sf       - star forest
1893: . unit     - data type
1894: - leafdata - leaf data to gather to roots

1896:   Output Parameter:
1897: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)

1899:   Level: intermediate
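
  Example:
  A sketch (error checking elided); `multirootdata` is a placeholder buffer sized as the sum of the local root degrees, obtainable with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`.
.vb
  PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata);
  PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata);
.ve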

1901: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1902: @*/
1903: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1904: {
1905:   PetscSF multi = NULL;

1907:   PetscFunctionBegin;
1909:   PetscCall(PetscSFSetUp(sf));
1910:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1911:   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1912:   PetscFunctionReturn(PETSC_SUCCESS);
1913: }

1915: /*@C
1916:   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

1918:   Collective

1920:   Input Parameters:
1921: + sf       - star forest
1922: . unit     - data type
1923: - leafdata - leaf data to gather to roots

1925:   Output Parameter:
1926: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)

1928:   Level: intermediate

1930: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1931: @*/
1932: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1933: {
1934:   PetscSF multi = NULL;

1936:   PetscFunctionBegin;
1938:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1939:   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1940:   PetscFunctionReturn(PETSC_SUCCESS);
1941: }

1943: /*@C
1944:   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

1946:   Collective

1948:   Input Parameters:
1949: + sf            - star forest
1950: . unit          - data type
1951: - multirootdata - root buffer to send to each leaf; one unit of data is provided to each leaf, so the amount of space per root is equal to its degree (which is the number of leaves that root has)

1953:   Output Parameter:
1954: . leafdata - leaf data to be updated with personal data from each respective root

1956:   Level: intermediate
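
  Example:
  A sketch (error checking elided); `multirootdata` is sized exactly as for `PetscSFGatherBegin()`.
.vb
  PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata);
  PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata);
.ve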

1958: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1959: @*/
1960: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1961: {
1962:   PetscSF multi = NULL;

1964:   PetscFunctionBegin;
1966:   PetscCall(PetscSFSetUp(sf));
1967:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1968:   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1969:   PetscFunctionReturn(PETSC_SUCCESS);
1970: }

1972: /*@C
1973:   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

1975:   Collective

1977:   Input Parameters:
1978: + sf            - star forest
1979: . unit          - data type
1980: - multirootdata - root buffer to send to each leaf, one unit of data per leaf

1982:   Output Parameter:
1983: . leafdata - leaf data to be updated with personal data from each respective root

1985:   Level: intermediate

1987: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1988: @*/
1989: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1990: {
1991:   PetscSF multi = NULL;

1993:   PetscFunctionBegin;
1995:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1996:   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1997:   PetscFunctionReturn(PETSC_SUCCESS);
1998: }

2000: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2001: {
2002:   PetscInt        i, n, nleaves;
2003:   const PetscInt *ilocal = NULL;
2004:   PetscHSetI      seen;

2006:   PetscFunctionBegin;
2007:   if (PetscDefined(USE_DEBUG)) {
2008:     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2009:     PetscCall(PetscHSetICreate(&seen));
2010:     for (i = 0; i < nleaves; i++) {
2011:       const PetscInt leaf = ilocal ? ilocal[i] : i;
2012:       PetscCall(PetscHSetIAdd(seen, leaf));
2013:     }
2014:     PetscCall(PetscHSetIGetSize(seen, &n));
2015:     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2016:     PetscCall(PetscHSetIDestroy(&seen));
2017:   }
2018:   PetscFunctionReturn(PETSC_SUCCESS);
2019: }

2021: /*@
2022:   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

2024:   Input Parameters:
2025: + sfA - The first `PetscSF`
2026: - sfB - The second `PetscSF`

2028:   Output Parameter:
2029: . sfBA - The composite `PetscSF`

2031:   Level: developer

2033:   Notes:
2034:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2035:   forests, i.e. the same leaf is not connected with different roots.

2037:   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2038:   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2039:   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2040:   `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes.
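
  Example:
  A sketch (error checking elided); `sfA` and `sfB` are assumed to be set up with `sfA`'s leaf space overlapping `sfB`'s root space.
.vb
  PetscSF sfBA;

  PetscSFCompose(sfA, sfB, &sfBA);
  // a bcast on sfBA is equivalent to a bcast on sfA followed by a bcast on sfB
  PetscSFDestroy(&sfBA);
.ve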

2042: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2043: @*/
2044: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2045: {
2046:   const PetscSFNode *remotePointsA, *remotePointsB;
2047:   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2048:   const PetscInt    *localPointsA, *localPointsB;
2049:   PetscInt          *localPointsBA;
2050:   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2051:   PetscBool          denseB;

2053:   PetscFunctionBegin;
2055:   PetscSFCheckGraphSet(sfA, 1);
2057:   PetscSFCheckGraphSet(sfB, 2);
2058:   PetscCheckSameComm(sfA, 1, sfB, 2);
2059:   PetscAssertPointer(sfBA, 3);
2060:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2061:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

2063:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2064:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2065:   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2066:      numRootsB; otherwise, garbage will be broadcast.
2067:      Example (comm size = 1):
2068:      sfA: 0 <- (0, 0)
2069:      sfB: 100 <- (0, 0)
2070:           101 <- (0, 1)
2071:      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2072:      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2073:      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2074:      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2075:   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2076:   for (i = 0; i < numRootsB; i++) {
2077:     reorderedRemotePointsA[i].rank  = -1;
2078:     reorderedRemotePointsA[i].index = -1;
2079:   }
2080:   for (i = 0; i < numLeavesA; i++) {
2081:     PetscInt localp = localPointsA ? localPointsA[i] : i;

2083:     if (localp >= numRootsB) continue;
2084:     reorderedRemotePointsA[localp] = remotePointsA[i];
2085:   }
2086:   remotePointsA = reorderedRemotePointsA;
2087:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2088:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2089:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2090:     leafdataB[i].rank  = -1;
2091:     leafdataB[i].index = -1;
2092:   }
2093:   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2094:   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2095:   PetscCall(PetscFree(reorderedRemotePointsA));

2097:   denseB = (PetscBool)!localPointsB;
2098:   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2099:     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2100:     else numLeavesBA++;
2101:   }
2102:   if (denseB) {
2103:     localPointsBA  = NULL;
2104:     remotePointsBA = leafdataB;
2105:   } else {
2106:     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2107:     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2108:     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2109:       const PetscInt l = localPointsB ? localPointsB[i] : i;

2111:       if (leafdataB[l - minleaf].rank == -1) continue;
2112:       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2113:       localPointsBA[numLeavesBA]  = l;
2114:       numLeavesBA++;
2115:     }
2116:     PetscCall(PetscFree(leafdataB));
2117:   }
2118:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2119:   PetscCall(PetscSFSetFromOptions(*sfBA));
2120:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2121:   PetscFunctionReturn(PETSC_SUCCESS);
2122: }

2124: /*@
2125:   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

2127:   Input Parameters:
2128: + sfA - The first `PetscSF`
2129: - sfB - The second `PetscSF`

2131:   Output Parameter:
2132: . sfBA - The composite `PetscSF`.

2134:   Level: developer

2136:   Notes:
2137:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2138:   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2139:   second `PetscSF` must have a degree of 1, i.e., no root has more than one leaf connected.

2141:   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2142:   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2143:   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()`
2144:   on `sfA`, then
2145:   a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots.
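
  Example:
  A sketch (error checking elided); `sfA` and `sfB` are assumed to satisfy the conditions above.
.vb
  PetscSF sfBA;

  PetscSFComposeInverse(sfA, sfB, &sfBA);
  // a bcast on sfBA is equivalent to a bcast on sfA followed by a reduce on sfB
  PetscSFDestroy(&sfBA);
.ve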

2147: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2148: @*/
2149: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2150: {
2151:   const PetscSFNode *remotePointsA, *remotePointsB;
2152:   PetscSFNode       *remotePointsBA;
2153:   const PetscInt    *localPointsA, *localPointsB;
2154:   PetscSFNode       *reorderedRemotePointsA = NULL;
2155:   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2156:   MPI_Op             op;
2157: #if defined(PETSC_USE_64BIT_INDICES)
2158:   PetscBool iswin;
2159: #endif

2161:   PetscFunctionBegin;
2163:   PetscSFCheckGraphSet(sfA, 1);
2165:   PetscSFCheckGraphSet(sfB, 2);
2166:   PetscCheckSameComm(sfA, 1, sfB, 2);
2167:   PetscAssertPointer(sfBA, 3);
2168:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2169:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

2171:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2172:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

2174:   /* TODO: Check roots of sfB have degree of 1 */
2175:   /* Once we implement it, we can replace the MPI_MAXLOC
2176:      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2177:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2178:      the root condition is not met.
2179:    */
2180:   op = MPI_MAXLOC;
2181: #if defined(PETSC_USE_64BIT_INDICES)
2182:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2183:   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2184:   if (iswin) op = MPI_REPLACE;
2185: #endif

2187:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2188:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2189:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2190:     reorderedRemotePointsA[i].rank  = -1;
2191:     reorderedRemotePointsA[i].index = -1;
2192:   }
2193:   if (localPointsA) {
2194:     for (i = 0; i < numLeavesA; i++) {
2195:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2196:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2197:     }
2198:   } else {
2199:     for (i = 0; i < numLeavesA; i++) {
2200:       if (i > maxleaf || i < minleaf) continue;
2201:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2202:     }
2203:   }

2205:   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2206:   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2207:   for (i = 0; i < numRootsB; i++) {
2208:     remotePointsBA[i].rank  = -1;
2209:     remotePointsBA[i].index = -1;
2210:   }

2212:   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2213:   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2214:   PetscCall(PetscFree(reorderedRemotePointsA));
2215:   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2216:     if (remotePointsBA[i].rank == -1) continue;
2217:     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2218:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2219:     localPointsBA[numLeavesBA]        = i;
2220:     numLeavesBA++;
2221:   }
2222:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2223:   PetscCall(PetscSFSetFromOptions(*sfBA));
2224:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2225:   PetscFunctionReturn(PETSC_SUCCESS);
2226: }

2228: /*
2229:   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

2231:   Input Parameter:
2232: . sf - The global `PetscSF`

2234:   Output Parameter:
2235: . out - The local `PetscSF`

2237: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
2238:  */
2239: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2240: {
2241:   MPI_Comm           comm;
2242:   PetscMPIInt        myrank;
2243:   const PetscInt    *ilocal;
2244:   const PetscSFNode *iremote;
2245:   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2246:   PetscSFNode       *liremote;
2247:   PetscSF            lsf;

2249:   PetscFunctionBegin;
2251:   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2252:   else {
2253:     PetscMPIInt irank;

2255:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2256:     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2257:     PetscCallMPI(MPI_Comm_rank(comm, &myrank));

2259:     /* Find out local edges and build a local SF */
2260:     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2261:     for (i = lnleaves = 0; i < nleaves; i++) {
2262:       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2263:       if (irank == myrank) lnleaves++;
2264:     }
2265:     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2266:     PetscCall(PetscMalloc1(lnleaves, &liremote));

2268:     for (i = j = 0; i < nleaves; i++) {
2269:       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2270:       if (irank == myrank) {
2271:         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2272:         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2273:         liremote[j].index = iremote[i].index;
2274:         j++;
2275:       }
2276:     }
2277:     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2278:     PetscCall(PetscSFSetFromOptions(lsf));
2279:     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2280:     PetscCall(PetscSFSetUp(lsf));
2281:     *out = lsf;
2282:   }
2283:   PetscFunctionReturn(PETSC_SUCCESS);
2284: }

2286: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2287: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2288: {
2289:   PetscMemType rootmtype, leafmtype;

2291:   PetscFunctionBegin;
2293:   PetscCall(PetscSFSetUp(sf));
2294:   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2295:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2296:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2297:   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2298:   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2299:   PetscFunctionReturn(PETSC_SUCCESS);
2300: }

2302: /*@
2303:   PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF`

2305:   Input Parameters:
2306: + comm        - the communicator
2307: . nsfs        - the number of input `PetscSF`
2308: . sfs         - the array of input `PetscSF`
2309: . rootMode    - the root mode specifying how roots are handled
2310: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

2312:   Output Parameter:
2313: . newsf - The resulting `PetscSF`

2315:   Level: advanced

2317:   Notes:
2318:   The communicator of all `PetscSF`s in `sfs` must be `comm`.

2320:   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.

2322:   The offsets in `leafOffsets` are added to the original leaf indices.

2324:   If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2325:   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.

2327:   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2328:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2330:   All root modes retain the essential connectivity condition: if two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.

2334:   Parameter `rootMode` controls how the input root spaces are combined.
2335:   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2336:   and is also the same in the output `PetscSF`.

2338:   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2339:   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2340:   roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2341:   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2342:   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2343:   the original root MPI processes are ignored.
2344:   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2345:   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each MPI process
2346:   to keep the load balancing.
2347:   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes.

2349:   Example:
2350:   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2351: .vb
2352:   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2353:   for m in {local,global,shared}; do
2354:     mpiexec -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2355:   done
2356: .ve
2357:   we generate two identical `PetscSF`s sf_0 and sf_1,
2358: .vb
2359:   PetscSF Object: sf_0 2 MPI processes
2360:     type: basic
2361:     rank #leaves #roots
2362:     [ 0]       4      2
2363:     [ 1]       4      2
2364:     leaves      roots       roots in global numbering
2365:     ( 0,  0) <- ( 0,  0)  =   0
2366:     ( 0,  1) <- ( 0,  1)  =   1
2367:     ( 0,  2) <- ( 1,  0)  =   2
2368:     ( 0,  3) <- ( 1,  1)  =   3
2369:     ( 1,  0) <- ( 0,  0)  =   0
2370:     ( 1,  1) <- ( 0,  1)  =   1
2371:     ( 1,  2) <- ( 1,  0)  =   2
2372:     ( 1,  3) <- ( 1,  1)  =   3
2373: .ve
2374:   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2375: .vb
2376:   rootMode = local:
2377:   PetscSF Object: result_sf 2 MPI processes
2378:     type: basic
2379:     rank #leaves #roots
2380:     [ 0]       8      4
2381:     [ 1]       8      4
2382:     leaves      roots       roots in global numbering
2383:     ( 0,  0) <- ( 0,  0)  =   0
2384:     ( 0,  1) <- ( 0,  1)  =   1
2385:     ( 0,  2) <- ( 1,  0)  =   4
2386:     ( 0,  3) <- ( 1,  1)  =   5
2387:     ( 0,  4) <- ( 0,  2)  =   2
2388:     ( 0,  5) <- ( 0,  3)  =   3
2389:     ( 0,  6) <- ( 1,  2)  =   6
2390:     ( 0,  7) <- ( 1,  3)  =   7
2391:     ( 1,  0) <- ( 0,  0)  =   0
2392:     ( 1,  1) <- ( 0,  1)  =   1
2393:     ( 1,  2) <- ( 1,  0)  =   4
2394:     ( 1,  3) <- ( 1,  1)  =   5
2395:     ( 1,  4) <- ( 0,  2)  =   2
2396:     ( 1,  5) <- ( 0,  3)  =   3
2397:     ( 1,  6) <- ( 1,  2)  =   6
2398:     ( 1,  7) <- ( 1,  3)  =   7

2400:   rootMode = global:
2401:   PetscSF Object: result_sf 2 MPI processes
2402:     type: basic
2403:     rank #leaves #roots
2404:     [ 0]       8      4
2405:     [ 1]       8      4
2406:     leaves      roots       roots in global numbering
2407:     ( 0,  0) <- ( 0,  0)  =   0
2408:     ( 0,  1) <- ( 0,  1)  =   1
2409:     ( 0,  2) <- ( 0,  2)  =   2
2410:     ( 0,  3) <- ( 0,  3)  =   3
2411:     ( 0,  4) <- ( 1,  0)  =   4
2412:     ( 0,  5) <- ( 1,  1)  =   5
2413:     ( 0,  6) <- ( 1,  2)  =   6
2414:     ( 0,  7) <- ( 1,  3)  =   7
2415:     ( 1,  0) <- ( 0,  0)  =   0
2416:     ( 1,  1) <- ( 0,  1)  =   1
2417:     ( 1,  2) <- ( 0,  2)  =   2
2418:     ( 1,  3) <- ( 0,  3)  =   3
2419:     ( 1,  4) <- ( 1,  0)  =   4
2420:     ( 1,  5) <- ( 1,  1)  =   5
2421:     ( 1,  6) <- ( 1,  2)  =   6
2422:     ( 1,  7) <- ( 1,  3)  =   7

2424:   rootMode = shared:
2425:   PetscSF Object: result_sf 2 MPI processes
2426:     type: basic
2427:     rank #leaves #roots
2428:     [ 0]       8      2
2429:     [ 1]       8      2
2430:     leaves      roots       roots in global numbering
2431:     ( 0,  0) <- ( 0,  0)  =   0
2432:     ( 0,  1) <- ( 0,  1)  =   1
2433:     ( 0,  2) <- ( 1,  0)  =   2
2434:     ( 0,  3) <- ( 1,  1)  =   3
2435:     ( 0,  4) <- ( 0,  0)  =   0
2436:     ( 0,  5) <- ( 0,  1)  =   1
2437:     ( 0,  6) <- ( 1,  0)  =   2
2438:     ( 0,  7) <- ( 1,  1)  =   3
2439:     ( 1,  0) <- ( 0,  0)  =   0
2440:     ( 1,  1) <- ( 0,  1)  =   1
2441:     ( 1,  2) <- ( 1,  0)  =   2
2442:     ( 1,  3) <- ( 1,  1)  =   3
2443:     ( 1,  4) <- ( 0,  0)  =   0
2444:     ( 1,  5) <- ( 0,  1)  =   1
2445:     ( 1,  6) <- ( 1,  0)  =   2
2446:     ( 1,  7) <- ( 1,  1)  =   3
2447: .ve

2449: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2450: @*/
2451: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2452: {
2453:   PetscInt     i, s, nLeaves, nRoots;
2454:   PetscInt    *leafArrayOffsets;
2455:   PetscInt    *ilocal_new;
2456:   PetscSFNode *iremote_new;
2457:   PetscBool    all_ilocal_null = PETSC_FALSE;
2458:   PetscLayout  glayout         = NULL;
2459:   PetscInt    *gremote         = NULL;
2460:   PetscMPIInt  rank, size;

2462:   PetscFunctionBegin;
2463:   if (PetscDefined(USE_DEBUG)) {
2464:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2466:     PetscCall(PetscSFCreate(comm, &dummy));
2468:     PetscAssertPointer(sfs, 3);
2469:     for (i = 0; i < nsfs; i++) {
2471:       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2472:     }
2474:     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2475:     PetscAssertPointer(newsf, 6);
2476:     PetscCall(PetscSFDestroy(&dummy));
2477:   }
2478:   if (!nsfs) {
2479:     PetscCall(PetscSFCreate(comm, newsf));
2480:     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2481:     PetscFunctionReturn(PETSC_SUCCESS);
2482:   }
2483:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2484:   PetscCallMPI(MPI_Comm_size(comm, &size));

2486:   /* Calculate leaf array offsets */
2487:   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2488:   leafArrayOffsets[0] = 0;
2489:   for (s = 0; s < nsfs; s++) {
2490:     PetscInt nl;

2492:     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2493:     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2494:   }
2495:   nLeaves = leafArrayOffsets[nsfs];

2497:   /* Calculate number of roots */
2498:   switch (rootMode) {
2499:   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2500:     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2501:     if (PetscDefined(USE_DEBUG)) {
2502:       for (s = 1; s < nsfs; s++) {
2503:         PetscInt nr;

2505:         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2506:         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2507:       }
2508:     }
2509:   } break;
2510:   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2511:     /* Also calculate the global layout in this case */
2512:     PetscInt    *nls;
2513:     PetscLayout *lts;
2514:     PetscInt   **inds;
2515:     PetscInt     j;
2516:     PetscInt     rootOffset = 0;

2518:     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2519:     PetscCall(PetscLayoutCreate(comm, &glayout));
2520:     glayout->bs = 1;
2521:     glayout->n  = 0;
2522:     glayout->N  = 0;
2523:     for (s = 0; s < nsfs; s++) {
2524:       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2525:       glayout->n += lts[s]->n;
2526:       glayout->N += lts[s]->N;
2527:     }
2528:     PetscCall(PetscLayoutSetUp(glayout));
2529:     PetscCall(PetscMalloc1(nLeaves, &gremote));
2530:     for (s = 0, j = 0; s < nsfs; s++) {
2531:       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2532:       rootOffset += lts[s]->N;
2533:       PetscCall(PetscLayoutDestroy(&lts[s]));
2534:       PetscCall(PetscFree(inds[s]));
2535:     }
2536:     PetscCall(PetscFree3(lts, nls, inds));
2537:     nRoots = glayout->N;
2538:   } break;
2539:   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2540:     /* nRoots calculated later in this case */
2541:     break;
2542:   default:
2543:     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2544:   }

2546:   if (!leafOffsets) {
2547:     all_ilocal_null = PETSC_TRUE;
2548:     for (s = 0; s < nsfs; s++) {
2549:       const PetscInt *ilocal;

2551:       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2552:       if (ilocal) {
2553:         all_ilocal_null = PETSC_FALSE;
2554:         break;
2555:       }
2556:     }
2557:     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2558:   }

2560:   /* Renumber and concatenate local leaves */
2561:   ilocal_new = NULL;
2562:   if (!all_ilocal_null) {
2563:     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2564:     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2565:     for (s = 0; s < nsfs; s++) {
2566:       const PetscInt *ilocal;
2567:       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2568:       PetscInt        i, nleaves_l;

2570:       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2571:       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2572:     }
2573:   }

2575:   /* Renumber and concatenate remote roots */
2576:   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2577:     PetscInt rootOffset = 0;

2579:     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2580:     for (i = 0; i < nLeaves; i++) {
2581:       iremote_new[i].rank  = -1;
2582:       iremote_new[i].index = -1;
2583:     }
2584:     for (s = 0; s < nsfs; s++) {
2585:       PetscInt           i, nl, nr;
2586:       PetscSF            tmp_sf;
2587:       const PetscSFNode *iremote;
2588:       PetscSFNode       *tmp_rootdata;
2589:       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);

2591:       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2592:       PetscCall(PetscSFCreate(comm, &tmp_sf));
2593:       /* create helper SF with contiguous leaves */
2594:       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2595:       PetscCall(PetscSFSetUp(tmp_sf));
2596:       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2597:       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2598:         for (i = 0; i < nr; i++) {
2599:           tmp_rootdata[i].index = i + rootOffset;
2600:           tmp_rootdata[i].rank  = rank;
2601:         }
2602:         rootOffset += nr;
2603:       } else {
2604:         for (i = 0; i < nr; i++) {
2605:           tmp_rootdata[i].index = i;
2606:           tmp_rootdata[i].rank  = rank;
2607:         }
2608:       }
2609:       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2610:       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2611:       PetscCall(PetscSFDestroy(&tmp_sf));
2612:       PetscCall(PetscFree(tmp_rootdata));
2613:     }
2614:     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above

2616:     /* Build the new SF */
2617:     PetscCall(PetscSFCreate(comm, newsf));
2618:     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2619:   } else {
2620:     /* Build the new SF */
2621:     PetscCall(PetscSFCreate(comm, newsf));
2622:     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2623:   }
2624:   PetscCall(PetscSFSetUp(*newsf));
2625:   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2626:   PetscCall(PetscLayoutDestroy(&glayout));
2627:   PetscCall(PetscFree(gremote));
2628:   PetscCall(PetscFree(leafArrayOffsets));
2629:   PetscFunctionReturn(PETSC_SUCCESS);
2630: }
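/* A minimal usage sketch for the routine above; the names sfA, sfB, and comm are assumed for
   illustration. Both inputs are already set up star forests with contiguous leaves (ilocal == NULL),
   so leafOffsets may be passed as NULL, and the shared root mode treats the inputs as addressing the
   same root space:

     PetscSF sfs[2], newsf;

     sfs[0] = sfA;
     sfs[1] = sfB;
     PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_SHARED, NULL, &newsf));
     // ... use newsf ...
     PetscCall(PetscSFDestroy(&newsf));
*/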

2632: /*@
2633:   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications.

2635:   Collective

2637:   Input Parameters:
2638: + sf       - star forest
2639: . unit     - the data type contained within `rootdata` and `leafdata`
2640: . rootdata - root data that will be used for multiple `PetscSF` communications
2641: - leafdata - leaf data that will be used for multiple `PetscSF` communications

2643:   Level: advanced

2645:   Notes:
2646:   Implementations of `PetscSF` can optimize repeated communication that reuses the
2647:   same memory regions, but these optimizations can be unsound if `rootdata` or
2648:   `leafdata` is deallocated without the `PetscSF` being informed.
2649:   The intended pattern is

2651: .vb
2652:   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);

2654:   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2655:   // repeated use of rootdata and leafdata will now be optimized

2657:   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2658:   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2659:   // ...
2660:   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2661:   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2662:   // ... (other communications)

2664:   // rootdata and leafdata must be deregistered before freeing
2665:   // skipping this can lead to undefined behavior including
2666:   // deadlocks
2667:   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);

2669:   // it is now safe to free rootdata and leafdata
2670:   PetscFree2(rootdata, leafdata);
2671: .ve

2673:   Not registering `rootdata` and `leafdata` is not an error, but it prevents the
2674:   optimizations that reduce the setup time of each communication.  Currently, the
2675:   only implementation of `PetscSF` that benefits from `PetscSFRegisterPersistent()`
2676:   is `PETSCSFWINDOW`; for the default `PETSCSFBASIC` there is no benefit to using
2677:   `PetscSFRegisterPersistent()` (see the sketch after this function).

2679: .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2680: @*/
2681: PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2682: {
2683:   PetscFunctionBegin;
2684:   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2685:   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2686:   PetscFunctionReturn(PETSC_SUCCESS);
2687: }
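/* A minimal sketch complementing the note above, assuming an SF whose graph is already set and
   hypothetical sizes nroots/nleaves and step count nsteps. Registration only pays off for the
   window implementation, so the type is switched to PETSCSFWINDOW first (equivalently -sf_type window):

     PetscScalar *rootdata, *leafdata;

     PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
     PetscCall(PetscSFSetUp(sf));
     PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
     PetscCall(PetscSFRegisterPersistent(sf, MPIU_SCALAR, rootdata, leafdata));
     for (PetscInt step = 0; step < nsteps; step++) { // repeated communication on the same buffers
       PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
       PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     }
     PetscCall(PetscSFDeregisterPersistent(sf, MPIU_SCALAR, rootdata, leafdata));
     PetscCall(PetscFree2(rootdata, leafdata));
*/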

2689: /*@
2690:   PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded.

2692:   Collective

2694:   Input Parameters:
2695: + sf       - star forest
2696: . unit     - the data type contained within the `rootdata` and `leafdata`
2697: . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2698: - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`

2700:   Level: advanced

2702:   Note:
2703:   See `PetscSFRegisterPersistent()` for when and how to use this function.

2705: .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2706: @*/
2707: PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2708: {
2709:   PetscFunctionBegin;
2710:   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2711:   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2712:   PetscFunctionReturn(PETSC_SUCCESS);
2713: }

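/* Return in size the extent (in bytes) of the MPI datatype unit; datatypes with a nonzero
   lower bound (declared or true) are not supported and raise an error on comm */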
2715: PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2716: {
2717:   MPI_Aint lb, lb_true, bytes, bytes_true;

2719:   PetscFunctionBegin;
2720:   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2721:   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2722:   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound; contact petsc-maint@mcs.anl.gov if you want this feature");
2723:   *size = bytes;
2724:   PetscFunctionReturn(PETSC_SUCCESS);
2725: }