Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petsc/private/hashmapi.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #include <petscdevice_cuda.h>
  9: #endif

 11: #if defined(PETSC_HAVE_HIP)
 12:   #include <hip/hip_runtime.h>
 13: #endif

 15: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 16: extern void PetscSFCheckGraphSet(PetscSF, int);
 17: #else
 18:   #if defined(PETSC_USE_DEBUG)
 19:     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
 20:   #else
 21:     #define PetscSFCheckGraphSet(sf, arg) \
 22:       do { \
 23:       } while (0)
 24:   #endif
 25: #endif

 27: const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
 28: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

 30: /*@
 31:   PetscSFCreate - create a star forest communication context

 33:   Collective

 35:   Input Parameter:
 36: . comm - communicator on which the star forest will operate

 38:   Output Parameter:
 39: . sf - new star forest context

 41:   Options Database Keys:
 42: + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
 43: . -sf_type window                - Use MPI-3 one-sided window for communication
 44: . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
 45: - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

 47:   Level: intermediate

 49:   Note:
 50:   When one knows the communication graph is one of the predefined graphs, such as those of `MPI_Alltoall()`, `MPI_Allgatherv()`,
 51:   or `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
 52:   `SF`s are optimized and have better performance than the general `SF`s.
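
  Example Usage:
  A minimal sketch of the typical lifecycle; `nroots`, `nleaves`, `ilocal`, `iremote`, `rootdata`, and `leafdata` are assumed to be provided by the caller:
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf)); /* pick up -sf_type and related options */
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
.ve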

 54: .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
 55: @*/
 56: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
 57: {
 58:   PetscSF b;

 60:   PetscFunctionBegin;
 61:   PetscAssertPointer(sf, 2);
 62:   PetscCall(PetscSFInitializePackage());

 64:   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
 65:   b->nroots    = -1;
 66:   b->nleaves   = -1;
 67:   b->minleaf   = PETSC_INT_MAX;
 68:   b->maxleaf   = PETSC_INT_MIN;
 69:   b->nranks    = -1;
 70:   b->rankorder = PETSC_TRUE;
 71:   b->ingroup   = MPI_GROUP_NULL;
 72:   b->outgroup  = MPI_GROUP_NULL;
 73:   b->graphset  = PETSC_FALSE;
 74: #if defined(PETSC_HAVE_DEVICE)
 75:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 76:   b->use_stream_aware_mpi = PETSC_FALSE;
 77:   b->unknown_input_stream = PETSC_FALSE;
 78:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
 79:   b->backend = PETSCSF_BACKEND_KOKKOS;
 80:   #elif defined(PETSC_HAVE_CUDA)
 81:   b->backend = PETSCSF_BACKEND_CUDA;
 82:   #elif defined(PETSC_HAVE_HIP)
 83:   b->backend = PETSCSF_BACKEND_HIP;
 84:   #endif

 86:   #if defined(PETSC_HAVE_NVSHMEM)
 87:   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 88:   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 89:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
 90:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
 91:   #endif
 92: #endif
 93:   b->vscat.from_n = -1;
 94:   b->vscat.to_n   = -1;
 95:   b->vscat.unit   = MPIU_SCALAR;
 96:   *sf             = b;
 97:   PetscFunctionReturn(PETSC_SUCCESS);
 98: }

100: /*@
101:   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

103:   Collective

105:   Input Parameter:
106: . sf - star forest

108:   Level: advanced

110: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
111: @*/
112: PetscErrorCode PetscSFReset(PetscSF sf)
113: {
114:   PetscFunctionBegin;
116:   PetscTryTypeMethod(sf, Reset);
117:   PetscCall(PetscSFDestroy(&sf->rankssf));

119:   sf->nroots   = -1;
120:   sf->nleaves  = -1;
121:   sf->minleaf  = PETSC_INT_MAX;
122:   sf->maxleaf  = PETSC_INT_MIN;
123:   sf->mine     = NULL;
124:   sf->remote   = NULL;
125:   sf->graphset = PETSC_FALSE;
126:   PetscCall(PetscFree(sf->mine_alloc));
127:   PetscCall(PetscFree(sf->remote_alloc));
128:   sf->nranks = -1;
129:   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
130:   sf->degreeknown = PETSC_FALSE;
131:   PetscCall(PetscFree(sf->degree));
132:   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
133:   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));

135:   if (sf->multi) sf->multi->multi = NULL;
136:   PetscCall(PetscSFDestroy(&sf->multi));

138:   PetscCall(PetscLayoutDestroy(&sf->map));

140: #if defined(PETSC_HAVE_DEVICE)
141:   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
142: #endif

144:   sf->setupcalled = PETSC_FALSE;
145:   PetscFunctionReturn(PETSC_SUCCESS);
146: }

148: /*@
149:   PetscSFSetType - Set the `PetscSF` communication implementation

151:   Collective

153:   Input Parameters:
154: + sf   - the `PetscSF` context
155: - type - a known method
156: .vb
157:     PETSCSFWINDOW - MPI-2/3 one-sided
158:     PETSCSFBASIC - basic implementation using MPI-1 two-sided
159: .ve

161:   Options Database Key:
162: . -sf_type <type> - Sets the method, for example `basic` or `window`; use -help for a list of available methods

164:   Level: intermediate

166:   Notes:
167:   See `PetscSFType` for possible values
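
  Example Usage:
  For instance, a sketch selecting the one-sided window implementation in code (assumes `sf` is an existing `PetscSF`):
.vb
  PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
.ve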

169: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
170: @*/
171: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172: {
173:   PetscBool match;
174:   PetscErrorCode (*r)(PetscSF);

176:   PetscFunctionBegin;
178:   PetscAssertPointer(type, 2);

180:   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
181:   if (match) PetscFunctionReturn(PETSC_SUCCESS);

183:   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
184:   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
185:   /* Destroy the previous PetscSF implementation context */
186:   PetscTryTypeMethod(sf, Destroy);
187:   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
188:   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
189:   PetscCall((*r)(sf));
190:   PetscFunctionReturn(PETSC_SUCCESS);
191: }

193: /*@
194:   PetscSFGetType - Get the `PetscSF` communication implementation

196:   Not Collective

198:   Input Parameter:
199: . sf - the `PetscSF` context

201:   Output Parameter:
202: . type - the `PetscSF` type name

204:   Level: intermediate

206: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
207: @*/
208: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209: {
210:   PetscFunctionBegin;
212:   PetscAssertPointer(type, 2);
213:   *type = ((PetscObject)sf)->type_name;
214:   PetscFunctionReturn(PETSC_SUCCESS);
215: }

217: /*@
218:   PetscSFDestroy - destroy a star forest

220:   Collective

222:   Input Parameter:
223: . sf - address of star forest

225:   Level: intermediate

227: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
228: @*/
229: PetscErrorCode PetscSFDestroy(PetscSF *sf)
230: {
231:   PetscFunctionBegin;
232:   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
234:   if (--((PetscObject)*sf)->refct > 0) {
235:     *sf = NULL;
236:     PetscFunctionReturn(PETSC_SUCCESS);
237:   }
238:   PetscCall(PetscSFReset(*sf));
239:   PetscTryTypeMethod(*sf, Destroy);
240:   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
241:   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243:   if ((*sf)->use_stream_aware_mpi) {
244:     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245:     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246:   }
247: #endif
248:   PetscCall(PetscHeaderDestroy(sf));
249:   PetscFunctionReturn(PETSC_SUCCESS);
250: }

252: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253: {
254:   PetscInt           i, nleaves;
255:   PetscMPIInt        size;
256:   const PetscInt    *ilocal;
257:   const PetscSFNode *iremote;

259:   PetscFunctionBegin;
260:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
261:   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
262:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263:   for (i = 0; i < nleaves; i++) {
264:     const PetscInt rank   = iremote[i].rank;
265:     const PetscInt remote = iremote[i].index;
266:     const PetscInt leaf   = ilocal ? ilocal[i] : i;
267:     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
268:     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
269:     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270:   }
271:   PetscFunctionReturn(PETSC_SUCCESS);
272: }

274: /*@
275:   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication

277:   Collective

279:   Input Parameter:
280: . sf - star forest communication object

282:   Level: beginner

284: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
285: @*/
286: PetscErrorCode PetscSFSetUp(PetscSF sf)
287: {
288:   PetscFunctionBegin;
290:   PetscSFCheckGraphSet(sf, 1);
291:   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
292:   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
293:   PetscCall(PetscSFCheckGraphValid_Private(sf));
294:   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295:   PetscTryTypeMethod(sf, SetUp);
296: #if defined(PETSC_HAVE_CUDA)
297:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
298:     sf->ops->Malloc = PetscSFMalloc_CUDA;
299:     sf->ops->Free   = PetscSFFree_CUDA;
300:   }
301: #endif
302: #if defined(PETSC_HAVE_HIP)
303:   if (sf->backend == PETSCSF_BACKEND_HIP) {
304:     sf->ops->Malloc = PetscSFMalloc_HIP;
305:     sf->ops->Free   = PetscSFFree_HIP;
306:   }
307: #endif

309: #if defined(PETSC_HAVE_KOKKOS)
310:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
311:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
312:     sf->ops->Free   = PetscSFFree_Kokkos;
313:   }
314: #endif
315:   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
316:   sf->setupcalled = PETSC_TRUE;
317:   PetscFunctionReturn(PETSC_SUCCESS);
318: }

320: /*@
321:   PetscSFSetFromOptions - set `PetscSF` options using the options database

323:   Logically Collective

325:   Input Parameter:
326: . sf - star forest

328:   Options Database Keys:
329: + -sf_type                      - implementation type, see `PetscSFSetType()`
330: . -sf_rank_order                - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
331: . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
332:                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
333:                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
334: . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
335:                                   If true, this option only works with `-use_gpu_aware_mpi 1`.

337: - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
338:                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
339:                                   kokkos is the only available backend.

341:   Level: intermediate

343: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
344: @*/
345: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346: {
347:   PetscSFType deft;
348:   char        type[256];
349:   PetscBool   flg;

351:   PetscFunctionBegin;
353:   PetscObjectOptionsBegin((PetscObject)sf);
354:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
355:   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
356:   PetscCall(PetscSFSetType(sf, flg ? type : deft));
357:   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
358:   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
359: #if defined(PETSC_HAVE_DEVICE)
360:   {
361:     char      backendstr[32] = {0};
362:     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
363:     /* Change the defaults set in PetscSFCreate() with command line options */
364:     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
365:     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
366:     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
367:     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
368:     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
369:     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
370:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
371:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
374:     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
375:   #elif defined(PETSC_HAVE_KOKKOS)
376:     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
377:   #endif

379:   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380:     if (sf->use_stream_aware_mpi) {
381:       MPI_Info info;

383:       PetscCallMPI(MPI_Info_create(&info));
384:       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385:       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386:       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387:       PetscCallMPI(MPI_Info_free(&info));
388:       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389:     }
390:   #endif
391:   }
392: #endif
393:   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394:   PetscOptionsEnd();
395:   PetscFunctionReturn(PETSC_SUCCESS);
396: }

398: /*@
399:   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

401:   Logically Collective

403:   Input Parameters:
404: + sf  - star forest
405: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

407:   Level: advanced

409: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
410: @*/
411: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412: {
413:   PetscFunctionBegin;
416:   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
417:   sf->rankorder = flg;
418:   PetscFunctionReturn(PETSC_SUCCESS);
419: }

421: /*@
422:   PetscSFSetGraph - Set a parallel star forest

424:   Collective

426:   Input Parameters:
427: + sf         - star forest
428: . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
429: . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
430: . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431:                during setup in debug mode)
432: . localmode  - copy mode for `ilocal`
433: . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves`
434:                (locations must be >= 0, enforced during setup in debug mode)
435: - remotemode - copy mode for `iremote`

437:   Level: intermediate

439:   Notes:
440:   Leaf indices in `ilocal` must be unique, otherwise an error occurs.

442:   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
443:   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444:   PETSc might modify the respective array;
445:   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
446:   Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact; in that case a const array can be passed (though a cast to non-const is needed).

448:   Fortran Notes:
449:   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

451:   Developer Notes:
452:   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
453:   This also makes it easy to compare the leaf sets of two `PetscSF`s.
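
  Example Usage:
  A sketch connecting two local leaves to the first two roots owned by rank 0 (the values are purely illustrative; `sf` is an existing `PetscSF`):
.vb
  PetscInt    nroots = 2, nleaves = 2, ilocal[2] = {0, 1};
  PetscSFNode iremote[2];

  iremote[0].rank = 0; iremote[0].index = 0;
  iremote[1].rank = 0; iremote[1].index = 1;
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
.ve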

455: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
456: @*/
457: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
458: {
459:   PetscBool unique, contiguous;

461:   PetscFunctionBegin;
463:   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
464:   if (nleaves > 0) PetscAssertPointer(iremote, 6);
465:   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
466:   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
467:   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
468:    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
469:   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
470:   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

472:   if (sf->nroots >= 0) { /* Reset only if graph already set */
473:     PetscCall(PetscSFReset(sf));
474:   }

476:   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
477:   if (PetscDefined(USE_DEBUG)) {
478:     PetscMPIInt size;

480:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
481:     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
482:   }

484:   sf->nroots  = nroots;
485:   sf->nleaves = nleaves;

487:   if (localmode == PETSC_COPY_VALUES && ilocal) {
488:     PetscInt *tlocal = NULL;

490:     PetscCall(PetscMalloc1(nleaves, &tlocal));
491:     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
492:     ilocal = tlocal;
493:   }
494:   if (remotemode == PETSC_COPY_VALUES) {
495:     PetscSFNode *tremote = NULL;

497:     PetscCall(PetscMalloc1(nleaves, &tremote));
498:     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
499:     iremote = tremote;
500:   }

502:   if (nleaves && ilocal) {
503:     PetscSFNode work;

505:     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
506:     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
507:     unique = PetscNot(unique);
508:     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
509:     sf->minleaf = ilocal[0];
510:     sf->maxleaf = ilocal[nleaves - 1];
511:     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
512:   } else {
513:     sf->minleaf = 0;
514:     sf->maxleaf = nleaves - 1;
515:     unique      = PETSC_TRUE;
516:     contiguous  = PETSC_TRUE;
517:   }

519:   if (contiguous) {
520:     if (localmode == PETSC_USE_POINTER) {
521:       ilocal = NULL;
522:     } else {
523:       PetscCall(PetscFree(ilocal));
524:     }
525:   }
526:   sf->mine = ilocal;
527:   if (localmode == PETSC_USE_POINTER) {
528:     sf->mine_alloc = NULL;
529:   } else {
530:     sf->mine_alloc = ilocal;
531:   }
532:   if (PetscDefined(USE_DEBUG)) {
533:     PetscMPIInt size;

535:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
536:     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
537:   }
538:   sf->remote = iremote;
539:   if (remotemode == PETSC_USE_POINTER) {
540:     sf->remote_alloc = NULL;
541:   } else {
542:     sf->remote_alloc = iremote;
543:   }
544:   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
545:   sf->graphset = PETSC_TRUE;
546:   PetscFunctionReturn(PETSC_SUCCESS);
547: }

549: /*@
550:   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

552:   Collective

554:   Input Parameters:
555: + sf      - The `PetscSF`
556: . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
557: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

559:   Level: intermediate

561:   Notes:
562:   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
563:   `n` and `N` are the local and global sizes of `x` respectively.

565:   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to
566:   sequential vectors `y` on all MPI processes.

568:   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a
569:   sequential vector `y` on rank 0.

571:   In the above cases, entries of `x` are roots and entries of `y` are leaves.

573:   With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
574:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
575:   of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. Note that this does
576:   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
577:   items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.

579:   In this case, roots and leaves are symmetric.

581: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
582:  @*/
583: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
584: {
585:   MPI_Comm    comm;
586:   PetscInt    n, N, res[2];
587:   PetscMPIInt rank, size;
588:   PetscSFType type;

590:   PetscFunctionBegin;
592:   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
593:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
594:   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
595:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
596:   PetscCallMPI(MPI_Comm_size(comm, &size));

598:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
599:     PetscInt sizei = size;

601:     type = PETSCSFALLTOALL;
602:     PetscCall(PetscLayoutCreate(comm, &sf->map));
603:     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
604:     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
605:     PetscCall(PetscLayoutSetUp(sf->map));
606:   } else {
607:     PetscCall(PetscLayoutGetLocalSize(map, &n));
608:     PetscCall(PetscLayoutGetSize(map, &N));
609:     res[0] = n;
610:     res[1] = -n;
611:     /* Check if n is the same on all ranks so that we can optimize for that case */
612:     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
613:     if (res[0] == -res[1]) { /* same n */
614:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
615:     } else {
616:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
617:     }
618:     PetscCall(PetscLayoutReference(map, &sf->map));
619:   }
620:   PetscCall(PetscSFSetType(sf, type));

622:   sf->pattern = pattern;
623:   sf->mine    = NULL; /* Contiguous */

625:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
626:      Also set other easy stuff.
627:    */
628:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
629:     sf->nleaves = N;
630:     sf->nroots  = n;
631:     sf->nranks  = size;
632:     sf->minleaf = 0;
633:     sf->maxleaf = N - 1;
634:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
635:     sf->nleaves = rank ? 0 : N;
636:     sf->nroots  = n;
637:     sf->nranks  = rank ? 0 : size;
638:     sf->minleaf = 0;
639:     sf->maxleaf = rank ? -1 : N - 1;
640:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
641:     sf->nleaves = size;
642:     sf->nroots  = size;
643:     sf->nranks  = size;
644:     sf->minleaf = 0;
645:     sf->maxleaf = size - 1;
646:   }
647:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
648:   sf->graphset = PETSC_TRUE;
649:   PetscFunctionReturn(PETSC_SUCCESS);
650: }

652: /*@
653:   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

655:   Collective

657:   Input Parameter:
658: . sf - star forest to invert

660:   Output Parameter:
661: . isf - inverse of `sf`

663:   Level: advanced

665:   Notes:
666:   All roots must have degree 1.

668:   The local space may be a permutation, but cannot be sparse.
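
  Example Usage:
  A sketch moving data from the leaves of `sf` back to its roots by broadcasting on the inverse; `leafdata` and `rootdata` are assumed to be arrays sized for the leaves and roots of the original `sf`:
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  /* on isf the former leaves are roots, so leafdata is the source buffer here */
  PetscCall(PetscSFBcastBegin(isf, MPIU_SCALAR, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(isf, MPIU_SCALAR, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&isf));
.ve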

670: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
671: @*/
672: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
673: {
674:   PetscMPIInt     rank;
675:   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
676:   const PetscInt *ilocal;
677:   PetscSFNode    *roots, *leaves;

679:   PetscFunctionBegin;
681:   PetscSFCheckGraphSet(sf, 1);
682:   PetscAssertPointer(isf, 2);

684:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
685:   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

687:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
688:   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
689:   for (i = 0; i < maxlocal; i++) {
690:     leaves[i].rank  = rank;
691:     leaves[i].index = i;
692:   }
693:   for (i = 0; i < nroots; i++) {
694:     roots[i].rank  = -1;
695:     roots[i].index = -1;
696:   }
697:   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
698:   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));

700:   /* Check whether our leaves are sparse */
701:   for (i = 0, count = 0; i < nroots; i++)
702:     if (roots[i].rank >= 0) count++;
703:   if (count == nroots) newilocal = NULL;
704:   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
705:     for (i = 0, count = 0; i < nroots; i++) {
706:       if (roots[i].rank >= 0) {
707:         newilocal[count]   = i;
708:         roots[count].rank  = roots[i].rank;
709:         roots[count].index = roots[i].index;
710:         count++;
711:       }
712:     }
713:   }

715:   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
716:   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
717:   PetscCall(PetscFree2(roots, leaves));
718:   PetscFunctionReturn(PETSC_SUCCESS);
719: }

721: /*@
722:   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

724:   Collective

726:   Input Parameters:
727: + sf  - communication object to duplicate
728: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

730:   Output Parameter:
731: . newsf - new communication object

733:   Level: beginner
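
  Example Usage:
  A sketch duplicating an existing `sf` together with its graph:
.vb
  PetscSF newsf;

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &newsf));
  PetscCall(PetscSFSetUp(newsf));
  /* ... use newsf ... */
  PetscCall(PetscSFDestroy(&newsf));
.ve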

735: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
736: @*/
737: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
738: {
739:   PetscSFType  type;
740:   MPI_Datatype dtype = MPIU_SCALAR;

742:   PetscFunctionBegin;
745:   PetscAssertPointer(newsf, 3);
746:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
747:   PetscCall(PetscSFGetType(sf, &type));
748:   if (type) PetscCall(PetscSFSetType(*newsf, type));
749:   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
750:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
751:     PetscSFCheckGraphSet(sf, 1);
752:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
753:       PetscInt           nroots, nleaves;
754:       const PetscInt    *ilocal;
755:       const PetscSFNode *iremote;
756:       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
757:       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
758:     } else {
759:       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
760:     }
761:   }
762:   /* Since oldtype is committed, so is newtype, according to MPI */
763:   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
764:   (*newsf)->vscat.bs     = sf->vscat.bs;
765:   (*newsf)->vscat.unit   = dtype;
766:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
767:   (*newsf)->vscat.from_n = sf->vscat.from_n;
768:   /* Do not copy lsf. Build it on demand since it is rarely used */

770: #if defined(PETSC_HAVE_DEVICE)
771:   (*newsf)->backend              = sf->backend;
772:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
773:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
774:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
775: #endif
776:   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
777:   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
778:   PetscFunctionReturn(PETSC_SUCCESS);
779: }

781: /*@C
782:   PetscSFGetGraph - Get the graph specifying a parallel star forest

784:   Not Collective

786:   Input Parameter:
787: . sf - star forest

789:   Output Parameters:
790: + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
791: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
792: . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
793: - iremote - remote locations of root vertices for each leaf on the current process

795:   Level: intermediate

797:   Notes:
798:   We do not currently require that the graph be set, thus `nroots` = -1 is returned if it has not been set yet

800:   The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`

802:   Fortran Note:
803:   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
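
  Example Usage:
  A sketch querying and traversing the graph; the returned arrays are owned by `sf` and should not be freed by the caller:
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    PetscInt leaf = ilocal ? ilocal[i] : i; /* leaves are contiguous when ilocal is NULL */

    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
.ve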

805: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
806: @*/
807: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
808: {
809:   PetscFunctionBegin;
811:   if (sf->ops->GetGraph) {
812:     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
813:   } else {
814:     if (nroots) *nroots = sf->nroots;
815:     if (nleaves) *nleaves = sf->nleaves;
816:     if (ilocal) *ilocal = sf->mine;
817:     if (iremote) *iremote = sf->remote;
818:   }
819:   PetscFunctionReturn(PETSC_SUCCESS);
820: }

822: /*@
823:   PetscSFGetLeafRange - Get the active leaf ranges

825:   Not Collective

827:   Input Parameter:
828: . sf - star forest

830:   Output Parameters:
831: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
832: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

834:   Level: developer
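
  Example Usage:
  A sketch sizing a contiguous work buffer covering the active leaf range:
.vb
  PetscInt     minleaf, maxleaf;
  PetscScalar *leafbuf;

  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafbuf));
  /* ... address entries as leafbuf[leaf - minleaf] ... */
  PetscCall(PetscFree(leafbuf));
.ve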

836: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
837: @*/
838: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
839: {
840:   PetscFunctionBegin;
842:   PetscSFCheckGraphSet(sf, 1);
843:   if (minleaf) *minleaf = sf->minleaf;
844:   if (maxleaf) *maxleaf = sf->maxleaf;
845:   PetscFunctionReturn(PETSC_SUCCESS);
846: }

848: /*@
849:   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

851:   Collective

853:   Input Parameters:
854: + A    - the star forest
855: . obj  - Optional object that provides the prefix for the option names
856: - name - command line option

858:   Level: intermediate

860:   Note:
861:   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`

863: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
864: @*/
865: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
866: {
867:   PetscFunctionBegin;
869:   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
870:   PetscFunctionReturn(PETSC_SUCCESS);
871: }

873: /*@
874:   PetscSFView - view a star forest

876:   Collective

878:   Input Parameters:
879: + sf     - star forest
880: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

882:   Level: beginner
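
  Example Usage:
  A sketch printing the forest to standard output:
.vb
  PetscCall(PetscSFView(sf, PETSC_VIEWER_STDOUT_WORLD));
.ve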

884: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
885: @*/
886: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
887: {
888:   PetscBool         iascii;
889:   PetscViewerFormat format;

891:   PetscFunctionBegin;
893:   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
895:   PetscCheckSameComm(sf, 1, viewer, 2);
896:   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
897:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
898:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
899:     PetscMPIInt rank;
900:     PetscInt    j;

902:     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
903:     PetscCall(PetscViewerASCIIPushTab(viewer));
904:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
905:       if (!sf->graphset) {
906:         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
907:         PetscCall(PetscViewerASCIIPopTab(viewer));
908:         PetscFunctionReturn(PETSC_SUCCESS);
909:       }
910:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
911:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
912:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
913:       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
914:       PetscCall(PetscViewerFlush(viewer));
915:       PetscCall(PetscViewerGetFormat(viewer, &format));
916:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
917:         PetscMPIInt *tmpranks, *perm;

919:         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
920:         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
921:         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
922:         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
923:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
924:         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
925:           PetscMPIInt i = perm[ii];

927:           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
928:           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
929:         }
930:         PetscCall(PetscFree2(tmpranks, perm));
931:       }
932:       PetscCall(PetscViewerFlush(viewer));
933:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
934:     }
935:     PetscCall(PetscViewerASCIIPopTab(viewer));
936:   }
937:   PetscTryTypeMethod(sf, View, viewer);
938:   PetscFunctionReturn(PETSC_SUCCESS);
939: }

941: /*@C
942:   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

944:   Not Collective

946:   Input Parameter:
947: . sf - star forest

949:   Output Parameters:
950: + nranks  - number of ranks referenced by local part
951: . ranks   - [`nranks`] array of ranks
952: . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
953: . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
954: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`

956:   Level: developer

958: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
959: @*/
960: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
961: {
962:   PetscFunctionBegin;
964:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
965:   if (sf->ops->GetRootRanks) {
966:     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
967:   } else {
968:     /* The generic implementation */
969:     if (nranks) *nranks = sf->nranks;
970:     if (ranks) *ranks = sf->ranks;
971:     if (roffset) *roffset = sf->roffset;
972:     if (rmine) *rmine = sf->rmine;
973:     if (rremote) *rremote = sf->rremote;
974:   }
975:   PetscFunctionReturn(PETSC_SUCCESS);
976: }

978: /*@C
979:   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

981:   Not Collective

983:   Input Parameter:
984: . sf - star forest

986:   Output Parameters:
987: + niranks  - number of leaf ranks referencing roots on this process
988: . iranks   - [`niranks`] array of ranks
989: . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
990: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank

992:   Level: developer

994: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
995: @*/
996: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
997: {
998:   PetscFunctionBegin;
1000:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
1001:   if (sf->ops->GetLeafRanks) {
1002:     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1003:   } else {
1004:     PetscSFType type;
1005:     PetscCall(PetscSFGetType(sf, &type));
1006:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1007:   }
1008:   PetscFunctionReturn(PETSC_SUCCESS);
1009: }

1011: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1012: {
1013:   PetscInt i;
1014:   for (i = 0; i < n; i++) {
1015:     if (needle == list[i]) return PETSC_TRUE;
1016:   }
1017:   return PETSC_FALSE;
1018: }

1020: /*@C
1021:   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.

1023:   Collective

1025:   Input Parameters:
1026: + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1027: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

1029:   Level: developer

1031: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1032: @*/
1033: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1034: {
1035:   PetscHMapI    table;
1036:   PetscHashIter pos;
1037:   PetscMPIInt   size, groupsize, *groupranks, *ranks;
1038:   PetscInt     *rcount;
1039:   PetscInt      irank, sfnrank, ranksi;
1040:   PetscMPIInt   i, orank = -1;

1042:   PetscFunctionBegin;
1044:   PetscSFCheckGraphSet(sf, 1);
1045:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1046:   PetscCall(PetscHMapICreateWithSize(10, &table));
1047:   for (i = 0; i < sf->nleaves; i++) {
1048:     /* Log 1-based rank */
1049:     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1050:   }
1051:   PetscCall(PetscHMapIGetSize(table, &sfnrank));
1052:   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1053:   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1054:   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1055:   PetscHashIterBegin(table, pos);
1056:   for (i = 0; i < sf->nranks; i++) {
1057:     PetscHashIterGetKey(table, pos, ranksi);
1058:     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1059:     PetscHashIterGetVal(table, pos, rcount[i]);
1060:     PetscHashIterNext(table, pos);
1061:     ranks[i]--; /* Convert back to 0-based */
1062:   }
1063:   PetscCall(PetscHMapIDestroy(&table));

1065:   /* We expect that dgroup is reliably "small" while nranks could be large */
1066:   {
1067:     MPI_Group    group = MPI_GROUP_NULL;
1068:     PetscMPIInt *dgroupranks;

1070:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1071:     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1072:     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1073:     PetscCall(PetscMalloc1(groupsize, &groupranks));
1074:     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1075:     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1076:     PetscCallMPI(MPI_Group_free(&group));
1077:     PetscCall(PetscFree(dgroupranks));
1078:   }

1080:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1081:   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1082:     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1083:       if (InList(ranks[i], groupsize, groupranks)) break;
1084:     }
1085:     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1086:       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1087:     }
1088:     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1089:       PetscMPIInt tmprank;
1090:       PetscInt    tmpcount;

1092:       tmprank             = ranks[i];
1093:       tmpcount            = rcount[i];
1094:       ranks[i]            = ranks[sf->ndranks];
1095:       rcount[i]           = rcount[sf->ndranks];
1096:       ranks[sf->ndranks]  = tmprank;
1097:       rcount[sf->ndranks] = tmpcount;
1098:       sf->ndranks++;
1099:     }
1100:   }
1101:   PetscCall(PetscFree(groupranks));
1102:   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1103:   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1104:   sf->roffset[0] = 0;
1105:   for (i = 0; i < sf->nranks; i++) {
1106:     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1107:     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1108:     rcount[i]          = 0;
1109:   }
1110:   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1111:     /* short circuit */
1112:     if (orank != sf->remote[i].rank) {
1113:       /* Search for index of iremote[i].rank in sf->ranks */
1114:       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1115:       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1116:       if (irank < 0) {
1117:         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1118:         if (irank >= 0) irank += sf->ndranks;
1119:       }
1120:     }
1121:     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
1122:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1123:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1124:     rcount[irank]++;
1125:   }
1126:   PetscCall(PetscFree2(rcount, ranks));
1127:   PetscFunctionReturn(PETSC_SUCCESS);
1128: }

1130: /*@C
1131:   PetscSFGetGroups - gets incoming and outgoing process groups

1133:   Collective

1135:   Input Parameter:
1136: . sf - star forest

1138:   Output Parameters:
1139: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1140: - outgoing - group of destination processes for outgoing edges (roots that I reference)

1142:   Level: developer

1144: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1145: @*/
1146: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1147: {
1148:   MPI_Group group = MPI_GROUP_NULL;

1150:   PetscFunctionBegin;
1151:   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1152:   if (sf->ingroup == MPI_GROUP_NULL) {
1153:     PetscInt        i;
1154:     const PetscInt *indegree;
1155:     PetscMPIInt     rank, *outranks, *inranks, indegree0;
1156:     PetscSFNode    *remote;
1157:     PetscSF         bgcount;

1159:     /* Compute the number of incoming ranks */
1160:     PetscCall(PetscMalloc1(sf->nranks, &remote));
1161:     for (i = 0; i < sf->nranks; i++) {
1162:       remote[i].rank  = sf->ranks[i];
1163:       remote[i].index = 0;
1164:     }
1165:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1166:     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1167:     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1168:     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1169:     /* Enumerate the incoming ranks */
1170:     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1171:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1172:     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1173:     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1174:     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1175:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1176:     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1177:     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1178:     PetscCallMPI(MPI_Group_free(&group));
1179:     PetscCall(PetscFree2(inranks, outranks));
1180:     PetscCall(PetscSFDestroy(&bgcount));
1181:   }
1182:   *incoming = sf->ingroup;

1184:   if (sf->outgroup == MPI_GROUP_NULL) {
1185:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1186:     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1187:     PetscCallMPI(MPI_Group_free(&group));
1188:   }
1189:   *outgoing = sf->outgroup;
1190:   PetscFunctionReturn(PETSC_SUCCESS);
1191: }

1193: /*@
1194:   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks

1196:   Collective

1198:   Input Parameter:
1199: . sf - star forest

1201:   Output Parameter:
1202: . rsf - the star forest with a single root per process to perform communications

1204:   Level: developer

1206: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1207: @*/
1208: PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1209: {
1210:   PetscFunctionBegin;
1212:   PetscAssertPointer(rsf, 2);
1213:   if (!sf->rankssf) {
1214:     PetscSFNode       *rremotes;
1215:     const PetscMPIInt *ranks;
1216:     PetscMPIInt        nranks;

1218:     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1219:     PetscCall(PetscMalloc1(nranks, &rremotes));
1220:     for (PetscInt i = 0; i < nranks; i++) {
1221:       rremotes[i].rank  = ranks[i];
1222:       rremotes[i].index = 0;
1223:     }
1224:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1225:     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1226:   }
1227:   *rsf = sf->rankssf;
1228:   PetscFunctionReturn(PETSC_SUCCESS);
1229: }

1231: /*@
1232:   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

1234:   Collective

1236:   Input Parameter:
1237: . sf - star forest that may contain roots with 0 or with more than 1 vertex

1239:   Output Parameter:
1240: . multi - star forest with split roots, such that each root has degree exactly 1

1242:   Level: developer

1244:   Note:
1245:   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1246:   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1247:   edge, it is a candidate for future optimization that might involve its removal.

1249: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1250: @*/
1251: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1252: {
1253:   PetscFunctionBegin;
1255:   PetscAssertPointer(multi, 2);
1256:   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1257:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1258:     *multi           = sf->multi;
1259:     sf->multi->multi = sf->multi;
1260:     PetscFunctionReturn(PETSC_SUCCESS);
1261:   }
1262:   if (!sf->multi) {
1263:     const PetscInt *indegree;
1264:     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1265:     PetscSFNode    *remote;
1266:     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1267:     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1268:     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1269:     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1270:     inoffset[0] = 0;
1271:     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1272:     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1273:     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1274:     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1275:     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1276:     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1277:       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1278:     }
1279:     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1280:     for (i = 0; i < sf->nleaves; i++) {
1281:       remote[i].rank  = sf->remote[i].rank;
1282:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1283:     }
1284:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1285:     sf->multi->multi = sf->multi;
1286:     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1287:     if (sf->rankorder) { /* Sort the ranks */
1288:       PetscMPIInt  rank;
1289:       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1290:       PetscSFNode *newremote;
1291:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1292:       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1293:       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1294:       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1295:       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1296:       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1297:       /* Sort the incoming ranks at each vertex, build the inverse map */
1298:       for (i = 0; i < sf->nroots; i++) {
1299:         PetscInt j;
1300:         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1301:         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1302:         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1303:       }
1304:       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1305:       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1306:       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1307:       for (i = 0; i < sf->nleaves; i++) {
1308:         newremote[i].rank  = sf->remote[i].rank;
1309:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1310:       }
1311:       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1312:       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1313:     }
1314:     PetscCall(PetscFree3(inoffset, outones, outoffset));
1315:   }
1316:   *multi = sf->multi;
1317:   PetscFunctionReturn(PETSC_SUCCESS);
1318: }

1320: /*@C
1321:   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

1323:   Collective

1325:   Input Parameters:
1326: + sf        - original star forest
1327: . nselected - number of selected roots on this process
1328: - selected  - indices of the selected roots on this process

1330:   Output Parameter:
1331: . esf - new star forest

1333:   Level: advanced

1335:   Note:
1336:   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1337:   be done by calling `PetscSFGetGraph()`.
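
  Example:
  A minimal sketch, assuming `sf`, `nselected`, and `selected` are set up by the caller as described above\:
.vb
  PetscSF         esf;
  PetscInt        enleaves;
  const PetscInt *eilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &enleaves, &eilocal, NULL)); /* indices of the leaves still participating */
  /* ... communicate with esf ... */
  PetscCall(PetscSFDestroy(&esf));
.ve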

1339: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1340: @*/
1341: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1342: {
1343:   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1344:   const PetscInt    *ilocal;
1345:   signed char       *rootdata, *leafdata, *leafmem;
1346:   const PetscSFNode *iremote;
1347:   PetscSFNode       *new_iremote;
1348:   MPI_Comm           comm;

1350:   PetscFunctionBegin;
1352:   PetscSFCheckGraphSet(sf, 1);
1353:   if (nselected) PetscAssertPointer(selected, 3);
1354:   PetscAssertPointer(esf, 4);

1356:   PetscCall(PetscSFSetUp(sf));
1357:   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1358:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1359:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

1361:   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1362:     PetscBool dups;
1363:     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1364:     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1365:     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1366:   }

1368:   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1369:   else {
1370:     /* A generic version of creating embedded sf */
1371:     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1372:     maxlocal = maxleaf - minleaf + 1;
1373:     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1374:     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1375:     /* Tag selected roots and bcast to leaves */
1376:     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1377:     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1378:     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

1380:     /* Build esf with leaves that are still connected */
1381:     esf_nleaves = 0;
1382:     for (i = 0; i < nleaves; i++) {
1383:       j = ilocal ? ilocal[i] : i;
1384:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1385:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1386:       */
1387:       esf_nleaves += (leafdata[j] ? 1 : 0);
1388:     }
1389:     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1390:     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1391:     for (i = n = 0; i < nleaves; i++) {
1392:       j = ilocal ? ilocal[i] : i;
1393:       if (leafdata[j]) {
1394:         new_ilocal[n]        = j;
1395:         new_iremote[n].rank  = iremote[i].rank;
1396:         new_iremote[n].index = iremote[i].index;
1397:         ++n;
1398:       }
1399:     }
1400:     PetscCall(PetscSFCreate(comm, esf));
1401:     PetscCall(PetscSFSetFromOptions(*esf));
1402:     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1403:     PetscCall(PetscFree2(rootdata, leafmem));
1404:   }
1405:   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1406:   PetscFunctionReturn(PETSC_SUCCESS);
1407: }

1409: /*@C
1410:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices

1412:   Collective

1414:   Input Parameters:
1415: + sf        - original star forest
1416: . nselected - number of selected leaves on this process
1417: - selected  - indices of the selected leaves on this process

1419:   Output Parameter:
1420: . newsf - new star forest

1422:   Level: advanced
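
  Example:
  A minimal sketch, assuming `sf` and an array `sel` of `nsel` leaf indices are provided by the caller\:
.vb
  PetscSF lsf;

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, nsel, sel, &lsf));
  /* ... communicate with lsf ... */
  PetscCall(PetscSFDestroy(&lsf));
.ve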

1424: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1425: @*/
1426: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1427: {
1428:   const PetscSFNode *iremote;
1429:   PetscSFNode       *new_iremote;
1430:   const PetscInt    *ilocal;
1431:   PetscInt           i, nroots, *leaves, *new_ilocal;
1432:   MPI_Comm           comm;

1434:   PetscFunctionBegin;
1436:   PetscSFCheckGraphSet(sf, 1);
1437:   if (nselected) PetscAssertPointer(selected, 3);
1438:   PetscAssertPointer(newsf, 4);

1440:   /* Sort selected[], remove duplicates, and put the results in leaves[] */
1441:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1442:   PetscCall(PetscMalloc1(nselected, &leaves));
1443:   PetscCall(PetscArraycpy(leaves, selected, nselected));
1444:   PetscCall(PetscSortRemoveDupsInt(&nselected, leaves));
1445:   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);

1447:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1448:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1449:   else {
1450:     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1451:     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1452:     PetscCall(PetscMalloc1(nselected, &new_iremote));
1453:     for (i = 0; i < nselected; ++i) {
1454:       const PetscInt l     = leaves[i];
1455:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1456:       new_iremote[i].rank  = iremote[l].rank;
1457:       new_iremote[i].index = iremote[l].index;
1458:     }
1459:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1460:     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1461:   }
1462:   PetscCall(PetscFree(leaves));
1463:   PetscFunctionReturn(PETSC_SUCCESS);
1464: }

1466: /*@C
1467:   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`

1469:   Collective

1471:   Input Parameters:
1472: + sf       - star forest on which to communicate
1473: . unit     - data type associated with each node
1474: . rootdata - buffer to broadcast
1475: - op       - operation to use for reduction

1477:   Output Parameter:
1478: . leafdata - buffer to be reduced with values from each leaf's respective root

1480:   Level: intermediate

1482:   Note:
1483:   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1484:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1485:   use `PetscSFBcastWithMemTypeBegin()` instead.
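
  Example:
  A typical begin/end pairing; `rootdata` and `leafdata` are assumed to be `PetscInt` arrays sized for this `sf`\:
.vb
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  /* unrelated work may be overlapped with the communication here */
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
.ve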

1487: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1488: @*/
1489: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1490: {
1491:   PetscMemType rootmtype, leafmtype;

1493:   PetscFunctionBegin;
1495:   PetscCall(PetscSFSetUp(sf));
1496:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1497:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1498:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1499:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1500:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1501:   PetscFunctionReturn(PETSC_SUCCESS);
1502: }

1504: /*@C
1505:   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1506:   to `PetscSFBcastEnd()`

1508:   Collective

1510:   Input Parameters:
1511: + sf        - star forest on which to communicate
1512: . unit      - data type associated with each node
1513: . rootmtype - memory type of rootdata
1514: . rootdata  - buffer to broadcast
1515: . leafmtype - memory type of leafdata
1516: - op        - operation to use for reduction

1518:   Output Parameter:
1519: . leafdata - buffer to be reduced with values from each leaf's respective root

1521:   Level: intermediate
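
  Example:
  A sketch of broadcasting host root data into a device leaf buffer; `rootdata` is assumed to live on the host and `d_leafdata` to be a device array obtained elsewhere\:
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_DEVICE, d_leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, d_leafdata, MPI_REPLACE));
.ve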

1523: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1524: @*/
1525: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1526: {
1527:   PetscFunctionBegin;
1529:   PetscCall(PetscSFSetUp(sf));
1530:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1531:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1532:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1533:   PetscFunctionReturn(PETSC_SUCCESS);
1534: }

1536: /*@C
1537:   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

1539:   Collective

1541:   Input Parameters:
1542: + sf       - star forest
1543: . unit     - data type
1544: . rootdata - buffer to broadcast
1545: - op       - operation to use for reduction

1547:   Output Parameter:
1548: . leafdata - buffer to be reduced with values from each leaf's respective root

1550:   Level: intermediate

1552: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1553: @*/
1554: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1555: {
1556:   PetscFunctionBegin;
1558:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1559:   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1560:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1561:   PetscFunctionReturn(PETSC_SUCCESS);
1562: }

1564: /*@C
1565:   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`

1567:   Collective

1569:   Input Parameters:
1570: + sf       - star forest
1571: . unit     - data type
1572: . leafdata - values to reduce
1573: - op       - reduction operation

1575:   Output Parameter:
1576: . rootdata - result of reduction of values from all leaves of each root

1578:   Level: intermediate

1580:   Note:
1581:   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1582:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1583:   use `PetscSFReduceWithMemTypeBegin()` instead.
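
  Example:
  A typical begin/end pairing that sums leaf contributions into roots; the buffers are assumed to be `PetscInt` arrays sized for this `sf`\:
.vb
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
.ve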

1585: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1586: @*/
1587: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1588: {
1589:   PetscMemType rootmtype, leafmtype;

1591:   PetscFunctionBegin;
1593:   PetscCall(PetscSFSetUp(sf));
1594:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1595:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1596:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1597:   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1598:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1599:   PetscFunctionReturn(PETSC_SUCCESS);
1600: }

1602: /*@C
1603:   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`

1605:   Collective

1607:   Input Parameters:
1608: + sf        - star forest
1609: . unit      - data type
1610: . leafmtype - memory type of leafdata
1611: . leafdata  - values to reduce
1612: . rootmtype - memory type of rootdata
1613: - op        - reduction operation

1615:   Output Parameter:
1616: . rootdata - result of reduction of values from all leaves of each root

1618:   Level: intermediate

1620: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1621: @*/
1622: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1623: {
1624:   PetscFunctionBegin;
1626:   PetscCall(PetscSFSetUp(sf));
1627:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1628:   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1629:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1630:   PetscFunctionReturn(PETSC_SUCCESS);
1631: }

1633: /*@C
1634:   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`

1636:   Collective

1638:   Input Parameters:
1639: + sf       - star forest
1640: . unit     - data type
1641: . leafdata - values to reduce
1642: - op       - reduction operation

1644:   Output Parameter:
1645: . rootdata - result of reduction of values from all leaves of each root

1647:   Level: intermediate

1649: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1650: @*/
1651: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1652: {
1653:   PetscFunctionBegin;
1655:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1656:   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1657:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1658:   PetscFunctionReturn(PETSC_SUCCESS);
1659: }

1661: /*@C
1662:   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1663:   to be completed with `PetscSFFetchAndOpEnd()`

1665:   Collective

1667:   Input Parameters:
1668: + sf       - star forest
1669: . unit     - data type
1670: . leafdata - leaf values to use in reduction
1671: - op       - operation to use for reduction

1673:   Output Parameters:
1674: + rootdata   - root values to be updated, input state is seen by first process to perform an update
1675: - leafupdate - state at each leaf's respective root immediately prior to my atomic update

1677:   Level: advanced

1679:   Note:
1680:   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1681:   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1682:   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1683:   integers.
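
  Example:
  A sketch of the common use, assigning each leaf a unique slot at its root by fetch-and-add (the same idiom `PetscSFGetMultiSF()` uses internally). The arrays `rootcount`, `ones`, and `leafoff` are assumed to be caller-provided `PetscInt` buffers, with `rootcount` zeroed and `ones` filled with 1\:
.vb
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcount, ones, leafoff, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcount, ones, leafoff, MPI_SUM));
  /* with contiguous leaves, leafoff[i] is the root counter value seen by leaf i just before its increment */
.ve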

1685: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1686: @*/
1687: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1688: {
1689:   PetscMemType rootmtype, leafmtype, leafupdatemtype;

1691:   PetscFunctionBegin;
1693:   PetscCall(PetscSFSetUp(sf));
1694:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1695:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1696:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1697:   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1698:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1699:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1700:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1701:   PetscFunctionReturn(PETSC_SUCCESS);
1702: }

1704: /*@C
1705:   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1706:   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`

1708:   Collective

1710:   Input Parameters:
1711: + sf              - star forest
1712: . unit            - data type
1713: . rootmtype       - memory type of rootdata
1714: . leafmtype       - memory type of leafdata
1715: . leafdata        - leaf values to use in reduction
1716: . leafupdatemtype - memory type of leafupdate
1717: - op              - operation to use for reduction

1719:   Output Parameters:
1720: + rootdata   - root values to be updated, input state is seen by first process to perform an update
1721: - leafupdate - state at each leaf's respective root immediately prior to my atomic update

1723:   Level: advanced

1725:   Note:
1726:   See `PetscSFFetchAndOpBegin()` for more details.

1728: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1729: @*/
1730: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1731: {
1732:   PetscFunctionBegin;
1734:   PetscCall(PetscSFSetUp(sf));
1735:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1736:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1737:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1738:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1739:   PetscFunctionReturn(PETSC_SUCCESS);
1740: }

1742: /*@C
1743:   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1744:   to fetch values from roots and update atomically by applying operation using my leaf value

1746:   Collective

1748:   Input Parameters:
1749: + sf       - star forest
1750: . unit     - data type
1751: . leafdata - leaf values to use in reduction
1752: - op       - operation to use for reduction

1754:   Output Parameters:
1755: + rootdata   - root values to be updated, input state is seen by first process to perform an update
1756: - leafupdate - state at each leaf's respective root immediately prior to my atomic update

1758:   Level: advanced

1760: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1761: @*/
1762: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1763: {
1764:   PetscFunctionBegin;
1766:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1767:   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1768:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1769:   PetscFunctionReturn(PETSC_SUCCESS);
1770: }

1772: /*@C
1773:   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

1775:   Collective

1777:   Input Parameter:
1778: . sf - star forest

1780:   Output Parameter:
1781: . degree - degree of each root vertex

1783:   Level: advanced

1785:   Note:
1786:   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
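
  Example:
  A typical begin/end pairing (the array is owned by `sf`, so it is not freed here)\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  /* degree[i] is now the number of leaves connected to root i */
.ve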

1788: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1789: @*/
1790: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1791: {
1792:   PetscFunctionBegin;
1794:   PetscSFCheckGraphSet(sf, 1);
1795:   PetscAssertPointer(degree, 2);
1796:   if (!sf->degreeknown) {
1797:     PetscInt i, nroots = sf->nroots, maxlocal;
1798:     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1799:     maxlocal = sf->maxleaf - sf->minleaf + 1;
1800:     PetscCall(PetscMalloc1(nroots, &sf->degree));
1801:     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1802:     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1803:     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1804:     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1805:   }
1806:   *degree = NULL;
1807:   PetscFunctionReturn(PETSC_SUCCESS);
1808: }

1810: /*@C
1811:   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

1813:   Collective

1815:   Input Parameter:
1816: . sf - star forest

1818:   Output Parameter:
1819: . degree - degree of each root vertex

1821:   Level: developer

1823:   Note:
1824:   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

1826: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1827: @*/
1828: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1829: {
1830:   PetscFunctionBegin;
1832:   PetscSFCheckGraphSet(sf, 1);
1833:   PetscAssertPointer(degree, 2);
1834:   if (!sf->degreeknown) {
1835:     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1836:     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1837:     PetscCall(PetscFree(sf->degreetmp));
1838:     sf->degreeknown = PETSC_TRUE;
1839:   }
1840:   *degree = sf->degree;
1841:   PetscFunctionReturn(PETSC_SUCCESS);
1842: }

1844: /*@C
1845:   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1846:   Each multi-root is assigned the index of the corresponding original root.

1848:   Collective

1850:   Input Parameters:
1851: + sf     - star forest
1852: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

1854:   Output Parameters:
1855: + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1856: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

1858:   Level: developer

1860:   Note:
1861:   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
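
  Example:
  A sketch combining this routine with the degree computation\:
.vb
  const PetscInt *degree;
  PetscInt        nmroots, *morig;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig));
  /* morig[k] is the original root index of multi-root k */
  PetscCall(PetscFree(morig));
.ve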

1863: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1864: @*/
1865: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1866: {
1867:   PetscSF  msf;
1868:   PetscInt k = 0, nroots, nmroots;

1870:   PetscFunctionBegin;
1872:   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1873:   if (nroots) PetscAssertPointer(degree, 2);
1874:   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1875:   PetscAssertPointer(multiRootsOrigNumbering, 4);
1876:   PetscCall(PetscSFGetMultiSF(sf, &msf));
1877:   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1878:   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1879:   for (PetscInt i = 0; i < nroots; i++) {
1880:     if (!degree[i]) continue;
1881:     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1882:   }
1883:   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1884:   if (nMultiRoots) *nMultiRoots = nmroots;
1885:   PetscFunctionReturn(PETSC_SUCCESS);
1886: }

1888: /*@C
1889:   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

1891:   Collective

1893:   Input Parameters:
1894: + sf       - star forest
1895: . unit     - data type
1896: - leafdata - leaf data to gather to roots

1898:   Output Parameter:
1899: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1901:   Level: intermediate
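
  Example:
  A sketch that gathers one `PetscInt` per leaf into a buffer with one slot per (root, leaf) edge; `leafdata` is assumed to be provided by the caller\:
.vb
  PetscInt *multirootdata, nmroots;
  PetscSF   msf;

  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL)); /* nmroots equals the sum of the root degrees */
  PetscCall(PetscMalloc1(nmroots, &multirootdata));
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
.ve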

1903: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1904: @*/
1905: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1906: {
1907:   PetscSF multi = NULL;

1909:   PetscFunctionBegin;
1911:   PetscCall(PetscSFSetUp(sf));
1912:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1913:   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1914:   PetscFunctionReturn(PETSC_SUCCESS);
1915: }

1917: /*@C
1918:   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

1920:   Collective

1922:   Input Parameters:
1923: + sf       - star forest
1924: . unit     - data type
1925: - leafdata - leaf data to gather to roots

1927:   Output Parameter:
1928: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1930:   Level: intermediate

1932: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1933: @*/
1934: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1935: {
1936:   PetscSF multi = NULL;

1938:   PetscFunctionBegin;
1940:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1941:   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1942:   PetscFunctionReturn(PETSC_SUCCESS);
1943: }

1945: /*@C
1946:   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

1948:   Collective

1950:   Input Parameters:
1951: + sf            - star forest
1952: . unit          - data type
1953: - multirootdata - root buffer to send to each leaf, one unit of data per leaf

1955:   Output Parameter:
1956: . leafdata - leaf data to be updated with personal data from each respective root

1958:   Level: intermediate
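
  Example:
  A sketch of the begin/end pairing; `multirootdata` (one entry per (root, leaf) edge) and `leafdata` (one entry per leaf) are assumed to be caller-provided `PetscInt` buffers\:
.vb
  PetscCall(PetscSFScatterBegin(sf, MPIU_INT, multirootdata, leafdata));
  PetscCall(PetscSFScatterEnd(sf, MPIU_INT, multirootdata, leafdata));
.ve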

1960: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1961: @*/
1962: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1963: {
1964:   PetscSF multi = NULL;

1966:   PetscFunctionBegin;
1968:   PetscCall(PetscSFSetUp(sf));
1969:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1970:   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1971:   PetscFunctionReturn(PETSC_SUCCESS);
1972: }

1974: /*@C
1975:   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

1977:   Collective

1979:   Input Parameters:
1980: + sf            - star forest
1981: . unit          - data type
1982: - multirootdata - root buffer to send to each leaf, one unit of data per leaf

1984:   Output Parameter:
1985: . leafdata - leaf data to be updated with personal data from each respective root

1987:   Level: intermediate

1989: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1990: @*/
1991: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1992: {
1993:   PetscSF multi = NULL;

1995:   PetscFunctionBegin;
1997:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1998:   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1999:   PetscFunctionReturn(PETSC_SUCCESS);
2000: }

2002: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2003: {
2004:   PetscInt        i, n, nleaves;
2005:   const PetscInt *ilocal = NULL;
2006:   PetscHSetI      seen;

2008:   PetscFunctionBegin;
2009:   if (PetscDefined(USE_DEBUG)) {
2010:     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2011:     PetscCall(PetscHSetICreate(&seen));
2012:     for (i = 0; i < nleaves; i++) {
2013:       const PetscInt leaf = ilocal ? ilocal[i] : i;
2014:       PetscCall(PetscHSetIAdd(seen, leaf));
2015:     }
2016:     PetscCall(PetscHSetIGetSize(seen, &n));
2017:     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2018:     PetscCall(PetscHSetIDestroy(&seen));
2019:   }
2020:   PetscFunctionReturn(PETSC_SUCCESS);
2021: }

2023: /*@
2024:   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

2026:   Input Parameters:
2027: + sfA - The first `PetscSF`
2028: - sfB - The second `PetscSF`

2030:   Output Parameter:
2031: . sfBA - The composite `PetscSF`

2033:   Level: developer

2035:   Notes:
2036:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2037:   forests, i.e., the same leaf is not connected to different roots.

2039:   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2040:   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2041:   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2042:   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
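
  Example:
  A minimal sketch, assuming `sfA` and `sfB` already have their graphs set\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  /* a broadcast on sfBA acts like a broadcast on sfA followed by one on sfB, restricted to connected nodes */
  PetscCall(PetscSFDestroy(&sfBA));
.ve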

2044: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2045: @*/
2046: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2047: {
2048:   const PetscSFNode *remotePointsA, *remotePointsB;
2049:   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2050:   const PetscInt    *localPointsA, *localPointsB;
2051:   PetscInt          *localPointsBA;
2052:   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2053:   PetscBool          denseB;

2055:   PetscFunctionBegin;
2057:   PetscSFCheckGraphSet(sfA, 1);
2059:   PetscSFCheckGraphSet(sfB, 2);
2060:   PetscCheckSameComm(sfA, 1, sfB, 2);
2061:   PetscAssertPointer(sfBA, 3);
2062:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2063:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

2065:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2066:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2067:   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2068:      numRootsB; otherwise, garbage will be broadcast.
2069:      Example (comm size = 1):
2070:      sfA: 0 <- (0, 0)
2071:      sfB: 100 <- (0, 0)
2072:           101 <- (0, 1)
2073:      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2074:      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2075:      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2076:      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2077:   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2078:   for (i = 0; i < numRootsB; i++) {
2079:     reorderedRemotePointsA[i].rank  = -1;
2080:     reorderedRemotePointsA[i].index = -1;
2081:   }
2082:   for (i = 0; i < numLeavesA; i++) {
2083:     PetscInt localp = localPointsA ? localPointsA[i] : i;

2085:     if (localp >= numRootsB) continue;
2086:     reorderedRemotePointsA[localp] = remotePointsA[i];
2087:   }
2088:   remotePointsA = reorderedRemotePointsA;
2089:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2090:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2091:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2092:     leafdataB[i].rank  = -1;
2093:     leafdataB[i].index = -1;
2094:   }
2095:   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2096:   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2097:   PetscCall(PetscFree(reorderedRemotePointsA));

2099:   denseB = (PetscBool)!localPointsB;
2100:   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2101:     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2102:     else numLeavesBA++;
2103:   }
2104:   if (denseB) {
2105:     localPointsBA  = NULL;
2106:     remotePointsBA = leafdataB;
2107:   } else {
2108:     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2109:     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2110:     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2111:       const PetscInt l = localPointsB ? localPointsB[i] : i;

2113:       if (leafdataB[l - minleaf].rank == -1) continue;
2114:       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2115:       localPointsBA[numLeavesBA]  = l;
2116:       numLeavesBA++;
2117:     }
2118:     PetscCall(PetscFree(leafdataB));
2119:   }
2120:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2121:   PetscCall(PetscSFSetFromOptions(*sfBA));
2122:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2123:   PetscFunctionReturn(PETSC_SUCCESS);
2124: }

2126: /*@
2127:   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

2129:   Input Parameters:
2130: + sfA - The first `PetscSF`
2131: - sfB - The second `PetscSF`

2133:   Output Parameter:
2134: . sfBA - The composite `PetscSF`.

2136:   Level: developer

2138:   Notes:
2139:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2140:   forests, i.e., the same leaf is not connected to different roots. Moreover, all roots of the
2141:   second `PetscSF` must have a degree of 1, i.e., no root has more than one leaf connected to it.

2143:   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2144:   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2145:   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2146:   on `sfA`, then
2147:   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
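
  Example:
  A minimal sketch, assuming `sfA` and `sfB` already have their graphs set and every root of `sfB` has degree 1\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFComposeInverse(sfA, sfB, &sfBA));
  /* a broadcast on sfBA acts like a broadcast on sfA followed by a reduction on sfB, restricted to connected roots */
  PetscCall(PetscSFDestroy(&sfBA));
.ve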

2149: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2150: @*/
2151: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2152: {
2153:   const PetscSFNode *remotePointsA, *remotePointsB;
2154:   PetscSFNode       *remotePointsBA;
2155:   const PetscInt    *localPointsA, *localPointsB;
2156:   PetscSFNode       *reorderedRemotePointsA = NULL;
2157:   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2158:   MPI_Op             op;
2159: #if defined(PETSC_USE_64BIT_INDICES)
2160:   PetscBool iswin;
2161: #endif

2163:   PetscFunctionBegin;
2165:   PetscSFCheckGraphSet(sfA, 1);
2167:   PetscSFCheckGraphSet(sfB, 2);
2168:   PetscCheckSameComm(sfA, 1, sfB, 2);
2169:   PetscAssertPointer(sfBA, 3);
2170:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2171:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

2173:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2174:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

2176:   /* TODO: Check roots of sfB have degree of 1 */
2177:   /* Once we implement it, we can replace the MPI_MAXLOC
2178:      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2179:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2180:      the root condition is not met.
2181:    */
2182:   op = MPI_MAXLOC;
2183: #if defined(PETSC_USE_64BIT_INDICES)
2184:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2185:   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2186:   if (iswin) op = MPI_REPLACE;
2187: #endif

2189:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2190:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2191:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2192:     reorderedRemotePointsA[i].rank  = -1;
2193:     reorderedRemotePointsA[i].index = -1;
2194:   }
2195:   if (localPointsA) {
2196:     for (i = 0; i < numLeavesA; i++) {
2197:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2198:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2199:     }
2200:   } else {
2201:     for (i = 0; i < numLeavesA; i++) {
2202:       if (i > maxleaf || i < minleaf) continue;
2203:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2204:     }
2205:   }

2207:   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2208:   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2209:   for (i = 0; i < numRootsB; i++) {
2210:     remotePointsBA[i].rank  = -1;
2211:     remotePointsBA[i].index = -1;
2212:   }

2214:   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2215:   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2216:   PetscCall(PetscFree(reorderedRemotePointsA));
2217:   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2218:     if (remotePointsBA[i].rank == -1) continue;
2219:     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2220:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2221:     localPointsBA[numLeavesBA]        = i;
2222:     numLeavesBA++;
2223:   }
2224:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2225:   PetscCall(PetscSFSetFromOptions(*sfBA));
2226:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2227:   PetscFunctionReturn(PETSC_SUCCESS);
2228: }

2230: /*
2231:   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

2233:   Input Parameter:
2234: . sf - The global `PetscSF`

2236:   Output Parameter:
2237: . out - The local `PetscSF`

2239: .seealso: `PetscSF`, `PetscSFCreate()`
2240:  */
2241: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2242: {
2243:   MPI_Comm           comm;
2244:   PetscMPIInt        myrank;
2245:   const PetscInt    *ilocal;
2246:   const PetscSFNode *iremote;
2247:   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2248:   PetscSFNode       *liremote;
2249:   PetscSF            lsf;

2251:   PetscFunctionBegin;
2253:   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2254:   else {
2255:     PetscMPIInt irank;

2257:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2258:     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2259:     PetscCallMPI(MPI_Comm_rank(comm, &myrank));

2261:     /* Find out local edges and build a local SF */
2262:     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2263:     for (i = lnleaves = 0; i < nleaves; i++) {
2264:       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2265:       if (irank == myrank) lnleaves++;
2266:     }
2267:     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2268:     PetscCall(PetscMalloc1(lnleaves, &liremote));

2270:     for (i = j = 0; i < nleaves; i++) {
2271:       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2272:       if (irank == myrank) {
2273:         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2274:         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2275:         liremote[j].index = iremote[i].index;
2276:         j++;
2277:       }
2278:     }
2279:     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2280:     PetscCall(PetscSFSetFromOptions(lsf));
2281:     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2282:     PetscCall(PetscSFSetUp(lsf));
2283:     *out = lsf;
2284:   }
2285:   PetscFunctionReturn(PETSC_SUCCESS);
2286: }

2288: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2289: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2290: {
2291:   PetscMemType rootmtype, leafmtype;

2293:   PetscFunctionBegin;
2295:   PetscCall(PetscSFSetUp(sf));
2296:   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2297:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2298:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2299:   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2300:   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2301:   PetscFunctionReturn(PETSC_SUCCESS);
2302: }

2304: /*@
2305:   PetscSFConcatenate - concatenate multiple `PetscSF` into one

2307:   Input Parameters:
2308: + comm        - the communicator
2309: . nsfs        - the number of input `PetscSF`
2310: . sfs         - the array of input `PetscSF`
2311: . rootMode    - the root mode specifying how roots are handled
2312: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

2314:   Output Parameter:
2315: . newsf - The resulting `PetscSF`

2317:   Level: advanced

2319:   Notes:
2320:   The communicator of all `PetscSF`s in `sfs` must be `comm`.

2322:   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.

2324:   The offsets in `leafOffsets` are added to the original leaf indices.

2326:   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2327:   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.

2329:   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2330:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2332:   All root modes retain the essential connectivity condition.
2333:   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2334:   Parameter `rootMode` controls how the input root spaces are combined.
2335:   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2336:   and is also the same in the output `PetscSF`.
2337:   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2338:   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2339:   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2340:   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2341:   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2342:   the original root ranks are ignored.
2343:   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2344:   the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`s' local numbers of roots on each rank
2345:   to preserve the load balance.
2346:   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.

2348:   Example:
2349:   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2350: .vb
2351:   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2352:   for m in {local,global,shared}; do
2353:     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2354:   done
2355: .ve
2356:   we generate two identical `PetscSF`s sf_0 and sf_1,
2357: .vb
2358:   PetscSF Object: sf_0 2 MPI processes
2359:     type: basic
2360:     rank #leaves #roots
2361:     [ 0]       4      2
2362:     [ 1]       4      2
2363:     leaves      roots       roots in global numbering
2364:     ( 0,  0) <- ( 0,  0)  =   0
2365:     ( 0,  1) <- ( 0,  1)  =   1
2366:     ( 0,  2) <- ( 1,  0)  =   2
2367:     ( 0,  3) <- ( 1,  1)  =   3
2368:     ( 1,  0) <- ( 0,  0)  =   0
2369:     ( 1,  1) <- ( 0,  1)  =   1
2370:     ( 1,  2) <- ( 1,  0)  =   2
2371:     ( 1,  3) <- ( 1,  1)  =   3
2372: .ve
2373:   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2374: .vb
2375:   rootMode = local:
2376:   PetscSF Object: result_sf 2 MPI processes
2377:     type: basic
2378:     rank #leaves #roots
2379:     [ 0]       8      4
2380:     [ 1]       8      4
2381:     leaves      roots       roots in global numbering
2382:     ( 0,  0) <- ( 0,  0)  =   0
2383:     ( 0,  1) <- ( 0,  1)  =   1
2384:     ( 0,  2) <- ( 1,  0)  =   4
2385:     ( 0,  3) <- ( 1,  1)  =   5
2386:     ( 0,  4) <- ( 0,  2)  =   2
2387:     ( 0,  5) <- ( 0,  3)  =   3
2388:     ( 0,  6) <- ( 1,  2)  =   6
2389:     ( 0,  7) <- ( 1,  3)  =   7
2390:     ( 1,  0) <- ( 0,  0)  =   0
2391:     ( 1,  1) <- ( 0,  1)  =   1
2392:     ( 1,  2) <- ( 1,  0)  =   4
2393:     ( 1,  3) <- ( 1,  1)  =   5
2394:     ( 1,  4) <- ( 0,  2)  =   2
2395:     ( 1,  5) <- ( 0,  3)  =   3
2396:     ( 1,  6) <- ( 1,  2)  =   6
2397:     ( 1,  7) <- ( 1,  3)  =   7

2399:   rootMode = global:
2400:   PetscSF Object: result_sf 2 MPI processes
2401:     type: basic
2402:     rank #leaves #roots
2403:     [ 0]       8      4
2404:     [ 1]       8      4
2405:     leaves      roots       roots in global numbering
2406:     ( 0,  0) <- ( 0,  0)  =   0
2407:     ( 0,  1) <- ( 0,  1)  =   1
2408:     ( 0,  2) <- ( 0,  2)  =   2
2409:     ( 0,  3) <- ( 0,  3)  =   3
2410:     ( 0,  4) <- ( 1,  0)  =   4
2411:     ( 0,  5) <- ( 1,  1)  =   5
2412:     ( 0,  6) <- ( 1,  2)  =   6
2413:     ( 0,  7) <- ( 1,  3)  =   7
2414:     ( 1,  0) <- ( 0,  0)  =   0
2415:     ( 1,  1) <- ( 0,  1)  =   1
2416:     ( 1,  2) <- ( 0,  2)  =   2
2417:     ( 1,  3) <- ( 0,  3)  =   3
2418:     ( 1,  4) <- ( 1,  0)  =   4
2419:     ( 1,  5) <- ( 1,  1)  =   5
2420:     ( 1,  6) <- ( 1,  2)  =   6
2421:     ( 1,  7) <- ( 1,  3)  =   7

2423:   rootMode = shared:
2424:   PetscSF Object: result_sf 2 MPI processes
2425:     type: basic
2426:     rank #leaves #roots
2427:     [ 0]       8      2
2428:     [ 1]       8      2
2429:     leaves      roots       roots in global numbering
2430:     ( 0,  0) <- ( 0,  0)  =   0
2431:     ( 0,  1) <- ( 0,  1)  =   1
2432:     ( 0,  2) <- ( 1,  0)  =   2
2433:     ( 0,  3) <- ( 1,  1)  =   3
2434:     ( 0,  4) <- ( 0,  0)  =   0
2435:     ( 0,  5) <- ( 0,  1)  =   1
2436:     ( 0,  6) <- ( 1,  0)  =   2
2437:     ( 0,  7) <- ( 1,  1)  =   3
2438:     ( 1,  0) <- ( 0,  0)  =   0
2439:     ( 1,  1) <- ( 0,  1)  =   1
2440:     ( 1,  2) <- ( 1,  0)  =   2
2441:     ( 1,  3) <- ( 1,  1)  =   3
2442:     ( 1,  4) <- ( 0,  0)  =   0
2443:     ( 1,  5) <- ( 0,  1)  =   1
2444:     ( 1,  6) <- ( 1,  0)  =   2
2445:     ( 1,  7) <- ( 1,  1)  =   3
2446: .ve

2448: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2449: @*/
2450: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2451: {
2452:   PetscInt     i, s, nLeaves, nRoots;
2453:   PetscInt    *leafArrayOffsets;
2454:   PetscInt    *ilocal_new;
2455:   PetscSFNode *iremote_new;
2456:   PetscBool    all_ilocal_null = PETSC_FALSE;
2457:   PetscLayout  glayout         = NULL;
2458:   PetscInt    *gremote         = NULL;
2459:   PetscMPIInt  rank, size;

2461:   PetscFunctionBegin;
2462:   if (PetscDefined(USE_DEBUG)) {
2463:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2465:     PetscCall(PetscSFCreate(comm, &dummy));
2467:     PetscAssertPointer(sfs, 3);
2468:     for (i = 0; i < nsfs; i++) {
2470:       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2471:     }
2473:     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2474:     PetscAssertPointer(newsf, 6);
2475:     PetscCall(PetscSFDestroy(&dummy));
2476:   }
2477:   if (!nsfs) {
2478:     PetscCall(PetscSFCreate(comm, newsf));
2479:     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2480:     PetscFunctionReturn(PETSC_SUCCESS);
2481:   }
2482:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2483:   PetscCallMPI(MPI_Comm_size(comm, &size));

2485:   /* Calculate leaf array offsets */
2486:   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2487:   leafArrayOffsets[0] = 0;
2488:   for (s = 0; s < nsfs; s++) {
2489:     PetscInt nl;

2491:     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2492:     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2493:   }
2494:   nLeaves = leafArrayOffsets[nsfs];

2496:   /* Calculate number of roots */
2497:   switch (rootMode) {
2498:   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2499:     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2500:     if (PetscDefined(USE_DEBUG)) {
2501:       for (s = 1; s < nsfs; s++) {
2502:         PetscInt nr;

2504:         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2505:         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2506:       }
2507:     }
2508:   } break;
2509:   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2510:     /* Calculate also global layout in this case */
2511:     PetscInt    *nls;
2512:     PetscLayout *lts;
2513:     PetscInt   **inds;
2514:     PetscInt     j;
2515:     PetscInt     rootOffset = 0;

2517:     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2518:     PetscCall(PetscLayoutCreate(comm, &glayout));
2519:     glayout->bs = 1;
2520:     glayout->n  = 0;
2521:     glayout->N  = 0;
2522:     for (s = 0; s < nsfs; s++) {
2523:       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2524:       glayout->n += lts[s]->n;
2525:       glayout->N += lts[s]->N;
2526:     }
2527:     PetscCall(PetscLayoutSetUp(glayout));
2528:     PetscCall(PetscMalloc1(nLeaves, &gremote));
2529:     for (s = 0, j = 0; s < nsfs; s++) {
2530:       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2531:       rootOffset += lts[s]->N;
2532:       PetscCall(PetscLayoutDestroy(&lts[s]));
2533:       PetscCall(PetscFree(inds[s]));
2534:     }
2535:     PetscCall(PetscFree3(lts, nls, inds));
2536:     nRoots = glayout->N;
2537:   } break;
2538:   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2539:     /* nRoots calculated later in this case */
2540:     break;
2541:   default:
2542:     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2543:   }

2545:   if (!leafOffsets) {
2546:     all_ilocal_null = PETSC_TRUE;
2547:     for (s = 0; s < nsfs; s++) {
2548:       const PetscInt *ilocal;

2550:       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2551:       if (ilocal) {
2552:         all_ilocal_null = PETSC_FALSE;
2553:         break;
2554:       }
2555:     }
2556:     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2557:   }

2559:   /* Renumber and concatenate local leaves */
2560:   ilocal_new = NULL;
2561:   if (!all_ilocal_null) {
2562:     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2563:     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2564:     for (s = 0; s < nsfs; s++) {
2565:       const PetscInt *ilocal;
2566:       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2567:       PetscInt        i, nleaves_l;

2569:       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2570:       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2571:     }
2572:   }

2574:   /* Renumber and concatenate remote roots */
2575:   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2576:     PetscInt rootOffset = 0;

2578:     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2579:     for (i = 0; i < nLeaves; i++) {
2580:       iremote_new[i].rank  = -1;
2581:       iremote_new[i].index = -1;
2582:     }
2583:     for (s = 0; s < nsfs; s++) {
2584:       PetscInt           i, nl, nr;
2585:       PetscSF            tmp_sf;
2586:       const PetscSFNode *iremote;
2587:       PetscSFNode       *tmp_rootdata;
2588:       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);

2590:       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2591:       PetscCall(PetscSFCreate(comm, &tmp_sf));
2592:       /* create helper SF with contiguous leaves */
2593:       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2594:       PetscCall(PetscSFSetUp(tmp_sf));
2595:       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2596:       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2597:         for (i = 0; i < nr; i++) {
2598:           tmp_rootdata[i].index = i + rootOffset;
2599:           tmp_rootdata[i].rank  = rank;
2600:         }
2601:         rootOffset += nr;
2602:       } else {
2603:         for (i = 0; i < nr; i++) {
2604:           tmp_rootdata[i].index = i;
2605:           tmp_rootdata[i].rank  = rank;
2606:         }
2607:       }
2608:       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2609:       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2610:       PetscCall(PetscSFDestroy(&tmp_sf));
2611:       PetscCall(PetscFree(tmp_rootdata));
2612:     }
2613:     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above

2615:     /* Build the new SF */
2616:     PetscCall(PetscSFCreate(comm, newsf));
2617:     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2618:   } else {
2619:     /* Build the new SF */
2620:     PetscCall(PetscSFCreate(comm, newsf));
2621:     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2622:   }
2623:   PetscCall(PetscSFSetUp(*newsf));
2624:   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2625:   PetscCall(PetscLayoutDestroy(&glayout));
2626:   PetscCall(PetscFree(gremote));
2627:   PetscCall(PetscFree(leafArrayOffsets));
2628:   PetscFunctionReturn(PETSC_SUCCESS);
2629: }

2631: /*@
2632:   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.

2634:   Collective

2636:   Input Parameters:
2637: + sf       - star forest
2638: . unit     - the data type contained within the root and leaf data
2639: . rootdata - root data that will be used for multiple PetscSF communications
2640: - leafdata - leaf data that will be used for multiple PetscSF communications

2642:   Level: advanced

2644:   Notes:
2645:   Implementations of `PetscSF` can make optimizations
2646:   for repeated communication using the same memory regions, but these optimizations
2647:   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2648:   The intended pattern is

2650: .vb
2651:   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);

2653:   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2654:   // repeated use of rootdata and leafdata will now be optimized

2656:   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2657:   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2658:   // ...
2659:   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2660:   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2661:   // ... (other communications)

2663:   // rootdata and leafdata must be deregistered before freeing
2664:   // skipping this can lead to undefined behavior including
2665:   // deadlocks
2666:   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);

2668:   // it is now safe to free rootdata and leafdata
2669:   PetscFree2(rootdata, leafdata);
2670: .ve

2672:   If you do not register `rootdata` and `leafdata`, it will not cause an error,
2673:   but optimizations that reduce the setup time for each communication cannot be
2674:   made.  Currently, the only implementation of `PetscSF` that benefits from
2675:   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
2676:   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.

2678: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2679: @*/
2680: PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2681: {
2682:   PetscFunctionBegin;
2683:   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2684:   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2685:   PetscFunctionReturn(PETSC_SUCCESS);
2686: }

2688: /*@
2689:   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.

2691:   Collective

2693:   Input Parameters:
2694: + sf       - star forest
2695: . unit     - the data type contained within the root and leaf data
2696: . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2697: - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`

2699:   Level: advanced

2701:   Note:
2702:   See `PetscSFRegisterPersistent()` for when/how to use this function.

2704: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2705: @*/
2706: PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2707: {
2708:   PetscFunctionBegin;
2709:   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2710:   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2711:   PetscFunctionReturn(PETSC_SUCCESS);
2712: }

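/* Return in 'size' the extent in bytes of the MPI datatype 'unit'; datatypes with a nonzero lower bound are not supported */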
2714: PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2715: {
2716:   MPI_Aint lb, lb_true, bytes, bytes_true;

2718:   PetscFunctionBegin;
2719:   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2720:   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2721:   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write to petsc-maint@mcs.anl.gov if you want this feature");
2722:   *size = bytes;
2723:   PetscFunctionReturn(PETSC_SUCCESS);
2724: }