Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petsc/private/hashmapi.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #include <petscdevice_cuda.h>
  9: #endif

 11: #if defined(PETSC_HAVE_HIP)
 12:   #include <hip/hip_runtime.h>
 13: #endif

 15: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 16: extern void PetscSFCheckGraphSet(PetscSF, int);
 17: #else
 18:   #if defined(PETSC_USE_DEBUG)
 19:     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
 20:   #else
 21:     #define PetscSFCheckGraphSet(sf, arg) \
 22:       do { \
 23:       } while (0)
 24:   #endif
 25: #endif

 27: const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
 28: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};

 30: /*@
 31:   PetscSFCreate - create a star forest communication context

 33:   Collective

 35:   Input Parameter:
 36: . comm - communicator on which the star forest will operate

 38:   Output Parameter:
 39: . sf - new star forest context

 41:   Options Database Key:
 42: . -sf_type type - value of type may be
 43: .vb
 44:     basic     - Use MPI persistent Isend/Irecv for communication (default)
 45:     window    - Use MPI-3 one-sided window for communication
 46:     neighbor  - Use MPI-3 neighborhood collectives for communication
 47: .ve

 49:   Level: intermediate

 51:   Note:
 52:   When one knows the communication graph is one of the predefined patterns, such as those of `MPI_Alltoall()`, `MPI_Allgatherv()`,
 53:   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
 54:   `SF`s are optimized and have better performance than general `SF`s.
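
  Example Usage:
  A minimal sketch; the one-root-per-process ring graph built below is purely illustrative.
.vb
  PetscSF     sf;
  PetscSFNode remote;
  PetscMPIInt rank, size;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf));
  remote.rank  = (rank + 1) % size; // each process has one leaf referencing root 0 of the next process
  remote.index = 0;
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
  // ... PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd(), ...
  PetscCall(PetscSFDestroy(&sf));
.ve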

 56: .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
 57: @*/
 58: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
 59: {
 60:   PetscSF b;

 62:   PetscFunctionBegin;
 63:   PetscAssertPointer(sf, 2);
 64:   PetscCall(PetscSFInitializePackage());

 66:   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));

 68:   b->nroots    = -1;
 69:   b->nleaves   = -1;
 70:   b->minleaf   = PETSC_MAX_INT;
 71:   b->maxleaf   = PETSC_MIN_INT;
 72:   b->nranks    = -1;
 73:   b->rankorder = PETSC_TRUE;
 74:   b->ingroup   = MPI_GROUP_NULL;
 75:   b->outgroup  = MPI_GROUP_NULL;
 76:   b->graphset  = PETSC_FALSE;
 77: #if defined(PETSC_HAVE_DEVICE)
 78:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 79:   b->use_stream_aware_mpi = PETSC_FALSE;
 80:   b->unknown_input_stream = PETSC_FALSE;
 81:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
 82:   b->backend = PETSCSF_BACKEND_KOKKOS;
 83:   #elif defined(PETSC_HAVE_CUDA)
 84:   b->backend = PETSCSF_BACKEND_CUDA;
 85:   #elif defined(PETSC_HAVE_HIP)
 86:   b->backend = PETSCSF_BACKEND_HIP;
 87:   #endif

 89:   #if defined(PETSC_HAVE_NVSHMEM)
 90:   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 91:   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 92:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
 93:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
 94:   #endif
 95: #endif
 96:   b->vscat.from_n = -1;
 97:   b->vscat.to_n   = -1;
 98:   b->vscat.unit   = MPIU_SCALAR;
 99:   *sf             = b;
100:   PetscFunctionReturn(PETSC_SUCCESS);
101: }

103: /*@
104:   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

106:   Collective

108:   Input Parameter:
109: . sf - star forest

111:   Level: advanced

113: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
114: @*/
115: PetscErrorCode PetscSFReset(PetscSF sf)
116: {
117:   PetscFunctionBegin;
119:   PetscTryTypeMethod(sf, Reset);
120:   sf->nroots   = -1;
121:   sf->nleaves  = -1;
122:   sf->minleaf  = PETSC_MAX_INT;
123:   sf->maxleaf  = PETSC_MIN_INT;
124:   sf->mine     = NULL;
125:   sf->remote   = NULL;
126:   sf->graphset = PETSC_FALSE;
127:   PetscCall(PetscFree(sf->mine_alloc));
128:   PetscCall(PetscFree(sf->remote_alloc));
129:   sf->nranks = -1;
130:   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
131:   sf->degreeknown = PETSC_FALSE;
132:   PetscCall(PetscFree(sf->degree));
133:   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
134:   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
135:   if (sf->multi) sf->multi->multi = NULL;
136:   PetscCall(PetscSFDestroy(&sf->multi));
137:   PetscCall(PetscLayoutDestroy(&sf->map));

139: #if defined(PETSC_HAVE_DEVICE)
140:   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
141: #endif

143:   sf->setupcalled = PETSC_FALSE;
144:   PetscFunctionReturn(PETSC_SUCCESS);
145: }

147: /*@C
148:   PetscSFSetType - Set the `PetscSF` communication implementation

150:   Collective

152:   Input Parameters:
153: + sf   - the `PetscSF` context
154: - type - a known method
155: .vb
156:     PETSCSFWINDOW - MPI-2/3 one-sided
157:     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158: .ve

160:   Options Database Key:
161: . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods

163:   Level: intermediate

165:   Notes:
166:   See `PetscSFType` for possible values
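
  Example Usage:
  A small sketch; the choice of the window implementation here is purely illustrative.
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetType(sf, PETSCSFWINDOW)); // selects the one-sided implementation; -sf_type window does the same from the options database
  PetscCall(PetscSFDestroy(&sf));
.ve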

168: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169: @*/
170: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171: {
172:   PetscBool match;
173:   PetscErrorCode (*r)(PetscSF);

175:   PetscFunctionBegin;
177:   PetscAssertPointer(type, 2);

179:   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180:   if (match) PetscFunctionReturn(PETSC_SUCCESS);

182:   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183:   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184:   /* Destroy the previous PetscSF implementation context */
185:   PetscTryTypeMethod(sf, Destroy);
186:   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187:   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188:   PetscCall((*r)(sf));
189:   PetscFunctionReturn(PETSC_SUCCESS);
190: }

192: /*@C
193:   PetscSFGetType - Get the `PetscSF` communication implementation

195:   Not Collective

197:   Input Parameter:
198: . sf - the `PetscSF` context

200:   Output Parameter:
201: . type - the `PetscSF` type name

203:   Level: intermediate

205: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
206: @*/
207: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208: {
209:   PetscFunctionBegin;
211:   PetscAssertPointer(type, 2);
212:   *type = ((PetscObject)sf)->type_name;
213:   PetscFunctionReturn(PETSC_SUCCESS);
214: }

216: /*@C
217:   PetscSFDestroy - destroy a star forest

219:   Collective

221:   Input Parameter:
222: . sf - address of star forest

224:   Level: intermediate

226: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
227: @*/
228: PetscErrorCode PetscSFDestroy(PetscSF *sf)
229: {
230:   PetscFunctionBegin;
231:   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233:   if (--((PetscObject)(*sf))->refct > 0) {
234:     *sf = NULL;
235:     PetscFunctionReturn(PETSC_SUCCESS);
236:   }
237:   PetscCall(PetscSFReset(*sf));
238:   PetscTryTypeMethod((*sf), Destroy);
239:   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
240:   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242:   if ((*sf)->use_stream_aware_mpi) {
243:     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244:     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245:   }
246: #endif
247:   PetscCall(PetscHeaderDestroy(sf));
248:   PetscFunctionReturn(PETSC_SUCCESS);
249: }

251: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252: {
253:   PetscInt           i, nleaves;
254:   PetscMPIInt        size;
255:   const PetscInt    *ilocal;
256:   const PetscSFNode *iremote;

258:   PetscFunctionBegin;
259:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260:   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262:   for (i = 0; i < nleaves; i++) {
263:     const PetscInt rank   = iremote[i].rank;
264:     const PetscInt remote = iremote[i].index;
265:     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266:     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267:     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268:     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269:   }
270:   PetscFunctionReturn(PETSC_SUCCESS);
271: }

273: /*@
274:   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication

276:   Collective

278:   Input Parameter:
279: . sf - star forest communication object

281:   Level: beginner

283: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
284: @*/
285: PetscErrorCode PetscSFSetUp(PetscSF sf)
286: {
287:   PetscFunctionBegin;
289:   PetscSFCheckGraphSet(sf, 1);
290:   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
291:   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
292:   PetscCall(PetscSFCheckGraphValid_Private(sf));
293:   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294:   PetscTryTypeMethod(sf, SetUp);
295: #if defined(PETSC_HAVE_CUDA)
296:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
297:     sf->ops->Malloc = PetscSFMalloc_CUDA;
298:     sf->ops->Free   = PetscSFFree_CUDA;
299:   }
300: #endif
301: #if defined(PETSC_HAVE_HIP)
302:   if (sf->backend == PETSCSF_BACKEND_HIP) {
303:     sf->ops->Malloc = PetscSFMalloc_HIP;
304:     sf->ops->Free   = PetscSFFree_HIP;
305:   }
306: #endif

308: #if defined(PETSC_HAVE_KOKKOS)
309:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
310:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
311:     sf->ops->Free   = PetscSFFree_Kokkos;
312:   }
313: #endif
314:   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
315:   sf->setupcalled = PETSC_TRUE;
316:   PetscFunctionReturn(PETSC_SUCCESS);
317: }

319: /*@
320:   PetscSFSetFromOptions - set `PetscSF` options using the options database

322:   Logically Collective

324:   Input Parameter:
325: . sf - star forest

327:   Options Database Keys:
328: + -sf_type                                                                                                         - implementation type, see `PetscSFSetType()`
329: . -sf_rank_order                                                                                                   - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
330: . -sf_unknown_input_stream                                                                                         - Assume the caller computed the input root/leafdata on arbitrary device streams unknown to `PetscSF`, so `PetscSF`
331:                             performs the needed stream synchronization before processing the data (default: false).
333: . -sf_use_stream_aware_mpi                                                                                         - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
334:                                If true, this option only works with `-use_gpu_aware_mpi 1`.

336: - -sf_backend cuda | hip | kokkos - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
337:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
338:                               kokkos is the only available backend.

340:   Level: intermediate
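
  Example Usage:
  A small sketch; the executable name and the option values below are illustrative only.
.vb
  // in the application code
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf)); // picks up options such as -sf_type and -sf_backend

  // and on the command line, for example
  ./app -sf_type neighbor -sf_backend kokkos
.ve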

342: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
343: @*/
344: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345: {
346:   PetscSFType deft;
347:   char        type[256];
348:   PetscBool   flg;

350:   PetscFunctionBegin;
352:   PetscObjectOptionsBegin((PetscObject)sf);
353:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
354:   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
355:   PetscCall(PetscSFSetType(sf, flg ? type : deft));
356:   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357: #if defined(PETSC_HAVE_DEVICE)
358:   {
359:     char      backendstr[32] = {0};
360:     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
361:     /* Change the defaults set in PetscSFCreate() with command line options */
362:     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
363:     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
364:     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
365:     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
366:     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
367:     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
368:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
369:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
370:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
371:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
372:     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
373:   #elif defined(PETSC_HAVE_KOKKOS)
374:     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
375:   #endif

377:   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
378:     if (sf->use_stream_aware_mpi) {
379:       MPI_Info info;

381:       PetscCallMPI(MPI_Info_create(&info));
382:       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
383:       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
384:       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
385:       PetscCallMPI(MPI_Info_free(&info));
386:       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
387:     }
388:   #endif
389:   }
390: #endif
391:   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
392:   PetscOptionsEnd();
393:   PetscFunctionReturn(PETSC_SUCCESS);
394: }

396: /*@
397:   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

399:   Logically Collective

401:   Input Parameters:
402: + sf  - star forest
403: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

405:   Level: advanced

407: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
408: @*/
409: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
410: {
411:   PetscFunctionBegin;
414:   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
415:   sf->rankorder = flg;
416:   PetscFunctionReturn(PETSC_SUCCESS);
417: }

419: /*@C
420:   PetscSFSetGraph - Set a parallel star forest

422:   Collective

424:   Input Parameters:
425: + sf         - star forest
426: . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
427: . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
428: . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
429: during setup in debug mode)
430: . localmode  - copy mode for `ilocal`
431: . iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
432: during setup in debug mode)
433: - remotemode - copy mode for `iremote`

435:   Level: intermediate

437:   Notes:
438:   Leaf indices in `ilocal` must be unique, otherwise an error occurs.

440:   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
441:   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
442:   PETSc might modify the respective array;
443:   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
444:   Only with `PETSC_COPY_VALUES` is the respective array guaranteed to stay intact; in that case a const array can be passed (but a cast to non-const is needed).
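
  Example Usage:
  A minimal sketch of a graph with two leaves per process, both referencing roots owned by process 0; the sizes and indices are purely illustrative.
.vb
  PetscMPIInt rank;
  PetscInt    nroots, ilocal[2] = {3, 5};    // leaf locations in the local leafdata buffer
  PetscSFNode iremote[2] = {{0, 0}, {0, 1}}; // (rank, index) of the root each leaf references
  PetscSF     sf;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  nroots = (rank == 0) ? 2 : 0;              // only process 0 owns roots in this sketch
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  // with PETSC_COPY_VALUES the SF keeps its own copies, so ilocal and iremote stay intact
  PetscCall(PetscSFSetGraph(sf, nroots, 2, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
.ve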

446:   Fortran Notes:
447:   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.

449:   Developer Notes:
450:   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
451:   This also makes it easy to compare the leaf sets of two `PetscSF`s.

453: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
454: @*/
455: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
456: {
457:   PetscBool unique, contiguous;

459:   PetscFunctionBegin;
461:   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
462:   if (nleaves > 0) PetscAssertPointer(iremote, 6);
463:   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
464:   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
465:   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
466:    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
467:   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
468:   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

470:   if (sf->nroots >= 0) { /* Reset only if graph already set */
471:     PetscCall(PetscSFReset(sf));
472:   }

474:   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

476:   sf->nroots  = nroots;
477:   sf->nleaves = nleaves;

479:   if (localmode == PETSC_COPY_VALUES && ilocal) {
480:     PetscInt *tlocal = NULL;

482:     PetscCall(PetscMalloc1(nleaves, &tlocal));
483:     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
484:     ilocal = tlocal;
485:   }
486:   if (remotemode == PETSC_COPY_VALUES) {
487:     PetscSFNode *tremote = NULL;

489:     PetscCall(PetscMalloc1(nleaves, &tremote));
490:     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
491:     iremote = tremote;
492:   }

494:   if (nleaves && ilocal) {
495:     PetscSFNode work;

497:     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
498:     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
499:     unique = PetscNot(unique);
500:     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
501:     sf->minleaf = ilocal[0];
502:     sf->maxleaf = ilocal[nleaves - 1];
503:     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
504:   } else {
505:     sf->minleaf = 0;
506:     sf->maxleaf = nleaves - 1;
507:     unique      = PETSC_TRUE;
508:     contiguous  = PETSC_TRUE;
509:   }

511:   if (contiguous) {
512:     if (localmode == PETSC_USE_POINTER) {
513:       ilocal = NULL;
514:     } else {
515:       PetscCall(PetscFree(ilocal));
516:     }
517:   }
518:   sf->mine = ilocal;
519:   if (localmode == PETSC_USE_POINTER) {
520:     sf->mine_alloc = NULL;
521:   } else {
522:     sf->mine_alloc = ilocal;
523:   }
524:   sf->remote = iremote;
525:   if (remotemode == PETSC_USE_POINTER) {
526:     sf->remote_alloc = NULL;
527:   } else {
528:     sf->remote_alloc = iremote;
529:   }
530:   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
531:   sf->graphset = PETSC_TRUE;
532:   PetscFunctionReturn(PETSC_SUCCESS);
533: }

535: /*@
536:   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

538:   Collective

540:   Input Parameters:
541: + sf      - The `PetscSF`
542: . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
543: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

545:   Level: intermediate

547:   Notes:
548:   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
549:   `n` and `N` are the local and global sizes of `x` respectively.

551:   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to
552:   sequential vectors `y` on all MPI processes.

554:   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a
555:   sequential vector `y` on rank 0.

557:   In the above cases, entries of `x` are roots and entries of `y` are leaves.

559:   With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
560:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
561:   of rank j, where 0 <= i, j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that this does
562:   not mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
563:   items with `MPI_Type_contiguous()` and use that as the <unit> argument in `PetscSF` routines.

565:   In this case, roots and leaves are symmetric.
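
  Example Usage:
  A sketch of building an allgather-patterned `PetscSF` from a freshly created layout; the local size `n` is purely illustrative.
.vb
  PetscSF     sf;
  PetscLayout map;
  PetscInt    n = 2; // local number of roots

  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
  PetscCall(PetscLayoutSetLocalSize(map, n));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  // a PetscSFBcastBegin()/PetscSFBcastEnd() on sf now behaves like an MPI_Allgatherv() of the root data
  PetscCall(PetscLayoutDestroy(&map));
  PetscCall(PetscSFDestroy(&sf));
.ve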

567: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
568:  @*/
569: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
570: {
571:   MPI_Comm    comm;
572:   PetscInt    n, N, res[2];
573:   PetscMPIInt rank, size;
574:   PetscSFType type;

576:   PetscFunctionBegin;
578:   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
579:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
580:   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
581:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
582:   PetscCallMPI(MPI_Comm_size(comm, &size));

584:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
585:     type = PETSCSFALLTOALL;
586:     PetscCall(PetscLayoutCreate(comm, &sf->map));
587:     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
588:     PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
589:     PetscCall(PetscLayoutSetUp(sf->map));
590:   } else {
591:     PetscCall(PetscLayoutGetLocalSize(map, &n));
592:     PetscCall(PetscLayoutGetSize(map, &N));
593:     res[0] = n;
594:     res[1] = -n;
595:     /* Check if n is the same over all ranks so that we can optimize it */
596:     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
597:     if (res[0] == -res[1]) { /* same n */
598:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
599:     } else {
600:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
601:     }
602:     PetscCall(PetscLayoutReference(map, &sf->map));
603:   }
604:   PetscCall(PetscSFSetType(sf, type));

606:   sf->pattern = pattern;
607:   sf->mine    = NULL; /* Contiguous */

609:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
610:      Also set other easy stuff.
611:    */
612:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
613:     sf->nleaves = N;
614:     sf->nroots  = n;
615:     sf->nranks  = size;
616:     sf->minleaf = 0;
617:     sf->maxleaf = N - 1;
618:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
619:     sf->nleaves = rank ? 0 : N;
620:     sf->nroots  = n;
621:     sf->nranks  = rank ? 0 : size;
622:     sf->minleaf = 0;
623:     sf->maxleaf = rank ? -1 : N - 1;
624:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
625:     sf->nleaves = size;
626:     sf->nroots  = size;
627:     sf->nranks  = size;
628:     sf->minleaf = 0;
629:     sf->maxleaf = size - 1;
630:   }
631:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
632:   sf->graphset = PETSC_TRUE;
633:   PetscFunctionReturn(PETSC_SUCCESS);
634: }

636: /*@
637:   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

639:   Collective

641:   Input Parameter:
642: . sf - star forest to invert

644:   Output Parameter:
645: . isf - inverse of `sf`

647:   Level: advanced

649:   Notes:
650:   All roots must have degree 1.

652:   The local space may be a permutation, but cannot be sparse.
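
  Example Usage:
  A short sketch, assuming `sf` is an existing `PetscSF` in which every root has degree 1.
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  // a reduce on sf moves data in the same direction as a bcast on isf, and vice versa
  PetscCall(PetscSFDestroy(&isf));
.ve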

654: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
655: @*/
656: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
657: {
658:   PetscMPIInt     rank;
659:   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
660:   const PetscInt *ilocal;
661:   PetscSFNode    *roots, *leaves;

663:   PetscFunctionBegin;
665:   PetscSFCheckGraphSet(sf, 1);
666:   PetscAssertPointer(isf, 2);

668:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
669:   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

671:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
672:   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
673:   for (i = 0; i < maxlocal; i++) {
674:     leaves[i].rank  = rank;
675:     leaves[i].index = i;
676:   }
677:   for (i = 0; i < nroots; i++) {
678:     roots[i].rank  = -1;
679:     roots[i].index = -1;
680:   }
681:   PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
682:   PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

684:   /* Check whether our leaves are sparse */
685:   for (i = 0, count = 0; i < nroots; i++)
686:     if (roots[i].rank >= 0) count++;
687:   if (count == nroots) newilocal = NULL;
688:   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
689:     for (i = 0, count = 0; i < nroots; i++) {
690:       if (roots[i].rank >= 0) {
691:         newilocal[count]   = i;
692:         roots[count].rank  = roots[i].rank;
693:         roots[count].index = roots[i].index;
694:         count++;
695:       }
696:     }
697:   }

699:   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
700:   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
701:   PetscCall(PetscFree2(roots, leaves));
702:   PetscFunctionReturn(PETSC_SUCCESS);
703: }

705: /*@
706:   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

708:   Collective

710:   Input Parameters:
711: + sf  - communication object to duplicate
712: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

714:   Output Parameter:
715: . newsf - new communication object

717:   Level: beginner

719: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
720: @*/
721: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
722: {
723:   PetscSFType  type;
724:   MPI_Datatype dtype = MPIU_SCALAR;

726:   PetscFunctionBegin;
729:   PetscAssertPointer(newsf, 3);
730:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
731:   PetscCall(PetscSFGetType(sf, &type));
732:   if (type) PetscCall(PetscSFSetType(*newsf, type));
733:   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
734:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
735:     PetscSFCheckGraphSet(sf, 1);
736:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
737:       PetscInt           nroots, nleaves;
738:       const PetscInt    *ilocal;
739:       const PetscSFNode *iremote;
740:       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
741:       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
742:     } else {
743:       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
744:     }
745:   }
746:   /* Since oldtype is committed, so is newtype, according to MPI */
747:   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
748:   (*newsf)->vscat.bs     = sf->vscat.bs;
749:   (*newsf)->vscat.unit   = dtype;
750:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
751:   (*newsf)->vscat.from_n = sf->vscat.from_n;
752:   /* Do not copy lsf. Build it on demand since it is rarely used */

754: #if defined(PETSC_HAVE_DEVICE)
755:   (*newsf)->backend              = sf->backend;
756:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
757:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
758:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
759: #endif
760:   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
761:   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
762:   PetscFunctionReturn(PETSC_SUCCESS);
763: }

765: /*@C
766:   PetscSFGetGraph - Get the graph specifying a parallel star forest

768:   Not Collective

770:   Input Parameter:
771: . sf - star forest

773:   Output Parameters:
774: + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
775: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
776: . ilocal  - locations of leaves in leafdata buffers (if the returned value is `NULL`, the leaves are in contiguous storage)
777: - iremote - remote locations of root vertices for each leaf on the current process

779:   Level: intermediate

781:   Notes:
782:   We are not currently requiring that the graph be set, thus `nroots` is returned as -1 if it has not been set yet

784:   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
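
  Example Usage:
  A sketch, assuming `sf` already has its graph set, of iterating over the local leaves.
.vb
  PetscInt           nroots, nleaves, i;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; // a NULL ilocal means contiguous leaves 0..nleaves-1
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
.ve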

786:   Fortran Notes:
787:   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
788:   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.

790:   To check for a `NULL` `ilocal` use
791: $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

793: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
794: @*/
795: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
796: {
797:   PetscFunctionBegin;
799:   if (sf->ops->GetGraph) {
800:     PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
801:   } else {
802:     if (nroots) *nroots = sf->nroots;
803:     if (nleaves) *nleaves = sf->nleaves;
804:     if (ilocal) *ilocal = sf->mine;
805:     if (iremote) *iremote = sf->remote;
806:   }
807:   PetscFunctionReturn(PETSC_SUCCESS);
808: }

810: /*@
811:   PetscSFGetLeafRange - Get the active leaf ranges

813:   Not Collective

815:   Input Parameter:
816: . sf - star forest

818:   Output Parameters:
819: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
820: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

822:   Level: developer

824: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
825: @*/
826: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
827: {
828:   PetscFunctionBegin;
830:   PetscSFCheckGraphSet(sf, 1);
831:   if (minleaf) *minleaf = sf->minleaf;
832:   if (maxleaf) *maxleaf = sf->maxleaf;
833:   PetscFunctionReturn(PETSC_SUCCESS);
834: }

836: /*@C
837:   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

839:   Collective

841:   Input Parameters:
842: + A    - the star forest
843: . obj  - Optional object that provides the prefix for the option names
844: - name - command line option

846:   Level: intermediate

848:   Note:
849:   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`

851: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
852: @*/
853: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
854: {
855:   PetscFunctionBegin;
857:   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
858:   PetscFunctionReturn(PETSC_SUCCESS);
859: }

861: /*@C
862:   PetscSFView - view a star forest

864:   Collective

866:   Input Parameters:
867: + sf     - star forest
868: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

870:   Level: beginner

872: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
873: @*/
874: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
875: {
876:   PetscBool         iascii;
877:   PetscViewerFormat format;

879:   PetscFunctionBegin;
881:   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
883:   PetscCheckSameComm(sf, 1, viewer, 2);
884:   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
885:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
886:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
887:     PetscMPIInt rank;
888:     PetscInt    ii, i, j;

890:     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
891:     PetscCall(PetscViewerASCIIPushTab(viewer));
892:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
893:       if (!sf->graphset) {
894:         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
895:         PetscCall(PetscViewerASCIIPopTab(viewer));
896:         PetscFunctionReturn(PETSC_SUCCESS);
897:       }
898:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
899:       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
900:       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
901:       for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
902:       PetscCall(PetscViewerFlush(viewer));
903:       PetscCall(PetscViewerGetFormat(viewer, &format));
904:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
905:         PetscMPIInt *tmpranks, *perm;
906:         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
907:         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
908:         for (i = 0; i < sf->nranks; i++) perm[i] = i;
909:         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
910:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
911:         for (ii = 0; ii < sf->nranks; ii++) {
912:           i = perm[ii];
913:           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
914:           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
915:         }
916:         PetscCall(PetscFree2(tmpranks, perm));
917:       }
918:       PetscCall(PetscViewerFlush(viewer));
919:       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
920:     }
921:     PetscCall(PetscViewerASCIIPopTab(viewer));
922:   }
923:   PetscTryTypeMethod(sf, View, viewer);
924:   PetscFunctionReturn(PETSC_SUCCESS);
925: }

927: /*@C
928:   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

930:   Not Collective

932:   Input Parameter:
933: . sf - star forest

935:   Output Parameters:
936: + nranks  - number of ranks referenced by local part
937: . ranks   - [`nranks`] array of ranks
938: . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
939: . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
940: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank

942:   Level: developer
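
  Example Usage:
  A sketch, assuming `sf` has been set up, of walking the leaf edges grouped by root rank.
.vb
  PetscInt           nranks, r, e;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (r = 0; r < nranks; r++) {
    for (e = roffset[r]; e < roffset[r + 1]; e++) {
      // local leaf rmine[e] references root rremote[e] on process ranks[r]
    }
  }
.ve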

944: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
945: @*/
946: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
947: {
948:   PetscFunctionBegin;
950:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
951:   if (sf->ops->GetRootRanks) {
952:     PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
953:   } else {
954:     /* The generic implementation */
955:     if (nranks) *nranks = sf->nranks;
956:     if (ranks) *ranks = sf->ranks;
957:     if (roffset) *roffset = sf->roffset;
958:     if (rmine) *rmine = sf->rmine;
959:     if (rremote) *rremote = sf->rremote;
960:   }
961:   PetscFunctionReturn(PETSC_SUCCESS);
962: }

964: /*@C
965:   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

967:   Not Collective

969:   Input Parameter:
970: . sf - star forest

972:   Output Parameters:
973: + niranks  - number of leaf ranks referencing roots on this process
974: . iranks   - [`niranks`] array of ranks
975: . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
976: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank

978:   Level: developer

980: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
981: @*/
982: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
983: {
984:   PetscFunctionBegin;
986:   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
987:   if (sf->ops->GetLeafRanks) {
988:     PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
989:   } else {
990:     PetscSFType type;
991:     PetscCall(PetscSFGetType(sf, &type));
992:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
993:   }
994:   PetscFunctionReturn(PETSC_SUCCESS);
995: }

997: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
998: {
999:   PetscInt i;
1000:   for (i = 0; i < n; i++) {
1001:     if (needle == list[i]) return PETSC_TRUE;
1002:   }
1003:   return PETSC_FALSE;
1004: }

1006: /*@C
1007:   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.

1009:   Collective

1011:   Input Parameters:
1012: + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1013: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

1015:   Level: developer

1017: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1018: @*/
1019: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1020: {
1021:   PetscHMapI    table;
1022:   PetscHashIter pos;
1023:   PetscMPIInt   size, groupsize, *groupranks;
1024:   PetscInt     *rcount, *ranks;
1025:   PetscInt      i, irank = -1, orank = -1;

1027:   PetscFunctionBegin;
1029:   PetscSFCheckGraphSet(sf, 1);
1030:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1031:   PetscCall(PetscHMapICreateWithSize(10, &table));
1032:   for (i = 0; i < sf->nleaves; i++) {
1033:     /* Log 1-based rank */
1034:     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1035:   }
1036:   PetscCall(PetscHMapIGetSize(table, &sf->nranks));
1037:   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1038:   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1039:   PetscHashIterBegin(table, pos);
1040:   for (i = 0; i < sf->nranks; i++) {
1041:     PetscHashIterGetKey(table, pos, ranks[i]);
1042:     PetscHashIterGetVal(table, pos, rcount[i]);
1043:     PetscHashIterNext(table, pos);
1044:     ranks[i]--; /* Convert back to 0-based */
1045:   }
1046:   PetscCall(PetscHMapIDestroy(&table));

1048:   /* We expect that dgroup is reliably "small" while nranks could be large */
1049:   {
1050:     MPI_Group    group = MPI_GROUP_NULL;
1051:     PetscMPIInt *dgroupranks;
1052:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1053:     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1054:     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1055:     PetscCall(PetscMalloc1(groupsize, &groupranks));
1056:     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1057:     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1058:     PetscCallMPI(MPI_Group_free(&group));
1059:     PetscCall(PetscFree(dgroupranks));
1060:   }

1062:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1063:   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1064:     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1065:       if (InList(ranks[i], groupsize, groupranks)) break;
1066:     }
1067:     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1068:       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1069:     }
1070:     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1071:       PetscInt tmprank, tmpcount;

1073:       tmprank             = ranks[i];
1074:       tmpcount            = rcount[i];
1075:       ranks[i]            = ranks[sf->ndranks];
1076:       rcount[i]           = rcount[sf->ndranks];
1077:       ranks[sf->ndranks]  = tmprank;
1078:       rcount[sf->ndranks] = tmpcount;
1079:       sf->ndranks++;
1080:     }
1081:   }
1082:   PetscCall(PetscFree(groupranks));
1083:   PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
1084:   if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1085:   sf->roffset[0] = 0;
1086:   for (i = 0; i < sf->nranks; i++) {
1087:     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1088:     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1089:     rcount[i]          = 0;
1090:   }
1091:   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1092:     /* short circuit */
1093:     if (orank != sf->remote[i].rank) {
1094:       /* Search for index of iremote[i].rank in sf->ranks */
1095:       PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1096:       if (irank < 0) {
1097:         PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1098:         if (irank >= 0) irank += sf->ndranks;
1099:       }
1100:       orank = sf->remote[i].rank;
1101:     }
1102:     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
1103:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1104:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1105:     rcount[irank]++;
1106:   }
1107:   PetscCall(PetscFree2(rcount, ranks));
1108:   PetscFunctionReturn(PETSC_SUCCESS);
1109: }

1111: /*@C
1112:   PetscSFGetGroups - gets incoming and outgoing process groups

1114:   Collective

1116:   Input Parameter:
1117: . sf - star forest

1119:   Output Parameters:
1120: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1121: - outgoing - group of destination processes for outgoing edges (roots that I reference)

1123:   Level: developer

1125: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1126: @*/
1127: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1128: {
1129:   MPI_Group group = MPI_GROUP_NULL;

1131:   PetscFunctionBegin;
1132:   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1133:   if (sf->ingroup == MPI_GROUP_NULL) {
1134:     PetscInt        i;
1135:     const PetscInt *indegree;
1136:     PetscMPIInt     rank, *outranks, *inranks;
1137:     PetscSFNode    *remote;
1138:     PetscSF         bgcount;

1140:     /* Compute the number of incoming ranks */
1141:     PetscCall(PetscMalloc1(sf->nranks, &remote));
1142:     for (i = 0; i < sf->nranks; i++) {
1143:       remote[i].rank  = sf->ranks[i];
1144:       remote[i].index = 0;
1145:     }
1146:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1147:     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1148:     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1149:     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1150:     /* Enumerate the incoming ranks */
1151:     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1152:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1153:     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1154:     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1155:     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1156:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1157:     PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
1158:     PetscCallMPI(MPI_Group_free(&group));
1159:     PetscCall(PetscFree2(inranks, outranks));
1160:     PetscCall(PetscSFDestroy(&bgcount));
1161:   }
1162:   *incoming = sf->ingroup;

1164:   if (sf->outgroup == MPI_GROUP_NULL) {
1165:     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1166:     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1167:     PetscCallMPI(MPI_Group_free(&group));
1168:   }
1169:   *outgoing = sf->outgroup;
1170:   PetscFunctionReturn(PETSC_SUCCESS);
1171: }

1173: /*@
1174:   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

1176:   Collective

1178:   Input Parameter:
1179: . sf - star forest that may contain roots with 0 or with more than 1 vertex

1181:   Output Parameter:
1182: . multi - star forest with split roots, such that each root has degree exactly 1

1184:   Level: developer

1186:   Note:
1187:   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating `multi`
1188:   directly. Since `multi` satisfies the stronger condition that each entry in the global space has exactly one incoming
1189:   edge, it is a candidate for future optimization that might involve its removal.
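
  Example Usage:
  A sketch of the gather interface that uses the multi-`PetscSF` internally, assuming `sf` is set up with `nroots` roots and `leafdata` holds one PetscInt per leaf.
.vb
  const PetscInt *degree;
  PetscInt        i, nmulti = 0, *multirootdata;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (i = 0; i < nroots; i++) nmulti += degree[i]; // one multi-root slot per incoming leaf edge
  PetscCall(PetscMalloc1(nmulti, &multirootdata));
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscFree(multirootdata));
.ve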

1191: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1192: @*/
1193: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1194: {
1195:   PetscFunctionBegin;
1197:   PetscAssertPointer(multi, 2);
1198:   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1199:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1200:     *multi           = sf->multi;
1201:     sf->multi->multi = sf->multi;
1202:     PetscFunctionReturn(PETSC_SUCCESS);
1203:   }
1204:   if (!sf->multi) {
1205:     const PetscInt *indegree;
1206:     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1207:     PetscSFNode    *remote;
1208:     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1209:     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1210:     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1211:     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1212:     inoffset[0] = 0;
1213:     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1214:     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1215:     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1216:     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1217:     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1218:     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1219:       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1220:     }
1221:     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1222:     for (i = 0; i < sf->nleaves; i++) {
1223:       remote[i].rank  = sf->remote[i].rank;
1224:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1225:     }
1226:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1227:     sf->multi->multi = sf->multi;
1228:     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1229:     if (sf->rankorder) { /* Sort the ranks */
1230:       PetscMPIInt  rank;
1231:       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1232:       PetscSFNode *newremote;
1233:       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1234:       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1235:       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1236:       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1237:       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1238:       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1239:       /* Sort the incoming ranks at each vertex, build the inverse map */
1240:       for (i = 0; i < sf->nroots; i++) {
1241:         PetscInt j;
1242:         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1243:         if (inranks) PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
1244:         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1245:       }
1246:       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1247:       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1248:       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1249:       for (i = 0; i < sf->nleaves; i++) {
1250:         newremote[i].rank  = sf->remote[i].rank;
1251:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1252:       }
1253:       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1254:       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1255:     }
1256:     PetscCall(PetscFree3(inoffset, outones, outoffset));
1257:   }
1258:   *multi = sf->multi;
1259:   PetscFunctionReturn(PETSC_SUCCESS);
1260: }

1262: /*@C
1263:   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices

1265:   Collective

1267:   Input Parameters:
1268: + sf        - original star forest
1269: . nselected - number of selected roots on this process
1270: - selected  - indices of the selected roots on this process

1272:   Output Parameter:
1273: . esf - new star forest

1275:   Level: advanced

1277:   Note:
1278:   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1279:   be done by calling `PetscSFGetGraph()`.
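
  Example Usage:
  A sketch, assuming `sf` has its graph set and `selected` lists `nselected` root indices on this process.
.vb
  PetscSF         esf;
  PetscInt        enleaves;
  const PetscInt *eilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &enleaves, &eilocal, NULL)); // eilocal gives the leaves that still participate
  // ... communicate on esf ...
  PetscCall(PetscSFDestroy(&esf));
.ve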

1281: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1282: @*/
1283: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1284: {
1285:   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1286:   const PetscInt    *ilocal;
1287:   signed char       *rootdata, *leafdata, *leafmem;
1288:   const PetscSFNode *iremote;
1289:   PetscSFNode       *new_iremote;
1290:   MPI_Comm           comm;

1292:   PetscFunctionBegin;
1294:   PetscSFCheckGraphSet(sf, 1);
1295:   if (nselected) PetscAssertPointer(selected, 3);
1296:   PetscAssertPointer(esf, 4);

1298:   PetscCall(PetscSFSetUp(sf));
1299:   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1300:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1301:   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

1303:   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1304:     PetscBool dups;
1305:     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1306:     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1307:     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1308:   }

1310:   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1311:   else {
1312:     /* A generic version of creating embedded sf */
1313:     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1314:     maxlocal = maxleaf - minleaf + 1;
1315:     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1316:     leafdata = leafmem - minleaf;
1317:     /* Tag selected roots and bcast to leaves */
1318:     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1319:     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1320:     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

1322:     /* Build esf with leaves that are still connected */
1323:     esf_nleaves = 0;
1324:     for (i = 0; i < nleaves; i++) {
1325:       j = ilocal ? ilocal[i] : i;
1326:       /* esf_nleaves += leafdata[j] should work in theory, but it failed due to SFWindow bugs
1327:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1328:       */
1329:       esf_nleaves += (leafdata[j] ? 1 : 0);
1330:     }
1331:     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1332:     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1333:     for (i = n = 0; i < nleaves; i++) {
1334:       j = ilocal ? ilocal[i] : i;
1335:       if (leafdata[j]) {
1336:         new_ilocal[n]        = j;
1337:         new_iremote[n].rank  = iremote[i].rank;
1338:         new_iremote[n].index = iremote[i].index;
1339:         ++n;
1340:       }
1341:     }
1342:     PetscCall(PetscSFCreate(comm, esf));
1343:     PetscCall(PetscSFSetFromOptions(*esf));
1344:     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1345:     PetscCall(PetscFree2(rootdata, leafmem));
1346:   }
1347:   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1348:   PetscFunctionReturn(PETSC_SUCCESS);
1349: }
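/* A minimal usage sketch for PetscSFCreateEmbeddedRootSF(), assuming sf is an existing PetscSF whose
   graph has been set; the selected root indices below are illustrative only:

     PetscSF         esf;
     const PetscInt  selected[] = {0, 2};          // keep only edges whose root index is 0 or 2
     PetscInt        enroots, enleaves;
     const PetscInt *eilocal;

     PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
     PetscCall(PetscSFGetGraph(esf, &enroots, &enleaves, &eilocal, NULL)); // eilocal lists the leaves still participating
     PetscCall(PetscSFDestroy(&esf));
*/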

1351: /*@C
1352:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices

1354:   Collective

1356:   Input Parameters:
1357: + sf        - original star forest
1358: . nselected - number of selected leaves on this process
1359: - selected  - indices of the selected leaves on this process

1361:   Output Parameter:
1362: . newsf - new star forest

1364:   Level: advanced

1366: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1367: @*/
1368: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1369: {
1370:   const PetscSFNode *iremote;
1371:   PetscSFNode       *new_iremote;
1372:   const PetscInt    *ilocal;
1373:   PetscInt           i, nroots, *leaves, *new_ilocal;
1374:   MPI_Comm           comm;

1376:   PetscFunctionBegin;
1378:   PetscSFCheckGraphSet(sf, 1);
1379:   if (nselected) PetscAssertPointer(selected, 3);
1380:   PetscAssertPointer(newsf, 4);

1382:   /* Sort and remove duplicates from selected[], put results in leaves[] */
1383:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1384:   PetscCall(PetscMalloc1(nselected, &leaves));
1385:   PetscCall(PetscArraycpy(leaves, selected, nselected));
1386:   PetscCall(PetscSortRemoveDupsInt(&nselected, leaves));
1387:   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);

1389:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1390:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1391:   else {
1392:     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1393:     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1394:     PetscCall(PetscMalloc1(nselected, &new_iremote));
1395:     for (i = 0; i < nselected; ++i) {
1396:       const PetscInt l     = leaves[i];
1397:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1398:       new_iremote[i].rank  = iremote[l].rank;
1399:       new_iremote[i].index = iremote[l].index;
1400:     }
1401:     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1402:     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1403:   }
1404:   PetscCall(PetscFree(leaves));
1405:   PetscFunctionReturn(PETSC_SUCCESS);
1406: }
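/* A minimal usage sketch for PetscSFCreateEmbeddedLeafSF(), assuming sf is an existing PetscSF with its
   graph set; the selected leaf indices below are illustrative only:

     PetscSF        lsf;
     const PetscInt selected[] = {1, 3};           // keep only edges whose leaf index is 1 or 3

     PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, selected, &lsf));
     PetscCall(PetscSFDestroy(&lsf));
*/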

1408: /*@C
1409:   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`

1411:   Collective

1413:   Input Parameters:
1414: + sf       - star forest on which to communicate
1415: . unit     - data type associated with each node
1416: . rootdata - buffer to broadcast
1417: - op       - operation to use for reduction

1419:   Output Parameter:
1420: . leafdata - buffer to be reduced with values from each leaf's respective root

1422:   Level: intermediate

1424:   Note:
1425:   When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1426:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, use
1427:   `PetscSFBcastWithMemTypeBegin()` instead.

1429: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1430: @*/
1431: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1432: {
1433:   PetscMemType rootmtype, leafmtype;

1435:   PetscFunctionBegin;
1437:   PetscCall(PetscSFSetUp(sf));
1438:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1439:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1440:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1441:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1442:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1443:   PetscFunctionReturn(PETSC_SUCCESS);
1444: }

1446: /*@C
1447:   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1448:   to `PetscSFBcastEnd()`

1450:   Collective

1452:   Input Parameters:
1453: + sf        - star forest on which to communicate
1454: . unit      - data type associated with each node
1455: . rootmtype - memory type of rootdata
1456: . rootdata  - buffer to broadcast
1457: . leafmtype - memory type of leafdata
1458: - op        - operation to use for reduction

1460:   Output Parameter:
1461: . leafdata - buffer to be reduced with values from each leaf's respective root

1463:   Level: intermediate

1465: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1466: @*/
1467: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1468: {
1469:   PetscFunctionBegin;
1471:   PetscCall(PetscSFSetUp(sf));
1472:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1473:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1474:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1475:   PetscFunctionReturn(PETSC_SUCCESS);
1476: }
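/* A minimal sketch of the explicit-memtype variant, assuming both buffers live in host memory; with
   device-resident buffers one would pass PETSC_MEMTYPE_DEVICE (or the specific device memtype) instead:

     PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_INT, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
*/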

1478: /*@C
1479:   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

1481:   Collective

1483:   Input Parameters:
1484: + sf       - star forest
1485: . unit     - data type
1486: . rootdata - buffer to broadcast
1487: - op       - operation to use for reduction

1489:   Output Parameter:
1490: . leafdata - buffer to be reduced with values from each leaf's respective root

1492:   Level: intermediate

1494: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1495: @*/
1496: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1497: {
1498:   PetscFunctionBegin;
1500:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1501:   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1502:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1503:   PetscFunctionReturn(PETSC_SUCCESS);
1504: }
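/* A minimal broadcast sketch, assuming sf has nroots roots and nleaves leaves on this process and uses
   contiguous leaf storage (ilocal == NULL); buffers and values are illustrative:

     PetscInt *rootdata, *leafdata;

     PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
     for (PetscInt i = 0; i < nroots; i++) rootdata[i] = i;             // value owned by each root
     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
     // leafdata[i] now holds the value of the root that leaf i is attached to
     PetscCall(PetscFree2(rootdata, leafdata));
*/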

1506: /*@C
1507:   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`

1509:   Collective

1511:   Input Parameters:
1512: + sf       - star forest
1513: . unit     - data type
1514: . leafdata - values to reduce
1515: - op       - reduction operation

1517:   Output Parameter:
1518: . rootdata - result of reduction of values from all leaves of each root

1520:   Level: intermediate

1522:   Note:
1523:   When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1524:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, use
1525:   `PetscSFReduceWithMemTypeBegin()` instead.

1527: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1528: @*/
1529: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1530: {
1531:   PetscMemType rootmtype, leafmtype;

1533:   PetscFunctionBegin;
1535:   PetscCall(PetscSFSetUp(sf));
1536:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1537:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1538:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1539:   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1540:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1541:   PetscFunctionReturn(PETSC_SUCCESS);
1542: }

1544: /*@C
1545:   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`

1547:   Collective

1549:   Input Parameters:
1550: + sf        - star forest
1551: . unit      - data type
1552: . leafmtype - memory type of leafdata
1553: . leafdata  - values to reduce
1554: . rootmtype - memory type of rootdata
1555: - op        - reduction operation

1557:   Output Parameter:
1558: . rootdata - result of reduction of values from all leaves of each root

1560:   Level: intermediate

1562: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1563: @*/
1564: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1565: {
1566:   PetscFunctionBegin;
1568:   PetscCall(PetscSFSetUp(sf));
1569:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1570:   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1571:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1572:   PetscFunctionReturn(PETSC_SUCCESS);
1573: }

1575: /*@C
1576:   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`

1578:   Collective

1580:   Input Parameters:
1581: + sf       - star forest
1582: . unit     - data type
1583: . leafdata - values to reduce
1584: - op       - reduction operation

1586:   Output Parameter:
1587: . rootdata - result of reduction of values from all leaves of each root

1589:   Level: intermediate

1591: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1592: @*/
1593: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1594: {
1595:   PetscFunctionBegin;
1597:   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1598:   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1599:   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1600:   PetscFunctionReturn(PETSC_SUCCESS);
1601: }
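/* A minimal reduction sketch, assuming contiguous leaf storage and the same nroots/nleaves sizes as above;
   with MPI_SUM each root accumulates the values of all leaves attached to it:

     PetscInt *leafdata, *rootdata;

     PetscCall(PetscMalloc2(nleaves, &leafdata, nroots, &rootdata));
     for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = 1;
     PetscCall(PetscArrayzero(rootdata, nroots));                       // reductions accumulate into rootdata
     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
     // rootdata[i] now equals the degree of root i
     PetscCall(PetscFree2(leafdata, rootdata));
*/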

1603: /*@C
1604:   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1605:   to be completed with `PetscSFFetchAndOpEnd()`

1607:   Collective

1609:   Input Parameters:
1610: + sf       - star forest
1611: . unit     - data type
1612: . leafdata - leaf values to use in reduction
1613: - op       - operation to use for reduction

1615:   Output Parameters:
1616: + rootdata   - root values to be updated, input state is seen by the first process to perform an update
1617: - leafupdate - state at each leaf's respective root immediately prior to my atomic update

1619:   Level: advanced

1621:   Note:
1622:   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1623:   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1624:   not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1625:   integers.

1627: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1628: @*/
1629: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1630: {
1631:   PetscMemType rootmtype, leafmtype, leafupdatemtype;

1633:   PetscFunctionBegin;
1635:   PetscCall(PetscSFSetUp(sf));
1636:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1637:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1638:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1639:   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1640:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1641:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1642:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1643:   PetscFunctionReturn(PETSC_SUCCESS);
1644: }

1646: /*@C
1647:   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1648:   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`

1650:   Collective

1652:   Input Parameters:
1653: + sf              - star forest
1654: . unit            - data type
1655: . rootmtype       - memory type of rootdata
1656: . leafmtype       - memory type of leafdata
1657: . leafdata        - leaf values to use in reduction
1658: . leafupdatemtype - memory type of leafupdate
1659: - op              - operation to use for reduction

1661:   Output Parameters:
1662: + rootdata   - root values to be updated, input state is seen by the first process to perform an update
1663: - leafupdate - state at each leaf's respective root immediately prior to my atomic update

1665:   Level: advanced

1667:   Note:
1668:   See `PetscSFFetchAndOpBegin()` for more details.

1670: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1671: @*/
1672: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1673: {
1674:   PetscFunctionBegin;
1676:   PetscCall(PetscSFSetUp(sf));
1677:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1678:   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1679:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1680:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1681:   PetscFunctionReturn(PETSC_SUCCESS);
1682: }

1684: /*@C
1685:   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1686:   to fetch values from roots and update atomically by applying operation using my leaf value

1688:   Collective

1690:   Input Parameters:
1691: + sf       - star forest
1692: . unit     - data type
1693: . leafdata - leaf values to use in reduction
1694: - op       - operation to use for reduction

1696:   Output Parameters:
1697: + rootdata   - root values to be updated, input state is seen by the first process to perform an update
1698: - leafupdate - state at each leaf's respective root immediately prior to my atomic update

1700:   Level: advanced

1702: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1703: @*/
1704: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1705: {
1706:   PetscFunctionBegin;
1708:   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1709:   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1710:   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1711:   PetscFunctionReturn(PETSC_SUCCESS);
1712: }
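/* A minimal fetch-and-op sketch, assuming contiguous leaf storage; each leaf atomically adds its value to
   its root and fetches the root's value as it was just before that update:

     PetscInt *rootdata, *leafdata, *leafupdate;

     PetscCall(PetscMalloc3(nroots, &rootdata, nleaves, &leafdata, nleaves, &leafupdate));
     PetscCall(PetscArrayzero(rootdata, nroots));
     for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = 1;
     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
     // leafupdate[] can be used, e.g., to assign each leaf a distinct slot at its root
     PetscCall(PetscFree3(rootdata, leafdata, leafupdate));
*/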

1714: /*@C
1715:   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

1717:   Collective

1719:   Input Parameter:
1720: . sf - star forest

1722:   Output Parameter:
1723: . degree - degree of each root vertex

1725:   Level: advanced

1727:   Note:
1728:   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

1730: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1731: @*/
1732: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1733: {
1734:   PetscFunctionBegin;
1736:   PetscSFCheckGraphSet(sf, 1);
1737:   PetscAssertPointer(degree, 2);
1738:   if (!sf->degreeknown) {
1739:     PetscInt i, nroots = sf->nroots, maxlocal;
1740:     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1741:     maxlocal = sf->maxleaf - sf->minleaf + 1;
1742:     PetscCall(PetscMalloc1(nroots, &sf->degree));
1743:     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1744:     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1745:     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1746:     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1747:   }
1748:   *degree = NULL;
1749:   PetscFunctionReturn(PETSC_SUCCESS);
1750: }

1752: /*@C
1753:   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

1755:   Collective

1757:   Input Parameter:
1758: . sf - star forest

1760:   Output Parameter:
1761: . degree - degree of each root vertex

1763:   Level: developer

1765:   Note:
1766:   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

1768: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1769: @*/
1770: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1771: {
1772:   PetscFunctionBegin;
1774:   PetscSFCheckGraphSet(sf, 1);
1775:   PetscAssertPointer(degree, 2);
1776:   if (!sf->degreeknown) {
1777:     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1778:     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1779:     PetscCall(PetscFree(sf->degreetmp));
1780:     sf->degreeknown = PETSC_TRUE;
1781:   }
1782:   *degree = sf->degree;
1783:   PetscFunctionReturn(PETSC_SUCCESS);
1784: }
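/* A minimal degree-computation sketch; the returned array is owned by the PetscSF and must not be freed:

     const PetscInt *degree;

     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
     // degree[i] is the number of leaves (across all processes) attached to local root i
*/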

1786: /*@C
1787:   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1788:   Each multi-root is assigned the index of the corresponding original root.

1790:   Collective

1792:   Input Parameters:
1793: + sf     - star forest
1794: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

1796:   Output Parameters:
1797: + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1798: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`

1800:   Level: developer

1802:   Note:
1803:   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.

1805: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1806: @*/
1807: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1808: {
1809:   PetscSF  msf;
1810:   PetscInt i, j, k, nroots, nmroots;

1812:   PetscFunctionBegin;
1814:   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1815:   if (nroots) PetscAssertPointer(degree, 2);
1816:   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1817:   PetscAssertPointer(multiRootsOrigNumbering, 4);
1818:   PetscCall(PetscSFGetMultiSF(sf, &msf));
1819:   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1820:   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1821:   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1822:     if (!degree[i]) continue;
1823:     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1824:   }
1825:   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1826:   if (nMultiRoots) *nMultiRoots = nmroots;
1827:   PetscFunctionReturn(PETSC_SUCCESS);
1828: }
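/* A minimal sketch combining the degree with the multi-root numbering; nmroots equals the sum of the local
   degrees, and the output array maps each multi-root back to its original root index:

     const PetscInt *degree;
     PetscInt        nmroots, *morig;

     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
     PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig));
     PetscCall(PetscFree(morig));                                       // caller owns this array
*/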

1830: /*@C
1831:   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

1833:   Collective

1835:   Input Parameters:
1836: + sf       - star forest
1837: . unit     - data type
1838: - leafdata - leaf data to gather to roots

1840:   Output Parameter:
1841: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1843:   Level: intermediate

1845: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1846: @*/
1847: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1848: {
1849:   PetscSF multi = NULL;

1851:   PetscFunctionBegin;
1853:   PetscCall(PetscSFSetUp(sf));
1854:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1855:   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1856:   PetscFunctionReturn(PETSC_SUCCESS);
1857: }

1859: /*@C
1860:   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

1862:   Collective

1864:   Input Parameters:
1865: + sf       - star forest
1866: . unit     - data type
1867: - leafdata - leaf data to gather to roots

1869:   Output Parameter:
1870: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1872:   Level: intermediate

1874: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1875: @*/
1876: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1877: {
1878:   PetscSF multi = NULL;

1880:   PetscFunctionBegin;
1882:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1883:   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1884:   PetscFunctionReturn(PETSC_SUCCESS);
1885: }
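/* A minimal gather sketch, assuming contiguous leaf storage and the nroots/nleaves sizes used above: every
   leaf contributes one unit, and each root provides one slot per attached leaf (its degree), so the
   multi-root buffer is sized by the sum of the local degrees:

     const PetscInt *degree;
     PetscInt        nmroots = 0, *multirootdata, *leafdata;

     PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
     PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
     for (PetscInt i = 0; i < nroots; i++) nmroots += degree[i];
     PetscCall(PetscMalloc2(nmroots, &multirootdata, nleaves, &leafdata));
     for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = i;
     PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
     PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
     PetscCall(PetscFree2(multirootdata, leafdata));
*/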

1887: /*@C
1888:   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

1890:   Collective

1892:   Input Parameters:
1893: + sf            - star forest
1894: . unit          - data type
1895: - multirootdata - root buffer to send to each leaf, one unit of data per leaf

1897:   Output Parameter:
1898: . leafdata - leaf data to be updated with personal data from each respective root

1900:   Level: intermediate

1902: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1903: @*/
1904: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1905: {
1906:   PetscSF multi = NULL;

1908:   PetscFunctionBegin;
1910:   PetscCall(PetscSFSetUp(sf));
1911:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1912:   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1913:   PetscFunctionReturn(PETSC_SUCCESS);
1914: }

1916: /*@C
1917:   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

1919:   Collective

1921:   Input Parameters:
1922: + sf            - star forest
1923: . unit          - data type
1924: - multirootdata - root buffer to send to each leaf, one unit of data per leaf

1926:   Output Parameter:
1927: . leafdata - leaf data to be updated with personal data from each respective root

1929:   Level: intermediate

1931: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1932: @*/
1933: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1934: {
1935:   PetscSF multi = NULL;

1937:   PetscFunctionBegin;
1939:   PetscCall(PetscSFGetMultiSF(sf, &multi));
1940:   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1941:   PetscFunctionReturn(PETSC_SUCCESS);
1942: }
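/* A minimal scatter sketch, the inverse direction of the gather above and reusing its buffers: each root
   sends one unit per attached leaf out of its multi-root slots, so multirootdata is sized and ordered as
   in PetscSFGatherBegin():

     PetscCall(PetscSFScatterBegin(sf, MPIU_INT, multirootdata, leafdata));
     PetscCall(PetscSFScatterEnd(sf, MPIU_INT, multirootdata, leafdata));
*/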

1944: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1945: {
1946:   PetscInt        i, n, nleaves;
1947:   const PetscInt *ilocal = NULL;
1948:   PetscHSetI      seen;

1950:   PetscFunctionBegin;
1951:   if (PetscDefined(USE_DEBUG)) {
1952:     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1953:     PetscCall(PetscHSetICreate(&seen));
1954:     for (i = 0; i < nleaves; i++) {
1955:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1956:       PetscCall(PetscHSetIAdd(seen, leaf));
1957:     }
1958:     PetscCall(PetscHSetIGetSize(seen, &n));
1959:     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
1960:     PetscCall(PetscHSetIDestroy(&seen));
1961:   }
1962:   PetscFunctionReturn(PETSC_SUCCESS);
1963: }

1965: /*@
1966:   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

1968:   Input Parameters:
1969: + sfA - The first `PetscSF`
1970: - sfB - The second `PetscSF`

1972:   Output Parameter:
1973: . sfBA - The composite `PetscSF`

1975:   Level: developer

1977:   Notes:
1978:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
1979:   forests, i.e. the same leaf is not connected with different roots.

1981:   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
1982:   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
1983:   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
1984:   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.

1986: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1987: @*/
1988: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1989: {
1990:   const PetscSFNode *remotePointsA, *remotePointsB;
1991:   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1992:   const PetscInt    *localPointsA, *localPointsB;
1993:   PetscInt          *localPointsBA;
1994:   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1995:   PetscBool          denseB;

1997:   PetscFunctionBegin;
1999:   PetscSFCheckGraphSet(sfA, 1);
2001:   PetscSFCheckGraphSet(sfB, 2);
2002:   PetscCheckSameComm(sfA, 1, sfB, 2);
2003:   PetscAssertPointer(sfBA, 3);
2004:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2005:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

2007:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2008:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2009:   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2010:      numRootsB; otherwise, garbage will be broadcast.
2011:      Example (comm size = 1):
2012:      sfA: 0 <- (0, 0)
2013:      sfB: 100 <- (0, 0)
2014:           101 <- (0, 1)
2015:      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2016:      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2017:      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2018:      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2019:   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2020:   for (i = 0; i < numRootsB; i++) {
2021:     reorderedRemotePointsA[i].rank  = -1;
2022:     reorderedRemotePointsA[i].index = -1;
2023:   }
2024:   for (i = 0; i < numLeavesA; i++) {
2025:     PetscInt localp = localPointsA ? localPointsA[i] : i;

2027:     if (localp >= numRootsB) continue;
2028:     reorderedRemotePointsA[localp] = remotePointsA[i];
2029:   }
2030:   remotePointsA = reorderedRemotePointsA;
2031:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2032:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2033:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2034:     leafdataB[i].rank  = -1;
2035:     leafdataB[i].index = -1;
2036:   }
2037:   PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2038:   PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
2039:   PetscCall(PetscFree(reorderedRemotePointsA));

2041:   denseB = (PetscBool)!localPointsB;
2042:   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2043:     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2044:     else numLeavesBA++;
2045:   }
2046:   if (denseB) {
2047:     localPointsBA  = NULL;
2048:     remotePointsBA = leafdataB;
2049:   } else {
2050:     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2051:     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2052:     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2053:       const PetscInt l = localPointsB ? localPointsB[i] : i;

2055:       if (leafdataB[l - minleaf].rank == -1) continue;
2056:       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2057:       localPointsBA[numLeavesBA]  = l;
2058:       numLeavesBA++;
2059:     }
2060:     PetscCall(PetscFree(leafdataB));
2061:   }
2062:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2063:   PetscCall(PetscSFSetFromOptions(*sfBA));
2064:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2065:   PetscFunctionReturn(PETSC_SUCCESS);
2066: }
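/* A minimal composition sketch, assuming sfA maps its roots to leaves that serve as roots of sfB
   (rootdataA and leafdataB are illustrative buffers sized by sfA's roots and sfB's leaves); broadcasting
   on sfBA is then equivalent to broadcasting on sfA followed by sfB:

     PetscSF sfBA;

     PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
     PetscCall(PetscSFBcastBegin(sfBA, MPIU_INT, rootdataA, leafdataB, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sfBA, MPIU_INT, rootdataA, leafdataB, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sfBA));
*/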

2068: /*@
2069:   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

2071:   Input Parameters:
2072: + sfA - The first `PetscSF`
2073: - sfB - The second `PetscSF`

2075:   Output Parameter:
2076: . sfBA - The composite `PetscSF`.

2078:   Level: developer

2080:   Notes:
2081:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2082:   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2083:   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.

2085:   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2086:   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2087:   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2088:   on `sfA`, then
2089:   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.

2091: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2092: @*/
2093: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2094: {
2095:   const PetscSFNode *remotePointsA, *remotePointsB;
2096:   PetscSFNode       *remotePointsBA;
2097:   const PetscInt    *localPointsA, *localPointsB;
2098:   PetscSFNode       *reorderedRemotePointsA = NULL;
2099:   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2100:   MPI_Op             op;
2101: #if defined(PETSC_USE_64BIT_INDICES)
2102:   PetscBool iswin;
2103: #endif

2105:   PetscFunctionBegin;
2107:   PetscSFCheckGraphSet(sfA, 1);
2109:   PetscSFCheckGraphSet(sfB, 2);
2110:   PetscCheckSameComm(sfA, 1, sfB, 2);
2111:   PetscAssertPointer(sfBA, 3);
2112:   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2113:   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

2115:   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2116:   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

2118:   /* TODO: Check roots of sfB have degree of 1 */
2119:   /* Once we implement it, we can replace the MPI_MAXLOC
2120:      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2121:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2122:      the root condition is not met.
2123:    */
2124:   op = MPI_MAXLOC;
2125: #if defined(PETSC_USE_64BIT_INDICES)
2126:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2127:   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2128:   if (iswin) op = MPI_REPLACE;
2129: #endif

2131:   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2132:   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2133:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2134:     reorderedRemotePointsA[i].rank  = -1;
2135:     reorderedRemotePointsA[i].index = -1;
2136:   }
2137:   if (localPointsA) {
2138:     for (i = 0; i < numLeavesA; i++) {
2139:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2140:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2141:     }
2142:   } else {
2143:     for (i = 0; i < numLeavesA; i++) {
2144:       if (i > maxleaf || i < minleaf) continue;
2145:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2146:     }
2147:   }

2149:   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2150:   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2151:   for (i = 0; i < numRootsB; i++) {
2152:     remotePointsBA[i].rank  = -1;
2153:     remotePointsBA[i].index = -1;
2154:   }

2156:   PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2157:   PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
2158:   PetscCall(PetscFree(reorderedRemotePointsA));
2159:   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2160:     if (remotePointsBA[i].rank == -1) continue;
2161:     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2162:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2163:     localPointsBA[numLeavesBA]        = i;
2164:     numLeavesBA++;
2165:   }
2166:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2167:   PetscCall(PetscSFSetFromOptions(*sfBA));
2168:   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2169:   PetscFunctionReturn(PETSC_SUCCESS);
2170: }

2172: /*
2173:   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

2175:   Input Parameter:
2176: . sf - The global `PetscSF`

2178:   Output Parameter:
2179: . out - The local `PetscSF`

2181: .seealso: `PetscSF`, `PetscSFCreate()`
2182:  */
2183: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2184: {
2185:   MPI_Comm           comm;
2186:   PetscMPIInt        myrank;
2187:   const PetscInt    *ilocal;
2188:   const PetscSFNode *iremote;
2189:   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2190:   PetscSFNode       *liremote;
2191:   PetscSF            lsf;

2193:   PetscFunctionBegin;
2195:   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2196:   else {
2197:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2198:     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2199:     PetscCallMPI(MPI_Comm_rank(comm, &myrank));

2201:     /* Find out local edges and build a local SF */
2202:     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2203:     for (i = lnleaves = 0; i < nleaves; i++) {
2204:       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2205:     }
2206:     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2207:     PetscCall(PetscMalloc1(lnleaves, &liremote));

2209:     for (i = j = 0; i < nleaves; i++) {
2210:       if (iremote[i].rank == (PetscInt)myrank) {
2211:         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2212:         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2213:         liremote[j].index = iremote[i].index;
2214:         j++;
2215:       }
2216:     }
2217:     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2218:     PetscCall(PetscSFSetFromOptions(lsf));
2219:     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2220:     PetscCall(PetscSFSetUp(lsf));
2221:     *out = lsf;
2222:   }
2223:   PetscFunctionReturn(PETSC_SUCCESS);
2224: }

2226: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2227: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2228: {
2229:   PetscMemType rootmtype, leafmtype;

2231:   PetscFunctionBegin;
2233:   PetscCall(PetscSFSetUp(sf));
2234:   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2235:   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2236:   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2237:   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2238:   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2239:   PetscFunctionReturn(PETSC_SUCCESS);
2240: }

2242: /*@
2243:   PetscSFConcatenate - concatenate multiple `PetscSF` into one

2245:   Input Parameters:
2246: + comm        - the communicator
2247: . nsfs        - the number of input `PetscSF`
2248: . sfs         - the array of input `PetscSF`
2249: . rootMode    - the root mode specifying how roots are handled
2250: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage

2252:   Output Parameter:
2253: . newsf - The resulting `PetscSF`

2255:   Level: advanced

2257:   Notes:
2258:   The communicator of all `PetscSF`s in `sfs` must be `comm`.

2260:   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.

2262:   The offsets in `leafOffsets` are added to the original leaf indices.

2264:   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2265:   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.

2267:   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2268:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2270:   All root modes retain the essential connectivity condition.
2271:   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2272:   Parameter `rootMode` controls how the input root spaces are combined.
2273:   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2274:   and is also the same in the output `PetscSF`.
2275:   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2276:   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2277:   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2278:   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2279:   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2280:   the original root ranks are ignored.
2281:   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2282:   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
2283:   to keep the load balancing.
2284:   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.

2286:   Example:
2287:   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2288: .vb
2289:   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2290:   for m in {local,global,shared}; do
2291:     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2292:   done
2293: .ve
2294:   we generate two identical `PetscSF`s sf_0 and sf_1,
2295: .vb
2296:   PetscSF Object: sf_0 2 MPI processes
2297:     type: basic
2298:     rank #leaves #roots
2299:     [ 0]       4      2
2300:     [ 1]       4      2
2301:     leaves      roots       roots in global numbering
2302:     ( 0,  0) <- ( 0,  0)  =   0
2303:     ( 0,  1) <- ( 0,  1)  =   1
2304:     ( 0,  2) <- ( 1,  0)  =   2
2305:     ( 0,  3) <- ( 1,  1)  =   3
2306:     ( 1,  0) <- ( 0,  0)  =   0
2307:     ( 1,  1) <- ( 0,  1)  =   1
2308:     ( 1,  2) <- ( 1,  0)  =   2
2309:     ( 1,  3) <- ( 1,  1)  =   3
2310: .ve
2311:   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2312: .vb
2313:   rootMode = local:
2314:   PetscSF Object: result_sf 2 MPI processes
2315:     type: basic
2316:     rank #leaves #roots
2317:     [ 0]       8      4
2318:     [ 1]       8      4
2319:     leaves      roots       roots in global numbering
2320:     ( 0,  0) <- ( 0,  0)  =   0
2321:     ( 0,  1) <- ( 0,  1)  =   1
2322:     ( 0,  2) <- ( 1,  0)  =   4
2323:     ( 0,  3) <- ( 1,  1)  =   5
2324:     ( 0,  4) <- ( 0,  2)  =   2
2325:     ( 0,  5) <- ( 0,  3)  =   3
2326:     ( 0,  6) <- ( 1,  2)  =   6
2327:     ( 0,  7) <- ( 1,  3)  =   7
2328:     ( 1,  0) <- ( 0,  0)  =   0
2329:     ( 1,  1) <- ( 0,  1)  =   1
2330:     ( 1,  2) <- ( 1,  0)  =   4
2331:     ( 1,  3) <- ( 1,  1)  =   5
2332:     ( 1,  4) <- ( 0,  2)  =   2
2333:     ( 1,  5) <- ( 0,  3)  =   3
2334:     ( 1,  6) <- ( 1,  2)  =   6
2335:     ( 1,  7) <- ( 1,  3)  =   7

2337:   rootMode = global:
2338:   PetscSF Object: result_sf 2 MPI processes
2339:     type: basic
2340:     rank #leaves #roots
2341:     [ 0]       8      4
2342:     [ 1]       8      4
2343:     leaves      roots       roots in global numbering
2344:     ( 0,  0) <- ( 0,  0)  =   0
2345:     ( 0,  1) <- ( 0,  1)  =   1
2346:     ( 0,  2) <- ( 0,  2)  =   2
2347:     ( 0,  3) <- ( 0,  3)  =   3
2348:     ( 0,  4) <- ( 1,  0)  =   4
2349:     ( 0,  5) <- ( 1,  1)  =   5
2350:     ( 0,  6) <- ( 1,  2)  =   6
2351:     ( 0,  7) <- ( 1,  3)  =   7
2352:     ( 1,  0) <- ( 0,  0)  =   0
2353:     ( 1,  1) <- ( 0,  1)  =   1
2354:     ( 1,  2) <- ( 0,  2)  =   2
2355:     ( 1,  3) <- ( 0,  3)  =   3
2356:     ( 1,  4) <- ( 1,  0)  =   4
2357:     ( 1,  5) <- ( 1,  1)  =   5
2358:     ( 1,  6) <- ( 1,  2)  =   6
2359:     ( 1,  7) <- ( 1,  3)  =   7

2361:   rootMode = shared:
2362:   PetscSF Object: result_sf 2 MPI processes
2363:     type: basic
2364:     rank #leaves #roots
2365:     [ 0]       8      2
2366:     [ 1]       8      2
2367:     leaves      roots       roots in global numbering
2368:     ( 0,  0) <- ( 0,  0)  =   0
2369:     ( 0,  1) <- ( 0,  1)  =   1
2370:     ( 0,  2) <- ( 1,  0)  =   2
2371:     ( 0,  3) <- ( 1,  1)  =   3
2372:     ( 0,  4) <- ( 0,  0)  =   0
2373:     ( 0,  5) <- ( 0,  1)  =   1
2374:     ( 0,  6) <- ( 1,  0)  =   2
2375:     ( 0,  7) <- ( 1,  1)  =   3
2376:     ( 1,  0) <- ( 0,  0)  =   0
2377:     ( 1,  1) <- ( 0,  1)  =   1
2378:     ( 1,  2) <- ( 1,  0)  =   2
2379:     ( 1,  3) <- ( 1,  1)  =   3
2380:     ( 1,  4) <- ( 0,  0)  =   0
2381:     ( 1,  5) <- ( 0,  1)  =   1
2382:     ( 1,  6) <- ( 1,  0)  =   2
2383:     ( 1,  7) <- ( 1,  1)  =   3
2384: .ve

2386: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2387: @*/
2388: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2389: {
2390:   PetscInt     i, s, nLeaves, nRoots;
2391:   PetscInt    *leafArrayOffsets;
2392:   PetscInt    *ilocal_new;
2393:   PetscSFNode *iremote_new;
2394:   PetscBool    all_ilocal_null = PETSC_FALSE;
2395:   PetscLayout  glayout         = NULL;
2396:   PetscInt    *gremote         = NULL;
2397:   PetscMPIInt  rank, size;

2399:   PetscFunctionBegin;
2400:   if (PetscDefined(USE_DEBUG)) {
2401:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2403:     PetscCall(PetscSFCreate(comm, &dummy));
2405:     PetscAssertPointer(sfs, 3);
2406:     for (i = 0; i < nsfs; i++) {
2408:       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2409:     }
2411:     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2412:     PetscAssertPointer(newsf, 6);
2413:     PetscCall(PetscSFDestroy(&dummy));
2414:   }
2415:   if (!nsfs) {
2416:     PetscCall(PetscSFCreate(comm, newsf));
2417:     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2418:     PetscFunctionReturn(PETSC_SUCCESS);
2419:   }
2420:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2421:   PetscCallMPI(MPI_Comm_size(comm, &size));

2423:   /* Calculate leaf array offsets */
2424:   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2425:   leafArrayOffsets[0] = 0;
2426:   for (s = 0; s < nsfs; s++) {
2427:     PetscInt nl;

2429:     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2430:     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2431:   }
2432:   nLeaves = leafArrayOffsets[nsfs];

2434:   /* Calculate number of roots */
2435:   switch (rootMode) {
2436:   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2437:     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2438:     if (PetscDefined(USE_DEBUG)) {
2439:       for (s = 1; s < nsfs; s++) {
2440:         PetscInt nr;

2442:         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2443:         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2444:       }
2445:     }
2446:   } break;
2447:   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2448:     /* Calculate also global layout in this case */
2449:     PetscInt    *nls;
2450:     PetscLayout *lts;
2451:     PetscInt   **inds;
2452:     PetscInt     j;
2453:     PetscInt     rootOffset = 0;

2455:     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2456:     PetscCall(PetscLayoutCreate(comm, &glayout));
2457:     glayout->bs = 1;
2458:     glayout->n  = 0;
2459:     glayout->N  = 0;
2460:     for (s = 0; s < nsfs; s++) {
2461:       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2462:       glayout->n += lts[s]->n;
2463:       glayout->N += lts[s]->N;
2464:     }
2465:     PetscCall(PetscLayoutSetUp(glayout));
2466:     PetscCall(PetscMalloc1(nLeaves, &gremote));
2467:     for (s = 0, j = 0; s < nsfs; s++) {
2468:       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2469:       rootOffset += lts[s]->N;
2470:       PetscCall(PetscLayoutDestroy(&lts[s]));
2471:       PetscCall(PetscFree(inds[s]));
2472:     }
2473:     PetscCall(PetscFree3(lts, nls, inds));
2474:     nRoots = glayout->N;
2475:   } break;
2476:   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2477:     /* nRoots calculated later in this case */
2478:     break;
2479:   default:
2480:     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2481:   }

2483:   if (!leafOffsets) {
2484:     all_ilocal_null = PETSC_TRUE;
2485:     for (s = 0; s < nsfs; s++) {
2486:       const PetscInt *ilocal;

2488:       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2489:       if (ilocal) {
2490:         all_ilocal_null = PETSC_FALSE;
2491:         break;
2492:       }
2493:     }
2494:     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2495:   }

2497:   /* Renumber and concatenate local leaves */
2498:   ilocal_new = NULL;
2499:   if (!all_ilocal_null) {
2500:     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2501:     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2502:     for (s = 0; s < nsfs; s++) {
2503:       const PetscInt *ilocal;
2504:       PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2505:       PetscInt        i, nleaves_l;

2507:       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2508:       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2509:     }
2510:   }

2512:   /* Renumber and concatenate remote roots */
2513:   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2514:     PetscInt rootOffset = 0;

2516:     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2517:     for (i = 0; i < nLeaves; i++) {
2518:       iremote_new[i].rank  = -1;
2519:       iremote_new[i].index = -1;
2520:     }
2521:     for (s = 0; s < nsfs; s++) {
2522:       PetscInt           i, nl, nr;
2523:       PetscSF            tmp_sf;
2524:       const PetscSFNode *iremote;
2525:       PetscSFNode       *tmp_rootdata;
2526:       PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

2528:       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2529:       PetscCall(PetscSFCreate(comm, &tmp_sf));
2530:       /* create helper SF with contiguous leaves */
2531:       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2532:       PetscCall(PetscSFSetUp(tmp_sf));
2533:       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2534:       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2535:         for (i = 0; i < nr; i++) {
2536:           tmp_rootdata[i].index = i + rootOffset;
2537:           tmp_rootdata[i].rank  = (PetscInt)rank;
2538:         }
2539:         rootOffset += nr;
2540:       } else {
2541:         for (i = 0; i < nr; i++) {
2542:           tmp_rootdata[i].index = i;
2543:           tmp_rootdata[i].rank  = (PetscInt)rank;
2544:         }
2545:       }
2546:       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2547:       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2548:       PetscCall(PetscSFDestroy(&tmp_sf));
2549:       PetscCall(PetscFree(tmp_rootdata));
2550:     }
2551:     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above

2553:     /* Build the new SF */
2554:     PetscCall(PetscSFCreate(comm, newsf));
2555:     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2556:   } else {
2557:     /* Build the new SF */
2558:     PetscCall(PetscSFCreate(comm, newsf));
2559:     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2560:   }
2561:   PetscCall(PetscSFSetUp(*newsf));
2562:   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2563:   PetscCall(PetscLayoutDestroy(&glayout));
2564:   PetscCall(PetscFree(gremote));
2565:   PetscCall(PetscFree(leafArrayOffsets));
2566:   PetscFunctionReturn(PETSC_SUCCESS);
2567: }
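/* A minimal call sketch matching the example above: two SFs with contiguous leaf storage are concatenated,
   so leafOffsets can be passed as NULL; rootMode selects how the root spaces are combined:

     PetscSF sfs[2], result_sf;

     // ... create sfs[0] and sfs[1] on comm and set their graphs ...
     PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_LOCAL, NULL, &result_sf));
     PetscCall(PetscSFDestroy(&result_sf));
*/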