Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petscctable.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #endif

 10: #if defined(PETSC_HAVE_HIP)
 11:   #include <hip/hip_runtime.h>
 12: #endif

 14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 15: void PetscSFCheckGraphSet(PetscSF, int);
 16: #else
 17:   #if defined(PETSC_USE_DEBUG)
 19:   #else
 20:     #define PetscSFCheckGraphSet(sf, arg) \
 21:       do { \
 22:       } while (0)
 23:   #endif
 24: #endif

 26: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};

 28: /*@
 29:    PetscSFCreate - create a star forest communication context

 31:    Collective

 33:    Input Parameter:
 34: .  comm - communicator on which the star forest will operate

 36:    Output Parameter:
 37: .  sf - new star forest context

 39:    Options Database Keys:
 40: .  -sf_type type - value of type may be
 41: .vb
 42:     basic     - Use MPI persistent Isend/Irecv for communication (Default)
 43:     window    - Use MPI-3 one-sided window for communication
 44:     neighbor  - Use MPI-3 neighborhood collectives for communication
 45: .ve

 47:    Level: intermediate

 49:    Note:
 50:    When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
 51:    `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
 52:    `SF`s are optimized and have better performance than general `SF`s.

 54: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
 55: @*/
 56: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
 57: {
 58:   PetscSF b;

 61:   PetscSFInitializePackage();

 63:   PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView);

 65:   b->nroots    = -1;
 66:   b->nleaves   = -1;
 67:   b->minleaf   = PETSC_MAX_INT;
 68:   b->maxleaf   = PETSC_MIN_INT;
 69:   b->nranks    = -1;
 70:   b->rankorder = PETSC_TRUE;
 71:   b->ingroup   = MPI_GROUP_NULL;
 72:   b->outgroup  = MPI_GROUP_NULL;
 73:   b->graphset  = PETSC_FALSE;
 74: #if defined(PETSC_HAVE_DEVICE)
 75:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 76:   b->use_stream_aware_mpi = PETSC_FALSE;
 77:   b->unknown_input_stream = PETSC_FALSE;
 78:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
 79:   b->backend = PETSCSF_BACKEND_KOKKOS;
 80:   #elif defined(PETSC_HAVE_CUDA)
 81:   b->backend = PETSCSF_BACKEND_CUDA;
 82:   #elif defined(PETSC_HAVE_HIP)
 83:   b->backend = PETSCSF_BACKEND_HIP;
 84:   #endif

 86:   #if defined(PETSC_HAVE_NVSHMEM)
 87:   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 88:   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 89:   PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL);
 90:   PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL);
 91:   #endif
 92: #endif
 93:   b->vscat.from_n = -1;
 94:   b->vscat.to_n   = -1;
 95:   b->vscat.unit   = MPIU_SCALAR;
 96:   *sf             = b;
 97:   return 0;
 98: }
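
/* A minimal sketch of creating and configuring a PetscSF; the communication
   graph must still be supplied with PetscSFSetGraph() or
   PetscSFSetGraphWithPattern() before the SF can be used. The helper name is
   illustrative only, and error checking is omitted as in the rest of this
   listing. */
static PetscErrorCode SketchCreateSF(MPI_Comm comm, PetscSF *sf)
{
  PetscSFCreate(comm, sf);
  PetscSFSetType(*sf, PETSCSFBASIC); /* optional; -sf_type from the options database can override it */
  PetscSFSetFromOptions(*sf);
  return 0;
}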

100: /*@
101:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

103:    Collective

105:    Input Parameter:
106: .  sf - star forest

108:    Level: advanced

110: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
111: @*/
112: PetscErrorCode PetscSFReset(PetscSF sf)
113: {
115:   PetscTryTypeMethod(sf, Reset);
116:   sf->nroots   = -1;
117:   sf->nleaves  = -1;
118:   sf->minleaf  = PETSC_MAX_INT;
119:   sf->maxleaf  = PETSC_MIN_INT;
120:   sf->mine     = NULL;
121:   sf->remote   = NULL;
122:   sf->graphset = PETSC_FALSE;
123:   PetscFree(sf->mine_alloc);
124:   PetscFree(sf->remote_alloc);
125:   sf->nranks = -1;
126:   PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote);
127:   sf->degreeknown = PETSC_FALSE;
128:   PetscFree(sf->degree);
129:   if (sf->ingroup != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
130:   if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
131:   if (sf->multi) sf->multi->multi = NULL;
132:   PetscSFDestroy(&sf->multi);
133:   PetscLayoutDestroy(&sf->map);

135: #if defined(PETSC_HAVE_DEVICE)
136:   for (PetscInt i = 0; i < 2; i++) PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]);
137: #endif

139:   sf->setupcalled = PETSC_FALSE;
140:   return 0;
141: }

143: /*@C
144:    PetscSFSetType - Set the `PetscSF` communication implementation

146:    Collective

148:    Input Parameters:
149: +  sf - the `PetscSF` context
150: -  type - a known method
151: .vb
152:     PETSCSFWINDOW - MPI-2/3 one-sided
153:     PETSCSFBASIC - basic implementation using MPI-1 two-sided
154: .ve

156:    Options Database Key:
157: .  -sf_type <type> - Sets the method; for example, basic or window. Use -help for a list of available methods (for instance, window, basic, neighbor)

159:   Level: intermediate

161:    Notes:
162:    See "include/petscsf.h" for available methods

164: .seealso: `PetscSFType`, `PetscSFCreate()`
165: @*/
166: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
167: {
168:   PetscBool match;
169:   PetscErrorCode (*r)(PetscSF);


174:   PetscObjectTypeCompare((PetscObject)sf, type, &match);
175:   if (match) return 0;

177:   PetscFunctionListFind(PetscSFList, type, &r);
179:   /* Destroy the previous PetscSF implementation context */
180:   PetscTryTypeMethod(sf, Destroy);
181:   PetscMemzero(sf->ops, sizeof(*sf->ops));
182:   PetscObjectChangeTypeName((PetscObject)sf, type);
183:   (*r)(sf);
184:   return 0;
185: }

187: /*@C
188:   PetscSFGetType - Get the `PetscSF` communication implementation

190:   Not Collective

192:   Input Parameter:
193: . sf  - the `PetscSF` context

195:   Output Parameter:
196: . type - the `PetscSF` type name

198:   Level: intermediate

200: .seealso: `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
201: @*/
202: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
203: {
206:   *type = ((PetscObject)sf)->type_name;
207:   return 0;
208: }

210: /*@C
211:    PetscSFDestroy - destroy star forest

213:    Collective

215:    Input Parameter:
216: .  sf - address of star forest

218:    Level: intermediate

220: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
221: @*/
222: PetscErrorCode PetscSFDestroy(PetscSF *sf)
223: {
224:   if (!*sf) return 0;
226:   if (--((PetscObject)(*sf))->refct > 0) {
227:     *sf = NULL;
228:     return 0;
229:   }
230:   PetscSFReset(*sf);
231:   PetscTryTypeMethod((*sf), Destroy);
232:   PetscSFDestroy(&(*sf)->vscat.lsf);
233:   if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
234:   PetscHeaderDestroy(sf);
235:   return 0;
236: }

238: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
239: {
240:   PetscInt           i, nleaves;
241:   PetscMPIInt        size;
242:   const PetscInt    *ilocal;
243:   const PetscSFNode *iremote;

245:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
246:   PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote);
247:   MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
248:   for (i = 0; i < nleaves; i++) {
249:     const PetscInt rank   = iremote[i].rank;
250:     const PetscInt remote = iremote[i].index;
251:     const PetscInt leaf   = ilocal ? ilocal[i] : i;
255:   }
256:   return 0;
257: }

259: /*@
260:    PetscSFSetUp - set up communication structures

262:    Collective

264:    Input Parameter:
265: .  sf - star forest communication object

267:    Level: beginner

269: .seealso: `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
270: @*/
271: PetscErrorCode PetscSFSetUp(PetscSF sf)
272: {
274:   PetscSFCheckGraphSet(sf, 1);
275:   if (sf->setupcalled) return 0;
276:   PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0);
277:   PetscSFCheckGraphValid_Private(sf);
278:   if (!((PetscObject)sf)->type_name) PetscSFSetType(sf, PETSCSFBASIC); /* Zero all sf->ops */
279:   PetscTryTypeMethod(sf, SetUp);
280: #if defined(PETSC_HAVE_CUDA)
281:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
282:     sf->ops->Malloc = PetscSFMalloc_CUDA;
283:     sf->ops->Free   = PetscSFFree_CUDA;
284:   }
285: #endif
286: #if defined(PETSC_HAVE_HIP)
287:   if (sf->backend == PETSCSF_BACKEND_HIP) {
288:     sf->ops->Malloc = PetscSFMalloc_HIP;
289:     sf->ops->Free   = PetscSFFree_HIP;
290:   }
291: #endif

294: #if defined(PETSC_HAVE_KOKKOS)
295:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
296:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
297:     sf->ops->Free   = PetscSFFree_Kokkos;
298:   }
299: #endif
300:   PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0);
301:   sf->setupcalled = PETSC_TRUE;
302:   return 0;
303: }

305: /*@
306:    PetscSFSetFromOptions - set `PetscSF` options using the options database

308:    Logically Collective

310:    Input Parameter:
311: .  sf - star forest

313:    Options Database Keys:
314: +  -sf_type               - implementation type, see PetscSFSetType()
315: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
316: .  -sf_unknown_input_stream - Assume the input root/leafdata is computed on arbitrary streams unknown to SF (default: false), so that SF
317:                             performs extra stream synchronization before processing the data. This option is only relevant with device support.
319: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
320:                                If true, this option only works with -use_gpu_aware_mpi 1.

322: -  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and kokkos.
323:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
324:                               kokkos is the only available backend.

326:    Level: intermediate

328: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
329: @*/
330: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
331: {
332:   PetscSFType deft;
333:   char        type[256];
334:   PetscBool   flg;

337:   PetscObjectOptionsBegin((PetscObject)sf);
338:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
339:   PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg);
340:   PetscSFSetType(sf, flg ? type : deft);
341:   PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL);
342: #if defined(PETSC_HAVE_DEVICE)
343:   {
344:     char      backendstr[32] = {0};
345:     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
346:     /* Change the defaults set in PetscSFCreate() with command line options */
347:     PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL);
348:     PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL);
349:     PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set);
350:     PetscStrcasecmp("cuda", backendstr, &isCuda);
351:     PetscStrcasecmp("kokkos", backendstr, &isKokkos);
352:     PetscStrcasecmp("hip", backendstr, &isHip);
353:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
354:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
355:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
356:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
358:   #elif defined(PETSC_HAVE_KOKKOS)
360:   #endif
361:   }
362: #endif
363:   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
364:   PetscOptionsEnd();
365:   return 0;
366: }

368: /*@
369:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

371:    Logically Collective

373:    Input Parameters:
374: +  sf - star forest
375: -  flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)

377:    Level: advanced

379: .seealso: `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
380: @*/
381: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
382: {
386:   sf->rankorder = flg;
387:   return 0;
388: }

390: /*@C
391:    PetscSFSetGraph - Set a parallel star forest

393:    Collective

395:    Input Parameters:
396: +  sf - star forest
397: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
398: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
399: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
400: during setup in debug mode)
401: .  localmode - copy mode for ilocal
402: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
403: during setup in debug mode)
404: -  remotemode - copy mode for iremote

406:    Level: intermediate

408:    Notes:
409:    Leaf indices in ilocal must be unique, otherwise an error occurs.

411:    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
412:    In particular, if localmode/remotemode is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
413:    PETSc might modify the respective array;
414:    if `PETSC_USE_POINTER`, the user must delete the array after PetscSFDestroy().
415:    Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact, and a const array can then be passed (but a cast to non-const is needed).

417:    Fortran Note:
418:    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

420:    Developer Note:
421:    We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
422:    This also makes it easy to compare the leaf sets of two SFs.

424: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
425: @*/
426: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
427: {
428:   PetscBool unique, contiguous;

435:   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
436:    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */

440:   if (sf->nroots >= 0) { /* Reset only if graph already set */
441:     PetscSFReset(sf);
442:   }

444:   PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0);

446:   sf->nroots  = nroots;
447:   sf->nleaves = nleaves;

449:   if (localmode == PETSC_COPY_VALUES && ilocal) {
450:     PetscInt *tlocal = NULL;

452:     PetscMalloc1(nleaves, &tlocal);
453:     PetscArraycpy(tlocal, ilocal, nleaves);
454:     ilocal = tlocal;
455:   }
456:   if (remotemode == PETSC_COPY_VALUES) {
457:     PetscSFNode *tremote = NULL;

459:     PetscMalloc1(nleaves, &tremote);
460:     PetscArraycpy(tremote, iremote, nleaves);
461:     iremote = tremote;
462:   }

464:   if (nleaves && ilocal) {
465:     PetscSFNode work;

467:     PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
468:     PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
469:     unique = PetscNot(unique);
471:     sf->minleaf = ilocal[0];
472:     sf->maxleaf = ilocal[nleaves - 1];
473:     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
474:   } else {
475:     sf->minleaf = 0;
476:     sf->maxleaf = nleaves - 1;
477:     unique      = PETSC_TRUE;
478:     contiguous  = PETSC_TRUE;
479:   }

481:   if (contiguous) {
482:     if (localmode == PETSC_USE_POINTER) {
483:       ilocal = NULL;
484:     } else {
485:       PetscFree(ilocal);
486:     }
487:   }
488:   sf->mine = ilocal;
489:   if (localmode == PETSC_USE_POINTER) {
490:     sf->mine_alloc = NULL;
491:   } else {
492:     sf->mine_alloc = ilocal;
493:   }
494:   sf->remote = iremote;
495:   if (remotemode == PETSC_USE_POINTER) {
496:     sf->remote_alloc = NULL;
497:   } else {
498:     sf->remote_alloc = iremote;
499:   }
500:   PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0);
501:   sf->graphset = PETSC_TRUE;
502:   return 0;
503: }
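
/* A minimal sketch of the call sequence documented above: every rank owns two
   roots and attaches two contiguous leaves to the roots of the next rank
   (wrapping around). The helper name is illustrative, and error checking is
   omitted to match the rest of this listing. */
static PetscErrorCode SketchRingGraph(MPI_Comm comm, PetscSF *sf)
{
  PetscMPIInt rank, size;
  PetscSFNode remote[2];
  PetscInt    i;

  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);
  PetscSFCreate(comm, sf);
  for (i = 0; i < 2; i++) {
    remote[i].rank  = (rank + 1) % size; /* the root owner is the next rank */
    remote[i].index = i;                 /* root index on that rank */
  }
  /* nroots = 2, nleaves = 2; ilocal = NULL means leaves 0..1 are contiguous.
     PETSC_COPY_VALUES lets the stack array remote[] be reused afterwards. */
  PetscSFSetGraph(*sf, 2, 2, NULL, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES);
  PetscSFSetUp(*sf);
  return 0;
}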

505: /*@
506:   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

508:   Collective

510:   Input Parameters:
511: + sf      - The `PetscSF`
512: . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
513: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

515:   Level: intermediate

517:   Notes:
518:   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector x whose layout is map.
519:   n and N are the local and global sizes of x, respectively.

521:   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a Bcast on it copies x to a
522:   sequential vector y on every rank.

524:   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a Bcast on it copies x to a
525:   sequential vector y on rank 0.

527:   In the above cases, entries of x are roots and entries of y are leaves.

529:   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is the size of sf's communicator. The routine
530:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
531:   of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. This does
532:   not mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
533:   items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.

535:   In this case, roots and leaves are symmetric.

537: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
538:  @*/
539: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
540: {
541:   MPI_Comm    comm;
542:   PetscInt    n, N, res[2];
543:   PetscMPIInt rank, size;
544:   PetscSFType type;

548:   PetscObjectGetComm((PetscObject)sf, &comm);
550:   MPI_Comm_rank(comm, &rank);
551:   MPI_Comm_size(comm, &size);

553:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
554:     type = PETSCSFALLTOALL;
555:     PetscLayoutCreate(comm, &sf->map);
556:     PetscLayoutSetLocalSize(sf->map, size);
557:     PetscLayoutSetSize(sf->map, ((PetscInt)size) * size);
558:     PetscLayoutSetUp(sf->map);
559:   } else {
560:     PetscLayoutGetLocalSize(map, &n);
561:     PetscLayoutGetSize(map, &N);
562:     res[0] = n;
563:     res[1] = -n;
564:     /* Check if n is the same over all ranks so that we can optimize it */
565:     MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm);
566:     if (res[0] == -res[1]) { /* same n */
567:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
568:     } else {
569:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
570:     }
571:     PetscLayoutReference(map, &sf->map);
572:   }
573:   PetscSFSetType(sf, type);

575:   sf->pattern = pattern;
576:   sf->mine    = NULL; /* Contiguous */

578:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
579:      Also set other easy stuff.
580:    */
581:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
582:     sf->nleaves = N;
583:     sf->nroots  = n;
584:     sf->nranks  = size;
585:     sf->minleaf = 0;
586:     sf->maxleaf = N - 1;
587:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
588:     sf->nleaves = rank ? 0 : N;
589:     sf->nroots  = n;
590:     sf->nranks  = rank ? 0 : size;
591:     sf->minleaf = 0;
592:     sf->maxleaf = rank ? -1 : N - 1;
593:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
594:     sf->nleaves = size;
595:     sf->nroots  = size;
596:     sf->nranks  = size;
597:     sf->minleaf = 0;
598:     sf->maxleaf = size - 1;
599:   }
600:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
601:   sf->graphset = PETSC_TRUE;
602:   return 0;
603: }
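
/* A minimal sketch of building a patterned SF from a layout, assuming each
   rank contributes n local entries; a Bcast on the resulting SF then behaves
   like an MPI_Allgatherv() of those entries. The helper name is illustrative. */
static PetscErrorCode SketchAllgatherSF(MPI_Comm comm, PetscInt n, PetscSF *sf)
{
  PetscLayout map;

  PetscLayoutCreate(comm, &map);
  PetscLayoutSetLocalSize(map, n);
  PetscLayoutSetUp(map);
  PetscSFCreate(comm, sf);
  PetscSFSetGraphWithPattern(*sf, map, PETSCSF_PATTERN_ALLGATHER);
  PetscLayoutDestroy(&map); /* the SF keeps its own reference to the layout */
  return 0;
}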

605: /*@
606:    PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map

608:    Collective

610:    Input Parameter:
611: .  sf - star forest to invert

613:    Output Parameter:
614: .  isf - inverse of sf

616:    Level: advanced

618:    Notes:
619:    All roots must have degree 1.

621:    The local space may be a permutation, but cannot be sparse.

623: .seealso: `PetscSFType`, `PetscSFSetGraph()`
624: @*/
625: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
626: {
627:   PetscMPIInt     rank;
628:   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
629:   const PetscInt *ilocal;
630:   PetscSFNode    *roots, *leaves;

633:   PetscSFCheckGraphSet(sf, 1);

636:   PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL);
637:   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

639:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
640:   PetscMalloc2(nroots, &roots, maxlocal, &leaves);
641:   for (i = 0; i < maxlocal; i++) {
642:     leaves[i].rank  = rank;
643:     leaves[i].index = i;
644:   }
645:   for (i = 0; i < nroots; i++) {
646:     roots[i].rank  = -1;
647:     roots[i].index = -1;
648:   }
649:   PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);
650:   PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);

652:   /* Check whether our leaves are sparse */
653:   for (i = 0, count = 0; i < nroots; i++)
654:     if (roots[i].rank >= 0) count++;
655:   if (count == nroots) newilocal = NULL;
656:   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscMalloc1(count, &newilocal);
657:     for (i = 0, count = 0; i < nroots; i++) {
658:       if (roots[i].rank >= 0) {
659:         newilocal[count]   = i;
660:         roots[count].rank  = roots[i].rank;
661:         roots[count].index = roots[i].index;
662:         count++;
663:       }
664:     }
665:   }

667:   PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf);
668:   PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES);
669:   PetscFree2(roots, leaves);
670:   return 0;
671: }

673: /*@
674:    PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

676:    Collective

678:    Input Parameters:
679: +  sf - communication object to duplicate
680: -  opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

682:    Output Parameter:
683: .  newsf - new communication object

685:    Level: beginner

687: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
688: @*/
689: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
690: {
691:   PetscSFType  type;
692:   MPI_Datatype dtype = MPIU_SCALAR;

697:   PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf);
698:   PetscSFGetType(sf, &type);
699:   if (type) PetscSFSetType(*newsf, type);
700:   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
701:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
702:     PetscSFCheckGraphSet(sf, 1);
703:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
704:       PetscInt           nroots, nleaves;
705:       const PetscInt    *ilocal;
706:       const PetscSFNode *iremote;
707:       PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
708:       PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
709:     } else {
710:       PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern);
711:     }
712:   }
713:   /* Since oldtype is committed, so is newtype, according to MPI */
714:   if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit, &dtype);
715:   (*newsf)->vscat.bs     = sf->vscat.bs;
716:   (*newsf)->vscat.unit   = dtype;
717:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
718:   (*newsf)->vscat.from_n = sf->vscat.from_n;
719:   /* Do not copy lsf. Build it on demand since it is rarely used */

721: #if defined(PETSC_HAVE_DEVICE)
722:   (*newsf)->backend              = sf->backend;
723:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
724:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
725:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
726: #endif
727:   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
728:   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
729:   return 0;
730: }
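
/* A minimal sketch contrasting the duplication options; the helper name is
   illustrative, and error checking is omitted as in the rest of this listing. */
static PetscErrorCode SketchDuplicate(PetscSF sf)
{
  PetscSF confdup, graphdup;

  PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &confdup); /* same comm/type/options; a graph must still be set */
  PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &graphdup);   /* also copies the graph, ready for PetscSFSetUp() */
  PetscSFDestroy(&confdup);
  PetscSFDestroy(&graphdup);
  return 0;
}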

732: /*@C
733:    PetscSFGetGraph - Get the graph specifying a parallel star forest

735:    Not Collective

737:    Input Parameter:
738: .  sf - star forest

740:    Output Parameters:
741: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
742: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
743: .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
744: -  iremote - remote locations of root vertices for each leaf on the current process

746:    Level: intermediate

748:    Notes:
749:      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

751:      The returned ilocal/iremote might contain values in different order than the input ones in `PetscSFSetGraph()`,
752:      see its manpage for details.

754:    Fortran Notes:
755:      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
756:      want to update the graph, you must call `PetscSFSetGraph()` after modifying the iremote array.

758:      To check for a NULL ilocal use
759: $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

761: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
762: @*/
763: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
764: {
766:   if (sf->ops->GetGraph) {
767:     (sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote);
768:   } else {
769:     if (nroots) *nroots = sf->nroots;
770:     if (nleaves) *nleaves = sf->nleaves;
771:     if (ilocal) *ilocal = sf->mine;
772:     if (iremote) *iremote = sf->remote;
773:   }
774:   return 0;
775: }
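
/* A minimal sketch of inspecting the graph returned by PetscSFGetGraph();
   the helper name is illustrative. */
static PetscErrorCode SketchPrintGraph(PetscSF sf)
{
  PetscInt           nleaves, i;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote);
  for (i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; /* NULL ilocal means leaves are stored contiguously */
    PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (rank %" PetscInt_FMT ", root %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index);
  }
  return 0;
}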

777: /*@
778:    PetscSFGetLeafRange - Get the active leaf ranges

780:    Not Collective

782:    Input Parameter:
783: .  sf - star forest

785:    Output Parameters:
786: +  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
787: -  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

789:    Level: developer

791: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
792: @*/
793: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
794: {
796:   PetscSFCheckGraphSet(sf, 1);
797:   if (minleaf) *minleaf = sf->minleaf;
798:   if (maxleaf) *maxleaf = sf->maxleaf;
799:   return 0;
800: }

802: /*@C
803:    PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database

805:    Collective on A

807:    Input Parameters:
808: +  A - the star forest
809: .  obj - Optional object that provides the prefix for the option names
810: -  name - command line option

812:    Level: intermediate

814: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
815: @*/
816: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
817: {
819:   PetscObjectViewFromOptions((PetscObject)A, obj, name);
820:   return 0;
821: }

823: /*@C
824:    PetscSFView - view a star forest

826:    Collective

828:    Input Parameters:
829: +  sf - star forest
830: -  viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`

832:    Level: beginner

834: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
835: @*/
836: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
837: {
838:   PetscBool         iascii;
839:   PetscViewerFormat format;

842:   if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer);
845:   if (sf->graphset) PetscSFSetUp(sf);
846:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
847:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
848:     PetscMPIInt rank;
849:     PetscInt    ii, i, j;

851:     PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer);
852:     PetscViewerASCIIPushTab(viewer);
853:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
854:       if (!sf->graphset) {
855:         PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n");
856:         PetscViewerASCIIPopTab(viewer);
857:         return 0;
858:       }
859:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
860:       PetscViewerASCIIPushSynchronized(viewer);
861:       PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks);
862:       for (i = 0; i < sf->nleaves; i++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index);
863:       PetscViewerFlush(viewer);
864:       PetscViewerGetFormat(viewer, &format);
865:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
866:         PetscMPIInt *tmpranks, *perm;
867:         PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm);
868:         PetscArraycpy(tmpranks, sf->ranks, sf->nranks);
869:         for (i = 0; i < sf->nranks; i++) perm[i] = i;
870:         PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm);
871:         PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank);
872:         for (ii = 0; ii < sf->nranks; ii++) {
873:           i = perm[ii];
874:           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]);
875:           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]);
876:         }
877:         PetscFree2(tmpranks, perm);
878:       }
879:       PetscViewerFlush(viewer);
880:       PetscViewerASCIIPopSynchronized(viewer);
881:     }
882:     PetscViewerASCIIPopTab(viewer);
883:   }
884:   PetscTryTypeMethod(sf, View, viewer);
885:   return 0;
886: }

888: /*@C
889:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

891:    Not Collective

893:    Input Parameter:
894: .  sf - star forest

896:    Output Parameters:
897: +  nranks - number of ranks referenced by local part
898: .  ranks - array of ranks
899: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
900: .  rmine - concatenated array holding local indices referencing each remote rank
901: -  rremote - concatenated array holding remote indices referenced for each remote rank

903:    Level: developer

905: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
906: @*/
907: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
908: {
911:   if (sf->ops->GetRootRanks) {
912:     (sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote);
913:   } else {
914:     /* The generic implementation */
915:     if (nranks) *nranks = sf->nranks;
916:     if (ranks) *ranks = sf->ranks;
917:     if (roffset) *roffset = sf->roffset;
918:     if (rmine) *rmine = sf->rmine;
919:     if (rremote) *rremote = sf->rremote;
920:   }
921:   return 0;
922: }
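
/* A minimal sketch of walking the rank-grouped leaf data provided by
   PetscSFGetRootRanks(); this assumes PetscSFSetUp() has already been called
   on sf. The helper name is illustrative. */
static PetscErrorCode SketchPrintRootRanks(PetscSF sf)
{
  PetscInt           nranks, i, j;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote);
  for (i = 0; i < nranks; i++) {
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      /* local leaf rmine[j] references root rremote[j] owned by rank ranks[i] */
      PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " -> root %" PetscInt_FMT " on rank %d\n", rmine[j], rremote[j], (int)ranks[i]);
    }
  }
  return 0;
}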

924: /*@C
925:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

927:    Not Collective

929:    Input Parameter:
930: .  sf - star forest

932:    Output Parameters:
933: +  niranks - number of leaf ranks referencing roots on this process
934: .  iranks - array of ranks
935: .  ioffset - offset in irootloc for each rank (length niranks+1)
936: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

938:    Level: developer

940: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
941: @*/
942: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
943: {
946:   if (sf->ops->GetLeafRanks) {
947:     (sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc);
948:   } else {
949:     PetscSFType type;
950:     PetscSFGetType(sf, &type);
951:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
952:   }
953:   return 0;
954: }

956: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
957: {
958:   PetscInt i;
959:   for (i = 0; i < n; i++) {
960:     if (needle == list[i]) return PETSC_TRUE;
961:   }
962:   return PETSC_FALSE;
963: }

965: /*@C
966:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.

968:    Collective

970:    Input Parameters:
971: +  sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
972: -  dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)

974:    Level: developer

976: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
977: @*/
978: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
979: {
980:   PetscTable         table;
981:   PetscTablePosition pos;
982:   PetscMPIInt        size, groupsize, *groupranks;
983:   PetscInt          *rcount, *ranks;
984:   PetscInt           i, irank = -1, orank = -1;

987:   PetscSFCheckGraphSet(sf, 1);
988:   MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
989:   PetscTableCreate(10, size, &table);
990:   for (i = 0; i < sf->nleaves; i++) {
991:     /* Log 1-based rank */
992:     PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES);
993:   }
994:   PetscTableGetCount(table, &sf->nranks);
995:   PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote);
996:   PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks);
997:   PetscTableGetHeadPosition(table, &pos);
998:   for (i = 0; i < sf->nranks; i++) {
999:     PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]);
1000:     ranks[i]--; /* Convert back to 0-based */
1001:   }
1002:   PetscTableDestroy(&table);

1004:   /* We expect that dgroup is reliably "small" while nranks could be large */
1005:   {
1006:     MPI_Group    group = MPI_GROUP_NULL;
1007:     PetscMPIInt *dgroupranks;
1008:     MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1009:     MPI_Group_size(dgroup, &groupsize);
1010:     PetscMalloc1(groupsize, &dgroupranks);
1011:     PetscMalloc1(groupsize, &groupranks);
1012:     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1013:     if (groupsize) MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks);
1014:     MPI_Group_free(&group);
1015:     PetscFree(dgroupranks);
1016:   }

1018:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1019:   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1020:     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1021:       if (InList(ranks[i], groupsize, groupranks)) break;
1022:     }
1023:     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1024:       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1025:     }
1026:     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1027:       PetscInt tmprank, tmpcount;

1029:       tmprank             = ranks[i];
1030:       tmpcount            = rcount[i];
1031:       ranks[i]            = ranks[sf->ndranks];
1032:       rcount[i]           = rcount[sf->ndranks];
1033:       ranks[sf->ndranks]  = tmprank;
1034:       rcount[sf->ndranks] = tmpcount;
1035:       sf->ndranks++;
1036:     }
1037:   }
1038:   PetscFree(groupranks);
1039:   PetscSortIntWithArray(sf->ndranks, ranks, rcount);
1040:   PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks);
1041:   sf->roffset[0] = 0;
1042:   for (i = 0; i < sf->nranks; i++) {
1043:     PetscMPIIntCast(ranks[i], sf->ranks + i);
1044:     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1045:     rcount[i]          = 0;
1046:   }
1047:   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1048:     /* short circuit */
1049:     if (orank != sf->remote[i].rank) {
1050:       /* Search for index of iremote[i].rank in sf->ranks */
1051:       PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank);
1052:       if (irank < 0) {
1053:         PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank);
1054:         if (irank >= 0) irank += sf->ndranks;
1055:       }
1056:       orank = sf->remote[i].rank;
1057:     }
1059:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1060:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1061:     rcount[irank]++;
1062:   }
1063:   PetscFree2(rcount, ranks);
1064:   return 0;
1065: }

1067: /*@C
1068:    PetscSFGetGroups - gets incoming and outgoing process groups

1070:    Collective

1072:    Input Parameter:
1073: .  sf - star forest

1075:    Output Parameters:
1076: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1077: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1079:    Level: developer

1081: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1082: @*/
1083: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1084: {
1085:   MPI_Group group = MPI_GROUP_NULL;

1088:   if (sf->ingroup == MPI_GROUP_NULL) {
1089:     PetscInt        i;
1090:     const PetscInt *indegree;
1091:     PetscMPIInt     rank, *outranks, *inranks;
1092:     PetscSFNode    *remote;
1093:     PetscSF         bgcount;

1095:     /* Compute the number of incoming ranks */
1096:     PetscMalloc1(sf->nranks, &remote);
1097:     for (i = 0; i < sf->nranks; i++) {
1098:       remote[i].rank  = sf->ranks[i];
1099:       remote[i].index = 0;
1100:     }
1101:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount);
1102:     PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1103:     PetscSFComputeDegreeBegin(bgcount, &indegree);
1104:     PetscSFComputeDegreeEnd(bgcount, &indegree);
1105:     /* Enumerate the incoming ranks */
1106:     PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks);
1107:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1108:     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1109:     PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks);
1110:     PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks);
1111:     MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1112:     MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup);
1113:     MPI_Group_free(&group);
1114:     PetscFree2(inranks, outranks);
1115:     PetscSFDestroy(&bgcount);
1116:   }
1117:   *incoming = sf->ingroup;

1119:   if (sf->outgroup == MPI_GROUP_NULL) {
1120:     MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1121:     MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup);
1122:     MPI_Group_free(&group);
1123:   }
1124:   *outgoing = sf->outgroup;
1125:   return 0;
1126: }

1128: /*@
1129:    PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters

1131:    Collective

1133:    Input Parameter:
1134: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1136:    Output Parameter:
1137: .  multi - star forest with split roots, such that each root has degree exactly 1

1139:    Level: developer

1141:    Note:
1142:    In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1143:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1144:    edge, it is a candidate for future optimization that might involve its removal.

1146: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1147: @*/
1148: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1149: {
1152:   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1153:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1154:     *multi           = sf->multi;
1155:     sf->multi->multi = sf->multi;
1156:     return 0;
1157:   }
1158:   if (!sf->multi) {
1159:     const PetscInt *indegree;
1160:     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1161:     PetscSFNode    *remote;
1162:     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1163:     PetscSFComputeDegreeBegin(sf, &indegree);
1164:     PetscSFComputeDegreeEnd(sf, &indegree);
1165:     PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset);
1166:     inoffset[0] = 0;
1167:     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1168:     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1169:     PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1170:     PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1171:     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1172:     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1174:     }
1175:     PetscMalloc1(sf->nleaves, &remote);
1176:     for (i = 0; i < sf->nleaves; i++) {
1177:       remote[i].rank  = sf->remote[i].rank;
1178:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1179:     }
1180:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1181:     sf->multi->multi = sf->multi;
1182:     PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1183:     if (sf->rankorder) { /* Sort the ranks */
1184:       PetscMPIInt  rank;
1185:       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1186:       PetscSFNode *newremote;
1187:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1188:       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1189:       PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset);
1190:       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1191:       PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1192:       PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1193:       /* Sort the incoming ranks at each vertex, build the inverse map */
1194:       for (i = 0; i < sf->nroots; i++) {
1195:         PetscInt j;
1196:         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1197:         PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset);
1198:         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1199:       }
1200:       PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1201:       PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1202:       PetscMalloc1(sf->nleaves, &newremote);
1203:       for (i = 0; i < sf->nleaves; i++) {
1204:         newremote[i].rank  = sf->remote[i].rank;
1205:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1206:       }
1207:       PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER);
1208:       PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset);
1209:     }
1210:     PetscFree3(inoffset, outones, outoffset);
1211:   }
1212:   *multi = sf->multi;
1213:   return 0;
1214: }

1216: /*@C
1217:    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

1219:    Collective

1221:    Input Parameters:
1222: +  sf - original star forest
1223: .  nselected  - number of selected roots on this process
1224: -  selected   - indices of the selected roots on this process

1226:    Output Parameter:
1227: .  esf - new star forest

1229:    Level: advanced

1231:    Note:
1232:    To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1233:    be done by calling PetscSFGetGraph().

1235: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1236: @*/
1237: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1238: {
1239:   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1240:   const PetscInt    *ilocal;
1241:   signed char       *rootdata, *leafdata, *leafmem;
1242:   const PetscSFNode *iremote;
1243:   PetscSFNode       *new_iremote;
1244:   MPI_Comm           comm;

1247:   PetscSFCheckGraphSet(sf, 1);

1251:   PetscSFSetUp(sf);
1252:   PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0);
1253:   PetscObjectGetComm((PetscObject)sf, &comm);
1254:   PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);

1256:   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or  out of range indices */
1257:     PetscBool dups;
1261:   }

1263:   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1264:   else {
1265:     /* A generic version of creating embedded sf */
1266:     PetscSFGetLeafRange(sf, &minleaf, &maxleaf);
1267:     maxlocal = maxleaf - minleaf + 1;
1268:     PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem);
1269:     leafdata = leafmem - minleaf;
1270:     /* Tag selected roots and bcast to leaves */
1271:     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1272:     PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);
1273:     PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);

1275:     /* Build esf with leaves that are still connected */
1276:     esf_nleaves = 0;
1277:     for (i = 0; i < nleaves; i++) {
1278:       j = ilocal ? ilocal[i] : i;
1279:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1280:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1281:       */
1282:       esf_nleaves += (leafdata[j] ? 1 : 0);
1283:     }
1284:     PetscMalloc1(esf_nleaves, &new_ilocal);
1285:     PetscMalloc1(esf_nleaves, &new_iremote);
1286:     for (i = n = 0; i < nleaves; i++) {
1287:       j = ilocal ? ilocal[i] : i;
1288:       if (leafdata[j]) {
1289:         new_ilocal[n]        = j;
1290:         new_iremote[n].rank  = iremote[i].rank;
1291:         new_iremote[n].index = iremote[i].index;
1292:         ++n;
1293:       }
1294:     }
1295:     PetscSFCreate(comm, esf);
1296:     PetscSFSetFromOptions(*esf);
1297:     PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1298:     PetscFree2(rootdata, leafmem);
1299:   }
1300:   PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0);
1301:   return 0;
1302: }
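
/* A minimal sketch of extracting the sub-forest connected to a few selected
   roots; indices are not remapped, so the same root/leaf buffers can be
   reused with the embedded SF. The helper name and the choice of roots are
   illustrative. */
static PetscErrorCode SketchEmbedFirstRoot(PetscSF sf, PetscSF *esf)
{
  const PetscInt selected[] = {0}; /* keep only edges ending at local root 0 */

  PetscSFCreateEmbeddedRootSF(sf, 1, selected, esf);
  return 0;
}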

1304: /*@C
1305:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1307:   Collective

1309:   Input Parameters:
1310: + sf - original star forest
1311: . nselected  - number of selected leaves on this process
1312: - selected   - indices of the selected leaves on this process

1314:   Output Parameter:
1315: .  newsf - new star forest

1317:   Level: advanced

1319: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1320: @*/
1321: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1322: {
1323:   const PetscSFNode *iremote;
1324:   PetscSFNode       *new_iremote;
1325:   const PetscInt    *ilocal;
1326:   PetscInt           i, nroots, *leaves, *new_ilocal;
1327:   MPI_Comm           comm;

1330:   PetscSFCheckGraphSet(sf, 1);

1334:   /* Uniq selected[] and put results in leaves[] */
1335:   PetscObjectGetComm((PetscObject)sf, &comm);
1336:   PetscMalloc1(nselected, &leaves);
1337:   PetscArraycpy(leaves, selected, nselected);
1338:   PetscSortedRemoveDupsInt(&nselected, leaves);

1341:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1342:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1343:   else {
1344:     PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote);
1345:     PetscMalloc1(nselected, &new_ilocal);
1346:     PetscMalloc1(nselected, &new_iremote);
1347:     for (i = 0; i < nselected; ++i) {
1348:       const PetscInt l     = leaves[i];
1349:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1350:       new_iremote[i].rank  = iremote[l].rank;
1351:       new_iremote[i].index = iremote[l].index;
1352:     }
1353:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf);
1354:     PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1355:   }
1356:   PetscFree(leaves);
1357:   return 0;
1358: }

1360: /*@C
1361:    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`

1363:    Collective

1365:    Input Parameters:
1366: +  sf - star forest on which to communicate
1367: .  unit - data type associated with each node
1368: .  rootdata - buffer to broadcast
1369: -  op - operation to use for reduction

1371:    Output Parameter:
1372: .  leafdata - buffer to be reduced with values from each leaf's respective root

1374:    Level: intermediate

1376:    Notes:
1377:     When PETSc is configured with device support, this routine uses its own mechanism to determine whether the given data pointers
1378:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1379:     use `PetscSFBcastWithMemTypeBegin()` instead.

1381: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1382: @*/
1383: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1384: {
1385:   PetscMemType rootmtype, leafmtype;

1388:   PetscSFSetUp(sf);
1389:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1390:   PetscGetMemType(rootdata, &rootmtype);
1391:   PetscGetMemType(leafdata, &leafmtype);
1392:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1393:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1394:   return 0;
1395: }

1397: /*@C
1398:    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to `PetscSFBcastEnd()`

1400:    Collective

1402:    Input Parameters:
1403: +  sf - star forest on which to communicate
1404: .  unit - data type associated with each node
1405: .  rootmtype - memory type of rootdata
1406: .  rootdata - buffer to broadcast
1407: .  leafmtype - memory type of leafdata
1408: -  op - operation to use for reduction

1410:    Output Parameter:
1411: .  leafdata - buffer to be reduced with values from each leaf's respective root

1413:    Level: intermediate

1415: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1416: @*/
1417: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1418: {
1420:   PetscSFSetUp(sf);
1421:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1422:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1423:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1424:   return 0;
1425: }

1427: /*@C
1428:    PetscSFBcastEnd - end a broadcast & reduce operation started with `PetscSFBcastBegin()`

1430:    Collective

1432:    Input Parameters:
1433: +  sf - star forest
1434: .  unit - data type
1435: .  rootdata - buffer to broadcast
1436: -  op - operation to use for reduction

1438:    Output Parameter:
1439: .  leafdata - buffer to be reduced with values from each leaf's respective root

1441:    Level: intermediate

1443: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1444: @*/
1445: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1446: {
1448:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0);
1449:   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1450:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0);
1451:   return 0;
1452: }
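
/* A minimal sketch of a complete broadcast: each leaf receives the value of
   the root it references. This assumes an SF with two roots and two
   contiguous leaves per rank, as in the PetscSFSetGraph() sketch earlier in
   this file; the helper name is illustrative. */
static PetscErrorCode SketchBcast(PetscSF sf)
{
  PetscMPIInt rank;
  PetscInt    rootdata[2], leafdata[2] = {-1, -1};

  MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
  rootdata[0] = 10 * rank;
  rootdata[1] = 10 * rank + 1;
  /* unrelated work can be overlapped between Begin and End */
  PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
  PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
  return 0;
}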

1454: /*@C
1455:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`

1457:    Collective

1459:    Input Parameters:
1460: +  sf - star forest
1461: .  unit - data type
1462: .  leafdata - values to reduce
1463: -  op - reduction operation

1465:    Output Parameter:
1466: .  rootdata - result of reduction of values from all leaves of each root

1468:    Level: intermediate

1470:    Notes:
1471:     When PETSc is configured with device support, this routine uses its own mechanism to determine whether the given data pointers
1472:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1473:     use `PetscSFReduceWithMemTypeBegin()` instead.

1475: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1476: @*/
1477: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1478: {
1479:   PetscMemType rootmtype, leafmtype;

1482:   PetscSFSetUp(sf);
1483:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1484:   PetscGetMemType(rootdata, &rootmtype);
1485:   PetscGetMemType(leafdata, &leafmtype);
1486:   (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1487:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1488:   return 0;
1489: }

1491: /*@C
1492:    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`

1494:    Collective

1496:    Input Parameters:
1497: +  sf - star forest
1498: .  unit - data type
1499: .  leafmtype - memory type of leafdata
1500: .  leafdata - values to reduce
1501: .  rootmtype - memory type of rootdata
1502: -  op - reduction operation

1504:    Output Parameter:
1505: .  rootdata - result of reduction of values from all leaves of each root

1507:    Level: intermediate

1509: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1510: @*/
1511: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1512: {
1514:   PetscSFSetUp(sf);
1515:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1516:   (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1517:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1518:   return 0;
1519: }

1521: /*@C
1522:    PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()`

1524:    Collective

1526:    Input Parameters:
1527: +  sf - star forest
1528: .  unit - data type
1529: .  leafdata - values to reduce
1530: -  op - reduction operation

1532:    Output Parameter:
1533: .  rootdata - result of reduction of values from all leaves of each root

1535:    Level: intermediate

1537: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`
1538: @*/
1539: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1540: {
1542:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1543:   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1544:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1545:   return 0;
1546: }
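
/*
   Example usage (a minimal sketch, assuming an already set-up SF 'sf' with contiguous leaf storage
   and hypothetical sizes 'nroots'/'nleaves'; MPI_SUM accumulates the contributions of all leaves of
   a root into that root, so rootdata must be initialized first):

     PetscInt *rootdata, *leafdata;
     PetscMalloc1(nroots, &rootdata);
     PetscMalloc1(nleaves, &leafdata);
     // ... fill leafdata; zero rootdata so the sum starts from 0 ...
     PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM);
     PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM);
     PetscFree(leafdata);
     PetscFree(rootdata);
*/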

1548: /*@C
1549:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1550:    to be completed with `PetscSFFetchAndOpEnd()`

1552:    Collective

1554:    Input Parameters:
1555: +  sf - star forest
1556: .  unit - data type
1557: .  leafdata - leaf values to use in reduction
1558: -  op - operation to use for reduction

1560:    Output Parameters:
1561: +  rootdata - root values to be updated, input state is seen by the first process to perform an update
1562: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1564:    Level: advanced

1566:    Note:
1567:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1568:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1569:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1570:    integers.

1572: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1573: @*/
1574: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1575: {
1576:   PetscMemType rootmtype, leafmtype, leafupdatemtype;

1579:   PetscSFSetUp(sf);
1580:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1581:   PetscGetMemType(rootdata, &rootmtype);
1582:   PetscGetMemType(leafdata, &leafmtype);
1583:   PetscGetMemType(leafupdate, &leafupdatemtype);
1585:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1586:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1587:   return 0;
1588: }

1590: /*@C
1591:    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1592:    applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`

1594:    Collective

1596:    Input Parameters:
1597: +  sf - star forest
1598: .  unit - data type
1599: .  rootmtype - memory type of rootdata
1600: .  leafmtype - memory type of leafdata
1601: .  leafdata - leaf values to use in reduction
1602: .  leafupdatemtype - memory type of leafupdate
1603: -  op - operation to use for reduction

1605:    Output Parameters:
1606: +  rootdata - root values to be updated, input state is seen by the first process to perform an update
1607: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1609:    Level: advanced

1611:    Note:
1612:    See `PetscSFFetchAndOpBegin()` for more details.

1614: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1615: @*/
1616: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1617: {
1619:   PetscSFSetUp(sf);
1620:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1622:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1623:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1624:   return 0;
1625: }

1627: /*@C
1628:    PetscSFFetchAndOpEnd - end the operation started in the matching call to `PetscSFFetchAndOpBegin()` to fetch values from roots and update them atomically by applying an operation using my leaf value

1630:    Collective

1632:    Input Parameters:
1633: +  sf - star forest
1634: .  unit - data type
1635: .  leafdata - leaf values to use in reduction
1636: -  op - operation to use for reduction

1638:    Output Parameters:
1639: +  rootdata - root values to be updated, input state is seen by the first process to perform an update
1640: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1642:    Level: advanced

1644: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
1645: @*/
1646: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1647: {
1649:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1650:   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1651:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1652:   return 0;
1653: }
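
/*
   Example usage (a minimal sketch, assuming an already set-up SF 'sf'; the classic pattern is a
   fetch-and-add in which rootdata accumulates the leaf contributions while leafupdate receives
   the value the root held just before this leaf's atomic update):

     // PetscInt arrays rootdata (root space), leafdata and leafupdate (leaf space) allocated by the caller;
     // rootdata initialized (e.g. to 0), leafdata holds each leaf's increment
     PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM);
     PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM);
     // leafupdate[i] now holds the root value seen just before leaf i's atomic update
*/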

1655: /*@C
1656:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

1658:    Collective

1660:    Input Parameter:
1661: .  sf - star forest

1663:    Output Parameter:
1664: .  degree - degree of each root vertex

1666:    Level: advanced

1668:    Note:
1669:    The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.

1671: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1672: @*/
1673: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1674: {
1676:   PetscSFCheckGraphSet(sf, 1);
1678:   if (!sf->degreeknown) {
1679:     PetscInt i, nroots = sf->nroots, maxlocal;
1681:     maxlocal = sf->maxleaf - sf->minleaf + 1;
1682:     PetscMalloc1(nroots, &sf->degree);
1683:     PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1684:     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1685:     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1686:     PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1687:   }
1688:   *degree = NULL;
1689:   return 0;
1690: }

1692: /*@C
1693:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

1695:    Collective

1697:    Input Parameter:
1698: .  sf - star forest

1700:    Output Parameter:
1701: .  degree - degree of each root vertex

1703:    Level: developer

1705:    Note:
1706:    The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.

1708: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1709: @*/
1710: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1711: {
1713:   PetscSFCheckGraphSet(sf, 1);
1715:   if (!sf->degreeknown) {
1717:     PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1718:     PetscFree(sf->degreetmp);
1719:     sf->degreeknown = PETSC_TRUE;
1720:   }
1721:   *degree = sf->degree;
1722:   return 0;
1723: }
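
/*
   Example usage (a minimal sketch, assuming an already set-up SF 'sf'; the degree array is owned by
   the SF and must not be freed by the caller):

     const PetscInt *degree;
     PetscSFComputeDegreeBegin(sf, &degree);
     PetscSFComputeDegreeEnd(sf, &degree);
     // degree[i] is the number of leaves attached to root i
*/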

1725: /*@C
1726:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by `PetscSFGetMultiSF()`).
1727:    Each multi-root is assigned the index of the corresponding original root.

1729:    Collective

1731:    Input Parameters:
1732: +  sf - star forest
1733: -  degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`

1735:    Output Parameters:
1736: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1737: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1739:    Level: developer

1741:    Note:
1742:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with `PetscFree()` when no longer needed.

1744: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1745: @*/
1746: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1747: {
1748:   PetscSF  msf;
1749:   PetscInt i, j, k, nroots, nmroots;

1752:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1756:   PetscSFGetMultiSF(sf, &msf);
1757:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1758:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1759:   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1760:     if (!degree[i]) continue;
1761:     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1762:   }
1764:   if (nMultiRoots) *nMultiRoots = nmroots;
1765:   return 0;
1766: }
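
/*
   Example usage (a minimal sketch, assuming 'degree' was obtained with PetscSFComputeDegreeBegin()/
   PetscSFComputeDegreeEnd() as in the sketch above; the returned numbering maps every multi-root
   back to its original root and must be freed by the caller):

     PetscInt nmr, *morig;
     PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &morig);
     // morig[k] is the original root index of multi-root k, for k = 0..nmr-1
     PetscFree(morig);
*/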

1768: /*@C
1769:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`

1771:    Collective

1773:    Input Parameters:
1774: +  sf - star forest
1775: .  unit - data type
1776: -  leafdata - leaf data to gather to roots

1778:    Output Parameter:
1779: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1781:    Level: intermediate

1783: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1784: @*/
1785: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1786: {
1787:   PetscSF multi = NULL;

1790:   PetscSFSetUp(sf);
1791:   PetscSFGetMultiSF(sf, &multi);
1792:   PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1793:   return 0;
1794: }

1796: /*@C
1797:    PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`

1799:    Collective

1801:    Input Parameters:
1802: +  sf - star forest
1803: .  unit - data type
1804: -  leafdata - leaf data to gather to roots

1806:    Output Parameter:
1807: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1809:    Level: intermediate

1811: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1812: @*/
1813: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1814: {
1815:   PetscSF multi = NULL;

1818:   PetscSFGetMultiSF(sf, &multi);
1819:   PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1820:   return 0;
1821: }
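
/*
   Example usage (a minimal sketch, assuming an already set-up SF 'sf' and a filled leaf buffer
   'leafdata'; the multi-root buffer needs one unit per incoming leaf, i.e. its length is the sum of
   the root degrees):

     const PetscInt *degree;
     PetscInt        i, nroots, nmulti = 0;
     PetscScalar    *multirootdata;
     PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
     PetscSFComputeDegreeBegin(sf, &degree);
     PetscSFComputeDegreeEnd(sf, &degree);
     for (i = 0; i < nroots; i++) nmulti += degree[i];
     PetscMalloc1(nmulti, &multirootdata);
     PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata);
     PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata);
     PetscFree(multirootdata);
*/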

1823: /*@C
1824:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`

1826:    Collective

1828:    Input Parameters:
1829: +  sf - star forest
1830: .  unit - data type
1831: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1833:    Output Parameter:
1834: .  leafdata - leaf data to be updated with personal data from each respective root

1836:    Level: intermediate

1838: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1839: @*/
1840: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1841: {
1842:   PetscSF multi = NULL;

1845:   PetscSFSetUp(sf);
1846:   PetscSFGetMultiSF(sf, &multi);
1847:   PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1848:   return 0;
1849: }

1851: /*@C
1852:    PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`

1854:    Collective

1856:    Input Parameters:
1857: +  sf - star forest
1858: .  unit - data type
1859: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1861:    Output Parameter:
1862: .  leafdata - leaf data to be updated with personal data from each respective root

1864:    Level: intermediate

1866: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1867: @*/
1868: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1869: {
1870:   PetscSF multi = NULL;

1873:   PetscSFGetMultiSF(sf, &multi);
1874:   PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1875:   return 0;
1876: }
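
/*
   Example usage (a minimal sketch, mirroring the gather sketch above: each leaf receives its own
   personal unit from the multi-root buffer 'multirootdata', whose layout follows the root degrees):

     PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata);
     PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata);
*/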

1878: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1879: {
1880:   PetscInt        i, n, nleaves;
1881:   const PetscInt *ilocal = NULL;
1882:   PetscHSetI      seen;

1884:   if (PetscDefined(USE_DEBUG)) {
1885:     PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL);
1886:     PetscHSetICreate(&seen);
1887:     for (i = 0; i < nleaves; i++) {
1888:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1889:       PetscHSetIAdd(seen, leaf);
1890:     }
1891:     PetscHSetIGetSize(seen, &n);
1893:     PetscHSetIDestroy(&seen);
1894:   }
1895:   return 0;
1896: }

1898: /*@
1899:   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

1901:   Input Parameters:
1902: + sfA - The first `PetscSF`
1903: - sfB - The second `PetscSF`

1905:   Output Parameters:
1906: . sfBA - The composite `PetscSF`

1908:   Level: developer

1910:   Notes:
1911:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
1912:   forests, i.e. the same leaf is not connected with different roots.

1914:   sfA's leaf space and sfB's root space may partially overlap. The composition builds
1915:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1916:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1917:   Bcast on sfA, then a Bcast on sfB, on connected nodes.

1919: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1920: @*/
1921: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1922: {
1923:   const PetscSFNode *remotePointsA, *remotePointsB;
1924:   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1925:   const PetscInt    *localPointsA, *localPointsB;
1926:   PetscInt          *localPointsBA;
1927:   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1928:   PetscBool          denseB;

1931:   PetscSFCheckGraphSet(sfA, 1);
1933:   PetscSFCheckGraphSet(sfB, 2);
1936:   PetscSFCheckLeavesUnique_Private(sfA);
1937:   PetscSFCheckLeavesUnique_Private(sfB);

1939:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1940:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1941:   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size       */
1942:   /* numRootsB; otherwise, garbage will be broadcast.                                     */
1943:   /* Example (comm size = 1):                                                             */
1944:   /* sfA: 0 <- (0, 0)                                                                     */
1945:   /* sfB: 100 <- (0, 0)                                                                   */
1946:   /*      101 <- (0, 1)                                                                   */
1947:   /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target   */
1948:   /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would   */
1949:   /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on  */
1950:   /* remotePointsA; if not recast, point 101 would receive a garbage value.               */
1951:   PetscMalloc1(numRootsB, &reorderedRemotePointsA);
1952:   for (i = 0; i < numRootsB; i++) {
1953:     reorderedRemotePointsA[i].rank  = -1;
1954:     reorderedRemotePointsA[i].index = -1;
1955:   }
1956:   for (i = 0; i < numLeavesA; i++) {
1957:     PetscInt localp = localPointsA ? localPointsA[i] : i;

1959:     if (localp >= numRootsB) continue;
1960:     reorderedRemotePointsA[localp] = remotePointsA[i];
1961:   }
1962:   remotePointsA = reorderedRemotePointsA;
1963:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1964:   PetscMalloc1(maxleaf - minleaf + 1, &leafdataB);
1965:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
1966:     leafdataB[i].rank  = -1;
1967:     leafdataB[i].index = -1;
1968:   }
1969:   PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1970:   PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1971:   PetscFree(reorderedRemotePointsA);

1973:   denseB = (PetscBool)!localPointsB;
1974:   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1975:     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
1976:     else numLeavesBA++;
1977:   }
1978:   if (denseB) {
1979:     localPointsBA  = NULL;
1980:     remotePointsBA = leafdataB;
1981:   } else {
1982:     PetscMalloc1(numLeavesBA, &localPointsBA);
1983:     PetscMalloc1(numLeavesBA, &remotePointsBA);
1984:     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1985:       const PetscInt l = localPointsB ? localPointsB[i] : i;

1987:       if (leafdataB[l - minleaf].rank == -1) continue;
1988:       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
1989:       localPointsBA[numLeavesBA]  = l;
1990:       numLeavesBA++;
1991:     }
1992:     PetscFree(leafdataB);
1993:   }
1994:   PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
1995:   PetscSFSetFromOptions(*sfBA);
1996:   PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
1997:   return 0;
1998: }
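
/*
   Example usage (a minimal sketch, assuming two set-up SFs 'sfA' and 'sfB' where sfA's leaf space
   feeds sfB's root space; a Bcast on the composite is equivalent to a Bcast on sfA followed by a
   Bcast on sfB over the connected nodes):

     PetscSF sfAB;
     PetscSFCompose(sfA, sfB, &sfAB);
     // ... use sfAB, e.g. PetscSFBcastBegin()/PetscSFBcastEnd() on it ...
     PetscSFDestroy(&sfAB);
*/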

2000: /*@
2001:   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one

2003:   Input Parameters:
2004: + sfA - The first `PetscSF`
2005: - sfB - The second `PetscSF`

2007:   Output Parameters:
2008: . sfBA - The composite `PetscSF`.

2010:   Level: developer

2012:   Notes:
2013:   Currently, the two SFs must be defined on congruent communicators and they must be true star
2014:   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2015:   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

2017:   sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
2018:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2019:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2020:   a Reduce on sfB, on connected roots.

2022: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2023: @*/
2024: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2025: {
2026:   const PetscSFNode *remotePointsA, *remotePointsB;
2027:   PetscSFNode       *remotePointsBA;
2028:   const PetscInt    *localPointsA, *localPointsB;
2029:   PetscSFNode       *reorderedRemotePointsA = NULL;
2030:   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2031:   MPI_Op             op;
2032: #if defined(PETSC_USE_64BIT_INDICES)
2033:   PetscBool iswin;
2034: #endif

2037:   PetscSFCheckGraphSet(sfA, 1);
2039:   PetscSFCheckGraphSet(sfB, 2);
2042:   PetscSFCheckLeavesUnique_Private(sfA);
2043:   PetscSFCheckLeavesUnique_Private(sfB);

2045:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2046:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

2048:   /* TODO: Check roots of sfB have degree of 1 */
2049:   /* Once we implement it, we can replace the MPI_MAXLOC
2050:      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2051:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2052:      the root condition is not met.
2053:    */
2054:   op = MPI_MAXLOC;
2055: #if defined(PETSC_USE_64BIT_INDICES)
2056:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2057:   PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin);
2058:   if (iswin) op = MPI_REPLACE;
2059: #endif

2061:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2062:   PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA);
2063:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2064:     reorderedRemotePointsA[i].rank  = -1;
2065:     reorderedRemotePointsA[i].index = -1;
2066:   }
2067:   if (localPointsA) {
2068:     for (i = 0; i < numLeavesA; i++) {
2069:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2070:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2071:     }
2072:   } else {
2073:     for (i = 0; i < numLeavesA; i++) {
2074:       if (i > maxleaf || i < minleaf) continue;
2075:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2076:     }
2077:   }

2079:   PetscMalloc1(numRootsB, &localPointsBA);
2080:   PetscMalloc1(numRootsB, &remotePointsBA);
2081:   for (i = 0; i < numRootsB; i++) {
2082:     remotePointsBA[i].rank  = -1;
2083:     remotePointsBA[i].index = -1;
2084:   }

2086:   PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2087:   PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2088:   PetscFree(reorderedRemotePointsA);
2089:   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2090:     if (remotePointsBA[i].rank == -1) continue;
2091:     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2092:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2093:     localPointsBA[numLeavesBA]        = i;
2094:     numLeavesBA++;
2095:   }
2096:   PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
2097:   PetscSFSetFromOptions(*sfBA);
2098:   PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
2099:   return 0;
2100: }
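
/*
   Example usage (a minimal sketch, assuming set-up SFs 'sfA' and 'sfB' sharing a leaf space, with
   every root of sfB having degree 1; a Bcast on the result behaves like a Bcast on sfA followed by
   a Reduce on sfB over the connected roots):

     PetscSF sfAiB;
     PetscSFComposeInverse(sfA, sfB, &sfAiB);
     PetscSFDestroy(&sfAiB);
*/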

2102: /*
2103:   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`

2105:   Input Parameters:
2106: . sf - The global `PetscSF`

2108:   Output Parameters:
2109: . out - The local `PetscSF`

2111: .seealso: `PetscSF`, `PetscSFCreate()`
2112:  */
2113: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2114: {
2115:   MPI_Comm           comm;
2116:   PetscMPIInt        myrank;
2117:   const PetscInt    *ilocal;
2118:   const PetscSFNode *iremote;
2119:   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2120:   PetscSFNode       *liremote;
2121:   PetscSF            lsf;

2124:   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2125:   else {
2126:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2127:     PetscObjectGetComm((PetscObject)sf, &comm);
2128:     MPI_Comm_rank(comm, &myrank);

2130:     /* Find out local edges and build a local SF */
2131:     PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
2132:     for (i = lnleaves = 0; i < nleaves; i++) {
2133:       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2134:     }
2135:     PetscMalloc1(lnleaves, &lilocal);
2136:     PetscMalloc1(lnleaves, &liremote);

2138:     for (i = j = 0; i < nleaves; i++) {
2139:       if (iremote[i].rank == (PetscInt)myrank) {
2140:         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2141:         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2142:         liremote[j].index = iremote[i].index;
2143:         j++;
2144:       }
2145:     }
2146:     PetscSFCreate(PETSC_COMM_SELF, &lsf);
2147:     PetscSFSetFromOptions(lsf);
2148:     PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER);
2149:     PetscSFSetUp(lsf);
2150:     *out = lsf;
2151:   }
2152:   return 0;
2153: }

2155: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2156: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2157: {
2158:   PetscMemType rootmtype, leafmtype;

2161:   PetscSFSetUp(sf);
2162:   PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
2163:   PetscGetMemType(rootdata, &rootmtype);
2164:   PetscGetMemType(leafdata, &leafmtype);
2165:   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2166:   PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
2167:   return 0;
2168: }

2170: /*@
2171:   PetscSFConcatenate - concatenate multiple `PetscSF` into one

2173:   Input Parameters:
2174: + comm - the communicator
2175: . nsfs - the number of input `PetscSF`
2176: . sfs  - the array of input `PetscSF`
2177: . shareRoots - flag indicating whether the roots of the input `PetscSF`s are taken as shared (`PETSC_TRUE`) or as separate and concatenated (`PETSC_FALSE`)
2178: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or NULL for contiguous storage

2180:   Output Parameters:
2181: . newsf - The resulting `PetscSF`

2183:   Level: developer

2185:   Notes:
2186:   The communicator of all SFs in sfs must be comm.

2188:   The offsets in leafOffsets are added to the original leaf indices.

2190:   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2191:   In this case, NULL is also passed as ilocal to the resulting SF.

2193:   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2194:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2196: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2197: @*/
2198: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2199: {
2200:   PetscInt     i, s, nLeaves, nRoots;
2201:   PetscInt    *leafArrayOffsets;
2202:   PetscInt    *ilocal_new;
2203:   PetscSFNode *iremote_new;
2204:   PetscInt    *rootOffsets;
2205:   PetscBool    all_ilocal_null = PETSC_FALSE;
2206:   PetscMPIInt  rank;

2208:   {
2209:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2211:     PetscSFCreate(comm, &dummy);
2214:     for (i = 0; i < nsfs; i++) {
2217:     }
2221:     PetscSFDestroy(&dummy);
2222:   }
2223:   if (!nsfs) {
2224:     PetscSFCreate(comm, newsf);
2225:     PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2226:     return 0;
2227:   }
2228:   MPI_Comm_rank(comm, &rank);

2230:   PetscCalloc1(nsfs + 1, &rootOffsets);
2231:   if (shareRoots) {
2232:     PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2233:     if (PetscDefined(USE_DEBUG)) {
2234:       for (s = 1; s < nsfs; s++) {
2235:         PetscInt nr;

2237:         PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2239:       }
2240:     }
2241:   } else {
2242:     for (s = 0; s < nsfs; s++) {
2243:       PetscInt nr;

2245:       PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2246:       rootOffsets[s + 1] = rootOffsets[s] + nr;
2247:     }
2248:     nRoots = rootOffsets[nsfs];
2249:   }

2251:   /* Calculate leaf array offsets and automatic root offsets */
2252:   PetscMalloc1(nsfs + 1, &leafArrayOffsets);
2253:   leafArrayOffsets[0] = 0;
2254:   for (s = 0; s < nsfs; s++) {
2255:     PetscInt nl;

2257:     PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2258:     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2259:   }
2260:   nLeaves = leafArrayOffsets[nsfs];

2262:   if (!leafOffsets) {
2263:     all_ilocal_null = PETSC_TRUE;
2264:     for (s = 0; s < nsfs; s++) {
2265:       const PetscInt *ilocal;

2267:       PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2268:       if (ilocal) {
2269:         all_ilocal_null = PETSC_FALSE;
2270:         break;
2271:       }
2272:     }
2274:   }

2276:   /* Renumber and concatenate local leaves */
2277:   ilocal_new = NULL;
2278:   if (!all_ilocal_null) {
2279:     PetscMalloc1(nLeaves, &ilocal_new);
2280:     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2281:     for (s = 0; s < nsfs; s++) {
2282:       const PetscInt *ilocal;
2283:       PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2284:       PetscInt        i, nleaves_l;

2286:       PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2287:       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2288:     }
2289:   }

2291:   /* Renumber and concatenate remote roots */
2292:   PetscMalloc1(nLeaves, &iremote_new);
2293:   for (i = 0; i < nLeaves; i++) {
2294:     iremote_new[i].rank  = -1;
2295:     iremote_new[i].index = -1;
2296:   }
2297:   for (s = 0; s < nsfs; s++) {
2298:     PetscInt           i, nl, nr;
2299:     PetscSF            tmp_sf;
2300:     const PetscSFNode *iremote;
2301:     PetscSFNode       *tmp_rootdata;
2302:     PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

2304:     PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2305:     PetscSFCreate(comm, &tmp_sf);
2306:     /* create helper SF with contiguous leaves */
2307:     PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
2308:     PetscSFSetUp(tmp_sf);
2309:     PetscMalloc1(nr, &tmp_rootdata);
2310:     for (i = 0; i < nr; i++) {
2311:       tmp_rootdata[i].index = i + rootOffsets[s];
2312:       tmp_rootdata[i].rank  = (PetscInt)rank;
2313:     }
2314:     PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2315:     PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2316:     PetscSFDestroy(&tmp_sf);
2317:     PetscFree(tmp_rootdata);
2318:   }

2320:   /* Build the new SF */
2321:   PetscSFCreate(comm, newsf);
2322:   PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2323:   PetscSFSetUp(*newsf);
2324:   PetscFree(rootOffsets);
2325:   PetscFree(leafArrayOffsets);
2326:   return 0;
2327: }
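
/*
   Example usage (a minimal sketch, assuming two set-up SFs 'sf0' and 'sf1' on the same communicator
   'comm' that both use contiguous leaf storage, so leafOffsets may be NULL; with shareRoots =
   PETSC_FALSE the root spaces are concatenated rather than shared):

     PetscSF parts[2] = {sf0, sf1}, combined;
     PetscSFConcatenate(comm, 2, parts, PETSC_FALSE, NULL, &combined);
     // ... use combined ...
     PetscSFDestroy(&combined);
*/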