Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petscctable.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #endif

 10: #if defined(PETSC_HAVE_HIP)
 11:   #include <hip/hip_runtime.h>
 12: #endif

 14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 15: void PetscSFCheckGraphSet(PetscSF, int);
 16: #else
 17:   #if defined(PETSC_USE_DEBUG)
 19:   #else
 20:     #define PetscSFCheckGraphSet(sf, arg) \
 21:       do { \
 22:       } while (0)
 23:   #endif
 24: #endif

 26: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};

 28: /*@
 29:    PetscSFCreate - create a star forest communication context

 31:    Collective

 33:    Input Parameter:
 34: .  comm - communicator on which the star forest will operate

 36:    Output Parameter:
 37: .  sf - new star forest context

 39:    Options Database Keys:
 40: +  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (Default)
 41: .  -sf_type window    - Use MPI-3 one-sided window for communication
 42: -  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication

 44:    Level: intermediate

 46:    Notes:
 47:    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
 48:    or MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 49:    SFs are optimized and have better performance than general SFs.

 51: .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
 52: @*/
 53: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
 54: {
 55:   PetscSF b;

 58:   PetscSFInitializePackage();

 60:   PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView);

 62:   b->nroots    = -1;
 63:   b->nleaves   = -1;
 64:   b->minleaf   = PETSC_MAX_INT;
 65:   b->maxleaf   = PETSC_MIN_INT;
 66:   b->nranks    = -1;
 67:   b->rankorder = PETSC_TRUE;
 68:   b->ingroup   = MPI_GROUP_NULL;
 69:   b->outgroup  = MPI_GROUP_NULL;
 70:   b->graphset  = PETSC_FALSE;
 71: #if defined(PETSC_HAVE_DEVICE)
 72:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 73:   b->use_stream_aware_mpi = PETSC_FALSE;
 74:   b->unknown_input_stream = PETSC_FALSE;
 75:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
 76:   b->backend = PETSCSF_BACKEND_KOKKOS;
 77:   #elif defined(PETSC_HAVE_CUDA)
 78:   b->backend = PETSCSF_BACKEND_CUDA;
 79:   #elif defined(PETSC_HAVE_HIP)
 80:   b->backend = PETSCSF_BACKEND_HIP;
 81:   #endif

 83:   #if defined(PETSC_HAVE_NVSHMEM)
 84:   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 85:   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 86:   PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL);
 87:   PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL);
 88:   #endif
 89: #endif
 90:   b->vscat.from_n = -1;
 91:   b->vscat.to_n   = -1;
 92:   b->vscat.unit   = MPIU_SCALAR;
 93:   *sf             = b;
 94:   return 0;
 95: }
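/*
   Usage sketch (illustrative, not part of the library source): a minimal example of creating and using an SF,
   assuming `comm` is a valid MPI communicator provided by the caller. Each process owns one root and has one
   leaf referencing root 0 on the next process (a ring); error handling uses PetscCall()/PetscCallMPI() as in
   typical PETSc application code.

     MPI_Comm    comm = PETSC_COMM_WORLD;
     PetscSF     sf;
     PetscSFNode remote;
     PetscMPIInt rank, size;

     PetscCallMPI(MPI_Comm_rank(comm, &rank));
     PetscCallMPI(MPI_Comm_size(comm, &size));
     remote.rank  = (rank + 1) % size; // this leaf references root 0 on the next rank
     remote.index = 0;
     PetscCall(PetscSFCreate(comm, &sf));
     PetscCall(PetscSFSetFromOptions(sf));
     PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
     PetscCall(PetscSFSetUp(sf));
     // ... communicate with PetscSFBcastBegin/End or PetscSFReduceBegin/End ...
     PetscCall(PetscSFDestroy(&sf));
*/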

 97: /*@
 98:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

100:    Collective

102:    Input Parameter:
103: .  sf - star forest

105:    Level: advanced

107: .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
108: @*/
109: PetscErrorCode PetscSFReset(PetscSF sf)
110: {
112:   PetscTryTypeMethod(sf, Reset);
113:   sf->nroots   = -1;
114:   sf->nleaves  = -1;
115:   sf->minleaf  = PETSC_MAX_INT;
116:   sf->maxleaf  = PETSC_MIN_INT;
117:   sf->mine     = NULL;
118:   sf->remote   = NULL;
119:   sf->graphset = PETSC_FALSE;
120:   PetscFree(sf->mine_alloc);
121:   PetscFree(sf->remote_alloc);
122:   sf->nranks = -1;
123:   PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote);
124:   sf->degreeknown = PETSC_FALSE;
125:   PetscFree(sf->degree);
126:   if (sf->ingroup != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
127:   if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
128:   if (sf->multi) sf->multi->multi = NULL;
129:   PetscSFDestroy(&sf->multi);
130:   PetscLayoutDestroy(&sf->map);

132: #if defined(PETSC_HAVE_DEVICE)
133:   for (PetscInt i = 0; i < 2; i++) PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]);
134: #endif

136:   sf->setupcalled = PETSC_FALSE;
137:   return 0;
138: }

140: /*@C
141:    PetscSFSetType - Set the PetscSF communication implementation

143:    Collective on PetscSF

145:    Input Parameters:
146: +  sf - the PetscSF context
147: -  type - a known method

149:    Options Database Key:
150: .  -sf_type <type> - Sets the method; use -help for a list
151:    of available methods (for instance, window, basic, neighbor)

153:    Notes:
154:    See "include/petscsf.h" for available methods (for instance)
155: +    PETSCSFWINDOW - MPI-2/3 one-sided
156: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

158:   Level: intermediate

160: .seealso: `PetscSFType`, `PetscSFCreate()`
161: @*/
162: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
163: {
164:   PetscBool match;
165:   PetscErrorCode (*r)(PetscSF);


170:   PetscObjectTypeCompare((PetscObject)sf, type, &match);
171:   if (match) return 0;

173:   PetscFunctionListFind(PetscSFList, type, &r);
175:   /* Destroy the previous PetscSF implementation context */
176:   PetscTryTypeMethod(sf, Destroy);
177:   PetscMemzero(sf->ops, sizeof(*sf->ops));
178:   PetscObjectChangeTypeName((PetscObject)sf, type);
179:   (*r)(sf);
180:   return 0;
181: }

183: /*@C
184:   PetscSFGetType - Get the PetscSF communication implementation

186:   Not Collective

188:   Input Parameter:
189: . sf  - the PetscSF context

191:   Output Parameter:
192: . type - the PetscSF type name

194:   Level: intermediate

196: .seealso: `PetscSFSetType()`, `PetscSFCreate()`
197: @*/
198: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
199: {
202:   *type = ((PetscObject)sf)->type_name;
203:   return 0;
204: }

206: /*@C
207:    PetscSFDestroy - destroy star forest

209:    Collective

211:    Input Parameter:
212: .  sf - address of star forest

214:    Level: intermediate

216: .seealso: `PetscSFCreate()`, `PetscSFReset()`
217: @*/
218: PetscErrorCode PetscSFDestroy(PetscSF *sf)
219: {
220:   if (!*sf) return 0;
222:   if (--((PetscObject)(*sf))->refct > 0) {
223:     *sf = NULL;
224:     return 0;
225:   }
226:   PetscSFReset(*sf);
227:   PetscTryTypeMethod((*sf), Destroy);
228:   PetscSFDestroy(&(*sf)->vscat.lsf);
229:   if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
230:   PetscHeaderDestroy(sf);
231:   return 0;
232: }

234: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
235: {
236:   PetscInt           i, nleaves;
237:   PetscMPIInt        size;
238:   const PetscInt    *ilocal;
239:   const PetscSFNode *iremote;

241:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
242:   PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote);
243:   MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
244:   for (i = 0; i < nleaves; i++) {
245:     const PetscInt rank   = iremote[i].rank;
246:     const PetscInt remote = iremote[i].index;
247:     const PetscInt leaf   = ilocal ? ilocal[i] : i;
251:   }
252:   return 0;
253: }

255: /*@
256:    PetscSFSetUp - set up communication structures

258:    Collective

260:    Input Parameter:
261: .  sf - star forest communication object

263:    Level: beginner

265: .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()`
266: @*/
267: PetscErrorCode PetscSFSetUp(PetscSF sf)
268: {
270:   PetscSFCheckGraphSet(sf, 1);
271:   if (sf->setupcalled) return 0;
272:   PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0);
273:   PetscSFCheckGraphValid_Private(sf);
274:   if (!((PetscObject)sf)->type_name) PetscSFSetType(sf, PETSCSFBASIC); /* Zero all sf->ops */
275:   PetscTryTypeMethod(sf, SetUp);
276: #if defined(PETSC_HAVE_CUDA)
277:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
278:     sf->ops->Malloc = PetscSFMalloc_CUDA;
279:     sf->ops->Free   = PetscSFFree_CUDA;
280:   }
281: #endif
282: #if defined(PETSC_HAVE_HIP)
283:   if (sf->backend == PETSCSF_BACKEND_HIP) {
284:     sf->ops->Malloc = PetscSFMalloc_HIP;
285:     sf->ops->Free   = PetscSFFree_HIP;
286:   }
287: #endif

290: #if defined(PETSC_HAVE_KOKKOS)
291:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
292:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
293:     sf->ops->Free   = PetscSFFree_Kokkos;
294:   }
295: #endif
296:   PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0);
297:   sf->setupcalled = PETSC_TRUE;
298:   return 0;
299: }

301: /*@
302:    PetscSFSetFromOptions - set PetscSF options using the options database

304:    Logically Collective

306:    Input Parameter:
307: .  sf - star forest

309:    Options Database Keys:
310: +  -sf_type               - implementation type, see PetscSFSetType()
311: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
312: .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
313:                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
314:                             If true, this option only works with -use_gpu_aware_mpi 1.
315: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
316:                                If true, this option only works with -use_gpu_aware_mpi 1.

318: -  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and kokkos.
319:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
320:                               the only available backend is kokkos.

322:    Level: intermediate
323: @*/
324: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
325: {
326:   PetscSFType deft;
327:   char        type[256];
328:   PetscBool   flg;

331:   PetscObjectOptionsBegin((PetscObject)sf);
332:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
333:   PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg);
334:   PetscSFSetType(sf, flg ? type : deft);
335:   PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL);
336: #if defined(PETSC_HAVE_DEVICE)
337:   {
338:     char      backendstr[32] = {0};
339:     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
340:     /* Change the defaults set in PetscSFCreate() with command line options */
341:     PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL);
342:     PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL);
343:     PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set);
344:     PetscStrcasecmp("cuda", backendstr, &isCuda);
345:     PetscStrcasecmp("kokkos", backendstr, &isKokkos);
346:     PetscStrcasecmp("hip", backendstr, &isHip);
347:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
348:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
349:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
350:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
352:   #elif defined(PETSC_HAVE_KOKKOS)
354:   #endif
355:   }
356: #endif
357:   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
358:   PetscOptionsEnd();
359:   return 0;
360: }
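/*
   Usage sketch (illustrative): typical command lines exercising the options above, assuming the application
   calls PetscSFSetFromOptions() on its SFs and, for the second line, that PETSc is configured with Kokkos and
   a GPU-aware MPI:

     ./app -sf_type neighbor -sf_rank_order 0
     ./app -sf_backend kokkos -sf_use_stream_aware_mpi 1 -use_gpu_aware_mpi 1
*/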

362: /*@
363:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

365:    Logically Collective

367:    Input Parameters:
368: +  sf - star forest
369: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

371:    Level: advanced

373: .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
374: @*/
375: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
376: {
380:   sf->rankorder = flg;
381:   return 0;
382: }

384: /*@C
385:    PetscSFSetGraph - Set a parallel star forest

387:    Collective

389:    Input Parameters:
390: +  sf - star forest
391: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
392: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
393: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
394: during setup in debug mode)
395: .  localmode - copy mode for ilocal
396: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
397: during setup in debug mode)
398: -  remotemode - copy mode for iremote

400:    Level: intermediate

402:    Notes:
403:    Leaf indices in ilocal must be unique, otherwise an error occurs.

405:    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
406:    In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
407:    PETSc might modify the respective array;
408:    if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
409:    Only with PETSC_COPY_VALUES is the respective array guaranteed to stay intact; in that case a const array can be passed (but a cast to non-const is needed).

411:    Fortran Notes:
412:    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

414:    Developer Notes:
415:    We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
416:    This also makes it easy to compare the leaf sets of two SFs.

418: .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
419: @*/
420: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
421: {
422:   PetscBool unique, contiguous;

429:   /* enums may be handled as unsigned by some compilers (NVHPC, for example); the int cast
430:    * below is to prevent NVHPC from warning about a meaningless comparison of unsigned with zero */

434:   if (sf->nroots >= 0) { /* Reset only if graph already set */
435:     PetscSFReset(sf);
436:   }

438:   PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0);

440:   sf->nroots  = nroots;
441:   sf->nleaves = nleaves;

443:   if (localmode == PETSC_COPY_VALUES && ilocal) {
444:     PetscInt *tlocal = NULL;

446:     PetscMalloc1(nleaves, &tlocal);
447:     PetscArraycpy(tlocal, ilocal, nleaves);
448:     ilocal = tlocal;
449:   }
450:   if (remotemode == PETSC_COPY_VALUES) {
451:     PetscSFNode *tremote = NULL;

453:     PetscMalloc1(nleaves, &tremote);
454:     PetscArraycpy(tremote, iremote, nleaves);
455:     iremote = tremote;
456:   }

458:   if (nleaves && ilocal) {
459:     PetscSFNode work;

461:     PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
462:     PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
463:     unique = PetscNot(unique);
465:     sf->minleaf = ilocal[0];
466:     sf->maxleaf = ilocal[nleaves - 1];
467:     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
468:   } else {
469:     sf->minleaf = 0;
470:     sf->maxleaf = nleaves - 1;
471:     unique      = PETSC_TRUE;
472:     contiguous  = PETSC_TRUE;
473:   }

475:   if (contiguous) {
476:     if (localmode == PETSC_USE_POINTER) {
477:       ilocal = NULL;
478:     } else {
479:       PetscFree(ilocal);
480:     }
481:   }
482:   sf->mine = ilocal;
483:   if (localmode == PETSC_USE_POINTER) {
484:     sf->mine_alloc = NULL;
485:   } else {
486:     sf->mine_alloc = ilocal;
487:   }
488:   sf->remote = iremote;
489:   if (remotemode == PETSC_USE_POINTER) {
490:     sf->remote_alloc = NULL;
491:   } else {
492:     sf->remote_alloc = iremote;
493:   }
494:   PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0);
495:   sf->graphset = PETSC_TRUE;
496:   return 0;
497: }
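/*
   Usage sketch (illustrative): two leaves stored at the non-contiguous locations 1 and 3 of the local leafdata
   buffer, both referencing roots owned by rank 0. With PETSC_COPY_VALUES the stack arrays below are copied and
   remain untouched, as described in the Notes above. `sf` is assumed to have been created with PetscSFCreate(),
   and the number of locally owned roots is assumed to be 2 for this rank.

     PetscInt    nroots     = 2;                 // assumed: this rank owns two roots
     PetscInt    ilocal[2]  = {1, 3};            // leaf locations in the leafdata buffer
     PetscSFNode iremote[2] = {{0, 0}, {0, 1}};  // {rank, index} of the root each leaf references

     PetscCall(PetscSFSetGraph(sf, nroots, 2, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
*/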

499: /*@
500:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

502:   Collective

504:   Input Parameters:
505: + sf      - The PetscSF
506: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
507: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

509:   Notes:
510:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map,
511:   and let n and N be the local and global sizes of x, respectively.

513:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to a
514:   sequential vector y on every rank.

516:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
517:   sequential vector y on rank 0.

519:   In the above cases, entries of x are roots and entries of y are leaves.

521:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
522:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
523:   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
524:   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
525:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

527:   In this case, roots and leaves are symmetric.

529:   Level: intermediate
530:  @*/
531: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
532: {
533:   MPI_Comm    comm;
534:   PetscInt    n, N, res[2];
535:   PetscMPIInt rank, size;
536:   PetscSFType type;

540:   PetscObjectGetComm((PetscObject)sf, &comm);
542:   MPI_Comm_rank(comm, &rank);
543:   MPI_Comm_size(comm, &size);

545:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
546:     type = PETSCSFALLTOALL;
547:     PetscLayoutCreate(comm, &sf->map);
548:     PetscLayoutSetLocalSize(sf->map, size);
549:     PetscLayoutSetSize(sf->map, ((PetscInt)size) * size);
550:     PetscLayoutSetUp(sf->map);
551:   } else {
552:     PetscLayoutGetLocalSize(map, &n);
553:     PetscLayoutGetSize(map, &N);
554:     res[0] = n;
555:     res[1] = -n;
556:     /* Check if n is the same over all ranks so that we can optimize it */
557:     MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm);
558:     if (res[0] == -res[1]) { /* same n */
559:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
560:     } else {
561:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
562:     }
563:     PetscLayoutReference(map, &sf->map);
564:   }
565:   PetscSFSetType(sf, type);

567:   sf->pattern = pattern;
568:   sf->mine    = NULL; /* Contiguous */

570:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
571:      Also set other easy stuff.
572:    */
573:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
574:     sf->nleaves = N;
575:     sf->nroots  = n;
576:     sf->nranks  = size;
577:     sf->minleaf = 0;
578:     sf->maxleaf = N - 1;
579:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
580:     sf->nleaves = rank ? 0 : N;
581:     sf->nroots  = n;
582:     sf->nranks  = rank ? 0 : size;
583:     sf->minleaf = 0;
584:     sf->maxleaf = rank ? -1 : N - 1;
585:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
586:     sf->nleaves = size;
587:     sf->nroots  = size;
588:     sf->nranks  = size;
589:     sf->minleaf = 0;
590:     sf->maxleaf = size - 1;
591:   }
592:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
593:   sf->graphset = PETSC_TRUE;
594:   return 0;
595: }
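/*
   Usage sketch (illustrative): build an allgather SF from the layout `map` of a parallel vector x, as in the
   Notes above. `comm` and `map` are assumed to exist; after setup, a Bcast copies the local part of x (roots)
   into a full-length array (leaves) on every rank.

     PetscSF sf;

     PetscCall(PetscSFCreate(comm, &sf));
     PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
     PetscCall(PetscSFSetUp(sf));
     // PetscSFBcastBegin/End(sf, MPIU_SCALAR, xlocal, yfull, MPI_REPLACE) then gathers x on all ranks
     PetscCall(PetscSFDestroy(&sf));
*/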

597: /*@
598:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

600:    Collective

602:    Input Parameter:
603: .  sf - star forest to invert

605:    Output Parameter:
606: .  isf - inverse of sf

608:    Level: advanced

610:    Notes:
611:    All roots must have degree 1.

613:    The local space may be a permutation, but cannot be sparse.

615: .seealso: `PetscSFSetGraph()`
616: @*/
617: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
618: {
619:   PetscMPIInt     rank;
620:   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
621:   const PetscInt *ilocal;
622:   PetscSFNode    *roots, *leaves;

625:   PetscSFCheckGraphSet(sf, 1);

628:   PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL);
629:   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

631:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
632:   PetscMalloc2(nroots, &roots, maxlocal, &leaves);
633:   for (i = 0; i < maxlocal; i++) {
634:     leaves[i].rank  = rank;
635:     leaves[i].index = i;
636:   }
637:   for (i = 0; i < nroots; i++) {
638:     roots[i].rank  = -1;
639:     roots[i].index = -1;
640:   }
641:   PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);
642:   PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);

644:   /* Check whether our leaves are sparse */
645:   for (i = 0, count = 0; i < nroots; i++)
646:     if (roots[i].rank >= 0) count++;
647:   if (count == nroots) newilocal = NULL;
648:   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscMalloc1(count, &newilocal);
649:     for (i = 0, count = 0; i < nroots; i++) {
650:       if (roots[i].rank >= 0) {
651:         newilocal[count]   = i;
652:         roots[count].rank  = roots[i].rank;
653:         roots[count].index = roots[i].index;
654:         count++;
655:       }
656:     }
657:   }

659:   PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf);
660:   PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES);
661:   PetscFree2(roots, leaves);
662:   return 0;
663: }

665: /*@
666:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

668:    Collective

670:    Input Parameters:
671: +  sf - communication object to duplicate
672: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

674:    Output Parameter:
675: .  newsf - new communication object

677:    Level: beginner

679: .seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
680: @*/
681: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
682: {
683:   PetscSFType  type;
684:   MPI_Datatype dtype = MPIU_SCALAR;

689:   PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf);
690:   PetscSFGetType(sf, &type);
691:   if (type) PetscSFSetType(*newsf, type);
692:   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
693:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
694:     PetscSFCheckGraphSet(sf, 1);
695:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
696:       PetscInt           nroots, nleaves;
697:       const PetscInt    *ilocal;
698:       const PetscSFNode *iremote;
699:       PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
700:       PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
701:     } else {
702:       PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern);
703:     }
704:   }
705:   /* Since oldtype is committed, so is newtype, according to MPI */
706:   if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit, &dtype);
707:   (*newsf)->vscat.bs     = sf->vscat.bs;
708:   (*newsf)->vscat.unit   = dtype;
709:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
710:   (*newsf)->vscat.from_n = sf->vscat.from_n;
711:   /* Do not copy lsf. Build it on demand since it is rarely used */

713: #if defined(PETSC_HAVE_DEVICE)
714:   (*newsf)->backend              = sf->backend;
715:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
716:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
717:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
718: #endif
719:   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
720:   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
721:   return 0;
722: }

724: /*@C
725:    PetscSFGetGraph - Get the graph specifying a parallel star forest

727:    Not Collective

729:    Input Parameter:
730: .  sf - star forest

732:    Output Parameters:
733: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
734: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
735: .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
736: -  iremote - remote locations of root vertices for each leaf on the current process

738:    Notes:
739:      We are not currently requiring that the graph be set, thus nroots=-1 is returned if it has not been set yet

741:      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
742:      see its manpage for details.

744:    Fortran Notes:
745:      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
746:      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

748:      To check for a NULL ilocal use
749: $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

751:    Level: intermediate

753: .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
754: @*/
755: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
756: {
758:   if (sf->ops->GetGraph) {
759:     (sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote);
760:   } else {
761:     if (nroots) *nroots = sf->nroots;
762:     if (nleaves) *nleaves = sf->nleaves;
763:     if (ilocal) *ilocal = sf->mine;
764:     if (iremote) *iremote = sf->remote;
765:   }
766:   return 0;
767: }
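/*
   Usage sketch (illustrative): query the graph; a NULL ilocal means the leaves are stored contiguously starting
   at location 0, as noted above. `sf` is assumed to have its graph set.

     PetscInt           nroots, nleaves, i;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;

     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
     for (i = 0; i < nleaves; i++) {
       const PetscInt leaf = ilocal ? ilocal[i] : i;
       // leaf location `leaf` is connected to root iremote[i].index on rank iremote[i].rank
     }
*/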

769: /*@
770:    PetscSFGetLeafRange - Get the active leaf ranges

772:    Not Collective

774:    Input Parameter:
775: .  sf - star forest

777:    Output Parameters:
778: +  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
779: -  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

781:    Level: developer

783: .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
784: @*/
785: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
786: {
788:   PetscSFCheckGraphSet(sf, 1);
789:   if (minleaf) *minleaf = sf->minleaf;
790:   if (maxleaf) *maxleaf = sf->maxleaf;
791:   return 0;
792: }

794: /*@C
795:    PetscSFViewFromOptions - View a PetscSF object based on values in the options database

797:    Collective on PetscSF

799:    Input Parameters:
800: +  A - the star forest
801: .  obj - Optional object
802: -  name - command line option

804:    Level: intermediate
805: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
806: @*/
807: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
808: {
810:   PetscObjectViewFromOptions((PetscObject)A, obj, name);
811:   return 0;
812: }

814: /*@C
815:    PetscSFView - view a star forest

817:    Collective

819:    Input Parameters:
820: +  sf - star forest
821: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

823:    Level: beginner

825: .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
826: @*/
827: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
828: {
829:   PetscBool         iascii;
830:   PetscViewerFormat format;

833:   if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer);
836:   if (sf->graphset) PetscSFSetUp(sf);
837:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
838:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
839:     PetscMPIInt rank;
840:     PetscInt    ii, i, j;

842:     PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer);
843:     PetscViewerASCIIPushTab(viewer);
844:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
845:       if (!sf->graphset) {
846:         PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n");
847:         PetscViewerASCIIPopTab(viewer);
848:         return 0;
849:       }
850:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
851:       PetscViewerASCIIPushSynchronized(viewer);
852:       PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks);
853:       for (i = 0; i < sf->nleaves; i++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index);
854:       PetscViewerFlush(viewer);
855:       PetscViewerGetFormat(viewer, &format);
856:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
857:         PetscMPIInt *tmpranks, *perm;
858:         PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm);
859:         PetscArraycpy(tmpranks, sf->ranks, sf->nranks);
860:         for (i = 0; i < sf->nranks; i++) perm[i] = i;
861:         PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm);
862:         PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank);
863:         for (ii = 0; ii < sf->nranks; ii++) {
864:           i = perm[ii];
865:           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]);
866:           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]);
867:         }
868:         PetscFree2(tmpranks, perm);
869:       }
870:       PetscViewerFlush(viewer);
871:       PetscViewerASCIIPopSynchronized(viewer);
872:     }
873:     PetscViewerASCIIPopTab(viewer);
874:   }
875:   PetscTryTypeMethod(sf, View, viewer);
876:   return 0;
877: }

879: /*@C
880:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

882:    Not Collective

884:    Input Parameter:
885: .  sf - star forest

887:    Output Parameters:
888: +  nranks - number of ranks referenced by local part
889: .  ranks - array of ranks
890: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
891: .  rmine - concatenated array holding local indices referencing each remote rank
892: -  rremote - concatenated array holding remote indices referenced for each remote rank

894:    Level: developer

896: .seealso: `PetscSFGetLeafRanks()`
897: @*/
898: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
899: {
902:   if (sf->ops->GetRootRanks) {
903:     (sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote);
904:   } else {
905:     /* The generic implementation */
906:     if (nranks) *nranks = sf->nranks;
907:     if (ranks) *ranks = sf->ranks;
908:     if (roffset) *roffset = sf->roffset;
909:     if (rmine) *rmine = sf->rmine;
910:     if (rremote) *rremote = sf->rremote;
911:   }
912:   return 0;
913: }
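/*
   Usage sketch (illustrative): traverse the rank-grouped arrays returned above. For rank ranks[r], the leaf/root
   pairs it communicates occupy positions roffset[r] .. roffset[r+1]-1 of rmine/rremote. `sf` is assumed to be
   set up.

     PetscInt           nranks, r, j;
     const PetscMPIInt *ranks;
     const PetscInt    *roffset, *rmine, *rremote;

     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
     for (r = 0; r < nranks; r++) {
       for (j = roffset[r]; j < roffset[r + 1]; j++) {
         // local leaf location rmine[j] references root rremote[j] on rank ranks[r]
       }
     }
*/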

915: /*@C
916:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

918:    Not Collective

920:    Input Parameter:
921: .  sf - star forest

923:    Output Parameters:
924: +  niranks - number of leaf ranks referencing roots on this process
925: .  iranks - array of ranks
926: .  ioffset - offset in irootloc for each rank (length niranks+1)
927: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

929:    Level: developer

931: .seealso: `PetscSFGetRootRanks()`
932: @*/
933: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
934: {
937:   if (sf->ops->GetLeafRanks) {
938:     (sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc);
939:   } else {
940:     PetscSFType type;
941:     PetscSFGetType(sf, &type);
942:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
943:   }
944:   return 0;
945: }

947: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
948: {
949:   PetscInt i;
950:   for (i = 0; i < n; i++) {
951:     if (needle == list[i]) return PETSC_TRUE;
952:   }
953:   return PETSC_FALSE;
954: }

956: /*@C
957:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

959:    Collective

961:    Input Parameters:
962: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
963: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

965:    Level: developer

967: .seealso: `PetscSFGetRootRanks()`
968: @*/
969: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
970: {
971:   PetscTable         table;
972:   PetscTablePosition pos;
973:   PetscMPIInt        size, groupsize, *groupranks;
974:   PetscInt          *rcount, *ranks;
975:   PetscInt           i, irank = -1, orank = -1;

978:   PetscSFCheckGraphSet(sf, 1);
979:   MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
980:   PetscTableCreate(10, size, &table);
981:   for (i = 0; i < sf->nleaves; i++) {
982:     /* Log 1-based rank */
983:     PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES);
984:   }
985:   PetscTableGetCount(table, &sf->nranks);
986:   PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote);
987:   PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks);
988:   PetscTableGetHeadPosition(table, &pos);
989:   for (i = 0; i < sf->nranks; i++) {
990:     PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]);
991:     ranks[i]--; /* Convert back to 0-based */
992:   }
993:   PetscTableDestroy(&table);

995:   /* We expect that dgroup is reliably "small" while nranks could be large */
996:   {
997:     MPI_Group    group = MPI_GROUP_NULL;
998:     PetscMPIInt *dgroupranks;
999:     MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1000:     MPI_Group_size(dgroup, &groupsize);
1001:     PetscMalloc1(groupsize, &dgroupranks);
1002:     PetscMalloc1(groupsize, &groupranks);
1003:     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1004:     if (groupsize) MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks);
1005:     MPI_Group_free(&group);
1006:     PetscFree(dgroupranks);
1007:   }

1009:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1010:   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1011:     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1012:       if (InList(ranks[i], groupsize, groupranks)) break;
1013:     }
1014:     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1015:       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1016:     }
1017:     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1018:       PetscInt tmprank, tmpcount;

1020:       tmprank             = ranks[i];
1021:       tmpcount            = rcount[i];
1022:       ranks[i]            = ranks[sf->ndranks];
1023:       rcount[i]           = rcount[sf->ndranks];
1024:       ranks[sf->ndranks]  = tmprank;
1025:       rcount[sf->ndranks] = tmpcount;
1026:       sf->ndranks++;
1027:     }
1028:   }
1029:   PetscFree(groupranks);
1030:   PetscSortIntWithArray(sf->ndranks, ranks, rcount);
1031:   PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks);
1032:   sf->roffset[0] = 0;
1033:   for (i = 0; i < sf->nranks; i++) {
1034:     PetscMPIIntCast(ranks[i], sf->ranks + i);
1035:     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1036:     rcount[i]          = 0;
1037:   }
1038:   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1039:     /* short circuit */
1040:     if (orank != sf->remote[i].rank) {
1041:       /* Search for index of iremote[i].rank in sf->ranks */
1042:       PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank);
1043:       if (irank < 0) {
1044:         PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank);
1045:         if (irank >= 0) irank += sf->ndranks;
1046:       }
1047:       orank = sf->remote[i].rank;
1048:     }
1050:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1051:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1052:     rcount[irank]++;
1053:   }
1054:   PetscFree2(rcount, ranks);
1055:   return 0;
1056: }

1058: /*@C
1059:    PetscSFGetGroups - gets incoming and outgoing process groups

1061:    Collective

1063:    Input Parameter:
1064: .  sf - star forest

1066:    Output Parameters:
1067: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1068: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1070:    Level: developer

1072: .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1073: @*/
1074: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1075: {
1076:   MPI_Group group = MPI_GROUP_NULL;

1079:   if (sf->ingroup == MPI_GROUP_NULL) {
1080:     PetscInt        i;
1081:     const PetscInt *indegree;
1082:     PetscMPIInt     rank, *outranks, *inranks;
1083:     PetscSFNode    *remote;
1084:     PetscSF         bgcount;

1086:     /* Compute the number of incoming ranks */
1087:     PetscMalloc1(sf->nranks, &remote);
1088:     for (i = 0; i < sf->nranks; i++) {
1089:       remote[i].rank  = sf->ranks[i];
1090:       remote[i].index = 0;
1091:     }
1092:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount);
1093:     PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1094:     PetscSFComputeDegreeBegin(bgcount, &indegree);
1095:     PetscSFComputeDegreeEnd(bgcount, &indegree);
1096:     /* Enumerate the incoming ranks */
1097:     PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks);
1098:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1099:     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1100:     PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks);
1101:     PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks);
1102:     MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1103:     MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup);
1104:     MPI_Group_free(&group);
1105:     PetscFree2(inranks, outranks);
1106:     PetscSFDestroy(&bgcount);
1107:   }
1108:   *incoming = sf->ingroup;

1110:   if (sf->outgroup == MPI_GROUP_NULL) {
1111:     MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1112:     MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup);
1113:     MPI_Group_free(&group);
1114:   }
1115:   *outgoing = sf->outgroup;
1116:   return 0;
1117: }

1119: /*@
1120:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1122:    Collective

1124:    Input Parameter:
1125: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1127:    Output Parameter:
1128: .  multi - star forest with split roots, such that each root has degree exactly 1

1130:    Level: developer

1132:    Notes:

1134:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1135:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1136:    edge, it is a candidate for future optimization that might involve its removal.

1138: .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1139: @*/
1140: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1141: {
1144:   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1145:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1146:     *multi           = sf->multi;
1147:     sf->multi->multi = sf->multi;
1148:     return 0;
1149:   }
1150:   if (!sf->multi) {
1151:     const PetscInt *indegree;
1152:     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1153:     PetscSFNode    *remote;
1154:     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1155:     PetscSFComputeDegreeBegin(sf, &indegree);
1156:     PetscSFComputeDegreeEnd(sf, &indegree);
1157:     PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset);
1158:     inoffset[0] = 0;
1159:     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1160:     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1161:     PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1162:     PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1163:     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1164:     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1166:     }
1167:     PetscMalloc1(sf->nleaves, &remote);
1168:     for (i = 0; i < sf->nleaves; i++) {
1169:       remote[i].rank  = sf->remote[i].rank;
1170:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1171:     }
1172:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1173:     sf->multi->multi = sf->multi;
1174:     PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1175:     if (sf->rankorder) { /* Sort the ranks */
1176:       PetscMPIInt  rank;
1177:       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1178:       PetscSFNode *newremote;
1179:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1180:       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1181:       PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset);
1182:       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1183:       PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1184:       PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1185:       /* Sort the incoming ranks at each vertex, build the inverse map */
1186:       for (i = 0; i < sf->nroots; i++) {
1187:         PetscInt j;
1188:         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1189:         PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset);
1190:         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1191:       }
1192:       PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1193:       PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1194:       PetscMalloc1(sf->nleaves, &newremote);
1195:       for (i = 0; i < sf->nleaves; i++) {
1196:         newremote[i].rank  = sf->remote[i].rank;
1197:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1198:       }
1199:       PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER);
1200:       PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset);
1201:     }
1202:     PetscFree3(inoffset, outones, outoffset);
1203:   }
1204:   *multi = sf->multi;
1205:   return 0;
1206: }

1208: /*@C
1209:    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

1211:    Collective

1213:    Input Parameters:
1214: +  sf - original star forest
1215: .  nselected  - number of selected roots on this process
1216: -  selected   - indices of the selected roots on this process

1218:    Output Parameter:
1219: .  esf - new star forest

1221:    Level: advanced

1223:    Note:
1224:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1225:    be done by calling PetscSFGetGraph().

1227: .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
1228: @*/
1229: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1230: {
1231:   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1232:   const PetscInt    *ilocal;
1233:   signed char       *rootdata, *leafdata, *leafmem;
1234:   const PetscSFNode *iremote;
1235:   PetscSFNode       *new_iremote;
1236:   MPI_Comm           comm;

1239:   PetscSFCheckGraphSet(sf, 1);

1243:   PetscSFSetUp(sf);
1244:   PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0);
1245:   PetscObjectGetComm((PetscObject)sf, &comm);
1246:   PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);

1248:   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out-of-range indices */
1249:     PetscBool dups;
1253:   }

1255:   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1256:   else {
1257:     /* A generic version of creating embedded sf */
1258:     PetscSFGetLeafRange(sf, &minleaf, &maxleaf);
1259:     maxlocal = maxleaf - minleaf + 1;
1260:     PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem);
1261:     leafdata = leafmem - minleaf;
1262:     /* Tag selected roots and bcast to leaves */
1263:     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1264:     PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);
1265:     PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);

1267:     /* Build esf with leaves that are still connected */
1268:     esf_nleaves = 0;
1269:     for (i = 0; i < nleaves; i++) {
1270:       j = ilocal ? ilocal[i] : i;
1271:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1272:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1273:       */
1274:       esf_nleaves += (leafdata[j] ? 1 : 0);
1275:     }
1276:     PetscMalloc1(esf_nleaves, &new_ilocal);
1277:     PetscMalloc1(esf_nleaves, &new_iremote);
1278:     for (i = n = 0; i < nleaves; i++) {
1279:       j = ilocal ? ilocal[i] : i;
1280:       if (leafdata[j]) {
1281:         new_ilocal[n]        = j;
1282:         new_iremote[n].rank  = iremote[i].rank;
1283:         new_iremote[n].index = iremote[i].index;
1284:         ++n;
1285:       }
1286:     }
1287:     PetscSFCreate(comm, esf);
1288:     PetscSFSetFromOptions(*esf);
1289:     PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1290:     PetscFree2(rootdata, leafmem);
1291:   }
1292:   PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0);
1293:   return 0;
1294: }
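/*
   Usage sketch (illustrative): keep only the edges that reference the first two locally owned roots. Leaf and
   root indices are not remapped, so esf can be used with the same root/leaf buffers as sf. Assumes this rank
   owns at least two roots.

     PetscSF  esf;
     PetscInt selected[2] = {0, 1};

     PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
     // PetscSFGetGraph(esf, ...) reveals which leaves still participate
     PetscCall(PetscSFDestroy(&esf));
*/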

1296: /*@C
1297:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1299:   Collective

1301:   Input Parameters:
1302: + sf - original star forest
1303: . nselected  - number of selected leaves on this process
1304: - selected   - indices of the selected leaves on this process

1306:   Output Parameter:
1307: .  newsf - new star forest

1309:   Level: advanced

1311: .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1312: @*/
1313: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1314: {
1315:   const PetscSFNode *iremote;
1316:   PetscSFNode       *new_iremote;
1317:   const PetscInt    *ilocal;
1318:   PetscInt           i, nroots, *leaves, *new_ilocal;
1319:   MPI_Comm           comm;

1322:   PetscSFCheckGraphSet(sf, 1);

1326:   /* Uniq selected[] and put results in leaves[] */
1327:   PetscObjectGetComm((PetscObject)sf, &comm);
1328:   PetscMalloc1(nselected, &leaves);
1329:   PetscArraycpy(leaves, selected, nselected);
1330:   PetscSortedRemoveDupsInt(&nselected, leaves);

1333:   /* Optimize the routine only when sf is set up, and hence we can reuse sf's communication pattern */
1334:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1335:   else {
1336:     PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote);
1337:     PetscMalloc1(nselected, &new_ilocal);
1338:     PetscMalloc1(nselected, &new_iremote);
1339:     for (i = 0; i < nselected; ++i) {
1340:       const PetscInt l     = leaves[i];
1341:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1342:       new_iremote[i].rank  = iremote[l].rank;
1343:       new_iremote[i].index = iremote[l].index;
1344:     }
1345:     PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf);
1346:     PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1347:   }
1348:   PetscFree(leaves);
1349:   return 0;
1350: }

1352: /*@C
1353:    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()

1355:    Collective on PetscSF

1357:    Input Parameters:
1358: +  sf - star forest on which to communicate
1359: .  unit - data type associated with each node
1360: .  rootdata - buffer to broadcast
1361: -  op - operation to use for reduction

1363:    Output Parameter:
1364: .  leafdata - buffer to be reduced with values from each leaf's respective root

1366:    Level: intermediate

1368:    Notes:
1369:     When PETSc is configured with device support, it uses its own mechanism to figure out whether the given data pointers
1370:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1371:     use PetscSFBcastWithMemTypeBegin() instead.
1372: .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1373: @*/
1374: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1375: {
1376:   PetscMemType rootmtype, leafmtype;

1379:   PetscSFSetUp(sf);
1380:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1381:   PetscGetMemType(rootdata, &rootmtype);
1382:   PetscGetMemType(leafdata, &leafmtype);
1383:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1384:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1385:   return 0;
1386: }
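/*
   Usage sketch (illustrative): broadcast one PetscInt per root to its leaves, overwriting the leaf values
   (MPI_REPLACE). rootdata and leafdata are assumed to be allocated with nroots and maxleaf+1 entries,
   respectively; independent local work may be overlapped between the Begin and End calls.

     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
     // ... computation that does not touch rootdata/leafdata ...
     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
*/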

1388: /*@C
1389:    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

1391:    Collective on PetscSF

1393:    Input Parameters:
1394: +  sf - star forest on which to communicate
1395: .  unit - data type associated with each node
1396: .  rootmtype - memory type of rootdata
1397: .  rootdata - buffer to broadcast
1398: .  leafmtype - memory type of leafdata
1399: -  op - operation to use for reduction

1401:    Output Parameter:
1402: .  leafdata - buffer to be reduced with values from each leaf's respective root

1404:    Level: intermediate

1406: .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1407: @*/
1408: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1409: {
1411:   PetscSFSetUp(sf);
1412:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1413:   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1414:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1415:   return 0;
1416: }

1418: /*@C
1419:    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

1421:    Collective

1423:    Input Parameters:
1424: +  sf - star forest
1425: .  unit - data type
1426: .  rootdata - buffer to broadcast
1427: -  op - operation to use for reduction

1429:    Output Parameter:
1430: .  leafdata - buffer to be reduced with values from each leaf's respective root

1432:    Level: intermediate

1434: .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1435: @*/
1436: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1437: {
1439:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0);
1440:   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1441:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0);
1442:   return 0;
1443: }

1445: /*@C
1446:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1448:    Collective

1450:    Input Parameters:
1451: +  sf - star forest
1452: .  unit - data type
1453: .  leafdata - values to reduce
1454: -  op - reduction operation

1456:    Output Parameter:
1457: .  rootdata - result of reduction of values from all leaves of each root

1459:    Level: intermediate

1461:    Notes:
1462:     When PETSc is configured with device support, it uses its own mechanism to figure out whether the given data pointers
1463:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1464:     use PetscSFReduceWithMemTypeBegin() instead.

1466: .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1467: @*/
1468: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1469: {
1470:   PetscMemType rootmtype, leafmtype;

1473:   PetscSFSetUp(sf);
1474:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1475:   PetscGetMemType(rootdata, &rootmtype);
1476:   PetscGetMemType(leafdata, &leafmtype);
1477:   (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1478:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1479:   return 0;
1480: }

1482: /*@C
1483:    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

1485:    Collective

1487:    Input Parameters:
1488: +  sf - star forest
1489: .  unit - data type
1490: .  leafmtype - memory type of leafdata
1491: .  leafdata - values to reduce
1492: .  rootmtype - memory type of rootdata
1493: -  op - reduction operation

1495:    Output Parameter:
1496: .  rootdata - result of reduction of values from all leaves of each root

1498:    Level: intermediate

1500: .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1501: @*/
1502: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1503: {
1505:   PetscSFSetUp(sf);
1506:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1507:   (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1508:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1509:   return 0;
1510: }
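
/* Illustrative usage sketch (editor's addition, not part of sf.c): if the caller already knows
   both buffers reside in host memory, the memory-type detection can be skipped by passing the
   types explicitly (buffer names hypothetical).

     PetscSFReduceWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, leafdata, PETSC_MEMTYPE_HOST, rootdata, MPI_SUM);
     PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM); */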

1512: /*@C
1513:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1515:    Collective

1517:    Input Parameters:
1518: +  sf - star forest
1519: .  unit - data type
1520: .  leafdata - values to reduce
1521: -  op - reduction operation

1523:    Output Parameter:
1524: .  rootdata - result of reduction of values from all leaves of each root

1526:    Level: intermediate

1528: .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
1529: @*/
1530: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1531: {
1533:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1534:   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1535:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1536:   return 0;
1537: }
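
/* Illustrative usage sketch (editor's addition, not part of sf.c): sum the contributions of all
   leaves into their respective roots (buffer names hypothetical).

     PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM);
     PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM);

   As with Bcast, independent local work may be overlapped between the Begin and End calls. */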

1539: /*@C
1540:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1542:    Collective

1544:    Input Parameters:
1545: +  sf - star forest
1546: .  unit - data type
1547: .  leafdata - leaf values to use in reduction
1548: -  op - operation to use for reduction

1550:    Output Parameters:
1551: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1552: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1554:    Level: advanced

1556:    Note:
1557:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1558:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1559:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1560:    integers.

1562: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1563: @*/
1564: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1565: {
1566:   PetscMemType rootmtype, leafmtype, leafupdatemtype;

1569:   PetscSFSetUp(sf);
1570:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1571:   PetscGetMemType(rootdata, &rootmtype);
1572:   PetscGetMemType(leafdata, &leafmtype);
1573:   PetscGetMemType(leafupdate, &leafupdatemtype);
1575:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1576:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1577:   return 0;
1578: }

1580: /*@C
1581:    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1583:    Collective

1585:    Input Parameters:
1586: +  sf - star forest
1587: .  unit - data type
1588: .  rootmtype - memory type of rootdata
1589: .  leafmtype - memory type of leafdata
1590: .  leafdata - leaf values to use in reduction
1591: .  leafupdatemtype - memory type of leafupdate
1592: -  op - operation to use for reduction

1594:    Output Parameters:
1595: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1596: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1598:    Level: advanced

1600:    Note: See PetscSFFetchAndOpBegin() for more details.

1602: .seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1603: @*/
1604: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1605: {
1607:   PetscSFSetUp(sf);
1608:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1610:   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1611:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1612:   return 0;
1613: }

1615: /*@C
1616:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1618:    Collective

1620:    Input Parameters:
1621: +  sf - star forest
1622: .  unit - data type
1623: .  leafdata - leaf values to use in reduction
1624: -  op - operation to use for reduction

1626:    Output Parameters:
1627: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1628: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1630:    Level: advanced

1632: .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
1633: @*/
1634: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1635: {
1637:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1638:   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1639:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1640:   return 0;
1641: }
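
/* Illustrative usage sketch (editor's addition, not part of sf.c): atomically add each leaf's
   value to its root while fetching the root's prior state into leafupdate, a common way to hand
   out unique offsets (buffer names hypothetical).

     PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM);
     PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM); */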

1643: /*@C
1644:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1646:    Collective

1648:    Input Parameter:
1649: .  sf - star forest

1651:    Output Parameter:
1652: .  degree - degree of each root vertex

1654:    Level: advanced

1656:    Notes:
1657:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(); hence there is no need to call PetscFree() on it.

1659: .seealso: `PetscSFGatherBegin()`
1660: @*/
1661: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1662: {
1664:   PetscSFCheckGraphSet(sf, 1);
1666:   if (!sf->degreeknown) {
1667:     PetscInt i, nroots = sf->nroots, maxlocal;
1669:     maxlocal = sf->maxleaf - sf->minleaf + 1;
1670:     PetscMalloc1(nroots, &sf->degree);
1671:     PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1672:     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1673:     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1674:     PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1675:   }
1676:   *degree = NULL;
1677:   return 0;
1678: }

1680: /*@C
1681:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1683:    Collective

1685:    Input Parameter:
1686: .  sf - star forest

1688:    Output Parameter:
1689: .  degree - degree of each root vertex

1691:    Level: developer

1693:    Notes:
1694:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(); hence there is no need to call PetscFree() on it.

1696: .seealso: `PetscSFComputeDegreeBegin()`
1697: @*/
1698: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1699: {
1701:   PetscSFCheckGraphSet(sf, 1);
1703:   if (!sf->degreeknown) {
1705:     PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1706:     PetscFree(sf->degreetmp);
1707:     sf->degreeknown = PETSC_TRUE;
1708:   }
1709:   *degree = sf->degree;
1710:   return 0;
1711: }
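
/* Illustrative usage sketch (editor's addition, not part of sf.c): the split-phase degree query.

     const PetscInt *degree;
     PetscSFComputeDegreeBegin(sf, &degree);
     PetscSFComputeDegreeEnd(sf, &degree);

   Afterwards degree[i] holds the number of leaves attached to root i; the array is owned by the
   SF and must not be freed by the caller. */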

1713: /*@C
1714:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1715:   Each multi-root is assigned the index of the corresponding original root.

1717:    Collective

1719:    Input Parameters:
1720: +  sf - star forest
1721: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1723:    Output Parameters:
1724: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1725: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1727:    Level: developer

1729:    Notes:
1730:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1732: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1733: @*/
1734: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1735: {
1736:   PetscSF  msf;
1737:   PetscInt i, j, k, nroots, nmroots;

1740:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1744:   PetscSFGetMultiSF(sf, &msf);
1745:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1746:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1747:   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1748:     if (!degree[i]) continue;
1749:     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1750:   }
1752:   if (nMultiRoots) *nMultiRoots = nmroots;
1753:   return 0;
1754: }
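
/* Illustrative usage sketch (editor's addition, not part of sf.c): map multi-roots back to the
   original root indices, building on the degree computation above (variable names hypothetical).

     const PetscInt *degree;
     PetscInt       *morig, nmr;
     PetscSFComputeDegreeBegin(sf, &degree);
     PetscSFComputeDegreeEnd(sf, &degree);
     PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &morig);
     PetscFree(morig); */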

1756: /*@C
1757:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1759:    Collective

1761:    Input Parameters:
1762: +  sf - star forest
1763: .  unit - data type
1764: -  leafdata - leaf data to gather to roots

1766:    Output Parameter:
1767: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1769:    Level: intermediate

1771: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1772: @*/
1773: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1774: {
1775:   PetscSF multi = NULL;

1778:   PetscSFSetUp(sf);
1779:   PetscSFGetMultiSF(sf, &multi);
1780:   PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1781:   return 0;
1782: }

1784: /*@C
1785:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1787:    Collective

1789:    Input Parameters:
1790: +  sf - star forest
1791: .  unit - data type
1792: -  leafdata - leaf data to gather to roots

1794:    Output Parameter:
1795: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1797:    Level: intermediate

1799: .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1800: @*/
1801: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1802: {
1803:   PetscSF multi = NULL;

1806:   PetscSFGetMultiSF(sf, &multi);
1807:   PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1808:   return 0;
1809: }
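
/* Illustrative usage sketch (editor's addition, not part of sf.c): gather all leaf values into a
   multi-root buffer sized as the sum of the root degrees; leafdata is a hypothetical buffer of
   MPIU_SCALAR entries laid out like this SF's leaves.

     const PetscInt *degree;
     PetscScalar    *multirootdata;
     PetscInt        i, nroots, ntotal = 0;
     PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
     PetscSFComputeDegreeBegin(sf, &degree);
     PetscSFComputeDegreeEnd(sf, &degree);
     for (i = 0; i < nroots; i++) ntotal += degree[i];
     PetscMalloc1(ntotal, &multirootdata);
     PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata);
     PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata); */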

1811: /*@C
1812:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1814:    Collective

1816:    Input Parameters:
1817: +  sf - star forest
1818: .  unit - data type
1819: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1821:    Output Parameter:
1822: .  leafdata - leaf data to be updated with personal data from each respective root

1824:    Level: intermediate

1826: .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1827: @*/
1828: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1829: {
1830:   PetscSF multi = NULL;

1833:   PetscSFSetUp(sf);
1834:   PetscSFGetMultiSF(sf, &multi);
1835:   PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1836:   return 0;
1837: }

1839: /*@C
1840:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1842:    Collective

1844:    Input Parameters:
1845: +  sf - star forest
1846: .  unit - data type
1847: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1849:    Output Parameter:
1850: .  leafdata - leaf data to be updated with personal data from each respective root

1852:    Level: intermediate

1854: .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1855: @*/
1856: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1857: {
1858:   PetscSF multi = NULL;

1861:   PetscSFGetMultiSF(sf, &multi);
1862:   PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1863:   return 0;
1864: }
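
/* Illustrative usage sketch (editor's addition, not part of sf.c): the inverse of the gather
   above; each leaf receives the one unit reserved for it in the multi-root buffer (names
   hypothetical, buffers sized as in the gather sketch).

     PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata);
     PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata); */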

1866: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1867: {
1868:   PetscInt        i, n, nleaves;
1869:   const PetscInt *ilocal = NULL;
1870:   PetscHSetI      seen;

1872:   if (PetscDefined(USE_DEBUG)) {
1873:     PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL);
1874:     PetscHSetICreate(&seen);
1875:     for (i = 0; i < nleaves; i++) {
1876:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1877:       PetscHSetIAdd(seen, leaf);
1878:     }
1879:     PetscHSetIGetSize(seen, &n);
1881:     PetscHSetIDestroy(&seen);
1882:   }
1883:   return 0;
1884: }

1886: /*@
1887:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1889:   Input Parameters:
1890: + sfA - The first PetscSF
1891: - sfB - The second PetscSF

1893:   Output Parameters:
1894: . sfBA - The composite SF

1896:   Level: developer

1898:   Notes:
1899:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1900:   forests, i.e. the same leaf is not connected with different roots.

1902:   sfA's leaf space and sfB's root space may partially overlap. The composition builds
1903:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1904:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1905:   Bcast on sfA, then a Bcast on sfB, on connected nodes.

1907: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1908: @*/
1909: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1910: {
1911:   const PetscSFNode *remotePointsA, *remotePointsB;
1912:   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1913:   const PetscInt    *localPointsA, *localPointsB;
1914:   PetscInt          *localPointsBA;
1915:   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1916:   PetscBool          denseB;

1919:   PetscSFCheckGraphSet(sfA, 1);
1921:   PetscSFCheckGraphSet(sfB, 2);
1924:   PetscSFCheckLeavesUnique_Private(sfA);
1925:   PetscSFCheckLeavesUnique_Private(sfB);

1927:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1928:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1929:   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size       */
1930:   /* numRootsB; otherwise, garbage will be broadcasted.                                   */
1931:   /* Example (comm size = 1):                                                             */
1932:   /* sfA: 0 <- (0, 0)                                                                     */
1933:   /* sfB: 100 <- (0, 0)                                                                   */
1934:   /*      101 <- (0, 1)                                                                   */
1935:   /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target   */
1936:   /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would   */
1937:   /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on  */
1938:   /* remotePointsA; if not recast, point 101 would receive a garbage value.               */
1939:   PetscMalloc1(numRootsB, &reorderedRemotePointsA);
1940:   for (i = 0; i < numRootsB; i++) {
1941:     reorderedRemotePointsA[i].rank  = -1;
1942:     reorderedRemotePointsA[i].index = -1;
1943:   }
1944:   for (i = 0; i < numLeavesA; i++) {
1945:     PetscInt localp = localPointsA ? localPointsA[i] : i;

1947:     if (localp >= numRootsB) continue;
1948:     reorderedRemotePointsA[localp] = remotePointsA[i];
1949:   }
1950:   remotePointsA = reorderedRemotePointsA;
1951:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1952:   PetscMalloc1(maxleaf - minleaf + 1, &leafdataB);
1953:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
1954:     leafdataB[i].rank  = -1;
1955:     leafdataB[i].index = -1;
1956:   }
1957:   PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1958:   PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1959:   PetscFree(reorderedRemotePointsA);

1961:   denseB = (PetscBool)!localPointsB;
1962:   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1963:     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
1964:     else numLeavesBA++;
1965:   }
1966:   if (denseB) {
1967:     localPointsBA  = NULL;
1968:     remotePointsBA = leafdataB;
1969:   } else {
1970:     PetscMalloc1(numLeavesBA, &localPointsBA);
1971:     PetscMalloc1(numLeavesBA, &remotePointsBA);
1972:     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1973:       const PetscInt l = localPointsB ? localPointsB[i] : i;

1975:       if (leafdataB[l - minleaf].rank == -1) continue;
1976:       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
1977:       localPointsBA[numLeavesBA]  = l;
1978:       numLeavesBA++;
1979:     }
1980:     PetscFree(leafdataB);
1981:   }
1982:   PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
1983:   PetscSFSetFromOptions(*sfBA);
1984:   PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
1985:   return 0;
1986: }
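
/* Illustrative usage sketch (editor's addition, not part of sf.c): compose two SFs so that one
   Bcast on the result is equivalent to a Bcast on sfA followed by a Bcast on sfB; sfA and sfB are
   hypothetical SFs with their graphs already set on congruent communicators.

     PetscSF sfBA;
     PetscSFCompose(sfA, sfB, &sfBA);
     PetscSFDestroy(&sfBA); */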

1988: /*@
1989:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

1991:   Input Parameters:
1992: + sfA - The first PetscSF
1993: - sfB - The second PetscSF

1995:   Output Parameters:
1996: . sfBA - The composite SF.

1998:   Level: developer

2000:   Notes:
2001:   Currently, the two SFs must be defined on congruent communicators and they must be true star
2002:   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2003:   second SF must have a degree of 1, i.e., no root has more than one leaf connected to it.

2005:   sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
2006:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2007:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2008:   a Reduce on sfB, on connected roots.

2010: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2011: @*/
2012: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2013: {
2014:   const PetscSFNode *remotePointsA, *remotePointsB;
2015:   PetscSFNode       *remotePointsBA;
2016:   const PetscInt    *localPointsA, *localPointsB;
2017:   PetscSFNode       *reorderedRemotePointsA = NULL;
2018:   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2019:   MPI_Op             op;
2020: #if defined(PETSC_USE_64BIT_INDICES)
2021:   PetscBool iswin;
2022: #endif

2025:   PetscSFCheckGraphSet(sfA, 1);
2027:   PetscSFCheckGraphSet(sfB, 2);
2030:   PetscSFCheckLeavesUnique_Private(sfA);
2031:   PetscSFCheckLeavesUnique_Private(sfB);

2033:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2034:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

2036:   /* TODO: Check roots of sfB have degree of 1 */
2037:   /* Once we implement it, we can replace MPI_MAXLOC
2038:      with MPI_REPLACE, since the two operations have the same effect when every root has degree one.
2039:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2040:      the root condition is not met.
2041:    */
2042:   op = MPI_MAXLOC;
2043: #if defined(PETSC_USE_64BIT_INDICES)
2044:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2045:   PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin);
2046:   if (iswin) op = MPI_REPLACE;
2047: #endif

2049:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2050:   PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA);
2051:   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2052:     reorderedRemotePointsA[i].rank  = -1;
2053:     reorderedRemotePointsA[i].index = -1;
2054:   }
2055:   if (localPointsA) {
2056:     for (i = 0; i < numLeavesA; i++) {
2057:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2058:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2059:     }
2060:   } else {
2061:     for (i = 0; i < numLeavesA; i++) {
2062:       if (i > maxleaf || i < minleaf) continue;
2063:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2064:     }
2065:   }

2067:   PetscMalloc1(numRootsB, &localPointsBA);
2068:   PetscMalloc1(numRootsB, &remotePointsBA);
2069:   for (i = 0; i < numRootsB; i++) {
2070:     remotePointsBA[i].rank  = -1;
2071:     remotePointsBA[i].index = -1;
2072:   }

2074:   PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2075:   PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2076:   PetscFree(reorderedRemotePointsA);
2077:   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2078:     if (remotePointsBA[i].rank == -1) continue;
2079:     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2080:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2081:     localPointsBA[numLeavesBA]        = i;
2082:     numLeavesBA++;
2083:   }
2084:   PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
2085:   PetscSFSetFromOptions(*sfBA);
2086:   PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
2087:   return 0;
2088: }

2090: /*
2091:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

2093:   Input Parameters:
2094: . sf - The global PetscSF

2096:   Output Parameters:
2097: . out - The local PetscSF
2098:  */
2099: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2100: {
2101:   MPI_Comm           comm;
2102:   PetscMPIInt        myrank;
2103:   const PetscInt    *ilocal;
2104:   const PetscSFNode *iremote;
2105:   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2106:   PetscSFNode       *liremote;
2107:   PetscSF            lsf;

2110:   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2111:   else {
2112:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2113:     PetscObjectGetComm((PetscObject)sf, &comm);
2114:     MPI_Comm_rank(comm, &myrank);

2116:     /* Find out local edges and build a local SF */
2117:     PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
2118:     for (i = lnleaves = 0; i < nleaves; i++) {
2119:       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2120:     }
2121:     PetscMalloc1(lnleaves, &lilocal);
2122:     PetscMalloc1(lnleaves, &liremote);

2124:     for (i = j = 0; i < nleaves; i++) {
2125:       if (iremote[i].rank == (PetscInt)myrank) {
2126:         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2127:         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2128:         liremote[j].index = iremote[i].index;
2129:         j++;
2130:       }
2131:     }
2132:     PetscSFCreate(PETSC_COMM_SELF, &lsf);
2133:     PetscSFSetFromOptions(lsf);
2134:     PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER);
2135:     PetscSFSetUp(lsf);
2136:     *out = lsf;
2137:   }
2138:   return 0;
2139: }

2141: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2142: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2143: {
2144:   PetscMemType rootmtype, leafmtype;

2147:   PetscSFSetUp(sf);
2148:   PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
2149:   PetscGetMemType(rootdata, &rootmtype);
2150:   PetscGetMemType(leafdata, &leafmtype);
2151:   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2152:   PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
2153:   return 0;
2154: }

2156: /*@
2157:   PetscSFConcatenate - concatenate multiple SFs into one

2159:   Input Parameters:
2160: + comm - the communicator
2161: . nsfs - the number of input PetscSF
2162: . sfs  - the array of input PetscSF
2163: . shareRoots - flag indicating whether the roots of the input PetscSFs are taken as shared (PETSC_TRUE) or separate and concatenated (PETSC_FALSE)
2164: - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

2166:   Output Parameters:
2167: . newsf - The resulting PetscSF

2169:   Level: developer

2171:   Notes:
2172:   The communicator of all SFs in sfs must be comm.

2174:   The offsets in leafOffsets are added to the original leaf indices.

2176:   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2177:   In this case, NULL is also passed as ilocal to the resulting SF.

2179:   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2180:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2182: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2183: @*/
2184: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2185: {
2186:   PetscInt     i, s, nLeaves, nRoots;
2187:   PetscInt    *leafArrayOffsets;
2188:   PetscInt    *ilocal_new;
2189:   PetscSFNode *iremote_new;
2190:   PetscInt    *rootOffsets;
2191:   PetscBool    all_ilocal_null = PETSC_FALSE;
2192:   PetscMPIInt  rank;

2194:   {
2195:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2197:     PetscSFCreate(comm, &dummy);
2200:     for (i = 0; i < nsfs; i++) {
2203:     }
2207:     PetscSFDestroy(&dummy);
2208:   }
2209:   if (!nsfs) {
2210:     PetscSFCreate(comm, newsf);
2211:     PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2212:     return 0;
2213:   }
2214:   MPI_Comm_rank(comm, &rank);

2216:   PetscCalloc1(nsfs + 1, &rootOffsets);
2217:   if (shareRoots) {
2218:     PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2219:     if (PetscDefined(USE_DEBUG)) {
2220:       for (s = 1; s < nsfs; s++) {
2221:         PetscInt nr;

2223:         PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2225:       }
2226:     }
2227:   } else {
2228:     for (s = 0; s < nsfs; s++) {
2229:       PetscInt nr;

2231:       PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2232:       rootOffsets[s + 1] = rootOffsets[s] + nr;
2233:     }
2234:     nRoots = rootOffsets[nsfs];
2235:   }

2237:   /* Calculate leaf array offsets and automatic root offsets */
2238:   PetscMalloc1(nsfs + 1, &leafArrayOffsets);
2239:   leafArrayOffsets[0] = 0;
2240:   for (s = 0; s < nsfs; s++) {
2241:     PetscInt nl;

2243:     PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2244:     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2245:   }
2246:   nLeaves = leafArrayOffsets[nsfs];

2248:   if (!leafOffsets) {
2249:     all_ilocal_null = PETSC_TRUE;
2250:     for (s = 0; s < nsfs; s++) {
2251:       const PetscInt *ilocal;

2253:       PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2254:       if (ilocal) {
2255:         all_ilocal_null = PETSC_FALSE;
2256:         break;
2257:       }
2258:     }
2260:   }

2262:   /* Renumber and concatenate local leaves */
2263:   ilocal_new = NULL;
2264:   if (!all_ilocal_null) {
2265:     PetscMalloc1(nLeaves, &ilocal_new);
2266:     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2267:     for (s = 0; s < nsfs; s++) {
2268:       const PetscInt *ilocal;
2269:       PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2270:       PetscInt        i, nleaves_l;

2272:       PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2273:       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2274:     }
2275:   }

2277:   /* Renumber and concatenate remote roots */
2278:   PetscMalloc1(nLeaves, &iremote_new);
2279:   for (i = 0; i < nLeaves; i++) {
2280:     iremote_new[i].rank  = -1;
2281:     iremote_new[i].index = -1;
2282:   }
2283:   for (s = 0; s < nsfs; s++) {
2284:     PetscInt           i, nl, nr;
2285:     PetscSF            tmp_sf;
2286:     const PetscSFNode *iremote;
2287:     PetscSFNode       *tmp_rootdata;
2288:     PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

2290:     PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2291:     PetscSFCreate(comm, &tmp_sf);
2292:     /* create helper SF with contiguous leaves */
2293:     PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
2294:     PetscSFSetUp(tmp_sf);
2295:     PetscMalloc1(nr, &tmp_rootdata);
2296:     for (i = 0; i < nr; i++) {
2297:       tmp_rootdata[i].index = i + rootOffsets[s];
2298:       tmp_rootdata[i].rank  = (PetscInt)rank;
2299:     }
2300:     PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2301:     PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2302:     PetscSFDestroy(&tmp_sf);
2303:     PetscFree(tmp_rootdata);
2304:   }

2306:   /* Build the new SF */
2307:   PetscSFCreate(comm, newsf);
2308:   PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2309:   PetscSFSetUp(*newsf);
2310:   PetscFree(rootOffsets);
2311:   PetscFree(leafArrayOffsets);
2312:   return 0;
2313: }
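
/* Illustrative usage sketch (editor's addition, not part of sf.c): concatenate two SFs that use
   contiguous leaf storage, keeping their root spaces separate; sf0, sf1 and comm are hypothetical
   and all SFs must live on comm.

     PetscSF parts[2] = {sf0, sf1}, combined;
     PetscSFConcatenate(comm, 2, parts, PETSC_FALSE, NULL, &combined);
     PetscSFDestroy(&combined); */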