Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petscctable.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #endif
10: #if defined(PETSC_HAVE_HIP)
11: #include <hip/hip_runtime.h>
12: #endif
14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
15: void PetscSFCheckGraphSet(PetscSF, int);
16: #else
17: #if defined(PETSC_USE_DEBUG)
19: #else
20: #define PetscSFCheckGraphSet(sf, arg) \
21: do { \
22: } while (0)
23: #endif
24: #endif
26: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28: /*@
29: PetscSFCreate - create a star forest communication context
31: Collective
33: Input Parameter:
34: . comm - communicator on which the star forest will operate
36: Output Parameter:
37: . sf - new star forest context
39: Options Database Keys:
40: . -sf_type type - value of type may be
41: .vb
42:     basic     - Use MPI persistent Isend/Irecv for communication (Default)
43:     window    - Use MPI-3 one-sided window for communication
44:     neighbor  - Use MPI-3 neighborhood collectives for communication
45: .ve
47: Level: intermediate
49: Note:
50: When one knows the communication graph is one of the predefined graphs, such as that of `MPI_Alltoall()`, `MPI_Allgatherv()`,
51: or `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
52: `SF`s are optimized and have better performance than general `SF`s.
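
  Example Usage:
  A minimal sketch of the typical life cycle (error checking omitted); the two-leaf graph, in which
  every process's leaves reference roots 0 and 1 on rank 0, is purely illustrative:
.vb
   PetscSF     sf;
   PetscSFNode remote[2];
   PetscInt    rootdata[2] = {10, 20}, leafdata[2];

   remote[0].rank = 0; remote[0].index = 0; /* leaf 0 references root 0 on rank 0 */
   remote[1].rank = 0; remote[1].index = 1; /* leaf 1 references root 1 on rank 0 */
   PetscSFCreate(PETSC_COMM_WORLD, &sf);
   PetscSFSetFromOptions(sf);
   PetscSFSetGraph(sf, 2, 2, NULL, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES);
   PetscSFSetUp(sf);
   PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
   PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
   PetscSFDestroy(&sf);
.ve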
54: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
55: @*/
56: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57: {
58: PetscSF b;
61: PetscSFInitializePackage();
63: PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView);
65: b->nroots = -1;
66: b->nleaves = -1;
67: b->minleaf = PETSC_MAX_INT;
68: b->maxleaf = PETSC_MIN_INT;
69: b->nranks = -1;
70: b->rankorder = PETSC_TRUE;
71: b->ingroup = MPI_GROUP_NULL;
72: b->outgroup = MPI_GROUP_NULL;
73: b->graphset = PETSC_FALSE;
74: #if defined(PETSC_HAVE_DEVICE)
75: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
76: b->use_stream_aware_mpi = PETSC_FALSE;
77: b->unknown_input_stream = PETSC_FALSE;
78: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
79: b->backend = PETSCSF_BACKEND_KOKKOS;
80: #elif defined(PETSC_HAVE_CUDA)
81: b->backend = PETSCSF_BACKEND_CUDA;
82: #elif defined(PETSC_HAVE_HIP)
83: b->backend = PETSCSF_BACKEND_HIP;
84: #endif
86: #if defined(PETSC_HAVE_NVSHMEM)
87: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
88: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89: PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL);
90: PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL);
91: #endif
92: #endif
93: b->vscat.from_n = -1;
94: b->vscat.to_n = -1;
95: b->vscat.unit = MPIU_SCALAR;
96: *sf = b;
97: return 0;
98: }
100: /*@
101: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
103: Collective
105: Input Parameter:
106: . sf - star forest
108: Level: advanced
110: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
111: @*/
112: PetscErrorCode PetscSFReset(PetscSF sf)
113: {
115: PetscTryTypeMethod(sf, Reset);
116: sf->nroots = -1;
117: sf->nleaves = -1;
118: sf->minleaf = PETSC_MAX_INT;
119: sf->maxleaf = PETSC_MIN_INT;
120: sf->mine = NULL;
121: sf->remote = NULL;
122: sf->graphset = PETSC_FALSE;
123: PetscFree(sf->mine_alloc);
124: PetscFree(sf->remote_alloc);
125: sf->nranks = -1;
126: PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote);
127: sf->degreeknown = PETSC_FALSE;
128: PetscFree(sf->degree);
129: if (sf->ingroup != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
130: if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
131: if (sf->multi) sf->multi->multi = NULL;
132: PetscSFDestroy(&sf->multi);
133: PetscLayoutDestroy(&sf->map);
135: #if defined(PETSC_HAVE_DEVICE)
136: for (PetscInt i = 0; i < 2; i++) PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]);
137: #endif
139: sf->setupcalled = PETSC_FALSE;
140: return 0;
141: }
143: /*@C
144: PetscSFSetType - Set the `PetscSF` communication implementation
146: Collective
148: Input Parameters:
149: + sf - the `PetscSF` context
150: - type - a known method
151: .vb
152: PETSCSFWINDOW - MPI-2/3 one-sided
153: PETSCSFBASIC - basic implementation using MPI-1 two-sided
154: .ve
156: Options Database Key:
157: . -sf_type <type> - Sets the method; use -help for a list of available methods (for instance, basic, window, neighbor)
159: Level: intermediate
161: Notes:
162: See "include/petscsf.h" for the available methods.
164: .seealso: `PetscSFType`, `PetscSFCreate()`
165: @*/
166: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
167: {
168: PetscBool match;
169: PetscErrorCode (*r)(PetscSF);
174: PetscObjectTypeCompare((PetscObject)sf, type, &match);
175: if (match) return 0;
177: PetscFunctionListFind(PetscSFList, type, &r);
179: /* Destroy the previous PetscSF implementation context */
180: PetscTryTypeMethod(sf, Destroy);
181: PetscMemzero(sf->ops, sizeof(*sf->ops));
182: PetscObjectChangeTypeName((PetscObject)sf, type);
183: (*r)(sf);
184: return 0;
185: }
187: /*@C
188: PetscSFGetType - Get the `PetscSF` communication implementation
190: Not Collective
192: Input Parameter:
193: . sf - the `PetscSF` context
195: Output Parameter:
196: . type - the `PetscSF` type name
198: Level: intermediate
200: .seealso: `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
201: @*/
202: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
203: {
206: *type = ((PetscObject)sf)->type_name;
207: return 0;
208: }
210: /*@C
211: PetscSFDestroy - destroy star forest
213: Collective
215: Input Parameter:
216: . sf - address of star forest
218: Level: intermediate
220: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
221: @*/
222: PetscErrorCode PetscSFDestroy(PetscSF *sf)
223: {
224: if (!*sf) return 0;
226: if (--((PetscObject)(*sf))->refct > 0) {
227: *sf = NULL;
228: return 0;
229: }
230: PetscSFReset(*sf);
231: PetscTryTypeMethod((*sf), Destroy);
232: PetscSFDestroy(&(*sf)->vscat.lsf);
233: if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
234: PetscHeaderDestroy(sf);
235: return 0;
236: }
238: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
239: {
240: PetscInt i, nleaves;
241: PetscMPIInt size;
242: const PetscInt *ilocal;
243: const PetscSFNode *iremote;
245: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
246: PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote);
247: MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
248: for (i = 0; i < nleaves; i++) {
249: const PetscInt rank = iremote[i].rank;
250: const PetscInt remote = iremote[i].index;
251: const PetscInt leaf = ilocal ? ilocal[i] : i;
255: }
256: return 0;
257: }
259: /*@
260: PetscSFSetUp - set up communication structures
262: Collective
264: Input Parameter:
265: . sf - star forest communication object
267: Level: beginner
269: .seealso: `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
270: @*/
271: PetscErrorCode PetscSFSetUp(PetscSF sf)
272: {
274: PetscSFCheckGraphSet(sf, 1);
275: if (sf->setupcalled) return 0;
276: PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0);
277: PetscSFCheckGraphValid_Private(sf);
278: if (!((PetscObject)sf)->type_name) PetscSFSetType(sf, PETSCSFBASIC); /* Zero all sf->ops */
279: PetscTryTypeMethod(sf, SetUp);
280: #if defined(PETSC_HAVE_CUDA)
281: if (sf->backend == PETSCSF_BACKEND_CUDA) {
282: sf->ops->Malloc = PetscSFMalloc_CUDA;
283: sf->ops->Free = PetscSFFree_CUDA;
284: }
285: #endif
286: #if defined(PETSC_HAVE_HIP)
287: if (sf->backend == PETSCSF_BACKEND_HIP) {
288: sf->ops->Malloc = PetscSFMalloc_HIP;
289: sf->ops->Free = PetscSFFree_HIP;
290: }
291: #endif
293: #
294: #if defined(PETSC_HAVE_KOKKOS)
295: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
296: sf->ops->Malloc = PetscSFMalloc_Kokkos;
297: sf->ops->Free = PetscSFFree_Kokkos;
298: }
299: #endif
300: PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0);
301: sf->setupcalled = PETSC_TRUE;
302: return 0;
303: }
305: /*@
306: PetscSFSetFromOptions - set `PetscSF` options using the options database
308: Logically Collective
310: Input Parameter:
311: . sf - star forest
313: Options Database Keys:
314: + -sf_type - implementation type, see PetscSFSetType()
315: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
316: . -sf_unknown_input_stream - Assume the SF root/leafdata is computed on arbitrary streams unknown to SF (default: false).
317: If true, this option only works with -use_gpu_aware_mpi 1.
319: . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
320: If true, this option only works with -use_gpu_aware_mpi 1.
322: - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip, and kokkos.
323: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
324: kokkos is the only available backend.
326: Level: intermediate
328: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
329: @*/
330: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
331: {
332: PetscSFType deft;
333: char type[256];
334: PetscBool flg;
337: PetscObjectOptionsBegin((PetscObject)sf);
338: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
339: PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg);
340: PetscSFSetType(sf, flg ? type : deft);
341: PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL);
342: #if defined(PETSC_HAVE_DEVICE)
343: {
344: char backendstr[32] = {0};
345: PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
346: /* Change the defaults set in PetscSFCreate() with command line options */
347: PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL);
348: PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL);
349: PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set);
350: PetscStrcasecmp("cuda", backendstr, &isCuda);
351: PetscStrcasecmp("kokkos", backendstr, &isKokkos);
352: PetscStrcasecmp("hip", backendstr, &isHip);
353: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
354: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
355: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
356: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
358: #elif defined(PETSC_HAVE_KOKKOS)
360: #endif
361: }
362: #endif
363: PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
364: PetscOptionsEnd();
365: return 0;
366: }
368: /*@
369: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
371: Logically Collective
373: Input Parameters:
374: + sf - star forest
375: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
377: Level: advanced
379: .seealso: `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
380: @*/
381: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
382: {
386: sf->rankorder = flg;
387: return 0;
388: }
390: /*@C
391: PetscSFSetGraph - Set a parallel star forest
393: Collective
395: Input Parameters:
396: + sf - star forest
397: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
398: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
399: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
400: during setup in debug mode)
401: . localmode - copy mode for ilocal
402: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
403: during setup in debug mode)
404: - remotemode - copy mode for iremote
406: Level: intermediate
408: Notes:
409: Leaf indices in ilocal must be unique, otherwise an error occurs.
411: Input arrays ilocal and iremote follow the PetscCopyMode semantics.
412: In particular, if localmode/remotemode is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
413: PETSc might modify the respective array;
414: if `PETSC_USE_POINTER`, the user must delete the array after PetscSFDestroy().
415: Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact, and a const array can be passed (but a cast to non-const is needed).
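
  Example Usage:
  A sketch (error checking omitted), assuming sf has already been created: the heap-allocated arrays are
  handed over with `PETSC_OWN_POINTER`, so the caller must not free them afterwards; the indices are
  illustrative only:
.vb
   PetscInt     nroots = 4, nleaves = 2, *ilocal;
   PetscSFNode *iremote;

   PetscMalloc1(nleaves, &ilocal);
   PetscMalloc1(nleaves, &iremote);
   ilocal[0] = 3; iremote[0].rank = 0; iremote[0].index = 1; /* leaf stored at location 3 references root 1 on rank 0 */
   ilocal[1] = 1; iremote[1].rank = 0; iremote[1].index = 2; /* leaf stored at location 1 references root 2 on rank 0 */
   PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
.ve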
417: Fortran Note:
418: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
420: Developer Note:
421: We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
422: This also makes it easy to compare the leaf sets of two SFs.
424: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
425: @*/
426: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
427: {
428: PetscBool unique, contiguous;
435: /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
436: * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
440: if (sf->nroots >= 0) { /* Reset only if graph already set */
441: PetscSFReset(sf);
442: }
444: PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0);
446: sf->nroots = nroots;
447: sf->nleaves = nleaves;
449: if (localmode == PETSC_COPY_VALUES && ilocal) {
450: PetscInt *tlocal = NULL;
452: PetscMalloc1(nleaves, &tlocal);
453: PetscArraycpy(tlocal, ilocal, nleaves);
454: ilocal = tlocal;
455: }
456: if (remotemode == PETSC_COPY_VALUES) {
457: PetscSFNode *tremote = NULL;
459: PetscMalloc1(nleaves, &tremote);
460: PetscArraycpy(tremote, iremote, nleaves);
461: iremote = tremote;
462: }
464: if (nleaves && ilocal) {
465: PetscSFNode work;
467: PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
468: PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
469: unique = PetscNot(unique);
471: sf->minleaf = ilocal[0];
472: sf->maxleaf = ilocal[nleaves - 1];
473: contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
474: } else {
475: sf->minleaf = 0;
476: sf->maxleaf = nleaves - 1;
477: unique = PETSC_TRUE;
478: contiguous = PETSC_TRUE;
479: }
481: if (contiguous) {
482: if (localmode == PETSC_USE_POINTER) {
483: ilocal = NULL;
484: } else {
485: PetscFree(ilocal);
486: }
487: }
488: sf->mine = ilocal;
489: if (localmode == PETSC_USE_POINTER) {
490: sf->mine_alloc = NULL;
491: } else {
492: sf->mine_alloc = ilocal;
493: }
494: sf->remote = iremote;
495: if (remotemode == PETSC_USE_POINTER) {
496: sf->remote_alloc = NULL;
497: } else {
498: sf->remote_alloc = iremote;
499: }
500: PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0);
501: sf->graphset = PETSC_TRUE;
502: return 0;
503: }
505: /*@
506: PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
508: Collective
510: Input Parameters:
511: + sf - The `PetscSF`
512: . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
513: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
515: Level: intermediate
517: Notes:
518: It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector x whose layout is map,
519: with n and N being the local and global sizes of x, respectively.
521: With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a Bcast on it copies x to
522: sequential vectors y on all ranks.
524: With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a Bcast on it copies x to a
525: sequential vector y on rank 0.
527: In the above cases, entries of x are roots and entries of y are leaves.
529: With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is the size of sf's communicator. The routine
530: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
531: of rank j, for 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. This does
532: not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
533: items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.
535: In this case, roots and leaves are symmetric.
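
  Example Usage:
  A sketch (error checking omitted) of the `PETSCSF_PATTERN_ALLGATHER` case described above, with two
  roots per rank; a Bcast on the resulting graph gathers everyone's local entries onto every rank:
.vb
   PetscSF     sf;
   PetscLayout map;
   PetscMPIInt rank, size;
   PetscInt    i, n = 2, N, *rootdata, *leafdata;

   MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
   MPI_Comm_size(PETSC_COMM_WORLD, &size);
   N = n * size;
   PetscLayoutCreate(PETSC_COMM_WORLD, &map);
   PetscLayoutSetLocalSize(map, n);
   PetscLayoutSetUp(map);
   PetscSFCreate(PETSC_COMM_WORLD, &sf);
   PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER);
   PetscMalloc2(n, &rootdata, N, &leafdata);
   for (i = 0; i < n; i++) rootdata[i] = rank * n + i; /* the local entries of the "vector" x */
   PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE);
   PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE); /* leafdata now holds all N entries on every rank */
   PetscFree2(rootdata, leafdata);
   PetscLayoutDestroy(&map);
   PetscSFDestroy(&sf);
.ve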
537: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
538: @*/
539: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
540: {
541: MPI_Comm comm;
542: PetscInt n, N, res[2];
543: PetscMPIInt rank, size;
544: PetscSFType type;
548: PetscObjectGetComm((PetscObject)sf, &comm);
550: MPI_Comm_rank(comm, &rank);
551: MPI_Comm_size(comm, &size);
553: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
554: type = PETSCSFALLTOALL;
555: PetscLayoutCreate(comm, &sf->map);
556: PetscLayoutSetLocalSize(sf->map, size);
557: PetscLayoutSetSize(sf->map, ((PetscInt)size) * size);
558: PetscLayoutSetUp(sf->map);
559: } else {
560: PetscLayoutGetLocalSize(map, &n);
561: PetscLayoutGetSize(map, &N);
562: res[0] = n;
563: res[1] = -n;
564: /* Check if n are same over all ranks so that we can optimize it */
565: MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm);
566: if (res[0] == -res[1]) { /* same n */
567: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
568: } else {
569: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
570: }
571: PetscLayoutReference(map, &sf->map);
572: }
573: PetscSFSetType(sf, type);
575: sf->pattern = pattern;
576: sf->mine = NULL; /* Contiguous */
578: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
579: Also set other easy stuff.
580: */
581: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
582: sf->nleaves = N;
583: sf->nroots = n;
584: sf->nranks = size;
585: sf->minleaf = 0;
586: sf->maxleaf = N - 1;
587: } else if (pattern == PETSCSF_PATTERN_GATHER) {
588: sf->nleaves = rank ? 0 : N;
589: sf->nroots = n;
590: sf->nranks = rank ? 0 : size;
591: sf->minleaf = 0;
592: sf->maxleaf = rank ? -1 : N - 1;
593: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
594: sf->nleaves = size;
595: sf->nroots = size;
596: sf->nranks = size;
597: sf->minleaf = 0;
598: sf->maxleaf = size - 1;
599: }
600: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
601: sf->graphset = PETSC_TRUE;
602: return 0;
603: }
605: /*@
606: PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
608: Collective
610: Input Parameter:
611: . sf - star forest to invert
613: Output Parameter:
614: . isf - inverse of sf
616: Level: advanced
618: Notes:
619: All roots must have degree 1.
621: The local space may be a permutation, but cannot be sparse.
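
  Example Usage:
  A sketch (error checking omitted; the buffer names are placeholders), assuming sf already has its
  graph set. Since the roots of sf become the leaves of the inverse and vice versa, a Bcast on the
  inverse moves data from the leaf space of sf into its root space:
.vb
   PetscSF   isf;
   PetscInt *leafspacedata, *rootspacedata; /* placeholder buffers sized by the leaf and root spaces of sf */

   PetscSFCreateInverseSF(sf, &isf);
   PetscSFBcastBegin(isf, MPIU_INT, leafspacedata, rootspacedata, MPI_REPLACE);
   PetscSFBcastEnd(isf, MPIU_INT, leafspacedata, rootspacedata, MPI_REPLACE);
   PetscSFDestroy(&isf);
.ve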
623: .seealso: `PetscSFType`, `PetscSFSetGraph()`
624: @*/
625: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
626: {
627: PetscMPIInt rank;
628: PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
629: const PetscInt *ilocal;
630: PetscSFNode *roots, *leaves;
633: PetscSFCheckGraphSet(sf, 1);
636: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL);
637: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
639: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
640: PetscMalloc2(nroots, &roots, maxlocal, &leaves);
641: for (i = 0; i < maxlocal; i++) {
642: leaves[i].rank = rank;
643: leaves[i].index = i;
644: }
645: for (i = 0; i < nroots; i++) {
646: roots[i].rank = -1;
647: roots[i].index = -1;
648: }
649: PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);
650: PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE);
652: /* Check whether our leaves are sparse */
653: for (i = 0, count = 0; i < nroots; i++)
654: if (roots[i].rank >= 0) count++;
655: if (count == nroots) newilocal = NULL;
656: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscMalloc1(count, &newilocal);
657: for (i = 0, count = 0; i < nroots; i++) {
658: if (roots[i].rank >= 0) {
659: newilocal[count] = i;
660: roots[count].rank = roots[i].rank;
661: roots[count].index = roots[i].index;
662: count++;
663: }
664: }
665: }
667: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf);
668: PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES);
669: PetscFree2(roots, leaves);
670: return 0;
671: }
673: /*@
674: PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
676: Collective
678: Input Parameters:
679: + sf - communication object to duplicate
680: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
682: Output Parameter:
683: . newsf - new communication object
685: Level: beginner
687: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
688: @*/
689: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
690: {
691: PetscSFType type;
692: MPI_Datatype dtype = MPIU_SCALAR;
697: PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf);
698: PetscSFGetType(sf, &type);
699: if (type) PetscSFSetType(*newsf, type);
700: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
701: if (opt == PETSCSF_DUPLICATE_GRAPH) {
702: PetscSFCheckGraphSet(sf, 1);
703: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
704: PetscInt nroots, nleaves;
705: const PetscInt *ilocal;
706: const PetscSFNode *iremote;
707: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
708: PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
709: } else {
710: PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern);
711: }
712: }
713: /* Since oldtype is committed, so is newtype, according to MPI */
714: if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit, &dtype);
715: (*newsf)->vscat.bs = sf->vscat.bs;
716: (*newsf)->vscat.unit = dtype;
717: (*newsf)->vscat.to_n = sf->vscat.to_n;
718: (*newsf)->vscat.from_n = sf->vscat.from_n;
719: /* Do not copy lsf. Build it on demand since it is rarely used */
721: #if defined(PETSC_HAVE_DEVICE)
722: (*newsf)->backend = sf->backend;
723: (*newsf)->unknown_input_stream = sf->unknown_input_stream;
724: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
725: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
726: #endif
727: PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
728: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
729: return 0;
730: }
732: /*@C
733: PetscSFGetGraph - Get the graph specifying a parallel star forest
735: Not Collective
737: Input Parameter:
738: . sf - star forest
740: Output Parameters:
741: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
742: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
743: . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
744: - iremote - remote locations of root vertices for each leaf on the current process
746: Level: intermediate
748: Notes:
749: We do not currently require that the graph be set, thus nroots = -1 is returned if it has not been set yet
751: The returned ilocal/iremote might contain values in different order than the input ones in `PetscSFSetGraph()`,
752: see its manpage for details.
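
  Example Usage:
  A sketch (error checking omitted) that prints every local edge of a graph that has already been set:
.vb
   PetscInt           i, nroots, nleaves;
   const PetscInt    *ilocal;
   const PetscSFNode *iremote;

   PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
   for (i = 0; i < nleaves; i++) {
     PetscInt leaf = ilocal ? ilocal[i] : i; /* NULL ilocal means leaves are stored contiguously */
     PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (rank %" PetscInt_FMT ", root %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index);
   }
.ve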
754: Fortran Notes:
755: The returned iremote array is a copy and must be deallocated after use. Consequently, if you
756: want to update the graph, you must call `PetscSFSetGraph()` after modifying the iremote array.
758: To check for a NULL ilocal use
759: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
761: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
762: @*/
763: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
764: {
766: if (sf->ops->GetGraph) {
767: (sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote);
768: } else {
769: if (nroots) *nroots = sf->nroots;
770: if (nleaves) *nleaves = sf->nleaves;
771: if (ilocal) *ilocal = sf->mine;
772: if (iremote) *iremote = sf->remote;
773: }
774: return 0;
775: }
777: /*@
778: PetscSFGetLeafRange - Get the active leaf ranges
780: Not Collective
782: Input Parameter:
783: . sf - star forest
785: Output Parameters:
786: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
787: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
789: Level: developer
791: .seealso: `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
792: @*/
793: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
794: {
796: PetscSFCheckGraphSet(sf, 1);
797: if (minleaf) *minleaf = sf->minleaf;
798: if (maxleaf) *maxleaf = sf->maxleaf;
799: return 0;
800: }
802: /*@C
803: PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
805: Collective on A
807: Input Parameters:
808: + A - the star forest
809: . obj - Optional object that provides the prefix for the option names
810: - name - command line option
812: Level: intermediate
814: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
815: @*/
816: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
817: {
819: PetscObjectViewFromOptions((PetscObject)A, obj, name);
820: return 0;
821: }
823: /*@C
824: PetscSFView - view a star forest
826: Collective
828: Input Parameters:
829: + sf - star forest
830: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
832: Level: beginner
834: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
835: @*/
836: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
837: {
838: PetscBool iascii;
839: PetscViewerFormat format;
842: if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer);
845: if (sf->graphset) PetscSFSetUp(sf);
846: PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii);
847: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
848: PetscMPIInt rank;
849: PetscInt ii, i, j;
851: PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer);
852: PetscViewerASCIIPushTab(viewer);
853: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
854: if (!sf->graphset) {
855: PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n");
856: PetscViewerASCIIPopTab(viewer);
857: return 0;
858: }
859: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
860: PetscViewerASCIIPushSynchronized(viewer);
861: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks);
862: for (i = 0; i < sf->nleaves; i++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index);
863: PetscViewerFlush(viewer);
864: PetscViewerGetFormat(viewer, &format);
865: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
866: PetscMPIInt *tmpranks, *perm;
867: PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm);
868: PetscArraycpy(tmpranks, sf->ranks, sf->nranks);
869: for (i = 0; i < sf->nranks; i++) perm[i] = i;
870: PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm);
871: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank);
872: for (ii = 0; ii < sf->nranks; ii++) {
873: i = perm[ii];
874: PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]);
875: for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]);
876: }
877: PetscFree2(tmpranks, perm);
878: }
879: PetscViewerFlush(viewer);
880: PetscViewerASCIIPopSynchronized(viewer);
881: }
882: PetscViewerASCIIPopTab(viewer);
883: }
884: PetscTryTypeMethod(sf, View, viewer);
885: return 0;
886: }
888: /*@C
889: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
891: Not Collective
893: Input Parameter:
894: . sf - star forest
896: Output Parameters:
897: + nranks - number of ranks referenced by local part
898: . ranks - array of ranks
899: . roffset - offset in rmine/rremote for each rank (length nranks+1)
900: . rmine - concatenated array holding local indices referencing each remote rank
901: - rremote - concatenated array holding remote indices referenced for each remote rank
903: Level: developer
905: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
906: @*/
907: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
908: {
911: if (sf->ops->GetRootRanks) {
912: (sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote);
913: } else {
914: /* The generic implementation */
915: if (nranks) *nranks = sf->nranks;
916: if (ranks) *ranks = sf->ranks;
917: if (roffset) *roffset = sf->roffset;
918: if (rmine) *rmine = sf->rmine;
919: if (rremote) *rremote = sf->rremote;
920: }
921: return 0;
922: }
924: /*@C
925: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
927: Not Collective
929: Input Parameter:
930: . sf - star forest
932: Output Parameters:
933: + niranks - number of leaf ranks referencing roots on this process
934: . iranks - array of ranks
935: . ioffset - offset in irootloc for each rank (length niranks+1)
936: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
938: Level: developer
940: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
941: @*/
942: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
943: {
946: if (sf->ops->GetLeafRanks) {
947: (sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc);
948: } else {
949: PetscSFType type;
950: PetscSFGetType(sf, &type);
951: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
952: }
953: return 0;
954: }
956: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
957: {
958: PetscInt i;
959: for (i = 0; i < n; i++) {
960: if (needle == list[i]) return PETSC_TRUE;
961: }
962: return PETSC_FALSE;
963: }
965: /*@C
966: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
968: Collective
970: Input Parameters:
971: + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
972: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
974: Level: developer
976: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
977: @*/
978: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
979: {
980: PetscTable table;
981: PetscTablePosition pos;
982: PetscMPIInt size, groupsize, *groupranks;
983: PetscInt *rcount, *ranks;
984: PetscInt i, irank = -1, orank = -1;
987: PetscSFCheckGraphSet(sf, 1);
988: MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
989: PetscTableCreate(10, size, &table);
990: for (i = 0; i < sf->nleaves; i++) {
991: /* Log 1-based rank */
992: PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES);
993: }
994: PetscTableGetCount(table, &sf->nranks);
995: PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote);
996: PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks);
997: PetscTableGetHeadPosition(table, &pos);
998: for (i = 0; i < sf->nranks; i++) {
999: PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]);
1000: ranks[i]--; /* Convert back to 0-based */
1001: }
1002: PetscTableDestroy(&table);
1004: /* We expect that dgroup is reliably "small" while nranks could be large */
1005: {
1006: MPI_Group group = MPI_GROUP_NULL;
1007: PetscMPIInt *dgroupranks;
1008: MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1009: MPI_Group_size(dgroup, &groupsize);
1010: PetscMalloc1(groupsize, &dgroupranks);
1011: PetscMalloc1(groupsize, &groupranks);
1012: for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1013: if (groupsize) MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks);
1014: MPI_Group_free(&group);
1015: PetscFree(dgroupranks);
1016: }
1018: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1019: for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1020: for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1021: if (InList(ranks[i], groupsize, groupranks)) break;
1022: }
1023: for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1024: if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1025: }
1026: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1027: PetscInt tmprank, tmpcount;
1029: tmprank = ranks[i];
1030: tmpcount = rcount[i];
1031: ranks[i] = ranks[sf->ndranks];
1032: rcount[i] = rcount[sf->ndranks];
1033: ranks[sf->ndranks] = tmprank;
1034: rcount[sf->ndranks] = tmpcount;
1035: sf->ndranks++;
1036: }
1037: }
1038: PetscFree(groupranks);
1039: PetscSortIntWithArray(sf->ndranks, ranks, rcount);
1040: PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks);
1041: sf->roffset[0] = 0;
1042: for (i = 0; i < sf->nranks; i++) {
1043: PetscMPIIntCast(ranks[i], sf->ranks + i);
1044: sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1045: rcount[i] = 0;
1046: }
1047: for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1048: /* short circuit */
1049: if (orank != sf->remote[i].rank) {
1050: /* Search for index of iremote[i].rank in sf->ranks */
1051: PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank);
1052: if (irank < 0) {
1053: PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank);
1054: if (irank >= 0) irank += sf->ndranks;
1055: }
1056: orank = sf->remote[i].rank;
1057: }
1059: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1060: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1061: rcount[irank]++;
1062: }
1063: PetscFree2(rcount, ranks);
1064: return 0;
1065: }
1067: /*@C
1068: PetscSFGetGroups - gets incoming and outgoing process groups
1070: Collective
1072: Input Parameter:
1073: . sf - star forest
1075: Output Parameters:
1076: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1077: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1079: Level: developer
1081: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1082: @*/
1083: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1084: {
1085: MPI_Group group = MPI_GROUP_NULL;
1088: if (sf->ingroup == MPI_GROUP_NULL) {
1089: PetscInt i;
1090: const PetscInt *indegree;
1091: PetscMPIInt rank, *outranks, *inranks;
1092: PetscSFNode *remote;
1093: PetscSF bgcount;
1095: /* Compute the number of incoming ranks */
1096: PetscMalloc1(sf->nranks, &remote);
1097: for (i = 0; i < sf->nranks; i++) {
1098: remote[i].rank = sf->ranks[i];
1099: remote[i].index = 0;
1100: }
1101: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount);
1102: PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1103: PetscSFComputeDegreeBegin(bgcount, &indegree);
1104: PetscSFComputeDegreeEnd(bgcount, &indegree);
1105: /* Enumerate the incoming ranks */
1106: PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks);
1107: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1108: for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1109: PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks);
1110: PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks);
1111: MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1112: MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup);
1113: MPI_Group_free(&group);
1114: PetscFree2(inranks, outranks);
1115: PetscSFDestroy(&bgcount);
1116: }
1117: *incoming = sf->ingroup;
1119: if (sf->outgroup == MPI_GROUP_NULL) {
1120: MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group);
1121: MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup);
1122: MPI_Group_free(&group);
1123: }
1124: *outgoing = sf->outgroup;
1125: return 0;
1126: }
1128: /*@
1129: PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1131: Collective
1133: Input Parameter:
1134: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1136: Output Parameter:
1137: . multi - star forest with split roots, such that each root has degree exactly 1
1139: Level: developer
1141: Note:
1142: In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1143: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1144: edge, it is a candidate for future optimization that might involve its removal.
1146: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1147: @*/
1148: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1149: {
1152: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1153: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1154: *multi = sf->multi;
1155: sf->multi->multi = sf->multi;
1156: return 0;
1157: }
1158: if (!sf->multi) {
1159: const PetscInt *indegree;
1160: PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
1161: PetscSFNode *remote;
1162: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1163: PetscSFComputeDegreeBegin(sf, &indegree);
1164: PetscSFComputeDegreeEnd(sf, &indegree);
1165: PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset);
1166: inoffset[0] = 0;
1167: for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1168: for (i = 0; i < maxlocal; i++) outones[i] = 1;
1169: PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1170: PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM);
1171: for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1172: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1174: }
1175: PetscMalloc1(sf->nleaves, &remote);
1176: for (i = 0; i < sf->nleaves; i++) {
1177: remote[i].rank = sf->remote[i].rank;
1178: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1179: }
1180: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi);
1181: sf->multi->multi = sf->multi;
1182: PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER);
1183: if (sf->rankorder) { /* Sort the ranks */
1184: PetscMPIInt rank;
1185: PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1186: PetscSFNode *newremote;
1187: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
1188: for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1189: PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset);
1190: for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1191: PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1192: PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE);
1193: /* Sort the incoming ranks at each vertex, build the inverse map */
1194: for (i = 0; i < sf->nroots; i++) {
1195: PetscInt j;
1196: for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1197: PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset);
1198: for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1199: }
1200: PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1201: PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE);
1202: PetscMalloc1(sf->nleaves, &newremote);
1203: for (i = 0; i < sf->nleaves; i++) {
1204: newremote[i].rank = sf->remote[i].rank;
1205: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1206: }
1207: PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER);
1208: PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset);
1209: }
1210: PetscFree3(inoffset, outones, outoffset);
1211: }
1212: *multi = sf->multi;
1213: return 0;
1214: }
1216: /*@C
1217: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1219: Collective
1221: Input Parameters:
1222: + sf - original star forest
1223: . nselected - number of selected roots on this process
1224: - selected - indices of the selected roots on this process
1226: Output Parameter:
1227: . esf - new star forest
1229: Level: advanced
1231: Note:
1232: To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1233: be done by calling PetscSFGetGraph().
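
  Example Usage:
  A sketch (error checking omitted), assuming sf has its graph set; the selected root indices are
  illustrative only:
.vb
   PetscSF         esf;
   PetscInt        selected[2] = {0, 3}, nleaves;
   const PetscInt *ilocal;

   PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf); /* keep only edges attached to local roots 0 and 3 */
   PetscSFGetGraph(esf, NULL, &nleaves, &ilocal, NULL); /* indices of the leaves that still participate */
   PetscSFDestroy(&esf);
.ve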
1235: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1236: @*/
1237: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1238: {
1239: PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1240: const PetscInt *ilocal;
1241: signed char *rootdata, *leafdata, *leafmem;
1242: const PetscSFNode *iremote;
1243: PetscSFNode *new_iremote;
1244: MPI_Comm comm;
1247: PetscSFCheckGraphSet(sf, 1);
1251: PetscSFSetUp(sf);
1252: PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0);
1253: PetscObjectGetComm((PetscObject)sf, &comm);
1254: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
1256: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1257: PetscBool dups;
1261: }
1263: if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1264: else {
1265: /* A generic version of creating embedded sf */
1266: PetscSFGetLeafRange(sf, &minleaf, &maxleaf);
1267: maxlocal = maxleaf - minleaf + 1;
1268: PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem);
1269: leafdata = leafmem - minleaf;
1270: /* Tag selected roots and bcast to leaves */
1271: for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1272: PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);
1273: PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE);
1275: /* Build esf with leaves that are still connected */
1276: esf_nleaves = 0;
1277: for (i = 0; i < nleaves; i++) {
1278: j = ilocal ? ilocal[i] : i;
1279: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1280: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1281: */
1282: esf_nleaves += (leafdata[j] ? 1 : 0);
1283: }
1284: PetscMalloc1(esf_nleaves, &new_ilocal);
1285: PetscMalloc1(esf_nleaves, &new_iremote);
1286: for (i = n = 0; i < nleaves; i++) {
1287: j = ilocal ? ilocal[i] : i;
1288: if (leafdata[j]) {
1289: new_ilocal[n] = j;
1290: new_iremote[n].rank = iremote[i].rank;
1291: new_iremote[n].index = iremote[i].index;
1292: ++n;
1293: }
1294: }
1295: PetscSFCreate(comm, esf);
1296: PetscSFSetFromOptions(*esf);
1297: PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1298: PetscFree2(rootdata, leafmem);
1299: }
1300: PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0);
1301: return 0;
1302: }
1304: /*@C
1305: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1307: Collective
1309: Input Parameters:
1310: + sf - original star forest
1311: . nselected - number of selected leaves on this process
1312: - selected - indices of the selected leaves on this process
1314: Output Parameter:
1315: . newsf - new star forest
1317: Level: advanced
1319: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1320: @*/
1321: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1322: {
1323: const PetscSFNode *iremote;
1324: PetscSFNode *new_iremote;
1325: const PetscInt *ilocal;
1326: PetscInt i, nroots, *leaves, *new_ilocal;
1327: MPI_Comm comm;
1330: PetscSFCheckGraphSet(sf, 1);
1334: /* Uniq selected[] and put results in leaves[] */
1335: PetscObjectGetComm((PetscObject)sf, &comm);
1336: PetscMalloc1(nselected, &leaves);
1337: PetscArraycpy(leaves, selected, nselected);
1338: PetscSortedRemoveDupsInt(&nselected, leaves);
1341: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1342: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1343: else {
1344: PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote);
1345: PetscMalloc1(nselected, &new_ilocal);
1346: PetscMalloc1(nselected, &new_iremote);
1347: for (i = 0; i < nselected; ++i) {
1348: const PetscInt l = leaves[i];
1349: new_ilocal[i] = ilocal ? ilocal[l] : l;
1350: new_iremote[i].rank = iremote[l].rank;
1351: new_iremote[i].index = iremote[l].index;
1352: }
1353: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf);
1354: PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER);
1355: }
1356: PetscFree(leaves);
1357: return 0;
1358: }
1360: /*@C
1361: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1363: Collective
1365: Input Parameters:
1366: + sf - star forest on which to communicate
1367: . unit - data type associated with each node
1368: . rootdata - buffer to broadcast
1369: - op - operation to use for reduction
1371: Output Parameter:
1372: . leafdata - buffer to be reduced with values from each leaf's respective root
1374: Level: intermediate
1376: Notes:
1377: When PETSc is configured with device support, it uses its own mechanism to figure out whether the given data pointers
1378: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1379: use `PetscSFBcastWithMemTypeBegin()` instead.
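
  Example Usage:
  A sketch (error checking omitted), assuming sf has its graph set and the placeholder buffers are
  sized by the local root and leaf spaces:
.vb
   PetscScalar *rootdata, *leafdata; /* rootdata has nroots entries, leafdata covers the leaf range */

   PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
   /* unrelated work may be overlapped with the communication here */
   PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
.ve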
1381: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1382: @*/
1383: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1384: {
1385: PetscMemType rootmtype, leafmtype;
1388: PetscSFSetUp(sf);
1389: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1390: PetscGetMemType(rootdata, &rootmtype);
1391: PetscGetMemType(leafdata, &leafmtype);
1392: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1393: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1394: return 0;
1395: }
1397: /*@C
1398: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to `PetscSFBcastEnd()`
1400: Collective
1402: Input Parameters:
1403: + sf - star forest on which to communicate
1404: . unit - data type associated with each node
1405: . rootmtype - memory type of rootdata
1406: . rootdata - buffer to broadcast
1407: . leafmtype - memory type of leafdata
1408: - op - operation to use for reduction
1410: Output Parameter:
1411: . leafdata - buffer to be reduced with values from each leaf's respective root
1413: Level: intermediate
1415: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1416: @*/
1417: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1418: {
1420: PetscSFSetUp(sf);
1421: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
1422: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1423: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
1424: return 0;
1425: }
1427: /*@C
1428: PetscSFBcastEnd - end a broadcast & reduce operation started with `PetscSFBcastBegin()`
1430: Collective
1432: Input Parameters:
1433: + sf - star forest
1434: . unit - data type
1435: . rootdata - buffer to broadcast
1436: - op - operation to use for reduction
1438: Output Parameter:
1439: . leafdata - buffer to be reduced with values from each leaf's respective root
1441: Level: intermediate
1443: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1444: @*/
1445: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1446: {
1448: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0);
1449: PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1450: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0);
1451: return 0;
1452: }
1454: /*@C
1455: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1457: Collective
1459: Input Parameters:
1460: + sf - star forest
1461: . unit - data type
1462: . leafdata - values to reduce
1463: - op - reduction operation
1465: Output Parameter:
1466: . rootdata - result of reduction of values from all leaves of each root
1468: Level: intermediate
1470: Notes:
1471: When PETSc is configured with device support, it uses its own mechanism to figure out whether the given data pointers
1472: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1473: use `PetscSFReduceWithMemTypeBegin()` instead.
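
  Example Usage:
  A sketch (error checking omitted) that sums leaf contributions into their respective roots, with the
  same placeholder buffers as in `PetscSFBcastBegin()`:
.vb
   PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);
   PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);
.ve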
1475: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1476: @*/
1477: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1478: {
1479: PetscMemType rootmtype, leafmtype;
1482: PetscSFSetUp(sf);
1483: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1484: PetscGetMemType(rootdata, &rootmtype);
1485: PetscGetMemType(leafdata, &leafmtype);
1486: (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1487: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1488: return 0;
1489: }
1491: /*@C
1492: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1494: Collective
1496: Input Parameters:
1497: + sf - star forest
1498: . unit - data type
1499: . leafmtype - memory type of leafdata
1500: . leafdata - values to reduce
1501: . rootmtype - memory type of rootdata
1502: - op - reduction operation
1504: Output Parameter:
1505: . rootdata - result of reduction of values from all leaves of each root
1507: Level: intermediate
1509: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1510: @*/
1511: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1512: {
1514: PetscSFSetUp(sf);
1515: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1516: (sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op);
1517: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0);
1518: return 0;
1519: }
1521: /*@C
1522: PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()`
1524: Collective
1526: Input Parameters:
1527: + sf - star forest
1528: . unit - data type
1529: . leafdata - values to reduce
1530: - op - reduction operation
1532: Output Parameter:
1533: . rootdata - result of reduction of values from all leaves of each root
1535: Level: intermediate
1537: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`
1538: @*/
1539: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1540: {
1542: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1543: PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1544: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0);
1545: return 0;
1546: }
1548: /*@C
1549: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1550: to be completed with `PetscSFFetchAndOpEnd()`
1552: Collective
1554: Input Parameters:
1555: + sf - star forest
1556: . unit - data type
1557: . leafdata - leaf values to use in reduction
1558: - op - operation to use for reduction
1560: Output Parameters:
1561: + rootdata - root values to be updated, input state is seen by first process to perform an update
1562: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1564: Level: advanced
1566: Note:
1567: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1568: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1569: not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1570: integers.
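
  Example Usage:
  A sketch (error checking omitted) in which every leaf atomically claims a slot from a counter kept at
  its root: each leaf adds 1 to the root value and receives the value the root held just before its own
  update, so leaves sharing a root obtain distinct slots (the buffer names are placeholders):
.vb
   PetscInt *rootcount, *ones, *slot; /* rootcount[] initialized to 0, ones[] filled with 1, slot[] sized by the leaf space */

   PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcount, ones, slot, MPI_SUM);
   PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcount, ones, slot, MPI_SUM);
   /* rootcount[r] now equals the number of leaves referencing root r; slot[] holds each leaf's claimed index */
.ve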
1572: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1573: @*/
1574: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1575: {
1576: PetscMemType rootmtype, leafmtype, leafupdatemtype;
1579: PetscSFSetUp(sf);
1580: PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1581: PetscGetMemType(rootdata, &rootmtype);
1582: PetscGetMemType(leafdata, &leafmtype);
1583: PetscGetMemType(leafupdate, &leafupdatemtype);
1585: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1586: PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1587: return 0;
1588: }
1590: /*@C
1591: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1592: applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1594: Collective
1596: Input Parameters:
1597: + sf - star forest
1598: . unit - data type
1599: . rootmtype - memory type of rootdata
1600: . leafmtype - memory type of leafdata
1601: . leafdata - leaf values to use in reduction
1602: . leafupdatemtype - memory type of leafupdate
1603: - op - operation to use for reduction
1605: Output Parameters:
1606: + rootdata - root values to be updated, input state is seen by first process to perform an update
1607: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1609: Level: advanced
1611: Note:
1612: See `PetscSFFetchAndOpBegin()` for more details.
1614: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1615: @*/
1616: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1617: {
1619: PetscSFSetUp(sf);
1620: PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1622: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1623: PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0);
1624: return 0;
1625: }
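/*
   A sketch of how the explicit-memtype variant above may be called: the call is the same as
   PetscSFFetchAndOpBegin() except the caller states where each buffer lives instead of having
   it detected. The buffers rootdata, leafdata and leafupdate are hypothetical and assumed here
   to reside in host memory:

     PetscSFFetchAndOpWithMemTypeBegin(sf, MPIU_INT, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, PETSC_MEMTYPE_HOST, leafupdate, MPI_SUM);
     PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM);
*/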
1627: /*@C
1628: PetscSFFetchAndOpEnd - end operation started in a matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()` to fetch values from roots and update them atomically by applying the operation using my leaf value
1630: Collective
1632: Input Parameters:
1633: + sf - star forest
1634: . unit - data type
1635: . leafdata - leaf values to use in reduction
1636: - op - operation to use for reduction
1638: Output Parameters:
1639: + rootdata - root values to be updated, input state is seen by first process to perform an update
1640: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1642: Level: advanced
1644: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
1645: @*/
1646: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1647: {
1649: PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1650: PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1651: PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0);
1652: return 0;
1653: }
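/* A small sketch of the fetch-and-op pair (hypothetical helper, not part of the library):
   each leaf atomically adds 1 to the counter at its root and receives the counter's previous
   value, which yields a unique slot number per leaf under that root. Assumes sf's graph is
   set and contiguous leaf storage (ilocal == NULL). */
static PetscErrorCode SketchAssignLeafSlots(PetscSF sf, PetscInt **leafslot)
{
  PetscInt nroots, nleaves, i, *rootdata, *leafdata;

  PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL);
  PetscCalloc1(nroots, &rootdata); /* running counter per root, starts at 0 */
  PetscMalloc1(nleaves, &leafdata);
  PetscMalloc1(nleaves, leafslot);
  for (i = 0; i < nleaves; i++) leafdata[i] = 1;
  PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, *leafslot, MPI_SUM);
  PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, *leafslot, MPI_SUM);
  /* (*leafslot)[i] is the value of the root counter just before this leaf's update */
  PetscFree(rootdata);
  PetscFree(leafdata);
  return 0;
}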
1655: /*@C
1656: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1658: Collective
1660: Input Parameter:
1661: . sf - star forest
1663: Output Parameter:
1664: . degree - degree of each root vertex
1666: Level: advanced
1668: Note:
1669: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.
1671: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1672: @*/
1673: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1674: {
1676: PetscSFCheckGraphSet(sf, 1);
1678: if (!sf->degreeknown) {
1679: PetscInt i, nroots = sf->nroots, maxlocal;
1681: maxlocal = sf->maxleaf - sf->minleaf + 1;
1682: PetscMalloc1(nroots, &sf->degree);
1683: PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1684: for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1685: for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1686: PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1687: }
1688: *degree = NULL;
1689: return 0;
1690: }
1692: /*@C
1693: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1695: Collective
1697: Input Parameter:
1698: . sf - star forest
1700: Output Parameter:
1701: . degree - degree of each root vertex
1703: Level: developer
1705: Note:
1706: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence no need to call `PetscFree()` on it.
1708: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1709: @*/
1710: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1711: {
1713: PetscSFCheckGraphSet(sf, 1);
1715: if (!sf->degreeknown) {
1717: PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM);
1718: PetscFree(sf->degreetmp);
1719: sf->degreeknown = PETSC_TRUE;
1720: }
1721: *degree = sf->degree;
1722: return 0;
1723: }
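/* A minimal sketch of the degree pattern above (hypothetical helper, not part of the
   library): compute how many leaves point at each local root and report the maximum.
   The degree array is owned by the PetscSF and must not be freed by the caller. */
static PetscErrorCode SketchMaxRootDegree(PetscSF sf, PetscInt *maxdegree)
{
  const PetscInt *degree;
  PetscInt        i, nroots;

  PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
  PetscSFComputeDegreeBegin(sf, &degree); /* unrelated work could overlap the Begin/End pair */
  PetscSFComputeDegreeEnd(sf, &degree);
  *maxdegree = 0;
  for (i = 0; i < nroots; i++) *maxdegree = PetscMax(*maxdegree, degree[i]);
  return 0;
}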
1725: /*@C
1726: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by `PetscSFGetMultiSF()`).
1727: Each multi-root is assigned the index of the corresponding original root.
1729: Collective
1731: Input Parameters:
1732: + sf - star forest
1733: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1735: Output Parameters:
1736: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1737: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1739: Level: developer
1741: Note:
1742: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1744: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1745: @*/
1746: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1747: {
1748: PetscSF msf;
1749: PetscInt i, j, k, nroots, nmroots;
1752: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1756: PetscSFGetMultiSF(sf, &msf);
1757: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1758: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1759: for (i = 0, j = 0, k = 0; i < nroots; i++) {
1760: if (!degree[i]) continue;
1761: for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1762: }
1764: if (nMultiRoots) *nMultiRoots = nmroots;
1765: return 0;
1766: }
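/* A short sketch of how the routine above is typically driven (hypothetical helper, not part
   of the library): first compute the root degrees, then expand them into the original
   numbering of the multi-roots. Unlike the degree array, the returned numbering is owned by
   the caller and must be freed. */
static PetscErrorCode SketchMultiRootNumbering(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt        nmr, *orig;

  PetscSFComputeDegreeBegin(sf, &degree);
  PetscSFComputeDegreeEnd(sf, &degree);
  PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &orig);
  /* orig[k] is the original root index of multi-root k, for 0 <= k < nmr */
  PetscFree(orig);
  return 0;
}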
1768: /*@C
1769: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1771: Collective
1773: Input Parameters:
1774: + sf - star forest
1775: . unit - data type
1776: - leafdata - leaf data to gather to roots
1778: Output Parameter:
1779: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1781: Level: intermediate
1783: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1784: @*/
1785: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1786: {
1787: PetscSF multi = NULL;
1790: PetscSFSetUp(sf);
1791: PetscSFGetMultiSF(sf, &multi);
1792: PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1793: return 0;
1794: }
1796: /*@C
1797: PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1799: Collective
1801: Input Parameters:
1802: + sf - star forest
1803: . unit - data type
1804: - leafdata - leaf data to gather to roots
1806: Output Parameter:
1807: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1809: Level: intermediate
1811: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1812: @*/
1813: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1814: {
1815: PetscSF multi = NULL;
1818: PetscSFGetMultiSF(sf, &multi);
1819: PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE);
1820: return 0;
1821: }
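/* A minimal sketch of a gather (hypothetical helper, not part of the library): collect one
   PetscInt from every leaf into its root, keeping every contribution instead of combining
   them. The multi-root buffer needs one slot per incoming leaf, i.e. the sum of the root
   degrees. Assumes contiguous leaf storage (ilocal == NULL). */
static PetscErrorCode SketchGatherLeafValues(PetscSF sf, const PetscInt *leafdata, PetscInt **multirootdata)
{
  const PetscInt *degree;
  PetscInt        i, nroots, nslots = 0;

  PetscSFComputeDegreeBegin(sf, &degree);
  PetscSFComputeDegreeEnd(sf, &degree);
  PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
  for (i = 0; i < nroots; i++) nslots += degree[i]; /* one slot per (root, leaf) edge */
  PetscMalloc1(nslots, multirootdata);
  PetscSFGatherBegin(sf, MPIU_INT, leafdata, *multirootdata);
  PetscSFGatherEnd(sf, MPIU_INT, leafdata, *multirootdata);
  return 0;
}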
1823: /*@C
1824: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1826: Collective
1828: Input Parameters:
1829: + sf - star forest
1830: . unit - data type
1831: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1833: Output Parameter:
1834: . leafdata - leaf data to be updated with personal data from each respective root
1836: Level: intermediate
1838: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1839: @*/
1840: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1841: {
1842: PetscSF multi = NULL;
1845: PetscSFSetUp(sf);
1846: PetscSFGetMultiSF(sf, &multi);
1847: PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1848: return 0;
1849: }
1851: /*@C
1852: PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1854: Collective
1856: Input Parameters:
1857: + sf - star forest
1858: . unit - data type
1859: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1861: Output Parameter:
1862: . leafdata - leaf data to be updated with personal data from each respective root
1864: Level: intermediate
1866: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1867: @*/
1868: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1869: {
1870: PetscSF multi = NULL;
1873: PetscSFGetMultiSF(sf, &multi);
1874: PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE);
1875: return 0;
1876: }
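/* The mirror of the gather sketch above (hypothetical helper, not part of the library): send
   one distinct PetscInt from each multi-root slot back to its corresponding leaf. The
   multi-root buffer holds one entry per incoming leaf of each root; leafdata receives one
   entry per leaf. Assumes contiguous leaf storage (ilocal == NULL). */
static PetscErrorCode SketchScatterToLeaves(PetscSF sf, const PetscInt *multirootdata, PetscInt **leafdata)
{
  PetscInt nleaves;

  PetscSFGetGraph(sf, NULL, &nleaves, NULL, NULL);
  PetscMalloc1(nleaves, leafdata);
  PetscSFScatterBegin(sf, MPIU_INT, multirootdata, *leafdata);
  PetscSFScatterEnd(sf, MPIU_INT, multirootdata, *leafdata);
  return 0;
}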
1878: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1879: {
1880: PetscInt i, n, nleaves;
1881: const PetscInt *ilocal = NULL;
1882: PetscHSetI seen;
1884: if (PetscDefined(USE_DEBUG)) {
1885: PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL);
1886: PetscHSetICreate(&seen);
1887: for (i = 0; i < nleaves; i++) {
1888: const PetscInt leaf = ilocal ? ilocal[i] : i;
1889: PetscHSetIAdd(seen, leaf);
1890: }
1891: PetscHSetIGetSize(seen, &n);
1893: PetscHSetIDestroy(&seen);
1894: }
1895: return 0;
1896: }
1898: /*@
1899: PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
1901: Input Parameters:
1902: + sfA - The first `PetscSF`
1903: - sfB - The second `PetscSF`
1905: Output Parameters:
1906: . sfBA - The composite `PetscSF`
1908: Level: developer
1910: Notes:
1911: Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
1912: forests, i.e. the same leaf is not connected with different roots.
1914: sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1915: a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1916: nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1917: Bcast on sfA, then a Bcast on sfB, on connected nodes.
1919: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1920: @*/
1921: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1922: {
1923: const PetscSFNode *remotePointsA, *remotePointsB;
1924: PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
1925: const PetscInt *localPointsA, *localPointsB;
1926: PetscInt *localPointsBA;
1927: PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
1928: PetscBool denseB;
1931: PetscSFCheckGraphSet(sfA, 1);
1933: PetscSFCheckGraphSet(sfB, 2);
1936: PetscSFCheckLeavesUnique_Private(sfA);
1937: PetscSFCheckLeavesUnique_Private(sfB);
1939: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
1940: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
1941: /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size */
1942: /* numRootsB; otherwise, garbage will be broadcast. */
1943: /* Example (comm size = 1): */
1944: /* sfA: 0 <- (0, 0) */
1945: /* sfB: 100 <- (0, 0) */
1946: /* 101 <- (0, 1) */
1947: /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target */
1948: /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would */
1949: /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on */
1950: /* remotePointsA; if not recast, point 101 would receive a garbage value. */
1951: PetscMalloc1(numRootsB, &reorderedRemotePointsA);
1952: for (i = 0; i < numRootsB; i++) {
1953: reorderedRemotePointsA[i].rank = -1;
1954: reorderedRemotePointsA[i].index = -1;
1955: }
1956: for (i = 0; i < numLeavesA; i++) {
1957: PetscInt localp = localPointsA ? localPointsA[i] : i;
1959: if (localp >= numRootsB) continue;
1960: reorderedRemotePointsA[localp] = remotePointsA[i];
1961: }
1962: remotePointsA = reorderedRemotePointsA;
1963: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
1964: PetscMalloc1(maxleaf - minleaf + 1, &leafdataB);
1965: for (i = 0; i < maxleaf - minleaf + 1; i++) {
1966: leafdataB[i].rank = -1;
1967: leafdataB[i].index = -1;
1968: }
1969: PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1970: PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE);
1971: PetscFree(reorderedRemotePointsA);
1973: denseB = (PetscBool)!localPointsB;
1974: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1975: if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
1976: else numLeavesBA++;
1977: }
1978: if (denseB) {
1979: localPointsBA = NULL;
1980: remotePointsBA = leafdataB;
1981: } else {
1982: PetscMalloc1(numLeavesBA, &localPointsBA);
1983: PetscMalloc1(numLeavesBA, &remotePointsBA);
1984: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
1985: const PetscInt l = localPointsB ? localPointsB[i] : i;
1987: if (leafdataB[l - minleaf].rank == -1) continue;
1988: remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
1989: localPointsBA[numLeavesBA] = l;
1990: numLeavesBA++;
1991: }
1992: PetscFree(leafdataB);
1993: }
1994: PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
1995: PetscSFSetFromOptions(*sfBA);
1996: PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
1997: return 0;
1998: }
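/* A tiny worked example of composition (hypothetical, comm size = 1), matching the inline
   example in the code above: sfA connects leaf 0 to root (0, 0); sfB connects leaves 100 and
   101 to roots (0, 0) and (0, 1). Only root 0 of sfB is fed by a leaf of sfA, so the
   composite sfBA has the single edge 100 <- (0, 0), with sfA's roots as its roots. */
static PetscErrorCode SketchComposeExample(PetscSF *sfBA)
{
  PetscSFNode remoteA[1] = {{0, 0}};         /* sfA: leaf 0 <- root (0, 0) */
  PetscSFNode remoteB[2] = {{0, 0}, {0, 1}}; /* sfB: leaves 100, 101 <- roots (0, 0), (0, 1) */
  PetscInt    leavesB[2] = {100, 101};
  PetscSF     sfA, sfB;

  PetscSFCreate(PETSC_COMM_SELF, &sfA);
  PetscSFSetGraph(sfA, 1, 1, NULL, PETSC_COPY_VALUES, remoteA, PETSC_COPY_VALUES);
  PetscSFCreate(PETSC_COMM_SELF, &sfB);
  PetscSFSetGraph(sfB, 2, 2, leavesB, PETSC_COPY_VALUES, remoteB, PETSC_COPY_VALUES);
  PetscSFCompose(sfA, sfB, sfBA); /* sfBA: 1 root, 1 leaf at local index 100 */
  PetscSFDestroy(&sfA);
  PetscSFDestroy(&sfB);
  return 0;
}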
2000: /*@
2001: PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2003: Input Parameters:
2004: + sfA - The first `PetscSF`
2005: - sfB - The second `PetscSF`
2007: Output Parameters:
2008: . sfBA - The composite `PetscSF`.
2010: Level: developer
2012: Notes:
2013: Currently, the two SFs must be defined on congruent communicators and they must be true star
2014: forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2015: second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2017: sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2018: a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2019: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2020: a Reduce on sfB, on connected roots.
2022: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2023: @*/
2024: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2025: {
2026: const PetscSFNode *remotePointsA, *remotePointsB;
2027: PetscSFNode *remotePointsBA;
2028: const PetscInt *localPointsA, *localPointsB;
2029: PetscSFNode *reorderedRemotePointsA = NULL;
2030: PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2031: MPI_Op op;
2032: #if defined(PETSC_USE_64BIT_INDICES)
2033: PetscBool iswin;
2034: #endif
2037: PetscSFCheckGraphSet(sfA, 1);
2039: PetscSFCheckGraphSet(sfB, 2);
2042: PetscSFCheckLeavesUnique_Private(sfA);
2043: PetscSFCheckLeavesUnique_Private(sfB);
2045: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2046: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
2048: /* TODO: Check roots of sfB have degree of 1 */
2049: /* Once we implement it, we can replace the MPI_MAXLOC
2050: with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2051: We use MPI_MAXLOC only to have a deterministic output from this routine if
2052: the root condition is not met.
2053: */
2054: op = MPI_MAXLOC;
2055: #if defined(PETSC_USE_64BIT_INDICES)
2056: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2057: PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin);
2058: if (iswin) op = MPI_REPLACE;
2059: #endif
2061: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2062: PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA);
2063: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2064: reorderedRemotePointsA[i].rank = -1;
2065: reorderedRemotePointsA[i].index = -1;
2066: }
2067: if (localPointsA) {
2068: for (i = 0; i < numLeavesA; i++) {
2069: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2070: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2071: }
2072: } else {
2073: for (i = 0; i < numLeavesA; i++) {
2074: if (i > maxleaf || i < minleaf) continue;
2075: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2076: }
2077: }
2079: PetscMalloc1(numRootsB, &localPointsBA);
2080: PetscMalloc1(numRootsB, &remotePointsBA);
2081: for (i = 0; i < numRootsB; i++) {
2082: remotePointsBA[i].rank = -1;
2083: remotePointsBA[i].index = -1;
2084: }
2086: PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2087: PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op);
2088: PetscFree(reorderedRemotePointsA);
2089: for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2090: if (remotePointsBA[i].rank == -1) continue;
2091: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2092: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2093: localPointsBA[numLeavesBA] = i;
2094: numLeavesBA++;
2095: }
2096: PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA);
2097: PetscSFSetFromOptions(*sfBA);
2098: PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER);
2099: return 0;
2100: }
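/* A tiny worked example of composing with an inverse (hypothetical, comm size = 1): sfA
   connects leaf 0 to root (0, 0); sfB has 3 roots and connects the same leaf 0 to root
   (0, 2), so every root of sfB has degree at most 1. Inverting sfB turns its roots into
   leaves, so the composite sfBA has the single edge 2 <- (0, 0), rooted at sfA's roots. */
static PetscErrorCode SketchComposeInverseExample(PetscSF *sfBA)
{
  PetscSFNode remoteA[1] = {{0, 0}}; /* sfA: leaf 0 <- root (0, 0) */
  PetscSFNode remoteB[1] = {{0, 2}}; /* sfB: leaf 0 <- root (0, 2) out of 3 roots */
  PetscSF     sfA, sfB;

  PetscSFCreate(PETSC_COMM_SELF, &sfA);
  PetscSFSetGraph(sfA, 1, 1, NULL, PETSC_COPY_VALUES, remoteA, PETSC_COPY_VALUES);
  PetscSFCreate(PETSC_COMM_SELF, &sfB);
  PetscSFSetGraph(sfB, 3, 1, NULL, PETSC_COPY_VALUES, remoteB, PETSC_COPY_VALUES);
  PetscSFComposeInverse(sfA, sfB, sfBA); /* sfBA: 1 root, 1 leaf at local index 2 */
  PetscSFDestroy(&sfA);
  PetscSFDestroy(&sfB);
  return 0;
}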
2102: /*
2103: PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2105: Input Parameters:
2106: . sf - The global `PetscSF`
2108: Output Parameters:
2109: . out - The local `PetscSF`
2111: .seealso: `PetscSF`, `PetscSFCreate()`
2112: */
2113: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2114: {
2115: MPI_Comm comm;
2116: PetscMPIInt myrank;
2117: const PetscInt *ilocal;
2118: const PetscSFNode *iremote;
2119: PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
2120: PetscSFNode *liremote;
2121: PetscSF lsf;
2124: if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2125: else {
2126: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2127: PetscObjectGetComm((PetscObject)sf, &comm);
2128: MPI_Comm_rank(comm, &myrank);
2130: /* Find out local edges and build a local SF */
2131: PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote);
2132: for (i = lnleaves = 0; i < nleaves; i++) {
2133: if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2134: }
2135: PetscMalloc1(lnleaves, &lilocal);
2136: PetscMalloc1(lnleaves, &liremote);
2138: for (i = j = 0; i < nleaves; i++) {
2139: if (iremote[i].rank == (PetscInt)myrank) {
2140: lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2141: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2142: liremote[j].index = iremote[i].index;
2143: j++;
2144: }
2145: }
2146: PetscSFCreate(PETSC_COMM_SELF, &lsf);
2147: PetscSFSetFromOptions(lsf);
2148: PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER);
2149: PetscSFSetUp(lsf);
2150: *out = lsf;
2151: }
2152: return 0;
2153: }
2155: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2156: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2157: {
2158: PetscMemType rootmtype, leafmtype;
2161: PetscSFSetUp(sf);
2162: PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0);
2163: PetscGetMemType(rootdata, &rootmtype);
2164: PetscGetMemType(leafdata, &leafmtype);
2165: PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2166: PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0);
2167: return 0;
2168: }
2170: /*@
2171: PetscSFConcatenate - concatenate multiple `PetscSF` into one
2173: Input Parameters:
2174: + comm - the communicator
2175: . nsfs - the number of input `PetscSF`
2176: . sfs - the array of input `PetscSF`
2177: . shareRoots - flag indicating whether the roots of the input `PetscSF`s are taken as shared (`PETSC_TRUE`) or as separate and concatenated (`PETSC_FALSE`)
2178: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or NULL for contiguous storage
2180: Output Parameters:
2181: . newsf - The resulting `PetscSF`
2183: Level: developer
2185: Notes:
2186: The communicator of all SFs in sfs must be comm.
2188: The offsets in leafOffsets are added to the original leaf indices.
2190: If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2191: In this case, NULL is also passed as ilocal to the resulting SF.
2193: If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2194: In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2196: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2197: @*/
2198: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2199: {
2200: PetscInt i, s, nLeaves, nRoots;
2201: PetscInt *leafArrayOffsets;
2202: PetscInt *ilocal_new;
2203: PetscSFNode *iremote_new;
2204: PetscInt *rootOffsets;
2205: PetscBool all_ilocal_null = PETSC_FALSE;
2206: PetscMPIInt rank;
2208: {
2209: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2211: PetscSFCreate(comm, &dummy);
2214: for (i = 0; i < nsfs; i++) {
2217: }
2221: PetscSFDestroy(&dummy);
2222: }
2223: if (!nsfs) {
2224: PetscSFCreate(comm, newsf);
2225: PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2226: return 0;
2227: }
2228: MPI_Comm_rank(comm, &rank);
2230: PetscCalloc1(nsfs + 1, &rootOffsets);
2231: if (shareRoots) {
2232: PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2233: if (PetscDefined(USE_DEBUG)) {
2234: for (s = 1; s < nsfs; s++) {
2235: PetscInt nr;
2237: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2239: }
2240: }
2241: } else {
2242: for (s = 0; s < nsfs; s++) {
2243: PetscInt nr;
2245: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2246: rootOffsets[s + 1] = rootOffsets[s] + nr;
2247: }
2248: nRoots = rootOffsets[nsfs];
2249: }
2251: /* Calculate leaf array offsets and automatic root offsets */
2252: PetscMalloc1(nsfs + 1, &leafArrayOffsets);
2253: leafArrayOffsets[0] = 0;
2254: for (s = 0; s < nsfs; s++) {
2255: PetscInt nl;
2257: PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2258: leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2259: }
2260: nLeaves = leafArrayOffsets[nsfs];
2262: if (!leafOffsets) {
2263: all_ilocal_null = PETSC_TRUE;
2264: for (s = 0; s < nsfs; s++) {
2265: const PetscInt *ilocal;
2267: PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2268: if (ilocal) {
2269: all_ilocal_null = PETSC_FALSE;
2270: break;
2271: }
2272: }
2274: }
2276: /* Renumber and concatenate local leaves */
2277: ilocal_new = NULL;
2278: if (!all_ilocal_null) {
2279: PetscMalloc1(nLeaves, &ilocal_new);
2280: for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2281: for (s = 0; s < nsfs; s++) {
2282: const PetscInt *ilocal;
2283: PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2284: PetscInt i, nleaves_l;
2286: PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2287: for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2288: }
2289: }
2291: /* Renumber and concatenate remote roots */
2292: PetscMalloc1(nLeaves, &iremote_new);
2293: for (i = 0; i < nLeaves; i++) {
2294: iremote_new[i].rank = -1;
2295: iremote_new[i].index = -1;
2296: }
2297: for (s = 0; s < nsfs; s++) {
2298: PetscInt i, nl, nr;
2299: PetscSF tmp_sf;
2300: const PetscSFNode *iremote;
2301: PetscSFNode *tmp_rootdata;
2302: PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2304: PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2305: PetscSFCreate(comm, &tmp_sf);
2306: /* create helper SF with contiguous leaves */
2307: PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES);
2308: PetscSFSetUp(tmp_sf);
2309: PetscMalloc1(nr, &tmp_rootdata);
2310: for (i = 0; i < nr; i++) {
2311: tmp_rootdata[i].index = i + rootOffsets[s];
2312: tmp_rootdata[i].rank = (PetscInt)rank;
2313: }
2314: PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2315: PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2316: PetscSFDestroy(&tmp_sf);
2317: PetscFree(tmp_rootdata);
2318: }
2320: /* Build the new SF */
2321: PetscSFCreate(comm, newsf);
2322: PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2323: PetscSFSetUp(*newsf);
2324: PetscFree(rootOffsets);
2325: PetscFree(leafArrayOffsets);
2326: return 0;
2327: }
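/* A small sketch of concatenation (hypothetical, comm size = 1): two SFs with contiguous leaf
   storage are concatenated with separate (non-shared) roots, so leafOffsets may be NULL. The
   result has 1 + 2 roots and 1 + 2 leaves, with sf1's remote root indices shifted past sf0's. */
static PetscErrorCode SketchConcatenateExample(PetscSF *newsf)
{
  PetscSFNode remote0[1] = {{0, 0}};         /* sf0: 1 root, leaf 0 <- (0, 0) */
  PetscSFNode remote1[2] = {{0, 0}, {0, 1}}; /* sf1: 2 roots, leaves 0, 1 <- (0, 0), (0, 1) */
  PetscSF     sfs[2];

  PetscSFCreate(PETSC_COMM_SELF, &sfs[0]);
  PetscSFSetGraph(sfs[0], 1, 1, NULL, PETSC_COPY_VALUES, remote0, PETSC_COPY_VALUES);
  PetscSFCreate(PETSC_COMM_SELF, &sfs[1]);
  PetscSFSetGraph(sfs[1], 2, 2, NULL, PETSC_COPY_VALUES, remote1, PETSC_COPY_VALUES);
  PetscSFConcatenate(PETSC_COMM_SELF, 2, sfs, PETSC_FALSE, NULL, newsf);
  /* newsf: 3 roots, 3 leaves; the leaves of sfs[1] now reference roots 1 and 2 */
  PetscSFDestroy(&sfs[0]);
  PetscSFDestroy(&sfs[1]);
  return 0;
}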