Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/hashmapi.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <petscdevice_cuda.h>
8: #endif
10: #if defined(PETSC_HAVE_HIP)
11: #include <hip/hip_runtime.h>
12: #endif
14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
15: extern void PetscSFCheckGraphSet(PetscSF, int);
16: #else
17: #if defined(PETSC_USE_DEBUG)
18: #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
19: #else
20: #define PetscSFCheckGraphSet(sf, arg) \
21: do { \
22: } while (0)
23: #endif
24: #endif
26: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
27: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
29: /*@
30: PetscSFCreate - create a star forest communication context
32: Collective
34: Input Parameter:
35: . comm - communicator on which the star forest will operate
37: Output Parameter:
38: . sf - new star forest context
40: Options Database Key:
41: + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
42: . -sf_type window - Use MPI-3 one-sided window for communication
43: . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
44: - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46: Level: intermediate
48: Note:
49: When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
50: `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
51: `SF`s are optimized and have better performance than the general `SF`s.
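Example Usage:
A minimal sketch of the typical life cycle of a `PetscSF`; the graph set here (two roots per process, one leaf referencing root 1 on process 0) is purely hypothetical:
.vb
  PetscSF     sf;
  PetscInt    ilocal[1];
  PetscSFNode iremote[1];

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf));
  ilocal[0]        = 0; // leaf 0 on this process ...
  iremote[0].rank  = 0; // ... references root 1 owned by process 0
  iremote[0].index = 1;
  PetscCall(PetscSFSetGraph(sf, 2, 1, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
  // communicate with PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd(), ...
  PetscCall(PetscSFDestroy(&sf));
.ve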
53: .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
54: @*/
55: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
56: {
57: PetscSF b;
59: PetscFunctionBegin;
60: PetscAssertPointer(sf, 2);
61: PetscCall(PetscSFInitializePackage());
63: PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
64: b->nroots = -1;
65: b->nleaves = -1;
66: b->minleaf = PETSC_INT_MAX;
67: b->maxleaf = PETSC_INT_MIN;
68: b->nranks = -1;
69: b->rankorder = PETSC_TRUE;
70: b->ingroup = MPI_GROUP_NULL;
71: b->outgroup = MPI_GROUP_NULL;
72: b->graphset = PETSC_FALSE;
73: #if defined(PETSC_HAVE_DEVICE)
74: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
75: b->use_stream_aware_mpi = PETSC_FALSE;
76: b->unknown_input_stream = PETSC_FALSE;
77: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
78: b->backend = PETSCSF_BACKEND_KOKKOS;
79: #elif defined(PETSC_HAVE_CUDA)
80: b->backend = PETSCSF_BACKEND_CUDA;
81: #elif defined(PETSC_HAVE_HIP)
82: b->backend = PETSCSF_BACKEND_HIP;
83: #endif
85: #if defined(PETSC_HAVE_NVSHMEM)
86: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
87: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
88: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
89: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
90: #endif
91: #endif
92: b->vscat.from_n = -1;
93: b->vscat.to_n = -1;
94: b->vscat.unit = MPIU_SCALAR;
95: *sf = b;
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: /*@
100: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
102: Collective
104: Input Parameter:
105: . sf - star forest
107: Level: advanced
109: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
110: @*/
111: PetscErrorCode PetscSFReset(PetscSF sf)
112: {
113: PetscFunctionBegin;
115: PetscTryTypeMethod(sf, Reset);
116: PetscCall(PetscSFDestroy(&sf->rankssf));
118: sf->nroots = -1;
119: sf->nleaves = -1;
120: sf->minleaf = PETSC_INT_MAX;
121: sf->maxleaf = PETSC_INT_MIN;
122: sf->mine = NULL;
123: sf->remote = NULL;
124: sf->graphset = PETSC_FALSE;
125: PetscCall(PetscFree(sf->mine_alloc));
126: PetscCall(PetscFree(sf->remote_alloc));
127: sf->nranks = -1;
128: PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
129: sf->degreeknown = PETSC_FALSE;
130: PetscCall(PetscFree(sf->degree));
131: if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
132: if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
134: if (sf->multi) sf->multi->multi = NULL;
135: PetscCall(PetscSFDestroy(&sf->multi));
137: PetscCall(PetscLayoutDestroy(&sf->map));
139: #if defined(PETSC_HAVE_DEVICE)
140: for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
141: #endif
143: sf->setupcalled = PETSC_FALSE;
144: PetscFunctionReturn(PETSC_SUCCESS);
145: }
147: /*@
148: PetscSFSetType - Set the `PetscSF` communication implementation
150: Collective
152: Input Parameters:
153: + sf - the `PetscSF` context
154: - type - a known method
155: .vb
156: PETSCSFWINDOW - MPI-2/3 one-sided
157: PETSCSFBASIC - basic implementation using MPI-1 two-sided
158: .ve
160: Options Database Key:
161: . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
163: Level: intermediate
165: Notes:
166: See `PetscSFType` for possible values
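Example Usage:
A small sketch, assuming the type is hard-coded rather than taken from the options database:
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetType(sf, PETSCSFWINDOW)); // or PETSCSFBASIC, etc.
.ve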
168: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169: @*/
170: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171: {
172: PetscBool match;
173: PetscErrorCode (*r)(PetscSF);
175: PetscFunctionBegin;
177: PetscAssertPointer(type, 2);
179: PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180: if (match) PetscFunctionReturn(PETSC_SUCCESS);
182: PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183: PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184: /* Destroy the previous PetscSF implementation context */
185: PetscTryTypeMethod(sf, Destroy);
186: PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187: PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188: PetscCall((*r)(sf));
189: PetscFunctionReturn(PETSC_SUCCESS);
190: }
192: /*@
193: PetscSFGetType - Get the `PetscSF` communication implementation
195: Not Collective
197: Input Parameter:
198: . sf - the `PetscSF` context
200: Output Parameter:
201: . type - the `PetscSF` type name
203: Level: intermediate
205: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
206: @*/
207: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208: {
209: PetscFunctionBegin;
211: PetscAssertPointer(type, 2);
212: *type = ((PetscObject)sf)->type_name;
213: PetscFunctionReturn(PETSC_SUCCESS);
214: }
216: /*@
217: PetscSFDestroy - destroy a star forest
219: Collective
221: Input Parameter:
222: . sf - address of star forest
224: Level: intermediate
226: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
227: @*/
228: PetscErrorCode PetscSFDestroy(PetscSF *sf)
229: {
230: PetscFunctionBegin;
231: if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233: if (--((PetscObject)*sf)->refct > 0) {
234: *sf = NULL;
235: PetscFunctionReturn(PETSC_SUCCESS);
236: }
237: PetscCall(PetscSFReset(*sf));
238: PetscTryTypeMethod(*sf, Destroy);
239: PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
240: if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242: if ((*sf)->use_stream_aware_mpi) {
243: PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244: PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245: }
246: #endif
247: PetscCall(PetscHeaderDestroy(sf));
248: PetscFunctionReturn(PETSC_SUCCESS);
249: }
251: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252: {
253: PetscInt i, nleaves;
254: PetscMPIInt size;
255: const PetscInt *ilocal;
256: const PetscSFNode *iremote;
258: PetscFunctionBegin;
259: if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262: for (i = 0; i < nleaves; i++) {
263: const PetscInt rank = iremote[i].rank;
264: const PetscInt remote = iremote[i].index;
265: const PetscInt leaf = ilocal ? ilocal[i] : i;
266: PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267: PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268: PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269: }
270: PetscFunctionReturn(PETSC_SUCCESS);
271: }
273: /*@
274: PetscSFSetUp - set up communication structures for a `PetscSF`; after this is done it may be used to perform communication
276: Collective
278: Input Parameter:
279: . sf - star forest communication object
281: Level: beginner
283: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
284: @*/
285: PetscErrorCode PetscSFSetUp(PetscSF sf)
286: {
287: PetscFunctionBegin;
289: PetscSFCheckGraphSet(sf, 1);
290: if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
291: PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
292: PetscCall(PetscSFCheckGraphValid_Private(sf));
293: if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294: PetscTryTypeMethod(sf, SetUp);
295: #if defined(PETSC_HAVE_CUDA)
296: if (sf->backend == PETSCSF_BACKEND_CUDA) {
297: sf->ops->Malloc = PetscSFMalloc_CUDA;
298: sf->ops->Free = PetscSFFree_CUDA;
299: }
300: #endif
301: #if defined(PETSC_HAVE_HIP)
302: if (sf->backend == PETSCSF_BACKEND_HIP) {
303: sf->ops->Malloc = PetscSFMalloc_HIP;
304: sf->ops->Free = PetscSFFree_HIP;
305: }
306: #endif
308: #if defined(PETSC_HAVE_KOKKOS)
309: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
310: sf->ops->Malloc = PetscSFMalloc_Kokkos;
311: sf->ops->Free = PetscSFFree_Kokkos;
312: }
313: #endif
314: PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
315: sf->setupcalled = PETSC_TRUE;
316: PetscFunctionReturn(PETSC_SUCCESS);
317: }
319: /*@
320: PetscSFSetFromOptions - set `PetscSF` options using the options database
322: Logically Collective
324: Input Parameter:
325: . sf - star forest
327: Options Database Keys:
328: + -sf_type - implementation type, see `PetscSFSetType()`
329: . -sf_rank_order - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
330: . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
331: use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
332: If true, this option only works with `-use_gpu_aware_mpi 1`.
333: . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
334: If true, this option only works with `-use_gpu_aware_mpi 1`.
336: - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
337: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
338: the only available backend is kokkos.
340: Level: intermediate
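Example Usage:
A sketch of the usual calling sequence; the option values shown on the hypothetical command line are illustrative only:
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf)); // picks up -sf_type, -sf_backend, etc.
  // run with, e.g.:  ./myapp -sf_type neighbor -sf_backend kokkos
.ve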
342: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
343: @*/
344: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345: {
346: PetscSFType deft;
347: char type[256];
348: PetscBool flg;
350: PetscFunctionBegin;
352: PetscObjectOptionsBegin((PetscObject)sf);
353: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
354: PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
355: PetscCall(PetscSFSetType(sf, flg ? type : deft));
356: PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357: PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
358: #if defined(PETSC_HAVE_DEVICE)
359: {
360: char backendstr[32] = {0};
361: PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
362: /* Change the defaults set in PetscSFCreate() with command line options */
363: PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
364: PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
365: PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
366: PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
367: PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
368: PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
369: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
370: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
371: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
372: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
373: else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
374: #elif defined(PETSC_HAVE_KOKKOS)
375: PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376: #endif
378: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379: if (sf->use_stream_aware_mpi) {
380: MPI_Info info;
382: PetscCallMPI(MPI_Info_create(&info));
383: PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384: PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385: PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386: PetscCallMPI(MPI_Info_free(&info));
387: PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388: }
389: #endif
390: }
391: #endif
392: PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393: PetscOptionsEnd();
394: PetscFunctionReturn(PETSC_SUCCESS);
395: }
397: /*@
398: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
400: Logically Collective
402: Input Parameters:
403: + sf - star forest
404: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
406: Level: advanced
408: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
409: @*/
410: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411: {
412: PetscFunctionBegin;
415: PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
416: sf->rankorder = flg;
417: PetscFunctionReturn(PETSC_SUCCESS);
418: }
420: /*@
421: PetscSFSetGraph - Set a parallel star forest
423: Collective
425: Input Parameters:
426: + sf - star forest
427: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
428: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
429: . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
430: during setup in debug mode)
431: . localmode - copy mode for `ilocal`
432: . iremote - remote locations of root vertices for each leaf on the current process, an array of `nleaves` `PetscSFNode` entries
433: (locations must be >= 0, enforced during setup in debug mode)
434: - remotemode - copy mode for `iremote`
436: Level: intermediate
438: Notes:
439: Leaf indices in `ilocal` must be unique, otherwise an error occurs.
441: Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
442: In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
443: PETSc might modify the respective array;
444: if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
445: Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact, and a const array can be passed (but a cast to non-const is needed).
447: Fortran Notes:
448: In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
450: Developer Notes:
451: We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
452: This also makes it easy to compare the leaf sets of two `PetscSF`s.
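Example Usage:
A sketch, assuming a hypothetical layout with four roots per process and two contiguous leaves per process that reference roots 0 and 1 on the next process (wrapping around):
.vb
  PetscSF     sf;
  PetscSFNode iremote[2];
  PetscMPIInt rank, size;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  for (PetscInt i = 0; i < 2; i++) {
    iremote[i].rank  = (rank + 1) % size;
    iremote[i].index = i;
  }
  // ilocal == NULL means the leaves are stored contiguously in the leafdata buffers
  PetscCall(PetscSFSetGraph(sf, 4, 2, NULL, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
.ve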
454: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
455: @*/
456: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457: {
458: PetscBool unique, contiguous;
460: PetscFunctionBegin;
462: if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
463: if (nleaves > 0) PetscAssertPointer(iremote, 6);
464: PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
465: PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
466: /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
467: * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
468: PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
469: PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
471: if (sf->nroots >= 0) { /* Reset only if graph already set */
472: PetscCall(PetscSFReset(sf));
473: }
475: PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
476: if (PetscDefined(USE_DEBUG)) {
477: PetscMPIInt size;
479: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480: for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
481: }
483: sf->nroots = nroots;
484: sf->nleaves = nleaves;
486: if (localmode == PETSC_COPY_VALUES && ilocal) {
487: PetscInt *tlocal = NULL;
489: PetscCall(PetscMalloc1(nleaves, &tlocal));
490: PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491: ilocal = tlocal;
492: }
493: if (remotemode == PETSC_COPY_VALUES) {
494: PetscSFNode *tremote = NULL;
496: PetscCall(PetscMalloc1(nleaves, &tremote));
497: PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498: iremote = tremote;
499: }
501: if (nleaves && ilocal) {
502: PetscSFNode work;
504: PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
505: PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506: unique = PetscNot(unique);
507: PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508: sf->minleaf = ilocal[0];
509: sf->maxleaf = ilocal[nleaves - 1];
510: contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
511: } else {
512: sf->minleaf = 0;
513: sf->maxleaf = nleaves - 1;
514: unique = PETSC_TRUE;
515: contiguous = PETSC_TRUE;
516: }
518: if (contiguous) {
519: if (localmode == PETSC_USE_POINTER) {
520: ilocal = NULL;
521: } else {
522: PetscCall(PetscFree(ilocal));
523: }
524: }
525: sf->mine = ilocal;
526: if (localmode == PETSC_USE_POINTER) {
527: sf->mine_alloc = NULL;
528: } else {
529: sf->mine_alloc = ilocal;
530: }
531: if (PetscDefined(USE_DEBUG)) {
532: PetscMPIInt size;
534: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535: for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
536: }
537: sf->remote = iremote;
538: if (remotemode == PETSC_USE_POINTER) {
539: sf->remote_alloc = NULL;
540: } else {
541: sf->remote_alloc = iremote;
542: }
543: PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
544: sf->graphset = PETSC_TRUE;
545: PetscFunctionReturn(PETSC_SUCCESS);
546: }
548: /*@
549: PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
551: Collective
553: Input Parameters:
554: + sf - The `PetscSF`
555: . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
558: Level: intermediate
560: Notes:
561: It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
562: `n` and `N` are the local and global sizes of `x` respectively.
564: With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that doing `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to
565: sequential vectors `y` on all MPI processes.
567: With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that doing `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a
568: sequential vector `y` on rank 0.
570: In the above cases, entries of `x` are roots and entries of `y` are leaves.
572: With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
573: creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
574: of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that this does
575: not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
576: items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.
578: In this case, roots and leaves are symmetric.
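Example Usage:
A sketch of the allgather pattern, assuming a parallel vector `x` created elsewhere; its layout provides the root space:
.vb
  Vec         x;
  PetscLayout map;
  PetscSF     sf;

  PetscCall(VecGetLayout(x, &map));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)x), &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  // PetscSFBcastBegin()/PetscSFBcastEnd() on sf now copies the entries of x to every process
.ve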
580: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
581: @*/
582: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
583: {
584: MPI_Comm comm;
585: PetscInt n, N, res[2];
586: PetscMPIInt rank, size;
587: PetscSFType type;
589: PetscFunctionBegin;
591: if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
592: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
593: PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
594: PetscCallMPI(MPI_Comm_rank(comm, &rank));
595: PetscCallMPI(MPI_Comm_size(comm, &size));
597: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
598: PetscInt sizei = size;
600: type = PETSCSFALLTOALL;
601: PetscCall(PetscLayoutCreate(comm, &sf->map));
602: PetscCall(PetscLayoutSetLocalSize(sf->map, size));
603: PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
604: PetscCall(PetscLayoutSetUp(sf->map));
605: } else {
606: PetscCall(PetscLayoutGetLocalSize(map, &n));
607: PetscCall(PetscLayoutGetSize(map, &N));
608: res[0] = n;
609: res[1] = -n;
610: /* Check if n is the same over all ranks so that we can optimize it */
611: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
612: if (res[0] == -res[1]) { /* same n */
613: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
614: } else {
615: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
616: }
617: PetscCall(PetscLayoutReference(map, &sf->map));
618: }
619: PetscCall(PetscSFSetType(sf, type));
621: sf->pattern = pattern;
622: sf->mine = NULL; /* Contiguous */
624: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
625: Also set other easy stuff.
626: */
627: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
628: sf->nleaves = N;
629: sf->nroots = n;
630: sf->nranks = size;
631: sf->minleaf = 0;
632: sf->maxleaf = N - 1;
633: } else if (pattern == PETSCSF_PATTERN_GATHER) {
634: sf->nleaves = rank ? 0 : N;
635: sf->nroots = n;
636: sf->nranks = rank ? 0 : size;
637: sf->minleaf = 0;
638: sf->maxleaf = rank ? -1 : N - 1;
639: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
640: sf->nleaves = size;
641: sf->nroots = size;
642: sf->nranks = size;
643: sf->minleaf = 0;
644: sf->maxleaf = size - 1;
645: }
646: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
647: sf->graphset = PETSC_TRUE;
648: PetscFunctionReturn(PETSC_SUCCESS);
649: }
651: /*@
652: PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
654: Collective
656: Input Parameter:
657: . sf - star forest to invert
659: Output Parameter:
660: . isf - inverse of `sf`
662: Level: advanced
664: Notes:
665: All roots must have degree 1.
667: The local space may be a permutation, but cannot be sparse.
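Example Usage:
A brief sketch, assuming `sf` already has a graph in which every root has exactly one leaf:
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  // a reduce on isf moves data in the same direction as a bcast on sf, and vice versa
  PetscCall(PetscSFDestroy(&isf));
.ve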
669: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
670: @*/
671: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
672: {
673: PetscMPIInt rank;
674: PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
675: const PetscInt *ilocal;
676: PetscSFNode *roots, *leaves;
678: PetscFunctionBegin;
680: PetscSFCheckGraphSet(sf, 1);
681: PetscAssertPointer(isf, 2);
683: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
684: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
686: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
687: PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
688: for (i = 0; i < maxlocal; i++) {
689: leaves[i].rank = rank;
690: leaves[i].index = i;
691: }
692: for (i = 0; i < nroots; i++) {
693: roots[i].rank = -1;
694: roots[i].index = -1;
695: }
696: PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
697: PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
699: /* Check whether our leaves are sparse */
700: for (i = 0, count = 0; i < nroots; i++)
701: if (roots[i].rank >= 0) count++;
702: if (count == nroots) newilocal = NULL;
703: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
704: for (i = 0, count = 0; i < nroots; i++) {
705: if (roots[i].rank >= 0) {
706: newilocal[count] = i;
707: roots[count].rank = roots[i].rank;
708: roots[count].index = roots[i].index;
709: count++;
710: }
711: }
712: }
714: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
715: PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
716: PetscCall(PetscFree2(roots, leaves));
717: PetscFunctionReturn(PETSC_SUCCESS);
718: }
720: /*@
721: PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
723: Collective
725: Input Parameters:
726: + sf - communication object to duplicate
727: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
729: Output Parameter:
730: . newsf - new communication object
732: Level: beginner
734: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
735: @*/
736: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
737: {
738: PetscSFType type;
739: MPI_Datatype dtype = MPIU_SCALAR;
741: PetscFunctionBegin;
744: PetscAssertPointer(newsf, 3);
745: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
746: PetscCall(PetscSFGetType(sf, &type));
747: if (type) PetscCall(PetscSFSetType(*newsf, type));
748: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
749: if (opt == PETSCSF_DUPLICATE_GRAPH) {
750: PetscSFCheckGraphSet(sf, 1);
751: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
752: PetscInt nroots, nleaves;
753: const PetscInt *ilocal;
754: const PetscSFNode *iremote;
755: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
756: PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
757: } else {
758: PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
759: }
760: }
761: /* Since oldtype is committed, so is newtype, according to MPI */
762: if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
763: (*newsf)->vscat.bs = sf->vscat.bs;
764: (*newsf)->vscat.unit = dtype;
765: (*newsf)->vscat.to_n = sf->vscat.to_n;
766: (*newsf)->vscat.from_n = sf->vscat.from_n;
767: /* Do not copy lsf. Build it on demand since it is rarely used */
769: #if defined(PETSC_HAVE_DEVICE)
770: (*newsf)->backend = sf->backend;
771: (*newsf)->unknown_input_stream = sf->unknown_input_stream;
772: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
773: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
774: #endif
775: PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
776: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
777: PetscFunctionReturn(PETSC_SUCCESS);
778: }
780: /*@C
781: PetscSFGetGraph - Get the graph specifying a parallel star forest
783: Not Collective
785: Input Parameter:
786: . sf - star forest
788: Output Parameters:
789: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
790: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
791: . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
792: - iremote - remote locations of root vertices for each leaf on the current process
794: Level: intermediate
796: Notes:
797: We do not currently require that the graph be set, thus `nroots` = -1 is returned if it has not been set yet
799: The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
801: Fortran Note:
802: Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
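Example Usage:
A sketch that prints every edge of the graph owned by this process; `sf` is assumed to have its graph already set:
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; // NULL ilocal means contiguous leaf storage
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (rank %" PetscInt_FMT ", root %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
.ve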
804: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
805: @*/
806: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
807: {
808: PetscFunctionBegin;
810: if (sf->ops->GetGraph) {
811: PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
812: } else {
813: if (nroots) *nroots = sf->nroots;
814: if (nleaves) *nleaves = sf->nleaves;
815: if (ilocal) *ilocal = sf->mine;
816: if (iremote) *iremote = sf->remote;
817: }
818: PetscFunctionReturn(PETSC_SUCCESS);
819: }
821: /*@
822: PetscSFGetLeafRange - Get the active leaf ranges
824: Not Collective
826: Input Parameter:
827: . sf - star forest
829: Output Parameters:
830: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
831: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
833: Level: developer
835: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
836: @*/
837: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
838: {
839: PetscFunctionBegin;
841: PetscSFCheckGraphSet(sf, 1);
842: if (minleaf) *minleaf = sf->minleaf;
843: if (maxleaf) *maxleaf = sf->maxleaf;
844: PetscFunctionReturn(PETSC_SUCCESS);
845: }
847: /*@
848: PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
850: Collective
852: Input Parameters:
853: + A - the star forest
854: . obj - Optional object that provides the prefix for the option names
855: - name - command line option
857: Level: intermediate
859: Note:
860: See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
862: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
863: @*/
864: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
865: {
866: PetscFunctionBegin;
868: PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
869: PetscFunctionReturn(PETSC_SUCCESS);
870: }
872: /*@
873: PetscSFView - view a star forest
875: Collective
877: Input Parameters:
878: + sf - star forest
879: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
881: Level: beginner
883: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
884: @*/
885: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
886: {
887: PetscBool isascii;
888: PetscViewerFormat format;
890: PetscFunctionBegin;
892: if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
894: PetscCheckSameComm(sf, 1, viewer, 2);
895: if (sf->graphset) PetscCall(PetscSFSetUp(sf));
896: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
897: if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
898: PetscMPIInt rank;
899: PetscInt j;
901: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
902: PetscCall(PetscViewerASCIIPushTab(viewer));
903: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
904: if (!sf->graphset) {
905: PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
906: PetscCall(PetscViewerASCIIPopTab(viewer));
907: PetscFunctionReturn(PETSC_SUCCESS);
908: }
909: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
910: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
911: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
912: for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
913: PetscCall(PetscViewerFlush(viewer));
914: PetscCall(PetscViewerGetFormat(viewer, &format));
915: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
916: PetscMPIInt *tmpranks, *perm;
918: PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
919: PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
920: for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
921: PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
922: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
923: for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
924: PetscMPIInt i = perm[ii];
926: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
927: for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
928: }
929: PetscCall(PetscFree2(tmpranks, perm));
930: }
931: PetscCall(PetscViewerFlush(viewer));
932: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
933: }
934: PetscCall(PetscViewerASCIIPopTab(viewer));
935: }
936: PetscTryTypeMethod(sf, View, viewer);
937: PetscFunctionReturn(PETSC_SUCCESS);
938: }
940: /*@C
941: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
943: Not Collective
945: Input Parameter:
946: . sf - star forest
948: Output Parameters:
949: + nranks - number of ranks referenced by local part
950: . ranks - [`nranks`] array of ranks
951: . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
952: . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
953: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
955: Level: developer
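Example Usage:
A sketch of walking the per-rank compressed layout described above; `sf` is assumed to be set up:
.vb
  PetscMPIInt        nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscMPIInt r = 0; r < nranks; r++) {
    for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
      // local leaf rmine[j] references root rremote[j] owned by process ranks[r]
    }
  }
.ve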
957: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
958: @*/
959: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
960: {
961: PetscFunctionBegin;
963: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
964: if (sf->ops->GetRootRanks) {
965: PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
966: } else {
967: /* The generic implementation */
968: if (nranks) *nranks = sf->nranks;
969: if (ranks) *ranks = sf->ranks;
970: if (roffset) *roffset = sf->roffset;
971: if (rmine) *rmine = sf->rmine;
972: if (rremote) *rremote = sf->rremote;
973: }
974: PetscFunctionReturn(PETSC_SUCCESS);
975: }
977: /*@C
978: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
980: Not Collective
982: Input Parameter:
983: . sf - star forest
985: Output Parameters:
986: + niranks - number of leaf ranks referencing roots on this process
987: . iranks - [`niranks`] array of ranks
988: . ioffset - [`niranks`+1] offset in `irootloc` for each rank
989: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
991: Level: developer
993: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
994: @*/
995: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
996: {
997: PetscFunctionBegin;
999: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
1000: if (sf->ops->GetLeafRanks) {
1001: PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1002: } else {
1003: PetscSFType type;
1004: PetscCall(PetscSFGetType(sf, &type));
1005: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1006: }
1007: PetscFunctionReturn(PETSC_SUCCESS);
1008: }
1010: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1011: {
1012: PetscInt i;
1013: for (i = 0; i < n; i++) {
1014: if (needle == list[i]) return PETSC_TRUE;
1015: }
1016: return PETSC_FALSE;
1017: }
1019: /*@C
1020: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1022: Collective
1024: Input Parameters:
1025: + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1026: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1028: Level: developer
1030: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1031: @*/
1032: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1033: {
1034: PetscHMapI table;
1035: PetscHashIter pos;
1036: PetscMPIInt size, groupsize, *groupranks, *ranks;
1037: PetscInt *rcount;
1038: PetscInt irank, sfnrank, ranksi;
1039: PetscMPIInt i, orank = -1;
1041: PetscFunctionBegin;
1043: PetscSFCheckGraphSet(sf, 1);
1044: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1045: PetscCall(PetscHMapICreateWithSize(10, &table));
1046: for (i = 0; i < sf->nleaves; i++) {
1047: /* Log 1-based rank */
1048: PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1049: }
1050: PetscCall(PetscHMapIGetSize(table, &sfnrank));
1051: PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1052: PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1053: PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1054: PetscHashIterBegin(table, pos);
1055: for (i = 0; i < sf->nranks; i++) {
1056: PetscHashIterGetKey(table, pos, ranksi);
1057: PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1058: PetscHashIterGetVal(table, pos, rcount[i]);
1059: PetscHashIterNext(table, pos);
1060: ranks[i]--; /* Convert back to 0-based */
1061: }
1062: PetscCall(PetscHMapIDestroy(&table));
1064: /* We expect that dgroup is reliably "small" while nranks could be large */
1065: {
1066: MPI_Group group = MPI_GROUP_NULL;
1067: PetscMPIInt *dgroupranks;
1069: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1070: PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1071: PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1072: PetscCall(PetscMalloc1(groupsize, &groupranks));
1073: for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1074: if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1075: PetscCallMPI(MPI_Group_free(&group));
1076: PetscCall(PetscFree(dgroupranks));
1077: }
1079: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1080: for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1081: for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1082: if (InList(ranks[i], groupsize, groupranks)) break;
1083: }
1084: for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1085: if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1086: }
1087: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1088: PetscMPIInt tmprank;
1089: PetscInt tmpcount;
1091: tmprank = ranks[i];
1092: tmpcount = rcount[i];
1093: ranks[i] = ranks[sf->ndranks];
1094: rcount[i] = rcount[sf->ndranks];
1095: ranks[sf->ndranks] = tmprank;
1096: rcount[sf->ndranks] = tmpcount;
1097: sf->ndranks++;
1098: }
1099: }
1100: PetscCall(PetscFree(groupranks));
1101: PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1102: if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1103: sf->roffset[0] = 0;
1104: for (i = 0; i < sf->nranks; i++) {
1105: PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1106: sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1107: rcount[i] = 0;
1108: }
1109: for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1110: /* short circuit */
1111: if (orank != sf->remote[i].rank) {
1112: /* Search for index of iremote[i].rank in sf->ranks */
1113: PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1114: PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1115: if (irank < 0) {
1116: PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1117: if (irank >= 0) irank += sf->ndranks;
1118: }
1119: }
1120: PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
1121: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1122: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1123: rcount[irank]++;
1124: }
1125: PetscCall(PetscFree2(rcount, ranks));
1126: PetscFunctionReturn(PETSC_SUCCESS);
1127: }
1129: /*@C
1130: PetscSFGetGroups - gets incoming and outgoing process groups
1132: Collective
1134: Input Parameter:
1135: . sf - star forest
1137: Output Parameters:
1138: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1139: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1141: Level: developer
1143: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1144: @*/
1145: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1146: {
1147: MPI_Group group = MPI_GROUP_NULL;
1149: PetscFunctionBegin;
1150: PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1151: if (sf->ingroup == MPI_GROUP_NULL) {
1152: PetscInt i;
1153: const PetscInt *indegree;
1154: PetscMPIInt rank, *outranks, *inranks, indegree0;
1155: PetscSFNode *remote;
1156: PetscSF bgcount;
1158: /* Compute the number of incoming ranks */
1159: PetscCall(PetscMalloc1(sf->nranks, &remote));
1160: for (i = 0; i < sf->nranks; i++) {
1161: remote[i].rank = sf->ranks[i];
1162: remote[i].index = 0;
1163: }
1164: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1165: PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1166: PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1167: PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1168: /* Enumerate the incoming ranks */
1169: PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1170: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1171: for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1172: PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1173: PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1174: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1175: PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1176: PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1177: PetscCallMPI(MPI_Group_free(&group));
1178: PetscCall(PetscFree2(inranks, outranks));
1179: PetscCall(PetscSFDestroy(&bgcount));
1180: }
1181: *incoming = sf->ingroup;
1183: if (sf->outgroup == MPI_GROUP_NULL) {
1184: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1185: PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1186: PetscCallMPI(MPI_Group_free(&group));
1187: }
1188: *outgoing = sf->outgroup;
1189: PetscFunctionReturn(PETSC_SUCCESS);
1190: }
1192: /*@
1193: PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1195: Collective
1197: Input Parameter:
1198: . sf - star forest
1200: Output Parameter:
1201: . rsf - the star forest with a single root per process to perform communications
1203: Level: developer
1205: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1206: @*/
1207: PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1208: {
1209: PetscFunctionBegin;
1211: PetscAssertPointer(rsf, 2);
1212: if (!sf->rankssf) {
1213: PetscSFNode *rremotes;
1214: const PetscMPIInt *ranks;
1215: PetscMPIInt nranks;
1217: PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1218: PetscCall(PetscMalloc1(nranks, &rremotes));
1219: for (PetscInt i = 0; i < nranks; i++) {
1220: rremotes[i].rank = ranks[i];
1221: rremotes[i].index = 0;
1222: }
1223: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1224: PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1225: }
1226: *rsf = sf->rankssf;
1227: PetscFunctionReturn(PETSC_SUCCESS);
1228: }
1230: /*@
1231: PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1233: Collective
1235: Input Parameter:
1236: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1238: Output Parameter:
1239: . multi - star forest with split roots, such that each root has degree exactly 1
1241: Level: developer
1243: Note:
1244: In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1245: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1246: edge, it is a candidate for future optimization that might involve its removal.
1248: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1249: @*/
1250: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1251: {
1252: PetscFunctionBegin;
1254: PetscAssertPointer(multi, 2);
1255: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1256: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1257: *multi = sf->multi;
1258: sf->multi->multi = sf->multi;
1259: PetscFunctionReturn(PETSC_SUCCESS);
1260: }
1261: if (!sf->multi) {
1262: const PetscInt *indegree;
1263: PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
1264: PetscSFNode *remote;
1265: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1266: PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1267: PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1268: PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1269: inoffset[0] = 0;
1270: for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1271: for (i = 0; i < maxlocal; i++) outones[i] = 1;
1272: PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1273: PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1274: for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1275: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1276: for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1277: }
1278: PetscCall(PetscMalloc1(sf->nleaves, &remote));
1279: for (i = 0; i < sf->nleaves; i++) {
1280: remote[i].rank = sf->remote[i].rank;
1281: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1282: }
1283: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1284: sf->multi->multi = sf->multi;
1285: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1286: if (sf->rankorder) { /* Sort the ranks */
1287: PetscMPIInt rank;
1288: PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1289: PetscSFNode *newremote;
1290: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1291: for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1292: PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1293: for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1294: PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1295: PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1296: /* Sort the incoming ranks at each vertex, build the inverse map */
1297: for (i = 0; i < sf->nroots; i++) {
1298: PetscInt j;
1299: for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1300: PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1301: for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1302: }
1303: PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1304: PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1305: PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1306: for (i = 0; i < sf->nleaves; i++) {
1307: newremote[i].rank = sf->remote[i].rank;
1308: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1309: }
1310: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1311: PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1312: }
1313: PetscCall(PetscFree3(inoffset, outones, outoffset));
1314: }
1315: *multi = sf->multi;
1316: PetscFunctionReturn(PETSC_SUCCESS);
1317: }
1319: /*@C
1320: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1322: Collective
1324: Input Parameters:
1325: + sf - original star forest
1326: . nselected - number of selected roots on this process
1327: - selected - indices of the selected roots on this process
1329: Output Parameter:
1330: . esf - new star forest
1332: Level: advanced
1334: Note:
1335: To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1336: be done by calling `PetscSFGetGraph()`.
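Example Usage:
A sketch, keeping only two hypothetical root indices on this process:
.vb
  PetscInt selected[2] = {0, 3}; // local root indices to keep (hypothetical)
  PetscSF  esf;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
  // root and leaf numbering is unchanged; query esf with PetscSFGetGraph() to see which leaves remain
  PetscCall(PetscSFDestroy(&esf));
.ve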
1338: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1339: @*/
1340: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1341: {
1342: PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1343: const PetscInt *ilocal;
1344: signed char *rootdata, *leafdata, *leafmem;
1345: const PetscSFNode *iremote;
1346: PetscSFNode *new_iremote;
1347: MPI_Comm comm;
1349: PetscFunctionBegin;
1351: PetscSFCheckGraphSet(sf, 1);
1352: if (nselected) PetscAssertPointer(selected, 3);
1353: PetscAssertPointer(esf, 4);
1355: PetscCall(PetscSFSetUp(sf));
1356: PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1357: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1358: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1360: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1361: PetscBool dups;
1362: PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1363: PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1364: for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1365: }
1367: if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1368: else {
1369: /* A generic version of creating embedded sf */
1370: PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1371: maxlocal = maxleaf - minleaf + 1;
1372: PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1373: leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1374: /* Tag selected roots and bcast to leaves */
1375: for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1376: PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1377: PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1379: /* Build esf with leaves that are still connected */
1380: esf_nleaves = 0;
1381: for (i = 0; i < nleaves; i++) {
1382: j = ilocal ? ilocal[i] : i;
1383: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1384: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1385: */
1386: esf_nleaves += (leafdata[j] ? 1 : 0);
1387: }
1388: PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1389: PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1390: for (i = n = 0; i < nleaves; i++) {
1391: j = ilocal ? ilocal[i] : i;
1392: if (leafdata[j]) {
1393: new_ilocal[n] = j;
1394: new_iremote[n].rank = iremote[i].rank;
1395: new_iremote[n].index = iremote[i].index;
1396: ++n;
1397: }
1398: }
1399: PetscCall(PetscSFCreate(comm, esf));
1400: PetscCall(PetscSFSetFromOptions(*esf));
1401: PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1402: PetscCall(PetscFree2(rootdata, leafmem));
1403: }
1404: PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1405: PetscFunctionReturn(PETSC_SUCCESS);
1406: }
1408: /*@C
1409: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1411: Collective
1413: Input Parameters:
1414: + sf - original star forest
1415: . nselected - number of selected leaves on this process
1416: - selected - indices of the selected leaves on this process
1418: Output Parameter:
1419: . newsf - new star forest
1421: Level: advanced
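
  Example Usage:
  A minimal sketch (the selection array is illustrative)\:
.vb
  PetscSF  newsf;
  PetscInt selected[] = {1, 3}; // local leaf indices to keep, assumed valid on this process

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, selected, &newsf));
  // ... communicate with newsf; leaf indices are not remapped ...
  PetscCall(PetscSFDestroy(&newsf));
.ve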
1423: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1424: @*/
1425: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1426: {
1427: const PetscSFNode *iremote;
1428: PetscSFNode *new_iremote;
1429: const PetscInt *ilocal;
1430: PetscInt i, nroots, *leaves, *new_ilocal;
1431: MPI_Comm comm;
1433: PetscFunctionBegin;
1435: PetscSFCheckGraphSet(sf, 1);
1436: if (nselected) PetscAssertPointer(selected, 3);
1437: PetscAssertPointer(newsf, 4);
1439:   /* Sort selected[], remove duplicates, and put the results in leaves[] */
1440: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1441: PetscCall(PetscMalloc1(nselected, &leaves));
1442: PetscCall(PetscArraycpy(leaves, selected, nselected));
1443:   PetscCall(PetscSortRemoveDupsInt(&nselected, leaves));
1444: PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1446: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1447: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1448: else {
1449: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1450: PetscCall(PetscMalloc1(nselected, &new_ilocal));
1451: PetscCall(PetscMalloc1(nselected, &new_iremote));
1452: for (i = 0; i < nselected; ++i) {
1453: const PetscInt l = leaves[i];
1454: new_ilocal[i] = ilocal ? ilocal[l] : l;
1455: new_iremote[i].rank = iremote[l].rank;
1456: new_iremote[i].index = iremote[l].index;
1457: }
1458: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1459: PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1460: }
1461: PetscCall(PetscFree(leaves));
1462: PetscFunctionReturn(PETSC_SUCCESS);
1463: }
1465: /*@C
1466: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1468: Collective
1470: Input Parameters:
1471: + sf - star forest on which to communicate
1472: . unit - data type associated with each node
1473: . rootdata - buffer to broadcast
1474: - op - operation to use for reduction
1476: Output Parameter:
1477: . leafdata - buffer to be reduced with values from each leaf's respective root
1479: Level: intermediate
1481: Note:
1482:   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1483:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, use
1484:   `PetscSFBcastWithMemTypeBegin()` instead.
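
  Example Usage:
  A minimal sketch broadcasting one `PetscScalar` per root (buffer names are illustrative, and contiguous leaf storage, i.e. `ilocal` = `NULL`, is assumed so that a leaf buffer of `nleaves` entries suffices)\:
.vb
  PetscScalar *rootdata, *leafdata;
  PetscInt     nroots, nleaves;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
  PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
  // ... fill rootdata ...
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  // ... unrelated work may overlap the communication here ...
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscFree2(rootdata, leafdata));
.ve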
1486: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1487: @*/
1488: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1489: {
1490: PetscMemType rootmtype, leafmtype;
1492: PetscFunctionBegin;
1494: PetscCall(PetscSFSetUp(sf));
1495: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1496: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1497: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1498: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1499: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1500: PetscFunctionReturn(PETSC_SUCCESS);
1501: }
1503: /*@C
1504: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1505: to `PetscSFBcastEnd()`
1507: Collective
1509: Input Parameters:
1510: + sf - star forest on which to communicate
1511: . unit - data type associated with each node
1512: . rootmtype - memory type of rootdata
1513: . rootdata - buffer to broadcast
1514: . leafmtype - memory type of leafdata
1515: - op - operation to use for reduction
1517: Output Parameter:
1518: . leafdata - buffer to be reduced with values from each leaf's respective root
1520: Level: intermediate
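
  Example Usage:
  A minimal sketch with both buffers known to be in host memory (buffer names are illustrative)\:
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve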
1522: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1523: @*/
1524: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1525: {
1526: PetscFunctionBegin;
1528: PetscCall(PetscSFSetUp(sf));
1529: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1530: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1531: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1532: PetscFunctionReturn(PETSC_SUCCESS);
1533: }
1535: /*@C
1536: PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1538: Collective
1540: Input Parameters:
1541: + sf - star forest
1542: . unit - data type
1543: . rootdata - buffer to broadcast
1544: - op - operation to use for reduction
1546: Output Parameter:
1547: . leafdata - buffer to be reduced with values from each leaf's respective root
1549: Level: intermediate
1551: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1552: @*/
1553: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1554: {
1555: PetscFunctionBegin;
1557: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1558: PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1559: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1560: PetscFunctionReturn(PETSC_SUCCESS);
1561: }
1563: /*@C
1564: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1566: Collective
1568: Input Parameters:
1569: + sf - star forest
1570: . unit - data type
1571: . leafdata - values to reduce
1572: - op - reduction operation
1574: Output Parameter:
1575: . rootdata - result of reduction of values from all leaves of each root
1577: Level: intermediate
1579: Note:
1580:   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1581:   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, use
1582:   `PetscSFReduceWithMemTypeBegin()` instead.
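
  Example Usage:
  A minimal sketch summing one `PetscScalar` per leaf into the roots (buffer names are illustrative, and contiguous leaf storage is assumed)\:
.vb
  // leaf contributions are combined into the existing root values, so rootdata is typically zeroed first for MPI_SUM
  PetscCall(PetscArrayzero(rootdata, nroots));
  PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
.ve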
1584: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1585: @*/
1586: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1587: {
1588: PetscMemType rootmtype, leafmtype;
1590: PetscFunctionBegin;
1592: PetscCall(PetscSFSetUp(sf));
1593: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1594: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1595: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1596: PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1597: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1598: PetscFunctionReturn(PETSC_SUCCESS);
1599: }
1601: /*@C
1602: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1604: Collective
1606: Input Parameters:
1607: + sf - star forest
1608: . unit - data type
1609: . leafmtype - memory type of leafdata
1610: . leafdata - values to reduce
1611: . rootmtype - memory type of rootdata
1612: - op - reduction operation
1614: Output Parameter:
1615: . rootdata - result of reduction of values from all leaves of each root
1617: Level: intermediate
1619: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1620: @*/
1621: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1622: {
1623: PetscFunctionBegin;
1625: PetscCall(PetscSFSetUp(sf));
1626: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1627: PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1628: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1629: PetscFunctionReturn(PETSC_SUCCESS);
1630: }
1632: /*@C
1633: PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1635: Collective
1637: Input Parameters:
1638: + sf - star forest
1639: . unit - data type
1640: . leafdata - values to reduce
1641: - op - reduction operation
1643: Output Parameter:
1644: . rootdata - result of reduction of values from all leaves of each root
1646: Level: intermediate
1648: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1649: @*/
1650: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1651: {
1652: PetscFunctionBegin;
1654: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1655: PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1656: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1657: PetscFunctionReturn(PETSC_SUCCESS);
1658: }
1660: /*@C
1661: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1662: to be completed with `PetscSFFetchAndOpEnd()`
1664: Collective
1666: Input Parameters:
1667: + sf - star forest
1668: . unit - data type
1669: . leafdata - leaf values to use in reduction
1670: - op - operation to use for reduction
1672: Output Parameters:
1673: + rootdata - root values to be updated, input state is seen by first process to perform an update
1674: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1676: Level: advanced
1678: Note:
1679: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1680: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1681:   not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1682:   integers.
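
  Example Usage:
  A minimal sketch that atomically adds each leaf value to its root while fetching the pre-update root value (buffer names are illustrative, and contiguous leaf storage is assumed)\:
.vb
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
  // leafupdate[i] now holds the value of leaf i's root immediately before this process's update
.ve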
1684: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1685: @*/
1686: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1687: {
1688: PetscMemType rootmtype, leafmtype, leafupdatemtype;
1690: PetscFunctionBegin;
1692: PetscCall(PetscSFSetUp(sf));
1693: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1694: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1695: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1696: PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1697: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1698: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1699: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1700: PetscFunctionReturn(PETSC_SUCCESS);
1701: }
1703: /*@C
1704: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1705: applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1707: Collective
1709: Input Parameters:
1710: + sf - star forest
1711: . unit - data type
1712: . rootmtype - memory type of rootdata
1713: . leafmtype - memory type of leafdata
1714: . leafdata - leaf values to use in reduction
1715: . leafupdatemtype - memory type of leafupdate
1716: - op - operation to use for reduction
1718: Output Parameters:
1719: + rootdata - root values to be updated, input state is seen by first process to perform an update
1720: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1722: Level: advanced
1724: Note:
1725: See `PetscSFFetchAndOpBegin()` for more details.
1727: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1728: @*/
1729: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1730: {
1731: PetscFunctionBegin;
1733: PetscCall(PetscSFSetUp(sf));
1734: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1735: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1736: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1737: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1738: PetscFunctionReturn(PETSC_SUCCESS);
1739: }
1741: /*@C
1742: PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1743: to fetch values from roots and update atomically by applying operation using my leaf value
1745: Collective
1747: Input Parameters:
1748: + sf - star forest
1749: . unit - data type
1750: . leafdata - leaf values to use in reduction
1751: - op - operation to use for reduction
1753: Output Parameters:
1754: + rootdata - root values to be updated, input state is seen by first process to perform an update
1755: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1757: Level: advanced
1759: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1760: @*/
1761: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1762: {
1763: PetscFunctionBegin;
1765: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1766: PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1767: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1768: PetscFunctionReturn(PETSC_SUCCESS);
1769: }
1771: /*@C
1772: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1774: Collective
1776: Input Parameter:
1777: . sf - star forest
1779: Output Parameter:
1780: . degree - degree of each root vertex
1782: Level: advanced
1784: Note:
1785: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
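
  Example Usage:
  A minimal sketch\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  // degree[i] is the number of leaves connected to root i; the array is owned by sf, do not free it
.ve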
1787: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1788: @*/
1789: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1790: {
1791: PetscFunctionBegin;
1793: PetscSFCheckGraphSet(sf, 1);
1794: PetscAssertPointer(degree, 2);
1795: if (!sf->degreeknown) {
1796: PetscInt i, nroots = sf->nroots, maxlocal;
1797: PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1798: maxlocal = sf->maxleaf - sf->minleaf + 1;
1799: PetscCall(PetscMalloc1(nroots, &sf->degree));
1800: PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1801: for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1802: for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1803: PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1804: }
1805: *degree = NULL;
1806: PetscFunctionReturn(PETSC_SUCCESS);
1807: }
1809: /*@C
1810: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1812: Collective
1814: Input Parameter:
1815: . sf - star forest
1817: Output Parameter:
1818: . degree - degree of each root vertex
1820: Level: developer
1822: Note:
1823: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1825: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1826: @*/
1827: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1828: {
1829: PetscFunctionBegin;
1831: PetscSFCheckGraphSet(sf, 1);
1832: PetscAssertPointer(degree, 2);
1833: if (!sf->degreeknown) {
1834: PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1835: PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1836: PetscCall(PetscFree(sf->degreetmp));
1837: sf->degreeknown = PETSC_TRUE;
1838: }
1839: *degree = sf->degree;
1840: PetscFunctionReturn(PETSC_SUCCESS);
1841: }
1843: /*@C
1844:   PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1845:   Each multi-root is assigned the index of the corresponding original root.
1847: Collective
1849: Input Parameters:
1850: + sf - star forest
1851: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1853: Output Parameters:
1854: + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
1855: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1857: Level: developer
1859: Note:
1860: The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
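
  Example Usage:
  A minimal sketch (variable names are illustrative)\:
.vb
  const PetscInt *degree;
  PetscInt       *morig, nm;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nm, &morig));
  // morig[k] is the original root index corresponding to multi-root k
  PetscCall(PetscFree(morig));
.ve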
1862: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1863: @*/
1864: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1865: {
1866: PetscSF msf;
1867: PetscInt k = 0, nroots, nmroots;
1869: PetscFunctionBegin;
1871: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1872: if (nroots) PetscAssertPointer(degree, 2);
1873: if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1874: PetscAssertPointer(multiRootsOrigNumbering, 4);
1875: PetscCall(PetscSFGetMultiSF(sf, &msf));
1876: PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1877: PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1878: for (PetscInt i = 0; i < nroots; i++) {
1879: if (!degree[i]) continue;
1880: for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1881: }
1882:   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check failed");
1883: if (nMultiRoots) *nMultiRoots = nmroots;
1884: PetscFunctionReturn(PETSC_SUCCESS);
1885: }
1887: /*@C
1888: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1890: Collective
1892: Input Parameters:
1893: + sf - star forest
1894: . unit - data type
1895: - leafdata - leaf data to gather to roots
1897: Output Parameter:
1898: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1900: Level: intermediate
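
  Example Usage:
  A minimal sketch gathering one `PetscScalar` per leaf into multi-root storage (`leafdata` is assumed to be an existing array with one entry per leaf, and contiguous leaf storage is assumed)\:
.vb
  const PetscInt *degree;
  PetscScalar    *multirootdata;
  PetscInt        nroots, nm = 0;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  for (PetscInt i = 0; i < nroots; i++) nm += degree[i]; // total multi-root space is the sum of the degrees
  PetscCall(PetscMalloc1(nm, &multirootdata));
  PetscCall(PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata));
  PetscCall(PetscFree(multirootdata));
.ve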
1902: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1903: @*/
1904: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1905: {
1906: PetscSF multi = NULL;
1908: PetscFunctionBegin;
1910: PetscCall(PetscSFSetUp(sf));
1911: PetscCall(PetscSFGetMultiSF(sf, &multi));
1912: PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1913: PetscFunctionReturn(PETSC_SUCCESS);
1914: }
1916: /*@C
1917: PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1919: Collective
1921: Input Parameters:
1922: + sf - star forest
1923: . unit - data type
1924: - leafdata - leaf data to gather to roots
1926: Output Parameter:
1927: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1929: Level: intermediate
1931: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1932: @*/
1933: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1934: {
1935: PetscSF multi = NULL;
1937: PetscFunctionBegin;
1939: PetscCall(PetscSFGetMultiSF(sf, &multi));
1940: PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1941: PetscFunctionReturn(PETSC_SUCCESS);
1942: }
1944: /*@C
1945: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1947: Collective
1949: Input Parameters:
1950: + sf - star forest
1951: . unit - data type
1952: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1954: Output Parameter:
1955: . leafdata - leaf data to be updated with personal data from each respective root
1957: Level: intermediate
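
  Example Usage:
  A minimal sketch, with `multirootdata` sized and ordered as produced by `PetscSFGatherBegin()`/`PetscSFGatherEnd()` (buffer names are illustrative)\:
.vb
  PetscCall(PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata));
  PetscCall(PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata));
.ve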
1959: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1960: @*/
1961: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1962: {
1963: PetscSF multi = NULL;
1965: PetscFunctionBegin;
1967: PetscCall(PetscSFSetUp(sf));
1968: PetscCall(PetscSFGetMultiSF(sf, &multi));
1969: PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1970: PetscFunctionReturn(PETSC_SUCCESS);
1971: }
1973: /*@C
1974: PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1976: Collective
1978: Input Parameters:
1979: + sf - star forest
1980: . unit - data type
1981: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1983: Output Parameter:
1984: . leafdata - leaf data to be updated with personal data from each respective root
1986: Level: intermediate
1988: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1989: @*/
1990: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1991: {
1992: PetscSF multi = NULL;
1994: PetscFunctionBegin;
1996: PetscCall(PetscSFGetMultiSF(sf, &multi));
1997: PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1998: PetscFunctionReturn(PETSC_SUCCESS);
1999: }
2001: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2002: {
2003: PetscInt i, n, nleaves;
2004: const PetscInt *ilocal = NULL;
2005: PetscHSetI seen;
2007: PetscFunctionBegin;
2008: if (PetscDefined(USE_DEBUG)) {
2009: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2010: PetscCall(PetscHSetICreate(&seen));
2011: for (i = 0; i < nleaves; i++) {
2012: const PetscInt leaf = ilocal ? ilocal[i] : i;
2013: PetscCall(PetscHSetIAdd(seen, leaf));
2014: }
2015: PetscCall(PetscHSetIGetSize(seen, &n));
2016: PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2017: PetscCall(PetscHSetIDestroy(&seen));
2018: }
2019: PetscFunctionReturn(PETSC_SUCCESS);
2020: }
2022: /*@
2023: PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2025: Input Parameters:
2026: + sfA - The first `PetscSF`
2027: - sfB - The second `PetscSF`
2029: Output Parameter:
2030: . sfBA - The composite `PetscSF`
2032: Level: developer
2034: Notes:
2035:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2036:   forests, i.e., no leaf is connected to more than one root.
2038: `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2039: a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2040: nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2041: `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
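
  Example Usage:
  A minimal sketch (both input `PetscSF`s are assumed to have their graphs already set)\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  // a broadcast on sfBA is equivalent to a broadcast on sfA followed by one on sfB, restricted to connected nodes
  PetscCall(PetscSFDestroy(&sfBA));
.ve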
2043: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2044: @*/
2045: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2046: {
2047: const PetscSFNode *remotePointsA, *remotePointsB;
2048: PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2049: const PetscInt *localPointsA, *localPointsB;
2050: PetscInt *localPointsBA;
2051: PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2052: PetscBool denseB;
2054: PetscFunctionBegin;
2056: PetscSFCheckGraphSet(sfA, 1);
2058: PetscSFCheckGraphSet(sfB, 2);
2059: PetscCheckSameComm(sfA, 1, sfB, 2);
2060: PetscAssertPointer(sfBA, 3);
2061: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2062: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2064: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2065: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2066: /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2067: numRootsB; otherwise, garbage will be broadcasted.
2068: Example (comm size = 1):
2069: sfA: 0 <- (0, 0)
2070: sfB: 100 <- (0, 0)
2071: 101 <- (0, 1)
2072:      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2073:      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2074:      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2075:      remotePointsA; if not recast, point 101 would receive a garbage value. */
2076: PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2077: for (i = 0; i < numRootsB; i++) {
2078: reorderedRemotePointsA[i].rank = -1;
2079: reorderedRemotePointsA[i].index = -1;
2080: }
2081: for (i = 0; i < numLeavesA; i++) {
2082: PetscInt localp = localPointsA ? localPointsA[i] : i;
2084: if (localp >= numRootsB) continue;
2085: reorderedRemotePointsA[localp] = remotePointsA[i];
2086: }
2087: remotePointsA = reorderedRemotePointsA;
2088: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2089: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2090: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2091: leafdataB[i].rank = -1;
2092: leafdataB[i].index = -1;
2093: }
2094: PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2095: PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2096: PetscCall(PetscFree(reorderedRemotePointsA));
2098: denseB = (PetscBool)!localPointsB;
2099: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2100: if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2101: else numLeavesBA++;
2102: }
2103: if (denseB) {
2104: localPointsBA = NULL;
2105: remotePointsBA = leafdataB;
2106: } else {
2107: PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2108: PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2109: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2110: const PetscInt l = localPointsB ? localPointsB[i] : i;
2112: if (leafdataB[l - minleaf].rank == -1) continue;
2113: remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2114: localPointsBA[numLeavesBA] = l;
2115: numLeavesBA++;
2116: }
2117: PetscCall(PetscFree(leafdataB));
2118: }
2119: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2120: PetscCall(PetscSFSetFromOptions(*sfBA));
2121: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2122: PetscFunctionReturn(PETSC_SUCCESS);
2123: }
2125: /*@
2126: PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2128: Input Parameters:
2129: + sfA - The first `PetscSF`
2130: - sfB - The second `PetscSF`
2132: Output Parameter:
2133: . sfBA - The composite `PetscSF`.
2135: Level: developer
2137: Notes:
2138:   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2139:   forests, i.e., no leaf is connected to more than one root. Moreover, all roots of the
2140:   second `PetscSF` must have degree 1, i.e., no root has more than one leaf connected.
2142: `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2143: a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2144:   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2145:   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`,
2146:   on connected roots.
2148: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2149: @*/
2150: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2151: {
2152: const PetscSFNode *remotePointsA, *remotePointsB;
2153: PetscSFNode *remotePointsBA;
2154: const PetscInt *localPointsA, *localPointsB;
2155: PetscSFNode *reorderedRemotePointsA = NULL;
2156: PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2157: MPI_Op op;
2158: #if defined(PETSC_USE_64BIT_INDICES)
2159: PetscBool iswin;
2160: #endif
2162: PetscFunctionBegin;
2164: PetscSFCheckGraphSet(sfA, 1);
2166: PetscSFCheckGraphSet(sfB, 2);
2167: PetscCheckSameComm(sfA, 1, sfB, 2);
2168: PetscAssertPointer(sfBA, 3);
2169: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2170: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2172: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2173: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2175: /* TODO: Check roots of sfB have degree of 1 */
2176:   /* Once that check is implemented, MPI_MAXLOC can be replaced with MPI_REPLACE,
2177:      since the two have the same effect when the degree-1 condition holds.
2178:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2179:      the root condition is not met.
2180:   */
2181: op = MPI_MAXLOC;
2182: #if defined(PETSC_USE_64BIT_INDICES)
2183: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2184: PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2185: if (iswin) op = MPI_REPLACE;
2186: #endif
2188: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2189: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2190: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2191: reorderedRemotePointsA[i].rank = -1;
2192: reorderedRemotePointsA[i].index = -1;
2193: }
2194: if (localPointsA) {
2195: for (i = 0; i < numLeavesA; i++) {
2196: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2197: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2198: }
2199: } else {
2200: for (i = 0; i < numLeavesA; i++) {
2201: if (i > maxleaf || i < minleaf) continue;
2202: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2203: }
2204: }
2206: PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2207: PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2208: for (i = 0; i < numRootsB; i++) {
2209: remotePointsBA[i].rank = -1;
2210: remotePointsBA[i].index = -1;
2211: }
2213: PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2214: PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2215: PetscCall(PetscFree(reorderedRemotePointsA));
2216: for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2217: if (remotePointsBA[i].rank == -1) continue;
2218: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2219: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2220: localPointsBA[numLeavesBA] = i;
2221: numLeavesBA++;
2222: }
2223: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2224: PetscCall(PetscSFSetFromOptions(*sfBA));
2225: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2226: PetscFunctionReturn(PETSC_SUCCESS);
2227: }
2229: /*
2230: PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2232: Input Parameter:
2233: . sf - The global `PetscSF`
2235: Output Parameter:
2236: . out - The local `PetscSF`
2238: .seealso: `PetscSF`, `PetscSFCreate()`
2239: */
2240: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2241: {
2242: MPI_Comm comm;
2243: PetscMPIInt myrank;
2244: const PetscInt *ilocal;
2245: const PetscSFNode *iremote;
2246: PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
2247: PetscSFNode *liremote;
2248: PetscSF lsf;
2250: PetscFunctionBegin;
2252: if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2253: else {
2254: PetscMPIInt irank;
2256: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2257: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2258: PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2260: /* Find out local edges and build a local SF */
2261: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2262: for (i = lnleaves = 0; i < nleaves; i++) {
2263: PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2264: if (irank == myrank) lnleaves++;
2265: }
2266: PetscCall(PetscMalloc1(lnleaves, &lilocal));
2267: PetscCall(PetscMalloc1(lnleaves, &liremote));
2269: for (i = j = 0; i < nleaves; i++) {
2270: PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2271: if (irank == myrank) {
2272: lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2273: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2274: liremote[j].index = iremote[i].index;
2275: j++;
2276: }
2277: }
2278: PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2279: PetscCall(PetscSFSetFromOptions(lsf));
2280: PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2281: PetscCall(PetscSFSetUp(lsf));
2282: *out = lsf;
2283: }
2284: PetscFunctionReturn(PETSC_SUCCESS);
2285: }
2287: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2288: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2289: {
2290: PetscMemType rootmtype, leafmtype;
2292: PetscFunctionBegin;
2294: PetscCall(PetscSFSetUp(sf));
2295: PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2296: PetscCall(PetscGetMemType(rootdata, &rootmtype));
2297: PetscCall(PetscGetMemType(leafdata, &leafmtype));
2298: PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2299: PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2300: PetscFunctionReturn(PETSC_SUCCESS);
2301: }
2303: /*@
2304: PetscSFConcatenate - concatenate multiple `PetscSF` into one
2306: Input Parameters:
2307: + comm - the communicator
2308: . nsfs - the number of input `PetscSF`
2309: . sfs - the array of input `PetscSF`
2310: . rootMode - the root mode specifying how roots are handled
2311: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2313: Output Parameter:
2314: . newsf - The resulting `PetscSF`
2316: Level: advanced
2318: Notes:
2319:   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2321: Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2323: The offsets in `leafOffsets` are added to the original leaf indices.
2325: If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2326: In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2328: If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2329:   In this case, the user is responsible for providing offsets such that the resulting leaves are unique (otherwise an error occurs).
2331: All root modes retain the essential connectivity condition.
2332: If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2333: Parameter `rootMode` controls how the input root spaces are combined.
2334: For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2335: and is also the same in the output `PetscSF`.
2336: For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2337: `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2338: roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2339: `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2340: roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2341: the original root ranks are ignored.
2342: For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2343:   the output `PetscSF`'s root layout is such that the local number of roots on each rank is the sum of the input `PetscSF`s' local numbers of roots,
2344:   which preserves the load balance.
2345: However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2347: Example:
2348: We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2349: .vb
2350: make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2351: for m in {local,global,shared}; do
2352: mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2353: done
2354: .ve
2355: we generate two identical `PetscSF`s sf_0 and sf_1,
2356: .vb
2357: PetscSF Object: sf_0 2 MPI processes
2358: type: basic
2359: rank #leaves #roots
2360: [ 0] 4 2
2361: [ 1] 4 2
2362: leaves roots roots in global numbering
2363: ( 0, 0) <- ( 0, 0) = 0
2364: ( 0, 1) <- ( 0, 1) = 1
2365: ( 0, 2) <- ( 1, 0) = 2
2366: ( 0, 3) <- ( 1, 1) = 3
2367: ( 1, 0) <- ( 0, 0) = 0
2368: ( 1, 1) <- ( 0, 1) = 1
2369: ( 1, 2) <- ( 1, 0) = 2
2370: ( 1, 3) <- ( 1, 1) = 3
2371: .ve
2372: and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2373: .vb
2374: rootMode = local:
2375: PetscSF Object: result_sf 2 MPI processes
2376: type: basic
2377: rank #leaves #roots
2378: [ 0] 8 4
2379: [ 1] 8 4
2380: leaves roots roots in global numbering
2381: ( 0, 0) <- ( 0, 0) = 0
2382: ( 0, 1) <- ( 0, 1) = 1
2383: ( 0, 2) <- ( 1, 0) = 4
2384: ( 0, 3) <- ( 1, 1) = 5
2385: ( 0, 4) <- ( 0, 2) = 2
2386: ( 0, 5) <- ( 0, 3) = 3
2387: ( 0, 6) <- ( 1, 2) = 6
2388: ( 0, 7) <- ( 1, 3) = 7
2389: ( 1, 0) <- ( 0, 0) = 0
2390: ( 1, 1) <- ( 0, 1) = 1
2391: ( 1, 2) <- ( 1, 0) = 4
2392: ( 1, 3) <- ( 1, 1) = 5
2393: ( 1, 4) <- ( 0, 2) = 2
2394: ( 1, 5) <- ( 0, 3) = 3
2395: ( 1, 6) <- ( 1, 2) = 6
2396: ( 1, 7) <- ( 1, 3) = 7
2398: rootMode = global:
2399: PetscSF Object: result_sf 2 MPI processes
2400: type: basic
2401: rank #leaves #roots
2402: [ 0] 8 4
2403: [ 1] 8 4
2404: leaves roots roots in global numbering
2405: ( 0, 0) <- ( 0, 0) = 0
2406: ( 0, 1) <- ( 0, 1) = 1
2407: ( 0, 2) <- ( 0, 2) = 2
2408: ( 0, 3) <- ( 0, 3) = 3
2409: ( 0, 4) <- ( 1, 0) = 4
2410: ( 0, 5) <- ( 1, 1) = 5
2411: ( 0, 6) <- ( 1, 2) = 6
2412: ( 0, 7) <- ( 1, 3) = 7
2413: ( 1, 0) <- ( 0, 0) = 0
2414: ( 1, 1) <- ( 0, 1) = 1
2415: ( 1, 2) <- ( 0, 2) = 2
2416: ( 1, 3) <- ( 0, 3) = 3
2417: ( 1, 4) <- ( 1, 0) = 4
2418: ( 1, 5) <- ( 1, 1) = 5
2419: ( 1, 6) <- ( 1, 2) = 6
2420: ( 1, 7) <- ( 1, 3) = 7
2422: rootMode = shared:
2423: PetscSF Object: result_sf 2 MPI processes
2424: type: basic
2425: rank #leaves #roots
2426: [ 0] 8 2
2427: [ 1] 8 2
2428: leaves roots roots in global numbering
2429: ( 0, 0) <- ( 0, 0) = 0
2430: ( 0, 1) <- ( 0, 1) = 1
2431: ( 0, 2) <- ( 1, 0) = 2
2432: ( 0, 3) <- ( 1, 1) = 3
2433: ( 0, 4) <- ( 0, 0) = 0
2434: ( 0, 5) <- ( 0, 1) = 1
2435: ( 0, 6) <- ( 1, 0) = 2
2436: ( 0, 7) <- ( 1, 1) = 3
2437: ( 1, 0) <- ( 0, 0) = 0
2438: ( 1, 1) <- ( 0, 1) = 1
2439: ( 1, 2) <- ( 1, 0) = 2
2440: ( 1, 3) <- ( 1, 1) = 3
2441: ( 1, 4) <- ( 0, 0) = 0
2442: ( 1, 5) <- ( 0, 1) = 1
2443: ( 1, 6) <- ( 1, 0) = 2
2444: ( 1, 7) <- ( 1, 1) = 3
2445: .ve
2447: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2448: @*/
2449: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2450: {
2451: PetscInt i, s, nLeaves, nRoots;
2452: PetscInt *leafArrayOffsets;
2453: PetscInt *ilocal_new;
2454: PetscSFNode *iremote_new;
2455: PetscBool all_ilocal_null = PETSC_FALSE;
2456: PetscLayout glayout = NULL;
2457: PetscInt *gremote = NULL;
2458: PetscMPIInt rank, size;
2460: PetscFunctionBegin;
2461: if (PetscDefined(USE_DEBUG)) {
2462: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2464: PetscCall(PetscSFCreate(comm, &dummy));
2466: PetscAssertPointer(sfs, 3);
2467: for (i = 0; i < nsfs; i++) {
2469: PetscCheckSameComm(dummy, 1, sfs[i], 3);
2470: }
2472: if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2473: PetscAssertPointer(newsf, 6);
2474: PetscCall(PetscSFDestroy(&dummy));
2475: }
2476: if (!nsfs) {
2477: PetscCall(PetscSFCreate(comm, newsf));
2478: PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2479: PetscFunctionReturn(PETSC_SUCCESS);
2480: }
2481: PetscCallMPI(MPI_Comm_rank(comm, &rank));
2482: PetscCallMPI(MPI_Comm_size(comm, &size));
2484: /* Calculate leaf array offsets */
2485: PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2486: leafArrayOffsets[0] = 0;
2487: for (s = 0; s < nsfs; s++) {
2488: PetscInt nl;
2490: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2491: leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2492: }
2493: nLeaves = leafArrayOffsets[nsfs];
2495: /* Calculate number of roots */
2496: switch (rootMode) {
2497: case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2498: PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2499: if (PetscDefined(USE_DEBUG)) {
2500: for (s = 1; s < nsfs; s++) {
2501: PetscInt nr;
2503: PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2504: PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2505: }
2506: }
2507: } break;
2508: case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2509: /* Calculate also global layout in this case */
2510: PetscInt *nls;
2511: PetscLayout *lts;
2512: PetscInt **inds;
2513: PetscInt j;
2514: PetscInt rootOffset = 0;
2516: PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds));
2517: PetscCall(PetscLayoutCreate(comm, &glayout));
2518: glayout->bs = 1;
2519: glayout->n = 0;
2520: glayout->N = 0;
2521: for (s = 0; s < nsfs; s++) {
2522: PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s]));
2523: glayout->n += lts[s]->n;
2524: glayout->N += lts[s]->N;
2525: }
2526: PetscCall(PetscLayoutSetUp(glayout));
2527: PetscCall(PetscMalloc1(nLeaves, &gremote));
2528: for (s = 0, j = 0; s < nsfs; s++) {
2529: for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2530: rootOffset += lts[s]->N;
2531: PetscCall(PetscLayoutDestroy(<s[s]));
2532: PetscCall(PetscFree(inds[s]));
2533: }
2534: PetscCall(PetscFree3(lts, nls, inds));
2535: nRoots = glayout->N;
2536: } break;
2537: case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2538: /* nRoots calculated later in this case */
2539: break;
2540: default:
2541: SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2542: }
2544: if (!leafOffsets) {
2545: all_ilocal_null = PETSC_TRUE;
2546: for (s = 0; s < nsfs; s++) {
2547: const PetscInt *ilocal;
2549: PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2550: if (ilocal) {
2551: all_ilocal_null = PETSC_FALSE;
2552: break;
2553: }
2554: }
2555: PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2556: }
2558: /* Renumber and concatenate local leaves */
2559: ilocal_new = NULL;
2560: if (!all_ilocal_null) {
2561: PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2562: for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2563: for (s = 0; s < nsfs; s++) {
2564: const PetscInt *ilocal;
2565: PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2566: PetscInt i, nleaves_l;
2568: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2569: for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2570: }
2571: }
2573: /* Renumber and concatenate remote roots */
2574: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2575: PetscInt rootOffset = 0;
2577: PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2578: for (i = 0; i < nLeaves; i++) {
2579: iremote_new[i].rank = -1;
2580: iremote_new[i].index = -1;
2581: }
2582: for (s = 0; s < nsfs; s++) {
2583: PetscInt i, nl, nr;
2584: PetscSF tmp_sf;
2585: const PetscSFNode *iremote;
2586: PetscSFNode *tmp_rootdata;
2587: PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2589: PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2590: PetscCall(PetscSFCreate(comm, &tmp_sf));
2591: /* create helper SF with contiguous leaves */
2592: PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2593: PetscCall(PetscSFSetUp(tmp_sf));
2594: PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2595: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2596: for (i = 0; i < nr; i++) {
2597: tmp_rootdata[i].index = i + rootOffset;
2598: tmp_rootdata[i].rank = rank;
2599: }
2600: rootOffset += nr;
2601: } else {
2602: for (i = 0; i < nr; i++) {
2603: tmp_rootdata[i].index = i;
2604: tmp_rootdata[i].rank = rank;
2605: }
2606: }
2607: PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2608: PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2609: PetscCall(PetscSFDestroy(&tmp_sf));
2610: PetscCall(PetscFree(tmp_rootdata));
2611: }
2612: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2614: /* Build the new SF */
2615: PetscCall(PetscSFCreate(comm, newsf));
2616: PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2617: } else {
2618: /* Build the new SF */
2619: PetscCall(PetscSFCreate(comm, newsf));
2620: PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2621: }
2622: PetscCall(PetscSFSetUp(*newsf));
2623: PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2624: PetscCall(PetscLayoutDestroy(&glayout));
2625: PetscCall(PetscFree(gremote));
2626: PetscCall(PetscFree(leafArrayOffsets));
2627: PetscFunctionReturn(PETSC_SUCCESS);
2628: }
2630: /*@
2631: PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
2633: Collective
2635: Input Parameters:
2636: + sf - star forest
2637: . unit - the data type contained within the root and leaf data
2638: . rootdata - root data that will be used for multiple PetscSF communications
2639: - leafdata - leaf data that will be used for multiple PetscSF communications
2641: Level: advanced
2643: Notes:
2644: Implementations of `PetscSF` can make optimizations
2645: for repeated communication using the same memory regions, but these optimizations
2646: can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2647: The intended pattern is
2649: .vb
2650: PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2652: PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2653: // repeated use of rootdata and leafdata will now be optimized
2655: PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2656: PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2657: // ...
2658: PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2659: PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2660: // ... (other communications)
2662: // rootdata and leafdata must be deregistered before freeing
2663: // skipping this can lead to undefined behavior including
2664: // deadlocks
2665: PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2667: // it is now safe to free rootdata and leafdata
2668: PetscFree2(rootdata, leafdata);
2669: .ve
2671: If you do not register `rootdata` and `leafdata` it will not cause an error,
2672: but optimizations that reduce the setup time for each communication cannot be
2673: made. Currently, the only implementation of `PetscSF` that benefits from
2674: `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default
2675: `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
2677: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2678: @*/
2679: PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2680: {
2681: PetscFunctionBegin;
2683: PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2684: PetscFunctionReturn(PETSC_SUCCESS);
2685: }
2687: /*@
2688: PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
2690: Collective
2692: Input Parameters:
2693: + sf - star forest
2694: . unit - the data type contained within the root and leaf data
2695: . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2696: - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2698: Level: advanced
2700: Note:
2701: See `PetscSFRegisterPersistent()` for when/how to use this function.
2703: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2704: @*/
2705: PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2706: {
2707: PetscFunctionBegin;
2709: PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2710: PetscFunctionReturn(PETSC_SUCCESS);
2711: }
2713: PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2714: {
2715: MPI_Aint lb, lb_true, bytes, bytes_true;
2717: PetscFunctionBegin;
2718: PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2719: PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2720: PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2721: *size = bytes;
2722: PetscFunctionReturn(PETSC_SUCCESS);
2723: }