Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petsc/private/hashmapi.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #include <petscdevice_cuda.h>
9: #endif
11: #if defined(PETSC_HAVE_HIP)
12: #include <hip/hip_runtime.h>
13: #endif
15: #if defined(PETSC_CLANG_STATIC_ANALYZER)
16: extern void PetscSFCheckGraphSet(PetscSF, int);
17: #else
18: #if defined(PETSC_USE_DEBUG)
19: #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
20: #else
21: #define PetscSFCheckGraphSet(sf, arg) \
22: do { \
23: } while (0)
24: #endif
25: #endif
27: const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28: const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
30: /*@
31: PetscSFCreate - create a star forest communication context
33: Collective
35: Input Parameter:
36: . comm - communicator on which the star forest will operate
38: Output Parameter:
39: . sf - new star forest context
41: Options Database Key:
42: + -sf_type basic - Use MPI persistent Isend/Irecv for communication (Default)
43: . -sf_type window - Use MPI-3 one-sided window for communication
44: . -sf_type neighbor - Use MPI-3 neighborhood collectives for communication
45: - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
47: Level: intermediate
49: Note:
50:   When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
51:   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
52:   `SF`s are optimized and have better performance than general `SF`s.
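
  Example Usage:
  A minimal sketch, assuming a parallel run on `PETSC_COMM_WORLD`, in which every process owns one root and
  attaches one leaf to root 0 on the next process; after the broadcast each leaf holds the next process's rank:
.vb
  PetscSF     sf;
  PetscSFNode remote;
  PetscMPIInt rank, size;
  PetscInt    rootdata, leafdata = -1;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  remote.rank  = (rank + 1) % size; /* my single leaf references root 0 on the next process */
  remote.index = 0;
  rootdata     = rank;              /* value my root broadcasts to any leaf referencing it  */
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, &rootdata, &leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, &rootdata, &leafdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
.ve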
54: .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
55: @*/
56: PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57: {
58: PetscSF b;
60: PetscFunctionBegin;
61: PetscAssertPointer(sf, 2);
62: PetscCall(PetscSFInitializePackage());
64: PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
65: b->nroots = -1;
66: b->nleaves = -1;
67: b->minleaf = PETSC_INT_MAX;
68: b->maxleaf = PETSC_INT_MIN;
69: b->nranks = -1;
70: b->rankorder = PETSC_TRUE;
71: b->ingroup = MPI_GROUP_NULL;
72: b->outgroup = MPI_GROUP_NULL;
73: b->graphset = PETSC_FALSE;
74: #if defined(PETSC_HAVE_DEVICE)
75: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
76: b->use_stream_aware_mpi = PETSC_FALSE;
77: b->unknown_input_stream = PETSC_FALSE;
78: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
79: b->backend = PETSCSF_BACKEND_KOKKOS;
80: #elif defined(PETSC_HAVE_CUDA)
81: b->backend = PETSCSF_BACKEND_CUDA;
82: #elif defined(PETSC_HAVE_HIP)
83: b->backend = PETSCSF_BACKEND_HIP;
84: #endif
86: #if defined(PETSC_HAVE_NVSHMEM)
87: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
88: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
90: PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
91: #endif
92: #endif
93: b->vscat.from_n = -1;
94: b->vscat.to_n = -1;
95: b->vscat.unit = MPIU_SCALAR;
96: *sf = b;
97: PetscFunctionReturn(PETSC_SUCCESS);
98: }
100: /*@
101: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
103: Collective
105: Input Parameter:
106: . sf - star forest
108: Level: advanced
110: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
111: @*/
112: PetscErrorCode PetscSFReset(PetscSF sf)
113: {
114: PetscFunctionBegin;
116: PetscTryTypeMethod(sf, Reset);
117: PetscCall(PetscSFDestroy(&sf->rankssf));
119: sf->nroots = -1;
120: sf->nleaves = -1;
121: sf->minleaf = PETSC_INT_MAX;
122: sf->maxleaf = PETSC_INT_MIN;
123: sf->mine = NULL;
124: sf->remote = NULL;
125: sf->graphset = PETSC_FALSE;
126: PetscCall(PetscFree(sf->mine_alloc));
127: PetscCall(PetscFree(sf->remote_alloc));
128: sf->nranks = -1;
129: PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
130: sf->degreeknown = PETSC_FALSE;
131: PetscCall(PetscFree(sf->degree));
132: if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
133: if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
135: if (sf->multi) sf->multi->multi = NULL;
136: PetscCall(PetscSFDestroy(&sf->multi));
138: PetscCall(PetscLayoutDestroy(&sf->map));
140: #if defined(PETSC_HAVE_DEVICE)
141: for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
142: #endif
144: sf->setupcalled = PETSC_FALSE;
145: PetscFunctionReturn(PETSC_SUCCESS);
146: }
148: /*@
149: PetscSFSetType - Set the `PetscSF` communication implementation
151: Collective
153: Input Parameters:
154: + sf - the `PetscSF` context
155: - type - a known method
156: .vb
157: PETSCSFWINDOW - MPI-2/3 one-sided
158: PETSCSFBASIC - basic implementation using MPI-1 two-sided
159: .ve
161: Options Database Key:
162: . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
164: Level: intermediate
166: Notes:
167: See `PetscSFType` for possible values
169: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
170: @*/
171: PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172: {
173: PetscBool match;
174: PetscErrorCode (*r)(PetscSF);
176: PetscFunctionBegin;
178: PetscAssertPointer(type, 2);
180: PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
181: if (match) PetscFunctionReturn(PETSC_SUCCESS);
183: PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
184: PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
185: /* Destroy the previous PetscSF implementation context */
186: PetscTryTypeMethod(sf, Destroy);
187: PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
188: PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
189: PetscCall((*r)(sf));
190: PetscFunctionReturn(PETSC_SUCCESS);
191: }
193: /*@
194: PetscSFGetType - Get the `PetscSF` communication implementation
196: Not Collective
198: Input Parameter:
199: . sf - the `PetscSF` context
201: Output Parameter:
202: . type - the `PetscSF` type name
204: Level: intermediate
206: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
207: @*/
208: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209: {
210: PetscFunctionBegin;
212: PetscAssertPointer(type, 2);
213: *type = ((PetscObject)sf)->type_name;
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
217: /*@
218: PetscSFDestroy - destroy a star forest
220: Collective
222: Input Parameter:
223: . sf - address of star forest
225: Level: intermediate
227: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
228: @*/
229: PetscErrorCode PetscSFDestroy(PetscSF *sf)
230: {
231: PetscFunctionBegin;
232: if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
234: if (--((PetscObject)*sf)->refct > 0) {
235: *sf = NULL;
236: PetscFunctionReturn(PETSC_SUCCESS);
237: }
238: PetscCall(PetscSFReset(*sf));
239: PetscTryTypeMethod(*sf, Destroy);
240: PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
241: if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243: if ((*sf)->use_stream_aware_mpi) {
244: PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245: PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246: }
247: #endif
248: PetscCall(PetscHeaderDestroy(sf));
249: PetscFunctionReturn(PETSC_SUCCESS);
250: }
252: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253: {
254: PetscInt i, nleaves;
255: PetscMPIInt size;
256: const PetscInt *ilocal;
257: const PetscSFNode *iremote;
259: PetscFunctionBegin;
260: if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
261: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
262: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263: for (i = 0; i < nleaves; i++) {
264: const PetscInt rank = iremote[i].rank;
265: const PetscInt remote = iremote[i].index;
266: const PetscInt leaf = ilocal ? ilocal[i] : i;
267: PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
268: PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
269: PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270: }
271: PetscFunctionReturn(PETSC_SUCCESS);
272: }
274: /*@
275: PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
277: Collective
279: Input Parameter:
280: . sf - star forest communication object
282: Level: beginner
284: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
285: @*/
286: PetscErrorCode PetscSFSetUp(PetscSF sf)
287: {
288: PetscFunctionBegin;
290: PetscSFCheckGraphSet(sf, 1);
291: if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
292: PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
293: PetscCall(PetscSFCheckGraphValid_Private(sf));
294: if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295: PetscTryTypeMethod(sf, SetUp);
296: #if defined(PETSC_HAVE_CUDA)
297: if (sf->backend == PETSCSF_BACKEND_CUDA) {
298: sf->ops->Malloc = PetscSFMalloc_CUDA;
299: sf->ops->Free = PetscSFFree_CUDA;
300: }
301: #endif
302: #if defined(PETSC_HAVE_HIP)
303: if (sf->backend == PETSCSF_BACKEND_HIP) {
304: sf->ops->Malloc = PetscSFMalloc_HIP;
305: sf->ops->Free = PetscSFFree_HIP;
306: }
307: #endif
309: #if defined(PETSC_HAVE_KOKKOS)
310: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
311: sf->ops->Malloc = PetscSFMalloc_Kokkos;
312: sf->ops->Free = PetscSFFree_Kokkos;
313: }
314: #endif
315: PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
316: sf->setupcalled = PETSC_TRUE;
317: PetscFunctionReturn(PETSC_SUCCESS);
318: }
320: /*@
321: PetscSFSetFromOptions - set `PetscSF` options using the options database
323: Logically Collective
325: Input Parameter:
326: . sf - star forest
328: Options Database Keys:
329: + -sf_type - implementation type, see `PetscSFSetType()`
330: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
331: . -sf_use_default_stream - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
332: use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
333: If true, this option only works with `-use_gpu_aware_mpi 1`.
334: . -sf_use_stream_aware_mpi - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
335: If true, this option only works with `-use_gpu_aware_mpi 1`.
337: - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has three backends: cuda, hip and kokkos.
338:                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
339:                                   the only available backend is kokkos.
341: Level: intermediate
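
  Example Usage:
  A sketch of supplying the keys above on the command line of an application that calls this routine on its `sf`
  (the executable name `./app` is just a placeholder):
.vb
  ./app -sf_type neighbor -sf_rank_order 1
.ve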
343: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
344: @*/
345: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346: {
347: PetscSFType deft;
348: char type[256];
349: PetscBool flg;
351: PetscFunctionBegin;
353: PetscObjectOptionsBegin((PetscObject)sf);
354: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
355: PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
356: PetscCall(PetscSFSetType(sf, flg ? type : deft));
357: PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
358: PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
359: #if defined(PETSC_HAVE_DEVICE)
360: {
361: char backendstr[32] = {0};
362: PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
363: /* Change the defaults set in PetscSFCreate() with command line options */
364: PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
365: PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
366: PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
367: PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
368: PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
369: PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
370: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
371: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
374: else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
375: #elif defined(PETSC_HAVE_KOKKOS)
376: PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
377: #endif
379: #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380: if (sf->use_stream_aware_mpi) {
381: MPI_Info info;
383: PetscCallMPI(MPI_Info_create(&info));
384: PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385: PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386: PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387: PetscCallMPI(MPI_Info_free(&info));
388: PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389: }
390: #endif
391: }
392: #endif
393: PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394: PetscOptionsEnd();
395: PetscFunctionReturn(PETSC_SUCCESS);
396: }
398: /*@
399: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
401: Logically Collective
403: Input Parameters:
404: + sf - star forest
405: - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
407: Level: advanced
409: .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
410: @*/
411: PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412: {
413: PetscFunctionBegin;
416: PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
417: sf->rankorder = flg;
418: PetscFunctionReturn(PETSC_SUCCESS);
419: }
421: /*@
422: PetscSFSetGraph - Set a parallel star forest
424: Collective
426: Input Parameters:
427: + sf - star forest
428: . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
429: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
430: . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431: during setup in debug mode)
432: . localmode - copy mode for `ilocal`
433: . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves`
434: (locations must be >= 0, enforced during setup in debug mode)
435: - remotemode - copy mode for `iremote`
437: Level: intermediate
439: Notes:
440: Leaf indices in `ilocal` must be unique, otherwise an error occurs.
442: Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
443: In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444: PETSc might modify the respective array;
445: if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
446:   Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact, and a const array can be passed (but a cast to non-const is needed).
448: Fortran Notes:
449: In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
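
  Example Usage:
  A minimal sketch, assuming every process owns two roots and places two leaves at locations 0 and 2 of its
  (therefore sparse) leafdata buffer, both referencing roots on the next process in `PETSC_COMM_WORLD`:
.vb
  PetscSF     sf;
  PetscInt    ilocal[2];
  PetscSFNode iremote[2];
  PetscMPIInt rank, size;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  ilocal[0]        = 0;                 /* first leaf lives at location 0 of the leafdata buffer */
  iremote[0].rank  = (rank + 1) % size; /* ... and references root 0 on the next process         */
  iremote[0].index = 0;
  ilocal[1]        = 2;                 /* second leaf lives at location 2                       */
  iremote[1].rank  = (rank + 1) % size; /* ... and references root 1 on the next process         */
  iremote[1].index = 1;
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraph(sf, 2, 2, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
.ve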
451: Developer Notes:
452:   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
453:   This also makes it easy to compare the leaf sets of two `PetscSF`s.
455: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
456: @*/
457: PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
458: {
459: PetscBool unique, contiguous;
461: PetscFunctionBegin;
463: if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
464: if (nleaves > 0) PetscAssertPointer(iremote, 6);
465: PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
466: PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
467: /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
468: * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
469: PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
470: PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
472: if (sf->nroots >= 0) { /* Reset only if graph already set */
473: PetscCall(PetscSFReset(sf));
474: }
476: PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
477: if (PetscDefined(USE_DEBUG)) {
478: PetscMPIInt size;
480: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
481: for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
482: }
484: sf->nroots = nroots;
485: sf->nleaves = nleaves;
487: if (localmode == PETSC_COPY_VALUES && ilocal) {
488: PetscInt *tlocal = NULL;
490: PetscCall(PetscMalloc1(nleaves, &tlocal));
491: PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
492: ilocal = tlocal;
493: }
494: if (remotemode == PETSC_COPY_VALUES) {
495: PetscSFNode *tremote = NULL;
497: PetscCall(PetscMalloc1(nleaves, &tremote));
498: PetscCall(PetscArraycpy(tremote, iremote, nleaves));
499: iremote = tremote;
500: }
502: if (nleaves && ilocal) {
503: PetscSFNode work;
505: PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
506: PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
507: unique = PetscNot(unique);
508: PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
509: sf->minleaf = ilocal[0];
510: sf->maxleaf = ilocal[nleaves - 1];
511: contiguous = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
512: } else {
513: sf->minleaf = 0;
514: sf->maxleaf = nleaves - 1;
515: unique = PETSC_TRUE;
516: contiguous = PETSC_TRUE;
517: }
519: if (contiguous) {
520: if (localmode == PETSC_USE_POINTER) {
521: ilocal = NULL;
522: } else {
523: PetscCall(PetscFree(ilocal));
524: }
525: }
526: sf->mine = ilocal;
527: if (localmode == PETSC_USE_POINTER) {
528: sf->mine_alloc = NULL;
529: } else {
530: sf->mine_alloc = ilocal;
531: }
532: if (PetscDefined(USE_DEBUG)) {
533: PetscMPIInt size;
535: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
536: for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
537: }
538: sf->remote = iremote;
539: if (remotemode == PETSC_USE_POINTER) {
540: sf->remote_alloc = NULL;
541: } else {
542: sf->remote_alloc = iremote;
543: }
544: PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
545: sf->graphset = PETSC_TRUE;
546: PetscFunctionReturn(PETSC_SUCCESS);
547: }
549: /*@
550: PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
552: Collective
554: Input Parameters:
555: + sf - The `PetscSF`
556: . map - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
557: - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
559: Level: intermediate
561: Notes:
562: It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
563: `n` and `N` are the local and global sizes of `x` respectively.
565: With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to
566: sequential vectors `y` on all MPI processes.
568: With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, it will copy `x` to a
569: sequential vector `y` on rank 0.
571: In above cases, entries of `x` are roots and entries of `y` are leaves.
573:   With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
574:   creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
575:   of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. This does
576:   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
577:   items with `MPI_Type_contiguous()` and use that as the <unit> argument in `PetscSF` routines.
579: In this case, roots and leaves are symmetric.
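
  Example Usage:
  A minimal sketch of the `PETSCSF_PATTERN_ALLGATHER` case, assuming every process contributes `n` = 2 roots;
  after the broadcast every process holds all `N` root values, i.e. 0,1,...,N-1, in `leafdata`:
.vb
  PetscSF     sf;
  PetscLayout map;
  PetscInt    n = 2, N, rootdata[2], *leafdata;
  PetscMPIInt rank;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  rootdata[0] = 2 * rank;
  rootdata[1] = 2 * rank + 1;
  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
  PetscCall(PetscLayoutSetLocalSize(map, n));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscLayoutGetSize(map, &N));
  PetscCall(PetscMalloc1(N, &leafdata));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  /* leafdata now holds 0,1,...,N-1 on every process */
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscLayoutDestroy(&map));
  PetscCall(PetscFree(leafdata));
.ve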
581: .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
582: @*/
583: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
584: {
585: MPI_Comm comm;
586: PetscInt n, N, res[2];
587: PetscMPIInt rank, size;
588: PetscSFType type;
590: PetscFunctionBegin;
592: if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
593: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
594: PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
595: PetscCallMPI(MPI_Comm_rank(comm, &rank));
596: PetscCallMPI(MPI_Comm_size(comm, &size));
598: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
599: PetscInt sizei = size;
601: type = PETSCSFALLTOALL;
602: PetscCall(PetscLayoutCreate(comm, &sf->map));
603: PetscCall(PetscLayoutSetLocalSize(sf->map, size));
604: PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
605: PetscCall(PetscLayoutSetUp(sf->map));
606: } else {
607: PetscCall(PetscLayoutGetLocalSize(map, &n));
608: PetscCall(PetscLayoutGetSize(map, &N));
609: res[0] = n;
610: res[1] = -n;
611:     /* Check if n is the same over all ranks so that we can optimize it */
612: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
613: if (res[0] == -res[1]) { /* same n */
614: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
615: } else {
616: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
617: }
618: PetscCall(PetscLayoutReference(map, &sf->map));
619: }
620: PetscCall(PetscSFSetType(sf, type));
622: sf->pattern = pattern;
623: sf->mine = NULL; /* Contiguous */
625: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
626: Also set other easy stuff.
627: */
628: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
629: sf->nleaves = N;
630: sf->nroots = n;
631: sf->nranks = size;
632: sf->minleaf = 0;
633: sf->maxleaf = N - 1;
634: } else if (pattern == PETSCSF_PATTERN_GATHER) {
635: sf->nleaves = rank ? 0 : N;
636: sf->nroots = n;
637: sf->nranks = rank ? 0 : size;
638: sf->minleaf = 0;
639: sf->maxleaf = rank ? -1 : N - 1;
640: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
641: sf->nleaves = size;
642: sf->nroots = size;
643: sf->nranks = size;
644: sf->minleaf = 0;
645: sf->maxleaf = size - 1;
646: }
647: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
648: sf->graphset = PETSC_TRUE;
649: PetscFunctionReturn(PETSC_SUCCESS);
650: }
652: /*@
653: PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
655: Collective
657: Input Parameter:
658: . sf - star forest to invert
660: Output Parameter:
661: . isf - inverse of `sf`
663: Level: advanced
665: Notes:
666: All roots must have degree 1.
668: The local space may be a permutation, but cannot be sparse.
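
  Example Usage:
  A minimal sketch, assuming `sf` satisfies the conditions above and that `leafbuf` and `rootbuf` (placeholder
  names) are the leaf-space and root-space arrays one would use with `sf` itself; broadcasting on the inverse
  pushes data in the leaf-to-root direction of the original `sf`:
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  PetscCall(PetscSFBcastBegin(isf, MPIU_INT, leafbuf, rootbuf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(isf, MPIU_INT, leafbuf, rootbuf, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&isf));
.ve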
670: .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
671: @*/
672: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
673: {
674: PetscMPIInt rank;
675: PetscInt i, nroots, nleaves, maxlocal, count, *newilocal;
676: const PetscInt *ilocal;
677: PetscSFNode *roots, *leaves;
679: PetscFunctionBegin;
681: PetscSFCheckGraphSet(sf, 1);
682: PetscAssertPointer(isf, 2);
684: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
685: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
687: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
688: PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
689: for (i = 0; i < maxlocal; i++) {
690: leaves[i].rank = rank;
691: leaves[i].index = i;
692: }
693: for (i = 0; i < nroots; i++) {
694: roots[i].rank = -1;
695: roots[i].index = -1;
696: }
697: PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
698: PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
700: /* Check whether our leaves are sparse */
701: for (i = 0, count = 0; i < nroots; i++)
702: if (roots[i].rank >= 0) count++;
703: if (count == nroots) newilocal = NULL;
704: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
705: for (i = 0, count = 0; i < nroots; i++) {
706: if (roots[i].rank >= 0) {
707: newilocal[count] = i;
708: roots[count].rank = roots[i].rank;
709: roots[count].index = roots[i].index;
710: count++;
711: }
712: }
713: }
715: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
716: PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
717: PetscCall(PetscFree2(roots, leaves));
718: PetscFunctionReturn(PETSC_SUCCESS);
719: }
721: /*@
722: PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
724: Collective
726: Input Parameters:
727: + sf - communication object to duplicate
728: - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
730: Output Parameter:
731: . newsf - new communication object
733: Level: beginner
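
  Example Usage:
  A minimal sketch duplicating both the configuration and the graph of an existing `sf` whose graph has been set:
.vb
  PetscSF newsf;

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &newsf));
  PetscCall(PetscSFSetUp(newsf));
  /* ... use newsf like sf ... */
  PetscCall(PetscSFDestroy(&newsf));
.ve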
735: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
736: @*/
737: PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
738: {
739: PetscSFType type;
740: MPI_Datatype dtype = MPIU_SCALAR;
742: PetscFunctionBegin;
745: PetscAssertPointer(newsf, 3);
746: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
747: PetscCall(PetscSFGetType(sf, &type));
748: if (type) PetscCall(PetscSFSetType(*newsf, type));
749: (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
750: if (opt == PETSCSF_DUPLICATE_GRAPH) {
751: PetscSFCheckGraphSet(sf, 1);
752: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
753: PetscInt nroots, nleaves;
754: const PetscInt *ilocal;
755: const PetscSFNode *iremote;
756: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
757: PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
758: } else {
759: PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
760: }
761: }
762: /* Since oldtype is committed, so is newtype, according to MPI */
763: if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
764: (*newsf)->vscat.bs = sf->vscat.bs;
765: (*newsf)->vscat.unit = dtype;
766: (*newsf)->vscat.to_n = sf->vscat.to_n;
767: (*newsf)->vscat.from_n = sf->vscat.from_n;
768: /* Do not copy lsf. Build it on demand since it is rarely used */
770: #if defined(PETSC_HAVE_DEVICE)
771: (*newsf)->backend = sf->backend;
772: (*newsf)->unknown_input_stream = sf->unknown_input_stream;
773: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
774: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
775: #endif
776: PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
777:   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
778: PetscFunctionReturn(PETSC_SUCCESS);
779: }
781: /*@C
782: PetscSFGetGraph - Get the graph specifying a parallel star forest
784: Not Collective
786: Input Parameter:
787: . sf - star forest
789: Output Parameters:
790: + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
791: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
792: . ilocal - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
793: - iremote - remote locations of root vertices for each leaf on the current process
795: Level: intermediate
797: Notes:
798:   We do not currently require that the graph be set, thus `nroots` = -1 is returned if it has not been set yet
800:   The returned `ilocal` and `iremote` might contain values in a different order than those passed to `PetscSFSetGraph()`
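
  Example Usage:
  A minimal sketch that prints, for each leaf of an `sf` whose graph is set, where the leaf lives locally and
  which remote root it references:
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaf storage */
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
.ve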
802: Fortran Note:
803: Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
805: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
806: @*/
807: PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
808: {
809: PetscFunctionBegin;
811: if (sf->ops->GetGraph) {
812: PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
813: } else {
814: if (nroots) *nroots = sf->nroots;
815: if (nleaves) *nleaves = sf->nleaves;
816: if (ilocal) *ilocal = sf->mine;
817: if (iremote) *iremote = sf->remote;
818: }
819: PetscFunctionReturn(PETSC_SUCCESS);
820: }
822: /*@
823: PetscSFGetLeafRange - Get the active leaf ranges
825: Not Collective
827: Input Parameter:
828: . sf - star forest
830: Output Parameters:
831: + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
832: - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
834: Level: developer
836: .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
837: @*/
838: PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
839: {
840: PetscFunctionBegin;
842: PetscSFCheckGraphSet(sf, 1);
843: if (minleaf) *minleaf = sf->minleaf;
844: if (maxleaf) *maxleaf = sf->maxleaf;
845: PetscFunctionReturn(PETSC_SUCCESS);
846: }
848: /*@
849: PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
851: Collective
853: Input Parameters:
854: + A - the star forest
855: . obj - Optional object that provides the prefix for the option names
856: - name - command line option
858: Level: intermediate
860: Note:
861:   See `PetscObjectViewFromOptions()` for possible values of `PetscViewer` and `PetscViewerFormat`
863: .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
864: @*/
865: PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
866: {
867: PetscFunctionBegin;
869: PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
870: PetscFunctionReturn(PETSC_SUCCESS);
871: }
873: /*@
874: PetscSFView - view a star forest
876: Collective
878: Input Parameters:
879: + sf - star forest
880: - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
882: Level: beginner
884: .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
885: @*/
886: PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
887: {
888: PetscBool iascii;
889: PetscViewerFormat format;
891: PetscFunctionBegin;
893: if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
895: PetscCheckSameComm(sf, 1, viewer, 2);
896: if (sf->graphset) PetscCall(PetscSFSetUp(sf));
897: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
898: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
899: PetscMPIInt rank;
900: PetscInt j;
902: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
903: PetscCall(PetscViewerASCIIPushTab(viewer));
904: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
905: if (!sf->graphset) {
906: PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
907: PetscCall(PetscViewerASCIIPopTab(viewer));
908: PetscFunctionReturn(PETSC_SUCCESS);
909: }
910: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
911: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
912: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
913: for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
914: PetscCall(PetscViewerFlush(viewer));
915: PetscCall(PetscViewerGetFormat(viewer, &format));
916: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
917: PetscMPIInt *tmpranks, *perm;
919: PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
920: PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
921: for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
922: PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
923: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
924: for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
925: PetscMPIInt i = perm[ii];
927: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
928: for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
929: }
930: PetscCall(PetscFree2(tmpranks, perm));
931: }
932: PetscCall(PetscViewerFlush(viewer));
933: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
934: }
935: PetscCall(PetscViewerASCIIPopTab(viewer));
936: }
937: PetscTryTypeMethod(sf, View, viewer);
938: PetscFunctionReturn(PETSC_SUCCESS);
939: }
941: /*@C
942: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
944: Not Collective
946: Input Parameter:
947: . sf - star forest
949: Output Parameters:
950: + nranks - number of ranks referenced by local part
951: . ranks - [`nranks`] array of ranks
952: . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
953: . rmine - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
954: - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
956: Level: developer
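
  Example Usage:
  A minimal sketch that walks the compressed rank structure of a set-up `sf`, printing how many leaf entries
  reference roots on each remote rank:
.vb
  PetscMPIInt        nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, NULL, NULL));
  for (PetscMPIInt r = 0; r < nranks; r++) {
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "rank %d: %" PetscInt_FMT " edges\n", ranks[r], roffset[r + 1] - roffset[r]));
  }
.ve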
958: .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
959: @*/
960: PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
961: {
962: PetscFunctionBegin;
964: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
965: if (sf->ops->GetRootRanks) {
966: PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
967: } else {
968: /* The generic implementation */
969: if (nranks) *nranks = sf->nranks;
970: if (ranks) *ranks = sf->ranks;
971: if (roffset) *roffset = sf->roffset;
972: if (rmine) *rmine = sf->rmine;
973: if (rremote) *rremote = sf->rremote;
974: }
975: PetscFunctionReturn(PETSC_SUCCESS);
976: }
978: /*@C
979: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
981: Not Collective
983: Input Parameter:
984: . sf - star forest
986: Output Parameters:
987: + niranks - number of leaf ranks referencing roots on this process
988: . iranks - [`niranks`] array of ranks
989: . ioffset - [`niranks`+1] offset in `irootloc` for each rank
990: - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
992: Level: developer
994: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
995: @*/
996: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
997: {
998: PetscFunctionBegin;
1000: PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
1001: if (sf->ops->GetLeafRanks) {
1002: PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1003: } else {
1004: PetscSFType type;
1005: PetscCall(PetscSFGetType(sf, &type));
1006: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1007: }
1008: PetscFunctionReturn(PETSC_SUCCESS);
1009: }
1011: static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1012: {
1013: PetscInt i;
1014: for (i = 0; i < n; i++) {
1015: if (needle == list[i]) return PETSC_TRUE;
1016: }
1017: return PETSC_FALSE;
1018: }
1020: /*@C
1021: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1023: Collective
1025: Input Parameters:
1026: + sf - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1027: - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1029: Level: developer
1031: .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1032: @*/
1033: PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1034: {
1035: PetscHMapI table;
1036: PetscHashIter pos;
1037: PetscMPIInt size, groupsize, *groupranks, *ranks;
1038: PetscInt *rcount;
1039: PetscInt irank, sfnrank, ranksi;
1040: PetscMPIInt i, orank = -1;
1042: PetscFunctionBegin;
1044: PetscSFCheckGraphSet(sf, 1);
1045: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1046: PetscCall(PetscHMapICreateWithSize(10, &table));
1047: for (i = 0; i < sf->nleaves; i++) {
1048: /* Log 1-based rank */
1049: PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1050: }
1051: PetscCall(PetscHMapIGetSize(table, &sfnrank));
1052: PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1053: PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1054: PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1055: PetscHashIterBegin(table, pos);
1056: for (i = 0; i < sf->nranks; i++) {
1057: PetscHashIterGetKey(table, pos, ranksi);
1058: PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1059: PetscHashIterGetVal(table, pos, rcount[i]);
1060: PetscHashIterNext(table, pos);
1061: ranks[i]--; /* Convert back to 0-based */
1062: }
1063: PetscCall(PetscHMapIDestroy(&table));
1065: /* We expect that dgroup is reliably "small" while nranks could be large */
1066: {
1067: MPI_Group group = MPI_GROUP_NULL;
1068: PetscMPIInt *dgroupranks;
1070: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1071: PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1072: PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1073: PetscCall(PetscMalloc1(groupsize, &groupranks));
1074: for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1075: if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1076: PetscCallMPI(MPI_Group_free(&group));
1077: PetscCall(PetscFree(dgroupranks));
1078: }
1080: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1081: for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1082: for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1083: if (InList(ranks[i], groupsize, groupranks)) break;
1084: }
1085: for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1086: if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1087: }
1088: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1089: PetscMPIInt tmprank;
1090: PetscInt tmpcount;
1092: tmprank = ranks[i];
1093: tmpcount = rcount[i];
1094: ranks[i] = ranks[sf->ndranks];
1095: rcount[i] = rcount[sf->ndranks];
1096: ranks[sf->ndranks] = tmprank;
1097: rcount[sf->ndranks] = tmpcount;
1098: sf->ndranks++;
1099: }
1100: }
1101: PetscCall(PetscFree(groupranks));
1102: PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1103: if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1104: sf->roffset[0] = 0;
1105: for (i = 0; i < sf->nranks; i++) {
1106: PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1107: sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1108: rcount[i] = 0;
1109: }
1110: for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1111: /* short circuit */
1112: if (orank != sf->remote[i].rank) {
1113: /* Search for index of iremote[i].rank in sf->ranks */
1114: PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1115: PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1116: if (irank < 0) {
1117: PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1118: if (irank >= 0) irank += sf->ndranks;
1119: }
1120: }
1121: PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
1122: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1123: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1124: rcount[irank]++;
1125: }
1126: PetscCall(PetscFree2(rcount, ranks));
1127: PetscFunctionReturn(PETSC_SUCCESS);
1128: }
1130: /*@C
1131: PetscSFGetGroups - gets incoming and outgoing process groups
1133: Collective
1135: Input Parameter:
1136: . sf - star forest
1138: Output Parameters:
1139: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1140: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1142: Level: developer
1144: .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1145: @*/
1146: PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1147: {
1148: MPI_Group group = MPI_GROUP_NULL;
1150: PetscFunctionBegin;
1151: PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1152: if (sf->ingroup == MPI_GROUP_NULL) {
1153: PetscInt i;
1154: const PetscInt *indegree;
1155: PetscMPIInt rank, *outranks, *inranks, indegree0;
1156: PetscSFNode *remote;
1157: PetscSF bgcount;
1159: /* Compute the number of incoming ranks */
1160: PetscCall(PetscMalloc1(sf->nranks, &remote));
1161: for (i = 0; i < sf->nranks; i++) {
1162: remote[i].rank = sf->ranks[i];
1163: remote[i].index = 0;
1164: }
1165: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1166: PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1167: PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1168: PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1169: /* Enumerate the incoming ranks */
1170: PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1171: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1172: for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1173: PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1174: PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1175: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1176: PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1177: PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1178: PetscCallMPI(MPI_Group_free(&group));
1179: PetscCall(PetscFree2(inranks, outranks));
1180: PetscCall(PetscSFDestroy(&bgcount));
1181: }
1182: *incoming = sf->ingroup;
1184: if (sf->outgroup == MPI_GROUP_NULL) {
1185: PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1186: PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1187: PetscCallMPI(MPI_Group_free(&group));
1188: }
1189: *outgoing = sf->outgroup;
1190: PetscFunctionReturn(PETSC_SUCCESS);
1191: }
1193: /*@
1194: PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1196: Collective
1198: Input Parameter:
1199: . sf - star forest
1201: Output Parameter:
1202: . rsf - the star forest with a single root per process to perform communications
1204: Level: developer
1206: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1207: @*/
1208: PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1209: {
1210: PetscFunctionBegin;
1212: PetscAssertPointer(rsf, 2);
1213: if (!sf->rankssf) {
1214: PetscSFNode *rremotes;
1215: const PetscMPIInt *ranks;
1216: PetscMPIInt nranks;
1218: PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1219: PetscCall(PetscMalloc1(nranks, &rremotes));
1220: for (PetscInt i = 0; i < nranks; i++) {
1221: rremotes[i].rank = ranks[i];
1222: rremotes[i].index = 0;
1223: }
1224: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1225: PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1226: }
1227: *rsf = sf->rankssf;
1228: PetscFunctionReturn(PETSC_SUCCESS);
1229: }
1231: /*@
1232: PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1234: Collective
1236: Input Parameter:
1237: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1239: Output Parameter:
1240: . multi - star forest with split roots, such that each root has degree exactly 1
1242: Level: developer
1244: Note:
1245: In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1246: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1247: edge, it is a candidate for future optimization that might involve its removal.
1249: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1250: @*/
1251: PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1252: {
1253: PetscFunctionBegin;
1255: PetscAssertPointer(multi, 2);
1256: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1257: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1258: *multi = sf->multi;
1259: sf->multi->multi = sf->multi;
1260: PetscFunctionReturn(PETSC_SUCCESS);
1261: }
1262: if (!sf->multi) {
1263: const PetscInt *indegree;
1264: PetscInt i, *inoffset, *outones, *outoffset, maxlocal;
1265: PetscSFNode *remote;
1266: maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1267: PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1268: PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1269: PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1270: inoffset[0] = 0;
1271: for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1272: for (i = 0; i < maxlocal; i++) outones[i] = 1;
1273: PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1274: PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1275: for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1276: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1277: for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1278: }
1279: PetscCall(PetscMalloc1(sf->nleaves, &remote));
1280: for (i = 0; i < sf->nleaves; i++) {
1281: remote[i].rank = sf->remote[i].rank;
1282: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1283: }
1284: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1285: sf->multi->multi = sf->multi;
1286: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1287: if (sf->rankorder) { /* Sort the ranks */
1288: PetscMPIInt rank;
1289: PetscInt *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1290: PetscSFNode *newremote;
1291: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1292: for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1293: PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1294: for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1295: PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1296: PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1297: /* Sort the incoming ranks at each vertex, build the inverse map */
1298: for (i = 0; i < sf->nroots; i++) {
1299: PetscInt j;
1300: for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1301: PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1302: for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1303: }
1304: PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1305: PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1306: PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1307: for (i = 0; i < sf->nleaves; i++) {
1308: newremote[i].rank = sf->remote[i].rank;
1309: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1310: }
1311: PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1312: PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1313: }
1314: PetscCall(PetscFree3(inoffset, outones, outoffset));
1315: }
1316: *multi = sf->multi;
1317: PetscFunctionReturn(PETSC_SUCCESS);
1318: }
1320: /*@C
1321: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1323: Collective
1325: Input Parameters:
1326: + sf - original star forest
1327: . nselected - number of selected roots on this process
1328: - selected - indices of the selected roots on this process
1330: Output Parameter:
1331: . esf - new star forest
1333: Level: advanced
1335: Note:
1336: To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1337:   be done by calling `PetscSFGetGraph()`.
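
  Example Usage:
  A minimal sketch keeping only the even-numbered local roots of `sf` (assuming its graph has been set):
.vb
  PetscSF  esf;
  PetscInt nroots, nselected = 0, *selected;

  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nroots, &selected));
  for (PetscInt i = 0; i < nroots; i += 2) selected[nselected++] = i;
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, &esf));
  PetscCall(PetscFree(selected));
  /* ... use esf, then PetscCall(PetscSFDestroy(&esf)) ... */
.ve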
1339: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1340: @*/
1341: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1342: {
1343: PetscInt i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1344: const PetscInt *ilocal;
1345: signed char *rootdata, *leafdata, *leafmem;
1346: const PetscSFNode *iremote;
1347: PetscSFNode *new_iremote;
1348: MPI_Comm comm;
1350: PetscFunctionBegin;
1352: PetscSFCheckGraphSet(sf, 1);
1353: if (nselected) PetscAssertPointer(selected, 3);
1354: PetscAssertPointer(esf, 4);
1356: PetscCall(PetscSFSetUp(sf));
1357: PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1358: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1359: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1361: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1362: PetscBool dups;
1363: PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1364: PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1365: for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1366: }
1368: if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1369: else {
1370: /* A generic version of creating embedded sf */
1371: PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1372: maxlocal = maxleaf - minleaf + 1;
1373: PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1374: leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1375: /* Tag selected roots and bcast to leaves */
1376: for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1377: PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1378: PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1380: /* Build esf with leaves that are still connected */
1381: esf_nleaves = 0;
1382: for (i = 0; i < nleaves; i++) {
1383: j = ilocal ? ilocal[i] : i;
1384: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1385: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1386: */
1387: esf_nleaves += (leafdata[j] ? 1 : 0);
1388: }
1389: PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1390: PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1391: for (i = n = 0; i < nleaves; i++) {
1392: j = ilocal ? ilocal[i] : i;
1393: if (leafdata[j]) {
1394: new_ilocal[n] = j;
1395: new_iremote[n].rank = iremote[i].rank;
1396: new_iremote[n].index = iremote[i].index;
1397: ++n;
1398: }
1399: }
1400: PetscCall(PetscSFCreate(comm, esf));
1401: PetscCall(PetscSFSetFromOptions(*esf));
1402: PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1403: PetscCall(PetscFree2(rootdata, leafmem));
1404: }
1405: PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1406: PetscFunctionReturn(PETSC_SUCCESS);
1407: }
1409: /*@C
1410: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1412: Collective
1414: Input Parameters:
1415: + sf - original star forest
1416: . nselected - number of selected leaves on this process
1417: - selected - indices of the selected leaves on this process
1419: Output Parameter:
1420: . newsf - new star forest
1422: Level: advanced
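  Example Usage:
  A minimal sketch, assuming `sf` already has its graph set and leaves 0 and 1 exist on this process\:
.vb
  PetscSF  lsf;
  PetscInt sel[2] = {0, 1};

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, sel, &lsf));
  /* lsf keeps only the edges of the selected leaves; leaf indices are not remapped */
  PetscCall(PetscSFDestroy(&lsf));
.ve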
1424: .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1425: @*/
1426: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1427: {
1428: const PetscSFNode *iremote;
1429: PetscSFNode *new_iremote;
1430: const PetscInt *ilocal;
1431: PetscInt i, nroots, *leaves, *new_ilocal;
1432: MPI_Comm comm;
1434: PetscFunctionBegin;
1436: PetscSFCheckGraphSet(sf, 1);
1437: if (nselected) PetscAssertPointer(selected, 3);
1438: PetscAssertPointer(newsf, 4);
1440: /* Remove duplicates from selected[] and put the results in leaves[] */
1441: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1442: PetscCall(PetscMalloc1(nselected, &leaves));
1443: PetscCall(PetscArraycpy(leaves, selected, nselected));
1444: PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1445: PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1447: /* Optimize the routine only when sf is set up, so that we can reuse sf's communication pattern */
1448: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1449: else {
1450: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1451: PetscCall(PetscMalloc1(nselected, &new_ilocal));
1452: PetscCall(PetscMalloc1(nselected, &new_iremote));
1453: for (i = 0; i < nselected; ++i) {
1454: const PetscInt l = leaves[i];
1455: new_ilocal[i] = ilocal ? ilocal[l] : l;
1456: new_iremote[i].rank = iremote[l].rank;
1457: new_iremote[i].index = iremote[l].index;
1458: }
1459: PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1460: PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1461: }
1462: PetscCall(PetscFree(leaves));
1463: PetscFunctionReturn(PETSC_SUCCESS);
1464: }
1466: /*@C
1467: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1469: Collective
1471: Input Parameters:
1472: + sf - star forest on which to communicate
1473: . unit - data type associated with each node
1474: . rootdata - buffer to broadcast
1475: - op - operation to use for reduction
1477: Output Parameter:
1478: . leafdata - buffer to be reduced with values from each leaf's respective root
1480: Level: intermediate
1482: Note:
1483: When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1484: are host or device pointers, which may incur a noticeable cost. If you already know this information, you should
1485: use `PetscSFBcastWithMemTypeBegin()` instead.
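  Example Usage:
  A minimal sketch broadcasting one `PetscInt` per root to its leaves, assuming `sf` has `nroots` roots and `nleaves` contiguously stored leaves\:
.vb
  PetscInt *rootdata, *leafdata;

  PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
  /* ... fill rootdata ... */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  /* each leafdata[i] now holds the value of the root it is connected to */
  PetscCall(PetscFree2(rootdata, leafdata));
.ve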
1487: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1488: @*/
1489: PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1490: {
1491: PetscMemType rootmtype, leafmtype;
1493: PetscFunctionBegin;
1495: PetscCall(PetscSFSetUp(sf));
1496: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1497: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1498: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1499: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1500: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1501: PetscFunctionReturn(PETSC_SUCCESS);
1502: }
1504: /*@C
1505: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1506: to `PetscSFBcastEnd()`
1508: Collective
1510: Input Parameters:
1511: + sf - star forest on which to communicate
1512: . unit - data type associated with each node
1513: . rootmtype - memory type of rootdata
1514: . rootdata - buffer to broadcast
1515: . leafmtype - memory type of leafdata
1516: - op - operation to use for reduction
1518: Output Parameter:
1519: . leafdata - buffer to be reduced with values from each leaf's respective root
1521: Level: intermediate
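  Example Usage:
  A minimal sketch, assuming `rootdata` and `leafdata` are known to be host buffers, so the memory-type detection done by `PetscSFBcastBegin()` can be skipped\:
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve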
1523: .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1524: @*/
1525: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1526: {
1527: PetscFunctionBegin;
1529: PetscCall(PetscSFSetUp(sf));
1530: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1531: PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1532: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1533: PetscFunctionReturn(PETSC_SUCCESS);
1534: }
1536: /*@C
1537: PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1539: Collective
1541: Input Parameters:
1542: + sf - star forest
1543: . unit - data type
1544: . rootdata - buffer to broadcast
1545: - op - operation to use for reduction
1547: Output Parameter:
1548: . leafdata - buffer to be reduced with values from each leaf's respective root
1550: Level: intermediate
1552: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1553: @*/
1554: PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1555: {
1556: PetscFunctionBegin;
1558: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1559: PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1560: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1561: PetscFunctionReturn(PETSC_SUCCESS);
1562: }
1564: /*@C
1565: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1567: Collective
1569: Input Parameters:
1570: + sf - star forest
1571: . unit - data type
1572: . leafdata - values to reduce
1573: - op - reduction operation
1575: Output Parameter:
1576: . rootdata - result of reduction of values from all leaves of each root
1578: Level: intermediate
1580: Note:
1581: When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1582: are host or device pointers, which may incur a noticeable cost. If you already know this information, you should
1583: use `PetscSFReduceWithMemTypeBegin()` instead.
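  Example Usage:
  A minimal sketch summing one `PetscScalar` per leaf into its root, assuming contiguous leaf storage and `rootdata` initialized to zero\:
.vb
  PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
  /* each rootdata[i] now holds the sum of the values of all leaves connected to root i */
.ve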
1585: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1586: @*/
1587: PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1588: {
1589: PetscMemType rootmtype, leafmtype;
1591: PetscFunctionBegin;
1593: PetscCall(PetscSFSetUp(sf));
1594: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1595: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1596: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1597: PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1598: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1599: PetscFunctionReturn(PETSC_SUCCESS);
1600: }
1602: /*@C
1603: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1605: Collective
1607: Input Parameters:
1608: + sf - star forest
1609: . unit - data type
1610: . leafmtype - memory type of leafdata
1611: . leafdata - values to reduce
1612: . rootmtype - memory type of rootdata
1613: - op - reduction operation
1615: Output Parameter:
1616: . rootdata - result of reduction of values from all leaves of each root
1618: Level: intermediate
1620: .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1621: @*/
1622: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1623: {
1624: PetscFunctionBegin;
1626: PetscCall(PetscSFSetUp(sf));
1627: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1628: PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1629: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1630: PetscFunctionReturn(PETSC_SUCCESS);
1631: }
1633: /*@C
1634: PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1636: Collective
1638: Input Parameters:
1639: + sf - star forest
1640: . unit - data type
1641: . leafdata - values to reduce
1642: - op - reduction operation
1644: Output Parameter:
1645: . rootdata - result of reduction of values from all leaves of each root
1647: Level: intermediate
1649: .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1650: @*/
1651: PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1652: {
1653: PetscFunctionBegin;
1655: if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1656: PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1657: if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1658: PetscFunctionReturn(PETSC_SUCCESS);
1659: }
1661: /*@C
1662: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1663: to be completed with `PetscSFFetchAndOpEnd()`
1665: Collective
1667: Input Parameters:
1668: + sf - star forest
1669: . unit - data type
1670: . leafdata - leaf values to use in reduction
1671: - op - operation to use for reduction
1673: Output Parameters:
1674: + rootdata - root values to be updated, input state is seen by first process to perform an update
1675: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1677: Level: advanced
1679: Note:
1680: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1681: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1682: not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1683: integers.
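  Example Usage:
  A minimal sketch in which each leaf atomically adds its value to its root and fetches the root's previous state, assuming contiguous leaf storage and `PetscInt` buffers\:
.vb
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
  /* leafupdate[i] holds the value of the root just before this leaf's contribution was applied */
.ve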
1685: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1686: @*/
1687: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1688: {
1689: PetscMemType rootmtype, leafmtype, leafupdatemtype;
1691: PetscFunctionBegin;
1693: PetscCall(PetscSFSetUp(sf));
1694: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1695: PetscCall(PetscGetMemType(rootdata, &rootmtype));
1696: PetscCall(PetscGetMemType(leafdata, &leafmtype));
1697: PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1698: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1699: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1700: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1701: PetscFunctionReturn(PETSC_SUCCESS);
1702: }
1704: /*@C
1705: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1706: applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1708: Collective
1710: Input Parameters:
1711: + sf - star forest
1712: . unit - data type
1713: . rootmtype - memory type of rootdata
1714: . leafmtype - memory type of leafdata
1715: . leafdata - leaf values to use in reduction
1716: . leafupdatemtype - memory type of leafupdate
1717: - op - operation to use for reduction
1719: Output Parameters:
1720: + rootdata - root values to be updated, input state is seen by first process to perform an update
1721: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1723: Level: advanced
1725: Note:
1726: See `PetscSFFetchAndOpBegin()` for more details.
1728: .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1729: @*/
1730: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1731: {
1732: PetscFunctionBegin;
1734: PetscCall(PetscSFSetUp(sf));
1735: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1736: PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1737: PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1738: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1739: PetscFunctionReturn(PETSC_SUCCESS);
1740: }
1742: /*@C
1743: PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1744: to fetch values from roots and update atomically by applying operation using my leaf value
1746: Collective
1748: Input Parameters:
1749: + sf - star forest
1750: . unit - data type
1751: . leafdata - leaf values to use in reduction
1752: - op - operation to use for reduction
1754: Output Parameters:
1755: + rootdata - root values to be updated, input state is seen by first process to perform an update
1756: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1758: Level: advanced
1760: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1761: @*/
1762: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1763: {
1764: PetscFunctionBegin;
1766: PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1767: PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1768: PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1769: PetscFunctionReturn(PETSC_SUCCESS);
1770: }
1772: /*@C
1773: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1775: Collective
1777: Input Parameter:
1778: . sf - star forest
1780: Output Parameter:
1781: . degree - degree of each root vertex
1783: Level: advanced
1785: Note:
1786: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
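  Example Usage:
  A minimal sketch; the returned array is owned by `sf`, so it is not freed here\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  /* degree[i] is the number of leaves connected to root i */
.ve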
1788: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1789: @*/
1790: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1791: {
1792: PetscFunctionBegin;
1794: PetscSFCheckGraphSet(sf, 1);
1795: PetscAssertPointer(degree, 2);
1796: if (!sf->degreeknown) {
1797: PetscInt i, nroots = sf->nroots, maxlocal;
1798: PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1799: maxlocal = sf->maxleaf - sf->minleaf + 1;
1800: PetscCall(PetscMalloc1(nroots, &sf->degree));
1801: PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1802: for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1803: for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1804: PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1805: }
1806: *degree = NULL;
1807: PetscFunctionReturn(PETSC_SUCCESS);
1808: }
1810: /*@C
1811: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1813: Collective
1815: Input Parameter:
1816: . sf - star forest
1818: Output Parameter:
1819: . degree - degree of each root vertex
1821: Level: developer
1823: Note:
1824: The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1826: .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1827: @*/
1828: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1829: {
1830: PetscFunctionBegin;
1832: PetscSFCheckGraphSet(sf, 1);
1833: PetscAssertPointer(degree, 2);
1834: if (!sf->degreeknown) {
1835: PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1836: PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1837: PetscCall(PetscFree(sf->degreetmp));
1838: sf->degreeknown = PETSC_TRUE;
1839: }
1840: *degree = sf->degree;
1841: PetscFunctionReturn(PETSC_SUCCESS);
1842: }
1844: /*@C
1845: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1846: Each multi-root is assigned the index of the corresponding original root.
1848: Collective
1850: Input Parameters:
1851: + sf - star forest
1852: - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1854: Output Parameters:
1855: + nMultiRoots - (optional) number of multi-roots (roots of multi-`PetscSF`)
1856: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1858: Level: developer
1860: Note:
1861: The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
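  Example Usage:
  A minimal sketch, assuming `degree` was obtained with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()` on `sf`\:
.vb
  PetscInt nmr, *morig;

  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &morig));
  /* morig[k] is the original root index of multi-root k, for 0 <= k < nmr */
  PetscCall(PetscFree(morig));
.ve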
1863: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1864: @*/
1865: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1866: {
1867: PetscSF msf;
1868: PetscInt k = 0, nroots, nmroots;
1870: PetscFunctionBegin;
1872: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1873: if (nroots) PetscAssertPointer(degree, 2);
1874: if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1875: PetscAssertPointer(multiRootsOrigNumbering, 4);
1876: PetscCall(PetscSFGetMultiSF(sf, &msf));
1877: PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1878: PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1879: for (PetscInt i = 0; i < nroots; i++) {
1880: if (!degree[i]) continue;
1881: for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1882: }
1883: PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check failed");
1884: if (nMultiRoots) *nMultiRoots = nmroots;
1885: PetscFunctionReturn(PETSC_SUCCESS);
1886: }
1888: /*@C
1889: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1891: Collective
1893: Input Parameters:
1894: + sf - star forest
1895: . unit - data type
1896: - leafdata - leaf data to gather to roots
1898: Output Parameter:
1899: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1901: Level: intermediate
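  Example Usage:
  A minimal sketch gathering one `PetscInt` per leaf into a multi-root buffer sized by the sum of the root degrees, assuming `sf` has `nroots` roots and `leafdata` is an existing leaf buffer with contiguous storage\:
.vb
  const PetscInt *degree;
  PetscInt        i, nmulti = 0, *multirootdata;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (i = 0; i < nroots; i++) nmulti += degree[i];
  PetscCall(PetscMalloc1(nmulti, &multirootdata));
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscFree(multirootdata));
.ve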
1903: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1904: @*/
1905: PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1906: {
1907: PetscSF multi = NULL;
1909: PetscFunctionBegin;
1911: PetscCall(PetscSFSetUp(sf));
1912: PetscCall(PetscSFGetMultiSF(sf, &multi));
1913: PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1914: PetscFunctionReturn(PETSC_SUCCESS);
1915: }
1917: /*@C
1918: PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1920: Collective
1922: Input Parameters:
1923: + sf - star forest
1924: . unit - data type
1925: - leafdata - leaf data to gather to roots
1927: Output Parameter:
1928: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1930: Level: intermediate
1932: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1933: @*/
1934: PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1935: {
1936: PetscSF multi = NULL;
1938: PetscFunctionBegin;
1940: PetscCall(PetscSFGetMultiSF(sf, &multi));
1941: PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1942: PetscFunctionReturn(PETSC_SUCCESS);
1943: }
1945: /*@C
1946: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1948: Collective
1950: Input Parameters:
1951: + sf - star forest
1952: . unit - data type
1953: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1955: Output Parameter:
1956: . leafdata - leaf data to be updated with personal data from each respective root
1958: Level: intermediate
1960: .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1961: @*/
1962: PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1963: {
1964: PetscSF multi = NULL;
1966: PetscFunctionBegin;
1968: PetscCall(PetscSFSetUp(sf));
1969: PetscCall(PetscSFGetMultiSF(sf, &multi));
1970: PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1971: PetscFunctionReturn(PETSC_SUCCESS);
1972: }
1974: /*@C
1975: PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1977: Collective
1979: Input Parameters:
1980: + sf - star forest
1981: . unit - data type
1982: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1984: Output Parameter:
1985: . leafdata - leaf data to be updated with personal data from each respective root
1987: Level: intermediate
1989: .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1990: @*/
1991: PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1992: {
1993: PetscSF multi = NULL;
1995: PetscFunctionBegin;
1997: PetscCall(PetscSFGetMultiSF(sf, &multi));
1998: PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1999: PetscFunctionReturn(PETSC_SUCCESS);
2000: }
2002: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2003: {
2004: PetscInt i, n, nleaves;
2005: const PetscInt *ilocal = NULL;
2006: PetscHSetI seen;
2008: PetscFunctionBegin;
2009: if (PetscDefined(USE_DEBUG)) {
2010: PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2011: PetscCall(PetscHSetICreate(&seen));
2012: for (i = 0; i < nleaves; i++) {
2013: const PetscInt leaf = ilocal ? ilocal[i] : i;
2014: PetscCall(PetscHSetIAdd(seen, leaf));
2015: }
2016: PetscCall(PetscHSetIGetSize(seen, &n));
2017: PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2018: PetscCall(PetscHSetIDestroy(&seen));
2019: }
2020: PetscFunctionReturn(PETSC_SUCCESS);
2021: }
2023: /*@
2024: PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2026: Input Parameters:
2027: + sfA - The first `PetscSF`
2028: - sfB - The second `PetscSF`
2030: Output Parameter:
2031: . sfBA - The composite `PetscSF`
2033: Level: developer
2035: Notes:
2036: Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2037: forests, i.e. the same leaf is not connected with different roots.
2039: `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2040: a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2041: nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2042: `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
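  Example Usage:
  A minimal sketch, assuming `sfA` and `sfB` already have their graphs set on the same communicator\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  /* a broadcast on sfBA is equivalent to a broadcast on sfA followed by a broadcast on sfB, on connected nodes */
  PetscCall(PetscSFDestroy(&sfBA));
.ve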
2044: .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2045: @*/
2046: PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2047: {
2048: const PetscSFNode *remotePointsA, *remotePointsB;
2049: PetscSFNode *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2050: const PetscInt *localPointsA, *localPointsB;
2051: PetscInt *localPointsBA;
2052: PetscInt i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2053: PetscBool denseB;
2055: PetscFunctionBegin;
2057: PetscSFCheckGraphSet(sfA, 1);
2059: PetscSFCheckGraphSet(sfB, 2);
2060: PetscCheckSameComm(sfA, 1, sfB, 2);
2061: PetscAssertPointer(sfBA, 3);
2062: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2063: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2065: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2066: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2067: /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2068: numRootsB; otherwise, garbage will be broadcasted.
2069: Example (comm size = 1):
2070: sfA: 0 <- (0, 0)
2071: sfB: 100 <- (0, 0)
2072: 101 <- (0, 1)
2073: Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2074: of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2075: receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2076: remotePointsA; if not recast, point 101 would receive a garbage value. */
2077: PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2078: for (i = 0; i < numRootsB; i++) {
2079: reorderedRemotePointsA[i].rank = -1;
2080: reorderedRemotePointsA[i].index = -1;
2081: }
2082: for (i = 0; i < numLeavesA; i++) {
2083: PetscInt localp = localPointsA ? localPointsA[i] : i;
2085: if (localp >= numRootsB) continue;
2086: reorderedRemotePointsA[localp] = remotePointsA[i];
2087: }
2088: remotePointsA = reorderedRemotePointsA;
2089: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2090: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2091: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2092: leafdataB[i].rank = -1;
2093: leafdataB[i].index = -1;
2094: }
2095: PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2096: PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2097: PetscCall(PetscFree(reorderedRemotePointsA));
2099: denseB = (PetscBool)!localPointsB;
2100: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2101: if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2102: else numLeavesBA++;
2103: }
2104: if (denseB) {
2105: localPointsBA = NULL;
2106: remotePointsBA = leafdataB;
2107: } else {
2108: PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2109: PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2110: for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2111: const PetscInt l = localPointsB ? localPointsB[i] : i;
2113: if (leafdataB[l - minleaf].rank == -1) continue;
2114: remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2115: localPointsBA[numLeavesBA] = l;
2116: numLeavesBA++;
2117: }
2118: PetscCall(PetscFree(leafdataB));
2119: }
2120: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2121: PetscCall(PetscSFSetFromOptions(*sfBA));
2122: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2123: PetscFunctionReturn(PETSC_SUCCESS);
2124: }
2126: /*@
2127: PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2129: Input Parameters:
2130: + sfA - The first `PetscSF`
2131: - sfB - The second `PetscSF`
2133: Output Parameter:
2134: . sfBA - The composite `PetscSF`.
2136: Level: developer
2138: Notes:
2139: Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2140: forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2141: second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2143: `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2144: a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2145: roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2146: on `sfA`, then
2147: a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
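  Example Usage:
  A minimal sketch, assuming `sfA` and `sfB` already have their graphs set on the same communicator and every root of `sfB` has degree one\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFComposeInverse(sfA, sfB, &sfBA));
  /* a broadcast on sfBA is equivalent to a broadcast on sfA followed by a reduction on sfB, on connected roots */
  PetscCall(PetscSFDestroy(&sfBA));
.ve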
2149: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2150: @*/
2151: PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2152: {
2153: const PetscSFNode *remotePointsA, *remotePointsB;
2154: PetscSFNode *remotePointsBA;
2155: const PetscInt *localPointsA, *localPointsB;
2156: PetscSFNode *reorderedRemotePointsA = NULL;
2157: PetscInt i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2158: MPI_Op op;
2159: #if defined(PETSC_USE_64BIT_INDICES)
2160: PetscBool iswin;
2161: #endif
2163: PetscFunctionBegin;
2165: PetscSFCheckGraphSet(sfA, 1);
2167: PetscSFCheckGraphSet(sfB, 2);
2168: PetscCheckSameComm(sfA, 1, sfB, 2);
2169: PetscAssertPointer(sfBA, 3);
2170: PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2171: PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2173: PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2174: PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2176: /* TODO: Check roots of sfB have degree of 1 */
2177: /* Once we implement that check, we can replace the MPI_MAXLOC
2178: with MPI_REPLACE. When the degree condition holds, MPI_MAXLOC and MPI_REPLACE have the same effect.
2179: We use MPI_MAXLOC only to get a deterministic output from this routine if
2180: the root condition is not met.
2181: */
2182: op = MPI_MAXLOC;
2183: #if defined(PETSC_USE_64BIT_INDICES)
2184: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2185: PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2186: if (iswin) op = MPI_REPLACE;
2187: #endif
2189: PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2190: PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2191: for (i = 0; i < maxleaf - minleaf + 1; i++) {
2192: reorderedRemotePointsA[i].rank = -1;
2193: reorderedRemotePointsA[i].index = -1;
2194: }
2195: if (localPointsA) {
2196: for (i = 0; i < numLeavesA; i++) {
2197: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2198: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2199: }
2200: } else {
2201: for (i = 0; i < numLeavesA; i++) {
2202: if (i > maxleaf || i < minleaf) continue;
2203: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2204: }
2205: }
2207: PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2208: PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2209: for (i = 0; i < numRootsB; i++) {
2210: remotePointsBA[i].rank = -1;
2211: remotePointsBA[i].index = -1;
2212: }
2214: PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2215: PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2216: PetscCall(PetscFree(reorderedRemotePointsA));
2217: for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2218: if (remotePointsBA[i].rank == -1) continue;
2219: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2220: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2221: localPointsBA[numLeavesBA] = i;
2222: numLeavesBA++;
2223: }
2224: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2225: PetscCall(PetscSFSetFromOptions(*sfBA));
2226: PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2227: PetscFunctionReturn(PETSC_SUCCESS);
2228: }
2230: /*
2231: PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2233: Input Parameter:
2234: . sf - The global `PetscSF`
2236: Output Parameter:
2237: . out - The local `PetscSF`
2239: .seealso: `PetscSF`, `PetscSFCreate()`
2240: */
2241: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2242: {
2243: MPI_Comm comm;
2244: PetscMPIInt myrank;
2245: const PetscInt *ilocal;
2246: const PetscSFNode *iremote;
2247: PetscInt i, j, nroots, nleaves, lnleaves, *lilocal;
2248: PetscSFNode *liremote;
2249: PetscSF lsf;
2251: PetscFunctionBegin;
2253: if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2254: else {
2255: PetscMPIInt irank;
2257: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2258: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2259: PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2261: /* Find out local edges and build a local SF */
2262: PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2263: for (i = lnleaves = 0; i < nleaves; i++) {
2264: PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2265: if (irank == myrank) lnleaves++;
2266: }
2267: PetscCall(PetscMalloc1(lnleaves, &lilocal));
2268: PetscCall(PetscMalloc1(lnleaves, &liremote));
2270: for (i = j = 0; i < nleaves; i++) {
2271: PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2272: if (irank == myrank) {
2273: lilocal[j] = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2274: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2275: liremote[j].index = iremote[i].index;
2276: j++;
2277: }
2278: }
2279: PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2280: PetscCall(PetscSFSetFromOptions(lsf));
2281: PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2282: PetscCall(PetscSFSetUp(lsf));
2283: *out = lsf;
2284: }
2285: PetscFunctionReturn(PETSC_SUCCESS);
2286: }
2288: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2289: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2290: {
2291: PetscMemType rootmtype, leafmtype;
2293: PetscFunctionBegin;
2295: PetscCall(PetscSFSetUp(sf));
2296: PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2297: PetscCall(PetscGetMemType(rootdata, &rootmtype));
2298: PetscCall(PetscGetMemType(leafdata, &leafmtype));
2299: PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2300: PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2301: PetscFunctionReturn(PETSC_SUCCESS);
2302: }
2304: /*@
2305: PetscSFConcatenate - concatenate multiple `PetscSF` into one
2307: Input Parameters:
2308: + comm - the communicator
2309: . nsfs - the number of input `PetscSF`
2310: . sfs - the array of input `PetscSF`
2311: . rootMode - the root mode specifying how roots are handled
2312: - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2314: Output Parameter:
2315: . newsf - The resulting `PetscSF`
2317: Level: advanced
2319: Notes:
2320: The communicator of all `PetscSF`s in `sfs` must be `comm`.
2322: Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2324: The offsets in `leafOffsets` are added to the original leaf indices.
2326: If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2327: In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2329: If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2330: In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2332: All root modes retain the essential connectivity condition.
2333: If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2334: Parameter `rootMode` controls how the input root spaces are combined.
2335: For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2336: and is also the same in the output `PetscSF`.
2337: For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2338: `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2339: roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2340: `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2341: roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2342: the original root ranks are ignored.
2343: For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2344: the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
2345: to keep the load balancing.
2346: However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2348: Example:
2349: We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2350: .vb
2351: make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2352: for m in {local,global,shared}; do
2353: mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2354: done
2355: .ve
2356: we generate two identical `PetscSF`s sf_0 and sf_1,
2357: .vb
2358: PetscSF Object: sf_0 2 MPI processes
2359: type: basic
2360: rank #leaves #roots
2361: [ 0] 4 2
2362: [ 1] 4 2
2363: leaves roots roots in global numbering
2364: ( 0, 0) <- ( 0, 0) = 0
2365: ( 0, 1) <- ( 0, 1) = 1
2366: ( 0, 2) <- ( 1, 0) = 2
2367: ( 0, 3) <- ( 1, 1) = 3
2368: ( 1, 0) <- ( 0, 0) = 0
2369: ( 1, 1) <- ( 0, 1) = 1
2370: ( 1, 2) <- ( 1, 0) = 2
2371: ( 1, 3) <- ( 1, 1) = 3
2372: .ve
2373: and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2374: .vb
2375: rootMode = local:
2376: PetscSF Object: result_sf 2 MPI processes
2377: type: basic
2378: rank #leaves #roots
2379: [ 0] 8 4
2380: [ 1] 8 4
2381: leaves roots roots in global numbering
2382: ( 0, 0) <- ( 0, 0) = 0
2383: ( 0, 1) <- ( 0, 1) = 1
2384: ( 0, 2) <- ( 1, 0) = 4
2385: ( 0, 3) <- ( 1, 1) = 5
2386: ( 0, 4) <- ( 0, 2) = 2
2387: ( 0, 5) <- ( 0, 3) = 3
2388: ( 0, 6) <- ( 1, 2) = 6
2389: ( 0, 7) <- ( 1, 3) = 7
2390: ( 1, 0) <- ( 0, 0) = 0
2391: ( 1, 1) <- ( 0, 1) = 1
2392: ( 1, 2) <- ( 1, 0) = 4
2393: ( 1, 3) <- ( 1, 1) = 5
2394: ( 1, 4) <- ( 0, 2) = 2
2395: ( 1, 5) <- ( 0, 3) = 3
2396: ( 1, 6) <- ( 1, 2) = 6
2397: ( 1, 7) <- ( 1, 3) = 7
2399: rootMode = global:
2400: PetscSF Object: result_sf 2 MPI processes
2401: type: basic
2402: rank #leaves #roots
2403: [ 0] 8 4
2404: [ 1] 8 4
2405: leaves roots roots in global numbering
2406: ( 0, 0) <- ( 0, 0) = 0
2407: ( 0, 1) <- ( 0, 1) = 1
2408: ( 0, 2) <- ( 0, 2) = 2
2409: ( 0, 3) <- ( 0, 3) = 3
2410: ( 0, 4) <- ( 1, 0) = 4
2411: ( 0, 5) <- ( 1, 1) = 5
2412: ( 0, 6) <- ( 1, 2) = 6
2413: ( 0, 7) <- ( 1, 3) = 7
2414: ( 1, 0) <- ( 0, 0) = 0
2415: ( 1, 1) <- ( 0, 1) = 1
2416: ( 1, 2) <- ( 0, 2) = 2
2417: ( 1, 3) <- ( 0, 3) = 3
2418: ( 1, 4) <- ( 1, 0) = 4
2419: ( 1, 5) <- ( 1, 1) = 5
2420: ( 1, 6) <- ( 1, 2) = 6
2421: ( 1, 7) <- ( 1, 3) = 7
2423: rootMode = shared:
2424: PetscSF Object: result_sf 2 MPI processes
2425: type: basic
2426: rank #leaves #roots
2427: [ 0] 8 2
2428: [ 1] 8 2
2429: leaves roots roots in global numbering
2430: ( 0, 0) <- ( 0, 0) = 0
2431: ( 0, 1) <- ( 0, 1) = 1
2432: ( 0, 2) <- ( 1, 0) = 2
2433: ( 0, 3) <- ( 1, 1) = 3
2434: ( 0, 4) <- ( 0, 0) = 0
2435: ( 0, 5) <- ( 0, 1) = 1
2436: ( 0, 6) <- ( 1, 0) = 2
2437: ( 0, 7) <- ( 1, 1) = 3
2438: ( 1, 0) <- ( 0, 0) = 0
2439: ( 1, 1) <- ( 0, 1) = 1
2440: ( 1, 2) <- ( 1, 0) = 2
2441: ( 1, 3) <- ( 1, 1) = 3
2442: ( 1, 4) <- ( 0, 0) = 0
2443: ( 1, 5) <- ( 0, 1) = 1
2444: ( 1, 6) <- ( 1, 0) = 2
2445: ( 1, 7) <- ( 1, 1) = 3
2446: .ve
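  A corresponding call sketch, assuming `sfs[]` holds `nsfs` star forests on `comm`, all with contiguous leaf storage (so `leafOffsets` may be `NULL`)\:
.vb
  PetscSF newsf;

  PetscCall(PetscSFConcatenate(comm, nsfs, sfs, PETSCSF_CONCATENATE_ROOTMODE_LOCAL, NULL, &newsf));
.ve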
2448: .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2449: @*/
2450: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2451: {
2452: PetscInt i, s, nLeaves, nRoots;
2453: PetscInt *leafArrayOffsets;
2454: PetscInt *ilocal_new;
2455: PetscSFNode *iremote_new;
2456: PetscBool all_ilocal_null = PETSC_FALSE;
2457: PetscLayout glayout = NULL;
2458: PetscInt *gremote = NULL;
2459: PetscMPIInt rank, size;
2461: PetscFunctionBegin;
2462: if (PetscDefined(USE_DEBUG)) {
2463: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2465: PetscCall(PetscSFCreate(comm, &dummy));
2467: PetscAssertPointer(sfs, 3);
2468: for (i = 0; i < nsfs; i++) {
2470: PetscCheckSameComm(dummy, 1, sfs[i], 3);
2471: }
2473: if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2474: PetscAssertPointer(newsf, 6);
2475: PetscCall(PetscSFDestroy(&dummy));
2476: }
2477: if (!nsfs) {
2478: PetscCall(PetscSFCreate(comm, newsf));
2479: PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2480: PetscFunctionReturn(PETSC_SUCCESS);
2481: }
2482: PetscCallMPI(MPI_Comm_rank(comm, &rank));
2483: PetscCallMPI(MPI_Comm_size(comm, &size));
2485: /* Calculate leaf array offsets */
2486: PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2487: leafArrayOffsets[0] = 0;
2488: for (s = 0; s < nsfs; s++) {
2489: PetscInt nl;
2491: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2492: leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2493: }
2494: nLeaves = leafArrayOffsets[nsfs];
2496: /* Calculate number of roots */
2497: switch (rootMode) {
2498: case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2499: PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2500: if (PetscDefined(USE_DEBUG)) {
2501: for (s = 1; s < nsfs; s++) {
2502: PetscInt nr;
2504: PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2505: PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2506: }
2507: }
2508: } break;
2509: case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2510: /* Calculate also global layout in this case */
2511: PetscInt *nls;
2512: PetscLayout *lts;
2513: PetscInt **inds;
2514: PetscInt j;
2515: PetscInt rootOffset = 0;
2517: PetscCall(PetscCalloc3(nsfs, <s, nsfs, &nls, nsfs, &inds));
2518: PetscCall(PetscLayoutCreate(comm, &glayout));
2519: glayout->bs = 1;
2520: glayout->n = 0;
2521: glayout->N = 0;
2522: for (s = 0; s < nsfs; s++) {
2523: PetscCall(PetscSFGetGraphLayout(sfs[s], <s[s], &nls[s], NULL, &inds[s]));
2524: glayout->n += lts[s]->n;
2525: glayout->N += lts[s]->N;
2526: }
2527: PetscCall(PetscLayoutSetUp(glayout));
2528: PetscCall(PetscMalloc1(nLeaves, &gremote));
2529: for (s = 0, j = 0; s < nsfs; s++) {
2530: for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2531: rootOffset += lts[s]->N;
2532: PetscCall(PetscLayoutDestroy(<s[s]));
2533: PetscCall(PetscFree(inds[s]));
2534: }
2535: PetscCall(PetscFree3(lts, nls, inds));
2536: nRoots = glayout->N;
2537: } break;
2538: case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2539: /* nRoots calculated later in this case */
2540: break;
2541: default:
2542: SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2543: }
2545: if (!leafOffsets) {
2546: all_ilocal_null = PETSC_TRUE;
2547: for (s = 0; s < nsfs; s++) {
2548: const PetscInt *ilocal;
2550: PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2551: if (ilocal) {
2552: all_ilocal_null = PETSC_FALSE;
2553: break;
2554: }
2555: }
2556: PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2557: }
2559: /* Renumber and concatenate local leaves */
2560: ilocal_new = NULL;
2561: if (!all_ilocal_null) {
2562: PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2563: for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2564: for (s = 0; s < nsfs; s++) {
2565: const PetscInt *ilocal;
2566: PetscInt *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2567: PetscInt i, nleaves_l;
2569: PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2570: for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2571: }
2572: }
2574: /* Renumber and concatenate remote roots */
2575: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2576: PetscInt rootOffset = 0;
2578: PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2579: for (i = 0; i < nLeaves; i++) {
2580: iremote_new[i].rank = -1;
2581: iremote_new[i].index = -1;
2582: }
2583: for (s = 0; s < nsfs; s++) {
2584: PetscInt i, nl, nr;
2585: PetscSF tmp_sf;
2586: const PetscSFNode *iremote;
2587: PetscSFNode *tmp_rootdata;
2588: PetscSFNode *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2590: PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2591: PetscCall(PetscSFCreate(comm, &tmp_sf));
2592: /* create helper SF with contiguous leaves */
2593: PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2594: PetscCall(PetscSFSetUp(tmp_sf));
2595: PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2596: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2597: for (i = 0; i < nr; i++) {
2598: tmp_rootdata[i].index = i + rootOffset;
2599: tmp_rootdata[i].rank = rank;
2600: }
2601: rootOffset += nr;
2602: } else {
2603: for (i = 0; i < nr; i++) {
2604: tmp_rootdata[i].index = i;
2605: tmp_rootdata[i].rank = rank;
2606: }
2607: }
2608: PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2609: PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2610: PetscCall(PetscSFDestroy(&tmp_sf));
2611: PetscCall(PetscFree(tmp_rootdata));
2612: }
2613: if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2615: /* Build the new SF */
2616: PetscCall(PetscSFCreate(comm, newsf));
2617: PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2618: } else {
2619: /* Build the new SF */
2620: PetscCall(PetscSFCreate(comm, newsf));
2621: PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2622: }
2623: PetscCall(PetscSFSetUp(*newsf));
2624: PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2625: PetscCall(PetscLayoutDestroy(&glayout));
2626: PetscCall(PetscFree(gremote));
2627: PetscCall(PetscFree(leafArrayOffsets));
2628: PetscFunctionReturn(PETSC_SUCCESS);
2629: }
2631: /*@
2632: PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
2634: Collective
2636: Input Parameters:
2637: + sf - star forest
2638: . unit - the data type contained within the root and leaf data
2639: . rootdata - root data that will be used for multiple PetscSF communications
2640: - leafdata - leaf data that will be used for multiple PetscSF communications
2642: Level: advanced
2644: Notes:
2645: Implementations of `PetscSF` can make optimizations
2646: for repeated communication using the same memory regions, but these optimizations
2647: can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2648: The intended pattern is
2650: .vb
2651: PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2653: PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2654: // repeated use of rootdata and leafdata will now be optimized
2656: PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2657: PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2658: // ...
2659: PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2660: PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2661: // ... (other communications)
2663: // rootdata and leafdata must be deregistered before freeing
2664: // skipping this can lead to undefined behavior including
2665: // deadlocks
2666: PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2668: // it is now safe to free rootdata and leafdata
2669: PetscFree2(rootdata, leafdata);
2670: .ve
2672: Not registering `rootdata` and `leafdata` does not cause an error,
2673: but optimizations that reduce the setup time of each communication cannot be
2674: made. Currently, the only implementation of `PetscSF` that benefits from
2675: `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`. For the default
2676: `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
2678: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2679: @*/
2680: PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2681: {
2682: PetscFunctionBegin;
2684: PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2685: PetscFunctionReturn(PETSC_SUCCESS);
2686: }
2688: /*@
2689: PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
2691: Collective
2693: Input Parameters:
2694: + sf - star forest
2695: . unit - the data type contained within the root and leaf data
2696: . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2697: - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2699: Level: advanced
2701: Note:
2702: See `PetscSFRegisterPersistent()` for when/how to use this function.
2704: .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2705: @*/
2706: PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2707: {
2708: PetscFunctionBegin;
2710: PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2711: PetscFunctionReturn(PETSC_SUCCESS);
2712: }
2714: PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2715: {
2716: MPI_Aint lb, lb_true, bytes, bytes_true;
2718: PetscFunctionBegin;
2719: PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2720: PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2721: PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2722: *size = bytes;
2723: PetscFunctionReturn(PETSC_SUCCESS);
2724: }