Actual source code: sfutils.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/sectionimpl.h>
4: /*@
5: PetscSFSetGraphLayout - Set a `PetscSF` communication pattern using global indices and a `PetscLayout`
7: Collective
9: Input Parameters:
10: + sf - star forest
11: . layout - `PetscLayout` defining the global space for roots, i.e. which roots are owned by each MPI process
12: . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
13: . ilocal - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage, in which case the locations are [0, `nleaves`)
14: . localmode - copy mode for `ilocal`
15: - gremote - root vertices in global numbering corresponding to the leaves
17: Level: intermediate
19: Note:
20: Global indices must lie in [0, N) where N is the global size of `layout`.
21: Leaf indices in `ilocal` get sorted; this means the user-provided array gets sorted if localmode is `PETSC_OWN_POINTER`.
23: Developer Notes:
24: Local indices which are the identity permutation in the range [0,`nleaves`) are discarded as they
25: encode contiguous storage. In that case, if localmode is `PETSC_OWN_POINTER`, the memory is deallocated as it is not
26: needed.
28: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
29: @*/
30: PetscErrorCode PetscSFSetGraphLayout(PetscSF sf, PetscLayout layout, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, const PetscInt gremote[])
31: {
32: const PetscInt *range;
33: PetscInt i, nroots, ls = -1, ln = -1;
34: PetscMPIInt lr = -1;
35: PetscSFNode *remote;
37: PetscFunctionBegin;
39: PetscAssertPointer(layout, 2);
40: if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
41: if (nleaves > 0) PetscAssertPointer(gremote, 6);
42: PetscCall(PetscLayoutSetUp(layout));
43: PetscCall(PetscLayoutGetLocalSize(layout, &nroots));
44: PetscCall(PetscLayoutGetRanges(layout, &range));
45: PetscCall(PetscMalloc1(nleaves, &remote));
46: if (nleaves) ls = gremote[0] + 1;
47: for (i = 0; i < nleaves; i++) {
48: const PetscInt idx = gremote[i] - ls;
49: if (idx < 0 || idx >= ln) { /* short-circuit the search */
50: PetscCall(PetscLayoutFindOwnerIndex(layout, gremote[i], &lr, &remote[i].index));
51: remote[i].rank = lr;
52: ls = range[lr];
53: ln = range[lr + 1] - ls;
54: } else {
55: remote[i].rank = lr;
56: remote[i].index = idx;
57: }
58: }
59: PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, localmode, remote, PETSC_OWN_POINTER));
60: PetscFunctionReturn(PETSC_SUCCESS);
61: }
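/* Example usage: a minimal sketch of building a PetscSF from a layout plus per-leaf global root
   indices; `comm`, `nroots_local`, `nleaves`, and `gidx` are placeholders supplied by the caller.

     PetscLayout layout;
     PetscSF     sf;

     PetscCall(PetscLayoutCreateFromSizes(comm, nroots_local, PETSC_DECIDE, 1, &layout));
     PetscCall(PetscSFCreate(comm, &sf));
     PetscCall(PetscSFSetGraphLayout(sf, layout, nleaves, NULL, PETSC_OWN_POINTER, gidx));
     PetscCall(PetscLayoutDestroy(&layout)); // the SF keeps no reference to the layout
     // ... use sf ...
     PetscCall(PetscSFDestroy(&sf));
*/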
63: /*@C
64: PetscSFGetGraphLayout - Get the global indices and `PetscLayout` that describe a `PetscSF`
66: Collective
68: Input Parameter:
69: . sf - star forest
71: Output Parameters:
72: + layout - `PetscLayout` defining the global space for roots
73: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
74: . ilocal - locations of leaves in leafdata buffers, or `NULL` for contiguous storage
75: - gremote - root vertices in global numbering corresponding to the leaves
77: Level: intermediate
79: Notes:
80: The outputs are such that passing them as inputs to `PetscSFSetGraphLayout()` would lead to the same star forest.
81: The outputs `layout` and `gremote` are freshly created each time this function is called,
82: so they need to be freed (with `PetscLayoutDestroy()` and `PetscFree()`) by the user.
84: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraphLayout()`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
85: @*/
86: PetscErrorCode PetscSFGetGraphLayout(PetscSF sf, PetscLayout *layout, PetscInt *nleaves, const PetscInt *ilocal[], PetscInt *gremote[])
87: {
88: PetscInt nr, nl;
89: const PetscSFNode *ir;
90: PetscLayout lt;
92: PetscFunctionBegin;
94: if (layout) PetscAssertPointer(layout, 2);
95: if (nleaves) PetscAssertPointer(nleaves, 3);
96: if (ilocal) PetscAssertPointer(ilocal, 4);
97: if (gremote) PetscAssertPointer(gremote, 5);
98: PetscCall(PetscSFGetGraph(sf, &nr, &nl, ilocal, &ir));
99: PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sf), nr, PETSC_DECIDE, 1, &lt));
100: if (gremote) {
101: PetscInt i;
102: const PetscInt *range;
103: PetscInt *gr;
105: PetscCall(PetscLayoutGetRanges(lt, &range));
106: PetscCall(PetscMalloc1(nl, &gr));
107: for (i = 0; i < nl; i++) gr[i] = range[ir[i].rank] + ir[i].index;
108: *gremote = gr;
109: }
110: if (nleaves) *nleaves = nl;
111: if (layout) *layout = lt;
112: else PetscCall(PetscLayoutDestroy(&lt));
113: PetscFunctionReturn(PETSC_SUCCESS);
114: }
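/* Example usage: a minimal sketch that retrieves and then releases the layout/global-index
   description of an existing `sf`; `layout` and `gremote` are freshly allocated for the caller,
   while `ilocal` is borrowed from the SF.

     PetscLayout     layout;
     PetscInt        nleaves, *gremote;
     const PetscInt *ilocal;

     PetscCall(PetscSFGetGraphLayout(sf, &layout, &nleaves, &ilocal, &gremote));
     // ... use layout, nleaves, ilocal, gremote ...
     PetscCall(PetscLayoutDestroy(&layout));
     PetscCall(PetscFree(gremote));
*/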
116: /*@
117: PetscSFSetGraphSection - Sets the `PetscSF` graph (communication pattern) encoding the parallel dof overlap based upon the `PetscSection`s describing the local and global data layouts.
119: Input Parameters:
120: + sf - The `PetscSF`
121: . localSection - `PetscSection` describing the local data layout
122: - globalSection - `PetscSection` describing the global data layout
124: Level: developer
126: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFSetGraphLayout()`
127: @*/
128: PetscErrorCode PetscSFSetGraphSection(PetscSF sf, PetscSection localSection, PetscSection globalSection)
129: {
130: MPI_Comm comm;
131: PetscLayout layout;
132: const PetscInt *ranges;
133: PetscInt *local;
134: PetscSFNode *remote;
135: PetscInt pStart, pEnd, p, nroots, nleaves = 0, l;
136: PetscMPIInt size, rank;
138: PetscFunctionBegin;
143: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
144: PetscCallMPI(MPI_Comm_size(comm, &size));
145: PetscCallMPI(MPI_Comm_rank(comm, &rank));
146: PetscCall(PetscSectionGetChart(globalSection, &pStart, &pEnd));
147: PetscCall(PetscSectionGetConstrainedStorageSize(globalSection, &nroots));
148: PetscCall(PetscLayoutCreate(comm, &layout));
149: PetscCall(PetscLayoutSetBlockSize(layout, 1));
150: PetscCall(PetscLayoutSetLocalSize(layout, nroots));
151: PetscCall(PetscLayoutSetUp(layout));
152: PetscCall(PetscLayoutGetRanges(layout, &ranges));
153: for (p = pStart; p < pEnd; ++p) {
154: PetscInt gdof, gcdof;
156: PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
157: PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
158: PetscCheck(gcdof <= (gdof < 0 ? -(gdof + 1) : gdof), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " has %" PetscInt_FMT " constraints > %" PetscInt_FMT " dof", p, gcdof, gdof < 0 ? -(gdof + 1) : gdof);
159: nleaves += gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
160: }
161: PetscCall(PetscMalloc1(nleaves, &local));
162: PetscCall(PetscMalloc1(nleaves, &remote));
163: for (p = pStart, l = 0; p < pEnd; ++p) {
164: const PetscInt *cind;
165: PetscInt dof, cdof, off, gdof, gcdof, goff, gsize, d, c;
167: PetscCall(PetscSectionGetDof(localSection, p, &dof));
168: PetscCall(PetscSectionGetOffset(localSection, p, &off));
169: PetscCall(PetscSectionGetConstraintDof(localSection, p, &cdof));
170: PetscCall(PetscSectionGetConstraintIndices(localSection, p, &cind));
171: PetscCall(PetscSectionGetDof(globalSection, p, &gdof));
172: PetscCall(PetscSectionGetConstraintDof(globalSection, p, &gcdof));
173: PetscCall(PetscSectionGetOffset(globalSection, p, &goff));
174: if (!gdof) continue; /* Censored point */
175: gsize = gdof < 0 ? -(gdof + 1) - gcdof : gdof - gcdof;
176: if (gsize != dof - cdof) {
177: PetscCheck(gsize == dof, comm, PETSC_ERR_ARG_WRONG, "Global dof %" PetscInt_FMT " for point %" PetscInt_FMT " is neither the constrained size %" PetscInt_FMT ", nor the unconstrained %" PetscInt_FMT, gsize, p, dof - cdof, dof);
178: cdof = 0; /* Ignore constraints */
179: }
180: for (d = 0, c = 0; d < dof; ++d) {
181: if ((c < cdof) && (cind[c] == d)) {
182: ++c;
183: continue;
184: }
185: local[l + d - c] = off + d;
186: }
187: PetscCheck(d - c == gsize, comm, PETSC_ERR_ARG_WRONG, "Point %" PetscInt_FMT ": Global dof %" PetscInt_FMT " != %" PetscInt_FMT " size - number of constraints", p, gsize, d - c);
188: if (gdof < 0) {
189: for (d = 0; d < gsize; ++d, ++l) {
190: PetscInt offset = -(goff + 1) + d, ir;
191: PetscMPIInt r;
193: PetscCall(PetscFindInt(offset, size + 1, ranges, &ir));
194: PetscCall(PetscMPIIntCast(ir, &r));
195: if (r < 0) r = -(r + 2);
196: PetscCheck(!(r < 0) && !(r >= size), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Point %" PetscInt_FMT " mapped to invalid process %d (%" PetscInt_FMT ", %" PetscInt_FMT ")", p, r, gdof, goff);
197: remote[l].rank = r;
198: remote[l].index = offset - ranges[r];
199: }
200: } else {
201: for (d = 0; d < gsize; ++d, ++l) {
202: remote[l].rank = rank;
203: remote[l].index = goff + d - ranges[rank];
204: }
205: }
206: }
207: PetscCheck(l == nleaves, comm, PETSC_ERR_PLIB, "Iteration error, l %" PetscInt_FMT " != nleaves %" PetscInt_FMT, l, nleaves);
208: PetscCall(PetscLayoutDestroy(&layout));
209: PetscCall(PetscSFSetGraph(sf, nroots, nleaves, local, PETSC_OWN_POINTER, remote, PETSC_OWN_POINTER));
210: PetscFunctionReturn(PETSC_SUCCESS);
211: }
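/* Example usage: a minimal sketch of filling an SF from a pair of sections, assuming the sections
   come from a DM `dm` via DMGetLocalSection()/DMGetGlobalSection().

     PetscSection localSection, globalSection;
     PetscSF      dofSF;

     PetscCall(DMGetLocalSection(dm, &localSection));
     PetscCall(DMGetGlobalSection(dm, &globalSection));
     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)dm), &dofSF));
     PetscCall(PetscSFSetGraphSection(dofSF, localSection, globalSection));
     // roots: the owned global dofs; leaves: the (unconstrained) local dofs of every point
     // that appears in the global section
     PetscCall(PetscSFDestroy(&dofSF));
*/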
213: /*@C
214: PetscSFDistributeSection - Create a new `PetscSection` reorganized, moving from the root to the leaves of the `PetscSF`
216: Collective
218: Input Parameters:
219: + sf - The `PetscSF`
220: - rootSection - Section defined on root space
222: Output Parameters:
223: + remoteOffsets - root offsets in leaf storage, or `NULL`; its length will be the size of the chart of `leafSection`
224: - leafSection - Section defined on the leaf space
226: Level: advanced
228: Note:
229: Caller must `PetscFree()` `remoteOffsets` if it was requested
231: To distribute data from the `rootSection` to the `leafSection`, see `PetscSFCreateSectionSF()` or `PetscSectionMigrateData()`.
233: Fortran Note:
234: Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.
236: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFCreateSectionSF()`
237: @*/
238: PetscErrorCode PetscSFDistributeSection(PetscSF sf, PetscSection rootSection, PetscInt *remoteOffsets[], PetscSection leafSection)
239: {
240: PetscSF embedSF;
241: const PetscInt *indices;
242: IS selected;
243: PetscInt numFields, nroots, rpStart, rpEnd, lpStart = PETSC_INT_MAX, lpEnd = -1, f, c;
244: PetscBool *sub, hasc;
246: PetscFunctionBegin;
249: if (remoteOffsets) PetscAssertPointer(remoteOffsets, 3);
251: PetscCall(PetscLogEventBegin(PETSCSF_DistSect, sf, 0, 0, 0));
252: PetscCall(PetscSectionGetNumFields(rootSection, &numFields));
253: if (numFields) {
254: IS perm;
256: /* PetscSectionSetNumFields() calls PetscSectionReset(), which destroys
257: leafSection->perm. To keep this permutation set by the user, we grab
258: the reference before calling PetscSectionSetNumFields() and set it
259: back after. */
260: PetscCall(PetscSectionGetPermutation(leafSection, &perm));
261: PetscCall(PetscObjectReference((PetscObject)perm));
262: PetscCall(PetscSectionSetNumFields(leafSection, numFields));
263: PetscCall(PetscSectionSetPermutation(leafSection, perm));
264: PetscCall(ISDestroy(&perm));
265: }
266: PetscCall(PetscMalloc1(numFields + 2, &sub));
267: sub[1] = rootSection->bc ? PETSC_TRUE : PETSC_FALSE;
268: for (f = 0; f < numFields; ++f) {
269: PetscSectionSym sym, dsym = NULL;
270: const char *name = NULL;
271: PetscInt numComp = 0;
273: sub[2 + f] = rootSection->field[f]->bc ? PETSC_TRUE : PETSC_FALSE;
274: PetscCall(PetscSectionGetFieldComponents(rootSection, f, &numComp));
275: PetscCall(PetscSectionGetFieldName(rootSection, f, &name));
276: PetscCall(PetscSectionGetFieldSym(rootSection, f, &sym));
277: if (sym) PetscCall(PetscSectionSymDistribute(sym, sf, &dsym));
278: PetscCall(PetscSectionSetFieldComponents(leafSection, f, numComp));
279: PetscCall(PetscSectionSetFieldName(leafSection, f, name));
280: PetscCall(PetscSectionSetFieldSym(leafSection, f, dsym));
281: PetscCall(PetscSectionSymDestroy(&dsym));
282: for (c = 0; c < rootSection->numFieldComponents[f]; ++c) {
283: PetscCall(PetscSectionGetComponentName(rootSection, f, c, &name));
284: PetscCall(PetscSectionSetComponentName(leafSection, f, c, name));
285: }
286: }
287: PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
288: PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
289: rpEnd = PetscMin(rpEnd, nroots);
290: rpEnd = PetscMax(rpStart, rpEnd);
291: /* see if we can avoid creating the embedded SF, since it can cost more than an allreduce */
292: sub[0] = (PetscBool)(nroots != rpEnd - rpStart);
293: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, sub, 2 + numFields, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)sf)));
294: if (sub[0]) {
295: PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
296: PetscCall(ISGetIndices(selected, &indices));
297: PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
298: PetscCall(ISRestoreIndices(selected, &indices));
299: PetscCall(ISDestroy(&selected));
300: } else {
301: PetscCall(PetscObjectReference((PetscObject)sf));
302: embedSF = sf;
303: }
304: PetscCall(PetscSFGetLeafRange(embedSF, &lpStart, &lpEnd));
305: lpEnd++;
307: PetscCall(PetscSectionSetChart(leafSection, lpStart, lpEnd));
309: /* Constrained dof section */
310: hasc = sub[1];
311: for (f = 0; f < numFields; ++f) hasc = (PetscBool)(hasc || sub[2 + f]);
313: /* Could fuse these at the cost of copies and extra allocation */
314: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
315: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->atlasDof, -lpStart), MPI_REPLACE));
316: if (sub[1]) {
317: PetscCall(PetscSectionCheckConstraints_Private(rootSection));
318: PetscCall(PetscSectionCheckConstraints_Private(leafSection));
319: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
320: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasDof[-rpStart], &leafSection->bc->atlasDof[-lpStart], MPI_REPLACE));
321: }
322: for (f = 0; f < numFields; ++f) {
323: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
324: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->field[f]->atlasDof, -rpStart), PetscSafePointerPlusOffset(leafSection->field[f]->atlasDof, -lpStart), MPI_REPLACE));
325: if (sub[2 + f]) {
326: PetscCall(PetscSectionCheckConstraints_Private(rootSection->field[f]));
327: PetscCall(PetscSectionCheckConstraints_Private(leafSection->field[f]));
328: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
329: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasDof[-rpStart], &leafSection->field[f]->bc->atlasDof[-lpStart], MPI_REPLACE));
330: }
331: }
332: if (remoteOffsets) {
333: PetscCall(PetscMalloc1(lpEnd - lpStart, remoteOffsets));
334: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
335: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
336: }
337: PetscCall(PetscSectionInvalidateMaxDof_Internal(leafSection));
338: PetscCall(PetscSectionSetUp(leafSection));
339: if (hasc) { /* need to communicate bcIndices */
340: PetscSF bcSF;
341: PetscInt *rOffBc;
343: PetscCall(PetscMalloc1(lpEnd - lpStart, &rOffBc));
344: if (sub[1]) {
345: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
346: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
347: PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->bc, rOffBc, leafSection->bc, &bcSF));
348: PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
349: PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->bcIndices, leafSection->bcIndices, MPI_REPLACE));
350: PetscCall(PetscSFDestroy(&bcSF));
351: }
352: for (f = 0; f < numFields; ++f) {
353: if (sub[2 + f]) {
354: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
355: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, &rootSection->field[f]->bc->atlasOff[-rpStart], &rOffBc[-lpStart], MPI_REPLACE));
356: PetscCall(PetscSFCreateSectionSF(embedSF, rootSection->field[f]->bc, rOffBc, leafSection->field[f]->bc, &bcSF));
357: PetscCall(PetscSFBcastBegin(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
358: PetscCall(PetscSFBcastEnd(bcSF, MPIU_INT, rootSection->field[f]->bcIndices, leafSection->field[f]->bcIndices, MPI_REPLACE));
359: PetscCall(PetscSFDestroy(&bcSF));
360: }
361: }
362: PetscCall(PetscFree(rOffBc));
363: }
364: PetscCall(PetscSFDestroy(&embedSF));
365: PetscCall(PetscFree(sub));
366: PetscCall(PetscLogEventEnd(PETSCSF_DistSect, sf, 0, 0, 0));
367: PetscFunctionReturn(PETSC_SUCCESS);
368: }
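/* Example usage: a minimal sketch of distributing a section along a point SF; `pointSF` and
   `rootSection` are placeholders for an existing SF over mesh points and a section laid out on
   its roots.

     PetscSection leafSection;
     PetscInt    *remoteOffsets;

     PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)pointSF), &leafSection));
     PetscCall(PetscSFDistributeSection(pointSF, rootSection, &remoteOffsets, leafSection));
     // remoteOffsets[p - lpStart] holds the root-section offset of leaf point p; it is usually
     // handed to PetscSFCreateSectionSF() (see below) and then freed
     PetscCall(PetscFree(remoteOffsets));
     PetscCall(PetscSectionDestroy(&leafSection));
*/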
370: /*@C
371: PetscSFCreateRemoteOffsets - Create offsets for point data on remote processes
373: Collective
375: Input Parameters:
376: + sf - The `PetscSF`
377: . rootSection - Data layout of remote points for outgoing data (this is layout for roots)
378: - leafSection - Data layout of local points for incoming data (this is layout for leaves)
380: Output Parameter:
381: . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
383: Level: developer
385: Note:
386: Caller must `PetscFree()` `remoteOffsets` if it was requested
388: Fortran Note:
389: Use `PetscSFDestroyRemoteOffsets()` when `remoteOffsets` is no longer needed.
391: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
392: @*/
393: PetscErrorCode PetscSFCreateRemoteOffsets(PetscSF sf, PetscSection rootSection, PetscSection leafSection, PetscInt *remoteOffsets[])
394: {
395: PetscSF embedSF;
396: const PetscInt *indices;
397: IS selected;
398: PetscInt numRoots, rpStart = 0, rpEnd = 0, lpStart = 0, lpEnd = 0;
400: PetscFunctionBegin;
404: PetscAssertPointer(remoteOffsets, 4);
405: *remoteOffsets = NULL;
406: PetscCall(PetscSFGetGraph(sf, &numRoots, NULL, NULL, NULL));
407: if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
408: PetscCall(PetscLogEventBegin(PETSCSF_RemoteOff, sf, 0, 0, 0));
409: PetscCall(PetscSectionGetChart(rootSection, &rpStart, &rpEnd));
410: PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
411: PetscCall(ISCreateStride(PETSC_COMM_SELF, rpEnd - rpStart, rpStart, 1, &selected));
412: PetscCall(ISGetIndices(selected, &indices));
413: PetscCall(PetscSFCreateEmbeddedRootSF(sf, rpEnd - rpStart, indices, &embedSF));
414: PetscCall(ISRestoreIndices(selected, &indices));
415: PetscCall(ISDestroy(&selected));
416: PetscCall(PetscCalloc1(lpEnd - lpStart, remoteOffsets));
417: PetscCall(PetscSFBcastBegin(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
418: PetscCall(PetscSFBcastEnd(embedSF, MPIU_INT, PetscSafePointerPlusOffset(rootSection->atlasOff, -rpStart), PetscSafePointerPlusOffset(*remoteOffsets, -lpStart), MPI_REPLACE));
419: PetscCall(PetscSFDestroy(&embedSF));
420: PetscCall(PetscLogEventEnd(PETSCSF_RemoteOff, sf, 0, 0, 0));
421: PetscFunctionReturn(PETSC_SUCCESS);
422: }
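/* Example usage: a minimal sketch of computing remote offsets when the leaf section already
   exists (for instance, when it was not produced by PetscSFDistributeSection()); `pointSF`,
   `rootSection`, and `leafSection` are placeholders.

     PetscInt *remoteOffsets;

     PetscCall(PetscSFCreateRemoteOffsets(pointSF, rootSection, leafSection, &remoteOffsets));
     // remoteOffsets may be passed to PetscSFCreateSectionSF() and must be freed by the caller
     PetscCall(PetscFree(remoteOffsets));
*/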
424: /*@
425: PetscSFCreateSectionSF - Create an expanded `PetscSF` of dofs, assuming the input `PetscSF` relates points
427: Collective
429: Input Parameters:
430: + sf - The `PetscSF`
431: . rootSection - Data layout of remote points for outgoing data (this is usually the serial section)
432: . remoteOffsets - Offsets for point data on remote processes (these are offsets from the root section), or `NULL`
433: - leafSection - Data layout of local points for incoming data (this is the distributed section)
435: Output Parameter:
436: . sectionSF - The new `PetscSF`
438: Level: advanced
440: Notes:
441: `remoteOffsets` can be `NULL` if `sf` does not reference any points in `leafSection`
443: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFDistributeSection()`
444: @*/
445: PetscErrorCode PetscSFCreateSectionSF(PetscSF sf, PetscSection rootSection, PetscInt remoteOffsets[], PetscSection leafSection, PetscSF *sectionSF)
446: {
447: MPI_Comm comm;
448: const PetscInt *localPoints;
449: const PetscSFNode *remotePoints;
450: PetscInt lpStart, lpEnd;
451: PetscInt numRoots, numSectionRoots, numPoints, numIndices = 0;
452: PetscInt *localIndices;
453: PetscSFNode *remoteIndices;
454: PetscInt i, ind;
456: PetscFunctionBegin;
458: PetscAssertPointer(rootSection, 2);
459: /* Cannot check PetscAssertPointer(remoteOffsets,3) because it can be NULL if sf does not reference any points in leafSection */
460: PetscAssertPointer(leafSection, 4);
461: PetscAssertPointer(sectionSF, 5);
462: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
463: PetscCall(PetscSFCreate(comm, sectionSF));
464: PetscCall(PetscSectionGetChart(leafSection, &lpStart, &lpEnd));
465: PetscCall(PetscSectionGetStorageSize(rootSection, &numSectionRoots));
466: PetscCall(PetscSFGetGraph(sf, &numRoots, &numPoints, &localPoints, &remotePoints));
467: if (numRoots < 0) PetscFunctionReturn(PETSC_SUCCESS);
468: PetscCall(PetscLogEventBegin(PETSCSF_SectSF, sf, 0, 0, 0));
469: for (i = 0; i < numPoints; ++i) {
470: PetscInt localPoint = localPoints ? localPoints[i] : i;
471: PetscInt dof;
473: if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
474: PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
475: numIndices += dof < 0 ? 0 : dof;
476: }
477: }
478: PetscCall(PetscMalloc1(numIndices, &localIndices));
479: PetscCall(PetscMalloc1(numIndices, &remoteIndices));
480: /* Create new index graph */
481: for (i = 0, ind = 0; i < numPoints; ++i) {
482: PetscInt localPoint = localPoints ? localPoints[i] : i;
483: PetscInt rank = remotePoints[i].rank;
485: if ((localPoint >= lpStart) && (localPoint < lpEnd)) {
486: PetscInt remoteOffset = remoteOffsets[localPoint - lpStart];
487: PetscInt loff, dof, d;
489: PetscCall(PetscSectionGetOffset(leafSection, localPoint, &loff));
490: PetscCall(PetscSectionGetDof(leafSection, localPoint, &dof));
491: for (d = 0; d < dof; ++d, ++ind) {
492: localIndices[ind] = loff + d;
493: remoteIndices[ind].rank = rank;
494: remoteIndices[ind].index = remoteOffset + d;
495: }
496: }
497: }
498: PetscCheck(numIndices == ind, comm, PETSC_ERR_PLIB, "Inconsistency in indices, %" PetscInt_FMT " should be %" PetscInt_FMT, ind, numIndices);
499: PetscCall(PetscSFSetGraph(*sectionSF, numSectionRoots, numIndices, localIndices, PETSC_OWN_POINTER, remoteIndices, PETSC_OWN_POINTER));
500: PetscCall(PetscSFSetUp(*sectionSF));
501: PetscCall(PetscLogEventEnd(PETSCSF_SectSF, sf, 0, 0, 0));
502: PetscFunctionReturn(PETSC_SUCCESS);
503: }
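/* Example usage: a minimal sketch of the usual pipeline for moving dof data. Given a point SF,
   root/leaf sections, and remote offsets (e.g. from PetscSFDistributeSection() above), build the
   dof-level SF and broadcast an array laid out by rootSection into one laid out by leafSection.
   `rootArray` and `leafArray` are hypothetical caller-owned buffers sized by the storage sizes of
   the two sections.

     PetscSF sectionSF;

     PetscCall(PetscSFCreateSectionSF(pointSF, rootSection, remoteOffsets, leafSection, &sectionSF));
     PetscCall(PetscSFBcastBegin(sectionSF, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sectionSF, MPIU_SCALAR, rootArray, leafArray, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sectionSF));
*/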
505: /*@C
506: PetscSFCreateFromLayouts - Creates a parallel star forest mapping between two `PetscLayout` objects
508: Collective
510: Input Parameters:
511: + rmap - `PetscLayout` defining the global root space
512: - lmap - `PetscLayout` defining the global leaf space
514: Output Parameter:
515: . sf - The parallel star forest
517: Level: intermediate
519: Notes:
520: If the global length of `lmap` differs from the global length of `rmap` then the excess entries are ignored.
522: The resulting `sf`, used with `PetscSFBcastBegin()` and `PetscSFBcastEnd()`, merely copies the array entries of `rootdata` to
523: `leafdata`, moving them between MPI processes if needed. For example,
524: if rmap is [0, 3, 5) and lmap is [0, 2, 6) and `rootdata` is (1, 2, 3) on MPI rank 0 and (4, 5) on MPI rank 1 then the
525: `leafdata` would become (1, 2) on MPI rank 0 and (3, 4, 5, x) on MPI rank 1.
527: .seealso: [](sec_petscsf), `PetscSF`, `PetscLayout`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscLayoutCreate()`, `PetscSFSetGraphLayout()`
528: @*/
529: PetscErrorCode PetscSFCreateFromLayouts(PetscLayout rmap, PetscLayout lmap, PetscSF *sf)
530: {
531: PetscInt i, nroots, nleaves = 0;
532: PetscInt rN, lst, len;
533: PetscMPIInt owner = -1;
534: PetscSFNode *remote;
535: MPI_Comm rcomm = rmap->comm;
536: MPI_Comm lcomm = lmap->comm;
537: PetscMPIInt flg;
539: PetscFunctionBegin;
540: PetscAssertPointer(rmap, 1);
541: PetscAssertPointer(lmap, 2);
542: PetscAssertPointer(sf, 3);
543: PetscCheck(rmap->setupcalled, rcomm, PETSC_ERR_ARG_WRONGSTATE, "Root layout not setup");
544: PetscCheck(lmap->setupcalled, lcomm, PETSC_ERR_ARG_WRONGSTATE, "Leaf layout not setup");
545: PetscCallMPI(MPI_Comm_compare(rcomm, lcomm, &flg));
546: PetscCheck(flg == MPI_CONGRUENT || flg == MPI_IDENT, rcomm, PETSC_ERR_SUP, "cannot map two layouts with non-matching communicators");
547: PetscCall(PetscSFCreate(rcomm, sf));
548: PetscCall(PetscLayoutGetLocalSize(rmap, &nroots));
549: PetscCall(PetscLayoutGetSize(rmap, &rN));
550: PetscCall(PetscLayoutGetRange(lmap, &lst, &len));
551: PetscCall(PetscMalloc1(len - lst, &remote));
552: for (i = lst; i < len && i < rN; i++) {
553: if (owner < -1 || i >= rmap->range[owner + 1]) PetscCall(PetscLayoutFindOwner(rmap, i, &owner));
554: remote[nleaves].rank = owner;
555: remote[nleaves].index = i - rmap->range[owner];
556: nleaves++;
557: }
558: PetscCall(PetscSFSetGraph(*sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
559: PetscCall(PetscFree(remote));
560: PetscFunctionReturn(PETSC_SUCCESS);
561: }
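/* Example usage: a minimal sketch matching the example in the manual page above, copying entries
   of an array from the root layout to the leaf layout. `rootdata` and `leafdata` are hypothetical
   buffers of the local sizes of `rmap` and `lmap`, respectively.

     PetscLayout rmap, lmap;
     PetscSF     sf;

     PetscCall(PetscLayoutCreateFromSizes(comm, nroots_local, PETSC_DECIDE, 1, &rmap));
     PetscCall(PetscLayoutCreateFromSizes(comm, nleaves_local, PETSC_DECIDE, 1, &lmap));
     PetscCall(PetscSFCreateFromLayouts(rmap, lmap, &sf));
     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sf));
     PetscCall(PetscLayoutDestroy(&lmap));
     PetscCall(PetscLayoutDestroy(&rmap));
*/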
563: /* TODO: handle nooffprocentries like MatZeroRowsMapLocal_Private, since this code is the same */
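/* PetscLayoutMapLocal - developer-level helper without a manual page. Given `N` global indices
   `idxs[]` (negative entries are skipped), it returns, restricted to the part of `map` owned by
   this process:
   - on:     the number of owned entries referenced by some process,
   - oidxs:  their local indices within the layout (to be freed with PetscFree()),
   - ogidxs: optionally, for each such entry, its position in the numbering obtained by
             compressing all non-negative entries of idxs[] across processes (to be freed with
             PetscFree()). */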
564: PetscErrorCode PetscLayoutMapLocal(PetscLayout map, PetscInt N, const PetscInt idxs[], PetscInt *on, PetscInt *oidxs[], PetscInt *ogidxs[])
565: {
566: PetscInt *owners = map->range;
567: PetscInt n = map->n;
568: PetscSF sf;
569: PetscInt *lidxs, *work = NULL, *ilocal;
570: PetscSFNode *ridxs;
571: PetscMPIInt rank, p = 0;
572: PetscInt r, len = 0, nleaves = 0;
574: PetscFunctionBegin;
575: if (on) *on = 0; /* squelch -Wmaybe-uninitialized */
576: /* Create SF where leaves are input idxs and roots are owned idxs */
577: PetscCallMPI(MPI_Comm_rank(map->comm, &rank));
578: PetscCall(PetscMalloc1(n, &lidxs));
579: for (r = 0; r < n; ++r) lidxs[r] = -1;
580: PetscCall(PetscMalloc1(N, &ridxs));
581: PetscCall(PetscMalloc1(N, &ilocal));
582: for (r = 0; r < N; ++r) {
583: const PetscInt idx = idxs[r];
585: if (idx < 0) continue;
586: PetscCheck(idx < map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, map->N);
587: if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this idx too */
588: PetscCall(PetscLayoutFindOwner(map, idx, &p));
589: }
590: ridxs[nleaves].rank = p;
591: ridxs[nleaves].index = idxs[r] - owners[p];
592: ilocal[nleaves] = r;
593: nleaves++;
594: }
595: PetscCall(PetscSFCreate(map->comm, &sf));
596: PetscCall(PetscSFSetGraph(sf, n, nleaves, ilocal, PETSC_OWN_POINTER, ridxs, PETSC_OWN_POINTER));
597: PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
598: PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)idxs, lidxs, MPI_LOR));
599: if (ogidxs) { /* communicate global idxs */
600: PetscInt cum = 0, start, *work2;
602: PetscCall(PetscMalloc1(n, &work));
603: PetscCall(PetscCalloc1(N, &work2));
604: for (r = 0; r < N; ++r)
605: if (idxs[r] >= 0) cum++;
606: PetscCallMPI(MPI_Scan(&cum, &start, 1, MPIU_INT, MPI_SUM, map->comm));
607: start -= cum;
608: cum = 0;
609: for (r = 0; r < N; ++r)
610: if (idxs[r] >= 0) work2[r] = start + cum++;
611: PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work2, work, MPI_REPLACE));
612: PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work2, work, MPI_REPLACE));
613: PetscCall(PetscFree(work2));
614: }
615: PetscCall(PetscSFDestroy(&sf));
616: /* Compress and put in indices */
617: for (r = 0; r < n; ++r)
618: if (lidxs[r] >= 0) {
619: if (work) work[len] = work[r];
620: lidxs[len++] = r;
621: }
622: if (on) *on = len;
623: if (oidxs) *oidxs = lidxs;
624: if (ogidxs) *ogidxs = work;
625: PetscFunctionReturn(PETSC_SUCCESS);
626: }
628: /*@
629: PetscSFCreateByMatchingIndices - Create `PetscSF` by matching root and leaf indices
631: Collective
633: Input Parameters:
634: + layout - `PetscLayout` defining the global index space and the MPI rank that brokers each index
635: . numRootIndices - size of `rootIndices`
636: . rootIndices - array of global indices of which this process requests ownership
637: . rootLocalIndices - root local index permutation (`NULL` if no permutation)
638: . rootLocalOffset - offset to be added to `rootLocalIndices`
639: . numLeafIndices - size of `leafIndices`
640: . leafIndices - array of global indices with which this process requires data associated
641: . leafLocalIndices - leaf local index permutation (`NULL` if no permutation)
642: - leafLocalOffset - offset to be added to `leafLocalIndices`
644: Output Parameters:
645: + sfA - star forest representing the communication pattern from the layout space to the leaf space (`NULL` if not needed)
646: - sf - star forest representing the communication pattern from the root space to the leaf space
648: Level: advanced
650: Example 1:
651: .vb
652: rank : 0 1 2
653: rootIndices : [1 0 2] [3] [3]
654: rootLocalOffset : 100 200 300
655: layout : [0 1] [2] [3]
656: leafIndices : [0] [2] [0 3]
657: leafLocalOffset : 400 500 600
659: would build the following PetscSF
661: [0] 400 <- (0,101)
662: [1] 500 <- (0,102)
663: [2] 600 <- (0,101)
664: [2] 601 <- (2,300)
665: .ve
667: Example 2:
668: .vb
669: rank : 0 1 2
670: rootIndices : [1 0 2] [3] [3]
671: rootLocalOffset : 100 200 300
672: layout : [0 1] [2] [3]
673: leafIndices : rootIndices rootIndices rootIndices
674: leafLocalOffset : rootLocalOffset rootLocalOffset rootLocalOffset
676: would build the following PetscSF
678: [1] 200 <- (2,300)
679: .ve
681: Example 3:
682: .vb
683: No process requests ownership of global index 1, but no process needs it.
685: rank : 0 1 2
686: numRootIndices : 2 1 1
687: rootIndices : [0 2] [3] [3]
688: rootLocalOffset : 100 200 300
689: layout : [0 1] [2] [3]
690: numLeafIndices : 1 1 2
691: leafIndices : [0] [2] [0 3]
692: leafLocalOffset : 400 500 600
694: would build the following PetscSF
696: [0] 400 <- (0,100)
697: [1] 500 <- (0,101)
698: [2] 600 <- (0,100)
699: [2] 601 <- (2,300)
700: .ve
702: Notes:
703: `layout` represents any partitioning of [0, N), where N is the total number of global indices, and its
704: local size can be set to `PETSC_DECIDE`.
706: If a global index x lies in the partition owned by process i, each process whose `rootIndices` contains x requests
707: ownership of x and sends its own rank and the local index of x to process i.
708: If multiple processes request ownership of x, the one with the highest rank is to own x.
709: Process i then broadcasts the ownership information, so that each process whose `leafIndices` contains x knows the
710: ownership information of x.
711: The output `sf` is constructed by associating each leaf point to a root point in this way.
713: Suppose there is point data ordered according to the global indices and partitioned according to the given layout.
714: The optional output `sfA` can be used to push such data to leaf points.
716: All indices in `rootIndices` and `leafIndices` must lie in the layout range. The union (over all processes) of `rootIndices`
717: must cover that of `leafIndices`, but need not cover the entire layout.
719: If (leafIndices, leafLocalIndices, leafLocalOffset) == (rootIndices, rootLocalIndices, rootLocalOffset), the output
720: star forest is almost the identity, so it will only include the non-trivial part of the map.
722: Developer Notes:
723: The current approach, in which the process with the highest rank gains ownership, may cause load imbalance; consider using
724: hash(rank, root_local_index) as the bid when determining ownership.
726: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
727: @*/
728: PetscErrorCode PetscSFCreateByMatchingIndices(PetscLayout layout, PetscInt numRootIndices, const PetscInt rootIndices[], const PetscInt rootLocalIndices[], PetscInt rootLocalOffset, PetscInt numLeafIndices, const PetscInt leafIndices[], const PetscInt leafLocalIndices[], PetscInt leafLocalOffset, PetscSF *sfA, PetscSF *sf)
729: {
730: MPI_Comm comm = layout->comm;
731: PetscMPIInt rank;
732: PetscSF sf1;
733: PetscSFNode *owners, *buffer, *iremote;
734: PetscInt *ilocal, nleaves, N, n, i;
735: PetscBool areIndicesSame;
737: PetscFunctionBegin;
738: PetscAssertPointer(layout, 1);
739: if (rootIndices) PetscAssertPointer(rootIndices, 3);
740: if (rootLocalIndices) PetscAssertPointer(rootLocalIndices, 4);
741: if (leafIndices) PetscAssertPointer(leafIndices, 7);
742: if (leafLocalIndices) PetscAssertPointer(leafLocalIndices, 8);
743: if (sfA) PetscAssertPointer(sfA, 10);
744: PetscAssertPointer(sf, 11);
745: PetscCheck(numRootIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numRootIndices (%" PetscInt_FMT ") must be non-negative", numRootIndices);
746: PetscCheck(numLeafIndices >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "numLeafIndices (%" PetscInt_FMT ") must be non-negative", numLeafIndices);
747: PetscCallMPI(MPI_Comm_rank(comm, &rank));
748: PetscCall(PetscLayoutSetUp(layout));
749: PetscCall(PetscLayoutGetSize(layout, &N));
750: PetscCall(PetscLayoutGetLocalSize(layout, &n));
751: areIndicesSame = (PetscBool)(leafIndices == rootIndices);
752: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &areIndicesSame, 1, MPIU_BOOL, MPI_LAND, comm));
753: PetscCheck(!areIndicesSame || numLeafIndices == numRootIndices, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "leafIndices == rootIndices, but numLeafIndices (%" PetscInt_FMT ") != numRootIndices(%" PetscInt_FMT ")", numLeafIndices, numRootIndices);
754: if (PetscDefined(USE_DEBUG)) {
755: PetscInt N1 = PETSC_INT_MIN;
756: for (i = 0; i < numRootIndices; i++)
757: if (rootIndices[i] > N1) N1 = rootIndices[i];
758: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
759: PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. root index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
760: if (!areIndicesSame) {
761: N1 = PETSC_INT_MIN;
762: for (i = 0; i < numLeafIndices; i++)
763: if (leafIndices[i] > N1) N1 = leafIndices[i];
764: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &N1, 1, MPIU_INT, MPI_MAX, comm));
765: PetscCheck(N1 < N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Max. leaf index (%" PetscInt_FMT ") out of layout range [0,%" PetscInt_FMT ")", N1, N);
766: }
767: }
769: /* Reduce: owners -> buffer */
770: PetscCall(PetscMalloc1(n, &buffer));
771: PetscCall(PetscSFCreate(comm, &sf1));
772: PetscCall(PetscSFSetFromOptions(sf1));
773: PetscCall(PetscSFSetGraphLayout(sf1, layout, numRootIndices, NULL, PETSC_OWN_POINTER, rootIndices));
774: PetscCall(PetscMalloc1(numRootIndices, &owners));
775: for (i = 0; i < numRootIndices; ++i) {
776: owners[i].rank = rank;
777: owners[i].index = rootLocalOffset + (rootLocalIndices ? rootLocalIndices[i] : i);
778: }
779: for (i = 0; i < n; ++i) {
780: buffer[i].index = -1;
781: buffer[i].rank = -1;
782: }
783: PetscCall(PetscSFReduceBegin(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
784: PetscCall(PetscSFReduceEnd(sf1, MPIU_SF_NODE, owners, buffer, MPI_MAXLOC));
785: /* Bcast: buffer -> owners */
786: if (!areIndicesSame) {
787: PetscCall(PetscFree(owners));
788: PetscCall(PetscSFSetGraphLayout(sf1, layout, numLeafIndices, NULL, PETSC_OWN_POINTER, leafIndices));
789: PetscCall(PetscMalloc1(numLeafIndices, &owners));
790: }
791: PetscCall(PetscSFBcastBegin(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
792: PetscCall(PetscSFBcastEnd(sf1, MPIU_SF_NODE, buffer, owners, MPI_REPLACE));
793: for (i = 0; i < numLeafIndices; ++i) PetscCheck(owners[i].rank >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Global point %" PetscInt_FMT " was unclaimed", leafIndices[i]);
794: PetscCall(PetscFree(buffer));
795: if (sfA) {
796: *sfA = sf1;
797: } else PetscCall(PetscSFDestroy(&sf1));
798: /* Create sf */
799: if (areIndicesSame && rootLocalIndices == leafLocalIndices && leafLocalOffset == rootLocalOffset) {
800: /* leaf space == root space */
801: for (i = 0, nleaves = 0; i < numLeafIndices; ++i)
802: if (owners[i].rank != rank) ++nleaves;
803: PetscCall(PetscMalloc1(nleaves, &ilocal));
804: PetscCall(PetscMalloc1(nleaves, &iremote));
805: for (i = 0, nleaves = 0; i < numLeafIndices; ++i) {
806: if (owners[i].rank != rank) {
807: ilocal[nleaves] = leafLocalOffset + i;
808: iremote[nleaves].rank = owners[i].rank;
809: iremote[nleaves].index = owners[i].index;
810: ++nleaves;
811: }
812: }
813: PetscCall(PetscFree(owners));
814: } else {
815: nleaves = numLeafIndices;
816: PetscCall(PetscMalloc1(nleaves, &ilocal));
817: for (i = 0; i < nleaves; ++i) ilocal[i] = leafLocalOffset + (leafLocalIndices ? leafLocalIndices[i] : i);
818: iremote = owners;
819: }
820: PetscCall(PetscSFCreate(comm, sf));
821: PetscCall(PetscSFSetFromOptions(*sf));
822: PetscCall(PetscSFSetGraph(*sf, rootLocalOffset + numRootIndices, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
823: PetscFunctionReturn(PETSC_SUCCESS);
824: }
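/* Example usage: a minimal calling sketch in the spirit of the examples in the manual page above;
   the per-process arrays `rootIdx[]`/`leafIdx[]`, their sizes, and the offsets are hypothetical.

     PetscLayout layout;
     PetscSF     sfA, sf;

     PetscCall(PetscLayoutCreateFromSizes(comm, PETSC_DECIDE, N, 1, &layout));
     PetscCall(PetscSFCreateByMatchingIndices(layout, nRoot, rootIdx, NULL, rootLocalOffset, nLeaf, leafIdx, NULL, leafLocalOffset, &sfA, &sf));
     PetscCall(PetscLayoutDestroy(&layout));
     // sfA pushes layout-ordered data to the leaves; sf communicates from root space to leaf space
     PetscCall(PetscSFDestroy(&sfA));
     PetscCall(PetscSFDestroy(&sf));
*/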
826: /*@
827: PetscSFMerge - append/merge indices of `sfb` into `sfa`, with preference for `sfb`
829: Collective
831: Input Parameters:
832: + sfa - default `PetscSF`
833: - sfb - additional edges to add/replace edges in `sfa`
835: Output Parameter:
836: . merged - new `PetscSF` with combined edges
838: Level: intermediate
840: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`
841: @*/
842: PetscErrorCode PetscSFMerge(PetscSF sfa, PetscSF sfb, PetscSF *merged)
843: {
844: PetscInt maxleaf;
846: PetscFunctionBegin;
849: PetscCheckSameComm(sfa, 1, sfb, 2);
850: PetscAssertPointer(merged, 3);
851: {
852: PetscInt aleaf, bleaf;
853: PetscCall(PetscSFGetLeafRange(sfa, NULL, &aleaf));
854: PetscCall(PetscSFGetLeafRange(sfb, NULL, &bleaf));
855: maxleaf = PetscMax(aleaf, bleaf) + 1; // One more than the last index
856: }
857: PetscInt *clocal, aroots, aleaves, broots, bleaves;
858: PetscSFNode *cremote;
859: const PetscInt *alocal, *blocal;
860: const PetscSFNode *aremote, *bremote;
861: PetscCall(PetscMalloc2(maxleaf, &clocal, maxleaf, &cremote));
862: for (PetscInt i = 0; i < maxleaf; i++) clocal[i] = -1;
863: PetscCall(PetscSFGetGraph(sfa, &aroots, &aleaves, &alocal, &aremote));
864: PetscCall(PetscSFGetGraph(sfb, &broots, &bleaves, &blocal, &bremote));
865: PetscCheck(aroots == broots, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Both sfa and sfb must have the same root space");
866: for (PetscInt i = 0; i < aleaves; i++) {
867: PetscInt a = alocal ? alocal[i] : i;
868: clocal[a] = a;
869: cremote[a] = aremote[i];
870: }
871: for (PetscInt i = 0; i < bleaves; i++) {
872: PetscInt b = blocal ? blocal[i] : i;
873: clocal[b] = b;
874: cremote[b] = bremote[i];
875: }
876: PetscInt nleaves = 0;
877: for (PetscInt i = 0; i < maxleaf; i++) {
878: if (clocal[i] < 0) continue;
879: clocal[nleaves] = clocal[i];
880: cremote[nleaves] = cremote[i];
881: nleaves++;
882: }
883: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfa), merged));
884: PetscCall(PetscSFSetGraph(*merged, aroots, nleaves, clocal, PETSC_COPY_VALUES, cremote, PETSC_COPY_VALUES));
885: PetscCall(PetscFree2(clocal, cremote));
886: PetscFunctionReturn(PETSC_SUCCESS);
887: }
889: /*@
890: PetscSFCreateStridedSF - Create a `PetscSF` to communicate interleaved blocks of data
892: Collective
894: Input Parameters:
895: + sf - star forest
896: . bs - stride
897: . ldr - leading dimension of root space
898: - ldl - leading dimension of leaf space
900: Output Parameter:
901: . vsf - the new `PetscSF`
903: Level: intermediate
905: Notes:
906: This can be useful for performing communications on multiple right-hand sides stored in a Fortran-style two-dimensional array.
907: For example, the calling sequence
908: .vb
909: c_datatype *roots, *leaves;
910: for i in [0,bs) do
911: PetscSFBcastBegin(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
912: PetscSFBcastEnd(sf, mpi_datatype, roots + i*ldr, leaves + i*ldl, op)
913: .ve
914: is equivalent to
915: .vb
916: c_datatype *roots, *leaves;
917: PetscSFCreateStridedSF(sf, bs, ldr, ldl, &vsf)
918: PetscSFBcastBegin(vsf, mpi_datatype, roots, leaves, op)
919: PetscSFBcastEnd(vsf, mpi_datatype, roots, leaves, op)
920: .ve
922: Developer Notes:
923: Should this functionality be handled with a new API instead of creating a new object?
925: .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`
926: @*/
927: PetscErrorCode PetscSFCreateStridedSF(PetscSF sf, PetscInt bs, PetscInt ldr, PetscInt ldl, PetscSF *vsf)
928: {
929: PetscSF rankssf;
930: const PetscSFNode *iremote, *sfrremote;
931: PetscSFNode *viremote;
932: const PetscInt *ilocal;
933: PetscInt *vilocal = NULL, *ldrs;
934: PetscInt nranks, nr, nl, vnr, vnl, maxl;
935: PetscMPIInt rank;
936: MPI_Comm comm;
937: PetscSFType sftype;
939: PetscFunctionBegin;
942: PetscAssertPointer(vsf, 5);
943: if (bs == 1) {
944: PetscCall(PetscObjectReference((PetscObject)sf));
945: *vsf = sf;
946: PetscFunctionReturn(PETSC_SUCCESS);
947: }
948: PetscCall(PetscSFSetUp(sf));
949: PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
950: PetscCallMPI(MPI_Comm_rank(comm, &rank));
951: PetscCall(PetscSFGetGraph(sf, &nr, &nl, &ilocal, &iremote));
952: PetscCall(PetscSFGetLeafRange(sf, NULL, &maxl));
953: maxl += 1;
954: if (ldl == PETSC_DECIDE) ldl = maxl;
955: if (ldr == PETSC_DECIDE) ldr = nr;
956: ldl /= PetscMax(1, sf->vscat.bs); // SFs created from VecScatterCreate() may have a nonzero block size. If not 0, we need to scale ldl and ldr
957: ldr /= PetscMax(1, sf->vscat.bs);
958: PetscCheck(ldr >= nr, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT ", must not be smaller than the number of roots %" PetscInt_FMT, ldr, nr);
959: PetscCheck(ldl >= maxl, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid leading dimension %" PetscInt_FMT " must be larger than leaf range %" PetscInt_FMT, ldl, maxl - 1);
960: vnr = nr * bs;
961: vnl = nl * bs;
962: PetscCall(PetscMalloc1(vnl, &viremote));
963: PetscCall(PetscMalloc1(vnl, &vilocal));
965: /* Communicate root leading dimensions to leaf ranks */
966: PetscCall(PetscSFGetRanksSF(sf, &rankssf));
967: PetscCall(PetscSFGetGraph(rankssf, NULL, &nranks, NULL, &sfrremote));
968: PetscCall(PetscMalloc1(nranks, &ldrs));
969: PetscCall(PetscSFBcastBegin(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
970: PetscCall(PetscSFBcastEnd(rankssf, MPIU_INT, &ldr, ldrs, MPI_REPLACE));
972: for (PetscInt i = 0, rold = -1, lda = -1; i < nl; i++) {
973: const PetscInt r = iremote[i].rank;
974: const PetscInt ii = iremote[i].index;
976: if (r == rank) lda = ldr;
977: else if (rold != r) {
978: PetscInt j;
980: for (j = 0; j < nranks; j++)
981: if (sfrremote[j].rank == r) break;
982: PetscCheck(j < nranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to locate neighbor rank %" PetscInt_FMT, r);
983: lda = ldrs[j];
984: }
985: rold = r;
986: for (PetscInt v = 0; v < bs; v++) {
987: viremote[v * nl + i].rank = r;
988: viremote[v * nl + i].index = v * lda + ii;
989: vilocal[v * nl + i] = v * ldl + (ilocal ? ilocal[i] : i);
990: }
991: }
992: PetscCall(PetscFree(ldrs));
993: PetscCall(PetscSFCreate(comm, vsf));
994: if (sf->vscat.bs > 1) {
995: (*vsf)->vscat.bs = sf->vscat.bs;
996: PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &(*vsf)->vscat.unit));
997: (*vsf)->vscat.to_n = bs * sf->vscat.to_n;
998: (*vsf)->vscat.from_n = bs * sf->vscat.from_n;
999: }
1000: PetscCall(PetscSFGetType(sf, &sftype));
1001: PetscCall(PetscSFSetType(*vsf, sftype));
1002: PetscCall(PetscSFSetGraph(*vsf, vnr, vnl, vilocal, PETSC_OWN_POINTER, viremote, PETSC_OWN_POINTER));
1003: PetscFunctionReturn(PETSC_SUCCESS);
1004: }