Actual source code: sfbasic.c
#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <petsc/private/viewerimpl.h>

// Init persistent MPI send/recv requests
static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           cnt;
  PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  MPI_Datatype       unit = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  if (bas->rootbuflen[PETSCSF_REMOTE] && !link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Send_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
    link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
  }

  if (sf->leafbuflen[PETSCSF_REMOTE] && !link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Send_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
    link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi] = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
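
/* Illustrative sketch (not part of sfbasic.c): the plain-MPI persistent-request pattern that the
   routine above applies per remote rank via the MPIU_Send_init/MPIU_Recv_init wrappers. Requests
   are created once and then restarted for every round of communication.

     MPI_Request reqs[2];
     MPI_Recv_init(recvbuf, n, MPI_DOUBLE, peer, tag, comm, &reqs[0]); // create once
     MPI_Send_init(sendbuf, n, MPI_DOUBLE, peer, tag, comm, &reqs[1]);
     for (int iter = 0; iter < niter; iter++) {
       MPI_Startall(2, reqs);                     // start both requests for this round
       MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE); // requests remain allocated after completion
     }
     MPI_Request_free(&reqs[0]); // release the persistent requests when done
     MPI_Request_free(&reqs[1]);
*/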

// Start MPI requests. If using non-GPU-aware MPI, we might need to copy data from a device buffer to a host buffer first
static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscMPIInt    nsreqs = 0, nrreqs = 0;
  MPI_Request   *sreqs = NULL, *rreqs = NULL;
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt       sbuflen, rbuflen;

  PetscFunctionBegin;
  rbuflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE];
  if (rbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nrreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &rreqs));
    } else { /* leaf to root */
      nrreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &rreqs, NULL));
    }
  }

  sbuflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE];
  if (sbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nsreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &sreqs, NULL));
    } else { /* leaf to root */
      nsreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &sreqs));
    }
  }
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); // need to sync the stream to make BOTH sendbuf and recvbuf ready
  if (rbuflen) PetscCallMPI(MPI_Startall_irecv(rbuflen, link->unit, nrreqs, rreqs));
  if (sbuflen) PetscCallMPI(MPI_Startall_isend(sbuflen, link->unit, nsreqs, sreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_MPIX_STREAM)
// issue MPIX_Isend/Irecv_enqueue()
static PetscErrorCode PetscSFLinkStartCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, j;
  PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks, cnt;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           stream_comm = sf->stream_comm;
  MPI_Datatype       unit        = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  if (bas->rootbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
        PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
  }

  if (sf->leafbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
        PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFLinkFinishCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas            = (PetscSF_Basic *)sf->data;
  const PetscMemType rootmtype_mpi  = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
  PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
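
/* Illustrative sketch (not part of sfbasic.c) of the stream-aware pattern used above: once a
   stream-attached communicator (assumed to already exist, like sf->stream_comm here) is available,
   sends, receives, and waits are enqueued onto the GPU stream, so no host-side stream sync is
   needed before handing device buffers to MPI. This follows the MPICH MPIX_Stream extension as
   used in this file; d_sendbuf/d_recvbuf are hypothetical device buffers.

     MPI_Request reqs[2];
     MPIX_Irecv_enqueue(d_recvbuf, n, MPI_DOUBLE, peer, tag, stream_comm, &reqs[0]); // device buffer
     MPIX_Isend_enqueue(d_sendbuf, n, MPI_DOUBLE, peer, tag, stream_comm, &reqs[1]); // device buffer
     MPIX_Waitall_enqueue(2, reqs, MPI_STATUSES_IGNORE); // completion is also ordered on the stream
*/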

static PetscErrorCode PetscSFSetCommunicationOps_Basic(PetscSF sf, PetscSFLink link)
{
  PetscFunctionBegin;
  link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Basic;
  link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Basic;
#if defined(PETSC_HAVE_MPIX_STREAM)
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) {
    link->StartCommunication  = PetscSFLinkStartCommunication_MPIX_Stream;
    link->FinishCommunication = PetscSFLinkFinishCommunication_MPIX_Stream;
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*===================================================================================*/
/* SF public interface implementations */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt      *rlengths, *ilengths;
  PetscMPIInt    nRemoteRootRanks, nRemoteLeafRanks;
  PetscMPIInt    rank, niranks, *iranks, tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request   *rootreqs, *leafreqs;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
  PetscCall(PetscSFSetUpRanks(sf, group));
  PetscCallMPI(MPI_Group_free(&group));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /*
   * Inform roots about how many leaves reference them and from which ranks
   */
  PetscCall(PetscMalloc1(sf->nranks, &rlengths));
  /* Determine the number of incoming ranks, which ranks send, and the length of each incoming message */
  for (PetscMPIInt i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
  nRemoteRootRanks = sf->nranks - sf->ndranks;
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, PetscSafePointerPlusOffset(sf->ranks, sf->ndranks), PetscSafePointerPlusOffset(rlengths, sf->ndranks), &niranks, &iranks, (void **)&ilengths));

  /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
     We could sort the ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
  bas->ioffset[0] = 0;
  for (PetscMPIInt i = 0; i < bas->ndiranks; i++) {
    bas->iranks[i]      = sf->ranks[i];
    bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
  }
  PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
  for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) {
    bas->iranks[i]      = iranks[i - bas->ndiranks];
    bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
  }
  bas->itotal = bas->ioffset[bas->niranks];
  PetscCall(PetscFree(rlengths));
  PetscCall(PetscFree(iranks));
  PetscCall(PetscFree(ilengths));

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks - bas->ndiranks;
  PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
  PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
  for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
  for (PetscMPIInt i = 0; i < sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
    if (i < sf->ndranks) {
      PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
      PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
      PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
      PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
      continue;
    }
    PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
  }
  PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;

  /* Set up fields related to packing, such as rootbuflen[] */
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscCall(PetscFree2(rootreqs, leafreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
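
/* Illustrative sketch (not part of sfbasic.c): how PetscCommBuildTwoSided(), used above, performs
   sparse two-sided discovery. Each rank lists the ranks it will send to plus one PetscInt of payload
   per destination; the routine returns who will send to *me* along with their payloads. The output
   arrays are allocated by PETSc and must be freed by the caller, as PetscSFSetUp_Basic() does.

     PetscMPIInt  nto = ..., *toranks = ...; // destinations known locally
     PetscInt    *todata = ...;              // one PetscInt per destination (e.g., a message length)
     PetscMPIInt  nfrom, *fromranks;
     PetscInt    *fromdata;
     PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nto, toranks, todata, &nfrom, &fromranks, (void **)&fromdata));
     // ... use fromranks[]/fromdata[] to post receives of known sizes ...
     PetscCall(PetscFree(fromranks));
     PetscCall(PetscFree(fromdata));
*/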

PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
  PetscSFLink    link = bas->avail, next;

  PetscFunctionBegin;
  PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree2(bas->iranks, bas->ioffset));
  PetscCall(PetscFree(bas->irootloc));

#if defined(PETSC_HAVE_DEVICE)
  for (int i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
#endif

  for (; link; link = next) {
    next = link->next;
    PetscCall(PetscSFLinkDestroy(sf, link));
  }
  bas->avail = NULL;
  PetscCall(PetscSFResetPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Basic(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_SINGLE_LIBRARY)
#include <petscmat.h>

PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscMPIInt        nrootranks, ndrootranks;
  const PetscInt    *rootoffset;
  PetscMPIInt        rank, size;
  const PetscMPIInt *rootranks;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  PetscScalar        unitbytes;
  Mat                A;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc., where we do
     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
     from the senders' point of view: how many bytes I will send to my neighbors.

     Looking at a column of the matrix, one can also see how many bytes the rank will receive from others.

     If a PetscSFLink in bas->inuse is available, we could use it to get the tree vertex size. But that would give
     different interpretations of the same SF for different data types. Since we care most about VecScatter,
     we uniformly treat each vertex as a PetscScalar.
  */
  unitbytes = (PetscScalar)sizeof(PetscScalar);

  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
  PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
  PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
  for (PetscMPIInt i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatView(A, viewer));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
{
  PetscBool isascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, " MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool isdraw, isbinary;
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
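
/* Illustrative sketch (not part of sfbasic.c): triggering PetscSFView_Basic_PatternAndSizes() above by
   viewing the SF with the ASCII viewer in MATLAB format (per the dispatch in PetscSFView_Basic(); the
   pattern dump requires a PETSC_USE_SINGLE_LIBRARY build).

     PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
     PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
     PetscCall(PetscSFView(sf, viewer));
     PetscCall(PetscViewerPopFormat(viewer));
*/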

PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests etc. (if MPI is used) */
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  /* Pack rootdata into rootbuf for remote communication */
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Start communication, e.g., post MPIU_Isend */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Do the local scatter (i.e., self-to-self communication), which overlaps with the remote communication above */
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin() with root/leafdata as the key */
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* Finish the remote communication, e.g., call MPI_Waitall */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack the data in leafbuf into leafdata, for the remote part of the communication */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
  /* Recycle the link */
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
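
/* Illustrative sketch (not part of sfbasic.c): the public two-phase broadcast that dispatches to the
   PetscSFBcastBegin_Basic/PetscSFBcastEnd_Basic implementations above. Work independent of leafdata
   can be overlapped between Begin and End; array sizes follow the graph set with PetscSFSetGraph().

     const PetscScalar *rootdata = ...; // one entry per local root
     PetscScalar       *leafdata = ...; // one entry per local leaf
     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
     // ... overlap independent computation here ...
     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
*/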

/* Shared by ReduceBegin and FetchAndOpBegin */
static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
  *out = link;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* leaf -> root with reduction */
PETSC_INTERN PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
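
/* Illustrative sketch (not part of sfbasic.c): the matching public leaf-to-root reduction. Multiple
   leaves pointing at the same root are combined with the MPI_Op; MPIU_SUM handles PETSc's numeric
   scalar types.

     const PetscScalar *leafdata = ...;
     PetscScalar       *rootdata = ...; // typically pre-initialized, e.g., to zero
     PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
     PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM));
*/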

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Do fetch-and-op; the (remote) update results are in rootbuf */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast rootbuf to leafupdate */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack and insert the fetched data into the leaves */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
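
/* Illustrative sketch (not part of sfbasic.c): the public fetch-and-op, which combines leafdata into
   rootdata and returns, in leafupdate, the value each root held just before that leaf's contribution
   was applied. A classic use is handing out unique offsets from shared counters at the roots.

     PetscInt       *rootdata   = ...; // running counters at the roots
     const PetscInt *leafdata   = ...; // increments requested by the leaves
     PetscInt       *leafupdate = ...; // receives the pre-update root values
     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
*/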

PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;

  PetscFunctionBegin;
  if (niranks) *niranks = bas->niranks;
  if (iranks) *iranks = bas->iranks;
  if (ioffset) *ioffset = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively reuse the established communication pattern
   of sf: one bcast on sf is all the communication needed to build the embedded sf. Note that selected[]
   has been sorted before this routine is called.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscSF            esf;
  PetscInt          *esf_roffset, *esf_rmine, *esf_rremote;
  PetscInt           j, p, q, nroots, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks, nranks, ndranks, niranks, esf_nranks, esf_ndranks, ndiranks;
  const PetscMPIInt *ranks, *iranks;
  const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
  PetscBool          connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
  PetscCall(PetscSFSetFromOptions(esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
  leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
  /* Tag selected roots */
  for (PetscInt i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

  PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (PetscMPIInt i = 0; i < nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_nleaves++;
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      esf_nranks++;
      if (i < ndranks) esf_ndranks++;
    }
  }

  /* Set the graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
  PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
  PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
  p              = 0; /* Counter for connected root ranks */
  q              = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (PetscMPIInt i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]               = rremote[j];
        new_iremote[q].index         = rremote[j];
        new_iremote[q].rank          = ranks[i];
        connected                    = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]       = ranks[i];
      esf_roffset[p + 1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
  /* The embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here, but
     we do not care since these arrays are usually short. The benefit is that we can fill these arrays by parsing irootloc just once.
   */
  PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (PetscMPIInt i = 0; i < niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected          = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i < ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]      = iranks[i];
      bas->ioffset[p + 1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Set up packing-related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  /* Copied from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done the setup ourselves! */
  PetscCall(PetscFree2(rootdata, leafmem));
  *newsf = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
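
/* Illustrative sketch (not part of sfbasic.c): the public entry point that dispatches to the optimized
   routine above for SFBASIC. It keeps only the selected roots (and the leaves still connected to them)
   in a new SF; the selected indices below are example values.

     PetscInt selected[] = {0, 3, 7}; // local root indices to keep
     PetscSF  esf;
     PetscCall(PetscSFCreateEmbeddedRootSF(sf, 3, selected, &esf));
     // ... communicate on the smaller graph ...
     PetscCall(PetscSFDestroy(&esf));
*/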

PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic *dat;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->SetCommunicationOps  = PetscSFSetCommunicationOps_Basic;

  sf->persistent = PETSC_TRUE; // currently SFBASIC always uses persistent send/recv
  sf->collective = PETSC_FALSE;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
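
/* Illustrative sketch (not part of sfbasic.c): how this constructor is typically reached from user code.
   PETSCSFBASIC is the default PetscSF type, so the explicit PetscSFSetType() call may be omitted or
   overridden on the command line via PetscSFSetFromOptions(). The one-leaf graph below is hypothetical.

     PetscSF     sf;
     PetscSFNode iremote[1] = {{0, 0}}; // every rank's single leaf points at root 0 on rank 0
     PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
     PetscCall(PetscSFSetType(sf, PETSCSFBASIC));
     PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
     PetscCall(PetscSFSetFromOptions(sf));
     PetscCall(PetscSFSetUp(sf)); // ends up in PetscSFSetUp_Basic() above
     PetscCall(PetscSFDestroy(&sf));
*/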