Actual source code: sfneighbor.c
#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif

typedef struct {
  SFBASICHEADER;
  MPI_Comm      comms[2];                /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2];          /* Are the two communicators initialized? */
  PetscSFCount *rootcounts, *leafcounts; /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls, *leafdispls; /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights, *leafweights;
  PetscInt      rootdegree, leafdegree;
} PetscSF_Neighbor;
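
// Note: the counts/displs/weights arrays above hold one entry per non-distinguished
// (i.e. remote) root/leaf rank, in the same order those ranks are later passed to
// MPI_Dist_graph_create_adjacent(), so they can be handed directly to the neighborhood
// alltoallv calls below. Distinguished ranks are excluded because, as noted in
// PetscSFSetUp_Neighbor(), they are served through shared memory rather than MPI.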

/*===================================================================================*/
/* Internal utility routines */
/*===================================================================================*/

static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend, PetscSFCount *sendcnts, MPI_Datatype sendtype, PetscInt nrecv, PetscSFCount *recvcnts, MPI_Datatype recvtype)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_LOG)) {
    petsc_isend_ct += (PetscLogDouble)nsend;
    petsc_irecv_ct += (PetscLogDouble)nrecv;

    if (sendtype != MPI_DATATYPE_NULL) {
      PetscMPIInt i, typesize;
      PetscCallMPI(MPI_Type_size(sendtype, &typesize));
      for (i = 0; i < nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i] * typesize);
    }

    if (recvtype != MPI_DATATYPE_NULL) {
      PetscMPIInt i, typesize;
      PetscCallMPI(MPI_Type_size(recvtype, &typesize));
      for (i = 0; i < nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i] * typesize);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Get the communicator with distributed graph topology, which is not cheap to build, so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf, PetscSFDirection direction, MPI_Comm *distcomm)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFunctionBegin;
  if (!dat->initialized[direction]) {
    PetscInt           nrootranks, ndrootranks, nleafranks, ndleafranks;
    PetscMPIInt        indegree, outdegree;
    const PetscMPIInt *rootranks, *leafranks, *sources, *destinations;
    MPI_Comm           comm, *mycomm = &dat->comms[direction];

    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, NULL, NULL));       /* Which ranks will access my roots (I am a destination) */
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, &leafranks, NULL, NULL, NULL)); /* Which ranks' roots my leaves will access (I am a source) */
    indegree     = nrootranks - ndrootranks;
    outdegree    = nleafranks - ndleafranks;
    sources      = PetscSafePointerPlusOffset(rootranks, ndrootranks);
    destinations = PetscSafePointerPlusOffset(leafranks, ndleafranks);
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    if (direction == PETSCSF_LEAF2ROOT) {
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, indegree, sources, dat->rootweights, outdegree, destinations, dat->leafweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, outdegree, destinations, dat->leafweights, indegree, sources, dat->rootweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  PetscFunctionReturn(PETSC_SUCCESS);
}
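
// For illustration, with hypothetical numbers: if two remote ranks (0 and 2) access this
// rank's roots and this rank's leaves access roots on one remote rank (3), the
// leaf-to-root communicator above is built essentially as
//
//   PetscMPIInt sources[]      = {0, 2}; // indegree  = 2: ranks with edges pointing at me
//   PetscMPIInt destinations[] = {3};    // outdegree = 1: ranks I have edges pointing at
//   MPI_Comm    distcomm;
//   MPI_Dist_graph_create_adjacent(comm, 2, sources, rootweights, 1, destinations, leafweights, MPI_INFO_NULL, 1 /*reorder*/, &distcomm);
//
// The weights (message sizes scaled in PetscSFSetUp_Neighbor) and reorder = 1 give the MPI
// implementation a hint to permute ranks to better match the communication pattern; the
// resulting topology communicator is then the one used in the neighborhood collectives below.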

// start MPI_Ineighbor_alltoallv (only used for inter-process communication)
static PetscErrorCode PetscSFLinkStartCommunication_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf  = NULL, *leafbuf = NULL;
  MPI_Request      *req      = NULL;

  PetscFunctionBegin;
  if (direction == PETSCSF_ROOT2LEAF) {
    PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  } else {
    PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */));
  }

  PetscCall(PetscSFGetDistComm_Neighbor(sf, direction, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, direction));

  if (dat->rootdegree || dat->leafdegree) { // Open MPI 3.0 errored when rootdegree = leafdegree = 0, so we skip the call in that case
    if (direction == PETSCSF_ROOT2LEAF) {
      PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, link->unit, leafbuf, dat->leafcounts, dat->leafdispls, link->unit, distcomm, req));
      PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, link->unit, dat->leafdegree, dat->leafcounts, link->unit));
    } else {
      PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf, dat->leafcounts, dat->leafdispls, link->unit, rootbuf, dat->rootcounts, dat->rootdispls, link->unit, distcomm, req));
      PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, link->unit, dat->rootdegree, dat->rootcounts, link->unit));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
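
// For reference, a minimal sketch of the nonblocking neighborhood exchange wrapped above
// (assuming plain int counts/displs and an MPI_DOUBLE payload):
//
//   MPI_Request req;
//   MPI_Ineighbor_alltoallv(sendbuf, sendcounts, senddispls, MPI_DOUBLE,
//                           recvbuf, recvcounts, recvdispls, MPI_DOUBLE,
//                           distcomm, &req);
//   MPI_Wait(&req, MPI_STATUS_IGNORE); // PetscSF instead completes the request in the matching *End routine
//
// Data moves only along the edges of the graph attached to distcomm; the i-th entry of each
// count/displacement array refers to the i-th neighbor of that graph. MPIU_Ineighbor_alltoallv
// is PETSc's wrapper, which can select a large-count variant (cf. the PetscSFCount/PetscSFAint
// typedefs at the top of this file).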

#if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Neighbor  *dat            = (PetscSF_Neighbor *)sf->data;
  MPI_Comm           distcomm       = MPI_COMM_NULL;
  const PetscMemType rootmtype_mpi  = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi;
  MPI_Request       *req            = link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi];
  void              *rootbuf        = link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi], *leafbuf = link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi];
  MPI_Info           info;

  PetscFunctionBegin;
  PetscCall(PetscSFGetDistComm_Neighbor(sf, direction, &distcomm));
  if (dat->rootdegree || dat->leafdegree) {
    if (!link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
      PetscCallMPI(MPI_Info_create(&info)); // currently, we don't use info
      if (direction == PETSCSF_ROOT2LEAF) {
        PetscCallMPI(MPIU_Neighbor_alltoallv_init(rootbuf, dat->rootcounts, dat->rootdispls, link->unit, leafbuf, dat->leafcounts, dat->leafdispls, link->unit, distcomm, info, req));
      } else {
        PetscCallMPI(MPIU_Neighbor_alltoallv_init(leafbuf, dat->leafcounts, dat->leafdispls, link->unit, rootbuf, dat->rootcounts, dat->rootdispls, link->unit, distcomm, info, req));
      }
      link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
      PetscCallMPI(MPI_Info_free(&info));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
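
// MPI-4 persistent collectives follow an init/start/wait/free life cycle. A hypothetical
// standalone use of the persistent neighborhood alltoallv wrapped above would look like
//
//   MPI_Request req;
//   MPI_Neighbor_alltoallv_init(sendbuf, sendcounts, senddispls, type,
//                               recvbuf, recvcounts, recvdispls, type,
//                               distcomm, MPI_INFO_NULL, &req);
//   for (int it = 0; it < nits; it++) { // the same request is reused every iteration
//     MPI_Start(&req);
//     MPI_Wait(&req, MPI_STATUS_IGNORE);
//   }
//   MPI_Request_free(&req);
//
// Here the request is created once in the routine above and started repeatedly in
// PetscSFLinkStartCommunication_Persistent_Neighbor() below.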

// Start MPI requests. If using non-GPU-aware MPI, we might need to copy data from device buffers to host buffers
static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  MPI_Request      *req = NULL;

  PetscFunctionBegin;
  if (direction == PETSCSF_ROOT2LEAF) {
    PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  } else {
    PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */));
  }

  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, direction));
  if (dat->rootdegree || dat->leafdegree) {
    PetscCallMPI(MPI_Start(req));
    if (direction == PETSCSF_ROOT2LEAF) {
      PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, link->unit, dat->leafdegree, dat->leafcounts, link->unit));
    } else {
      PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, link->unit, dat->rootdegree, dat->rootcounts, link->unit));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode PetscSFSetCommunicationOps_Neighbor(PetscSF sf, PetscSFLink link)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
  if (sf->persistent) {
    link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Neighbor;
    link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Neighbor;
  } else
#endif
  {
    link->StartCommunication = PetscSFLinkStartCommunication_Neighbor;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*===================================================================================*/
/* Implementations of SF public APIs */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt          i, j, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt   *rootoffset, *leafoffset;
  PetscMPIInt       m, n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  PetscCall(PetscSFSetUp_Basic(sf));
  /* SFNeighbor specific */
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
  dat->rootdegree = m = (PetscMPIInt)(nrootranks - ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks - ndleafranks);
  sf->nleafreqs   = 0;
  dat->nrootreqs  = 1; // collectives only need one MPI_Request; we just put it in rootreqs[]

  /* Only set up MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
#if !PetscDefined(HAVE_OPENMPI) || (PetscDefined(HAVE_OMPI_MAJOR_VERSION) && PetscDefined(HAVE_OMPI_MINOR_VERSION) && PetscDefined(HAVE_OMPI_RELEASE_VERSION) && !(PETSC_HAVE_OMPI_MAJOR_VERSION == 5 && PETSC_HAVE_OMPI_MINOR_VERSION == 0 && PETSC_HAVE_OMPI_RELEASE_VERSION == 0))
  PetscCall(PetscMalloc6(m, &dat->rootdispls, m, &dat->rootcounts, m, &dat->rootweights, n, &dat->leafdispls, n, &dat->leafcounts, n, &dat->leafweights));
#else // workaround for an Open MPI 5.0.0 bug, https://github.com/open-mpi/ompi/issues/12037
  PetscMPIInt m2 = m ? m : 1, n2 = n ? n : 1;
  PetscCall(PetscMalloc6(m2, &dat->rootdispls, m2, &dat->rootcounts, m2, &dat->rootweights, n2, &dat->leafdispls, n2, &dat->leafcounts, n2, &dat->leafweights));
#endif

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    dat->rootdispls[j]  = rootoffset[i] - rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i + 1] - rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647); /* Scale to the range of PetscMPIInt */
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    dat->leafdispls[j]  = leafoffset[i] - leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i + 1] - leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647);
  }
#else
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    PetscCall(PetscMPIIntCast(rootoffset[i] - rootoffset[ndrootranks], &m));
    dat->rootdispls[j] = m;
    PetscCall(PetscMPIIntCast(rootoffset[i + 1] - rootoffset[i], &n));
    dat->rootcounts[j]  = n;
    dat->rootweights[j] = n;
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    PetscCall(PetscMPIIntCast(leafoffset[i] - leafoffset[ndleafranks], &m));
    dat->leafdispls[j] = m;
    PetscCall(PetscMPIIntCast(leafoffset[i + 1] - leafoffset[i], &n));
    dat->leafcounts[j]  = n;
    dat->leafweights[j] = n;
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
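
// Worked example with hypothetical numbers: suppose a rank has nrootranks = 3 with
// ndrootranks = 1 distinguished (same-node) rank and rootoffset = {0, 4, 9, 15}. The two
// remote neighbors then get
//
//   rootdispls = {0, 5} // rootoffset[i]     - rootoffset[ndrootranks]
//   rootcounts = {5, 6} // rootoffset[i + 1] - rootoffset[i]
//
// i.e. displacements and counts are expressed relative to the start of the remote portion
// of the packed root buffer, since the distinguished rank is served through shared memory.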

static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt          i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFunctionBegin;
  PetscCheck(!dat->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree6(dat->rootdispls, dat->rootcounts, dat->rootweights, dat->leafdispls, dat->leafcounts, dat->leafweights));
  for (i = 0; i < 2; i++) {
    if (dat->initialized[i]) {
      PetscCallMPI(MPI_Comm_free(&dat->comms[i]));
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscCall(PetscSFReset_Basic(sf)); /* Common part */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Neighbor(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

  PetscFunctionBegin;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp               = PetscSFSetUp_Neighbor;
  sf->ops->Reset               = PetscSFReset_Neighbor;
  sf->ops->Destroy             = PetscSFDestroy_Neighbor;
  sf->ops->SetCommunicationOps = PetscSFSetCommunicationOps_Neighbor;

#if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
  PetscObjectOptionsBegin((PetscObject)sf);
  PetscCall(PetscOptionsBool("-sf_neighbor_persistent", "Use MPI-4 persistent neighborhood collectives; used along with -sf_type neighbor", "PetscSFCreate", sf->persistent, &sf->persistent, NULL));
  PetscOptionsEnd();
#endif
  sf->collective = PETSC_TRUE;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
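
// Usage sketch (assuming the standard PETSc options mechanism): this implementation is
// selected at runtime with
//
//   -sf_type neighbor                           # use MPI-3 neighborhood collectives for PetscSF
//   -sf_type neighbor -sf_neighbor_persistent   # additionally use MPI-4 persistent variants, if available
//
// or programmatically with PetscSFSetType(sf, PETSCSFNEIGHBOR) before setting up the SF.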