Actual source code: sfneighbor.c

#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif
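/* Note on the typedefs above: when PETSc is built with 64-bit indices and the MPI library
   supports large counts, the count/displacement arrays are declared with the MPI-4 types
   MPI_Count/MPI_Aint, so the MPIU_(I)Neighbor_alltoallv wrappers used below can presumably
   forward them to the large-count ("_c") neighborhood collectives; otherwise plain MPI
   ints are used. */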

typedef struct {
  SFBASICHEADER;
  MPI_Comm      comms[2];                /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2];          /* Are the two communicators initialized? */
  PetscSFCount *rootcounts, *leafcounts; /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls, *leafdispls; /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights, *leafweights;
  PetscInt      rootdegree, leafdegree;
} PetscSF_Neighbor;
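/* Sketch of how this struct is used by the routines below: rootdegree/leafdegree are the
   numbers of non-distinguished neighbor ranks; rootcounts/rootdispls and leafcounts/leafdispls
   are sized by those degrees and passed directly as the count/displacement arrays of the
   neighborhood alltoallv calls, while the integer weights feed MPI_Dist_graph_create_adjacent
   when the cached comms[] are built. */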

/*===================================================================================*/
/*              Internal utility routines                                            */
/*===================================================================================*/

static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend, PetscSFCount *sendcnts, MPI_Datatype sendtype, PetscInt nrecv, PetscSFCount *recvcnts, MPI_Datatype recvtype)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_LOG)) {
    petsc_isend_ct += (PetscLogDouble)nsend;
    petsc_irecv_ct += (PetscLogDouble)nrecv;

    if (sendtype != MPI_DATATYPE_NULL) {
      PetscMPIInt i, typesize;
      PetscCallMPI(MPI_Type_size(sendtype, &typesize));
      for (i = 0; i < nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i] * typesize);
    }

    if (recvtype != MPI_DATATYPE_NULL) {
      PetscMPIInt i, typesize;
      PetscCallMPI(MPI_Type_size(recvtype, &typesize));
      for (i = 0; i < nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i] * typesize);
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
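/* Note: petsc_isend_ct/petsc_irecv_ct and petsc_isend_len/petsc_irecv_len are PETSc's global
   message-logging counters, which feed the message counts and lengths reported by -log_view. */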

/* Get the communicator with distributed graph topology, which is not cheap to build, so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf, PetscSFDirection direction, MPI_Comm *distcomm)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFunctionBegin;
  if (!dat->initialized[direction]) {
    PetscInt           nrootranks, ndrootranks, nleafranks, ndleafranks;
    PetscMPIInt        indegree, outdegree;
    const PetscMPIInt *rootranks, *leafranks, *sources, *destinations;
    MPI_Comm           comm, *mycomm = &dat->comms[direction];

    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, NULL, NULL));       /* Which ranks will access my roots (I am a destination) */
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, &leafranks, NULL, NULL, NULL)); /* Whose roots my leaves will access (I am a source) */
    indegree     = nrootranks - ndrootranks;
    outdegree    = nleafranks - ndleafranks;
    sources      = PetscSafePointerPlusOffset(rootranks, ndrootranks);
    destinations = PetscSafePointerPlusOffset(leafranks, ndleafranks);
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    if (direction == PETSCSF_LEAF2ROOT) {
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, indegree, sources, dat->rootweights, outdegree, destinations, dat->leafweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      PetscCallMPI(MPI_Dist_graph_create_adjacent(comm, outdegree, destinations, dat->leafweights, indegree, sources, dat->rootweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm));
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  PetscFunctionReturn(PETSC_SUCCESS);
}
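/* Note: the two cached communicators describe the same neighborhood with the roles of sources
   and destinations swapped; each is built lazily the first time its direction is used and is
   freed in PetscSFReset_Neighbor(). The reorder=1 argument permits MPI to renumber ranks in
   the new communicator if that helps placement. */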

// Start MPI_Ineighbor_alltoallv (only used for inter-process communication)
static PetscErrorCode PetscSFLinkStartCommunication_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL, *leafbuf = NULL;
  MPI_Request      *req = NULL;

  PetscFunctionBegin;
  if (direction == PETSCSF_ROOT2LEAF) {
    PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  } else {
    PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */));
  }

  PetscCall(PetscSFGetDistComm_Neighbor(sf, direction, &distcomm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, direction));

  if (dat->rootdegree || dat->leafdegree) { // Open MPI 3.0 errored when rootdegree = leafdegree = 0, so we skip the call in that case
    if (direction == PETSCSF_ROOT2LEAF) {
      PetscCallMPI(MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, link->unit, leafbuf, dat->leafcounts, dat->leafdispls, link->unit, distcomm, req));
      PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, link->unit, dat->leafdegree, dat->leafcounts, link->unit));
    } else {
      PetscCallMPI(MPIU_Ineighbor_alltoallv(leafbuf, dat->leafcounts, dat->leafdispls, link->unit, rootbuf, dat->rootcounts, dat->rootdispls, link->unit, distcomm, req));
      PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, link->unit, dat->rootdegree, dat->rootcounts, link->unit));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
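/* Note: a single MPI_Request covers the whole neighborhood collective (hence nrootreqs = 1 in
   PetscSFSetUp_Neighbor() below); it is presumably waited on by the Basic end routines
   (PetscSFBcastEnd_Basic/PetscSFReduceEnd_Basic) installed in PetscSFCreate_Neighbor(). */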

#if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Neighbor  *dat           = (PetscSF_Neighbor *)sf->data;
  MPI_Comm           distcomm      = MPI_COMM_NULL;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi;
  MPI_Request       *req            = link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi];
  void              *rootbuf = link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi], *leafbuf = link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi];
  MPI_Info           info;

  PetscFunctionBegin;
  PetscCall(PetscSFGetDistComm_Neighbor(sf, direction, &distcomm));
  if (dat->rootdegree || dat->leafdegree) {
    if (!link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
      PetscCallMPI(MPI_Info_create(&info)); // currently, we don't use info
      if (direction == PETSCSF_ROOT2LEAF) {
        PetscCallMPI(MPIU_Neighbor_alltoallv_init(rootbuf, dat->rootcounts, dat->rootdispls, link->unit, leafbuf, dat->leafcounts, dat->leafdispls, link->unit, distcomm, info, req));
      } else {
        PetscCallMPI(MPIU_Neighbor_alltoallv_init(leafbuf, dat->leafcounts, dat->leafdispls, link->unit, rootbuf, dat->rootcounts, dat->rootdispls, link->unit, distcomm, info, req));
      }
      link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
      PetscCallMPI(MPI_Info_free(&info));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
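/* Note: the persistent path binds the buffers, counts, and displacements once via
   MPIU_Neighbor_alltoallv_init(); each subsequent exchange only restarts the same request
   with MPI_Start() in the routine below. */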

// Start MPI requests. If using non-GPU-aware MPI, we might need to copy data from the device buffer to the host buffer
static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Neighbor(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  MPI_Request      *req = NULL;

  PetscFunctionBegin;
  if (direction == PETSCSF_ROOT2LEAF) {
    PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  } else {
    PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */));
  }

  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, direction));
  if (dat->rootdegree || dat->leafdegree) {
    PetscCallMPI(MPI_Start(req));
    if (direction == PETSCSF_ROOT2LEAF) {
      PetscCall(PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, link->unit, dat->leafdegree, dat->leafcounts, link->unit));
    } else {
      PetscCall(PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, link->unit, dat->rootdegree, dat->rootcounts, link->unit));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode PetscSFSetCommunicationOps_Neighbor(PetscSF sf, PetscSFLink link)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
  if (sf->persistent) {
    link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Neighbor;
    link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Neighbor;
  } else
#endif
  {
    link->StartCommunication = PetscSFLinkStartCommunication_Neighbor;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt          i, j, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt   *rootoffset, *leafoffset;
  PetscMPIInt       m, n;

  PetscFunctionBegin;
  /* SFNeighbor inherits from Basic */
  PetscCall(PetscSFSetUp_Basic(sf));
  /* SFNeighbor specific */
  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
  dat->rootdegree = m = (PetscMPIInt)(nrootranks - ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks - ndleafranks);
  sf->nleafreqs       = 0;
  dat->nrootreqs      = 1; // collectives only need one MPI_Request. We just put it in rootreqs[]

  /* Only set up MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
#if !PetscDefined(HAVE_OPENMPI) || (PetscDefined(HAVE_OMPI_MAJOR_VERSION) && PetscDefined(HAVE_OMPI_MINOR_VERSION) && PetscDefined(HAVE_OMPI_RELEASE_VERSION) && !(PETSC_HAVE_OMPI_MAJOR_VERSION == 5 && PETSC_HAVE_OMPI_MINOR_VERSION == 0 && PETSC_HAVE_OMPI_RELEASE_VERSION == 0))
  PetscCall(PetscMalloc6(m, &dat->rootdispls, m, &dat->rootcounts, m, &dat->rootweights, n, &dat->leafdispls, n, &dat->leafcounts, n, &dat->leafweights));
#else // workaround for an OpenMPI 5.0.0 bug, https://github.com/open-mpi/ompi/issues/12037
  PetscMPIInt m2 = m ? m : 1, n2 = n ? n : 1;
  PetscCall(PetscMalloc6(m2, &dat->rootdispls, m2, &dat->rootcounts, m2, &dat->rootweights, n2, &dat->leafdispls, n2, &dat->leafcounts, n2, &dat->leafweights));
#endif

#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    dat->rootdispls[j]  = rootoffset[i] - rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i + 1] - rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    dat->leafdispls[j]  = leafoffset[i] - leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i + 1] - leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647);
  }
#else
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    PetscCall(PetscMPIIntCast(rootoffset[i] - rootoffset[ndrootranks], &m));
    dat->rootdispls[j] = m;
    PetscCall(PetscMPIIntCast(rootoffset[i + 1] - rootoffset[i], &n));
    dat->rootcounts[j]  = n;
    dat->rootweights[j] = n;
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    PetscCall(PetscMPIIntCast(leafoffset[i] - leafoffset[ndleafranks], &m));
    dat->leafdispls[j] = m;
    PetscCall(PetscMPIIntCast(leafoffset[i + 1] - leafoffset[i], &n));
    dat->leafcounts[j]  = n;
    dat->leafweights[j] = n;
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
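/* Note on the weights computed above: MPI_Dist_graph_create_adjacent() takes int edge weights,
   so with 64-bit indices the (possibly huge) counts are scaled into the PetscMPIInt range rather
   than cast directly, while in the 32-bit branch the weights simply equal the counts. The weights
   presumably only matter if the MPI implementation consults them when reorder=1 is honored. */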

static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt          i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFunctionBegin;
  PetscCheck(!dat->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree6(dat->rootdispls, dat->rootcounts, dat->rootweights, dat->leafdispls, dat->leafcounts, dat->leafweights));
  for (i = 0; i < 2; i++) {
    if (dat->initialized[i]) {
      PetscCallMPI(MPI_Comm_free(&dat->comms[i]));
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscCall(PetscSFReset_Basic(sf)); /* Common part */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Neighbor(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

  PetscFunctionBegin;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp               = PetscSFSetUp_Neighbor;
  sf->ops->Reset               = PetscSFReset_Neighbor;
  sf->ops->Destroy             = PetscSFDestroy_Neighbor;
  sf->ops->SetCommunicationOps = PetscSFSetCommunicationOps_Neighbor;

#if defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
  PetscObjectOptionsBegin((PetscObject)sf);
  PetscCall(PetscOptionsBool("-sf_neighbor_persistent", "Use MPI-4 persistent neighborhood collectives; used along with -sf_type neighbor", "PetscSFCreate", sf->persistent, &sf->persistent, NULL));
  PetscOptionsEnd();
#endif
  sf->collective = PETSC_TRUE;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
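/* A minimal usage sketch, assuming the standard PetscSF API: this implementation is normally
   selected at runtime with -sf_type neighbor (optionally together with -sf_neighbor_persistent
   for the MPI-4 persistent path), or programmatically with

     PetscSF sf;
     PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
     PetscCall(PetscSFSetType(sf, PETSCSFNEIGHBOR));
     // ... then PetscSFSetGraph() and PetscSFBcast/Reduce as usual ...
*/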