Actual source code: sfneighbor.c

#include <../src/vec/is/sf/impls/basic/sfpack.h>
#include <../src/vec/is/sf/impls/basic/sfbasic.h>

/* Convenience local types */
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
typedef MPI_Count PetscSFCount;
typedef MPI_Aint  PetscSFAint;
#else
typedef PetscMPIInt PetscSFCount;
typedef PetscMPIInt PetscSFAint;
#endif

typedef struct {
  SFBASICHEADER;
  MPI_Comm      comms[2];                /* Communicators with distributed topology in both directions */
  PetscBool     initialized[2];          /* Are the two communicators initialized? */
  PetscSFCount *rootcounts, *leafcounts; /* counts for non-distinguished ranks */
  PetscSFAint  *rootdispls, *leafdispls; /* displs for non-distinguished ranks */
  PetscMPIInt  *rootweights, *leafweights;
  PetscInt      rootdegree, leafdegree;
} PetscSF_Neighbor;

/*===================================================================================*/
/*              Internal utility routines                                            */
/*===================================================================================*/

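/* Accumulate PETSc's MPI message statistics (numbers of sends/receives and their byte lengths), e.g., for -log_view output */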
static inline PetscErrorCode PetscLogMPIMessages(PetscInt nsend, PetscSFCount *sendcnts, MPI_Datatype sendtype, PetscInt nrecv, PetscSFCount *recvcnts, MPI_Datatype recvtype)
{
#if defined(PETSC_USE_LOG)
  petsc_isend_ct += (PetscLogDouble)nsend;
  petsc_irecv_ct += (PetscLogDouble)nrecv;

  if (sendtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    MPI_Type_size(sendtype, &typesize);
    for (i = 0; i < nsend; i++) petsc_isend_len += (PetscLogDouble)(sendcnts[i] * typesize);
  }

  if (recvtype != MPI_DATATYPE_NULL) {
    PetscMPIInt i, typesize;
    MPI_Type_size(recvtype, &typesize);
    for (i = 0; i < nrecv; i++) petsc_irecv_len += (PetscLogDouble)(recvcnts[i] * typesize);
  }
#endif
  return 0;
}

/* Get the communicator with distributed graph topology, which is not cheap to build so we do it on demand (instead of at PetscSFSetUp time) */
static PetscErrorCode PetscSFGetDistComm_Neighbor(PetscSF sf, PetscSFDirection direction, MPI_Comm *distcomm)
{
  PetscSF_Neighbor  *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt           nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscMPIInt *rootranks, *leafranks;
  MPI_Comm           comm;

  PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, NULL, NULL);       /* Which ranks will access my roots (I am a destination) */
  PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, &leafranks, NULL, NULL, NULL); /* Which ranks own the roots my leaves will access (I am a source) */

  if (!dat->initialized[direction]) {
    const PetscMPIInt indegree = nrootranks - ndrootranks, *sources = rootranks + ndrootranks;
    const PetscMPIInt outdegree = nleafranks - ndleafranks, *destinations = leafranks + ndleafranks;
    MPI_Comm         *mycomm = &dat->comms[direction];
    PetscObjectGetComm((PetscObject)sf, &comm);
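    /* For leaf-to-root communication, ranks that access my roots send to me (sources) and I send to the ranks owning the roots my leaves access (destinations); the root-to-leaf communicator simply swaps these roles */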
    if (direction == PETSCSF_LEAF2ROOT) {
      MPI_Dist_graph_create_adjacent(comm, indegree, sources, dat->rootweights, outdegree, destinations, dat->leafweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm);
    } else { /* PETSCSF_ROOT2LEAF, reverse src & dest */
      MPI_Dist_graph_create_adjacent(comm, outdegree, destinations, dat->leafweights, indegree, sources, dat->rootweights, MPI_INFO_NULL, 1 /*reorder*/, mycomm);
    }
    dat->initialized[direction] = PETSC_TRUE;
  }
  *distcomm = dat->comms[direction];
  return 0;
}

/*===================================================================================*/
/*              Implementations of SF public APIs                                    */
/*===================================================================================*/
static PetscErrorCode PetscSFSetUp_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;
  PetscInt          i, j, nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt   *rootoffset, *leafoffset;
  PetscMPIInt       m, n;

  /* SFNeighbor inherits from Basic */
  PetscSFSetUp_Basic(sf);
  /* SFNeighbor specific */
  sf->persistent = PETSC_FALSE;
  PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL);
  PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL);
  dat->rootdegree = m = (PetscMPIInt)(nrootranks - ndrootranks);
  dat->leafdegree = n = (PetscMPIInt)(nleafranks - ndleafranks);
  sf->nleafreqs       = 0;
  dat->nrootreqs      = 1;
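  /* A single request suffices for the neighborhood collective, so there are no separate leaf requests */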

  /* Only set up MPI displs/counts for non-distinguished ranks. Distinguished ranks use shared memory */
  PetscMalloc6(m, &dat->rootdispls, m, &dat->rootcounts, m, &dat->rootweights, n, &dat->leafdispls, n, &dat->leafcounts, n, &dat->leafweights);

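  /* With MPI large-count support and 64-bit indices, counts/displacements are stored directly as MPI_Count/MPI_Aint; otherwise they must fit into PetscMPIInt, hence the PetscMPIIntCast() below */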
#if defined(PETSC_HAVE_MPI_LARGE_COUNT) && defined(PETSC_USE_64BIT_INDICES)
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    dat->rootdispls[j]  = rootoffset[i] - rootoffset[ndrootranks];
    dat->rootcounts[j]  = rootoffset[i + 1] - rootoffset[i];
    dat->rootweights[j] = (PetscMPIInt)((PetscReal)dat->rootcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647); /* Scale to range of PetscMPIInt */
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    dat->leafdispls[j]  = leafoffset[i] - leafoffset[ndleafranks];
    dat->leafcounts[j]  = leafoffset[i + 1] - leafoffset[i];
    dat->leafweights[j] = (PetscMPIInt)((PetscReal)dat->leafcounts[j] / (PetscReal)PETSC_MAX_INT * 2147483647);
  }
#else
  for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
    PetscMPIIntCast(rootoffset[i] - rootoffset[ndrootranks], &m);
    dat->rootdispls[j] = m;
    PetscMPIIntCast(rootoffset[i + 1] - rootoffset[i], &n);
    dat->rootcounts[j]  = n;
    dat->rootweights[j] = n;
  }

  for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
    PetscMPIIntCast(leafoffset[i] - leafoffset[ndleafranks], &m);
    dat->leafdispls[j] = m;
    PetscMPIIntCast(leafoffset[i + 1] - leafoffset[i], &n);
    dat->leafcounts[j]  = n;
    dat->leafweights[j] = n;
  }
#endif
  return 0;
}

static PetscErrorCode PetscSFReset_Neighbor(PetscSF sf)
{
  PetscInt          i;
  PetscSF_Neighbor *dat = (PetscSF_Neighbor *)sf->data;

  PetscFree6(dat->rootdispls, dat->rootcounts, dat->rootweights, dat->leafdispls, dat->leafcounts, dat->leafweights);
  for (i = 0; i < 2; i++) {
    if (dat->initialized[i]) {
      MPI_Comm_free(&dat->comms[i]);
      dat->initialized[i] = PETSC_FALSE;
    }
  }
  PetscSFReset_Basic(sf); /* Common part */
  return 0;
}

static PetscErrorCode PetscSFDestroy_Neighbor(PetscSF sf)
{
  PetscSFReset_Neighbor(sf);
  PetscFree(sf->data);
  return 0;
}

static PetscErrorCode PetscSFBcastBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL, *leafbuf = NULL;
  MPI_Request      *req;

  PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link);
  PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata);
  /* Do neighborhood alltoallv for remote ranks */
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &distcomm);
  PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF);
  /* Open MPI 3.0 ran into errors when rootdegree = leafdegree = 0, so we skip the call in that case */
  if (dat->rootdegree || dat->leafdegree) MPIU_Ineighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, distcomm, req);
  PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit);
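  /* Handle the local part while the nonblocking collective above is in flight */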
  PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op);
  return 0;
}

static inline PetscErrorCode PetscSFLeafToRootBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink       link;
  PetscSF_Neighbor *dat      = (PetscSF_Neighbor *)sf->data;
  MPI_Comm          distcomm = MPI_COMM_NULL;
  void             *rootbuf = NULL, *leafbuf = NULL;
  MPI_Request      *req = NULL;

  PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link);
  PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata);
  /* Do neighborhood alltoallv for remote ranks */
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf, PETSCSF_LEAF2ROOT, &distcomm);
  PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT);
  if (dat->rootdegree || dat->leafdegree) MPIU_Ineighbor_alltoallv(leafbuf, dat->leafcounts, dat->leafdispls, unit, rootbuf, dat->rootcounts, dat->rootdispls, unit, distcomm, req);
  PetscLogMPIMessages(dat->leafdegree, dat->leafcounts, unit, dat->rootdegree, dat->rootcounts, unit);
  *out = link;
  return 0;
}

static PetscErrorCode PetscSFReduceBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link);
  PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op);
  return 0;
}

static PetscErrorCode PetscSFFetchAndOpBegin_Neighbor(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscSFLeafToRootBegin_Neighbor(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link);
  PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op);
  return 0;
}

static PetscErrorCode PetscSFFetchAndOpEnd_Neighbor(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink       link    = NULL;
  MPI_Comm          comm    = MPI_COMM_NULL;
  PetscSF_Neighbor *dat     = (PetscSF_Neighbor *)sf->data;
  void             *rootbuf = NULL, *leafbuf = NULL;

  PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link);
  PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT);
  /* Process remote fetch-and-op */
  PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op);
  /* Bcast the updated rootbuf back to leaves */
  PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */);
  PetscSFGetDistComm_Neighbor(sf, PETSCSF_ROOT2LEAF, &comm);
  PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, NULL, NULL);
  PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF);
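  /* Use a blocking neighborhood alltoallv here since the updated leaf values are unpacked immediately afterwards */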
  if (dat->rootdegree || dat->leafdegree) MPIU_Neighbor_alltoallv(rootbuf, dat->rootcounts, dat->rootdispls, unit, leafbuf, dat->leafcounts, dat->leafdispls, unit, comm);
  PetscLogMPIMessages(dat->rootdegree, dat->rootcounts, unit, dat->leafdegree, dat->leafcounts, unit);
  PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_FALSE /* host2device after receiving */);
  PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE);
  PetscSFLinkReclaim(sf, &link);
  return 0;
}

PETSC_INTERN PetscErrorCode PetscSFCreate_Neighbor(PetscSF sf)
{
  PetscSF_Neighbor *dat;

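  /* These routines are reused unchanged from the Basic implementation */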
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->View                 = PetscSFView_Basic;

  sf->ops->SetUp           = PetscSFSetUp_Neighbor;
  sf->ops->Reset           = PetscSFReset_Neighbor;
  sf->ops->Destroy         = PetscSFDestroy_Neighbor;
  sf->ops->BcastBegin      = PetscSFBcastBegin_Neighbor;
  sf->ops->ReduceBegin     = PetscSFReduceBegin_Neighbor;
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Neighbor;
  sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Neighbor;

  PetscNew(&dat);
  sf->data = (void *)dat;
  return 0;
}