Actual source code: sfalltoall.c

  1: #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
  2: #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
  3: #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>

  5: /* Reuse the Allgatherv type. The difference is that some fields (i.e., displs, recvcounts) are unused, which is not a big deal */
  6: typedef PetscSF_Allgatherv PetscSF_Alltoall;

  8: /*===================================================================================*/
  9: /*              Implementations of SF public APIs                                    */
 10: /*===================================================================================*/
 11: static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
 12: {
 13:   PetscInt i;

 15:   if (nroots) *nroots = sf->nroots;
 16:   if (nleaves) *nleaves = sf->nleaves;
 17:   if (ilocal) *ilocal = NULL; /* Contiguous local indices */
 18:   if (iremote) {
 19:     if (!sf->remote) {
 20:       PetscMalloc1(sf->nleaves, &sf->remote);
 21:       sf->remote_alloc = sf->remote;
 22:       for (i = 0; i < sf->nleaves; i++) {
 23:         sf->remote[i].rank  = i;
 24:         sf->remote[i].index = i;
 25:       }
 26:     }
 27:     *iremote = sf->remote;
 28:   }
 29:   return 0;
 30: }

 32: static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
 33: {
 34:   PetscSFLink  link;
 35:   MPI_Comm     comm;
 36:   void        *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */
 37:   MPI_Request *req;

 39:   PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link);
 40:   PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata);
 41:   PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */);
 42:   PetscObjectGetComm((PetscObject)sf, &comm);
 43:   PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL);
 44:   PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF);
 45:   MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req);
 46:   return 0;
 47: }

 49: static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
 50: {
 51:   PetscSFLink  link;
 52:   MPI_Comm     comm;
 53:   void        *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */
 54:   MPI_Request *req;

 56:   PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link);
 57:   PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata);
 58:   PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */);
 59:   PetscObjectGetComm((PetscObject)sf, &comm);
 60:   PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL);
 61:   PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT);
 62:   MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req);
 63:   return 0;
 64: }
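Editor's note, not part of sfalltoall.c: both PetscSFBcastBegin_Alltoall and PetscSFReduceBegin_Alltoall above reduce to a single nonblocking all-to-all that moves exactly one element of datatype `unit` between every pair of ranks; MPIU_Ialltoall resolves to MPI_Ialltoall when the MPI implementation provides nonblocking collectives. A plain-MPI sketch of the same communication pattern, with hypothetical buffers sendbuf and recvbuf holding one `unit` per peer, would be:

    MPI_Request req;

    /* sendbuf and recvbuf each hold 'size' elements of datatype 'unit', one per rank of 'comm' */
    MPI_Ialltoall(sendbuf, 1, unit, recvbuf, 1, unit, comm, &req);
    /* ... independent work can overlap here, which is what the matching *End routines exploit ... */
    MPI_Wait(&req, MPI_STATUS_IGNORE);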

 66: static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out)
 67: {
 68:   PetscInt     nroots = 1, nleaves = 1, *ilocal;
 69:   PetscSFNode *iremote = NULL;
 70:   PetscSF      lsf;
 71:   PetscMPIInt  rank;

 73:   nroots  = 1;
 74:   nleaves = 1;
 75:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
 76:   PetscMalloc1(nleaves, &ilocal);
 77:   PetscMalloc1(nleaves, &iremote);
 78:   ilocal[0]        = rank;
 79:   iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
 80:   iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */

 82:   PetscSFCreate(PETSC_COMM_SELF, &lsf);
 83:   PetscSFSetGraph(lsf, nroots, nleaves, ilocal /* the single leaf sits at position rank, not 0 */, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);
 84:   PetscSFSetUp(lsf);
 85:   *out = lsf;
 86:   return 0;
 87: }
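Editor's note, not part of sfalltoall.c: the SF built above lives on PETSC_COMM_SELF and contains only the self edge of the all-to-all pattern, with the single leaf stored at position rank of the original leaf array and pointing at root index rank. A hedged usage sketch follows; the name of the public wrapper, PetscSFCreateLocalSF(), is inferred from the ops slot this routine fills and should be verified against petscsf.h.

    PetscSF lsf;

    PetscCall(PetscSFCreateLocalSF(sf, &lsf)); /* sf is assumed to be of type PETSCSFALLTOALL; wrapper name assumed, see note above */
    PetscCall(PetscSFView(lsf, PETSC_VIEWER_STDOUT_SELF));
    PetscCall(PetscSFDestroy(&lsf));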

 89: static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
 90: {
 91:   PetscInt       i, *tmproots, *ilocal, ndranks, ndiranks;
 92:   PetscSFNode   *iremote;
 93:   PetscMPIInt    nroots, *roots, nleaves, *leaves, rank;
 94:   MPI_Comm       comm;
 95:   PetscSF_Basic *bas;
 96:   PetscSF        esf;

 98:   PetscObjectGetComm((PetscObject)sf, &comm);
 99:   MPI_Comm_rank(comm, &rank);

 101:   /* Uniquify selected[] and store the result in roots[] */
102:   PetscMalloc1(nselected, &tmproots);
103:   PetscArraycpy(tmproots, selected, nselected);
104:   PetscSortRemoveDupsInt(&nselected, tmproots); /* nselected might be changed */
106:   nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */
107:   PetscMalloc1(nselected, &roots);
108:   for (i = 0; i < nselected; i++) roots[i] = tmproots[i];
109:   PetscFree(tmproots);

 111:   /* Find out which leaves are still connected to roots in the embedded sf. We expect PetscCommBuildTwoSided to be more scalable than MPI_Alltoall */
112:   PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/);

114:   /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
115:   ndranks = 0;
116:   for (i = 0; i < nleaves; i++) {
117:     if (leaves[i] == rank) {
118:       leaves[i] = -rank;
119:       ndranks   = 1;
120:       break;
121:     }
122:   }
123:   PetscSortMPIInt(nleaves, leaves);
124:   if (nleaves && leaves[0] < 0) leaves[0] = rank;

126:   /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
127:   PetscMalloc1(nleaves, &ilocal);
128:   PetscMalloc1(nleaves, &iremote);
129:   for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */
130:     ilocal[i]        = leaves[i];
131:     iremote[i].rank  = leaves[i];
132:     iremote[i].index = leaves[i];
133:   }
134:   PetscSFCreate(comm, &esf);
135:   PetscSFSetType(esf, PETSCSFBASIC); /* This optimized routine can only create a basic sf */
136:   PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);

 138:   /* As if we had called PetscSFSetUpRanks(esf, self's group) */
139:   PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote);
140:   esf->nranks     = nleaves;
141:   esf->ndranks    = ndranks;
142:   esf->roffset[0] = 0;
143:   for (i = 0; i < nleaves; i++) {
144:     esf->ranks[i]       = leaves[i];
145:     esf->roffset[i + 1] = i + 1;
146:     esf->rmine[i]       = leaves[i];
147:     esf->rremote[i]     = leaves[i];
148:   }

150:   /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
151:   bas = (PetscSF_Basic *)esf->data;
152:   PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset);
153:   PetscMalloc1(nroots, &bas->irootloc);
154:   /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
155:   ndiranks = 0;
156:   for (i = 0; i < nroots; i++) {
157:     if (roots[i] == rank) {
158:       roots[i] = -rank;
159:       ndiranks = 1;
160:       break;
161:     }
162:   }
163:   PetscSortMPIInt(nroots, roots);
164:   if (nroots && roots[0] < 0) roots[0] = rank;

166:   bas->niranks    = nroots;
167:   bas->ndiranks   = ndiranks;
168:   bas->ioffset[0] = 0;
169:   bas->itotal     = nroots;
170:   for (i = 0; i < nroots; i++) {
171:     bas->iranks[i]      = roots[i];
172:     bas->ioffset[i + 1] = i + 1;
173:     bas->irootloc[i]    = roots[i];
174:   }

176:   /* See PetscSFCreateEmbeddedRootSF_Basic */
177:   esf->nleafreqs  = esf->nranks - esf->ndranks;
178:   bas->nrootreqs  = bas->niranks - bas->ndiranks;
179:   esf->persistent = PETSC_TRUE;
 180:   /* Set up packing-related fields */
181:   PetscSFSetUpPackFields(esf);

183:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
184:   *newsf           = esf;
185:   return 0;
186: }
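Editor's note, not part of sfalltoall.c: callers reach the routine above through the public PetscSFCreateEmbeddedRootSF(), and the result is an ordinary PETSCSFBASIC object whose setup fields were filled by hand. A minimal usage sketch with a hypothetical selection that keeps local roots 0 and 2 (so it assumes a communicator of at least three ranks):

    const PetscInt selected[] = {0, 2}; /* hypothetical root indices to keep on this rank */
    PetscSF        esf;

    PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf)); /* sf is assumed to be of type PETSCSFALLTOALL */
    PetscCall(PetscSFView(esf, PETSC_VIEWER_STDOUT_WORLD));
    PetscCall(PetscSFDestroy(&esf));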

188: PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
189: {
190:   PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data;

192:   sf->ops->BcastEnd  = PetscSFBcastEnd_Basic;
193:   sf->ops->ReduceEnd = PetscSFReduceEnd_Basic;

195:   /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
196:   sf->ops->Destroy       = PetscSFDestroy_Allgatherv;
197:   sf->ops->Reset         = PetscSFReset_Allgatherv;
198:   sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv;
199:   sf->ops->GetRootRanks  = PetscSFGetRootRanks_Allgatherv;

201:   /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
202:   sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv;
203:   sf->ops->SetUp        = PetscSFSetUp_Allgather;

205:   /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
206:   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;

208:   /* Alltoall stuff */
209:   sf->ops->GetGraph             = PetscSFGetGraph_Alltoall;
210:   sf->ops->BcastBegin           = PetscSFBcastBegin_Alltoall;
211:   sf->ops->ReduceBegin          = PetscSFReduceBegin_Alltoall;
212:   sf->ops->CreateLocalSF        = PetscSFCreateLocalSF_Alltoall;
213:   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;

215:   PetscNew(&dat);
216:   sf->data = (void *)dat;
217:   return 0;
218: }
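Editor's note, not part of sfalltoall.c: a self-contained sketch of how an SF of this type is typically obtained and driven. PetscSFSetGraphWithPattern() with PETSCSF_PATTERN_ALLTOALL selects the PETSCSFALLTOALL implementation registered above; the explicit PetscLayout and the data values are illustrative assumptions rather than requirements taken from this file.

    #include <petscsf.h>

    int main(int argc, char **argv)
    {
      PetscSF     sf;
      PetscLayout map;
      PetscMPIInt rank, size;
      PetscInt   *rootdata, *leafdata, i;

      PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
      PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
      PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));

      /* One root and one leaf per peer on every rank, i.e., the MPI_Alltoall pattern */
      PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
      PetscCall(PetscLayoutSetLocalSize(map, size));
      PetscCall(PetscLayoutSetUp(map));
      PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
      PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLTOALL));
      PetscCall(PetscLayoutDestroy(&map));
      PetscCall(PetscSFSetUp(sf));

      PetscCall(PetscMalloc2(size, &rootdata, size, &leafdata));
      for (i = 0; i < size; i++) rootdata[i] = rank * size + i; /* entry i is destined for rank i */

      /* Broadcast roots to leaves, i.e., perform the all-to-all exchange */
      PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));

      PetscCall(PetscFree2(rootdata, leafdata));
      PetscCall(PetscSFDestroy(&sf));
      PetscCall(PetscFinalize());
      return 0;
    }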