Actual source code: sfbasic.c

  1: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
  2: #include <../src/vec/is/sf/impls/basic/sfpack.h>
  3: #include <petsc/private/viewerimpl.h>

  5: // Initialize persistent MPI send/recv requests
  6: static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
  7: {
  8:   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  9:   PetscInt           cnt;
 10:   PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks;
 11:   const PetscInt    *rootoffset, *leafoffset;
 12:   MPI_Aint           disp;
 13:   MPI_Comm           comm          = PetscObjectComm((PetscObject)sf);
 14:   MPI_Datatype       unit          = link->unit;
 15:   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
 16:   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

 18:   PetscFunctionBegin;
 19:   if (bas->rootbuflen[PETSCSF_REMOTE] && !link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
 20:     PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
 21:     if (direction == PETSCSF_LEAF2ROOT) {
 22:       for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
 23:         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
 24:         cnt  = rootoffset[i + 1] - rootoffset[i];
 25:         PetscCallMPI(MPIU_Recv_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
 26:       }
 27:     } else { /* PETSCSF_ROOT2LEAF */
 28:       for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
 29:         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
 30:         cnt  = rootoffset[i + 1] - rootoffset[i];
 31:         PetscCallMPI(MPIU_Send_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
 32:       }
 33:     }
 34:     link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
 35:   }

 37:   if (sf->leafbuflen[PETSCSF_REMOTE] && !link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi]) {
 38:     PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
 39:     if (direction == PETSCSF_LEAF2ROOT) {
 40:       for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
 41:         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
 42:         cnt  = leafoffset[i + 1] - leafoffset[i];
 43:         PetscCallMPI(MPIU_Send_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
 44:       }
 45:     } else { /* PETSCSF_ROOT2LEAF */
 46:       for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
 47:         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
 48:         cnt  = leafoffset[i + 1] - leafoffset[i];
 49:         PetscCallMPI(MPIU_Recv_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
 50:       }
 51:     }
 52:     link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi] = PETSC_TRUE;
 53:   }
 54:   PetscFunctionReturn(PETSC_SUCCESS);
 55: }
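
/* For reference, a minimal sketch of the generic MPI persistent-request lifecycle that the init/start/wait
   split used here relies on. This is plain MPI, not PETSc-specific; buf, count, dest, tag and niter are
   placeholders:

     MPI_Request req;
     MPI_Send_init(buf, count, MPI_DOUBLE, dest, tag, comm, &req); // create the request once
     for (int iter = 0; iter < niter; iter++) {
       MPI_Start(&req);                   // (re)start the same request every iteration
       MPI_Wait(&req, MPI_STATUS_IGNORE); // complete it before reusing buf
     }
     MPI_Request_free(&req);              // free the persistent request when done

   The function above plays the MPI_Send_init/MPI_Recv_init role; starting and waiting happen in the
   Start/FinishCommunication callbacks. */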

 57: // Start MPI requests. If using non-GPU-aware MPI, we might need to copy data from the device buffer to the host buffer
 58: static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
 59: {
 60:   PetscMPIInt    nsreqs = 0, nrreqs = 0;
 61:   MPI_Request   *sreqs = NULL, *rreqs = NULL;
 62:   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
 63:   PetscInt       sbuflen, rbuflen;

 65:   PetscFunctionBegin;
 66:   rbuflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE];
 67:   if (rbuflen) {
 68:     if (direction == PETSCSF_ROOT2LEAF) {
 69:       nrreqs = sf->nleafreqs;
 70:       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &rreqs));
 71:     } else { /* leaf to root */
 72:       nrreqs = bas->nrootreqs;
 73:       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &rreqs, NULL));
 74:     }
 75:   }

 77:   sbuflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE];
 78:   if (sbuflen) {
 79:     if (direction == PETSCSF_ROOT2LEAF) {
 80:       nsreqs = bas->nrootreqs;
 81:       PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
 82:       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &sreqs, NULL));
 83:     } else { /* leaf to root */
 84:       nsreqs = sf->nleafreqs;
 85:       PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE));
 86:       PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &sreqs));
 87:     }
 88:   }
 89:   PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); // need to sync the stream to make BOTH sendbuf and recvbuf ready
 90:   if (rbuflen) PetscCallMPI(MPI_Startall_irecv(rbuflen, link->unit, nrreqs, rreqs));
 91:   if (sbuflen) PetscCallMPI(MPI_Startall_isend(sbuflen, link->unit, nsreqs, sreqs));
 92:   PetscFunctionReturn(PETSC_SUCCESS);
 93: }
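
/* A hedged sketch of what StartCommunication amounts to in plain MPI, assuming MPI_Startall_irecv/_isend
   behave like MPI_Startall plus PETSc message logging (an assumption about those wrappers):

     if (rbuflen) MPI_Startall(nrreqs, rreqs); // start the persistent receives
     if (sbuflen) MPI_Startall(nsreqs, sreqs); // start the persistent sends

   The matching waits are posted later, in the *End() routines, via link->FinishCommunication. */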

 95: #if defined(PETSC_HAVE_MPIX_STREAM)
 96: // Issue MPIX_Isend/Irecv_enqueue() on the MPIX stream communicator
 97: static PetscErrorCode PetscSFLinkStartCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
 98: {
 99:   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
100:   PetscInt           i, j;
101:   PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks, cnt;
102:   const PetscInt    *rootoffset, *leafoffset;
103:   MPI_Aint           disp;
104:   MPI_Comm           stream_comm   = sf->stream_comm;
105:   MPI_Datatype       unit          = link->unit;
106:   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
107:   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

109:   PetscFunctionBegin;
110:   if (bas->rootbuflen[PETSCSF_REMOTE]) {
111:     PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
112:     if (direction == PETSCSF_LEAF2ROOT) {
113:       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
114:         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
115:         cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
116:         PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
117:       }
118:     } else { // PETSCSF_ROOT2LEAF
119:       for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
120:         disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
121:         cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
122:         // no need to sync the gpu stream!
123:         PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
124:       }
125:     }
126:   }

128:   if (sf->leafbuflen[PETSCSF_REMOTE]) {
129:     PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
130:     if (direction == PETSCSF_LEAF2ROOT) {
131:       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
132:         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
133:         cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
134:         // no need to sync the gpu stream!
135:         PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
136:       }
137:     } else { // PETSCSF_ROOT2LEAF
138:       for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
139:         disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
140:         cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
141:         PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
142:       }
143:     }
144:   }
145:   PetscFunctionReturn(PETSC_SUCCESS);
146: }

148: static PetscErrorCode PetscSFLinkFinishCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
149: {
150:   PetscSF_Basic     *bas           = (PetscSF_Basic *)sf->data;
151:   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
152:   const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

154:   PetscFunctionBegin;
155:   PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
156:   PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
157:   PetscFunctionReturn(PETSC_SUCCESS);
158: }
159: #endif
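
/* The MPIX_*_enqueue() path above assumes sf->stream_comm is a stream-aware communicator built elsewhere in
   PETSc with MPICH's MPIX stream extension. A hedged sketch of how such a communicator is typically created
   from an existing CUDA stream (names follow the MPICH proposal; the exact setup PETSc performs may differ):

     MPI_Info    info;
     MPIX_Stream mpi_stream;
     MPI_Comm    stream_comm;
     MPI_Info_create(&info);
     MPI_Info_set(info, "type", "cudaStream_t");
     MPIX_Info_set_hex(info, "value", &cuda_stream, sizeof(cuda_stream)); // cuda_stream is an existing cudaStream_t
     MPIX_Stream_create(info, &mpi_stream);
     MPIX_Stream_comm_create(comm, mpi_stream, &stream_comm);

   Operations enqueued on stream_comm (MPIX_Isend/Irecv/Waitall_enqueue) are ordered with the work already queued
   on the CUDA stream, which is why no explicit stream synchronization is needed before posting them. */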

161: static PetscErrorCode PetscSFSetCommunicationOps_Basic(PetscSF sf, PetscSFLink link)
162: {
163:   PetscFunctionBegin;
164:   link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Basic;
165:   link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Basic;
166: #if defined(PETSC_HAVE_MPIX_STREAM)
167:   const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
168:   if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) {
169:     link->StartCommunication  = PetscSFLinkStartCommunication_MPIX_Stream;
170:     link->FinishCommunication = PetscSFLinkFinishCommunication_MPIX_Stream;
171:   }
172: #endif
173:   PetscFunctionReturn(PETSC_SUCCESS);
174: }

176: /*===================================================================================*/
177: /*              SF public interface implementations                                  */
178: /*===================================================================================*/
179: PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
180: {
181:   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
182:   PetscInt      *rlengths, *ilengths;
183:   PetscMPIInt    nRemoteRootRanks, nRemoteLeafRanks;
184:   PetscMPIInt    rank, niranks, *iranks, tag;
185:   MPI_Comm       comm;
186:   MPI_Group      group;
187:   MPI_Request   *rootreqs, *leafreqs;

189:   PetscFunctionBegin;
190:   PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
191:   PetscCall(PetscSFSetUpRanks(sf, group));
192:   PetscCallMPI(MPI_Group_free(&group));
193:   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
194:   PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
195:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
196:   /*
 197:    * Inform roots how many leaves reference them and from which ranks
198:    */
199:   PetscCall(PetscMalloc1(sf->nranks, &rlengths));
 200:   /* Determine the number of incoming messages, the ranks sending them, and their lengths */
201:   for (PetscMPIInt i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
202:   nRemoteRootRanks = sf->nranks - sf->ndranks;
203:   PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, PetscSafePointerPlusOffset(sf->ranks, sf->ndranks), PetscSafePointerPlusOffset(rlengths, sf->ndranks), &niranks, &iranks, (void **)&ilengths));

 205:   /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
206:      We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
207:      small and the sorting is cheap.
208:    */
209:   PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

211:   /* Partition into distinguished and non-distinguished incoming ranks */
212:   bas->ndiranks = sf->ndranks;
213:   bas->niranks  = bas->ndiranks + niranks;
214:   PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
215:   bas->ioffset[0] = 0;
216:   for (PetscMPIInt i = 0; i < bas->ndiranks; i++) {
217:     bas->iranks[i]      = sf->ranks[i];
218:     bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
219:   }
220:   PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
221:   for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) {
222:     bas->iranks[i]      = iranks[i - bas->ndiranks];
223:     bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
224:   }
225:   bas->itotal = bas->ioffset[bas->niranks];
226:   PetscCall(PetscFree(rlengths));
227:   PetscCall(PetscFree(iranks));
228:   PetscCall(PetscFree(ilengths));

230:   /* Send leaf identities to roots */
231:   nRemoteLeafRanks = bas->niranks - bas->ndiranks;
232:   PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
233:   PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
234:   for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
235:   for (PetscMPIInt i = 0; i < sf->nranks; i++) {
236:     PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
237:     if (i < sf->ndranks) {
238:       PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
239:       PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
240:       PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
241:       PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
242:       continue;
243:     }
244:     PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
245:   }
246:   PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
247:   PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

249:   sf->nleafreqs  = nRemoteRootRanks;
250:   bas->nrootreqs = nRemoteLeafRanks;

 252:   /* Set up fields related to packing, such as rootbuflen[] */
253:   PetscCall(PetscSFSetUpPackFields(sf));
254:   PetscCall(PetscFree2(rootreqs, leafreqs));
255:   PetscFunctionReturn(PETSC_SUCCESS);
256: }
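
/* A small worked example of the two-sided discovery in PetscSFSetUp_Basic, for a hypothetical run on 2 ranks
   where one leaf on rank 1 references a root owned by rank 0 (made-up data, only to illustrate the roles of
   rlengths and ilengths):

     rank 1 (leaf side): sf->ranks = {0}, rlengths = {1}   // "I reference 1 root owned by rank 0"
     rank 0 (root side): PetscCommBuildTwoSided() returns niranks = 1, iranks = {1}, ilengths = {1}
                         // "rank 1 will send me 1 root index", later received into bas->irootloc

   That is, rlengths describes per-remote-rank outgoing traffic from the leaf side, while iranks/ilengths are the
   mirrored incoming view on the root side. */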

258: PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
259: {
260:   PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
261:   PetscSFLink    link = bas->avail, next;

263:   PetscFunctionBegin;
264:   PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
265:   PetscCall(PetscFree2(bas->iranks, bas->ioffset));
266:   PetscCall(PetscFree(bas->irootloc));

268: #if defined(PETSC_HAVE_DEVICE)
269:   for (int i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
270: #endif

272: #if defined(PETSC_HAVE_NVSHMEM)
273:   PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
274: #endif

276:   for (; link; link = next) {
277:     next = link->next;
278:     PetscCall(PetscSFLinkDestroy(sf, link));
279:   }
280:   bas->avail = NULL;
281:   PetscCall(PetscSFResetPackFields(sf));
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

285: PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
286: {
287:   PetscFunctionBegin;
288:   PetscCall(PetscSFReset_Basic(sf));
289:   PetscCall(PetscFree(sf->data));
290:   PetscFunctionReturn(PETSC_SUCCESS);
291: }

293: #if defined(PETSC_USE_SINGLE_LIBRARY)
294: #include <petscmat.h>

296: PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
297: {
298:   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
299:   PetscMPIInt        nrootranks, ndrootranks;
300:   const PetscInt    *rootoffset;
301:   PetscMPIInt        rank, size;
302:   const PetscMPIInt *rootranks;
303:   MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
304:   PetscScalar        unitbytes;
305:   Mat                A;

307:   PetscFunctionBegin;
308:   PetscCallMPI(MPI_Comm_size(comm, &size));
309:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
 310:   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc., where we do
 311:     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
 312:     from the senders' viewpoint: how many bytes I will send to each of my neighbors.

 314:     Looking at a column of the matrix, one can also see how many bytes a given rank will receive from others.

 316:     If the PetscSFLink bas->inuse is available, we could use it to get the vertex size. But that would give
 317:     different interpretations of the same SF for different data types. Since we care most about VecScatter,
318:     we uniformly treat each vertex as a PetscScalar.
319:   */
320:   unitbytes = (PetscScalar)sizeof(PetscScalar);

322:   PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
323:   PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
324:   PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
325:   for (PetscMPIInt i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
326:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
327:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
328:   PetscCall(MatView(A, viewer));
329:   PetscCall(MatDestroy(&A));
330:   PetscFunctionReturn(PETSC_SUCCESS);
331: }
332: #endif

334: PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
335: {
336:   PetscBool isascii;

338:   PetscFunctionBegin;
339:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
340:   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "  MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
341: #if defined(PETSC_USE_SINGLE_LIBRARY)
342:   else {
343:     PetscBool isdraw, isbinary;
344:     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
345:     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
346:     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
347:   }
348: #endif
349:   PetscFunctionReturn(PETSC_SUCCESS);
350: }
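
/* A minimal usage sketch for exercising this viewer from user code (standard PetscSF/PetscViewer calls; error
   checking of the surrounding program omitted). The MATLAB-format branch, which dumps the pattern-and-sizes
   matrix, is only compiled in single-library builds:

     PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
     PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
     PetscCall(PetscSFView(sf, viewer));
     PetscCall(PetscViewerPopFormat(viewer));
*/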

352: PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
353: {
354:   PetscSFLink link = NULL;

356:   PetscFunctionBegin;
357:   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
358:   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
359:   /* Pack rootdata to rootbuf for remote communication */
360:   PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
361:   /* Start communication, e.g., post MPIU_Isend */
362:   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
363:   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
364:   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
365:   PetscFunctionReturn(PETSC_SUCCESS);
366: }

368: PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
369: {
370:   PetscSFLink link = NULL;

372:   PetscFunctionBegin;
373:   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
374:   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
375:   /* Finish remote communication, e.g., post MPI_Waitall */
376:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
377:   /* Unpack data in leafbuf to leafdata for remote communication */
378:   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
379:   /* Recycle the link */
380:   PetscCall(PetscSFLinkReclaim(sf, &link));
381:   PetscFunctionReturn(PETSC_SUCCESS);
382: }
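
/* A minimal usage sketch of the broadcast pair above through the public PetscSF interface. rootdata/leafdata are
   user arrays matching the SF's graph; MPI_REPLACE means leaves simply receive the root values:

     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
     // unrelated work here can overlap with the remote communication
     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
*/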

384: /* Shared by ReduceBegin and FetchAndOpBegin */
385: static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
386: {
387:   PetscSFLink link = NULL;

389:   PetscFunctionBegin;
390:   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
391:   PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
392:   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
393:   *out = link;
394:   PetscFunctionReturn(PETSC_SUCCESS);
395: }

397: /* leaf -> root with reduction */
398: PETSC_INTERN PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
399: {
400:   PetscSFLink link = NULL;

402:   PetscFunctionBegin;
403:   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
404:   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
405:   PetscFunctionReturn(PETSC_SUCCESS);
406: }

408: PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
409: {
410:   PetscSFLink link = NULL;

412:   PetscFunctionBegin;
413:   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
414:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
415:   PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
416:   PetscCall(PetscSFLinkReclaim(sf, &link));
417:   PetscFunctionReturn(PETSC_SUCCESS);
418: }
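
/* A minimal usage sketch of the reduction pair above: leaf values are combined into their roots with the given
   MPI_Op (here MPI_SUM on integer data; rootdata/leafdata are user arrays matching the SF's graph):

     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
*/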

420: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
421: {
422:   PetscSFLink link = NULL;

424:   PetscFunctionBegin;
425:   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
426:   PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
427:   PetscFunctionReturn(PETSC_SUCCESS);
428: }

430: PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
431: {
432:   PetscSFLink link = NULL;

434:   PetscFunctionBegin;
435:   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
436:   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
437:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
438:   /* Do fetch-and-op, the (remote) update results are in rootbuf */
439:   PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
440:   /* Bcast rootbuf to leafupdate */
441:   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
442:   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
443:   /* Unpack and insert fetched data into leaves */
444:   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
445:   PetscCall(PetscSFLinkReclaim(sf, &link));
446:   PetscFunctionReturn(PETSC_SUCCESS);
447: }
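
/* A minimal usage sketch of fetch-and-op through the public interface: each leaf receives in leafupdate the state
   of its root just before its own contribution is applied, and rootdata ends up reduced with op. With op = MPI_SUM
   and roots used as counters this is commonly used to hand out offsets:

     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafdata, leafupdate, MPI_SUM));
*/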

449: PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
450: {
451:   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;

453:   PetscFunctionBegin;
454:   if (niranks) *niranks = bas->niranks;
455:   if (iranks) *iranks = bas->iranks;
456:   if (ioffset) *ioffset = bas->ioffset;
457:   if (irootloc) *irootloc = bas->irootloc;
458:   PetscFunctionReturn(PETSC_SUCCESS);
459: }

 461: /* An optimized PetscSFCreateEmbeddedRootSF. We aggressively reuse the communication already established on sf.
 462:    We need only one bcast on sf and no further communication to build the embedded sf. Note that selected[]
 463:    was sorted before this routine was called.
464:  */
465: PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
466: {
467:   PetscSF            esf;
468:   PetscInt          *esf_roffset, *esf_rmine, *esf_rremote;
469:   PetscInt           j, p, q, nroots, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
470:   char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
471:   PetscMPIInt       *esf_ranks, nranks, ndranks, niranks, esf_nranks, esf_ndranks, ndiranks;
472:   const PetscMPIInt *ranks, *iranks;
473:   const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
474:   PetscBool          connected;
475:   PetscSFNode       *new_iremote;
476:   PetscSF_Basic     *bas;

478:   PetscFunctionBegin;
479:   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
480:   PetscCall(PetscSFSetFromOptions(esf));
481:   PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

483:   /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
484:   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
485:   PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
486:   maxlocal = maxleaf - minleaf + 1;
487:   PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
488:   leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
489:   /* Tag selected roots */
490:   for (PetscInt i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

492:   PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
493:   PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
494:   PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
495:   esf_nranks = esf_ndranks = esf_nleaves = 0;
496:   for (PetscMPIInt i = 0; i < nranks; i++) {
497:     connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
498:     for (j = roffset[i]; j < roffset[i + 1]; j++) {
499:       if (leafdata[rmine[j]]) {
500:         esf_nleaves++;
501:         connected = PETSC_TRUE;
502:       }
503:     }
504:     if (connected) {
505:       esf_nranks++;
506:       if (i < ndranks) esf_ndranks++;
507:     }
508:   }

510:   /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
511:   PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
512:   PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
513:   PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
514:   p              = 0; /* Counter for connected root ranks */
515:   q              = 0; /* Counter for connected leaves */
516:   esf_roffset[0] = 0;
517:   for (PetscMPIInt i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
518:     connected = PETSC_FALSE;
519:     for (j = roffset[i]; j < roffset[i + 1]; j++) {
520:       if (leafdata[rmine[j]]) {
521:         esf_rmine[q] = new_ilocal[q] = rmine[j];
522:         esf_rremote[q]               = rremote[j];
523:         new_iremote[q].index         = rremote[j];
524:         new_iremote[q].rank          = ranks[i];
525:         connected                    = PETSC_TRUE;
526:         q++;
527:       }
528:     }
529:     if (connected) {
530:       esf_ranks[p]       = ranks[i];
531:       esf_roffset[p + 1] = q;
532:       p++;
533:     }
534:   }

536:   /* SetGraph internally resets the SF, so we only set its fields after the call */
537:   PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
538:   esf->nranks    = esf_nranks;
539:   esf->ndranks   = esf_ndranks;
540:   esf->ranks     = esf_ranks;
541:   esf->roffset   = esf_roffset;
542:   esf->rmine     = esf_rmine;
543:   esf->rremote   = esf_rremote;
544:   esf->nleafreqs = esf_nranks - esf_ndranks;

546:   /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
547:   bas = (PetscSF_Basic *)esf->data;
548:   PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
 549:   /* The embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here,
 550:      but we do not care since these arrays are usually short. The benefit is that we can fill these arrays by parsing irootloc just once.
551:    */
552:   PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
553:   PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
554:   bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
555:   p                                              = 0; /* Counter for connected leaf ranks */
556:   q                                              = 0; /* Counter for connected roots */
557:   for (PetscMPIInt i = 0; i < niranks; i++) {
558:     connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
559:     for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
560:       if (rootdata[irootloc[j]]) {
561:         bas->irootloc[q++] = irootloc[j];
562:         connected          = PETSC_TRUE;
563:       }
564:     }
565:     if (connected) {
566:       bas->niranks++;
567:       if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
568:       bas->iranks[p]      = iranks[i];
569:       bas->ioffset[p + 1] = q;
570:       p++;
571:     }
572:   }
573:   bas->itotal     = q;
574:   bas->nrootreqs  = bas->niranks - bas->ndiranks;
575:   esf->persistent = PETSC_TRUE;
 576:   /* Set up packing-related fields */
577:   PetscCall(PetscSFSetUpPackFields(esf));

 579:   /* Copied from PetscSFSetUp(), since this routine intentionally skips calling PetscSFSetUp(). */
580: #if defined(PETSC_HAVE_CUDA)
581:   if (esf->backend == PETSCSF_BACKEND_CUDA) {
582:     esf->ops->Malloc = PetscSFMalloc_CUDA;
583:     esf->ops->Free   = PetscSFFree_CUDA;
584:   }
585: #endif

587: #if defined(PETSC_HAVE_HIP)
588:   /* TODO: Needs debugging */
589:   if (esf->backend == PETSCSF_BACKEND_HIP) {
590:     esf->ops->Malloc = PetscSFMalloc_HIP;
591:     esf->ops->Free   = PetscSFFree_HIP;
592:   }
593: #endif

595: #if defined(PETSC_HAVE_KOKKOS)
596:   if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
597:     esf->ops->Malloc = PetscSFMalloc_Kokkos;
598:     esf->ops->Free   = PetscSFFree_Kokkos;
599:   }
600: #endif
601:   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
602:   PetscCall(PetscFree2(rootdata, leafmem));
603:   *newsf = esf;
604:   PetscFunctionReturn(PETSC_SUCCESS);
605: }
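
/* A minimal usage sketch of the public entry point that dispatches to the routine above: given root indices
   selected on this process, build a new SF containing only the edges whose roots were selected (sf is assumed
   to be already set up; the indices here are hypothetical):

     PetscInt selected[] = {0, 3, 7};
     PetscSF  esf;
     PetscCall(PetscSFCreateEmbeddedRootSF(sf, 3, selected, &esf));
     // ... use esf like any other PetscSF ...
     PetscCall(PetscSFDestroy(&esf));
*/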

607: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
608: {
609:   PetscSF_Basic *dat;

611:   PetscFunctionBegin;
612:   sf->ops->SetUp                = PetscSFSetUp_Basic;
613:   sf->ops->Reset                = PetscSFReset_Basic;
614:   sf->ops->Destroy              = PetscSFDestroy_Basic;
615:   sf->ops->View                 = PetscSFView_Basic;
616:   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
617:   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
618:   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
619:   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
620:   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
621:   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
622:   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
623:   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
624:   sf->ops->SetCommunicationOps  = PetscSFSetCommunicationOps_Basic;

626:   sf->persistent = PETSC_TRUE; // currently SFBASIC always uses persistent send/recv
627:   sf->collective = PETSC_FALSE;

629:   PetscCall(PetscNew(&dat));
630:   sf->data = (void *)dat;
631:   PetscFunctionReturn(PETSC_SUCCESS);
632: }
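
/* A minimal sketch of how an SF of this type is created and set up through the public interface (the graph
   arrays nroots/nleaves/ilocal/iremote are user-provided; PETSCSFBASIC is typically the default SF type):

     PetscSF sf;
     PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
     PetscCall(PetscSFSetType(sf, PETSCSFBASIC));
     PetscCall(PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_COPY_VALUES, iremote, PETSC_COPY_VALUES));
     PetscCall(PetscSFSetUp(sf)); // ends up in PetscSFSetUp_Basic() above
     // ... PetscSFBcast/Reduce/FetchAndOp operations ...
     PetscCall(PetscSFDestroy(&sf));
*/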