Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petscctable.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #endif

 10: #if defined(PETSC_HAVE_HIP)
 11:   #include <hip/hip_runtime.h>
 12: #endif

 14: #if defined(PETSC_USE_DEBUG)
 15: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
 16:     if (PetscUnlikely(!(sf)->graphset))                              \
 17:       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
 18:   } while (0)
 19: #else
 20: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 21: #endif

 23: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

 25: PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
 26: {
 29:   *type = PETSC_MEMTYPE_HOST;
 30: #if defined(PETSC_HAVE_CUDA)
 31:   if (PetscDeviceInitialized(PETSC_DEVICE_CUDA) && data) {
 32:     cudaError_t                  cerr;
 33:     struct cudaPointerAttributes attr;
 34:     enum cudaMemoryType          mtype;
 35:     cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
 36:     cudaGetLastError(); /* Reset the last error */
 37:     #if (CUDART_VERSION < 10000)
 38:       mtype = attr.memoryType;
 39:     #else
 40:       mtype = attr.type;
 41:     #endif
 42:     if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
 43:   }
 44: #endif

 46: #if defined(PETSC_HAVE_HIP)
 47:   if (PetscDeviceInitialized(PETSC_DEVICE_HIP) && data) {
 48:     hipError_t                   cerr;
 49:     struct hipPointerAttribute_t attr;
 50:     enum hipMemoryType           mtype;
 51:     cerr = hipPointerGetAttributes(&attr,data);
 52:     hipGetLastError(); /* Reset the last error */
 53:     mtype = attr.memoryType;
 54:     if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
 55:   }
 56: #endif
 57:   return(0);
 58: }

 60: /*@
 61:    PetscSFCreate - create a star forest communication context

 63:    Collective

 65:    Input Parameter:
 66: .  comm - communicator on which the star forest will operate

 68:    Output Parameter:
 69: .  sf - new star forest context

 71:    Options Database Keys:
 72: +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
 73: .  -sf_type window    -Use MPI-3 one-sided window for communication
 74: -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

 76:    Level: intermediate

 78:    Notes:
 79:    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv or
 80:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 81:    SFs are optimized and have better performance than general SFs.

 83: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 84: @*/
 85: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 86: {
 88:   PetscSF        b;

 92:   PetscSFInitializePackage();

 94:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 96:   b->nroots    = -1;
 97:   b->nleaves   = -1;
 98:   b->minleaf   = PETSC_MAX_INT;
 99:   b->maxleaf   = PETSC_MIN_INT;
100:   b->nranks    = -1;
101:   b->rankorder = PETSC_TRUE;
102:   b->ingroup   = MPI_GROUP_NULL;
103:   b->outgroup  = MPI_GROUP_NULL;
104:   b->graphset  = PETSC_FALSE;
105: #if defined(PETSC_HAVE_DEVICE)
106:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
107:   b->use_stream_aware_mpi = PETSC_FALSE;
108:   b->unknown_input_stream = PETSC_FALSE;
109:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
110:     b->backend = PETSCSF_BACKEND_KOKKOS;
111:   #elif defined(PETSC_HAVE_CUDA)
112:     b->backend = PETSCSF_BACKEND_CUDA;
113:   #elif defined(PETSC_HAVE_HIP)
114:     b->backend = PETSCSF_BACKEND_HIP;
115:   #endif

117:   #if defined(PETSC_HAVE_NVSHMEM)
118:     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
119:     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
120:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
121:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
122:   #endif
123: #endif
124:   b->vscat.from_n = -1;
125:   b->vscat.to_n   = -1;
126:   b->vscat.unit   = MPIU_SCALAR;
127:   *sf = b;
128:   return(0);
129: }
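
   A minimal usage sketch, not part of this file, showing a typical PetscSF life cycle; the helper name SketchSFRing and the one-leaf-per-rank ring graph are illustrative assumptions only.

     static PetscErrorCode SketchSFRing(MPI_Comm comm)
     {
       PetscSF     sf;
       PetscSFNode remote;
       PetscMPIInt rank,size;

       MPI_Comm_rank(comm,&rank);
       MPI_Comm_size(comm,&size);
       PetscSFCreate(comm,&sf);
       PetscSFSetFromOptions(sf);            /* honor -sf_type, -sf_backend, ... */
       remote.rank  = (rank+1)%size;         /* one leaf per rank, referencing root 0 of the next rank */
       remote.index = 0;
       PetscSFSetGraph(sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
       PetscSFSetUp(sf);
       PetscSFDestroy(&sf);
       return 0;
     }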

131: /*@
132:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

134:    Collective

136:    Input Parameter:
137: .  sf - star forest

139:    Level: advanced

141: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
142: @*/
143: PetscErrorCode PetscSFReset(PetscSF sf)
144: {

149:   if (sf->ops->Reset) {(*sf->ops->Reset)(sf);}
150:   sf->nroots   = -1;
151:   sf->nleaves  = -1;
152:   sf->minleaf  = PETSC_MAX_INT;
153:   sf->maxleaf  = PETSC_MIN_INT;
154:   sf->mine     = NULL;
155:   sf->remote   = NULL;
156:   sf->graphset = PETSC_FALSE;
157:   PetscFree(sf->mine_alloc);
158:   PetscFree(sf->remote_alloc);
159:   sf->nranks = -1;
160:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
161:   sf->degreeknown = PETSC_FALSE;
162:   PetscFree(sf->degree);
163:   if (sf->ingroup  != MPI_GROUP_NULL) {MPI_Group_free(&sf->ingroup);}
164:   if (sf->outgroup != MPI_GROUP_NULL) {MPI_Group_free(&sf->outgroup);}
165:   if (sf->multi) sf->multi->multi = NULL;
166:   PetscSFDestroy(&sf->multi);
167:   PetscLayoutDestroy(&sf->map);

169:  #if defined(PETSC_HAVE_DEVICE)
170:   for (PetscInt i=0; i<2; i++) {PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);}
171:  #endif

173:   sf->setupcalled = PETSC_FALSE;
174:   return(0);
175: }

177: /*@C
178:    PetscSFSetType - Set the PetscSF communication implementation

180:    Collective on PetscSF

182:    Input Parameters:
183: +  sf - the PetscSF context
184: -  type - a known method

186:    Options Database Key:
187: .  -sf_type <type> - Sets the method; use -help for a list
188:    of available methods (for instance, window, basic, neighbor)

190:    Notes:
191:    See "include/petscsf.h" for available methods (for instance)
192: +    PETSCSFWINDOW - MPI-2/3 one-sided
193: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

195:   Level: intermediate

197: .seealso: PetscSFType, PetscSFCreate()
198: @*/
199: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
200: {
201:   PetscErrorCode ierr,(*r)(PetscSF);
202:   PetscBool      match;


208:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
209:   if (match) return(0);

211:   PetscFunctionListFind(PetscSFList,type,&r);
212:   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
213:   /* Destroy the previous PetscSF implementation context */
214:   if (sf->ops->Destroy) {(*(sf)->ops->Destroy)(sf);}
215:   PetscMemzero(sf->ops,sizeof(*sf->ops));
216:   PetscObjectChangeTypeName((PetscObject)sf,type);
217:   (*r)(sf);
218:   return(0);
219: }

221: /*@C
222:   PetscSFGetType - Get the PetscSF communication implementation

224:   Not Collective

226:   Input Parameter:
227: . sf  - the PetscSF context

229:   Output Parameter:
230: . type - the PetscSF type name

232:   Level: intermediate

234: .seealso: PetscSFSetType(), PetscSFCreate()
235: @*/
236: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
237: {
241:   *type = ((PetscObject)sf)->type_name;
242:   return(0);
243: }

245: /*@C
246:    PetscSFDestroy - destroy star forest

248:    Collective

250:    Input Parameter:
251: .  sf - address of star forest

253:    Level: intermediate

255: .seealso: PetscSFCreate(), PetscSFReset()
256: @*/
257: PetscErrorCode PetscSFDestroy(PetscSF *sf)
258: {

262:   if (!*sf) return(0);
264:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return(0);}
265:   PetscSFReset(*sf);
266:   if ((*sf)->ops->Destroy) {(*(*sf)->ops->Destroy)(*sf);}
267:   PetscSFDestroy(&(*sf)->vscat.lsf);
268:   if ((*sf)->vscat.bs > 1) {MPI_Type_free(&(*sf)->vscat.unit);}
269:   PetscHeaderDestroy(sf);
270:   return(0);
271: }

273: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
274: {
275:   PetscInt           i, nleaves;
276:   PetscMPIInt        size;
277:   const PetscInt    *ilocal;
278:   const PetscSFNode *iremote;
279:   PetscErrorCode     ierr;

282:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) return(0);
283:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
284:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
285:   for (i = 0; i < nleaves; i++) {
286:     const PetscInt rank = iremote[i].rank;
287:     const PetscInt remote = iremote[i].index;
288:     const PetscInt leaf = ilocal ? ilocal[i] : i;
289:     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
290:     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
291:     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
292:   }
293:   return(0);
294: }

296: /*@
297:    PetscSFSetUp - set up communication structures

299:    Collective

301:    Input Parameter:
302: .  sf - star forest communication object

304:    Level: beginner

306: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
307: @*/
308: PetscErrorCode PetscSFSetUp(PetscSF sf)
309: {

314:   PetscSFCheckGraphSet(sf,1);
315:   if (sf->setupcalled) return(0);
316:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
317:   PetscSFCheckGraphValid_Private(sf);
318:   if (!((PetscObject)sf)->type_name) {PetscSFSetType(sf,PETSCSFBASIC);} /* Zero all sf->ops */
319:   if (sf->ops->SetUp) {(*sf->ops->SetUp)(sf);}
320: #if defined(PETSC_HAVE_CUDA)
321:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
322:     sf->ops->Malloc = PetscSFMalloc_CUDA;
323:     sf->ops->Free   = PetscSFFree_CUDA;
324:   }
325: #endif
326: #if defined(PETSC_HAVE_HIP)
327:   if (sf->backend == PETSCSF_BACKEND_HIP) {
328:     sf->ops->Malloc = PetscSFMalloc_HIP;
329:     sf->ops->Free   = PetscSFFree_HIP;
330:   }
331: #endif

334: #if defined(PETSC_HAVE_KOKKOS)
335:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
336:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
337:     sf->ops->Free   = PetscSFFree_Kokkos;
338:   }
339: #endif
340:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
341:   sf->setupcalled = PETSC_TRUE;
342:   return(0);
343: }

345: /*@
346:    PetscSFSetFromOptions - set PetscSF options using the options database

348:    Logically Collective

350:    Input Parameter:
351: .  sf - star forest

353:    Options Database Keys:
354: +  -sf_type               - implementation type, see PetscSFSetType()
355: .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
 356: .  -sf_unknown_input_stream - Assume the SF root/leafdata was computed on arbitrary device streams unknown to SF, so that SF
 357:                             synchronizes the device before processing the data (default: false).
359: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
360:                                If true, this option only works with -use_gpu_aware_mpi 1.

362: -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
363:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
 364:                               the only available backend is kokkos.

366:    Level: intermediate
367: @*/
368: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
369: {
370:   PetscSFType    deft;
371:   char           type[256];
373:   PetscBool      flg;

377:   PetscObjectOptionsBegin((PetscObject)sf);
378:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
379:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
380:   PetscSFSetType(sf,flg ? type : deft);
381:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
382:  #if defined(PETSC_HAVE_DEVICE)
383:   {
384:     char        backendstr[32] = {0};
385:     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
386:     /* Change the defaults set in PetscSFCreate() with command line options */
 387:     PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitrary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
388:     PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
389:     PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
390:     PetscStrcasecmp("cuda",backendstr,&isCuda);
391:     PetscStrcasecmp("kokkos",backendstr,&isKokkos);
392:     PetscStrcasecmp("hip",backendstr,&isHip);
393:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
394:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
395:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
396:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
397:     else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
398:   #elif defined(PETSC_HAVE_KOKKOS)
399:     if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
400:    #endif
401:   }
402:  #endif
403:   if (sf->ops->SetFromOptions) {(*sf->ops->SetFromOptions)(PetscOptionsObject,sf);}
404:   PetscOptionsEnd();
405:   return(0);
406: }

408: /*@
409:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

411:    Logically Collective

413:    Input Parameters:
414: +  sf - star forest
415: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

417:    Level: advanced

419: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
420: @*/
421: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
422: {
426:   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
427:   sf->rankorder = flg;
428:   return(0);
429: }

431: /*@
432:    PetscSFSetGraph - Set a parallel star forest

434:    Collective

436:    Input Parameters:
437: +  sf - star forest
438: .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
439: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
440: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
441: during setup in debug mode)
442: .  localmode - copy mode for ilocal
443: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
444: during setup in debug mode)
445: -  remotemode - copy mode for iremote

447:    Level: intermediate

449:    Notes:
450:     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

 452:    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
 453:    encode contiguous storage. In that case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
 454:    needed.

456:    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
457:    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

459: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
460: @*/
461: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
462: {

469:   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
470:   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

472:   if (sf->nroots >= 0) { /* Reset only if graph already set */
473:     PetscSFReset(sf);
474:   }

476:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

478:   sf->nroots  = nroots;
479:   sf->nleaves = nleaves;

481:   if (nleaves && ilocal) {
482:     PetscInt i;
483:     PetscInt minleaf = PETSC_MAX_INT;
484:     PetscInt maxleaf = PETSC_MIN_INT;
485:     int      contiguous = 1;
486:     for (i=0; i<nleaves; i++) {
487:       minleaf = PetscMin(minleaf,ilocal[i]);
488:       maxleaf = PetscMax(maxleaf,ilocal[i]);
489:       contiguous &= (ilocal[i] == i);
490:     }
491:     sf->minleaf = minleaf;
492:     sf->maxleaf = maxleaf;
493:     if (contiguous) {
494:       if (localmode == PETSC_OWN_POINTER) {
495:         PetscFree(ilocal);
496:       }
497:       ilocal = NULL;
498:     }
499:   } else {
500:     sf->minleaf = 0;
501:     sf->maxleaf = nleaves - 1;
502:   }

504:   if (ilocal) {
505:     switch (localmode) {
506:     case PETSC_COPY_VALUES:
507:       PetscMalloc1(nleaves,&sf->mine_alloc);
508:       PetscArraycpy(sf->mine_alloc,ilocal,nleaves);
509:       sf->mine = sf->mine_alloc;
510:       break;
511:     case PETSC_OWN_POINTER:
512:       sf->mine_alloc = (PetscInt*)ilocal;
513:       sf->mine       = sf->mine_alloc;
514:       break;
515:     case PETSC_USE_POINTER:
516:       sf->mine_alloc = NULL;
517:       sf->mine       = (PetscInt*)ilocal;
518:       break;
519:     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
520:     }
521:   }

523:   switch (remotemode) {
524:   case PETSC_COPY_VALUES:
525:     PetscMalloc1(nleaves,&sf->remote_alloc);
526:     PetscArraycpy(sf->remote_alloc,iremote,nleaves);
527:     sf->remote = sf->remote_alloc;
528:     break;
529:   case PETSC_OWN_POINTER:
530:     sf->remote_alloc = (PetscSFNode*)iremote;
531:     sf->remote       = sf->remote_alloc;
532:     break;
533:   case PETSC_USE_POINTER:
534:     sf->remote_alloc = NULL;
535:     sf->remote       = (PetscSFNode*)iremote;
536:     break;
537:   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
538:   }

540:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
541:   sf->graphset = PETSC_TRUE;
542:   return(0);
543: }
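
   A minimal usage sketch, not part of this file, showing a graph with sparse leaf locations: each rank owns 4 roots, and two leaves sit at slots 1 and 3 of the local leaf buffer, referencing roots 0 and 2 of the next rank. The sizes and indices are illustrative assumptions.

     {
       PetscSF     sf;
       PetscInt    ilocal[2] = {1,3};
       PetscSFNode iremote[2];
       PetscMPIInt rank,size;

       MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
       MPI_Comm_size(PETSC_COMM_WORLD,&size);
       iremote[0].rank = (rank+1)%size; iremote[0].index = 0;
       iremote[1].rank = (rank+1)%size; iremote[1].index = 2;
       PetscSFCreate(PETSC_COMM_WORLD,&sf);
       PetscSFSetGraph(sf,4,2,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
       PetscSFSetUp(sf);
       PetscSFDestroy(&sf);
     }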

545: /*@
546:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

548:   Collective

550:   Input Parameters:
551: + sf      - The PetscSF
552: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
553: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

555:   Notes:
 556:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map, and
 557:   let n and N be the local and global sizes of x, respectively.

 559:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to a sequential
 560:   vector y on every rank.

 562:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a sequential
 563:   vector y on rank 0.

 565:   In the above cases, entries of x are roots and entries of y are leaves.

 567:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
 568:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
 569:   of rank j, with 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. That does not
 570:   mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data items
 571:   with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

573:   In this case, roots and leaves are symmetric.

575:   Level: intermediate
576:  @*/
577: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
578: {
579:   MPI_Comm       comm;
580:   PetscInt       n,N,res[2];
581:   PetscMPIInt    rank,size;
582:   PetscSFType    type;

586:   PetscObjectGetComm((PetscObject)sf, &comm);
587:   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
588:   MPI_Comm_rank(comm,&rank);
589:   MPI_Comm_size(comm,&size);

591:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
592:     type = PETSCSFALLTOALL;
593:     PetscLayoutCreate(comm,&sf->map);
594:     PetscLayoutSetLocalSize(sf->map,size);
595:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
596:     PetscLayoutSetUp(sf->map);
597:   } else {
598:     PetscLayoutGetLocalSize(map,&n);
599:     PetscLayoutGetSize(map,&N);
600:     res[0] = n;
601:     res[1] = -n;
 602:     /* Check if n is the same over all ranks so that we can optimize it */
603:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
604:     if (res[0] == -res[1]) { /* same n */
605:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
606:     } else {
607:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
608:     }
609:     PetscLayoutReference(map,&sf->map);
610:   }
611:   PetscSFSetType(sf,type);

613:   sf->pattern = pattern;
614:   sf->mine    = NULL; /* Contiguous */

616:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
617:      Also set other easy stuff.
618:    */
619:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
620:     sf->nleaves      = N;
621:     sf->nroots       = n;
622:     sf->nranks       = size;
623:     sf->minleaf      = 0;
624:     sf->maxleaf      = N - 1;
625:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
626:     sf->nleaves      = rank ? 0 : N;
627:     sf->nroots       = n;
628:     sf->nranks       = rank ? 0 : size;
629:     sf->minleaf      = 0;
630:     sf->maxleaf      = rank ? -1 : N - 1;
631:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
632:     sf->nleaves      = size;
633:     sf->nroots       = size;
634:     sf->nranks       = size;
635:     sf->minleaf      = 0;
636:     sf->maxleaf      = size - 1;
637:   }
638:   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
639:   sf->graphset = PETSC_TRUE;
640:   return(0);
641: }
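
   A minimal usage sketch, not part of this file, showing an ALLGATHER-patterned SF over a layout with 2 roots per rank; a Bcast on it would copy every root to every rank. The local size is an illustrative assumption.

     {
       PetscSF     sf;
       PetscLayout map;

       PetscLayoutCreate(PETSC_COMM_WORLD,&map);
       PetscLayoutSetLocalSize(map,2);
       PetscLayoutSetUp(map);
       PetscSFCreate(PETSC_COMM_WORLD,&sf);
       PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
       PetscSFSetUp(sf);
       PetscSFDestroy(&sf);
       PetscLayoutDestroy(&map);
     }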

643: /*@
 644:    PetscSFCreateInverseSF - given a PetscSF in which all roots have degree 1, creates the inverse map

646:    Collective

648:    Input Parameter:
649: .  sf - star forest to invert

651:    Output Parameter:
652: .  isf - inverse of sf

654:    Level: advanced

656:    Notes:
657:    All roots must have degree 1.

659:    The local space may be a permutation, but cannot be sparse.

661: .seealso: PetscSFSetGraph()
662: @*/
663: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
664: {
666:   PetscMPIInt    rank;
667:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
668:   const PetscInt *ilocal;
669:   PetscSFNode    *roots,*leaves;

673:   PetscSFCheckGraphSet(sf,1);

676:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
677:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

679:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
680:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
681:   for (i=0; i<maxlocal; i++) {
682:     leaves[i].rank  = rank;
683:     leaves[i].index = i;
684:   }
685:   for (i=0; i <nroots; i++) {
686:     roots[i].rank  = -1;
687:     roots[i].index = -1;
688:   }
689:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
690:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);

692:   /* Check whether our leaves are sparse */
693:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
694:   if (count == nroots) newilocal = NULL;
695:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
696:     PetscMalloc1(count,&newilocal);
697:     for (i=0,count=0; i<nroots; i++) {
698:       if (roots[i].rank >= 0) {
699:         newilocal[count]   = i;
700:         roots[count].rank  = roots[i].rank;
701:         roots[count].index = roots[i].index;
702:         count++;
703:       }
704:     }
705:   }

707:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
708:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
709:   PetscFree2(roots,leaves);
710:   return(0);
711: }
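
   A minimal usage sketch, not part of this file: invert an SF whose roots all have degree 1, so data can then flow from the old leaves to the old roots via a Bcast on the inverse. The variable sf is assumed to be a set-up PetscSF.

     {
       PetscSF isf;

       PetscSFCreateInverseSF(sf,&isf);   /* old roots become leaves and vice versa */
       PetscSFDestroy(&isf);
     }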

713: /*@
714:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

716:    Collective

718:    Input Parameters:
719: +  sf - communication object to duplicate
720: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

722:    Output Parameter:
723: .  newsf - new communication object

725:    Level: beginner

727: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
728: @*/
729: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
730: {
731:   PetscSFType    type;
733:   MPI_Datatype   dtype=MPIU_SCALAR;

739:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
740:   PetscSFGetType(sf,&type);
741:   if (type) {PetscSFSetType(*newsf,type);}
742:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
743:     PetscSFCheckGraphSet(sf,1);
744:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
745:       PetscInt          nroots,nleaves;
746:       const PetscInt    *ilocal;
747:       const PetscSFNode *iremote;
748:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
749:       PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
750:     } else {
751:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
752:     }
753:   }
 754:   /* Since oldtype is committed, the duplicated newtype is also committed, according to MPI */
755:   if (sf->vscat.bs > 1) {MPI_Type_dup(sf->vscat.unit,&dtype);}
756:   (*newsf)->vscat.bs     = sf->vscat.bs;
757:   (*newsf)->vscat.unit   = dtype;
758:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
759:   (*newsf)->vscat.from_n = sf->vscat.from_n;
760:   /* Do not copy lsf. Build it on demand since it is rarely used */

762: #if defined(PETSC_HAVE_DEVICE)
763:   (*newsf)->backend              = sf->backend;
 764:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
765:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
766:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
767: #endif
768:   if (sf->ops->Duplicate) {(*sf->ops->Duplicate)(sf,opt,*newsf);}
 769:   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set yet. */
770:   return(0);
771: }

773: /*@C
774:    PetscSFGetGraph - Get the graph specifying a parallel star forest

776:    Not Collective

778:    Input Parameter:
779: .  sf - star forest

781:    Output Parameters:
782: +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
783: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
784: .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
785: -  iremote - remote locations of root vertices for each leaf on the current process

787:    Notes:
 788:    We do not currently require that the graph has been set, so nroots is returned as -1 if it has not been set yet

 790:    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
 791:    want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

793:    Level: intermediate

795: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
796: @*/
797: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
798: {

803:   if (sf->ops->GetGraph) {
804:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
805:   } else {
806:     if (nroots) *nroots = sf->nroots;
807:     if (nleaves) *nleaves = sf->nleaves;
808:     if (ilocal) *ilocal = sf->mine;
809:     if (iremote) *iremote = sf->remote;
810:   }
811:   return(0);
812: }
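
   A minimal usage sketch, not part of this file: print each leaf-to-root edge; a NULL ilocal means leaf i is stored at location i of the leaf buffer. The variable sf is assumed to be a PetscSF with its graph set.

     {
       PetscInt          nroots,nleaves,i;
       const PetscInt    *ilocal;
       const PetscSFNode *iremote;

       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
       for (i=0; i<nleaves; i++) {
         PetscInt leaf = ilocal ? ilocal[i] : i;
         PetscPrintf(PETSC_COMM_SELF,"leaf %D <- (rank %D, root %D)\n",leaf,iremote[i].rank,iremote[i].index);
       }
     }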

814: /*@
815:    PetscSFGetLeafRange - Get the active leaf ranges

817:    Not Collective

819:    Input Parameter:
820: .  sf - star forest

822:    Output Parameters:
 823: +  minleaf - minimum active leaf on this process; returns 0 if there are no leaves.
 824: -  maxleaf - maximum active leaf on this process; returns -1 if there are no leaves.

826:    Level: developer

828: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
829: @*/
830: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
831: {
834:   PetscSFCheckGraphSet(sf,1);
835:   if (minleaf) *minleaf = sf->minleaf;
836:   if (maxleaf) *maxleaf = sf->maxleaf;
837:   return(0);
838: }

840: /*@C
 841:    PetscSFViewFromOptions - View a star forest using a viewer determined from the options database

843:    Collective on PetscSF

845:    Input Parameters:
846: +  A - the star forest
847: .  obj - Optional object
848: -  name - command line option

850:    Level: intermediate
851: .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
852: @*/
853: PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
854: {

859:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
860:   return(0);
861: }

863: /*@C
864:    PetscSFView - view a star forest

866:    Collective

868:    Input Parameters:
869: +  sf - star forest
870: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

872:    Level: beginner

874: .seealso: PetscSFCreate(), PetscSFSetGraph()
875: @*/
876: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
877: {
878:   PetscErrorCode    ierr;
879:   PetscBool         iascii;
880:   PetscViewerFormat format;

884:   if (!viewer) {PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);}
887:   if (sf->graphset) {PetscSFSetUp(sf);}
888:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
889:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
890:     PetscMPIInt rank;
891:     PetscInt    ii,i,j;

893:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
894:     PetscViewerASCIIPushTab(viewer);
895:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
896:       if (!sf->graphset) {
897:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
898:         PetscViewerASCIIPopTab(viewer);
899:         return(0);
900:       }
901:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
902:       PetscViewerASCIIPushSynchronized(viewer);
903:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);
904:       for (i=0; i<sf->nleaves; i++) {
905:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
906:       }
907:       PetscViewerFlush(viewer);
908:       PetscViewerGetFormat(viewer,&format);
909:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
910:         PetscMPIInt *tmpranks,*perm;
911:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
912:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
913:         for (i=0; i<sf->nranks; i++) perm[i] = i;
914:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
915:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
916:         for (ii=0; ii<sf->nranks; ii++) {
917:           i = perm[ii];
918:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
919:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
920:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);
921:           }
922:         }
923:         PetscFree2(tmpranks,perm);
924:       }
925:       PetscViewerFlush(viewer);
926:       PetscViewerASCIIPopSynchronized(viewer);
927:     }
928:     PetscViewerASCIIPopTab(viewer);
929:   }
930:   if (sf->ops->View) {(*sf->ops->View)(sf,viewer);}
931:   return(0);
932: }

934: /*@C
935:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

937:    Not Collective

939:    Input Parameter:
940: .  sf - star forest

942:    Output Parameters:
943: +  nranks - number of ranks referenced by local part
944: .  ranks - array of ranks
945: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
946: .  rmine - concatenated array holding local indices referencing each remote rank
947: -  rremote - concatenated array holding remote indices referenced for each remote rank

949:    Level: developer

951: .seealso: PetscSFGetLeafRanks()
952: @*/
953: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
954: {

959:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
960:   if (sf->ops->GetRootRanks) {
961:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
962:   } else {
963:     /* The generic implementation */
964:     if (nranks)  *nranks  = sf->nranks;
965:     if (ranks)   *ranks   = sf->ranks;
966:     if (roffset) *roffset = sf->roffset;
967:     if (rmine)   *rmine   = sf->rmine;
968:     if (rremote) *rremote = sf->rremote;
969:   }
970:   return(0);
971: }

973: /*@C
974:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

976:    Not Collective

978:    Input Parameter:
979: .  sf - star forest

981:    Output Parameters:
982: +  niranks - number of leaf ranks referencing roots on this process
983: .  iranks - array of ranks
984: .  ioffset - offset in irootloc for each rank (length niranks+1)
985: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

987:    Level: developer

989: .seealso: PetscSFGetRootRanks()
990: @*/
991: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
992: {

997:   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
998:   if (sf->ops->GetLeafRanks) {
999:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
1000:   } else {
1001:     PetscSFType type;
1002:     PetscSFGetType(sf,&type);
1003:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1004:   }
1005:   return(0);
1006: }

1008: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
1009:   PetscInt i;
1010:   for (i=0; i<n; i++) {
1011:     if (needle == list[i]) return PETSC_TRUE;
1012:   }
1013:   return PETSC_FALSE;
1014: }

1016: /*@C
1017:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

1019:    Collective

1021:    Input Parameters:
1022: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
1023: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

1025:    Level: developer

1027: .seealso: PetscSFGetRootRanks()
1028: @*/
1029: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
1030: {
1031:   PetscErrorCode     ierr;
1032:   PetscTable         table;
1033:   PetscTablePosition pos;
1034:   PetscMPIInt        size,groupsize,*groupranks;
1035:   PetscInt           *rcount,*ranks;
1036:   PetscInt           i, irank = -1,orank = -1;

1040:   PetscSFCheckGraphSet(sf,1);
1041:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
1042:   PetscTableCreate(10,size,&table);
1043:   for (i=0; i<sf->nleaves; i++) {
1044:     /* Log 1-based rank */
1045:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
1046:   }
1047:   PetscTableGetCount(table,&sf->nranks);
1048:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
1049:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
1050:   PetscTableGetHeadPosition(table,&pos);
1051:   for (i=0; i<sf->nranks; i++) {
1052:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
1053:     ranks[i]--;             /* Convert back to 0-based */
1054:   }
1055:   PetscTableDestroy(&table);

1057:   /* We expect that dgroup is reliably "small" while nranks could be large */
1058:   {
1059:     MPI_Group group = MPI_GROUP_NULL;
1060:     PetscMPIInt *dgroupranks;
1061:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1062:     MPI_Group_size(dgroup,&groupsize);
1063:     PetscMalloc1(groupsize,&dgroupranks);
1064:     PetscMalloc1(groupsize,&groupranks);
1065:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1066:     if (groupsize) {MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);}
1067:     MPI_Group_free(&group);
1068:     PetscFree(dgroupranks);
1069:   }

1071:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1072:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1073:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1074:       if (InList(ranks[i],groupsize,groupranks)) break;
1075:     }
1076:     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1077:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1078:     }
1079:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1080:       PetscInt tmprank,tmpcount;

1082:       tmprank             = ranks[i];
1083:       tmpcount            = rcount[i];
1084:       ranks[i]            = ranks[sf->ndranks];
1085:       rcount[i]           = rcount[sf->ndranks];
1086:       ranks[sf->ndranks]  = tmprank;
1087:       rcount[sf->ndranks] = tmpcount;
1088:       sf->ndranks++;
1089:     }
1090:   }
1091:   PetscFree(groupranks);
1092:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1093:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1094:   sf->roffset[0] = 0;
1095:   for (i=0; i<sf->nranks; i++) {
1096:     PetscMPIIntCast(ranks[i],sf->ranks+i);
1097:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1098:     rcount[i]        = 0;
1099:   }
1100:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1101:     /* short circuit */
1102:     if (orank != sf->remote[i].rank) {
1103:       /* Search for index of iremote[i].rank in sf->ranks */
1104:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1105:       if (irank < 0) {
1106:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1107:         if (irank >= 0) irank += sf->ndranks;
1108:       }
1109:       orank = sf->remote[i].rank;
1110:     }
1111:     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
1112:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1113:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1114:     rcount[irank]++;
1115:   }
1116:   PetscFree2(rcount,ranks);
1117:   return(0);
1118: }

1120: /*@C
1121:    PetscSFGetGroups - gets incoming and outgoing process groups

1123:    Collective

1125:    Input Parameter:
1126: .  sf - star forest

1128:    Output Parameters:
1129: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1130: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1132:    Level: developer

1134: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1135: @*/
1136: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1137: {
1139:   MPI_Group      group = MPI_GROUP_NULL;

1142:   if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1143:   if (sf->ingroup == MPI_GROUP_NULL) {
1144:     PetscInt       i;
1145:     const PetscInt *indegree;
1146:     PetscMPIInt    rank,*outranks,*inranks;
1147:     PetscSFNode    *remote;
1148:     PetscSF        bgcount;

1150:     /* Compute the number of incoming ranks */
1151:     PetscMalloc1(sf->nranks,&remote);
1152:     for (i=0; i<sf->nranks; i++) {
1153:       remote[i].rank  = sf->ranks[i];
1154:       remote[i].index = 0;
1155:     }
1156:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1157:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1158:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1159:     PetscSFComputeDegreeEnd(bgcount,&indegree);
1160:     /* Enumerate the incoming ranks */
1161:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1162:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1163:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1164:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1165:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1166:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1167:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1168:     MPI_Group_free(&group);
1169:     PetscFree2(inranks,outranks);
1170:     PetscSFDestroy(&bgcount);
1171:   }
1172:   *incoming = sf->ingroup;

1174:   if (sf->outgroup == MPI_GROUP_NULL) {
1175:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1176:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1177:     MPI_Group_free(&group);
1178:   }
1179:   *outgoing = sf->outgroup;
1180:   return(0);
1181: }

1183: /*@
1184:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1186:    Collective

1188:    Input Parameter:
1189: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1191:    Output Parameter:
1192: .  multi - star forest with split roots, such that each root has degree exactly 1

1194:    Level: developer

1196:    Notes:

1198:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1199:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1200:    edge, it is a candidate for future optimization that might involve its removal.

1202: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1203: @*/
1204: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1205: {

1211:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1212:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1213:     *multi = sf->multi;
1214:     sf->multi->multi = sf->multi;
1215:     return(0);
1216:   }
1217:   if (!sf->multi) {
1218:     const PetscInt *indegree;
1219:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1220:     PetscSFNode    *remote;
1221:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1222:     PetscSFComputeDegreeBegin(sf,&indegree);
1223:     PetscSFComputeDegreeEnd(sf,&indegree);
1224:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1225:     inoffset[0] = 0;
1226:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1227:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1228:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1229:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1230:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1231:     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1232:       for (i=0; i<sf->nroots; i++) {
1233:         if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1234:       }
1235:     }
1236:     PetscMalloc1(sf->nleaves,&remote);
1237:     for (i=0; i<sf->nleaves; i++) {
1238:       remote[i].rank  = sf->remote[i].rank;
1239:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1240:     }
1241:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1242:     sf->multi->multi = sf->multi;
1243:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1244:     if (sf->rankorder) {        /* Sort the ranks */
1245:       PetscMPIInt rank;
1246:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1247:       PetscSFNode *newremote;
1248:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1249:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1250:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1251:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1252:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1253:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1254:       /* Sort the incoming ranks at each vertex, build the inverse map */
1255:       for (i=0; i<sf->nroots; i++) {
1256:         PetscInt j;
1257:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1258:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1259:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1260:       }
1261:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1262:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1263:       PetscMalloc1(sf->nleaves,&newremote);
1264:       for (i=0; i<sf->nleaves; i++) {
1265:         newremote[i].rank  = sf->remote[i].rank;
1266:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1267:       }
1268:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1269:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1270:     }
1271:     PetscFree3(inoffset,outones,outoffset);
1272:   }
1273:   *multi = sf->multi;
1274:   return(0);
1275: }

1277: /*@C
1278:    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

1280:    Collective

1282:    Input Parameters:
1283: +  sf - original star forest
1284: .  nselected  - number of selected roots on this process
1285: -  selected   - indices of the selected roots on this process

1287:    Output Parameter:
1288: .  esf - new star forest

1290:    Level: advanced

1292:    Note:
1293:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1294:    be done by calling PetscSFGetGraph().

1296: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1297: @*/
1298: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1299: {
1300:   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1301:   const PetscInt    *ilocal;
1302:   signed char       *rootdata,*leafdata,*leafmem;
1303:   const PetscSFNode *iremote;
1304:   PetscSFNode       *new_iremote;
1305:   MPI_Comm          comm;
1306:   PetscErrorCode    ierr;

1310:   PetscSFCheckGraphSet(sf,1);

1314:   PetscSFSetUp(sf);
1315:   PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1316:   PetscObjectGetComm((PetscObject)sf,&comm);
1317:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);

1319:   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or out-of-range indices */
1320:     PetscBool dups;
1321:     PetscCheckDupsInt(nselected,selected,&dups);
1322:     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1323:     for (i=0; i<nselected; i++)
1324:       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root index %D is out of [0,%D)",selected[i],nroots);
1325:   }

1327:   if (sf->ops->CreateEmbeddedRootSF) {
1328:     (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1329:   } else {
1330:     /* A generic version of creating embedded sf */
1331:     PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1332:     maxlocal = maxleaf - minleaf + 1;
1333:     PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1334:     leafdata = leafmem - minleaf;
1335:     /* Tag selected roots and bcast to leaves */
1336:     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1337:     PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1338:     PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);

1340:     /* Build esf with leaves that are still connected */
1341:     esf_nleaves = 0;
1342:     for (i=0; i<nleaves; i++) {
1343:       j = ilocal ? ilocal[i] : i;
1344:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1345:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1346:       */
1347:       esf_nleaves += (leafdata[j] ? 1 : 0);
1348:     }
1349:     PetscMalloc1(esf_nleaves,&new_ilocal);
1350:     PetscMalloc1(esf_nleaves,&new_iremote);
1351:     for (i=n=0; i<nleaves; i++) {
1352:       j = ilocal ? ilocal[i] : i;
1353:       if (leafdata[j]) {
1354:         new_ilocal[n]        = j;
1355:         new_iremote[n].rank  = iremote[i].rank;
1356:         new_iremote[n].index = iremote[i].index;
1357:         ++n;
1358:       }
1359:     }
1360:     PetscSFCreate(comm,esf);
1361:     PetscSFSetFromOptions(*esf);
1362:     PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1363:     PetscFree2(rootdata,leafmem);
1364:   }
1365:   PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1366:   return(0);
1367: }
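
   A minimal usage sketch, not part of this file: keep only the edges whose roots appear in selected[]; indices are not remapped, so the original leaf/root numbering still applies in the embedded SF. The particular selection is an illustrative assumption, and sf is assumed to be a set-up PetscSF.

     {
       PetscSF        esf;
       const PetscInt selected[2] = {0,1};   /* local root indices to keep */

       PetscSFCreateEmbeddedRootSF(sf,2,selected,&esf);
       PetscSFDestroy(&esf);
     }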

1369: /*@C
1370:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1372:   Collective

1374:   Input Parameters:
1375: + sf - original star forest
1376: . nselected  - number of selected leaves on this process
1377: - selected   - indices of the selected leaves on this process

1379:   Output Parameter:
1380: .  newsf - new star forest

1382:   Level: advanced

1384: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1385: @*/
1386: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1387: {
1388:   const PetscSFNode *iremote;
1389:   PetscSFNode       *new_iremote;
1390:   const PetscInt    *ilocal;
1391:   PetscInt          i,nroots,*leaves,*new_ilocal;
1392:   MPI_Comm          comm;
1393:   PetscErrorCode    ierr;

1397:   PetscSFCheckGraphSet(sf,1);

1401:   /* Uniq selected[] and put results in leaves[] */
1402:   PetscObjectGetComm((PetscObject)sf,&comm);
1403:   PetscMalloc1(nselected,&leaves);
1404:   PetscArraycpy(leaves,selected,nselected);
1405:   PetscSortedRemoveDupsInt(&nselected,leaves);
1406:   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);

1408:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1409:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1410:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1411:   } else {
1412:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1413:     PetscMalloc1(nselected,&new_ilocal);
1414:     PetscMalloc1(nselected,&new_iremote);
1415:     for (i=0; i<nselected; ++i) {
1416:       const PetscInt l     = leaves[i];
1417:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1418:       new_iremote[i].rank  = iremote[l].rank;
1419:       new_iremote[i].index = iremote[l].index;
1420:     }
1421:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1422:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1423:   }
1424:   PetscFree(leaves);
1425:   return(0);
1426: }

1428: /*@C
1429:    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()

1431:    Collective on PetscSF

1433:    Input Parameters:
1434: +  sf - star forest on which to communicate
1435: .  unit - data type associated with each node
1436: .  rootdata - buffer to broadcast
1437: -  op - operation to use for reduction

1439:    Output Parameter:
1440: .  leafdata - buffer to be reduced with values from each leaf's respective root

1442:    Level: intermediate

1444:    Notes:
1445:     When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1446:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1447:     use PetscSFBcastWithMemTypeBegin() instead.
1448: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1449: @*/
1450: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1451: {
1453:   PetscMemType   rootmtype,leafmtype;

1457:   PetscSFSetUp(sf);
1458:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1459:   PetscGetMemType(rootdata,&rootmtype);
1460:   PetscGetMemType(leafdata,&leafmtype);
1461:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1462:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1463:   return(0);
1464: }

1466: /*@C
1467:    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

1469:    Collective on PetscSF

1471:    Input Parameters:
1472: +  sf - star forest on which to communicate
1473: .  unit - data type associated with each node
1474: .  rootmtype - memory type of rootdata
1475: .  rootdata - buffer to broadcast
1476: .  leafmtype - memory type of leafdata
1477: -  op - operation to use for reduction

1479:    Output Parameter:
1480: .  leafdata - buffer to be reduced with values from each leaf's respective root

1482:    Level: intermediate

1484: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1485: @*/
1486: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1487: {

1492:   PetscSFSetUp(sf);
1493:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);}
1494:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1495:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);}
1496:   return(0);
1497: }

1499: /*@C
1500:    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

1502:    Collective

1504:    Input Parameters:
1505: +  sf - star forest
1506: .  unit - data type
1507: .  rootdata - buffer to broadcast
1508: -  op - operation to use for reduction

1510:    Output Parameter:
1511: .  leafdata - buffer to be reduced with values from each leaf's respective root

1513:    Level: intermediate

1515: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1516: @*/
1517: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1518: {

1523:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);}
1524:   (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1525:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);}
1526:   return(0);
1527: }
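A minimal begin/end broadcast sketch, assuming sf has its graph set and rootdata/leafdata are sized for its root and leaf layouts; the helper name is hypothetical.

#include <petscsf.h>

/* Overwrite every leaf with the value stored at its root */
static PetscErrorCode BcastRootInts(PetscSF sf,const PetscInt *rootdata,PetscInt *leafdata)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  /* unrelated local work may overlap with the communication here */
  ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

With an MPI_Op other than MPI_REPLACE (e.g. MPI_SUM), the incoming root value is combined with the existing leaf value instead of overwriting it.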

1529: /*@C
1530:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()

1532:    Collective

1534:    Input Parameters:
1535: +  sf - star forest
1536: .  unit - data type
1537: .  leafdata - values to reduce
1538: -  op - reduction operation

1540:    Output Parameter:
1541: .  rootdata - result of reduction of values from all leaves of each root

1543:    Level: intermediate

1545:    Notes:
1546:     When PETSc is configured with device support, it uses its own mechanism to determine whether the given data pointers
1547:     are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, use
1548:     PetscSFReduceWithMemTypeBegin() instead.

1550: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1551: @*/
1552: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1553: {
1555:   PetscMemType   rootmtype,leafmtype;

1559:   PetscSFSetUp(sf);
1560:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1561:   PetscGetMemType(rootdata,&rootmtype);
1562:   PetscGetMemType(leafdata,&leafmtype);
1563:   (*sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1564:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1565:   return(0);
1566: }

1568: /*@C
1569:    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()

1571:    Collective

1573:    Input Parameters:
1574: +  sf - star forest
1575: .  unit - data type
1576: .  leafmtype - memory type of leafdata
1577: .  leafdata - values to reduce
1578: .  rootmtype - memory type of rootdata
1579: -  op - reduction operation

1581:    Output Parameter:
1582: .  rootdata - result of reduction of values from all leaves of each root

1584:    Level: intermediate

1586: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1587: @*/
1588: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1589: {

1594:   PetscSFSetUp(sf);
1595:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);}
1596:   (*sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1597:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);}
1598:   return(0);
1599: }
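As with the broadcast counterpart above, a sketch that supplies the memory types explicitly; here both buffers are assumed to be host memory and the helper name is illustrative.

#include <petscsf.h>

static PetscErrorCode SumHostLeavesToRoots(PetscSF sf,PetscInt nroots,const PetscInt *leafdata,PetscInt *rootdata)
{
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<nroots; i++) rootdata[i] = 0; /* leaf contributions are summed into rootdata */
  ierr = PetscSFReduceWithMemTypeBegin(sf,MPIU_INT,PETSC_MEMTYPE_HOST,leafdata,PETSC_MEMTYPE_HOST,rootdata,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_SUM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}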

1601: /*@C
1602:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1604:    Collective

1606:    Input Parameters:
1607: +  sf - star forest
1608: .  unit - data type
1609: .  leafdata - values to reduce
1610: -  op - reduction operation

1612:    Output Parameter:
1613: .  rootdata - result of reduction of values from all leaves of each root

1615:    Level: intermediate

1617: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1618: @*/
1619: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1620: {

1625:   if (!sf->vscat.logging) {PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);}
1626:   (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1627:   if (!sf->vscat.logging) {PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);}
1628:   return(0);
1629: }
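A minimal reduction sketch, assuming sf has its graph set and the buffers match its layouts; the helper name is hypothetical.

#include <petscsf.h>

/* Record at each root the maximum value among its attached leaves */
static PetscErrorCode MaxLeafValueAtRoots(PetscSF sf,PetscInt nroots,const PetscReal *leafdata,PetscReal *rootdata)
{
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  for (i=0; i<nroots; i++) rootdata[i] = PETSC_MIN_REAL; /* existing root values also participate in the reduction */
  ierr = PetscSFReduceBegin(sf,MPIU_REAL,leafdata,rootdata,MPI_MAX);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_REAL,leafdata,rootdata,MPI_MAX);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}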

1631: /*@C
1632:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1634:    Collective

1636:    Input Parameters:
1637: +  sf - star forest
1638: .  unit - data type
1639: .  leafdata - leaf values to use in reduction
1640: -  op - operation to use for reduction

1642:    Output Parameters:
1643: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1644: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1646:    Level: advanced

1648:    Note:
1649:    The update is atomic only at the granularity provided by the hardware. Different roots referenced by the same process
1650:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1651:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1652:    integers.

1654: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1655: @*/
1656: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1657: {
1659:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1663:   PetscSFSetUp(sf);
1664:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1665:   PetscGetMemType(rootdata,&rootmtype);
1666:   PetscGetMemType(leafdata,&leafmtype);
1667:   PetscGetMemType(leafupdate,&leafupdatemtype);
1668:   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1669:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1670:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1671:   return(0);
1672: }

1674: /*@C
1675:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1677:    Collective

1679:    Input Parameters:
1680: +  sf - star forest
1681: .  unit - data type
1682: .  leafdata - leaf values to use in reduction
1683: -  op - operation to use for reduction

1685:    Output Parameters:
1686: +  rootdata - root values to be updated, input state is seen by first process to perform an update
1687: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1689:    Level: advanced

1691: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1692: @*/
1693: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1694: {

1699:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1700:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1701:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1702:   return(0);
1703: }
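A common use of fetch-and-op is to let each leaf claim a unique slot at its root. The sketch below assumes contiguous leaf storage (ilocal == NULL), so the leaf buffers are indexed 0..nleaves-1; all names are illustrative.

#include <petscsf.h>

static PetscErrorCode ClaimSlotsAtRoots(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *rootcount,PetscInt *leafoffset)
{
  PetscErrorCode ierr;
  PetscInt       i,*ones;

  PetscFunctionBegin;
  for (i=0; i<nroots; i++) rootcount[i] = 0;
  ierr = PetscMalloc1(nleaves,&ones);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) ones[i] = 1;
  /* Each leaf atomically adds 1 to its root's counter and receives in leafoffset the counter
     value seen just before its own update, i.e. a slot number unique at that root */
  ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcount,ones,leafoffset,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcount,ones,leafoffset,MPI_SUM);CHKERRQ(ierr);
  ierr = PetscFree(ones);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}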

1705: /*@C
1706:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1708:    Collective

1710:    Input Parameter:
1711: .  sf - star forest

1713:    Output Parameter:
1714: .  degree - degree of each root vertex

1716:    Level: advanced

1718:    Notes:
1719:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(); there is no need to call PetscFree() on it.

1721: .seealso: PetscSFGatherBegin()
1722: @*/
1723: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1724: {

1729:   PetscSFCheckGraphSet(sf,1);
1731:   if (!sf->degreeknown) {
1732:     PetscInt i, nroots = sf->nroots, maxlocal;
1733:     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1734:     maxlocal = sf->maxleaf-sf->minleaf+1;
1735:     PetscMalloc1(nroots,&sf->degree);
1736:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1737:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1738:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1739:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1740:   }
1741:   *degree = NULL;
1742:   return(0);
1743: }

1745: /*@C
1746:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1748:    Collective

1750:    Input Parameter:
1751: .  sf - star forest

1753:    Output Parameter:
1754: .  degree - degree of each root vertex

1756:    Level: developer

1758:    Notes:
1759:    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(); there is no need to call PetscFree() on it.

1761: .seealso: PetscSFComputeDegreeBegin()
1762: @*/
1763: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1764: {

1769:   PetscSFCheckGraphSet(sf,1);
1771:   if (!sf->degreeknown) {
1772:     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1773:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1774:     PetscFree(sf->degreetmp);
1775:     sf->degreeknown = PETSC_TRUE;
1776:   }
1777:   *degree = sf->degree;
1778:   return(0);
1779: }
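A short sketch of the begin/end pair; the degrees are valid only after the End call and the array is owned by the SF. The helper name is illustrative.

#include <petscsf.h>

static PetscErrorCode PrintRootDegrees(PetscSF sf)
{
  PetscErrorCode  ierr;
  const PetscInt *degree;
  PetscInt        i,nroots;

  PetscFunctionBegin;
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
  for (i=0; i<nroots; i++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"root %D: degree %D\n",i,degree[i]);CHKERRQ(ierr);
  }
  /* degree is owned by the SF and freed in PetscSFDestroy(); do not PetscFree() it */
  PetscFunctionReturn(0);
}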

1781: /*@C
1782:    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1783:    Each multi-root is assigned the index of the corresponding original root.

1785:    Collective

1787:    Input Parameters:
1788: +  sf - star forest
1789: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1791:    Output Parameters:
1792: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1793: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1795:    Level: developer

1797:    Notes:
1798:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1800: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1801: @*/
1802: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1803: {
1804:   PetscSF             msf;
1805:   PetscInt            i, j, k, nroots, nmroots;
1806:   PetscErrorCode      ierr;

1810:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1814:   PetscSFGetMultiSF(sf,&msf);
1815:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1816:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1817:   for (i=0,j=0,k=0; i<nroots; i++) {
1818:     if (!degree[i]) continue;
1819:     for (j=0; j<degree[i]; j++,k++) {
1820:       (*multiRootsOrigNumbering)[k] = i;
1821:     }
1822:   }
1823:   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check failed");
1824:   if (nMultiRoots) *nMultiRoots = nmroots;
1825:   return(0);
1826: }
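A sketch that maps each multi-root back to the original root that owns it; the helper name is illustrative.

#include <petscsf.h>

static PetscErrorCode MapMultiRootsToRoots(PetscSF sf)
{
  PetscErrorCode  ierr;
  const PetscInt *degree;
  PetscInt        nmroots,*origRoot;

  PetscFunctionBegin;
  ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmroots,&origRoot);CHKERRQ(ierr);
  /* origRoot[k] is the original root index that owns multi-root k */
  ierr = PetscFree(origRoot);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}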

1828: /*@C
1829:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1831:    Collective

1833:    Input Parameters:
1834: +  sf - star forest
1835: .  unit - data type
1836: -  leafdata - leaf data to gather to roots

1838:    Output Parameter:
1839: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1841:    Level: intermediate

1843: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1844: @*/
1845: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1846: {
1848:   PetscSF        multi = NULL;

1852:   PetscSFSetUp(sf);
1853:   PetscSFGetMultiSF(sf,&multi);
1854:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1855:   return(0);
1856: }

1858: /*@C
1859:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1861:    Collective

1863:    Input Parameters:
1864: +  sf - star forest
1865: .  unit - data type
1866: -  leafdata - leaf data to gather to roots

1868:    Output Parameter:
1869: .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

1871:    Level: intermediate

1873: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1874: @*/
1875: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1876: {
1878:   PetscSF        multi = NULL;

1882:   PetscSFGetMultiSF(sf,&multi);
1883:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1884:   return(0);
1885: }
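A gather sketch: unlike a reduction, every leaf value is kept, so the multi-root buffer needs one slot per incoming leaf, i.e. the sum of the root degrees. Names are illustrative.

#include <petscsf.h>

static PetscErrorCode GatherAllLeafValues(PetscSF sf,PetscInt nroots,const PetscScalar *leafdata,PetscScalar **multirootdata)
{
  PetscErrorCode  ierr;
  const PetscInt *degree;
  PetscInt        i,nmulti;

  PetscFunctionBegin;
  ierr = PetscSFComputeDegreeBegin(sf,&degree);CHKERRQ(ierr);
  ierr = PetscSFComputeDegreeEnd(sf,&degree);CHKERRQ(ierr);
  for (i=0,nmulti=0; i<nroots; i++) nmulti += degree[i]; /* one entry per leaf attached to my roots */
  ierr = PetscMalloc1(nmulti,multirootdata);CHKERRQ(ierr);
  ierr = PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,*multirootdata);CHKERRQ(ierr);
  ierr = PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,*multirootdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}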

1887: /*@C
1888:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1890:    Collective

1892:    Input Parameters:
1893: +  sf - star forest
1894: .  unit - data type
1895: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1897:    Output Parameter:
1898: .  leafdata - leaf data to be updated with personal data from each leaf's respective root

1900:    Level: intermediate

1902: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1903: @*/
1904: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1905: {
1907:   PetscSF        multi = NULL;

1911:   PetscSFSetUp(sf);
1912:   PetscSFGetMultiSF(sf,&multi);
1913:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1914:   return(0);
1915: }

1917: /*@C
1918:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1920:    Collective

1922:    Input Parameters:
1923: +  sf - star forest
1924: .  unit - data type
1925: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1927:    Output Parameter:
1928: .  leafdata - leaf data to be updated with personal data from each leaf's respective root

1930:    Level: intermediate

1932: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1933: @*/
1934: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1935: {
1937:   PetscSF        multi = NULL;

1941:   PetscSFGetMultiSF(sf,&multi);
1942:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1943:   return(0);
1944: }
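The scatter sketch below is the inverse of the gather above: each leaf receives its own personal entry from the multi-root buffer, which must use the same per-degree layout. Names are illustrative.

#include <petscsf.h>

static PetscErrorCode ScatterPersonalValues(PetscSF sf,const PetscScalar *multirootdata,PetscScalar *leafdata)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* multirootdata holds degree[i] consecutive entries for root i, in the order used by PetscSFGatherBegin()/End() */
  ierr = PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}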

1946: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1947: {
1948:   PetscInt        i, n, nleaves;
1949:   const PetscInt *ilocal = NULL;
1950:   PetscHSetI      seen;
1951:   PetscErrorCode  ierr;

1954:   if (PetscDefined(USE_DEBUG)) {
1955:     PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1956:     PetscHSetICreate(&seen);
1957:     for (i = 0; i < nleaves; i++) {
1958:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1959:       PetscHSetIAdd(seen,leaf);
1960:     }
1961:     PetscHSetIGetSize(seen,&n);
1962:     if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1963:     PetscHSetIDestroy(&seen);
1964:   }
1965:   return(0);
1966: }

1968: /*@
1969:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1971:   Input Parameters:
1972: + sfA - The first PetscSF
1973: - sfB - The second PetscSF

1975:   Output Parameters:
1976: . sfBA - The composite SF

1978:   Level: developer

1980:   Notes:
1981:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1982:   forests, i.e. the same leaf is not connected with different roots.

1984:   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1985:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1986:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1987:   Bcast on sfA, then a Bcast on sfB, on connected nodes.

1989: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1990: @*/
1991: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1992: {
1993:   PetscErrorCode    ierr;
1994:   const PetscSFNode *remotePointsA,*remotePointsB;
1995:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1996:   const PetscInt    *localPointsA,*localPointsB;
1997:   PetscInt          *localPointsBA;
1998:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1999:   PetscBool         denseB;

2003:   PetscSFCheckGraphSet(sfA,1);
2005:   PetscSFCheckGraphSet(sfB,2);
2008:   PetscSFCheckLeavesUnique_Private(sfA);
2009:   PetscSFCheckLeavesUnique_Private(sfB);

2011:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
2012:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
2013:   if (localPointsA) {
2014:     PetscMalloc1(numRootsB,&reorderedRemotePointsA);
2015:     for (i=0; i<numRootsB; i++) {
2016:       reorderedRemotePointsA[i].rank = -1;
2017:       reorderedRemotePointsA[i].index = -1;
2018:     }
2019:     for (i=0; i<numLeavesA; i++) {
2020:       if (localPointsA[i] >= numRootsB) continue;
2021:       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
2022:     }
2023:     remotePointsA = reorderedRemotePointsA;
2024:   }
2025:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
2026:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
2027:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2028:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
2029:   PetscFree(reorderedRemotePointsA);

2031:   denseB = (PetscBool)!localPointsB;
2032:   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2033:     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
2034:     else numLeavesBA++;
2035:   }
2036:   if (denseB) {
2037:     localPointsBA  = NULL;
2038:     remotePointsBA = leafdataB;
2039:   } else {
2040:     PetscMalloc1(numLeavesBA,&localPointsBA);
2041:     PetscMalloc1(numLeavesBA,&remotePointsBA);
2042:     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2043:       const PetscInt l = localPointsB ? localPointsB[i] : i;

2045:       if (leafdataB[l-minleaf].rank == -1) continue;
2046:       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2047:       localPointsBA[numLeavesBA] = l;
2048:       numLeavesBA++;
2049:     }
2050:     PetscFree(leafdataB);
2051:   }
2052:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2053:   PetscSFSetFromOptions(*sfBA);
2054:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2055:   return(0);
2056: }
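A composition sketch, assuming sfA and sfB live on congruent communicators with their graphs set; the helper name is illustrative.

#include <petscsf.h>

static PetscErrorCode ComposeTwoSFs(PetscSF sfA,PetscSF sfB)
{
  PetscErrorCode ierr;
  PetscSF        sfBA;

  PetscFunctionBegin;
  /* sfBA has sfA's roots and sfB's leaves; a Bcast on sfBA acts like a Bcast on sfA followed by a Bcast on sfB */
  ierr = PetscSFCompose(sfA,sfB,&sfBA);CHKERRQ(ierr);
  /* ... communicate on sfBA ... */
  ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}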

2058: /*@
2059:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

2061:   Input Parameters:
2062: + sfA - The first PetscSF
2063: - sfB - The second PetscSF

2065:   Output Parameters:
2066: . sfBA - The composite SF.

2068:   Level: developer

2070:   Notes:
2071:   Currently, the two SFs must be defined on congruent communicators and they must be true star
2072:   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2073:   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

2075:   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2076:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2077:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2078:   a Reduce on sfB, on connected roots.

2080: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2081: @*/
2082: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2083: {
2084:   PetscErrorCode    ierr;
2085:   const PetscSFNode *remotePointsA,*remotePointsB;
2086:   PetscSFNode       *remotePointsBA;
2087:   const PetscInt    *localPointsA,*localPointsB;
2088:   PetscSFNode       *reorderedRemotePointsA = NULL;
2089:   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2090:   MPI_Op            op;
2091: #if defined(PETSC_USE_64BIT_INDICES)
2092:   PetscBool         iswin;
2093: #endif

2097:   PetscSFCheckGraphSet(sfA,1);
2099:   PetscSFCheckGraphSet(sfB,2);
2102:   PetscSFCheckLeavesUnique_Private(sfA);
2103:   PetscSFCheckLeavesUnique_Private(sfB);

2105:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2106:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

2108:   /* TODO: Check roots of sfB have degree of 1 */
2109:   /* Once that check is implemented, we can replace MPI_MAXLOC
2110:      with MPI_REPLACE. When every root of sfB has degree at most one, the two operations have the same effect.
2111:      We use MPI_MAXLOC only so that this routine produces deterministic output when
2112:      the root condition is not met.
2113:    */
2114:   op = MPI_MAXLOC;
2115: #if defined(PETSC_USE_64BIT_INDICES)
2116:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2117:   PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2118:   if (iswin) op = MPI_REPLACE;
2119: #endif

2121:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2122:   PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2123:   for (i=0; i<maxleaf - minleaf + 1; i++) {
2124:     reorderedRemotePointsA[i].rank = -1;
2125:     reorderedRemotePointsA[i].index = -1;
2126:   }
2127:   if (localPointsA) {
2128:     for (i=0; i<numLeavesA; i++) {
2129:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2130:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2131:     }
2132:   } else {
2133:     for (i=0; i<numLeavesA; i++) {
2134:       if (i > maxleaf || i < minleaf) continue;
2135:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2136:     }
2137:   }

2139:   PetscMalloc1(numRootsB,&localPointsBA);
2140:   PetscMalloc1(numRootsB,&remotePointsBA);
2141:   for (i=0; i<numRootsB; i++) {
2142:     remotePointsBA[i].rank = -1;
2143:     remotePointsBA[i].index = -1;
2144:   }

2146:   PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2147:   PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2148:   PetscFree(reorderedRemotePointsA);
2149:   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2150:     if (remotePointsBA[i].rank == -1) continue;
2151:     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2152:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2153:     localPointsBA[numLeavesBA] = i;
2154:     numLeavesBA++;
2155:   }
2156:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2157:   PetscSFSetFromOptions(*sfBA);
2158:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2159:   return(0);
2160: }
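A sketch of composing with the inverse, under the documented assumptions (congruent communicators, each root of sfB has at most one leaf); the helper name is illustrative.

#include <petscsf.h>

static PetscErrorCode ComposeWithInverse(PetscSF sfA,PetscSF sfB)
{
  PetscErrorCode ierr;
  PetscSF        sfBA;

  PetscFunctionBegin;
  /* sfBA connects sfA's roots to sfB's roots: follow sfA root->leaf, then sfB leaf->root */
  ierr = PetscSFComposeInverse(sfA,sfB,&sfBA);CHKERRQ(ierr);
  /* ... communicate on sfBA ... */
  ierr = PetscSFDestroy(&sfBA);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}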

2162: /*
2163:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

2165:   Input Parameters:
2166: . sf - The global PetscSF

2168:   Output Parameters:
2169: . out - The local PetscSF
2170:  */
2171: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2172: {
2173:   MPI_Comm           comm;
2174:   PetscMPIInt        myrank;
2175:   const PetscInt     *ilocal;
2176:   const PetscSFNode  *iremote;
2177:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2178:   PetscSFNode        *liremote;
2179:   PetscSF            lsf;
2180:   PetscErrorCode     ierr;

2184:   if (sf->ops->CreateLocalSF) {
2185:     (*sf->ops->CreateLocalSF)(sf,out);
2186:   } else {
2187:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2188:     PetscObjectGetComm((PetscObject)sf,&comm);
2189:     MPI_Comm_rank(comm,&myrank);

2191:     /* Find out local edges and build a local SF */
2192:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2193:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2194:     PetscMalloc1(lnleaves,&lilocal);
2195:     PetscMalloc1(lnleaves,&liremote);

2197:     for (i=j=0; i<nleaves; i++) {
2198:       if (iremote[i].rank == (PetscInt)myrank) {
2199:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2200:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2201:         liremote[j].index = iremote[i].index;
2202:         j++;
2203:       }
2204:     }
2205:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
2206:     PetscSFSetFromOptions(lsf);
2207:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2208:     PetscSFSetUp(lsf);
2209:     *out = lsf;
2210:   }
2211:   return(0);
2212: }

2214: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2215: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2216: {
2217:   PetscErrorCode     ierr;
2218:   PetscMemType       rootmtype,leafmtype;

2222:   PetscSFSetUp(sf);
2223:   PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2224:   PetscGetMemType(rootdata,&rootmtype);
2225:   PetscGetMemType(leafdata,&leafmtype);
2226:   if (sf->ops->BcastToZero) {
2227:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2228:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2229:   PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2230:   return(0);
2231: }