Actual source code: vscat.c

  1: #include "petscsf.h"
  2: #include "petscsystypes.h"
  3: #include "petscvec.h"
  4: #include <petsc/private/sfimpl.h>
  5: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
  6: #include <../src/vec/is/sf/impls/basic/sfpack.h>
  7: #include <petsc/private/vecimpl.h>

  9: typedef enum {
 10:   IS_INVALID,
 11:   IS_GENERAL,
 12:   IS_BLOCK,
 13:   IS_STRIDE
 14: } ISTypeID;

 16: static inline PetscErrorCode ISGetTypeID_Private(IS is, ISTypeID *id)
 17: {
 18:   PetscBool same;

 20:   *id = IS_INVALID;
 21:   PetscObjectTypeCompare((PetscObject)is, ISGENERAL, &same);
 22:   if (same) {
 23:     *id = IS_GENERAL;
 24:     goto functionend;
 25:   }
 26:   PetscObjectTypeCompare((PetscObject)is, ISBLOCK, &same);
 27:   if (same) {
 28:     *id = IS_BLOCK;
 29:     goto functionend;
 30:   }
 31:   PetscObjectTypeCompare((PetscObject)is, ISSTRIDE, &same);
 32:   if (same) {
 33:     *id = IS_STRIDE;
 34:     goto functionend;
 35:   }
 36: functionend:
 37:   return 0;
 38: }
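As a usage sketch of the type query this helper wraps (the IS created below is illustrative only; error checking is omitted, matching the rest of this listing):

    IS        is;
    PetscBool isstride;

    ISCreateStride(PETSC_COMM_SELF, 5, 0, 2, &is);                /* the stride IS {0,2,4,6,8} */
    PetscObjectTypeCompare((PetscObject)is, ISSTRIDE, &isstride); /* isstride is now PETSC_TRUE */
    ISDestroy(&is);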

 40: static PetscErrorCode VecScatterBegin_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
 41: {
 42:   PetscSF      wsf = NULL; /* either sf or its local part */
 43:   MPI_Op       mop = MPI_OP_NULL;
 44:   PetscMPIInt  size;
 45:   PetscMemType xmtype = PETSC_MEMTYPE_HOST, ymtype = PETSC_MEMTYPE_HOST;

 47:   if (x != y) VecLockReadPush(x);
 48:   VecGetArrayReadAndMemType(x, &sf->vscat.xdata, &xmtype);
 49:   VecGetArrayAndMemType(y, &sf->vscat.ydata, &ymtype);
 50:   VecLockWriteSet(y, PETSC_TRUE);

 52:   /* SCATTER_LOCAL indicates ignoring inter-process communication */
 53:   MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
 54:   if ((mode & SCATTER_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_LOCAL is uncommon */
 55:     if (!sf->vscat.lsf) PetscSFCreateLocalSF_Private(sf, &sf->vscat.lsf);
 56:     wsf = sf->vscat.lsf;
 57:   } else {
 58:     wsf = sf;
 59:   }

 61:   /* Note xdata/ydata is always recorded on sf (not lsf) above */
 62:   if (addv == INSERT_VALUES) mop = MPI_REPLACE;
 63:   else if (addv == ADD_VALUES) mop = MPIU_SUM; /* Petsc defines its own MPI datatype and SUM operation for __float128 etc. */
 64:   else if (addv == MAX_VALUES) mop = MPIU_MAX;
 65:   else if (addv == MIN_VALUES) mop = MPIU_MIN;
 66:   else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);

 68:   if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
 69:     PetscSFReduceWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop);
 70:   } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
 71:     PetscSFBcastWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop);
 72:   }
 73:   return 0;
 74: }

 76: static PetscErrorCode VecScatterEnd_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
 77: {
 78:   PetscSF     wsf = NULL;
 79:   MPI_Op      mop = MPI_OP_NULL;
 80:   PetscMPIInt size;

 82:   /* SCATTER_LOCAL indicates ignoring inter-process communication */
 83:   MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
 84:   wsf = ((mode & SCATTER_LOCAL) && size > 1) ? sf->vscat.lsf : sf;

 86:   if (addv == INSERT_VALUES) mop = MPI_REPLACE;
 87:   else if (addv == ADD_VALUES) mop = MPIU_SUM;
 88:   else if (addv == MAX_VALUES) mop = MPIU_MAX;
 89:   else if (addv == MIN_VALUES) mop = MPIU_MIN;
 90:   else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);

 92:   if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
 93:     PetscSFReduceEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop);
 94:   } else { /* forward scatter sends roots to leaves, i.e., x to y */
 95:     PetscSFBcastEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop);
 96:   }

 98:   VecRestoreArrayReadAndMemType(x, &sf->vscat.xdata);
 99:   if (x != y) VecLockReadPop(x);
100:   VecRestoreArrayAndMemType(y, &sf->vscat.ydata);
101:   VecLockWriteSet(y, PETSC_FALSE);
102:   return 0;
103: }
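For orientation, a forward INSERT_VALUES scatter is essentially an SF broadcast with MPI_REPLACE, while a reverse ADD_VALUES scatter is an SF reduction with MPIU_SUM. A sketch at the plain PetscSF level (rootdata and leafdata are hypothetical arrays sized to the SF's roots and leaves; error checking omitted):

    /* forward: roots (x) -> leaves (y), overwriting destination entries */
    PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);
    PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE);

    /* reverse: leaves (y) -> roots (x), summing contributions */
    PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);
    PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPIU_SUM);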

105: /* VecScatterRemap provides a lightweight way to modify a VecScatter. Suppose the input sf scatters
106:    x[i] to y[j]; tomap gives a plan to change the scatter so that it scatters x[tomap[i]] to y[j]. Note that in SF,
107:    x is the roots. That means we need to change incoming data such as bas->irootloc[].
108:  */
109: static PetscErrorCode VecScatterRemap_Internal(VecScatter sf, const PetscInt *tomap, const PetscInt *frommap)
110: {
111:   PetscInt       i, bs = sf->vscat.bs;
112:   PetscMPIInt    size;
113:   PetscBool      ident = PETSC_TRUE, isbasic, isneighbor;
114:   PetscSFType    type;
115:   PetscSF_Basic *bas = NULL;

117:   /* check if it is an identity map. If it is, do nothing */
118:   if (tomap) {
119:     for (i = 0; i < sf->nroots * bs; i++) {
120:       if (i != tomap[i]) {
121:         ident = PETSC_FALSE;
122:         break;
123:       }
124:     }
125:     if (ident) return 0;
126:   }
128:   if (!tomap) return 0;

130:   MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);

132:   /* Since the indices changed, we must also update the local SF. But we do not do it here since
133:      lsf is rarely used. We just destroy lsf and rebuild it on demand from the updated sf.
134:   */
135:   if (sf->vscat.lsf) PetscSFDestroy(&sf->vscat.lsf);

137:   PetscSFGetType(sf, &type);
138:   PetscObjectTypeCompare((PetscObject)sf, PETSCSFBASIC, &isbasic);
139:   PetscObjectTypeCompare((PetscObject)sf, PETSCSFNEIGHBOR, &isneighbor);

142:   PetscSFSetUp(sf); /* to build sf->irootloc if SetUp has not yet been called */

144:   /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
145:     sf->remote and bas->irootloc. The latter one is cheap to remap, but the former two are not.
146:     To remap them, we have to do a bcast from roots to leaves, to let leaves know their updated roots.
147:     Since VecScatterRemap is supposed to be a cheap routine to adapt a vecscatter by only changing where
148:     x[] data is taken, we do not remap sf->rremote, sf->remote. The consequence is that operations
149:     accessing them (such as PetscSFCompose) may get stale info. Since VecScatter does not need
150:     such complicated SF operations, we do not remap sf->rremote and sf->remote; instead we destroy them
151:     so that code accessing them (if any) will crash (instead of getting silent errors). Note that BcastAndOp/Reduce,
152:     which are used by VecScatter and only rely on bas->irootloc, are updated and correct.
153:   */
154:   sf->remote = NULL;
155:   PetscFree(sf->remote_alloc);
156:   /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
157:   for (i = 0; i < sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_MIN_INT;

159:  /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
160:      block in the vector. So before the remapping, we have to expand indices in sf by bs, and
161:      after the remapping, we have to shrink them back.
162:    */
163:   bas = (PetscSF_Basic *)sf->data;
164:   for (i = 0; i < bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i] * bs] / bs;
165: #if defined(PETSC_HAVE_DEVICE)
166:   /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
167:   for (i = 0; i < 2; i++) PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]);
168: #endif
169:   /* Destroy and then rebuild root packing optimizations since indices are changed */
170:   PetscSFResetPackFields(sf);
171:   PetscSFSetUpPackFields(sf);
172:   return 0;
173: }

175: /* Given a parallel VecScatter context, return the number of procs and vector entries involved in remote (i.e., off-process) communication

177:   Input Parameters:
178: + sf   - the context (must be a parallel vecscatter)
179: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

181:   Output parameters:
182: + num_procs   - number of remote processors
183: - num_entries - number of vector entries to send or recv

185:   .seealso: `VecScatterGetRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`

187:   Notes:
188:   Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
189:   usually only uses MPI_Send/Recv. This group of subroutines provides info needed for such uses.
190:  */
191: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf, PetscBool send, PetscInt *num_procs, PetscInt *num_entries)
192: {
193:   PetscInt           nranks, remote_start;
194:   PetscMPIInt        rank;
195:   const PetscInt    *offset;
196:   const PetscMPIInt *ranks;

198:   PetscSFSetUp(sf);
199:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);

201:   /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
202:      Remember x is roots and lvec is leaves. 'send' means roots to leaves communication. If 'send' is true, we need to
203:      get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
204:      If send is false, we do the opposite, calling PetscSFGetRootRanks().
205:   */
206:   if (send) PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, NULL);
207:   else PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, NULL, NULL);
208:   if (nranks) {
209:     remote_start = (rank == ranks[0]) ? 1 : 0;
210:     if (num_procs) *num_procs = nranks - remote_start;
211:     if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
212:   } else {
213:     if (num_procs) *num_procs = 0;
214:     if (num_entries) *num_entries = 0;
215:   }
216:   return 0;
217: }
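A sketch of how a caller might use this routine to size a send buffer (sendbuf and the follow-up MPI calls are hypothetical; error checking omitted):

    PetscInt     nsendprocs, nsendentries;
    PetscScalar *sendbuf;

    VecScatterGetRemoteCount_Private(sf, PETSC_TRUE, &nsendprocs, &nsendentries);
    PetscMalloc1(nsendentries, &sendbuf); /* one buffer covering all entries sent off-process */
    /* ... pack sendbuf and post MPI sends to the nsendprocs remote ranks ... */
    PetscFree(sendbuf);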

219: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
220:    Any output parameter can be NULL.

222:   Input Parameters:
223: + sf   - the context
224: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

226:   Output parameters:
227: + n        - number of remote processors
228: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
229:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
230:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
231:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
232: . indices  - indices of entries to send/recv
233: . procs    - ranks of remote processors
234: - bs       - block size

236:   .seealso: `VecScatterRestoreRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
237:  */
238: PetscErrorCode VecScatterGetRemote_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
239: {
240:   PetscInt           nranks, remote_start;
241:   PetscMPIInt        rank;
242:   const PetscInt    *offset, *location;
243:   const PetscMPIInt *ranks;

245:   PetscSFSetUp(sf);
246:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);

248:   if (send) PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, &location);
249:   else PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, &location, NULL);

251:   if (nranks) {
252:     remote_start = (rank == ranks[0]) ? 1 : 0;
253:     if (n) *n = nranks - remote_start;
254:     if (starts) *starts = &offset[remote_start];
255:     if (indices) *indices = location; /* not &location[offset[remote_start]]. Starts[0] may point to the middle of indices[] */
256:     if (procs) *procs = &ranks[remote_start];
257:   } else {
258:     if (n) *n = 0;
259:     if (starts) *starts = NULL;
260:     if (indices) *indices = NULL;
261:     if (procs) *procs = NULL;
262:   }

264:   if (bs) *bs = 1;
265:   return 0;
266: }
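The ATTENTION note above matters in practice: starts[] is a window into the SF's offset array and does not necessarily begin at zero. A packing sketch under that convention (buf and xarray are hypothetical arrays assumed to exist; error checking omitted):

    PetscInt           nremote, bs;
    const PetscInt    *starts, *indices;
    const PetscMPIInt *procs;

    VecScatterGetRemote_Private(sf, PETSC_TRUE, &nremote, &starts, &indices, &procs, &bs);
    for (PetscInt i = 0; i < nremote; i++) {
      for (PetscInt j = 0; j < starts[i + 1] - starts[i]; j++) {
        /* indices[] is addressed with starts[i] + j, but buf, a hypothetical buffer of length
           starts[nremote] - starts[0], must be addressed with starts[i] - starts[0] + j;
           xarray is a hypothetical array of source vector values */
        buf[starts[i] - starts[0] + j] = xarray[indices[starts[i] + j]];
      }
    }
    VecScatterRestoreRemote_Private(sf, PETSC_TRUE, &nremote, &starts, &indices, &procs, &bs);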

268: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
269:    processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.

271:   Input Parameters:
272: + sf   - the context
273: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

275:   Output parameters:
276: + n        - number of remote processors
277: . starts   - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
278:              Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
279:              expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
280:              with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
281: . indices  - indices of entries to send/recv
282: . procs    - ranks of remote processors
283: - bs       - block size

285:   .seealso: `VecScatterRestoreRemoteOrdered_Private()`, `VecScatterGetRemote_Private()`

287:   Notes:
288:   Output parameters like starts, indices must also be adapted according to the sorted ranks.
289:  */
290: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
291: {
292:   VecScatterGetRemote_Private(sf, send, n, starts, indices, procs, bs);
293:   if (PetscUnlikelyDebug(n && procs)) {
294:     PetscInt i;
295:     /* loop from back to front so the case *n == 0 is also handled */
297:   }
298:   return 0;
299: }

301: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
302:    an implementation to free memory allocated in the VecScatterGetRemote_Private call.

304:   Input Parameters:
305: + sf   - the context
306: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

308:   Output parameters:
309: + n        - number of remote processors
310: . starts   - starting point in indices for each proc
311: . indices  - indices of entries to send/recv
312: . procs    - ranks of remote processors
313: - bs       - block size

315:   .seealso: `VecScatterGetRemote_Private()`
316:  */
317: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
318: {
319:   if (starts) *starts = NULL;
320:   if (indices) *indices = NULL;
321:   if (procs) *procs = NULL;
322:   return 0;
323: }

325: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
326:    an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.

328:   Input Parameters:
329: + sf   - the context
330: - send  - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)

332:   Output parameters:
333: + n        - number of remote processors
334: . starts   - starting point in indices for each proc
335: . indices  - indices of entries to send/recv
336: . procs    - ranks of remote processors
337: - bs       - block size

339:   .seealso: `VecScatterGetRemoteOrdered_Private()`
340:  */
341: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
342: {
343:   VecScatterRestoreRemote_Private(sf, send, n, starts, indices, procs, bs);
344:   return 0;
345: }

347: /*@
348:    VecScatterSetUp - Sets up the VecScatter to be able to actually scatter information between vectors

350:    Collective on VecScatter

352:    Input Parameter:
353: .  sf - the scatter context

355:    Level: intermediate

357: .seealso: `VecScatterCreate()`, `VecScatterCopy()`
358: @*/
359: PetscErrorCode VecScatterSetUp(VecScatter sf)
360: {
361:   PetscSFSetUp(sf);
362:   return 0;
363: }

365: /*@C
366:   VecScatterSetType - Builds a vector scatter, for a particular vector scatter implementation.

368:   Collective on VecScatter

370:   Input Parameters:
371: + sf - The VecScatter (SF) object
372: - type - The name of the vector scatter type

374:   Options Database Key:
375: . -sf_type <type> - Sets the VecScatter (SF) type

377:   Notes:
378:   Use VecScatterDuplicate() to form additional vector scatters of the same type as an existing vector scatter.

380:   Level: intermediate

382: .seealso: `VecScatterGetType()`, `VecScatterCreate()`
383: @*/
384: PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
385: {
386:   PetscSFSetType(sf, type);
387:   return 0;
388: }

390: /*@C
391:   VecScatterGetType - Gets the vector scatter type name (as a string) from the VecScatter.

393:   Not Collective

395:   Input Parameter:
396: . sf  - The vector scatter (SF)

398:   Output Parameter:
399: . type - The vector scatter type name

401:   Level: intermediate

403: .seealso: `VecScatterSetType()`, `VecScatterCreate()`
404: @*/
405: PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
406: {
407:   PetscSFGetType(sf, type);
408:   return 0;
409: }
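A sketch of setting and querying the type; since a VecScatter is a PetscSF, the valid type names are the PetscSF ones such as PETSCSFBASIC (error checking omitted):

    VecScatterType type;

    VecScatterSetType(sf, PETSCSFBASIC);
    VecScatterGetType(sf, &type);
    PetscPrintf(PETSC_COMM_WORLD, "scatter type: %s\n", type);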

411: /*@C
412:   VecScatterRegister -  Adds a new vector scatter component implementation

414:   Not Collective

416:   Input Parameters:
417: + name        - The name of a new user-defined creation routine
418: - create_func - The creation routine itself

420:   Level: advanced

422: .seealso: `VecRegister()`
423: @*/
424: PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
425: {
426:   PetscSFRegister(sname, function);
427:   return 0;
428: }

430: /* ------------------------------------------------------------------*/
431: /*@
432:    VecScatterGetMerged - Returns true if the scatter is completed during VecScatterBegin()
433:       so that VecScatterEnd() does nothing

435:    Not Collective

437:    Input Parameter:
438: .   sf - scatter context created with VecScatterCreate()

440:    Output Parameter:
441: .   flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()

443:    Level: developer

445: .seealso: `VecScatterCreate()`, `VecScatterEnd()`, `VecScatterBegin()`
446: @*/
447: PetscErrorCode VecScatterGetMerged(VecScatter sf, PetscBool *flg)
448: {
450:   if (flg) *flg = sf->vscat.beginandendtogether;
451:   return 0;
452: }
453: /*@C
454:    VecScatterDestroy - Destroys a scatter context created by VecScatterCreate()

456:    Collective on VecScatter

458:    Input Parameter:
459: .  sf - the scatter context

461:    Level: intermediate

463: .seealso: `VecScatterCreate()`, `VecScatterCopy()`
464: @*/
465: PetscErrorCode VecScatterDestroy(VecScatter *sf)
466: {
467:   PetscSFDestroy(sf);
468:   return 0;
469: }

471: /*@
472:    VecScatterCopy - Makes a copy of a scatter context.

474:    Collective on VecScatter

476:    Input Parameter:
477: .  sf - the scatter context

479:    Output Parameter:
480: .  newsf - the context copy

482:    Level: advanced

484: .seealso: `VecScatterCreate()`, `VecScatterDestroy()`
485: @*/
486: PetscErrorCode VecScatterCopy(VecScatter sf, VecScatter *newsf)
487: {
489:   PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, newsf);
490:   PetscSFSetUp(*newsf);
491:   return 0;
492: }

494: /*@C
495:    VecScatterViewFromOptions - Views a VecScatter object based on values in the options database

497:    Collective on VecScatter

499:    Input Parameters:
500: +  sf - the scatter context
501: .  obj - Optional object
502: -  name - command line option

504:    Level: intermediate
505: .seealso: `VecScatter`, `VecScatterView`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
506: @*/
507: PetscErrorCode VecScatterViewFromOptions(VecScatter sf, PetscObject obj, const char name[])
508: {
510:   PetscObjectViewFromOptions((PetscObject)sf, obj, name);
511:   return 0;
512: }

514: /* ------------------------------------------------------------------*/
515: /*@C
516:    VecScatterView - Views a vector scatter context.

518:    Collective on VecScatter

520:    Input Parameters:
521: +  sf - the scatter context
522: -  viewer - the viewer for displaying the context

524:    Level: intermediate

526: @*/
527: PetscErrorCode VecScatterView(VecScatter sf, PetscViewer viewer)
528: {
529:   PetscSFView(sf, viewer);
530:   return 0;
531: }
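A one-line usage sketch (error checking omitted):

    VecScatterView(sf, PETSC_VIEWER_STDOUT_WORLD); /* print the communication pattern to stdout */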

533: /*@C
534:    VecScatterRemap - Remaps the "from" and "to" indices in a
535:    vector scatter context. FOR EXPERTS ONLY!

537:    Collective on VecScatter

539:    Input Parameters:
540: +  sf    - vector scatter context
541: .  tomap   - remapping plan for "to" indices (may be NULL).
542: -  frommap - remapping plan for "from" indices (may be NULL)

544:    Level: developer

546:    Notes:
547:      In the parallel case the todata contains indices from where the data is taken
548:      (and then sent to others)! The fromdata contains indices from where the received
549:      data is finally put locally.

551:      In the sequential case the todata contains indices from where the data is put
552:      and the fromdata contains indices from where the data is taken from.
553:      This is backwards from the parallel case!

555: @*/
556: PetscErrorCode VecScatterRemap(VecScatter sf, PetscInt tomap[], PetscInt frommap[])
557: {
560:   VecScatterRemap_Internal(sf, tomap, frommap);
562:   /* Mark the vector lengths as unknown because we do not know the lengths of the remapped vectors */
563:   sf->vscat.from_n = -1;
564:   sf->vscat.to_n   = -1;
565:   return 0;
566: }
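A sketch of a remap call: build a permutation tomap[] over the local entries of the scatter's "from" vector and apply it (nlocal and the reversing permutation are purely illustrative; error checking omitted):

    PetscInt *tomap;

    PetscMalloc1(nlocal, &tomap);                                    /* nlocal: assumed local length of the "from" vector */
    for (PetscInt i = 0; i < nlocal; i++) tomap[i] = nlocal - 1 - i; /* example: reverse the local ordering */
    VecScatterRemap(sf, tomap, NULL);                                /* pass NULL for frommap */
    PetscFree(tomap);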

568: /*@
569:   VecScatterSetFromOptions - Configures the vector scatter from the options database.

571:   Collective on VecScatter

573:   Input Parameter:
574: . sf - The vector scatter

576:   Notes:
577:     To see all options, run your program with the -help option, or consult the users manual.
578:     Must be called before VecScatterSetUp() and before the vector scatter is used.

580:   Level: beginner

582: .seealso: `VecScatterCreate()`, `VecScatterDestroy()`, `VecScatterSetUp()`
583: @*/
584: PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
585: {
587:   PetscObjectOptionsBegin((PetscObject)sf);

589:   sf->vscat.beginandendtogether = PETSC_FALSE;
590:   PetscOptionsBool("-vecscatter_merge", "Use combined (merged) vector scatter begin and end", "VecScatterCreate", sf->vscat.beginandendtogether, &sf->vscat.beginandendtogether, NULL);
591:   if (sf->vscat.beginandendtogether) PetscInfo(sf, "Using combined (merged) vector scatter begin and end\n");
592:   PetscOptionsEnd();
593:   return 0;
594: }

596: /* ---------------------------------------------------------------- */
597: /*@
598:    VecScatterCreate - Creates a vector scatter context.

600:    Collective on Vec

602:    Input Parameters:
603: +  xin - a vector that defines the shape (parallel data layout of the vector)
604:          of vectors from which we scatter
605: .  yin - a vector that defines the shape (parallel data layout of the vector)
606:          of vectors to which we scatter
607: .  ix - the indices of xin to scatter (if NULL scatters all values)
608: -  iy - the indices of yin to hold results (if NULL fills entire vector yin)

610:    Output Parameter:
611: .  newsf - location to store the new scatter (SF) context

613:    Options Database Keys:
614: +  -vecscatter_view         - Prints details of the communication
615: .  -vecscatter_view ::ascii_info    - Prints less detail about the communication
616: -  -vecscatter_merge        - VecScatterBegin() handles all of the communication and VecScatterEnd() is a no-op;
617:                               this eliminates the chance to overlap computation and communication

619:   Level: intermediate

621:   Notes:
622:    If both xin and yin are parallel, their communicator must be on the same
623:    set of processes, but their process order can be different.
624:    In calls to VecScatterBegin() and VecScatterEnd() you can use different vectors than the xin and
625:    yin you used above; BUT they must have the same parallel data layout; for example,
626:    they could be obtained from VecDuplicate().
627:    A VecScatter context CANNOT be used in two or more simultaneous scatters;
628:    that is you cannot call a second VecScatterBegin() with the same scatter
629:    context until the VecScatterEnd() has been called on the first VecScatterBegin().
630:    In this case a separate VecScatter is needed for each concurrent scatter.

632:    Currently the MPI_Send() calls use PERSISTENT versions.
633:    (This unfortunately requires that the same in and out arrays be used for each use; this
634:     is why we always pack the input into a work array before sending
635:     and unpack upon receiving, instead of using MPI datatypes, which would avoid the packing/unpacking.)

637:    ix and iy cannot both be NULL at the same time.

639:    Use VecScatterCreateToAll() to create a vecscatter that copies an MPI vector to sequential vectors on all MPI ranks.
640:    Use VecScatterCreateToZero() to create a vecscatter that copies an MPI vector to a sequential vector on MPI rank 0.
641:    These special vecscatters have better performance than general ones.

643: .seealso: `VecScatterDestroy()`, `VecScatterCreateToAll()`, `VecScatterCreateToZero()`, `PetscSFCreate()`
644: @*/
645: PetscErrorCode VecScatterCreate(Vec x, IS ix, Vec y, IS iy, VecScatter *newsf)
646: {
647:   MPI_Comm        xcomm, ycomm, bigcomm;
648:   Vec             xx, yy;
649:   IS              ix_old = ix, iy_old = iy, ixx, iyy;
650:   PetscMPIInt     xcommsize, ycommsize, rank, result;
651:   PetscInt        i, n, N, nroots, nleaves, *ilocal, xstart, ystart, ixsize, iysize, xlen, ylen;
652:   const PetscInt *xindices, *yindices;
653:   PetscSFNode    *iremote;
654:   PetscLayout     xlayout, ylayout;
655:   ISTypeID        ixid, iyid;
656:   PetscInt        bs, bsx, bsy, min, max, m[2], ixfirst, ixstep, iyfirst, iystep;
657:   PetscBool       can_do_block_opt = PETSC_FALSE;
658:   PetscSF         sf;


663:   /* Get comm from x and y */
664:   PetscObjectGetComm((PetscObject)x, &xcomm);
665:   MPI_Comm_size(xcomm, &xcommsize);
666:   PetscObjectGetComm((PetscObject)y, &ycomm);
667:   MPI_Comm_size(ycomm, &ycommsize);
668:   if (xcommsize > 1 && ycommsize > 1) {
669:     MPI_Comm_compare(xcomm, ycomm, &result);
671:   }
672:   bs = 1; /* default, no blocking */

674:   /*
675:    Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
676:    StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
677:    contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.

679:    SF is built around the concepts of local leaves and remote roots. We treat the source vector x as roots and the destination vector y as
680:    leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as trivial PtoS.
681:   */

683:   /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
684:   if (!ix) {
685:     if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
686:       VecGetSize(x, &N);
687:       ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &ix);
688:     } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
689:       VecGetLocalSize(x, &n);
690:       VecGetOwnershipRange(x, &xstart, NULL);
691:       ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix);
692:     }
693:   }

695:   if (!iy) {
696:     if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means the whole y will be scattered to, from each seq x */
697:       VecGetSize(y, &N);
698:       ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iy);
699:     } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
700:       VecGetLocalSize(y, &n);
701:       VecGetOwnershipRange(y, &ystart, NULL);
702:       ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy);
703:     }
704:   }

706:   /* Do error checking immediately after we have non-empty ix, iy */
707:   ISGetLocalSize(ix, &ixsize);
708:   ISGetLocalSize(iy, &iysize);
709:   VecGetSize(x, &xlen);
710:   VecGetSize(y, &ylen);
712:   ISGetMinMax(ix, &min, &max);
714:   ISGetMinMax(iy, &min, &max);

717:   /* Extract info about ix, iy for further test */
718:   ISGetTypeID_Private(ix, &ixid);
719:   ISGetTypeID_Private(iy, &iyid);
720:   if (ixid == IS_BLOCK) ISGetBlockSize(ix, &bsx);
721:   else if (ixid == IS_STRIDE) ISStrideGetInfo(ix, &ixfirst, &ixstep);

723:   if (iyid == IS_BLOCK) ISGetBlockSize(iy, &bsy);
724:   else if (iyid == IS_STRIDE) ISStrideGetInfo(iy, &iyfirst, &iystep);

726:   /* Check if a PtoS is one of the special ToAll/ToZero scatters, which can be the result of VecScatterCreateToAll/Zero.
727:      ToAll means a whole MPI vector is copied to a seq vector on every process. ToZero means a whole MPI
728:      vector is copied to a seq vector on rank 0 and the other processes do nothing (i.e., they input empty ix, iy).

730:      We can optimize these scatters with MPI collectives. We can also avoid costly analysis used for general scatters.
731:   */
732:   if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
733:     PetscInt    pattern[2] = {0, 0};     /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
734:     PetscLayout map;

736:     MPI_Comm_rank(xcomm, &rank);
737:     VecGetLayout(x, &map);
738:     if (rank == 0) {
739:       if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
740:         /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
741:         pattern[0] = pattern[1] = 1;
742:       }
743:     } else {
744:       if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
745:         /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
746:         pattern[0] = 1;
747:       } else if (ixsize == 0) {
748:         /* Other ranks do nothing, so it is a ToZero candidate */
749:         pattern[1] = 1;
750:       }
751:     }

753:     /* One stone (the expensive allreduce) two birds: pattern[] tells if it is ToAll or ToZero */
754:     MPIU_Allreduce(MPI_IN_PLACE, pattern, 2, MPIU_INT, MPI_LAND, xcomm);

756:     if (pattern[0] || pattern[1]) {
757:       PetscSFCreate(xcomm, &sf);
758:       PetscSFSetFromOptions(sf);
759:       PetscSFSetGraphWithPattern(sf, map, pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER);
760:       goto functionend; /* No further analysis needed. What a big win! */
761:     }
762:   }

764:   /* Continue ...
765:      Do block optimization by taking advantage of high level info available in ix, iy.
766:      The block optimization is valid when all of the following conditions are met:
767:      1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
768:      2) ix, iy have the same block size;
769:      3) all processors agree on one block size;
770:      4) no blocks span more than one process;
771:    */
772:   bigcomm = (xcommsize == 1) ? ycomm : xcomm;

774:   /* Processors could go through different paths in this if-else test */
775:   m[0] = m[1] = PETSC_MPI_INT_MIN;
776:   if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
777:     m[0] = PetscMax(bsx, bsy);
778:     m[1] = -PetscMin(bsx, bsy);
779:   } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep == 1 && iyfirst % bsx == 0) {
780:     m[0] = bsx;
781:     m[1] = -bsx;
782:   } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep == 1 && ixfirst % bsy == 0) {
783:     m[0] = bsy;
784:     m[1] = -bsy;
785:   }
786:   /* Get max and min of bsx,bsy over all processes in one allreduce */
787:   MPIU_Allreduce(MPI_IN_PLACE, m, 2, MPIU_INT, MPI_MAX, bigcomm);
788:   max = m[0];
789:   min = -m[1];

791:   /* Since we used allreduce above, all ranks will have the same min and max. min==max
792:      implies all ranks have the same bs. Do a further test to see if local vectors are divisible
793:      by bs on ALL ranks. If they are, we are assured that no blocks span more than one processor.
794:    */
795:   if (min == max && min > 1) {
796:     VecGetLocalSize(x, &xlen);
797:     VecGetLocalSize(y, &ylen);
798:     m[0] = xlen % min;
799:     m[1] = ylen % min;
800:     MPIU_Allreduce(MPI_IN_PLACE, m, 2, MPIU_INT, MPI_LOR, bigcomm);
801:     if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
802:   }

804:   /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
805:      and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
806:      indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.

808:      xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
809:      we can treat PtoP and StoP uniformly as PtoS.
810:    */
811:   if (can_do_block_opt) {
812:     const PetscInt *indices;

814:     /* Shrink x and ix */
815:     bs = min;
816:     VecCreateMPIWithArray(bigcomm, 1, xlen / bs, PETSC_DECIDE, NULL, &xx); /* We only care about xx's layout */
817:     if (ixid == IS_BLOCK) {
818:       ISBlockGetIndices(ix, &indices);
819:       ISBlockGetLocalSize(ix, &ixsize);
820:       ISCreateGeneral(PETSC_COMM_SELF, ixsize, indices, PETSC_COPY_VALUES, &ixx);
821:       ISBlockRestoreIndices(ix, &indices);
822:     } else { /* ixid == IS_STRIDE */
823:       ISGetLocalSize(ix, &ixsize);
824:       ISCreateStride(PETSC_COMM_SELF, ixsize / bs, ixfirst / bs, 1, &ixx);
825:     }

827:     /* Shrink y and iy */
828:     VecCreateMPIWithArray(ycomm, 1, ylen / bs, PETSC_DECIDE, NULL, &yy);
829:     if (iyid == IS_BLOCK) {
830:       ISBlockGetIndices(iy, &indices);
831:       ISBlockGetLocalSize(iy, &iysize);
832:       ISCreateGeneral(PETSC_COMM_SELF, iysize, indices, PETSC_COPY_VALUES, &iyy);
833:       ISBlockRestoreIndices(iy, &indices);
834:     } else { /* iyid == IS_STRIDE */
835:       ISGetLocalSize(iy, &iysize);
836:       ISCreateStride(PETSC_COMM_SELF, iysize / bs, iyfirst / bs, 1, &iyy);
837:     }
838:   } else {
839:     ixx = ix;
840:     iyy = iy;
841:     yy  = y;
842:     if (xcommsize == 1) VecCreateMPIWithArray(bigcomm, 1, xlen, PETSC_DECIDE, NULL, &xx);
843:     else xx = x;
844:   }

846:   /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
847:   ISGetIndices(ixx, &xindices);
848:   ISGetIndices(iyy, &yindices);
849:   VecGetLayout(xx, &xlayout);

851:   if (ycommsize > 1) {
852:     /* PtoP or StoP */

854:     /* Below is a piece of complex code with a very simple goal: move global index pairs (xindices[i], yindices[i])
855:        to the owner process of yindices[i] according to ylayout, for i = 0..n.

857:        I originally did it through a temp sf, but later realized the old design was inefficient and also distorted the log view.
858:        We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
859:        PetscSFCreate() calls. This code is on the critical path of VecScatterSetUp and is used by every VecScatterCreate.
860:        So I commented it out and wrote another, optimized implementation. The commented code is left here for reference.
861:      */
862: #if 0
863:     const PetscInt *degree;
864:     PetscSF        tmpsf;
865:     PetscInt       inedges=0,*leafdata,*rootdata;

867:     VecGetOwnershipRange(xx,&xstart,NULL);
868:     VecGetLayout(yy,&ylayout);
869:     VecGetOwnershipRange(yy,&ystart,NULL);

871:     VecGetLocalSize(yy,&nroots);
872:     ISGetLocalSize(iyy,&nleaves);
873:     PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata);

875:     for (i=0; i<nleaves; i++) {
876:       PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index);
877:       leafdata[2*i]   = yindices[i];
878:       leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
879:     }

881:     PetscSFCreate(ycomm,&tmpsf);
882:     PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);

884:     PetscSFComputeDegreeBegin(tmpsf,&degree);
885:     PetscSFComputeDegreeEnd(tmpsf,&degree);

887:     for (i=0; i<nroots; i++) inedges += degree[i];
888:     PetscMalloc1(inedges*2,&rootdata);
889:     PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata);
890:     PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata);

892:     PetscFree2(iremote,leafdata);
893:     PetscSFDestroy(&tmpsf);

895:     /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
896:        We convert j to local, and convert i to (rank, index). In the end, we get a PtoS suitable for building the SF.
897:      */
898:     nleaves = inedges;
899:     VecGetLocalSize(xx,&nroots);
900:     PetscMalloc1(nleaves,&ilocal);
901:     PetscMalloc1(nleaves,&iremote);

903:     for (i=0; i<inedges; i++) {
904:       ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
905:       PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index); /* convert x's global index to (rank, index) */
906:     }
907:     PetscFree(rootdata);
908: #else
909:     PetscInt        j, k, n, disp, rlentotal, *sstart, *xindices_sorted, *yindices_sorted;
910:     const PetscInt *yrange;
911:     PetscMPIInt     nsend, nrecv, nreq, yrank, *sendto, *recvfrom, tag1, tag2;
912:     PetscInt       *slens, *rlens, count;
913:     PetscInt       *rxindices, *ryindices;
914:     MPI_Request    *reqs, *sreqs, *rreqs;

916:     /* Sorting makes the code simpler and faster, and also helps get rid of many O(P) arrays, which hurt scalability at large scale
917:        yindices_sorted - sorted yindices
918:        xindices_sorted - xindices sorted along with yindices
919:      */
920:     ISGetLocalSize(ixx, &n); /* ixx, iyy have the same local size */
921:     PetscMalloc2(n, &xindices_sorted, n, &yindices_sorted);
922:     PetscArraycpy(xindices_sorted, xindices, n);
923:     PetscArraycpy(yindices_sorted, yindices, n);
924:     PetscSortIntWithArray(n, yindices_sorted, xindices_sorted);
925:     VecGetOwnershipRange(xx, &xstart, NULL);
926:     if (xcommsize == 1) {
927:       for (i = 0; i < n; i++) xindices_sorted[i] += xstart;
928:     } /* Convert to global indices */

930:     /*=============================================================================
931:              Calculate info about messages I need to send
932:       =============================================================================*/
933:     /* nsend    - number of non-empty messages to send
934:        sendto   - [nsend] ranks I will send messages to
935:        sstart   - [nsend+1] sstart[i] is the start index in xindices_sorted[] I send to rank sendto[i]
936:        slens    - [ycommsize] I want to send slens[i] entries to rank i.
937:      */
938:     VecGetLayout(yy, &ylayout);
939:     PetscLayoutGetRanges(ylayout, &yrange);
940:     PetscCalloc1(ycommsize, &slens); /* The only O(P) array in this algorithm */

942:     i = j = nsend = 0;
943:     while (i < n) {
944:       if (yindices_sorted[i] >= yrange[j + 1]) { /* If i-th index is out of rank j's bound */
945:         do {
946:           j++;
947:         } while (yindices_sorted[i] >= yrange[j + 1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
949:       }
950:       i++;
951:       if (!slens[j]++) nsend++;
952:     }

954:     PetscMalloc2(nsend + 1, &sstart, nsend, &sendto);

956:     sstart[0] = 0;
957:     for (i = j = 0; i < ycommsize; i++) {
958:       if (slens[i]) {
959:         sendto[j]     = (PetscMPIInt)i;
960:         sstart[j + 1] = sstart[j] + slens[i];
961:         j++;
962:       }
963:     }

965:     /*=============================================================================
966:       Calculate the reverse info about messages I will recv
967:       =============================================================================*/
968:     /* nrecv     - number of messages I will recv
969:        recvfrom  - [nrecv] ranks I recv from
970:        rlens     - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
971:        rlentotal - sum of rlens[]
972:        rxindices - [rlentotal] recv buffer for xindices_sorted
973:        ryindices - [rlentotal] recv buffer for yindices_sorted
974:      */
975:     PetscGatherNumberOfMessages_Private(ycomm, NULL, slens, &nrecv);
976:     PetscGatherMessageLengths_Private(ycomm, nsend, nrecv, slens, &recvfrom, &rlens);
977:     PetscFree(slens); /* Free the O(P) array ASAP */
978:     rlentotal = 0;
979:     for (i = 0; i < nrecv; i++) rlentotal += rlens[i];

981:     /*=============================================================================
982:       Communicate with processors in recvfrom[] to populate rxindices and ryindices
983:       ============================================================================*/
984:     PetscCommGetNewTag(ycomm, &tag1);
985:     PetscCommGetNewTag(ycomm, &tag2);
986:     PetscMalloc2(rlentotal, &rxindices, rlentotal, &ryindices);
987:     PetscMPIIntCast((nsend + nrecv) * 2, &nreq);
988:     PetscMalloc1(nreq, &reqs);
989:     sreqs = reqs;
990:     rreqs = reqs + nsend * 2;

992:     for (i = disp = 0; i < nrecv; i++) {
993:       count = rlens[i];
994:       MPIU_Irecv(rxindices + disp, count, MPIU_INT, recvfrom[i], tag1, ycomm, rreqs + i);
995:       MPIU_Irecv(ryindices + disp, count, MPIU_INT, recvfrom[i], tag2, ycomm, rreqs + nrecv + i);
996:       disp += rlens[i];
997:     }

999:     for (i = 0; i < nsend; i++) {
1000:       count = sstart[i + 1] - sstart[i];
1001:       MPIU_Isend(xindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag1, ycomm, sreqs + i);
1002:       MPIU_Isend(yindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag2, ycomm, sreqs + nsend + i);
1003:     }
1004:     MPI_Waitall(nreq, reqs, MPI_STATUS_IGNORE);

1006:     /* Transform VecScatter into SF */
1007:     nleaves = rlentotal;
1008:     PetscMalloc1(nleaves, &ilocal);
1009:     PetscMalloc1(nleaves, &iremote);
1010:     MPI_Comm_rank(ycomm, &yrank);
1011:     for (i = disp = 0; i < nrecv; i++) {
1012:       for (j = 0; j < rlens[i]; j++) {
1013:         k         = disp + j;                                                                  /* k-th index pair */
1014:         ilocal[k] = ryindices[k] - yrange[yrank];                                              /* Convert y's global index to local index */
1015:         PetscLayoutFindOwnerIndex(xlayout, rxindices[k], &rank, &iremote[k].index); /* Convert x's global index to (rank, index) */
1016:         iremote[k].rank = rank;
1017:       }
1018:       disp += rlens[i];
1019:     }

1021:     PetscFree2(sstart, sendto);
1022:     PetscFree(rlens);
1023:     PetscFree(recvfrom);
1024:     PetscFree(reqs);
1025:     PetscFree2(rxindices, ryindices);
1026:     PetscFree2(xindices_sorted, yindices_sorted);
1027: #endif
1028:   } else {
1029:     /* PtoS or StoS */
1030:     ISGetLocalSize(iyy, &nleaves);
1031:     PetscMalloc1(nleaves, &ilocal);
1032:     PetscMalloc1(nleaves, &iremote);
1033:     PetscArraycpy(ilocal, yindices, nleaves);
1034:     for (i = 0; i < nleaves; i++) {
1035:       PetscLayoutFindOwnerIndex(xlayout, xindices[i], &rank, &iremote[i].index);
1036:       iremote[i].rank = rank;
1037:     }
1038:   }

1040:   /* MUST build SF on xx's comm, which is not necessarily identical to yy's comm.
1041:      In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
1042:      yy contains leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
1043:   PetscSFCreate(PetscObjectComm((PetscObject)xx), &sf);
1044:   sf->allow_multi_leaves = PETSC_TRUE;
1045:   PetscSFSetFromOptions(sf);
1046:   VecGetLocalSize(xx, &nroots);
1047:   PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER); /* Give ilocal/iremote to petsc and no need to free them here */

1049:   /* Free memory no longer needed */
1050:   ISRestoreIndices(ixx, &xindices);
1051:   ISRestoreIndices(iyy, &yindices);
1052:   if (can_do_block_opt) {
1053:     VecDestroy(&xx);
1054:     VecDestroy(&yy);
1055:     ISDestroy(&ixx);
1056:     ISDestroy(&iyy);
1057:   } else if (xcommsize == 1) {
1058:     VecDestroy(&xx);
1059:   }

1061: functionend:
1062:   sf->vscat.bs = bs;
1063:   if (sf->vscat.bs > 1) {
1064:     MPI_Type_contiguous(sf->vscat.bs, MPIU_SCALAR, &sf->vscat.unit);
1065:     MPI_Type_commit(&sf->vscat.unit);
1066:   } else {
1067:     sf->vscat.unit = MPIU_SCALAR;
1068:   }
1069:   VecGetLocalSize(x, &sf->vscat.from_n);
1070:   VecGetLocalSize(y, &sf->vscat.to_n);
1071:   if (!ix_old) ISDestroy(&ix); /* We created helper ix, iy. Free them */
1072:   if (!iy_old) ISDestroy(&iy);

1074:   /* Set default */
1075:   VecScatterSetFromOptions(sf);

1077:   *newsf = sf;
1078:   return 0;
1079: }
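An end-to-end sketch: gather the first few entries of an MPI vector into a small sequential vector on every rank (sizes and names are illustrative; error checking omitted):

    Vec        x, y;
    IS         ix;
    VecScatter ctx;

    VecCreateMPI(PETSC_COMM_WORLD, PETSC_DECIDE, 100, &x); /* parallel source vector */
    VecCreateSeq(PETSC_COMM_SELF, 4, &y);                  /* sequential destination on each rank */
    ISCreateStride(PETSC_COMM_SELF, 4, 0, 1, &ix);         /* global indices 0..3 of x */
    VecScatterCreate(x, ix, y, NULL, &ctx);                /* NULL iy: fill all of y */
    VecScatterBegin(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterEnd(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD);
    VecScatterDestroy(&ctx);
    ISDestroy(&ix);
    VecDestroy(&x);
    VecDestroy(&y);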

1081: /*@C
1082:       VecScatterCreateToAll - Creates a vector and a scatter context that copies all
1083:           vector values to each processor

1085:   Collective on Vec

1087:   Input Parameter:
1088: .  vin  - input MPIVEC

1090:   Output Parameters:
1091: +  ctx - scatter context
1092: -  vout - output SEQVEC that is large enough to scatter into

1094:   Level: intermediate

1096:    Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1097:    need to have it created

1099:    Usage:
1100: $        VecScatterCreateToAll(vin,&ctx,&vout);
1101: $
1102: $        // scatter as many times as you need
1103: $        VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1104: $        VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1105: $
1106: $        // destroy scatter context and local vector when no longer needed
1107: $        VecScatterDestroy(&ctx);
1108: $        VecDestroy(&vout);

1110:     Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1111:   automatically (unless you pass NULL in for that argument if you do not need it).

1113: .seealso: `VecScatterCreate()`, `VecScatterCreateToZero()`, `VecScatterBegin()`, `VecScatterEnd()`

1115: @*/
1116: PetscErrorCode VecScatterCreateToAll(Vec vin, VecScatter *ctx, Vec *vout)
1117: {
1118:   PetscInt  N;
1119:   IS        is;
1120:   Vec       tmp;
1121:   Vec      *tmpv;
1122:   PetscBool tmpvout = PETSC_FALSE;
1123:   VecType   roottype;

1128:   if (vout) {
1130:     tmpv = vout;
1131:   } else {
1132:     tmpvout = PETSC_TRUE;
1133:     tmpv    = &tmp;
1134:   }

1136:   /* Create a seq vec on each proc, with the same size as the original vec */
1137:   VecGetSize(vin, &N);
1138:   VecGetRootType_Private(vin, &roottype);
1139:   VecCreate(PETSC_COMM_SELF, tmpv);
1140:   VecSetSizes(*tmpv, N, PETSC_DECIDE);
1141:   VecSetType(*tmpv, roottype);
1142:   /* Create the VecScatter ctx with the communication info */
1143:   ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is);
1144:   VecScatterCreate(vin, is, *tmpv, is, ctx);
1145:   ISDestroy(&is);
1146:   if (tmpvout) VecDestroy(tmpv);
1147:   return 0;
1148: }

1150: /*@C
1151:       VecScatterCreateToZero - Creates an output vector and a scatter context used to
1152:               copy all vector values into the output vector on the zeroth processor

1154:   Collective on Vec

1156:   Input Parameter:
1157: .  vin  - input MPIVEC

1159:   Output Parameters:
1160: +  ctx - scatter context
1161: -  vout - output SEQVEC that is large enough to scatter into on processor 0 and
1162:           of length zero on all other processors

1164:   Level: intermediate

1166:    Note: vout may be NULL [PETSC_NULL_VEC from fortran] if you do not
1167:    need to have it created

1169:    Usage:
1170: $        VecScatterCreateToZero(vin,&ctx,&vout);
1171: $
1172: $        // scatter as many times as you need
1173: $        VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1174: $        VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1175: $
1176: $        // destroy scatter context and local vector when no longer needed
1177: $        VecScatterDestroy(&ctx);
1178: $        VecDestroy(&vout);

1180: .seealso: `VecScatterCreate()`, `VecScatterCreateToAll()`, `VecScatterBegin()`, `VecScatterEnd()`

1182:     Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1183:   automatically (unless you pass NULL in for that argument if you do not need it).

1185: @*/
1186: PetscErrorCode VecScatterCreateToZero(Vec vin, VecScatter *ctx, Vec *vout)
1187: {
1188:   PetscInt    N;
1189:   PetscMPIInt rank;
1190:   IS          is;
1191:   Vec         tmp;
1192:   Vec        *tmpv;
1193:   PetscBool   tmpvout = PETSC_FALSE;
1194:   VecType     roottype;

1199:   if (vout) {
1201:     tmpv = vout;
1202:   } else {
1203:     tmpvout = PETSC_TRUE;
1204:     tmpv    = &tmp;
1205:   }

1207:   /* Create a vec on each proc; on process 0 it has the same size as the original vec, elsewhere it has size zero */
1208:   VecGetSize(vin, &N);
1209:   MPI_Comm_rank(PetscObjectComm((PetscObject)vin), &rank);
1210:   if (rank) N = 0;
1211:   VecGetRootType_Private(vin, &roottype);
1212:   VecCreate(PETSC_COMM_SELF, tmpv);
1213:   VecSetSizes(*tmpv, N, PETSC_DECIDE);
1214:   VecSetType(*tmpv, roottype);
1215:   /* Create the VecScatter ctx with the communication info */
1216:   ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is);
1217:   VecScatterCreate(vin, is, *tmpv, is, ctx);
1218:   ISDestroy(&is);
1219:   if (tmpvout) VecDestroy(tmpv);
1220:   return 0;
1221: }

1223: /*@
1224:    VecScatterBegin - Begins a generalized scatter from one vector to
1225:    another. Complete the scattering phase with VecScatterEnd().

1227:    Neighbor-wise Collective on VecScatter

1229:    Input Parameters:
1230: +  sf - scatter context generated by VecScatterCreate()
1231: .  x - the vector from which we scatter
1232: .  y - the vector to which we scatter
1233: .  addv - either ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
1234:           not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1235: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1236:     SCATTER_FORWARD or SCATTER_REVERSE

1238:    Level: intermediate

1240:    Options Database: See VecScatterCreate()

1242:    Notes:
1243:    The vectors x and y need not be the same vectors used in the call
1244:    to VecScatterCreate(), but x must have the same parallel data layout
1245:    as that passed in as the x to VecScatterCreate(), similarly for the y.
1246:    Most likely they have been obtained from VecDuplicate().

1248:    You cannot change the values in the input vector between the calls to VecScatterBegin()
1249:    and VecScatterEnd().

1251:    If you use SCATTER_REVERSE the two arguments x and y should be reversed relative to
1252:    the SCATTER_FORWARD case.

1254:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1256:    This scatter is far more general than the conventional
1257:    scatter, since it can be a gather or a scatter or a combination,
1258:    depending on the indices ix and iy.  If x is a parallel vector and y
1259:    is sequential, VecScatterBegin() can serve to gather values to a
1260:    single processor.  Similarly, if y is parallel and x sequential, the
1261:    routine can scatter from one processor to many processors.

1263: .seealso: `VecScatterCreate()`, `VecScatterEnd()`
1264: @*/
1265: PetscErrorCode VecScatterBegin(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
1266: {
1267:   PetscInt to_n, from_n;

1272:   if (PetscDefined(USE_DEBUG)) {
1273:     /*
1274:      Error checking to make sure these vectors match the vectors used
1275:      to create the vector scatter context. -1 in the from_n and to_n indicate the
1276:      vector lengths are unknown (for example with mapped scatters) and thus
1277:      no error checking is performed.
1278:      */
1279:     if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
1280:       VecGetLocalSize(x, &from_n);
1281:       VecGetLocalSize(y, &to_n);
1282:       if (mode & SCATTER_REVERSE) {
1285:       } else {
1288:       }
1289:     }
1290:   }

1292:   sf->vscat.logging = PETSC_TRUE;
1293:   PetscLogEventBegin(VEC_ScatterBegin, sf, x, y, 0);
1294:   VecScatterBegin_Internal(sf, x, y, addv, mode);
1295:   if (sf->vscat.beginandendtogether) VecScatterEnd_Internal(sf, x, y, addv, mode);
1296:   PetscLogEventEnd(VEC_ScatterBegin, sf, x, y, 0);
1297:   sf->vscat.logging = PETSC_FALSE;
1298:   return 0;
1299: }

1301: /*@
1302:    VecScatterEnd - Ends a generalized scatter from one vector to another.  Call
1303:    after first calling VecScatterBegin().

1305:    Neighbor-wise Collective on VecScatter

1307:    Input Parameters:
1308: +  sf - scatter context generated by VecScatterCreate()
1309: .  x - the vector from which we scatter
1310: .  y - the vector to which we scatter
1311: .  addv - one of ADD_VALUES, MAX_VALUES, MIN_VALUES or INSERT_VALUES
1312: -  mode - the scattering mode, usually SCATTER_FORWARD.  The available modes are:
1313:      SCATTER_FORWARD, SCATTER_REVERSE

1315:    Level: intermediate

1317:    Notes:
1318:    If you use SCATTER_REVERSE the arguments x and y should be reversed relative to the SCATTER_FORWARD case.

1320:    y[iy[i]] = x[ix[i]], for i=0,...,ni-1

1322: .seealso: `VecScatterBegin()`, `VecScatterCreate()`
1323: @*/
1324: PetscErrorCode VecScatterEnd(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
1325: {
1329:   if (!sf->vscat.beginandendtogether) {
1330:     sf->vscat.logging = PETSC_TRUE;
1331:     PetscLogEventBegin(VEC_ScatterEnd, sf, x, y, 0);
1332:     VecScatterEnd_Internal(sf, x, y, addv, mode);
1333:     PetscLogEventEnd(VEC_ScatterEnd, sf, x, y, 0);
1334:     sf->vscat.logging = PETSC_FALSE;
1335:   }
1336:   return 0;
1337: }
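As noted above, SCATTER_REVERSE swaps the roles of the two vectors. A sketch reusing a ctx, x, and y from a forward scatter created as VecScatterCreate(x, ix, y, iy, &ctx) (error checking omitted):

    /* reverse: add the entries of y back into x at the positions given by ix; note y comes first */
    VecScatterBegin(ctx, y, x, ADD_VALUES, SCATTER_REVERSE);
    VecScatterEnd(ctx, y, x, ADD_VALUES, SCATTER_REVERSE);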