Actual source code: vscat.c
1: #include "petscsf.h"
2: #include "petscsystypes.h"
3: #include "petscvec.h"
4: #include <petsc/private/sfimpl.h>
5: #include <../src/vec/is/sf/impls/basic/sfbasic.h>
6: #include <../src/vec/is/sf/impls/basic/sfpack.h>
7: #include <petsc/private/vecimpl.h>
9: typedef enum {
10: IS_INVALID,
11: IS_GENERAL,
12: IS_BLOCK,
13: IS_STRIDE
14: } ISTypeID;
16: static inline PetscErrorCode ISGetTypeID_Private(IS is, ISTypeID *id)
17: {
18: PetscBool same;
20: *id = IS_INVALID;
21: PetscObjectTypeCompare((PetscObject)is, ISGENERAL, &same);
22: if (same) {
23: *id = IS_GENERAL;
24: goto functionend;
25: }
26: PetscObjectTypeCompare((PetscObject)is, ISBLOCK, &same);
27: if (same) {
28: *id = IS_BLOCK;
29: goto functionend;
30: }
31: PetscObjectTypeCompare((PetscObject)is, ISSTRIDE, &same);
32: if (same) {
33: *id = IS_STRIDE;
34: goto functionend;
35: }
36: functionend:
37: return 0;
38: }
40: static PetscErrorCode VecScatterBegin_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
41: {
42: PetscSF wsf = NULL; /* either sf or its local part */
43: MPI_Op mop = MPI_OP_NULL;
44: PetscMPIInt size;
45: PetscMemType xmtype = PETSC_MEMTYPE_HOST, ymtype = PETSC_MEMTYPE_HOST;
47: if (x != y) VecLockReadPush(x);
48: VecGetArrayReadAndMemType(x, &sf->vscat.xdata, &xmtype);
49: VecGetArrayAndMemType(y, &sf->vscat.ydata, &ymtype);
50: VecLockWriteSet(y, PETSC_TRUE);
52: /* SCATTER_LOCAL indicates ignoring inter-process communication */
53: MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
54: if ((mode & SCATTER_LOCAL) && size > 1) { /* Lazy creation of sf->vscat.lsf since SCATTER_LOCAL is uncommon */
55: if (!sf->vscat.lsf) PetscSFCreateLocalSF_Private(sf, &sf->vscat.lsf);
56: wsf = sf->vscat.lsf;
57: } else {
58: wsf = sf;
59: }
61: /* Note xdata/ydata is always recorded on sf (not lsf) above */
62: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
63: else if (addv == ADD_VALUES) mop = MPIU_SUM; /* Petsc defines its own MPI datatype and SUM operation for __float128 etc. */
64: else if (addv == MAX_VALUES) mop = MPIU_MAX;
65: else if (addv == MIN_VALUES) mop = MPIU_MIN;
66: else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);
68: if (mode & SCATTER_REVERSE) { /* REVERSE indicates leaves to root scatter. Note that x and y are swapped in input */
69: PetscSFReduceWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop);
70: } else { /* FORWARD indicates x to y scatter, where x is root and y is leaf */
71: PetscSFBcastWithMemTypeBegin(wsf, sf->vscat.unit, xmtype, sf->vscat.xdata, ymtype, sf->vscat.ydata, mop);
72: }
73: return 0;
74: }
76: static PetscErrorCode VecScatterEnd_Internal(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
77: {
78: PetscSF wsf = NULL;
79: MPI_Op mop = MPI_OP_NULL;
80: PetscMPIInt size;
82: /* SCATTER_LOCAL indicates ignoring inter-process communication */
83: MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
84: wsf = ((mode & SCATTER_LOCAL) && size > 1) ? sf->vscat.lsf : sf;
86: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
87: else if (addv == ADD_VALUES) mop = MPIU_SUM;
88: else if (addv == MAX_VALUES) mop = MPIU_MAX;
89: else if (addv == MIN_VALUES) mop = MPIU_MIN;
90: else SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "Unsupported InsertMode %d in VecScatterBegin/End", addv);
92: if (mode & SCATTER_REVERSE) { /* reverse scatter sends leaves to roots. Note that x and y are swapped in input */
93: PetscSFReduceEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop);
94: } else { /* forward scatter sends roots to leaves, i.e., x to y */
95: PetscSFBcastEnd(wsf, sf->vscat.unit, sf->vscat.xdata, sf->vscat.ydata, mop);
96: }
98: VecRestoreArrayReadAndMemType(x, &sf->vscat.xdata);
99: if (x != y) VecLockReadPop(x);
100: VecRestoreArrayAndMemType(y, &sf->vscat.ydata);
101: VecLockWriteSet(y, PETSC_FALSE);
102: return 0;
103: }
105: /* VecScatterRemap provides a lightweight way to slightly modify a VecScatter. Suppose the input sf scatters
106:    x[i] to y[j]; tomap gives a plan to change the scatter so that it scatters x[tomap[i]] to y[j] instead. Note that in SF,
107:    x provides the roots. That means we need to change incoming data such as bas->irootloc[].
108: */
109: static PetscErrorCode VecScatterRemap_Internal(VecScatter sf, const PetscInt *tomap, const PetscInt *frommap)
110: {
111: PetscInt i, bs = sf->vscat.bs;
112: PetscMPIInt size;
113: PetscBool ident = PETSC_TRUE, isbasic, isneighbor;
114: PetscSFType type;
115: PetscSF_Basic *bas = NULL;
117: /* check if it is an identity map. If it is, do nothing */
118: if (tomap) {
119: for (i = 0; i < sf->nroots * bs; i++) {
120: if (i != tomap[i]) {
121: ident = PETSC_FALSE;
122: break;
123: }
124: }
125: if (ident) return 0;
126: }
128: if (!tomap) return 0;
130: MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size);
132: /* Since the indices changed, we must also update the local SF. But we do not do it here since
133:    lsf is rarely used. We just destroy lsf and rebuild it on demand from the updated sf.
134: */
135: if (sf->vscat.lsf) PetscSFDestroy(&sf->vscat.lsf);
137: PetscSFGetType(sf, &type);
138: PetscObjectTypeCompare((PetscObject)sf, PETSCSFBASIC, &isbasic);
139: PetscObjectTypeCompare((PetscObject)sf, PETSCSFNEIGHBOR, &isneighbor);
142: PetscSFSetUp(sf); /* to build sf->irootloc if SetUp is not yet called */
144: /* Root indices are going to be remapped. This is tricky for SF. Root indices are used in sf->rremote,
145: sf->remote and bas->irootloc. The latter one is cheap to remap, but the former two are not.
146: To remap them, we have to do a bcast from roots to leaves, to let leaves know their updated roots.
147: Since VecScatterRemap is supposed to be a cheap routine to adapt a vecscatter by only changing where
148: x[] data is taken, we do not remap sf->rremote, sf->remote. The consequence is that operations
149: accessing them (such as PetscSFCompose) may get stale info. Considering VecScatter does not need
150: that complicated SF operations, we do not remap sf->rremote, sf->remote, instead we destroy them
151:     so that code accessing them (if any) will crash (instead of getting silent errors). Note that BcastAndOp/Reduce,
152: which are used by VecScatter and only rely on bas->irootloc, are updated and correct.
153: */
154: sf->remote = NULL;
155: PetscFree(sf->remote_alloc);
156: /* Not easy to free sf->rremote since it was allocated with PetscMalloc4(), so just give it crazy values */
157: for (i = 0; i < sf->roffset[sf->nranks]; i++) sf->rremote[i] = PETSC_MIN_INT;
159: /* Indices in tomap[] are for each individual vector entry. But indices in sf are for each
160: block in the vector. So before the remapping, we have to expand indices in sf by bs, and
161: after the remapping, we have to shrink them back.
162: */
163: bas = (PetscSF_Basic *)sf->data;
164: for (i = 0; i < bas->ioffset[bas->niranks]; i++) bas->irootloc[i] = tomap[bas->irootloc[i] * bs] / bs;
165: #if defined(PETSC_HAVE_DEVICE)
166: /* Free the irootloc copy on device. We allocate a new copy and get the updated value on demand. See PetscSFLinkGetRootPackOptAndIndices() */
167: for (i = 0; i < 2; i++) PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]);
168: #endif
169: /* Destroy and then rebuild root packing optimizations since indices are changed */
170: PetscSFResetPackFields(sf);
171: PetscSFSetUpPackFields(sf);
172: return 0;
173: }
175: /*
176:    Given a parallel VecScatter context, return the number of processes and vector entries involved in remote (i.e., off-process) communication
178: Input Parameters:
179: + sf - the context (must be a parallel vecscatter)
180: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
182: Output parameters:
183: + num_procs - number of remote processors
184: - num_entries - number of vector entries to send or recv
186: Notes:
187:   Sometimes PETSc internally needs to use the matrix-vector-multiply vecscatter context for other purposes. The client code
188:   usually only uses MPI_Send/Recv, and this group of subroutines provides the info needed for such uses.
190: .seealso: [](sec_scatter), `VecScatterGetRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
191: */
192: PetscErrorCode VecScatterGetRemoteCount_Private(VecScatter sf, PetscBool send, PetscInt *num_procs, PetscInt *num_entries)
193: {
194: PetscInt nranks, remote_start;
195: PetscMPIInt rank;
196: const PetscInt *offset;
197: const PetscMPIInt *ranks;
199: PetscSFSetUp(sf);
200: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
202: /* This routine is mainly used for MatMult's Mvctx. In Mvctx, we scatter an MPI vector x to a sequential vector lvec.
203: Remember x is roots and lvec is leaves. 'send' means roots to leaves communication. If 'send' is true, we need to
204: get info about which ranks this processor needs to send to. In other words, we need to call PetscSFGetLeafRanks().
205: If send is false, we do the opposite, calling PetscSFGetRootRanks().
206: */
207: if (send) PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, NULL);
208: else PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, NULL, NULL);
209: if (nranks) {
210: remote_start = (rank == ranks[0]) ? 1 : 0;
211: if (num_procs) *num_procs = nranks - remote_start;
212: if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
213: } else {
214: if (num_procs) *num_procs = 0;
215: if (num_entries) *num_entries = 0;
216: }
217: return 0;
218: }
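/* A minimal, hedged usage sketch of the counts above (not taken from this file): size a send buffer for
   hand-written MPI_Send/Recv on top of a MatMult Mvctx. The names mvctx, nsend, nentries and buf are
   illustrative only, and PetscCall() error checking is elided to match this listing.

     PetscInt     nsend, nentries;
     PetscScalar *buf;

     VecScatterGetRemoteCount_Private(mvctx, PETSC_TRUE, &nsend, &nentries); // send side (todata)
     PetscMalloc1(nentries, &buf);                                           // one slot per vector entry to send
     // ... fill buf and post the sends to the nsend remote ranks ...
     PetscFree(buf);
*/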
220: /* Given a parallel VecScatter context, return a plan that represents the remote communication.
221: Any output parameter can be NULL.
223: Input Parameters:
224: + sf - the context
225: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
227: Output parameters:
228: + n - number of remote processors
229: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
230: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
231: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
232: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
233: . indices - indices of entries to send/recv
234: . procs - ranks of remote processors
235: - bs - block size
237: .seealso: `VecScatterRestoreRemote_Private()`, `VecScatterGetRemoteOrdered_Private()`
238: */
239: PetscErrorCode VecScatterGetRemote_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
240: {
241: PetscInt nranks, remote_start;
242: PetscMPIInt rank;
243: const PetscInt *offset, *location;
244: const PetscMPIInt *ranks;
246: PetscSFSetUp(sf);
247: MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank);
249: if (send) PetscSFGetLeafRanks(sf, &nranks, &ranks, &offset, &location);
250: else PetscSFGetRootRanks(sf, &nranks, &ranks, &offset, &location, NULL);
252: if (nranks) {
253: remote_start = (rank == ranks[0]) ? 1 : 0;
254: if (n) *n = nranks - remote_start;
255: if (starts) *starts = &offset[remote_start];
256: if (indices) *indices = location; /* not &location[offset[remote_start]]. Starts[0] may point to the middle of indices[] */
257: if (procs) *procs = &ranks[remote_start];
258: } else {
259: if (n) *n = 0;
260: if (starts) *starts = NULL;
261: if (indices) *indices = NULL;
262: if (procs) *procs = NULL;
263: }
265: if (bs) *bs = 1;
266: return 0;
267: }
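/* A hedged sketch of the indexing caveat documented above: because starts[0] is not necessarily zero, a
   buffer of length starts[n]-starts[0] must be addressed with the shifted index starts[i]-starts[0]+j.
   ctx and xarray (the local array of the source vector) are assumed to exist, bs is assumed to be 1, and
   PetscCall() error checking is elided to match this listing.

     PetscInt           nsend, bs;
     const PetscInt    *starts, *indices;
     const PetscMPIInt *procs;
     PetscScalar       *buf;

     VecScatterGetRemote_Private(ctx, PETSC_TRUE, &nsend, &starts, &indices, &procs, &bs);
     PetscMalloc1(starts[nsend] - starts[0], &buf); // room for every entry to send
     for (PetscInt i = 0; i < nsend; i++) {
       for (PetscInt j = 0; j < starts[i + 1] - starts[i]; j++) buf[starts[i] - starts[0] + j] = xarray[indices[starts[i] + j]];
       // buf + starts[i] - starts[0] now holds the starts[i+1]-starts[i] values destined for rank procs[i]
     }
     PetscFree(buf);
     VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsend, &starts, &indices, &procs, &bs);
*/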
269: /* Given a parallel VecScatter context, return a plan that represents the remote communication. Ranks of remote
270: processors returned in procs must be sorted in ascending order. Any output parameter can be NULL.
272: Input Parameters:
273: + sf - the context
274: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
276: Output parameters:
277: + n - number of remote processors
278: . starts - starting point in indices for each proc. ATTENTION: starts[0] is not necessarily zero.
279: Therefore, expressions like starts[i+1]-starts[i] and indices[starts[i]+j] work as
280: expected for a CSR structure but buf[starts[i]+j] may be out of range if buf was allocated
281: with length starts[n]-starts[0]. One should use buf[starts[i]-starts[0]+j] instead.
282: . indices - indices of entries to send/recv
283: . procs - ranks of remote processors
284: - bs - block size
286: Notes:
287: Output parameters like starts, indices must also be adapted according to the sorted ranks.
289: .seealso: `VecScatterRestoreRemoteOrdered_Private()`, `VecScatterGetRemote_Private()`
290: */
291: PetscErrorCode VecScatterGetRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
292: {
293: VecScatterGetRemote_Private(sf, send, n, starts, indices, procs, bs);
294: if (PetscUnlikelyDebug(n && procs)) {
295: PetscInt i;
296: /* from back to front to also handle cases *n=0 */
298: }
299: return 0;
300: }
302: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemote_Private. This gives a chance for
303: an implementation to free memory allocated in the VecScatterGetRemote_Private call.
305: Input Parameters:
306: + sf - the context
307: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
309: Output parameters:
310: + n - number of remote processors
311: . starts - starting point in indices for each proc
312: . indices - indices of entries to send/recv
313: . procs - ranks of remote processors
314: - bs - block size
316: .seealso: `VecScatterGetRemote_Private()`
317: */
318: PetscErrorCode VecScatterRestoreRemote_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
319: {
320: if (starts) *starts = NULL;
321: if (indices) *indices = NULL;
322: if (procs) *procs = NULL;
323: return 0;
324: }
326: /* Given a parallel VecScatter context, restore the plan returned by VecScatterGetRemoteOrdered_Private. This gives a chance for
327: an implementation to free memory allocated in the VecScatterGetRemoteOrdered_Private call.
329: Input Parameters:
330: + sf - the context
331: - send - true to select the send info (i.e., todata), otherwise to select the recv info (i.e., fromdata)
333: Output parameters:
334: + n - number of remote processors
335: . starts - starting point in indices for each proc
336: . indices - indices of entries to send/recv
337: . procs - ranks of remote processors
338: - bs - block size
340: .seealso: `VecScatterGetRemoteOrdered_Private()`
341: */
342: PetscErrorCode VecScatterRestoreRemoteOrdered_Private(VecScatter sf, PetscBool send, PetscInt *n, const PetscInt **starts, const PetscInt **indices, const PetscMPIInt **procs, PetscInt *bs)
343: {
344: VecScatterRestoreRemote_Private(sf, send, n, starts, indices, procs, bs);
345: return 0;
346: }
348: /*@
349: VecScatterSetUp - Sets up the `VecScatter` to be able to actually scatter information between vectors
351: Collective on sf
353: Input Parameter:
354: . sf - the scatter context
356: Level: intermediate
358: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
359: @*/
360: PetscErrorCode VecScatterSetUp(VecScatter sf)
361: {
362: PetscSFSetUp(sf);
363: return 0;
364: }
366: /*@C
367: VecScatterSetType - Builds a vector scatter, for a particular vector scatter implementation.
369: Collective on sf
371: Input Parameters:
372: + sf - The `VecScatter` object
373: - type - The name of the vector scatter type
375: Options Database Key:
376: . -sf_type <type> - Sets the `VecScatterType`
378: Level: intermediate
380: Note:
381:   Use `VecScatterDuplicate()` to form additional vector scatters of the same type as an existing vector scatter.
383: .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterGetType()`, `VecScatterCreate()`
384: @*/
385: PetscErrorCode VecScatterSetType(VecScatter sf, VecScatterType type)
386: {
387: PetscSFSetType(sf, type);
388: return 0;
389: }
391: /*@C
392: VecScatterGetType - Gets the vector scatter type name (as a string) from the `VecScatter`.
394: Not Collective
396: Input Parameter:
397: . sf - The vector scatter
399: Output Parameter:
400: . type - The vector scatter type name
402: Level: intermediate
404: .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterSetType()`, `VecScatterCreate()`
405: @*/
406: PetscErrorCode VecScatterGetType(VecScatter sf, VecScatterType *type)
407: {
408: PetscSFGetType(sf, type);
409: return 0;
410: }
412: /*@C
413: VecScatterRegister - Adds a new vector scatter component implementation
415: Not Collective
417: Input Parameters:
418: + sname - The name of a new user-defined creation routine
419: - function - The creation routine itself
421: Level: advanced
423: .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecRegister()`
424: @*/
425: PetscErrorCode VecScatterRegister(const char sname[], PetscErrorCode (*function)(VecScatter))
426: {
427: PetscSFRegister(sname, function);
428: return 0;
429: }
431: /* ------------------------------------------------------------------*/
432: /*@
433: VecScatterGetMerged - Returns true if the scatter is completed in the `VecScatterBegin()`
434: and the `VecScatterEnd()` does nothing
436: Not Collective
438: Input Parameter:
439: . sf - scatter context created with `VecScatterCreate()`
441: Output Parameter:
442: . flg - `PETSC_TRUE` if the `VecScatterBegin()`/`VecScatterEnd()` are all done during the `VecScatterBegin()`
444: Level: developer
446: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`, `VecScatterBegin()`
447: @*/
448: PetscErrorCode VecScatterGetMerged(VecScatter sf, PetscBool *flg)
449: {
451: if (flg) *flg = sf->vscat.beginandendtogether;
452: return 0;
453: }
454: /*@C
455: VecScatterDestroy - Destroys a scatter context created by `VecScatterCreate()`
457: Collective on sf
459: Input Parameter:
460: . sf - the scatter context
462: Level: intermediate
464: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCopy()`
465: @*/
466: PetscErrorCode VecScatterDestroy(VecScatter *sf)
467: {
468: PetscSFDestroy(sf);
469: return 0;
470: }
472: /*@
473: VecScatterCopy - Makes a copy of a scatter context.
475: Collective on sf
477: Input Parameter:
478: . sf - the scatter context
480: Output Parameter:
481: . newsf - the context copy
483: Level: advanced
485: .seealso: [](sec_scatter), `VecScatter`, `VecScatterType`, `VecScatterCreate()`, `VecScatterDestroy()`
486: @*/
487: PetscErrorCode VecScatterCopy(VecScatter sf, VecScatter *newsf)
488: {
490: PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, newsf);
491: PetscSFSetUp(*newsf);
492: return 0;
493: }
495: /*@C
496: VecScatterViewFromOptions - View a `VecScatter` object based on the options database
498: Collective on sf
500: Input Parameters:
501: + sf - the scatter context
502: . obj - Optional object
503: - name - command line option
505: Level: intermediate
507: .seealso: [](sec_scatter), `VecScatter`, `VecScatterView()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
508: @*/
509: PetscErrorCode VecScatterViewFromOptions(VecScatter sf, PetscObject obj, const char name[])
510: {
512: PetscObjectViewFromOptions((PetscObject)sf, obj, name);
513: return 0;
514: }
516: /* ------------------------------------------------------------------*/
517: /*@C
518: VecScatterView - Views a vector scatter context.
520: Collective on sf
522: Input Parameters:
523: + sf - the scatter context
524: - viewer - the viewer for displaying the context
526: Level: intermediate
528: .seealso: [](sec_scatter), `VecScatter`, `VecScatterViewFromOptions()`, `PetscObjectViewFromOptions()`, `VecScatterCreate()`
529: @*/
530: PetscErrorCode VecScatterView(VecScatter sf, PetscViewer viewer)
531: {
532: PetscSFView(sf, viewer);
533: return 0;
534: }
536: /*@C
537: VecScatterRemap - Remaps the "from" and "to" indices in a
538: vector scatter context. FOR EXPERTS ONLY!
540: Collective on sf
542: Input Parameters:
543: + sf - vector scatter context
544: . tomap - remapping plan for "to" indices (may be NULL).
545: - frommap - remapping plan for "from" indices (may be NULL)
547: Level: developer
549: Notes:
550: In the parallel case the todata contains indices from where the data is taken
551: (and then sent to others)! The fromdata contains indices from where the received
552: data is finally put locally.
554: In the sequential case the todata contains indices from where the data is put
555:   and the fromdata contains indices from where the data is taken.
556: This is backwards from the parallel case!
558: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`
559: @*/
560: PetscErrorCode VecScatterRemap(VecScatter sf, PetscInt tomap[], PetscInt frommap[])
561: {
564: VecScatterRemap_Internal(sf, tomap, frommap);
566:   /* Mark the vector lengths as unknown because we do not know the lengths of the remapped vectors */
567: sf->vscat.from_n = -1;
568: sf->vscat.to_n = -1;
569: return 0;
570: }
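/* A hedged sketch of building a tomap[] plan (illustrative, not taken from this file): if the original
   context scatters x[i] to y[j], the remapped context scatters x[tomap[i]] to y[j]. tomap is indexed by
   the local entries of x and must map them to local entries of x; here it simply reverses the local
   ordering. ctx and x are assumed to exist, and PetscCall() error checking is elided to match this listing.

     PetscInt  nx, *tomap;

     VecGetLocalSize(x, &nx);
     PetscMalloc1(nx, &tomap);
     for (PetscInt i = 0; i < nx; i++) tomap[i] = nx - 1 - i; // take x's local entries in reverse order
     VecScatterRemap(ctx, tomap, NULL);
     PetscFree(tomap);
*/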
572: /*@
573: VecScatterSetFromOptions - Configures the vector scatter from the options database.
575: Collective
577: Input Parameter:
578: . sf - The vector scatter
580: Notes:
581: To see all options, run your program with the -help option, or consult the users manual.
583:   Must be called before `VecScatterSetUp()` and before the vector scatter is used.
585: Level: beginner
587: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterDestroy()`, `VecScatterSetUp()`
588: @*/
589: PetscErrorCode VecScatterSetFromOptions(VecScatter sf)
590: {
592: PetscObjectOptionsBegin((PetscObject)sf);
594: sf->vscat.beginandendtogether = PETSC_FALSE;
595: PetscOptionsBool("-vecscatter_merge", "Use combined (merged) vector scatter begin and end", "VecScatterCreate", sf->vscat.beginandendtogether, &sf->vscat.beginandendtogether, NULL);
596: if (sf->vscat.beginandendtogether) PetscInfo(sf, "Using combined (merged) vector scatter begin and end\n");
597: PetscOptionsEnd();
598: return 0;
599: }
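/* A small usage sketch of the option handled above (ctx is assumed to come from VecScatterCreate();
   PetscCall() error checking is elided to match this listing): with -vecscatter_merge in the options
   database, VecScatterBegin() performs the whole scatter and VecScatterEnd() becomes a no-op, which
   VecScatterGetMerged() reports.

     PetscBool merged;

     VecScatterSetFromOptions(ctx); // picks up -vecscatter_merge
     VecScatterGetMerged(ctx, &merged);
     if (merged) PetscPrintf(PETSC_COMM_WORLD, "VecScatterBegin/End are merged\n");
*/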
601: /* ---------------------------------------------------------------- */
602: /*@
603: VecScatterCreate - Creates a vector scatter context.
605: Collective
607: Input Parameters:
608: + x - a vector that defines the shape (parallel data layout of the vector)
609:       of vectors from which we scatter
610: . y - a vector that defines the shape (parallel data layout of the vector)
611:       of vectors to which we scatter
612: . ix - the indices of x to scatter (if NULL scatters all values)
613: - iy - the indices of y to hold results (if NULL fills entire vector y)
615: Output Parameter:
616: . newsf - location to store the new scatter context
618: Options Database Keys:
619: + -vecscatter_view - Prints detail of communications
620: . -vecscatter_view ::ascii_info - Print less details about communication
621: - -vecscatter_merge - `VecScatterBegin()` handles all of the communication and `VecScatterEnd()` is a no-op;
622:                      this eliminates the chance for overlap of computation and communication
624: Level: intermediate
626: Notes:
627:   If both x and y are parallel, their communicator must be on the same
628:   set of processes, but their process order can be different.
629:   In calls to the scatter operations you can use different vectors than the x and
630:   y you used above; BUT they must have the same parallel data layout, for example,
631:   they could be obtained from `VecDuplicate()`.
632: A VecScatter context CANNOT be used in two or more simultaneous scatters;
633:   that is, you cannot call a second `VecScatterBegin()` with the same scatter
634: context until the `VecScatterEnd()` has been called on the first `VecScatterBegin()`.
635: In this case a separate `VecScatter` is needed for each concurrent scatter.
637:   ix and iy cannot both be NULL.
639: Use `VecScatterCreateToAll()` to create a vecscatter that copies an MPI vector to sequential vectors on all MPI ranks.
640: Use `VecScatterCreateToZero()` to create a vecscatter that copies an MPI vector to a sequential vector on MPI rank 0.
641: These special vecscatters have better performance than general ones.
643: .seealso: [](sec_scatter), `VecScatter`, `VecScatterDestroy()`, `VecScatterCreateToAll()`, `VecScatterCreateToZero()`, `PetscSFCreate()`
644: @*/
645: PetscErrorCode VecScatterCreate(Vec x, IS ix, Vec y, IS iy, VecScatter *newsf)
646: {
647: MPI_Comm xcomm, ycomm, bigcomm;
648: Vec xx, yy;
649: IS ix_old = ix, iy_old = iy, ixx, iyy;
650: PetscMPIInt xcommsize, ycommsize, rank, result;
651: PetscInt i, n, N, nroots, nleaves, *ilocal, xstart, ystart, ixsize, iysize, xlen, ylen;
652: const PetscInt *xindices, *yindices;
653: PetscSFNode *iremote;
654: PetscLayout xlayout, ylayout;
655: ISTypeID ixid, iyid;
656: PetscInt bs, bsx, bsy, min, max, m[2], ixfirst, ixstep, iyfirst, iystep;
657: PetscBool can_do_block_opt = PETSC_FALSE;
658: PetscSF sf;
663: /* Get comm from x and y */
664: PetscObjectGetComm((PetscObject)x, &xcomm);
665: MPI_Comm_size(xcomm, &xcommsize);
666: PetscObjectGetComm((PetscObject)y, &ycomm);
667: MPI_Comm_size(ycomm, &ycommsize);
668: if (xcommsize > 1 && ycommsize > 1) {
669: MPI_Comm_compare(xcomm, ycomm, &result);
671: }
672: bs = 1; /* default, no blocking */
674: /*
675: Let P and S stand for parallel and sequential vectors respectively. There are four combinations of vecscatters: PtoP, PtoS,
676: StoP and StoS. The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix
677: contains global indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.
679: SF builds around concepts of local leaves and remote roots. We treat source vector x as roots and destination vector y as
680: leaves. A PtoS scatter can be naturally mapped to SF. We transform PtoP and StoP to PtoS, and treat StoS as trivial PtoS.
681: */
683: /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
684: if (!ix) {
685: if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
686: VecGetSize(x, &N);
687: ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &ix);
688: } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
689: VecGetLocalSize(x, &n);
690: VecGetOwnershipRange(x, &xstart, NULL);
691: ISCreateStride(PETSC_COMM_SELF, n, xstart, 1, &ix);
692: }
693: }
695: if (!iy) {
696: if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means the whole y will be scattered to from each seq x */
697: VecGetSize(y, &N);
698: ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iy);
699: } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
700: VecGetLocalSize(y, &n);
701: VecGetOwnershipRange(y, &ystart, NULL);
702: ISCreateStride(PETSC_COMM_SELF, n, ystart, 1, &iy);
703: }
704: }
706: /* Do error checking immediately after we have non-empty ix, iy */
707: ISGetLocalSize(ix, &ixsize);
708: ISGetLocalSize(iy, &iysize);
709: VecGetSize(x, &xlen);
710: VecGetSize(y, &ylen);
712: ISGetMinMax(ix, &min, &max);
714: ISGetMinMax(iy, &min, &max);
717: /* Extract info about ix, iy for further test */
718: ISGetTypeID_Private(ix, &ixid);
719: ISGetTypeID_Private(iy, &iyid);
720: if (ixid == IS_BLOCK) ISGetBlockSize(ix, &bsx);
721: else if (ixid == IS_STRIDE) ISStrideGetInfo(ix, &ixfirst, &ixstep);
723: if (iyid == IS_BLOCK) ISGetBlockSize(iy, &bsy);
724: else if (iyid == IS_STRIDE) ISStrideGetInfo(iy, &iyfirst, &iystep);
726:   /* Check if a PtoS is a special ToAll/ToZero scatter, which can be the result of VecScatterCreateToAll/Zero.
727: ToAll means a whole MPI vector is copied to a seq vector on every process. ToZero means a whole MPI
728:      vector is copied to a seq vector on rank 0 and the other processes do nothing (i.e., they input empty ix, iy).
730: We can optimize these scatters with MPI collectives. We can also avoid costly analysis used for general scatters.
731: */
732: if (xcommsize > 1 && ycommsize == 1) { /* Ranks do not diverge at this if-test */
733: PetscInt pattern[2] = {0, 0}; /* A boolean array with pattern[0] for allgather-like (ToAll) and pattern[1] for gather-like (ToZero) */
734: PetscLayout map;
736: MPI_Comm_rank(xcomm, &rank);
737: VecGetLayout(x, &map);
738: if (rank == 0) {
739: if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
740: /* Rank 0 scatters the whole mpi x to seq y, so it is either a ToAll or a ToZero candidate in its view */
741: pattern[0] = pattern[1] = 1;
742: }
743: } else {
744: if (ixid == IS_STRIDE && iyid == IS_STRIDE && ixsize == xlen && ixfirst == 0 && ixstep == 1 && iyfirst == 0 && iystep == 1) {
745: /* Other ranks also scatter the whole mpi x to seq y, so it is a ToAll candidate in their view */
746: pattern[0] = 1;
747: } else if (ixsize == 0) {
748: /* Other ranks do nothing, so it is a ToZero candidate */
749: pattern[1] = 1;
750: }
751: }
753:     /* Kill two birds with one stone (the expensive allreduce): pattern[] tells if it is ToAll or ToZero */
754: MPIU_Allreduce(MPI_IN_PLACE, pattern, 2, MPIU_INT, MPI_LAND, xcomm);
756: if (pattern[0] || pattern[1]) {
757: PetscSFCreate(xcomm, &sf);
758: PetscSFSetFromOptions(sf);
759: PetscSFSetGraphWithPattern(sf, map, pattern[0] ? PETSCSF_PATTERN_ALLGATHER : PETSCSF_PATTERN_GATHER);
760: goto functionend; /* No further analysis needed. What a big win! */
761: }
762: }
764: /* Continue ...
765: Do block optimization by taking advantage of high level info available in ix, iy.
766: The block optimization is valid when all of the following conditions are met:
767: 1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
768: 2) ix, iy have the same block size;
769: 3) all processors agree on one block size;
770: 4) no blocks span more than one process;
771: */
772: bigcomm = (xcommsize == 1) ? ycomm : xcomm;
774:   /* Processors could go through different paths in this if-else test */
775: m[0] = m[1] = PETSC_MPI_INT_MIN;
776: if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
777: m[0] = PetscMax(bsx, bsy);
778: m[1] = -PetscMin(bsx, bsy);
779: } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep == 1 && iyfirst % bsx == 0) {
780: m[0] = bsx;
781: m[1] = -bsx;
782: } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep == 1 && ixfirst % bsy == 0) {
783: m[0] = bsy;
784: m[1] = -bsy;
785: }
786: /* Get max and min of bsx,bsy over all processes in one allreduce */
787: MPIU_Allreduce(MPI_IN_PLACE, m, 2, MPIU_INT, MPI_MAX, bigcomm);
788: max = m[0];
789: min = -m[1];
791: /* Since we used allreduce above, all ranks will have the same min and max. min==max
792:      implies all ranks have the same bs. Do a further test to see if local vectors are divisible
793:      by bs on ALL ranks. If they are, we are assured that no blocks span more than one processor.
794: */
795: if (min == max && min > 1) {
796: VecGetLocalSize(x, &xlen);
797: VecGetLocalSize(y, &ylen);
798: m[0] = xlen % min;
799: m[1] = ylen % min;
800: MPIU_Allreduce(MPI_IN_PLACE, m, 2, MPIU_INT, MPI_LOR, bigcomm);
801: if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
802: }
804: /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
805: and layout are actually used in building SF. Suppose blocked ix representing {0,1,2,6,7,8} has
806: indices {0,2} and bs=3, then ixx = {0,2}; suppose strided iy={3,4,5,6,7,8}, then iyy={1,2}.
808: xx is a little special. If x is seq, then xx is the concatenation of seq x's on ycomm. In this way,
809: we can treat PtoP and StoP uniformly as PtoS.
810: */
811: if (can_do_block_opt) {
812: const PetscInt *indices;
814: /* Shrink x and ix */
815: bs = min;
816:     VecCreateMPIWithArray(bigcomm, 1, xlen / bs, PETSC_DECIDE, NULL, &xx); /* We only care about xx's layout */
817: if (ixid == IS_BLOCK) {
818: ISBlockGetIndices(ix, &indices);
819: ISBlockGetLocalSize(ix, &ixsize);
820: ISCreateGeneral(PETSC_COMM_SELF, ixsize, indices, PETSC_COPY_VALUES, &ixx);
821: ISBlockRestoreIndices(ix, &indices);
822: } else { /* ixid == IS_STRIDE */
823: ISGetLocalSize(ix, &ixsize);
824: ISCreateStride(PETSC_COMM_SELF, ixsize / bs, ixfirst / bs, 1, &ixx);
825: }
827: /* Shrink y and iy */
828: VecCreateMPIWithArray(ycomm, 1, ylen / bs, PETSC_DECIDE, NULL, &yy);
829: if (iyid == IS_BLOCK) {
830: ISBlockGetIndices(iy, &indices);
831: ISBlockGetLocalSize(iy, &iysize);
832: ISCreateGeneral(PETSC_COMM_SELF, iysize, indices, PETSC_COPY_VALUES, &iyy);
833: ISBlockRestoreIndices(iy, &indices);
834: } else { /* iyid == IS_STRIDE */
835: ISGetLocalSize(iy, &iysize);
836: ISCreateStride(PETSC_COMM_SELF, iysize / bs, iyfirst / bs, 1, &iyy);
837: }
838: } else {
839: ixx = ix;
840: iyy = iy;
841: yy = y;
842: if (xcommsize == 1) VecCreateMPIWithArray(bigcomm, 1, xlen, PETSC_DECIDE, NULL, &xx);
843: else xx = x;
844: }
846: /* Now it is ready to build SF with preprocessed (xx, yy) and (ixx, iyy) */
847: ISGetIndices(ixx, &xindices);
848: ISGetIndices(iyy, &yindices);
849: VecGetLayout(xx, &xlayout);
851: if (ycommsize > 1) {
852: /* PtoP or StoP */
854:     /* Below is a piece of complex code with a very simple goal: move global index pairs (xindices[i], yindices[i])
855:        to the owner process of yindices[i] according to ylayout, for i = 0..n.
857:        I first did it through a temp sf, but later realized the old design was inefficient and also distorted the log view.
858:        We want to map one VecScatterCreate() call to one PetscSFCreate() call. The old design mapped to three
859:        PetscSFCreate() calls. This code is on the critical path of VecScatterSetUp and is used by every VecScatterCreate.
860:        So I commented it out and did another optimized implementation. The commented code is left here for reference.
861: */
862: #if 0
863: const PetscInt *degree;
864: PetscSF tmpsf;
865: PetscInt inedges=0,*leafdata,*rootdata;
867: VecGetOwnershipRange(xx,&xstart,NULL);
868: VecGetLayout(yy,&ylayout);
869: VecGetOwnershipRange(yy,&ystart,NULL);
871: VecGetLocalSize(yy,&nroots);
872: ISGetLocalSize(iyy,&nleaves);
873: PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata);
875: for (i=0; i<nleaves; i++) {
876: PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index);
877: leafdata[2*i] = yindices[i];
878: leafdata[2*i+1] = (xcommsize > 1)? xindices[i] : xindices[i] + xstart;
879: }
881: PetscSFCreate(ycomm,&tmpsf);
882: PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
884:   PetscSFComputeDegreeBegin(tmpsf,&degree);
885:   PetscSFComputeDegreeEnd(tmpsf,&degree);
887: for (i=0; i<nroots; i++) inedges += degree[i];
888: PetscMalloc1(inedges*2,&rootdata);
889: PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata);
890: PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata);
892: PetscFree2(iremote,leafdata);
893: PetscSFDestroy(&tmpsf);
895: /* rootdata contains global index pairs (i, j). j's are owned by the current process, but i's can point to anywhere.
896: We convert j to local, and convert i to (rank, index). In the end, we get an PtoS suitable for building SF.
897: */
898: nleaves = inedges;
899: VecGetLocalSize(xx,&nroots);
900: PetscMalloc1(nleaves,&ilocal);
901: PetscMalloc1(nleaves,&iremote);
903: for (i=0; i<inedges; i++) {
904: ilocal[i] = rootdata[2*i] - ystart; /* convert y's global index to local index */
905: PetscLayoutFindOwnerIndex(xlayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index); /* convert x's global index to (rank, index) */
906: }
907: PetscFree(rootdata);
908: #else
909: PetscInt j, k, n, disp, rlentotal, *sstart, *xindices_sorted, *yindices_sorted;
910: const PetscInt *yrange;
911: PetscMPIInt nsend, nrecv, nreq, yrank, *sendto, *recvfrom, tag1, tag2;
912: PetscInt *slens, *rlens, count;
913: PetscInt *rxindices, *ryindices;
914: MPI_Request *reqs, *sreqs, *rreqs;
916:     /* Sorting makes the code simpler and faster, and also helps get rid of many O(P) arrays, which hurt scalability at large scale
917: yindices_sorted - sorted yindices
918:        xindices_sorted - xindices sorted along with yindices
919: */
920: ISGetLocalSize(ixx, &n); /*ixx, iyy have the same local size */
921: PetscMalloc2(n, &xindices_sorted, n, &yindices_sorted);
922: PetscArraycpy(xindices_sorted, xindices, n);
923: PetscArraycpy(yindices_sorted, yindices, n);
924: PetscSortIntWithArray(n, yindices_sorted, xindices_sorted);
925: VecGetOwnershipRange(xx, &xstart, NULL);
926: if (xcommsize == 1) {
927: for (i = 0; i < n; i++) xindices_sorted[i] += xstart;
928: } /* Convert to global indices */
930: /*=============================================================================
931: Calculate info about messages I need to send
932: =============================================================================*/
933: /* nsend - number of non-empty messages to send
934: sendto - [nsend] ranks I will send messages to
935:        sstart - [nsend+1] sstart[i] is the start index in xindices_sorted[] I send to rank sendto[i]
936: slens - [ycommsize] I want to send slens[i] entries to rank i.
937: */
938: VecGetLayout(yy, &ylayout);
939: PetscLayoutGetRanges(ylayout, &yrange);
940: PetscCalloc1(ycommsize, &slens); /* The only O(P) array in this algorithm */
942: i = j = nsend = 0;
943: while (i < n) {
944: if (yindices_sorted[i] >= yrange[j + 1]) { /* If i-th index is out of rank j's bound */
945: do {
946: j++;
947: } while (yindices_sorted[i] >= yrange[j + 1] && j < ycommsize); /* Increase j until i-th index falls in rank j's bound */
949: }
950: i++;
951: if (!slens[j]++) nsend++;
952: }
954: PetscMalloc2(nsend + 1, &sstart, nsend, &sendto);
956: sstart[0] = 0;
957: for (i = j = 0; i < ycommsize; i++) {
958: if (slens[i]) {
959: sendto[j] = (PetscMPIInt)i;
960: sstart[j + 1] = sstart[j] + slens[i];
961: j++;
962: }
963: }
965: /*=============================================================================
966: Calculate the reverse info about messages I will recv
967: =============================================================================*/
968: /* nrecv - number of messages I will recv
969: recvfrom - [nrecv] ranks I recv from
970: rlens - [nrecv] I will recv rlens[i] entries from rank recvfrom[i]
971: rlentotal - sum of rlens[]
972: rxindices - [rlentotal] recv buffer for xindices_sorted
973: ryindices - [rlentotal] recv buffer for yindices_sorted
974: */
975: PetscGatherNumberOfMessages_Private(ycomm, NULL, slens, &nrecv);
976: PetscGatherMessageLengths_Private(ycomm, nsend, nrecv, slens, &recvfrom, &rlens);
977: PetscFree(slens); /* Free the O(P) array ASAP */
978: rlentotal = 0;
979: for (i = 0; i < nrecv; i++) rlentotal += rlens[i];
981: /*=============================================================================
982: Communicate with processors in recvfrom[] to populate rxindices and ryindices
983: ============================================================================*/
984: PetscCommGetNewTag(ycomm, &tag1);
985: PetscCommGetNewTag(ycomm, &tag2);
986: PetscMalloc2(rlentotal, &rxindices, rlentotal, &ryindices);
987: PetscMPIIntCast((nsend + nrecv) * 2, &nreq);
988: PetscMalloc1(nreq, &reqs);
989: sreqs = reqs;
990: rreqs = reqs + nsend * 2;
992: for (i = disp = 0; i < nrecv; i++) {
993: count = rlens[i];
994: MPIU_Irecv(rxindices + disp, count, MPIU_INT, recvfrom[i], tag1, ycomm, rreqs + i);
995: MPIU_Irecv(ryindices + disp, count, MPIU_INT, recvfrom[i], tag2, ycomm, rreqs + nrecv + i);
996: disp += rlens[i];
997: }
999: for (i = 0; i < nsend; i++) {
1000: count = sstart[i + 1] - sstart[i];
1001: MPIU_Isend(xindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag1, ycomm, sreqs + i);
1002: MPIU_Isend(yindices_sorted + sstart[i], count, MPIU_INT, sendto[i], tag2, ycomm, sreqs + nsend + i);
1003: }
1004:     MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);
1006: /* Transform VecScatter into SF */
1007: nleaves = rlentotal;
1008: PetscMalloc1(nleaves, &ilocal);
1009: PetscMalloc1(nleaves, &iremote);
1010: MPI_Comm_rank(ycomm, &yrank);
1011: for (i = disp = 0; i < nrecv; i++) {
1012: for (j = 0; j < rlens[i]; j++) {
1013: k = disp + j; /* k-th index pair */
1014: ilocal[k] = ryindices[k] - yrange[yrank]; /* Convert y's global index to local index */
1015: PetscLayoutFindOwnerIndex(xlayout, rxindices[k], &rank, &iremote[k].index); /* Convert x's global index to (rank, index) */
1016: iremote[k].rank = rank;
1017: }
1018: disp += rlens[i];
1019: }
1021: PetscFree2(sstart, sendto);
1022: PetscFree(rlens);
1023: PetscFree(recvfrom);
1024: PetscFree(reqs);
1025: PetscFree2(rxindices, ryindices);
1026: PetscFree2(xindices_sorted, yindices_sorted);
1027: #endif
1028: } else {
1029: /* PtoS or StoS */
1030: ISGetLocalSize(iyy, &nleaves);
1031: PetscMalloc1(nleaves, &ilocal);
1032: PetscMalloc1(nleaves, &iremote);
1033: PetscArraycpy(ilocal, yindices, nleaves);
1034: for (i = 0; i < nleaves; i++) {
1035: PetscLayoutFindOwnerIndex(xlayout, xindices[i], &rank, &iremote[i].index);
1036: iremote[i].rank = rank;
1037: }
1038: }
1040: /* MUST build SF on xx's comm, which is not necessarily identical to yy's comm.
1041: In SF's view, xx contains the roots (i.e., the remote) and iremote[].rank are ranks in xx's comm.
1042:      yy contains leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
1043: PetscSFCreate(PetscObjectComm((PetscObject)xx), &sf);
1044: sf->allow_multi_leaves = PETSC_TRUE;
1045: PetscSFSetFromOptions(sf);
1046: VecGetLocalSize(xx, &nroots);
1047:   PetscSFSetGraph(sf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER); /* Give ilocal/iremote to PETSc; no need to free them here */
1049: /* Free memory no longer needed */
1050: ISRestoreIndices(ixx, &xindices);
1051: ISRestoreIndices(iyy, &yindices);
1052: if (can_do_block_opt) {
1053: VecDestroy(&xx);
1054: VecDestroy(&yy);
1055: ISDestroy(&ixx);
1056: ISDestroy(&iyy);
1057: } else if (xcommsize == 1) {
1058: VecDestroy(&xx);
1059: }
1061: functionend:
1062: sf->vscat.bs = bs;
1063: if (sf->vscat.bs > 1) {
1064: MPI_Type_contiguous(sf->vscat.bs, MPIU_SCALAR, &sf->vscat.unit);
1065: MPI_Type_commit(&sf->vscat.unit);
1066: } else {
1067: sf->vscat.unit = MPIU_SCALAR;
1068: }
1069: VecGetLocalSize(x, &sf->vscat.from_n);
1070: VecGetLocalSize(y, &sf->vscat.to_n);
1071: if (!ix_old) ISDestroy(&ix); /* We created helper ix, iy. Free them */
1072: if (!iy_old) ISDestroy(&iy);
1074: /* Set default */
1075: VecScatterSetFromOptions(sf);
1077: *newsf = sf;
1078: return 0;
1079: }
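/* A minimal end-to-end sketch of the PtoS case analyzed above (illustrative sizes only; PetscCall()
   error checking is elided to match this listing): gather the first 10 global entries of an MPI vector
   x into a sequential vector y on every rank.

     Vec        x, y;
     IS         ix, iy;
     VecScatter ctx;

     VecCreateMPI(PETSC_COMM_WORLD, PETSC_DECIDE, 100, &x);
     VecCreateSeq(PETSC_COMM_SELF, 10, &y);
     ISCreateStride(PETSC_COMM_SELF, 10, 0, 1, &ix); // global indices 0..9 of x
     ISCreateStride(PETSC_COMM_SELF, 10, 0, 1, &iy); // local indices 0..9 of y
     VecScatterCreate(x, ix, y, iy, &ctx);
     VecScatterBegin(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD);
     VecScatterEnd(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD);
     VecScatterDestroy(&ctx);
     ISDestroy(&ix);
     ISDestroy(&iy);
     VecDestroy(&x);
     VecDestroy(&y);
*/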
1081: /*@C
1082: VecScatterCreateToAll - Creates a vector and a scatter context that copies all
1083: vector values to each processor
1085: Collective
1087: Input Parameter:
1088: . vin - an `MPIVEC`
1090: Output Parameters:
1091: + ctx - scatter context
1092: - vout - output `SEQVEC` that is large enough to scatter into
1094: Level: intermediate
1096: Usage:
1097: .vb
1098: VecScatterCreateToAll(vin,&ctx,&vout);
1100: // scatter as many times as you need
1101: VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1102: VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1104: // destroy scatter context and local vector when no longer needed
1105: VecScatterDestroy(&ctx);
1106: VecDestroy(&vout);
1107: .ve
1109: Notes:
1110:   vout may be NULL [`PETSC_NULL_VEC` from Fortran] if you do not
1111: need to have it created
1113: Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1114: automatically (unless you pass NULL in for that argument if you do not need it).
1116: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToZero()`, `VecScatterBegin()`, `VecScatterEnd()`
1117: @*/
1118: PetscErrorCode VecScatterCreateToAll(Vec vin, VecScatter *ctx, Vec *vout)
1119: {
1120: PetscInt N;
1121: IS is;
1122: Vec tmp;
1123: Vec *tmpv;
1124: PetscBool tmpvout = PETSC_FALSE;
1125: VecType roottype;
1130: if (vout) {
1132: tmpv = vout;
1133: } else {
1134: tmpvout = PETSC_TRUE;
1135: tmpv = &tmp;
1136: }
1138:   /* Create a seq vec on each proc, with the same size as the original vec */
1139: VecGetSize(vin, &N);
1140: VecGetRootType_Private(vin, &roottype);
1141: VecCreate(PETSC_COMM_SELF, tmpv);
1142: VecSetSizes(*tmpv, N, PETSC_DECIDE);
1143: VecSetType(*tmpv, roottype);
1144: /* Create the VecScatter ctx with the communication info */
1145: ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is);
1146: VecScatterCreate(vin, is, *tmpv, is, ctx);
1147: ISDestroy(&is);
1148: if (tmpvout) VecDestroy(tmpv);
1149: return 0;
1150: }
1152: /*@C
1153: VecScatterCreateToZero - Creates an output vector and a scatter context used to
1154: copy all vector values into the output vector on the zeroth processor
1156: Collective
1158: Input Parameter:
1159: . vin - `Vec` of type `MPIVEC`
1161: Output Parameters:
1162: + ctx - scatter context
1163: - vout - output `SEQVEC` that is large enough to scatter into on processor 0 and
1164: of length zero on all other processors
1166: Level: intermediate
1168: Usage:
1169: .vb
1170: VecScatterCreateToZero(vin,&ctx,&vout);
1172: // scatter as many times as you need
1173: VecScatterBegin(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1174: VecScatterEnd(ctx,vin,vout,INSERT_VALUES,SCATTER_FORWARD);
1176: // destroy scatter context and local vector when no longer needed
1177: VecScatterDestroy(&ctx);
1178: VecDestroy(&vout);
1179: .ve
1181: Notes:
1182:   vout may be NULL [`PETSC_NULL_VEC` from Fortran] if you do not
1183: need to have it created
1185: Do NOT create a vector and then pass it in as the final argument vout! vout is created by this routine
1186: automatically (unless you pass NULL in for that argument if you do not need it).
1188: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterCreateToAll()`, `VecScatterBegin()`, `VecScatterEnd()`
1189: @*/
1190: PetscErrorCode VecScatterCreateToZero(Vec vin, VecScatter *ctx, Vec *vout)
1191: {
1192: PetscInt N;
1193: PetscMPIInt rank;
1194: IS is;
1195: Vec tmp;
1196: Vec *tmpv;
1197: PetscBool tmpvout = PETSC_FALSE;
1198: VecType roottype;
1203: if (vout) {
1205: tmpv = vout;
1206: } else {
1207: tmpvout = PETSC_TRUE;
1208: tmpv = &tmp;
1209: }
1211:   /* Create a seq vec on each proc; it has the full size of the original vec on process 0 and size zero elsewhere */
1212: VecGetSize(vin, &N);
1213: MPI_Comm_rank(PetscObjectComm((PetscObject)vin), &rank);
1214: if (rank) N = 0;
1215: VecGetRootType_Private(vin, &roottype);
1216: VecCreate(PETSC_COMM_SELF, tmpv);
1217: VecSetSizes(*tmpv, N, PETSC_DECIDE);
1218: VecSetType(*tmpv, roottype);
1219: /* Create the VecScatter ctx with the communication info */
1220: ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &is);
1221: VecScatterCreate(vin, is, *tmpv, is, ctx);
1222: ISDestroy(&is);
1223: if (tmpvout) VecDestroy(tmpv);
1224: return 0;
1225: }
1227: /*@
1228: VecScatterBegin - Begins a generalized scatter from one vector to
1229: another. Complete the scattering phase with `VecScatterEnd()`.
1231: Neighbor-wise Collective
1233: Input Parameters:
1234: + sf - scatter context generated by `VecScatterCreate()`
1235: . x - the vector from which we scatter
1236: . y - the vector to which we scatter
1237: . addv - either `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`; with `INSERT_VALUES` mode any location
1238: not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1239: - mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are: `SCATTER_FORWARD` or `SCATTER_REVERSE`
1241: Level: intermediate
1243: Notes:
1244: The vectors x and y need not be the same vectors used in the call
1245: to `VecScatterCreate()`, but x must have the same parallel data layout
1246: as that passed in as the x to `VecScatterCreate()`, similarly for the y.
1247: Most likely they have been obtained from VecDuplicate().
1249: You cannot change the values in the input vector between the calls to `VecScatterBegin()`
1250: and `VecScatterEnd()`.
1252:   If you use `SCATTER_REVERSE` the two arguments x and y should be reversed from those
1253:   used for `SCATTER_FORWARD`.
1255: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1257: This scatter is far more general than the conventional
1258: scatter, since it can be a gather or a scatter or a combination,
1259: depending on the indices ix and iy. If x is a parallel vector and y
1260: is sequential, VecScatterBegin() can serve to gather values to a
1261: single processor. Similarly, if y is parallel and x sequential, the
1262: routine can scatter from one processor to many processors.
1264: .seealso: [](sec_scatter), `VecScatter`, `VecScatterCreate()`, `VecScatterEnd()`
1265: @*/
1266: PetscErrorCode VecScatterBegin(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
1267: {
1268: PetscInt to_n, from_n;
1273: if (PetscDefined(USE_DEBUG)) {
1274: /*
1275: Error checking to make sure these vectors match the vectors used
1276: to create the vector scatter context. -1 in the from_n and to_n indicate the
1277: vector lengths are unknown (for example with mapped scatters) and thus
1278: no error checking is performed.
1279: */
1280: if (sf->vscat.from_n >= 0 && sf->vscat.to_n >= 0) {
1281: VecGetLocalSize(x, &from_n);
1282: VecGetLocalSize(y, &to_n);
1283: if (mode & SCATTER_REVERSE) {
1286: } else {
1289: }
1290: }
1291: }
1293: sf->vscat.logging = PETSC_TRUE;
1294: PetscLogEventBegin(VEC_ScatterBegin, sf, x, y, 0);
1295: VecScatterBegin_Internal(sf, x, y, addv, mode);
1296: if (sf->vscat.beginandendtogether) VecScatterEnd_Internal(sf, x, y, addv, mode);
1297: PetscLogEventEnd(VEC_ScatterBegin, sf, x, y, 0);
1298: sf->vscat.logging = PETSC_FALSE;
1299: return 0;
1300: }
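/* A short sketch of the forward/reverse pairing documented above (ctx, global vector x and local vector
   y are assumed to be set up as for the SCATTER_FORWARD case; PetscCall() error checking is elided): in
   the reverse direction the two vector arguments are swapped, and ADD_VALUES accumulates the local
   contributions back into the global vector.

     VecScatterBegin(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD); // x -> y
     VecScatterEnd(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD);

     VecScatterBegin(ctx, y, x, ADD_VALUES, SCATTER_REVERSE);    // y -> x, arguments swapped
     VecScatterEnd(ctx, y, x, ADD_VALUES, SCATTER_REVERSE);
*/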
1302: /*@
1303: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
1304: after first calling `VecScatterBegin()`.
1306: Neighbor-wise Collective
1308: Input Parameters:
1309: + sf - scatter context generated by `VecScatterCreate()`
1310: . x - the vector from which we scatter
1311: . y - the vector to which we scatter
1312: . addv - one of `ADD_VALUES`, `MAX_VALUES`, `MIN_VALUES` or `INSERT_VALUES`
1313: - mode - the scattering mode, usually `SCATTER_FORWARD`. The available modes are: `SCATTER_FORWARD`, `SCATTER_REVERSE`
1315: Level: intermediate
1317: Notes:
1318:   If you use `SCATTER_REVERSE` the arguments x and y should be reversed from those used for `SCATTER_FORWARD`.
1320: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1322: .seealso: [](sec_scatter), `VecScatter`, `VecScatterBegin()`, `VecScatterCreate()`
1323: @*/
1324: PetscErrorCode VecScatterEnd(VecScatter sf, Vec x, Vec y, InsertMode addv, ScatterMode mode)
1325: {
1329: if (!sf->vscat.beginandendtogether) {
1330: sf->vscat.logging = PETSC_TRUE;
1331: PetscLogEventBegin(VEC_ScatterEnd, sf, x, y, 0);
1332: VecScatterEnd_Internal(sf, x, y, addv, mode);
1333: PetscLogEventEnd(VEC_ScatterEnd, sf, x, y, 0);
1334: sf->vscat.logging = PETSC_FALSE;
1335: }
1336: return 0;
1337: }