Actual source code: pdvec.c
1: /*
2: Code for some of the parallel vector primitives.
3: */
4: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
5: #include <petsc/private/viewerhdf5impl.h>
6: #include <petsc/private/glvisviewerimpl.h>
7: #include <petsc/private/glvisvecimpl.h>
8: #include <petscsf.h>
10: static PetscErrorCode VecResetPreallocationCOO_MPI(Vec v)
11: {
12: Vec_MPI *vmpi = (Vec_MPI *)v->data;
14: PetscFunctionBegin;
15: if (vmpi) {
16: PetscCall(PetscFree(vmpi->jmap1));
17: PetscCall(PetscFree(vmpi->perm1));
18: PetscCall(PetscFree(vmpi->Cperm));
19: PetscCall(PetscFree4(vmpi->imap2, vmpi->jmap2, vmpi->sendbuf, vmpi->recvbuf));
20: PetscCall(PetscFree(vmpi->perm2));
21: PetscCall(PetscSFDestroy(&vmpi->coo_sf));
22: }
23: PetscFunctionReturn(PETSC_SUCCESS);
24: }
26: PetscErrorCode VecDestroy_MPI(Vec v)
27: {
28: Vec_MPI *x = (Vec_MPI *)v->data;
30: PetscFunctionBegin;
31: PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->N));
32: if (!x) PetscFunctionReturn(PETSC_SUCCESS);
33: PetscCall(PetscFree(x->array_allocated));
35: /* Destroy local representation of vector if it exists */
36: if (x->localrep) {
37: PetscCall(VecDestroy(&x->localrep));
38: PetscCall(VecScatterDestroy(&x->localupdate));
39: PetscCall(ISDestroy(&x->ghost));
40: }
41: PetscCall(VecAssemblyReset_MPI(v));
43: /* Destroy the stashes: note the order - so that the tags are freed properly */
44: PetscCall(VecStashDestroy_Private(&v->bstash));
45: PetscCall(VecStashDestroy_Private(&v->stash));
47: PetscCall(VecResetPreallocationCOO_MPI(v));
48: PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", NULL));
49: PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", NULL));
50: PetscCall(PetscFree(v->data));
51: PetscFunctionReturn(PETSC_SUCCESS);
52: }
54: static PetscErrorCode VecView_MPI_ASCII(Vec xin, PetscViewer viewer)
55: {
56: PetscInt i, work = xin->map->n, cnt, nLen;
57: PetscMPIInt j, n = 0, size, rank, tag = ((PetscObject)viewer)->tag, len;
58: MPI_Status status;
59: PetscScalar *values;
60: const PetscScalar *xarray;
61: const char *name;
62: PetscViewerFormat format;
64: PetscFunctionBegin;
65: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
66: PetscCall(PetscViewerGetFormat(viewer, &format));
67: if (format == PETSC_VIEWER_LOAD_BALANCE) {
68: PetscInt nmax = 0, nmin = xin->map->n, navg;
69: for (PetscMPIInt i = 0; i < size; i++) {
70: nmax = PetscMax(nmax, xin->map->range[i + 1] - xin->map->range[i]);
71: nmin = PetscMin(nmin, xin->map->range[i + 1] - xin->map->range[i]);
72: }
73: navg = xin->map->N / size;
74: PetscCall(PetscViewerASCIIPrintf(viewer, " Load Balance - Local vector size Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: PetscCall(VecGetArrayRead(xin, &xarray));
79: /* determine maximum message to arrive */
80: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
81: PetscCallMPI(MPI_Reduce(rank ? &work : MPI_IN_PLACE, &work, 1, MPIU_INT, MPI_MAX, 0, PetscObjectComm((PetscObject)xin)));
82: PetscCall(PetscMPIIntCast(work, &len));
83: if (format == PETSC_VIEWER_ASCII_GLVIS) rank = 0, len = 0; /* no parallel distributed write support from GLVis */
84: if (rank == 0) {
85: PetscCall(PetscMalloc1(len, &values));
86: /*
87: MATLAB format and ASCII format are very similar except
88: MATLAB uses %18.16e format while ASCII uses %g
89: */
90: if (format == PETSC_VIEWER_ASCII_MATLAB) {
91: PetscCall(PetscObjectGetName((PetscObject)xin, &name));
92: PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name));
93: for (i = 0; i < xin->map->n; i++) {
94: #if defined(PETSC_USE_COMPLEX)
95: if (PetscImaginaryPart(xarray[i]) > 0.0) {
96: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
97: } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
98: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
99: } else {
100: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(xarray[i])));
101: }
102: #else
103: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
104: #endif
105: }
106: /* receive and print messages */
107: for (j = 1; j < size; j++) {
108: PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
109: PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
110: for (i = 0; i < n; i++) {
111: #if defined(PETSC_USE_COMPLEX)
112: if (PetscImaginaryPart(values[i]) > 0.0) {
113: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
114: } else if (PetscImaginaryPart(values[i]) < 0.0) {
115: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
116: } else {
117: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(values[i])));
118: }
119: #else
120: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
121: #endif
122: }
123: }
124: PetscCall(PetscViewerASCIIPrintf(viewer, "];\n"));
126: } else if (format == PETSC_VIEWER_ASCII_SYMMODU) {
127: for (i = 0; i < xin->map->n; i++) {
128: #if defined(PETSC_USE_COMPLEX)
129: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
130: #else
131: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
132: #endif
133: }
134: /* receive and print messages */
135: for (j = 1; j < size; j++) {
136: PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
137: PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
138: for (i = 0; i < n; i++) {
139: #if defined(PETSC_USE_COMPLEX)
140: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
141: #else
142: PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
143: #endif
144: }
145: }
146: } else if (format == PETSC_VIEWER_ASCII_PCICE) {
147: PetscInt bs, b, vertexCount = 1;
149: PetscCall(VecGetLocalSize(xin, &nLen));
150: PetscCall(PetscMPIIntCast(nLen, &n));
151: PetscCall(VecGetBlockSize(xin, &bs));
152: PetscCheck(bs >= 1 && bs <= 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PCICE can only handle up to 3D objects, but vector dimension is %" PetscInt_FMT, bs);
154: PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", xin->map->N / bs));
155: for (i = 0; i < n / bs; i++) {
156: PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT " ", vertexCount++));
157: for (b = 0; b < bs; b++) {
158: if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
159: #if !defined(PETSC_USE_COMPLEX)
160: PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)xarray[i * bs + b]));
161: #endif
162: }
163: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
164: }
165: for (j = 1; j < size; j++) {
166: PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
167: PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
168: for (i = 0; i < n / bs; i++) {
169: PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT " ", vertexCount++));
170: for (b = 0; b < bs; b++) {
171: if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
172: #if !defined(PETSC_USE_COMPLEX)
173: PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)values[i * bs + b]));
174: #endif
175: }
176: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
177: }
178: }
179: } else if (format == PETSC_VIEWER_ASCII_GLVIS) {
180: /* GLVis ASCII visualization/dump: this function mimics mfem::GridFunction::Save() */
181: const PetscScalar *array;
182: PetscInt i, n, vdim, ordering = 1; /* mfem::FiniteElementSpace::Ordering::byVDIM */
183: PetscContainer glvis_container;
184: PetscViewerGLVisVecInfo glvis_vec_info;
185: PetscViewerGLVisInfo glvis_info;
187: /* mfem::FiniteElementSpace::Save() */
188: PetscCall(VecGetBlockSize(xin, &vdim));
189: PetscCall(PetscViewerASCIIPrintf(viewer, "FiniteElementSpace\n"));
190: PetscCall(PetscObjectQuery((PetscObject)xin, "_glvis_info_container", (PetscObject *)&glvis_container));
191: PetscCheck(glvis_container, PetscObjectComm((PetscObject)xin), PETSC_ERR_PLIB, "Missing GLVis container");
192: PetscCall(PetscContainerGetPointer(glvis_container, (void **)&glvis_vec_info));
193: PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", glvis_vec_info->fec_type));
194: PetscCall(PetscViewerASCIIPrintf(viewer, "VDim: %" PetscInt_FMT "\n", vdim));
195: PetscCall(PetscViewerASCIIPrintf(viewer, "Ordering: %" PetscInt_FMT "\n", ordering));
196: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
197: /* mfem::Vector::Print() */
198: PetscCall(PetscObjectQuery((PetscObject)viewer, "_glvis_info_container", (PetscObject *)&glvis_container));
199: PetscCheck(glvis_container, PetscObjectComm((PetscObject)viewer), PETSC_ERR_PLIB, "Missing GLVis container");
200: PetscCall(PetscContainerGetPointer(glvis_container, (void **)&glvis_info));
201: if (glvis_info->enabled) {
202: PetscCall(VecGetLocalSize(xin, &n));
203: PetscCall(VecGetArrayRead(xin, &array));
204: for (i = 0; i < n; i++) {
205: PetscCall(PetscViewerASCIIPrintf(viewer, glvis_info->fmt, (double)PetscRealPart(array[i])));
206: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
207: }
208: PetscCall(VecRestoreArrayRead(xin, &array));
209: }
210: } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
211: /* No info */
212: } else {
213: if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
214: cnt = 0;
215: for (i = 0; i < xin->map->n; i++) {
216: if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
217: #if defined(PETSC_USE_COMPLEX)
218: if (PetscImaginaryPart(xarray[i]) > 0.0) {
219: PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
220: } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
221: PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
222: } else {
223: PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(xarray[i])));
224: }
225: #else
226: PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)xarray[i]));
227: #endif
228: }
229: /* receive and print messages */
230: for (j = 1; j < size; j++) {
231: PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
232: PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
233: if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", j));
234: for (i = 0; i < n; i++) {
235: if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
236: #if defined(PETSC_USE_COMPLEX)
237: if (PetscImaginaryPart(values[i]) > 0.0) {
238: PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
239: } else if (PetscImaginaryPart(values[i]) < 0.0) {
240: PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
241: } else {
242: PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(values[i])));
243: }
244: #else
245: PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)values[i]));
246: #endif
247: }
248: }
249: }
250: PetscCall(PetscFree(values));
251: } else {
252: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
253: /* Rank 0 is not trying to receive anything, so don't send anything */
254: } else {
255: if (format == PETSC_VIEWER_ASCII_MATLAB) {
256: /* this may be a collective operation so make sure everyone calls it */
257: PetscCall(PetscObjectGetName((PetscObject)xin, &name));
258: }
259: PetscCallMPI(MPIU_Send((void *)xarray, xin->map->n, MPIU_SCALAR, 0, tag, PetscObjectComm((PetscObject)xin)));
260: }
261: }
262: PetscCall(PetscViewerFlush(viewer));
263: PetscCall(VecRestoreArrayRead(xin, &xarray));
264: PetscFunctionReturn(PETSC_SUCCESS);
265: }
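/*
   Editor's note: an illustrative sketch (not part of pdvec.c) of how the MATLAB branch of
   VecView_MPI_ASCII() above is typically reached from user code. The vector name "x" and the
   size 10 are arbitrary; the explicit name only controls the MATLAB variable printed on the
   "name = [" line.

     #include <petscvec.h>
     int main(int argc, char **argv)
     {
       Vec x;

       PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
       PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
       PetscCall(VecSetSizes(x, PETSC_DECIDE, 10));
       PetscCall(VecSetFromOptions(x));
       PetscCall(VecSet(x, 1.0));
       PetscCall(PetscObjectSetName((PetscObject)x, "x"));
       // Rank 0 gathers and prints everything; the other ranks take the MPIU_Send() branch above
       PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_MATLAB));
       PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD));
       PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
       PetscCall(VecDestroy(&x));
       PetscCall(PetscFinalize());
       return 0;
     }
*/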
267: PetscErrorCode VecView_MPI_Binary(Vec xin, PetscViewer viewer)
268: {
269: return VecView_Binary(xin, viewer);
270: }
272: #include <petscdraw.h>
273: PetscErrorCode VecView_MPI_Draw_LG(Vec xin, PetscViewer viewer)
274: {
275: PetscDraw draw;
276: PetscBool isnull;
277: PetscDrawLG lg;
278: PetscMPIInt i, size, rank, n, N, *lens = NULL, *disp = NULL;
279: PetscReal *values, *xx = NULL, *yy = NULL;
280: const PetscScalar *xarray;
281: int colors[] = {PETSC_DRAW_RED};
283: PetscFunctionBegin;
284: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
285: PetscCall(PetscDrawIsNull(draw, &isnull));
286: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
287: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
288: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
289: PetscCall(PetscMPIIntCast(xin->map->n, &n));
290: PetscCall(PetscMPIIntCast(xin->map->N, &N));
292: PetscCall(VecGetArrayRead(xin, &xarray));
293: #if defined(PETSC_USE_COMPLEX)
294: PetscCall(PetscMalloc1(n + 1, &values));
295: for (i = 0; i < n; i++) values[i] = PetscRealPart(xarray[i]);
296: #else
297: values = (PetscReal *)xarray;
298: #endif
299: if (rank == 0) {
300: PetscCall(PetscMalloc2(N, &xx, N, &yy));
301: for (i = 0; i < N; i++) xx[i] = (PetscReal)i;
302: PetscCall(PetscMalloc2(size, &lens, size, &disp));
303: for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i + 1] - xin->map->range[i], &lens[i]));
304: for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i], &disp[i]));
305: }
306: PetscCallMPI(MPI_Gatherv(values, n, MPIU_REAL, yy, lens, disp, MPIU_REAL, 0, PetscObjectComm((PetscObject)xin)));
307: PetscCall(PetscFree2(lens, disp));
308: #if defined(PETSC_USE_COMPLEX)
309: PetscCall(PetscFree(values));
310: #endif
311: PetscCall(VecRestoreArrayRead(xin, &xarray));
313: PetscCall(PetscViewerDrawGetDrawLG(viewer, 0, &lg));
314: PetscCall(PetscDrawLGReset(lg));
315: PetscCall(PetscDrawLGSetDimension(lg, 1));
316: PetscCall(PetscDrawLGSetColors(lg, colors));
317: if (rank == 0) {
318: PetscCall(PetscDrawLGAddPoints(lg, N, &xx, &yy));
319: PetscCall(PetscFree2(xx, yy));
320: }
321: PetscCall(PetscDrawLGDraw(lg));
322: PetscCall(PetscDrawLGSave(lg));
323: PetscFunctionReturn(PETSC_SUCCESS);
324: }
326: PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec xin, PetscViewer viewer)
327: {
328: PetscMPIInt rank, size, tag = ((PetscObject)viewer)->tag;
329: PetscInt i, start, end;
330: MPI_Status status;
331: PetscReal min, max, tmp = 0.0;
332: PetscDraw draw;
333: PetscBool isnull;
334: PetscDrawAxis axis;
335: const PetscScalar *xarray;
337: PetscFunctionBegin;
338: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
339: PetscCall(PetscDrawIsNull(draw, &isnull));
340: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
341: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
342: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
344: PetscCall(VecMin(xin, NULL, &min));
345: PetscCall(VecMax(xin, NULL, &max));
346: if (min == max) {
347: min -= 1.e-5;
348: max += 1.e-5;
349: }
351: PetscCall(PetscDrawCheckResizedWindow(draw));
352: PetscCall(PetscDrawClear(draw));
354: PetscCall(PetscDrawAxisCreate(draw, &axis));
355: PetscCall(PetscDrawAxisSetLimits(axis, 0.0, (PetscReal)xin->map->N, min, max));
356: PetscCall(PetscDrawAxisDraw(axis));
357: PetscCall(PetscDrawAxisDestroy(&axis));
359: /* draw local part of vector */
360: PetscCall(VecGetArrayRead(xin, &xarray));
361: PetscCall(VecGetOwnershipRange(xin, &start, &end));
362: if (rank < size - 1) { /* send value to right */
363: PetscCallMPI(MPI_Send((void *)&xarray[xin->map->n - 1], 1, MPIU_REAL, rank + 1, tag, PetscObjectComm((PetscObject)xin)));
364: }
365: if (rank) { /* receive value from left */
366: PetscCallMPI(MPI_Recv(&tmp, 1, MPIU_REAL, rank - 1, tag, PetscObjectComm((PetscObject)xin), &status));
367: }
368: PetscDrawCollectiveBegin(draw);
369: if (rank) PetscCall(PetscDrawLine(draw, (PetscReal)start - 1, tmp, (PetscReal)start, PetscRealPart(xarray[0]), PETSC_DRAW_RED));
370: for (i = 1; i < xin->map->n; i++) PetscCall(PetscDrawLine(draw, (PetscReal)(i - 1 + start), PetscRealPart(xarray[i - 1]), (PetscReal)(i + start), PetscRealPart(xarray[i]), PETSC_DRAW_RED));
371: PetscDrawCollectiveEnd(draw);
372: PetscCall(VecRestoreArrayRead(xin, &xarray));
374: PetscCall(PetscDrawFlush(draw));
375: PetscCall(PetscDrawPause(draw));
376: PetscCall(PetscDrawSave(draw));
377: PetscFunctionReturn(PETSC_SUCCESS);
378: }
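/*
   Editor's note: an illustrative sketch (not part of pdvec.c) of driving the two draw paths above.
   With the PETSC_VIEWER_DRAW_LG format, VecView_MPI() dispatches to VecView_MPI_Draw_LG(); with the
   default draw format it calls VecView_MPI_Draw(). This assumes a usable draw backend (e.g. X);
   otherwise PetscDrawIsNull() makes both routines return early.

     Vec x;                                              // assumed created and assembled
     PetscCall(PetscViewerPushFormat(PETSC_VIEWER_DRAW_WORLD, PETSC_VIEWER_DRAW_LG));
     PetscCall(VecView(x, PETSC_VIEWER_DRAW_WORLD));     // line graph, gathered and drawn on rank 0
     PetscCall(PetscViewerPopFormat(PETSC_VIEWER_DRAW_WORLD));
     PetscCall(VecView(x, PETSC_VIEWER_DRAW_WORLD));     // default format: each rank draws its local part
*/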
380: #if defined(PETSC_HAVE_MATLAB)
381: PetscErrorCode VecView_MPI_Matlab(Vec xin, PetscViewer viewer)
382: {
383: PetscMPIInt rank, size, *lens;
384: PetscInt i, N = xin->map->N;
385: const PetscScalar *xarray;
386: PetscScalar *xx;
388: PetscFunctionBegin;
389: PetscCall(VecGetArrayRead(xin, &xarray));
390: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
391: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
392: if (rank == 0) {
393: PetscCall(PetscMalloc1(N, &xx));
394: PetscCall(PetscMalloc1(size, &lens));
395: for (i = 0; i < size; i++) lens[i] = xin->map->range[i + 1] - xin->map->range[i];
397: PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, xx, lens, xin->map->range, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
398: PetscCall(PetscFree(lens));
400: PetscCall(PetscObjectName((PetscObject)xin));
401: PetscCall(PetscViewerMatlabPutArray(viewer, N, 1, xx, ((PetscObject)xin)->name));
403: PetscCall(PetscFree(xx));
404: } else {
405: PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, 0, 0, 0, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
406: }
407: PetscCall(VecRestoreArrayRead(xin, &xarray));
408: PetscFunctionReturn(PETSC_SUCCESS);
409: }
410: #endif
412: #if defined(PETSC_HAVE_ADIOS)
413: #include <adios.h>
414: #include <adios_read.h>
415: #include <petsc/private/vieweradiosimpl.h>
416: #include <petsc/private/viewerimpl.h>
418: PetscErrorCode VecView_MPI_ADIOS(Vec xin, PetscViewer viewer)
419: {
420: PetscViewer_ADIOS *adios = (PetscViewer_ADIOS *)viewer->data;
421: const char *vecname;
422: int64_t id;
423: PetscInt n, N, rstart;
424: const PetscScalar *array;
425: char nglobalname[16], nlocalname[16], coffset[16];
427: PetscFunctionBegin;
428: PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));
430: PetscCall(VecGetLocalSize(xin, &n));
431: PetscCall(VecGetSize(xin, &N));
432: PetscCall(VecGetOwnershipRange(xin, &rstart, NULL));
434: PetscCall(PetscSNPrintf(nlocalname, PETSC_STATIC_ARRAY_LENGTH(nlocalname), "%" PetscInt_FMT, n));
435: PetscCall(PetscSNPrintf(nglobalname, PETSC_STATIC_ARRAY_LENGTH(nglobalname), "%" PetscInt_FMT, N));
436: PetscCall(PetscSNPrintf(coffset, PETSC_STATIC_ARRAY_LENGTH(coffset), "%" PetscInt_FMT, rstart));
437: id = adios_define_var(Petsc_adios_group, vecname, "", adios_double, nlocalname, nglobalname, coffset);
438: PetscCall(VecGetArrayRead(xin, &array));
439: PetscCallExternal(adios_write_byid, adios->adios_handle, id, array);
440: PetscCall(VecRestoreArrayRead(xin, &array));
441: PetscFunctionReturn(PETSC_SUCCESS);
442: }
443: #endif
445: #if defined(PETSC_HAVE_HDF5)
446: PetscErrorCode VecView_MPI_HDF5(Vec xin, PetscViewer viewer)
447: {
448: PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
449: /* TODO: It looks like we can remove the H5Sclose(filespace) and H5Dget_space(dset_id). Why do we do this? */
450: hid_t filespace; /* file dataspace identifier */
451: hid_t chunkspace; /* chunk dataset property identifier */
452: hid_t dset_id; /* dataset identifier */
453: hid_t memspace; /* memory dataspace identifier */
454: hid_t file_id;
455: hid_t group;
456: hid_t memscalartype; /* scalar type for mem (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
457: hid_t filescalartype; /* scalar type for file (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
458: PetscInt bs = xin->map->bs;
459: hsize_t dim;
460: hsize_t maxDims[4], dims[4], chunkDims[4], count[4], offset[4];
461: PetscBool timestepping, dim2, spoutput;
462: PetscInt timestep = PETSC_INT_MIN, low;
463: hsize_t chunksize;
464: const PetscScalar *x;
465: const char *vecname;
466: size_t len;
468: PetscFunctionBegin;
469: PetscCall(PetscViewerHDF5OpenGroup(viewer, NULL, &file_id, &group));
470: PetscCall(PetscViewerHDF5IsTimestepping(viewer, &timestepping));
471: if (timestepping) PetscCall(PetscViewerHDF5GetTimestep(viewer, &timestep));
472: PetscCall(PetscViewerHDF5GetBaseDimension2(viewer, &dim2));
473: PetscCall(PetscViewerHDF5GetSPOutput(viewer, &spoutput));
475: /* Create the dataspace for the dataset.
476: *
477: * dims - holds the current dimensions of the dataset
478: *
479: * maxDims - holds the maximum dimensions of the dataset (unlimited
480: * for the number of time steps with the current dimensions for the
481: * other dimensions; so only additional time steps can be added).
482: *
483: * chunkDims - holds the size of a single time step (required to
484: * permit extending dataset).
485: */
486: dim = 0;
487: chunksize = 1;
488: if (timestep >= 0) {
489: dims[dim] = timestep + 1;
490: maxDims[dim] = H5S_UNLIMITED;
491: chunkDims[dim] = 1;
492: ++dim;
493: }
494: PetscCall(PetscHDF5IntCast(xin->map->N / bs, dims + dim));
496: maxDims[dim] = dims[dim];
497: chunkDims[dim] = PetscMax(1, dims[dim]);
498: chunksize *= chunkDims[dim];
499: ++dim;
500: if (bs > 1 || dim2) {
501: dims[dim] = bs;
502: maxDims[dim] = dims[dim];
503: chunkDims[dim] = PetscMax(1, dims[dim]);
504: chunksize *= chunkDims[dim];
505: ++dim;
506: }
507: #if defined(PETSC_USE_COMPLEX)
508: dims[dim] = 2;
509: maxDims[dim] = dims[dim];
510: chunkDims[dim] = PetscMax(1, dims[dim]);
511: chunksize *= chunkDims[dim];
512: /* hdf5 chunks must be less than 4GB */
513: if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
514: if (bs > 1 || dim2) {
515: if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
516: if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
517: } else {
518: chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 128;
519: }
520: }
521: ++dim;
522: #else
523: /* hdf5 chunks must be less than 4GB */
524: if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
525: if (bs > 1 || dim2) {
526: if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
527: if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
528: } else {
529: chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 64;
530: }
531: }
532: #endif
534: PetscCallHDF5Return(filespace, H5Screate_simple, ((int)dim, dims, maxDims));
536: #if defined(PETSC_USE_REAL_SINGLE)
537: memscalartype = H5T_NATIVE_FLOAT;
538: filescalartype = H5T_NATIVE_FLOAT;
539: #elif defined(PETSC_USE_REAL___FLOAT128)
540: #error "HDF5 output with 128 bit floats not supported."
541: #elif defined(PETSC_USE_REAL___FP16)
542: #error "HDF5 output with 16 bit floats not supported."
543: #else
544: memscalartype = H5T_NATIVE_DOUBLE;
545: if (spoutput == PETSC_TRUE) filescalartype = H5T_NATIVE_FLOAT;
546: else filescalartype = H5T_NATIVE_DOUBLE;
547: #endif
549: /* Create the dataset with default properties and close filespace */
550: PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));
551: PetscCall(PetscStrlen(vecname, &len));
552: PetscCheck(len, PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Object must be named");
553: if (H5Lexists(group, vecname, H5P_DEFAULT) < 1) {
554: /* Create chunk */
555: PetscCallHDF5Return(chunkspace, H5Pcreate, (H5P_DATASET_CREATE));
556: PetscCallHDF5(H5Pset_chunk, (chunkspace, (int)dim, chunkDims));
558: PetscCallHDF5Return(dset_id, H5Dcreate2, (group, vecname, filescalartype, filespace, H5P_DEFAULT, chunkspace, H5P_DEFAULT));
559: PetscCallHDF5(H5Pclose, (chunkspace));
560: } else {
561: PetscCallHDF5Return(dset_id, H5Dopen2, (group, vecname, H5P_DEFAULT));
562: PetscCallHDF5(H5Dset_extent, (dset_id, dims));
563: }
564: PetscCallHDF5(H5Sclose, (filespace));
566: /* Each process defines a dataset and writes it to the hyperslab in the file */
567: dim = 0;
568: if (timestep >= 0) {
569: count[dim] = 1;
570: ++dim;
571: }
572: PetscCall(PetscHDF5IntCast(xin->map->n / bs, count + dim));
573: ++dim;
574: if (bs > 1 || dim2) {
575: count[dim] = bs;
576: ++dim;
577: }
578: #if defined(PETSC_USE_COMPLEX)
579: count[dim] = 2;
580: ++dim;
581: #endif
582: if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
583: PetscCallHDF5Return(memspace, H5Screate_simple, ((int)dim, count, NULL));
584: } else {
585: /* Can't create dataspace with zero for any dimension, so create null dataspace. */
586: PetscCallHDF5Return(memspace, H5Screate, (H5S_NULL));
587: }
589: /* Select hyperslab in the file */
590: PetscCall(VecGetOwnershipRange(xin, &low, NULL));
591: dim = 0;
592: if (timestep >= 0) {
593: offset[dim] = timestep;
594: ++dim;
595: }
596: PetscCall(PetscHDF5IntCast(low / bs, offset + dim));
597: ++dim;
598: if (bs > 1 || dim2) {
599: offset[dim] = 0;
600: ++dim;
601: }
602: #if defined(PETSC_USE_COMPLEX)
603: offset[dim] = 0;
604: ++dim;
605: #endif
606: if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
607: PetscCallHDF5Return(filespace, H5Dget_space, (dset_id));
608: PetscCallHDF5(H5Sselect_hyperslab, (filespace, H5S_SELECT_SET, offset, NULL, count, NULL));
609: } else {
610: /* Create null filespace to match null memspace. */
611: PetscCallHDF5Return(filespace, H5Screate, (H5S_NULL));
612: }
614: PetscCall(VecGetArrayRead(xin, &x));
615: PetscCallHDF5(H5Dwrite, (dset_id, memscalartype, memspace, filespace, hdf5->dxpl_id, x));
616: PetscCallHDF5(H5Fflush, (file_id, H5F_SCOPE_GLOBAL));
617: PetscCall(VecRestoreArrayRead(xin, &x));
619: /* Close/release resources */
620: PetscCallHDF5(H5Gclose, (group));
621: PetscCallHDF5(H5Sclose, (filespace));
622: PetscCallHDF5(H5Sclose, (memspace));
623: PetscCallHDF5(H5Dclose, (dset_id));
625: #if defined(PETSC_USE_COMPLEX)
626: {
627: PetscBool tru = PETSC_TRUE;
628: PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "complex", PETSC_BOOL, &tru));
629: }
630: #endif
631: if (timestepping) PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "timestepping", PETSC_BOOL, &timestepping));
632: PetscCall(PetscInfo(xin, "Wrote Vec object with name %s\n", vecname));
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
635: #endif
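/*
   Editor's note: an illustrative sketch (not part of pdvec.c) of reaching VecView_MPI_HDF5() above;
   it requires a PETSc build configured with HDF5. The file name "x.h5" and object name "solution"
   are arbitrary; naming the vector determines the HDF5 dataset name.

     Vec         x;        // assumed created and assembled
     PetscViewer viewer;

     PetscCall(PetscObjectSetName((PetscObject)x, "solution"));
     PetscCall(PetscViewerHDF5Open(PETSC_COMM_WORLD, "x.h5", FILE_MODE_WRITE, &viewer));
     PetscCall(VecView(x, viewer));                      // collective parallel write via hyperslabs
     PetscCall(PetscViewerDestroy(&viewer));
*/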
637: PETSC_EXTERN PetscErrorCode VecView_MPI(Vec xin, PetscViewer viewer)
638: {
639: PetscBool iascii, isbinary, isdraw;
640: #if defined(PETSC_HAVE_MATHEMATICA)
641: PetscBool ismathematica;
642: #endif
643: #if defined(PETSC_HAVE_HDF5)
644: PetscBool ishdf5;
645: #endif
646: #if defined(PETSC_HAVE_MATLAB)
647: PetscBool ismatlab;
648: #endif
649: #if defined(PETSC_HAVE_ADIOS)
650: PetscBool isadios;
651: #endif
652: PetscBool isglvis;
654: PetscFunctionBegin;
655: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
656: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
657: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
658: #if defined(PETSC_HAVE_MATHEMATICA)
659: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATHEMATICA, &ismathematica));
660: #endif
661: #if defined(PETSC_HAVE_HDF5)
662: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
663: #endif
664: #if defined(PETSC_HAVE_MATLAB)
665: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATLAB, &ismatlab));
666: #endif
667: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERGLVIS, &isglvis));
668: #if defined(PETSC_HAVE_ADIOS)
669: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERADIOS, &isadios));
670: #endif
671: if (iascii) {
672: PetscCall(VecView_MPI_ASCII(xin, viewer));
673: } else if (isbinary) {
674: PetscCall(VecView_MPI_Binary(xin, viewer));
675: } else if (isdraw) {
676: PetscViewerFormat format;
677: PetscCall(PetscViewerGetFormat(viewer, &format));
678: if (format == PETSC_VIEWER_DRAW_LG) {
679: PetscCall(VecView_MPI_Draw_LG(xin, viewer));
680: } else {
681: PetscCall(VecView_MPI_Draw(xin, viewer));
682: }
683: #if defined(PETSC_HAVE_MATHEMATICA)
684: } else if (ismathematica) {
685: PetscCall(PetscViewerMathematicaPutVector(viewer, xin));
686: #endif
687: #if defined(PETSC_HAVE_HDF5)
688: } else if (ishdf5) {
689: PetscCall(VecView_MPI_HDF5(xin, viewer));
690: #endif
691: #if defined(PETSC_HAVE_ADIOS)
692: } else if (isadios) {
693: PetscCall(VecView_MPI_ADIOS(xin, viewer));
694: #endif
695: #if defined(PETSC_HAVE_MATLAB)
696: } else if (ismatlab) {
697: PetscCall(VecView_MPI_Matlab(xin, viewer));
698: #endif
699: } else if (isglvis) PetscCall(VecView_GLVis(xin, viewer));
700: PetscFunctionReturn(PETSC_SUCCESS);
701: }
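/*
   Editor's note: a hedged usage sketch (not part of pdvec.c). VecView_MPI() is normally not called
   directly; it is reached through VecView(), often driven by a runtime option. The option name
   "-my_vec_view" below is arbitrary, and the option values shown follow the usual viewer
   "type:filename:format" syntax.

     PetscCall(VecViewFromOptions(x, NULL, "-my_vec_view"));
     // e.g. run with:  -my_vec_view                  (default ASCII)
     //                 -my_vec_view draw             (PETSCVIEWERDRAW paths above)
     //                 -my_vec_view ::ascii_matlab   (MATLAB-format ASCII)
*/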
703: PetscErrorCode VecGetSize_MPI(Vec xin, PetscInt *N)
704: {
705: PetscFunctionBegin;
706: *N = xin->map->N;
707: PetscFunctionReturn(PETSC_SUCCESS);
708: }
710: PetscErrorCode VecGetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], PetscScalar y[])
711: {
712: const PetscScalar *xx;
713: const PetscInt start = xin->map->range[xin->stash.rank];
715: PetscFunctionBegin;
716: PetscCall(VecGetArrayRead(xin, &xx));
717: for (PetscInt i = 0; i < ni; i++) {
718: if (xin->stash.ignorenegidx && ix[i] < 0) continue;
719: const PetscInt tmp = ix[i] - start;
721: PetscCheck(tmp >= 0 && tmp < xin->map->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Can only get local values, trying %" PetscInt_FMT, ix[i]);
722: y[i] = xx[tmp];
723: }
724: PetscCall(VecRestoreArrayRead(xin, &xx));
725: PetscFunctionReturn(PETSC_SUCCESS);
726: }
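/*
   Editor's note: an illustrative sketch (not part of pdvec.c). VecGetValues() on an MPI vector can
   only return entries owned by the calling rank, as enforced by the PetscCheck() above, so indices
   are usually derived from VecGetOwnershipRange().

     PetscInt    rstart, rend, ix;
     PetscScalar val;

     PetscCall(VecGetOwnershipRange(x, &rstart, &rend));   // x assumed created and assembled
     ix = rstart;                                          // first locally owned entry
     PetscCall(VecGetValues(x, 1, &ix, &val));
*/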
728: PetscErrorCode VecSetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar y[], InsertMode addv)
729: {
730: const PetscBool ignorenegidx = xin->stash.ignorenegidx;
731: const PetscBool donotstash = xin->stash.donotstash;
732: const PetscMPIInt rank = xin->stash.rank;
733: const PetscInt *owners = xin->map->range;
734: const PetscInt start = owners[rank], end = owners[rank + 1];
735: PetscScalar *xx;
737: PetscFunctionBegin;
738: if (PetscDefined(USE_DEBUG)) {
739: PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
740: PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
741: }
742: PetscCall(VecGetArray(xin, &xx));
743: xin->stash.insertmode = addv;
744: for (PetscInt i = 0; i < ni; ++i) {
745: PetscInt row;
747: if (ignorenegidx && ix[i] < 0) continue;
748: PetscCheck(ix[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT " cannot be negative", ix[i]);
749: if ((row = ix[i]) >= start && row < end) {
750: if (addv == INSERT_VALUES) {
751: xx[row - start] = y[i];
752: } else {
753: xx[row - start] += y[i];
754: }
755: } else if (!donotstash) {
756: PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
757: PetscCall(VecStashValue_Private(&xin->stash, row, y[i]));
758: }
759: }
760: PetscCall(VecRestoreArray(xin, &xx));
761: PetscFunctionReturn(PETSC_SUCCESS);
762: }
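/*
   Editor's note: an illustrative sketch (not part of pdvec.c) of the stash-based assembly that
   VecSetValues_MPI() implements. Off-process contributions are cached in the stash and only
   communicated by VecAssemblyBegin()/VecAssemblyEnd(); the global index N-1 below is remote for
   every rank except the last one.

     PetscInt    N, i;
     PetscScalar one = 1.0;

     PetscCall(VecGetSize(x, &N));          // x assumed to be a VECMPI of global size N
     i = N - 1;
     PetscCall(VecSetValues(x, 1, &i, &one, ADD_VALUES));
     PetscCall(VecAssemblyBegin(x));        // scatters stashed entries to their owners
     PetscCall(VecAssemblyEnd(x));          // owners apply the ADD_VALUES/INSERT_VALUES locally
*/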
764: PetscErrorCode VecSetValuesBlocked_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar yin[], InsertMode addv)
765: {
766: PetscMPIInt rank = xin->stash.rank;
767: PetscInt *owners = xin->map->range, start = owners[rank];
768: PetscInt end = owners[rank + 1], i, row, bs = xin->map->bs, j;
769: PetscScalar *xx, *y = (PetscScalar *)yin;
771: PetscFunctionBegin;
772: PetscCall(VecGetArray(xin, &xx));
773: if (PetscDefined(USE_DEBUG)) {
774: PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
775: PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
776: }
777: xin->stash.insertmode = addv;
779: if (addv == INSERT_VALUES) {
780: for (i = 0; i < ni; i++) {
781: if ((row = bs * ix[i]) >= start && row < end) {
782: for (j = 0; j < bs; j++) xx[row - start + j] = y[j];
783: } else if (!xin->stash.donotstash) {
784: if (ix[i] < 0) {
785: y += bs;
786: continue;
787: }
788: PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
789: PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
790: }
791: y += bs;
792: }
793: } else {
794: for (i = 0; i < ni; i++) {
795: if ((row = bs * ix[i]) >= start && row < end) {
796: for (j = 0; j < bs; j++) xx[row - start + j] += y[j];
797: } else if (!xin->stash.donotstash) {
798: if (ix[i] < 0) {
799: y += bs;
800: continue;
801: }
802: PetscCheck(ix[i] <= xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
803: PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
804: }
805: y += bs;
806: }
807: }
808: PetscCall(VecRestoreArray(xin, &xx));
809: PetscFunctionReturn(PETSC_SUCCESS);
810: }
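/*
   Editor's note: an illustrative sketch (not part of pdvec.c) for the blocked variant above. Block
   indices are in units of the block size, so block index 1 with bs = 2 addresses global entries 2
   and 3; the sizes used here are arbitrary.

     Vec         x;
     PetscInt    bidx  = 1;
     PetscScalar vb[2] = {3.0, 4.0};

     PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
     PetscCall(VecSetSizes(x, 4, PETSC_DETERMINE));   // 4 local entries per rank, divisible by bs
     PetscCall(VecSetBlockSize(x, 2));
     PetscCall(VecSetFromOptions(x));
     PetscCall(VecSetValuesBlocked(x, 1, &bidx, vb, INSERT_VALUES));
     PetscCall(VecAssemblyBegin(x));
     PetscCall(VecAssemblyEnd(x));
     PetscCall(VecDestroy(&x));
*/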
812: /*
813: Since nsends or nreceives may be zero, we add 1 in certain mallocs
814: to make sure we never malloc an empty one.
815: */
816: PetscErrorCode VecAssemblyBegin_MPI(Vec xin)
817: {
818: PetscInt *owners = xin->map->range, *bowners, i, bs, nstash, reallocs;
819: PetscMPIInt size;
820: InsertMode addv;
821: MPI_Comm comm;
823: PetscFunctionBegin;
824: PetscCall(PetscObjectGetComm((PetscObject)xin, &comm));
825: if (xin->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);
827: PetscCallMPI(MPIU_Allreduce((PetscEnum *)&xin->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
828: PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
829: xin->stash.insertmode = addv; /* in case this processor had no cache */
830: xin->bstash.insertmode = addv; /* Block stash implicitly tracks InsertMode of scalar stash */
832: PetscCall(VecGetBlockSize(xin, &bs));
833: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
834: if (!xin->bstash.bowners && xin->map->bs != -1) {
835: PetscCall(PetscMalloc1(size + 1, &bowners));
836: for (i = 0; i < size + 1; i++) bowners[i] = owners[i] / bs;
837: xin->bstash.bowners = bowners;
838: } else bowners = xin->bstash.bowners;
840: PetscCall(VecStashScatterBegin_Private(&xin->stash, owners));
841: PetscCall(VecStashScatterBegin_Private(&xin->bstash, bowners));
842: PetscCall(VecStashGetInfo_Private(&xin->stash, &nstash, &reallocs));
843: PetscCall(PetscInfo(xin, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
844: PetscCall(VecStashGetInfo_Private(&xin->bstash, &nstash, &reallocs));
845: PetscCall(PetscInfo(xin, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
846: PetscFunctionReturn(PETSC_SUCCESS);
847: }
849: PetscErrorCode VecAssemblyEnd_MPI(Vec vec)
850: {
851: PetscInt base, i, j, *row, flg, bs;
852: PetscMPIInt n;
853: PetscScalar *val, *vv, *array, *xarray;
855: PetscFunctionBegin;
856: if (!vec->stash.donotstash) {
857: PetscCall(VecGetArray(vec, &xarray));
858: PetscCall(VecGetBlockSize(vec, &bs));
859: base = vec->map->range[vec->stash.rank];
861: /* Process the stash */
862: while (1) {
863: PetscCall(VecStashScatterGetMesg_Private(&vec->stash, &n, &row, &val, &flg));
864: if (!flg) break;
865: if (vec->stash.insertmode == ADD_VALUES) {
866: for (i = 0; i < n; i++) xarray[row[i] - base] += val[i];
867: } else if (vec->stash.insertmode == INSERT_VALUES) {
868: for (i = 0; i < n; i++) xarray[row[i] - base] = val[i];
869: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
870: }
871: PetscCall(VecStashScatterEnd_Private(&vec->stash));
873: /* now process the block-stash */
874: while (1) {
875: PetscCall(VecStashScatterGetMesg_Private(&vec->bstash, &n, &row, &val, &flg));
876: if (!flg) break;
877: for (i = 0; i < n; i++) {
878: array = xarray + row[i] * bs - base;
879: vv = val + i * bs;
880: if (vec->stash.insertmode == ADD_VALUES) {
881: for (j = 0; j < bs; j++) array[j] += vv[j];
882: } else if (vec->stash.insertmode == INSERT_VALUES) {
883: for (j = 0; j < bs; j++) array[j] = vv[j];
884: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
885: }
886: }
887: PetscCall(VecStashScatterEnd_Private(&vec->bstash));
888: PetscCall(VecRestoreArray(vec, &xarray));
889: }
890: vec->stash.insertmode = NOT_SET_VALUES;
891: PetscFunctionReturn(PETSC_SUCCESS);
892: }
894: PetscErrorCode VecSetPreallocationCOO_MPI(Vec x, PetscCount coo_n, const PetscInt coo_i[])
895: {
896: PetscInt m, M, rstart, rend;
897: Vec_MPI *vmpi = (Vec_MPI *)x->data;
898: PetscCount k, p, q, rem; /* Loop variables over coo arrays */
899: PetscMPIInt size;
900: MPI_Comm comm;
902: PetscFunctionBegin;
903: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
904: PetscCallMPI(MPI_Comm_size(comm, &size));
905: PetscCall(VecResetPreallocationCOO_MPI(x));
907: PetscCall(PetscLayoutSetUp(x->map));
908: PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
909: PetscCall(VecGetLocalSize(x, &m));
910: PetscCall(VecGetSize(x, &M));
912: /* ---------------------------------------------------------------------------*/
913: /* Sort COOs along with a permutation array, so that negative indices come */
914: /* first, then local ones, then remote ones. */
915: /* ---------------------------------------------------------------------------*/
916: PetscCount n1 = coo_n, nneg, *perm;
917: PetscInt *i1; /* Copy of input COOs along with a permutation array */
918: PetscCall(PetscMalloc1(n1, &i1));
919: PetscCall(PetscMalloc1(n1, &perm));
920: PetscCall(PetscArraycpy(i1, coo_i, n1)); /* Make a copy since we'll modify it */
921: for (k = 0; k < n1; k++) perm[k] = k;
923: /* Manipulate i1[] so that entries with negative indices will have the smallest
924: index, local entries will have greater but negative indices, and remote entries
925: will have positive indices.
926: */
927: for (k = 0; k < n1; k++) {
928: if (i1[k] < 0) {
929: if (x->stash.ignorenegidx) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */
930: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found a negative index in VecSetPreallocationCOO() but VEC_IGNORE_NEGATIVE_INDICES was not set");
931: } else if (i1[k] >= rstart && i1[k] < rend) {
932: i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
933: } else {
934: PetscCheck(i1[k] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found index %" PetscInt_FMT " in VecSetPreallocationCOO() larger than the global size %" PetscInt_FMT, i1[k], M);
935: if (x->stash.donotstash) i1[k] = PETSC_INT_MIN; /* Ignore off-proc indices as if they were negative */
936: }
937: }
939: /* Sort the indices, after that, [0,nneg) have ignored entries, [nneg,rem) have local entries and [rem,n1) have remote entries */
940: PetscCall(PetscSortIntWithCountArray(n1, i1, perm));
941: for (k = 0; k < n1; k++) {
942: if (i1[k] > PETSC_INT_MIN) break;
943: } /* Advance k to the first entry we need to take care of */
944: nneg = k;
945: PetscCall(PetscSortedIntUpperBound(i1, nneg, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
946: for (k = nneg; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert indices of local entries */
948: /* ---------------------------------------------------------------------------*/
949: /* Build stuff for local entries */
950: /* ---------------------------------------------------------------------------*/
951: PetscCount tot1, *jmap1, *perm1;
952: PetscCall(PetscCalloc1(m + 1, &jmap1));
953: for (k = nneg; k < rem; k++) jmap1[i1[k] - rstart + 1]++; /* Count repeats of each local entry */
954: for (k = 0; k < m; k++) jmap1[k + 1] += jmap1[k]; /* Transform jmap1[] to CSR-like data structure */
955: tot1 = jmap1[m];
956: PetscAssert(tot1 == rem - nneg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected errors in VecSetPreallocationCOO_MPI");
957: PetscCall(PetscMalloc1(tot1, &perm1));
958: PetscCall(PetscArraycpy(perm1, perm + nneg, tot1));
960: /* ---------------------------------------------------------------------------*/
961: /* Record the permutation array for filling the send buffer */
962: /* ---------------------------------------------------------------------------*/
963: PetscCount *Cperm;
964: PetscCall(PetscMalloc1(n1 - rem, &Cperm));
965: PetscCall(PetscArraycpy(Cperm, perm + rem, n1 - rem));
966: PetscCall(PetscFree(perm));
968: /* ---------------------------------------------------------------------------*/
969: /* Send remote entries to their owner */
970: /* ---------------------------------------------------------------------------*/
971: /* Find which entries should be sent to which remote ranks*/
972: PetscInt nsend = 0; /* Number of MPI ranks to send data to */
973: PetscMPIInt *sendto; /* [nsend], storing remote ranks */
974: PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
975: const PetscInt *ranges;
976: PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
978: PetscCall(PetscLayoutGetRanges(x->map, &ranges));
979: PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
980: for (k = rem; k < n1;) {
981: PetscMPIInt owner;
982: PetscInt firstRow, lastRow;
984: /* Locate a row range */
985: firstRow = i1[k]; /* first row of this owner */
986: PetscCall(PetscLayoutFindOwner(x->map, firstRow, &owner));
987: lastRow = ranges[owner + 1] - 1; /* last row of this owner */
989: /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
990: PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
992: /* All entries in [k,p) belong to this remote owner */
993: if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
994: PetscMPIInt *sendto2;
995: PetscInt *nentries2;
996: PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
998: PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
999: PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
1000: PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
1001: PetscCall(PetscFree2(sendto, nentries));
1002: sendto = sendto2;
1003: nentries = nentries2;
1004: maxNsend = maxNsend2;
1005: }
1006: sendto[nsend] = owner;
1007: PetscCall(PetscIntCast(p - k, &nentries[nsend]));
1008: nsend++;
1009: k = p;
1010: }
1012: /* Build 1st SF to know offsets on remote to send data */
1013: PetscSF sf1;
1014: PetscInt nroots = 1, nroots2 = 0;
1015: PetscInt nleaves = nsend, nleaves2 = 0;
1016: PetscInt *offsets;
1017: PetscSFNode *iremote;
1019: PetscCall(PetscSFCreate(comm, &sf1));
1020: PetscCall(PetscMalloc1(nsend, &iremote));
1021: PetscCall(PetscMalloc1(nsend, &offsets));
1022: for (k = 0; k < nsend; k++) {
1023: iremote[k].rank = sendto[k];
1024: iremote[k].index = 0;
1025: nleaves2 += nentries[k];
1026: PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
1027: }
1028: PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
1029: PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
1030: PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the offsets[] check below catches it */
1031: PetscCall(PetscSFDestroy(&sf1));
1032: PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
1034: /* Build 2nd SF to send remote COOs to their owner */
1035: PetscSF sf2;
1036: nroots = nroots2;
1037: nleaves = nleaves2;
1038: PetscCall(PetscSFCreate(comm, &sf2));
1039: PetscCall(PetscSFSetFromOptions(sf2));
1040: PetscCall(PetscMalloc1(nleaves, &iremote));
1041: p = 0;
1042: for (k = 0; k < nsend; k++) {
1043: PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
1044: for (q = 0; q < nentries[k]; q++, p++) {
1045: iremote[p].rank = sendto[k];
1046: PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
1047: }
1048: }
1049: PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
1051: /* Send the remote COOs to their owner */
1052: PetscInt n2 = nroots, *i2; /* Buffers for received COOs from other ranks, along with a permutation array */
1053: PetscCount *perm2;
1054: PetscCall(PetscMalloc1(n2, &i2));
1055: PetscCall(PetscMalloc1(n2, &perm2));
1056: PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
1057: PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
1059: PetscCall(PetscFree(i1));
1060: PetscCall(PetscFree(offsets));
1061: PetscCall(PetscFree2(sendto, nentries));
1063: /* ---------------------------------------------------------------*/
1064: /* Sort received COOs along with a permutation array */
1065: /* ---------------------------------------------------------------*/
1066: PetscCount *imap2;
1067: PetscCount *jmap2, nnz2;
1068: PetscScalar *sendbuf, *recvbuf;
1069: PetscInt old;
1070: PetscCount sendlen = n1 - rem, recvlen = n2;
1072: for (k = 0; k < n2; k++) perm2[k] = k;
1073: PetscCall(PetscSortIntWithCountArray(n2, i2, perm2));
1075: /* nnz2 will be # of unique entries in the recvbuf */
1076: nnz2 = n2;
1077: for (k = 1; k < n2; k++) {
1078: if (i2[k] == i2[k - 1]) nnz2--;
1079: }
1081: /* Build imap2[] and jmap2[] for each unique entry */
1082: PetscCall(PetscMalloc4(nnz2, &imap2, nnz2 + 1, &jmap2, sendlen, &sendbuf, recvlen, &recvbuf));
1083: p = -1;
1084: old = -1;
1085: jmap2[0] = 0;
1086: jmap2++;
1087: for (k = 0; k < n2; k++) {
1088: if (i2[k] != old) { /* Meet a new entry */
1089: p++;
1090: imap2[p] = i2[k] - rstart;
1091: jmap2[p] = 1;
1092: old = i2[k];
1093: } else {
1094: jmap2[p]++;
1095: }
1096: }
1097: jmap2--;
1098: for (k = 0; k < nnz2; k++) jmap2[k + 1] += jmap2[k];
1100: PetscCall(PetscFree(i2));
1102: vmpi->coo_n = coo_n;
1103: vmpi->tot1 = tot1;
1104: vmpi->jmap1 = jmap1;
1105: vmpi->perm1 = perm1;
1106: vmpi->nnz2 = nnz2;
1107: vmpi->imap2 = imap2;
1108: vmpi->jmap2 = jmap2;
1109: vmpi->perm2 = perm2;
1111: vmpi->Cperm = Cperm;
1112: vmpi->sendbuf = sendbuf;
1113: vmpi->recvbuf = recvbuf;
1114: vmpi->sendlen = sendlen;
1115: vmpi->recvlen = recvlen;
1116: vmpi->coo_sf = sf2;
1117: PetscFunctionReturn(PETSC_SUCCESS);
1118: }
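/*
   Editor's note summarizing the state left behind by VecSetPreallocationCOO_MPI() above and
   consumed by VecSetValuesCOO_MPI() below: jmap1[]/perm1[] give, for each locally owned entry, the
   (possibly repeated) positions of its contributions in the caller's COO value array; Cperm[]
   orders the off-process contributions into sendbuf[]; coo_sf ships sendbuf[] to the owning ranks;
   and imap2[]/jmap2[]/perm2[] fold the received recvbuf[] entries into the local array. All of
   these are freed again by VecResetPreallocationCOO_MPI().
*/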
1120: PetscErrorCode VecSetValuesCOO_MPI(Vec x, const PetscScalar v[], InsertMode imode)
1121: {
1122: Vec_MPI *vmpi = (Vec_MPI *)x->data;
1123: PetscInt m;
1124: PetscScalar *a, *sendbuf = vmpi->sendbuf, *recvbuf = vmpi->recvbuf;
1125: const PetscCount *jmap1 = vmpi->jmap1;
1126: const PetscCount *perm1 = vmpi->perm1;
1127: const PetscCount *imap2 = vmpi->imap2;
1128: const PetscCount *jmap2 = vmpi->jmap2;
1129: const PetscCount *perm2 = vmpi->perm2;
1130: const PetscCount *Cperm = vmpi->Cperm;
1131: const PetscCount nnz2 = vmpi->nnz2;
1133: PetscFunctionBegin;
1134: PetscCall(VecGetLocalSize(x, &m));
1135: PetscCall(VecGetArray(x, &a));
1137: /* Pack entries to be sent to remote */
1138: for (PetscInt i = 0; i < vmpi->sendlen; i++) sendbuf[i] = v[Cperm[i]];
1140: /* Send remote entries to their owner and overlap the communication with local computation */
1141: PetscCall(PetscSFReduceWithMemTypeBegin(vmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
1142: /* Add local entries to the vector */
1143: for (PetscInt i = 0; i < m; i++) { /* All entries in a[] are either zero'ed or added with a value (i.e., initialized) */
1144: PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */
1145: for (PetscCount k = jmap1[i]; k < jmap1[i + 1]; k++) sum += v[perm1[k]];
1146: a[i] = (imode == INSERT_VALUES ? 0.0 : a[i]) + sum;
1147: }
1148: PetscCall(PetscSFReduceEnd(vmpi->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
1150: /* Add received remote entries to the vector */
1151: for (PetscInt i = 0; i < nnz2; i++) {
1152: for (PetscCount k = jmap2[i]; k < jmap2[i + 1]; k++) a[imap2[i]] += recvbuf[perm2[k]];
1153: }
1155: PetscCall(VecRestoreArray(x, &a));
1156: PetscFunctionReturn(PETSC_SUCCESS);
1157: }
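/*
   Editor's note: an illustrative sketch (not part of pdvec.c) of the COO assembly pair implemented
   above. The index pattern is fixed once by VecSetPreallocationCOO(); VecSetValuesCOO() may then be
   called repeatedly with new values. Repeated and off-process indices are allowed and are summed;
   the indices and values below are arbitrary.

     Vec         x;                          // assumed VECMPI with global size >= 4
     PetscInt    coo_i[3] = {0, 0, 3};       // repeated index 0, possibly remote index 3
     PetscScalar coo_v[3] = {1.0, 2.0, 5.0};

     PetscCall(VecSetPreallocationCOO(x, 3, coo_i));
     PetscCall(VecSetValuesCOO(x, coo_v, ADD_VALUES));   // each rank adds 3.0 to x[0] and 5.0 to x[3]
*/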