Actual source code: pdvec.c

  1: /*
  2:      Code for some of the parallel vector primitives.
  3: */
  4: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  5: #include <petsc/private/viewerhdf5impl.h>
  6: #include <petsc/private/glvisviewerimpl.h>
  7: #include <petsc/private/glvisvecimpl.h>
  8: #include <petscsf.h>

 10: static PetscErrorCode VecResetPreallocationCOO_MPI(Vec v)
 11: {
 12:   Vec_MPI *vmpi = (Vec_MPI *)v->data;

 14:   PetscFunctionBegin;
 15:   if (vmpi) {
 16:     PetscCall(PetscFree(vmpi->jmap1));
 17:     PetscCall(PetscFree(vmpi->perm1));
 18:     PetscCall(PetscFree(vmpi->Cperm));
 19:     PetscCall(PetscFree4(vmpi->imap2, vmpi->jmap2, vmpi->sendbuf, vmpi->recvbuf));
 20:     PetscCall(PetscFree(vmpi->perm2));
 21:     PetscCall(PetscSFDestroy(&vmpi->coo_sf));
 22:   }
 23:   PetscFunctionReturn(PETSC_SUCCESS);
 24: }

 26: PetscErrorCode VecDestroy_MPI(Vec v)
 27: {
 28:   Vec_MPI *x = (Vec_MPI *)v->data;

 30:   PetscFunctionBegin;
 31:   PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->N));
 32:   if (!x) PetscFunctionReturn(PETSC_SUCCESS);
 33:   PetscCall(PetscFree(x->array_allocated));

 35:   /* Destroy local representation of vector if it exists */
 36:   if (x->localrep) {
 37:     PetscCall(VecDestroy(&x->localrep));
 38:     PetscCall(VecScatterDestroy(&x->localupdate));
 39:     PetscCall(ISDestroy(&x->ghost));
 40:   }
 41:   PetscCall(VecAssemblyReset_MPI(v));

 43:   /* Destroy the stashes: note the order - so that the tags are freed properly */
 44:   PetscCall(VecStashDestroy_Private(&v->bstash));
 45:   PetscCall(VecStashDestroy_Private(&v->stash));

 47:   PetscCall(VecResetPreallocationCOO_MPI(v));
 48:   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEnginePut_C", NULL));
 49:   PetscCall(PetscObjectComposeFunction((PetscObject)v, "PetscMatlabEngineGet_C", NULL));
 50:   PetscCall(PetscFree(v->data));
 51:   PetscFunctionReturn(PETSC_SUCCESS);
 52: }

 54: static PetscErrorCode VecView_MPI_ASCII(Vec xin, PetscViewer viewer)
 55: {
 56:   PetscInt           i, work = xin->map->n, cnt, nLen;
 57:   PetscMPIInt        j, n = 0, size, rank, tag = ((PetscObject)viewer)->tag, len;
 58:   MPI_Status         status;
 59:   PetscScalar       *values;
 60:   const PetscScalar *xarray;
 61:   const char        *name;
 62:   PetscViewerFormat  format;

 64:   PetscFunctionBegin;
 65:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
 66:   PetscCall(PetscViewerGetFormat(viewer, &format));
 67:   if (format == PETSC_VIEWER_LOAD_BALANCE) {
 68:     PetscInt nmax = 0, nmin = xin->map->n, navg;
 69:     for (PetscMPIInt i = 0; i < size; i++) {
 70:       nmax = PetscMax(nmax, xin->map->range[i + 1] - xin->map->range[i]);
 71:       nmin = PetscMin(nmin, xin->map->range[i + 1] - xin->map->range[i]);
 72:     }
 73:     navg = xin->map->N / size;
 74:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Load Balance - Local vector size Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
 75:     PetscFunctionReturn(PETSC_SUCCESS);
 76:   }

 78:   PetscCall(VecGetArrayRead(xin, &xarray));
 79:   /* determine maximum message to arrive */
 80:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
 81:   PetscCallMPI(MPI_Reduce(rank ? &work : MPI_IN_PLACE, &work, 1, MPIU_INT, MPI_MAX, 0, PetscObjectComm((PetscObject)xin)));
 82:   PetscCall(PetscMPIIntCast(work, &len));
 83:   if (format == PETSC_VIEWER_ASCII_GLVIS) rank = 0, len = 0; /* no parallel distributed write support from GLVis */
 84:   if (rank == 0) {
 85:     PetscCall(PetscMalloc1(len, &values));
 86:     /*
 87:         MATLAB format and ASCII format are very similar except
 88:         MATLAB uses %18.16e format while ASCII uses %g
 89:     */
 90:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
 91:       PetscCall(PetscObjectGetName((PetscObject)xin, &name));
 92:       PetscCall(PetscViewerASCIIPrintf(viewer, "%s = [\n", name));
 93:       for (i = 0; i < xin->map->n; i++) {
 94: #if defined(PETSC_USE_COMPLEX)
 95:         if (PetscImaginaryPart(xarray[i]) > 0.0) {
 96:           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
 97:         } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
 98:           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
 99:         } else {
100:           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(xarray[i])));
101:         }
102: #else
103:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
104: #endif
105:       }
106:       /* receive and print messages */
107:       for (j = 1; j < size; j++) {
108:         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
109:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
110:         for (i = 0; i < n; i++) {
111: #if defined(PETSC_USE_COMPLEX)
112:           if (PetscImaginaryPart(values[i]) > 0.0) {
113:             PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e + %18.16ei\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
114:           } else if (PetscImaginaryPart(values[i]) < 0.0) {
115:             PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e - %18.16ei\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
116:           } else {
117:             PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)PetscRealPart(values[i])));
118:           }
119: #else
120:           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
121: #endif
122:         }
123:       }
124:       PetscCall(PetscViewerASCIIPrintf(viewer, "];\n"));

126:     } else if (format == PETSC_VIEWER_ASCII_SYMMODU) {
127:       for (i = 0; i < xin->map->n; i++) {
128: #if defined(PETSC_USE_COMPLEX)
129:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
130: #else
131:         PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)xarray[i]));
132: #endif
133:       }
134:       /* receive and print messages */
135:       for (j = 1; j < size; j++) {
136:         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
137:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
138:         for (i = 0; i < n; i++) {
139: #if defined(PETSC_USE_COMPLEX)
140:           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
141: #else
142:           PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e\n", (double)values[i]));
143: #endif
144:         }
145:       }
146:     } else if (format == PETSC_VIEWER_ASCII_PCICE) {
147:       PetscInt bs, b, vertexCount = 1;

149:       PetscCall(VecGetLocalSize(xin, &nLen));
150:       PetscCall(PetscMPIIntCast(nLen, &n));
151:       PetscCall(VecGetBlockSize(xin, &bs));
152:       PetscCheck(bs >= 1 && bs <= 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "PCICE can only handle up to 3D objects, but vector dimension is %" PetscInt_FMT, bs);

154:       PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", xin->map->N / bs));
155:       for (i = 0; i < n / bs; i++) {
156:         PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT "   ", vertexCount++));
157:         for (b = 0; b < bs; b++) {
158:           if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
159: #if !defined(PETSC_USE_COMPLEX)
160:           PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)xarray[i * bs + b]));
161: #endif
162:         }
163:         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
164:       }
165:       for (j = 1; j < size; j++) {
166:         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
167:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
168:         for (i = 0; i < n / bs; i++) {
169:           PetscCall(PetscViewerASCIIPrintf(viewer, "%7" PetscInt_FMT "   ", vertexCount++));
170:           for (b = 0; b < bs; b++) {
171:             if (b > 0) PetscCall(PetscViewerASCIIPrintf(viewer, " "));
172: #if !defined(PETSC_USE_COMPLEX)
173:             PetscCall(PetscViewerASCIIPrintf(viewer, "% 12.5E", (double)values[i * bs + b]));
174: #endif
175:           }
176:           PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
177:         }
178:       }
179:     } else if (format == PETSC_VIEWER_ASCII_GLVIS) {
180:       /* GLVis ASCII visualization/dump: this function mimics mfem::GridFunction::Save() */
181:       const PetscScalar      *array;
182:       PetscInt                i, n, vdim, ordering = 1; /* mfem::FiniteElementSpace::Ordering::byVDIM */
183:       PetscContainer          glvis_container;
184:       PetscViewerGLVisVecInfo glvis_vec_info;
185:       PetscViewerGLVisInfo    glvis_info;

187:       /* mfem::FiniteElementSpace::Save() */
188:       PetscCall(VecGetBlockSize(xin, &vdim));
189:       PetscCall(PetscViewerASCIIPrintf(viewer, "FiniteElementSpace\n"));
190:       PetscCall(PetscObjectQuery((PetscObject)xin, "_glvis_info_container", (PetscObject *)&glvis_container));
191:       PetscCheck(glvis_container, PetscObjectComm((PetscObject)xin), PETSC_ERR_PLIB, "Missing GLVis container");
192:       PetscCall(PetscContainerGetPointer(glvis_container, (void **)&glvis_vec_info));
193:       PetscCall(PetscViewerASCIIPrintf(viewer, "%s\n", glvis_vec_info->fec_type));
194:       PetscCall(PetscViewerASCIIPrintf(viewer, "VDim: %" PetscInt_FMT "\n", vdim));
195:       PetscCall(PetscViewerASCIIPrintf(viewer, "Ordering: %" PetscInt_FMT "\n", ordering));
196:       PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
197:       /* mfem::Vector::Print() */
198:       PetscCall(PetscObjectQuery((PetscObject)viewer, "_glvis_info_container", (PetscObject *)&glvis_container));
199:       PetscCheck(glvis_container, PetscObjectComm((PetscObject)viewer), PETSC_ERR_PLIB, "Missing GLVis container");
200:       PetscCall(PetscContainerGetPointer(glvis_container, (void **)&glvis_info));
201:       if (glvis_info->enabled) {
202:         PetscCall(VecGetLocalSize(xin, &n));
203:         PetscCall(VecGetArrayRead(xin, &array));
204:         for (i = 0; i < n; i++) {
205:           PetscCall(PetscViewerASCIIPrintf(viewer, glvis_info->fmt, (double)PetscRealPart(array[i])));
206:           PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
207:         }
208:         PetscCall(VecRestoreArrayRead(xin, &array));
209:       }
210:     } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
211:       /* No info */
212:     } else {
213:       if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", rank));
214:       cnt = 0;
215:       for (i = 0; i < xin->map->n; i++) {
216:         if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
217: #if defined(PETSC_USE_COMPLEX)
218:         if (PetscImaginaryPart(xarray[i]) > 0.0) {
219:           PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(xarray[i]), (double)PetscImaginaryPart(xarray[i])));
220:         } else if (PetscImaginaryPart(xarray[i]) < 0.0) {
221:           PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(xarray[i]), -(double)PetscImaginaryPart(xarray[i])));
222:         } else {
223:           PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(xarray[i])));
224:         }
225: #else
226:         PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)xarray[i]));
227: #endif
228:       }
229:       /* receive and print messages */
230:       for (j = 1; j < size; j++) {
231:         PetscCallMPI(MPI_Recv(values, len, MPIU_SCALAR, j, tag, PetscObjectComm((PetscObject)xin), &status));
232:         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &n));
233:         if (format != PETSC_VIEWER_ASCII_COMMON) PetscCall(PetscViewerASCIIPrintf(viewer, "Process [%d]\n", j));
234:         for (i = 0; i < n; i++) {
235:           if (format == PETSC_VIEWER_ASCII_INDEX) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT ": ", cnt++));
236: #if defined(PETSC_USE_COMPLEX)
237:           if (PetscImaginaryPart(values[i]) > 0.0) {
238:             PetscCall(PetscViewerASCIIPrintf(viewer, "%g + %g i\n", (double)PetscRealPart(values[i]), (double)PetscImaginaryPart(values[i])));
239:           } else if (PetscImaginaryPart(values[i]) < 0.0) {
240:             PetscCall(PetscViewerASCIIPrintf(viewer, "%g - %g i\n", (double)PetscRealPart(values[i]), -(double)PetscImaginaryPart(values[i])));
241:           } else {
242:             PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)PetscRealPart(values[i])));
243:           }
244: #else
245:           PetscCall(PetscViewerASCIIPrintf(viewer, "%g\n", (double)values[i]));
246: #endif
247:         }
248:       }
249:     }
250:     PetscCall(PetscFree(values));
251:   } else {
252:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
253:       /* Rank 0 is not trying to receive anything, so don't send anything */
254:     } else {
255:       if (format == PETSC_VIEWER_ASCII_MATLAB) {
256:         /* this may be a collective operation so make sure everyone calls it */
257:         PetscCall(PetscObjectGetName((PetscObject)xin, &name));
258:       }
259:       PetscCallMPI(MPIU_Send((void *)xarray, xin->map->n, MPIU_SCALAR, 0, tag, PetscObjectComm((PetscObject)xin)));
260:     }
261:   }
262:   PetscCall(PetscViewerFlush(viewer));
263:   PetscCall(VecRestoreArrayRead(xin, &xarray));
264:   PetscFunctionReturn(PETSC_SUCCESS);
265: }
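/*
  A minimal caller-side sketch (illustrative, not part of pdvec.c) of reaching the MATLAB branch of
  VecView_MPI_ASCII() above; the vector name "x" and the sizes are arbitrary.
*/
#if 0
static PetscErrorCode ExampleViewASCIIMatlab(MPI_Comm comm)
{
  Vec x;

  PetscFunctionBeginUser;
  PetscCall(VecCreate(comm, &x));
  PetscCall(VecSetSizes(x, PETSC_DECIDE, 10));
  PetscCall(VecSetFromOptions(x));
  PetscCall(VecSet(x, 1.0));
  PetscCall(PetscObjectSetName((PetscObject)x, "x")); /* printed as the MATLAB variable name */
  PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_(comm), PETSC_VIEWER_ASCII_MATLAB));
  PetscCall(VecView(x, PETSC_VIEWER_STDOUT_(comm))); /* dispatches to VecView_MPI_ASCII() for parallel vectors */
  PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_(comm)));
  PetscCall(VecDestroy(&x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif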

267: PetscErrorCode VecView_MPI_Binary(Vec xin, PetscViewer viewer)
268: {
269:   return VecView_Binary(xin, viewer);
270: }

272: #include <petscdraw.h>
273: PetscErrorCode VecView_MPI_Draw_LG(Vec xin, PetscViewer viewer)
274: {
275:   PetscDraw          draw;
276:   PetscBool          isnull;
277:   PetscDrawLG        lg;
278:   PetscMPIInt        i, size, rank, n, N, *lens = NULL, *disp = NULL;
279:   PetscReal         *values, *xx = NULL, *yy = NULL;
280:   const PetscScalar *xarray;
281:   int                colors[] = {PETSC_DRAW_RED};

283:   PetscFunctionBegin;
284:   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
285:   PetscCall(PetscDrawIsNull(draw, &isnull));
286:   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
287:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
288:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
289:   PetscCall(PetscMPIIntCast(xin->map->n, &n));
290:   PetscCall(PetscMPIIntCast(xin->map->N, &N));

292:   PetscCall(VecGetArrayRead(xin, &xarray));
293: #if defined(PETSC_USE_COMPLEX)
294:   PetscCall(PetscMalloc1(n + 1, &values));
295:   for (i = 0; i < n; i++) values[i] = PetscRealPart(xarray[i]);
296: #else
297:   values = (PetscReal *)xarray;
298: #endif
299:   if (rank == 0) {
300:     PetscCall(PetscMalloc2(N, &xx, N, &yy));
301:     for (i = 0; i < N; i++) xx[i] = (PetscReal)i;
302:     PetscCall(PetscMalloc2(size, &lens, size, &disp));
303:     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i + 1] - xin->map->range[i], &lens[i]));
304:     for (i = 0; i < size; i++) PetscCall(PetscMPIIntCast(xin->map->range[i], &disp[i]));
305:   }
306:   PetscCallMPI(MPI_Gatherv(values, n, MPIU_REAL, yy, lens, disp, MPIU_REAL, 0, PetscObjectComm((PetscObject)xin)));
307:   PetscCall(PetscFree2(lens, disp));
308: #if defined(PETSC_USE_COMPLEX)
309:   PetscCall(PetscFree(values));
310: #endif
311:   PetscCall(VecRestoreArrayRead(xin, &xarray));

313:   PetscCall(PetscViewerDrawGetDrawLG(viewer, 0, &lg));
314:   PetscCall(PetscDrawLGReset(lg));
315:   PetscCall(PetscDrawLGSetDimension(lg, 1));
316:   PetscCall(PetscDrawLGSetColors(lg, colors));
317:   if (rank == 0) {
318:     PetscCall(PetscDrawLGAddPoints(lg, N, &xx, &yy));
319:     PetscCall(PetscFree2(xx, yy));
320:   }
321:   PetscCall(PetscDrawLGDraw(lg));
322:   PetscCall(PetscDrawLGSave(lg));
323:   PetscFunctionReturn(PETSC_SUCCESS);
324: }
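/*
  A minimal sketch (illustrative, not part of pdvec.c) of selecting the line-graph path above by
  pushing PETSC_VIEWER_DRAW_LG on a draw viewer; the window title and size are arbitrary.
*/
#if 0
static PetscErrorCode ExampleViewDrawLG(Vec x)
{
  PetscViewer viewer;

  PetscFunctionBeginUser;
  PetscCall(PetscViewerDrawOpen(PetscObjectComm((PetscObject)x), NULL, "Vec entries", PETSC_DECIDE, PETSC_DECIDE, 400, 300, &viewer));
  PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_DRAW_LG));
  PetscCall(VecView(x, viewer)); /* VecView_MPI() picks VecView_MPI_Draw_LG() for this format */
  PetscCall(PetscViewerPopFormat(viewer));
  PetscCall(PetscViewerDestroy(&viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif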

326: PETSC_INTERN PetscErrorCode VecView_MPI_Draw(Vec xin, PetscViewer viewer)
327: {
328:   PetscMPIInt        rank, size, tag = ((PetscObject)viewer)->tag;
329:   PetscInt           i, start, end;
330:   MPI_Status         status;
331:   PetscReal          min, max, tmp = 0.0;
332:   PetscDraw          draw;
333:   PetscBool          isnull;
334:   PetscDrawAxis      axis;
335:   const PetscScalar *xarray;

337:   PetscFunctionBegin;
338:   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
339:   PetscCall(PetscDrawIsNull(draw, &isnull));
340:   if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
341:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
342:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));

344:   PetscCall(VecMin(xin, NULL, &min));
345:   PetscCall(VecMax(xin, NULL, &max));
346:   if (min == max) {
347:     min -= 1.e-5;
348:     max += 1.e-5;
349:   }

351:   PetscCall(PetscDrawCheckResizedWindow(draw));
352:   PetscCall(PetscDrawClear(draw));

354:   PetscCall(PetscDrawAxisCreate(draw, &axis));
355:   PetscCall(PetscDrawAxisSetLimits(axis, 0.0, (PetscReal)xin->map->N, min, max));
356:   PetscCall(PetscDrawAxisDraw(axis));
357:   PetscCall(PetscDrawAxisDestroy(&axis));

359:   /* draw local part of vector */
360:   PetscCall(VecGetArrayRead(xin, &xarray));
361:   PetscCall(VecGetOwnershipRange(xin, &start, &end));
362:   if (rank < size - 1) { /* send value to right */
363:     PetscCallMPI(MPI_Send((void *)&xarray[xin->map->n - 1], 1, MPIU_REAL, rank + 1, tag, PetscObjectComm((PetscObject)xin)));
364:   }
365:   if (rank) { /* receive value from left */
366:     PetscCallMPI(MPI_Recv(&tmp, 1, MPIU_REAL, rank - 1, tag, PetscObjectComm((PetscObject)xin), &status));
367:   }
368:   PetscDrawCollectiveBegin(draw);
369:   if (rank) PetscCall(PetscDrawLine(draw, (PetscReal)start - 1, tmp, (PetscReal)start, PetscRealPart(xarray[0]), PETSC_DRAW_RED));
370:   for (i = 1; i < xin->map->n; i++) PetscCall(PetscDrawLine(draw, (PetscReal)(i - 1 + start), PetscRealPart(xarray[i - 1]), (PetscReal)(i + start), PetscRealPart(xarray[i]), PETSC_DRAW_RED));
371:   PetscDrawCollectiveEnd(draw);
372:   PetscCall(VecRestoreArrayRead(xin, &xarray));

374:   PetscCall(PetscDrawFlush(draw));
375:   PetscCall(PetscDrawPause(draw));
376:   PetscCall(PetscDrawSave(draw));
377:   PetscFunctionReturn(PETSC_SUCCESS);
378: }

380: #if defined(PETSC_HAVE_MATLAB)
381: PetscErrorCode VecView_MPI_Matlab(Vec xin, PetscViewer viewer)
382: {
383:   PetscMPIInt        rank, size, *lens;
384:   PetscInt           i, N = xin->map->N;
385:   const PetscScalar *xarray;
386:   PetscScalar       *xx;

388:   PetscFunctionBegin;
389:   PetscCall(VecGetArrayRead(xin, &xarray));
390:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)xin), &rank));
391:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
392:   if (rank == 0) {
393:     PetscCall(PetscMalloc1(N, &xx));
394:     PetscCall(PetscMalloc1(size, &lens));
395:     for (i = 0; i < size; i++) lens[i] = xin->map->range[i + 1] - xin->map->range[i];

397:     PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, xx, lens, xin->map->range, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
398:     PetscCall(PetscFree(lens));

400:     PetscCall(PetscObjectName((PetscObject)xin));
401:     PetscCall(PetscViewerMatlabPutArray(viewer, N, 1, xx, ((PetscObject)xin)->name));

403:     PetscCall(PetscFree(xx));
404:   } else {
405:     PetscCallMPI(MPI_Gatherv((void *)xarray, xin->map->n, MPIU_SCALAR, 0, 0, 0, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)xin)));
406:   }
407:   PetscCall(VecRestoreArrayRead(xin, &xarray));
408:   PetscFunctionReturn(PETSC_SUCCESS);
409: }
410: #endif

412: #if defined(PETSC_HAVE_ADIOS)
413:   #include <adios.h>
414:   #include <adios_read.h>
415: #include <petsc/private/vieweradiosimpl.h>
416: #include <petsc/private/viewerimpl.h>

418: PetscErrorCode VecView_MPI_ADIOS(Vec xin, PetscViewer viewer)
419: {
420:   PetscViewer_ADIOS *adios = (PetscViewer_ADIOS *)viewer->data;
421:   const char        *vecname;
422:   int64_t            id;
423:   PetscInt           n, N, rstart;
424:   const PetscScalar *array;
425:   char               nglobalname[16], nlocalname[16], coffset[16];

427:   PetscFunctionBegin;
428:   PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));

430:   PetscCall(VecGetLocalSize(xin, &n));
431:   PetscCall(VecGetSize(xin, &N));
432:   PetscCall(VecGetOwnershipRange(xin, &rstart, NULL));

434:   PetscCall(PetscSNPrintf(nlocalname, PETSC_STATIC_ARRAY_LENGTH(nlocalname), "%" PetscInt_FMT, n));
435:   PetscCall(PetscSNPrintf(nglobalname, PETSC_STATIC_ARRAY_LENGTH(nglobalname), "%" PetscInt_FMT, N));
436:   PetscCall(PetscSNPrintf(coffset, PETSC_STATIC_ARRAY_LENGTH(coffset), "%" PetscInt_FMT, rstart));
437:   id = adios_define_var(Petsc_adios_group, vecname, "", adios_double, nlocalname, nglobalname, coffset);
438:   PetscCall(VecGetArrayRead(xin, &array));
439:   PetscCallExternal(adios_write_byid, adios->adios_handle, id, array);
440:   PetscCall(VecRestoreArrayRead(xin, &array));
441:   PetscFunctionReturn(PETSC_SUCCESS);
442: }
443: #endif

445: #if defined(PETSC_HAVE_HDF5)
446: PetscErrorCode VecView_MPI_HDF5(Vec xin, PetscViewer viewer)
447: {
448:   PetscViewer_HDF5 *hdf5 = (PetscViewer_HDF5 *)viewer->data;
449:   /* TODO: It looks like we can remove the H5Sclose(filespace) and H5Dget_space(dset_id). Why do we do this? */
450:   hid_t              filespace;  /* file dataspace identifier */
451:   hid_t              chunkspace; /* chunk dataset property identifier */
452:   hid_t              dset_id;    /* dataset identifier */
453:   hid_t              memspace;   /* memory dataspace identifier */
454:   hid_t              file_id;
455:   hid_t              group;
456:   hid_t              memscalartype;  /* scalar type for mem (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
457:   hid_t              filescalartype; /* scalar type for file (H5T_NATIVE_FLOAT or H5T_NATIVE_DOUBLE) */
458:   PetscInt           bs = xin->map->bs;
459:   hsize_t            dim;
460:   hsize_t            maxDims[4], dims[4], chunkDims[4], count[4], offset[4];
461:   PetscBool          timestepping, dim2, spoutput;
462:   PetscInt           timestep = PETSC_INT_MIN, low;
463:   hsize_t            chunksize;
464:   const PetscScalar *x;
465:   const char        *vecname;
466:   size_t             len;

468:   PetscFunctionBegin;
469:   PetscCall(PetscViewerHDF5OpenGroup(viewer, NULL, &file_id, &group));
470:   PetscCall(PetscViewerHDF5IsTimestepping(viewer, &timestepping));
471:   if (timestepping) PetscCall(PetscViewerHDF5GetTimestep(viewer, &timestep));
472:   PetscCall(PetscViewerHDF5GetBaseDimension2(viewer, &dim2));
473:   PetscCall(PetscViewerHDF5GetSPOutput(viewer, &spoutput));

475:   /* Create the dataspace for the dataset.
476:    *
477:    * dims - holds the current dimensions of the dataset
478:    *
479:    * maxDims - holds the maximum dimensions of the dataset (unlimited
480:    * for the number of time steps with the current dimensions for the
481:    * other dimensions; so only additional time steps can be added).
482:    *
483:    * chunkDims - holds the size of a single time step (required to
484:    * permit extending dataset).
485:    */
486:   dim       = 0;
487:   chunksize = 1;
488:   if (timestep >= 0) {
489:     dims[dim]      = timestep + 1;
490:     maxDims[dim]   = H5S_UNLIMITED;
491:     chunkDims[dim] = 1;
492:     ++dim;
493:   }
494:   PetscCall(PetscHDF5IntCast(xin->map->N / bs, dims + dim));

496:   maxDims[dim]   = dims[dim];
497:   chunkDims[dim] = PetscMax(1, dims[dim]);
498:   chunksize *= chunkDims[dim];
499:   ++dim;
500:   if (bs > 1 || dim2) {
501:     dims[dim]      = bs;
502:     maxDims[dim]   = dims[dim];
503:     chunkDims[dim] = PetscMax(1, dims[dim]);
504:     chunksize *= chunkDims[dim];
505:     ++dim;
506:   }
507:   #if defined(PETSC_USE_COMPLEX)
508:   dims[dim]      = 2;
509:   maxDims[dim]   = dims[dim];
510:   chunkDims[dim] = PetscMax(1, dims[dim]);
511:   chunksize *= chunkDims[dim];
512:   /* hdf5 chunks must be less than 4GB */
513:   if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
514:     if (bs > 1 || dim2) {
515:       if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
516:       if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 128));
517:     } else {
518:       chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 128;
519:     }
520:   }
521:   ++dim;
522:   #else
523:   /* hdf5 chunks must be less than 4GB */
524:   if (chunksize > PETSC_HDF5_MAX_CHUNKSIZE / 64) {
525:     if (bs > 1 || dim2) {
526:       if (chunkDims[dim - 2] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 2] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
527:       if (chunkDims[dim - 1] > (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64))) chunkDims[dim - 1] = (hsize_t)PetscSqrtReal((PetscReal)(PETSC_HDF5_MAX_CHUNKSIZE / 64));
528:     } else {
529:       chunkDims[dim - 1] = PETSC_HDF5_MAX_CHUNKSIZE / 64;
530:     }
531:   }
532:   #endif

534:   PetscCallHDF5Return(filespace, H5Screate_simple, ((int)dim, dims, maxDims));

536:   #if defined(PETSC_USE_REAL_SINGLE)
537:   memscalartype  = H5T_NATIVE_FLOAT;
538:   filescalartype = H5T_NATIVE_FLOAT;
539:   #elif defined(PETSC_USE_REAL___FLOAT128)
540:     #error "HDF5 output with 128 bit floats not supported."
541:   #elif defined(PETSC_USE_REAL___FP16)
542:     #error "HDF5 output with 16 bit floats not supported."
543:   #else
544:   memscalartype = H5T_NATIVE_DOUBLE;
545:   if (spoutput == PETSC_TRUE) filescalartype = H5T_NATIVE_FLOAT;
546:   else filescalartype = H5T_NATIVE_DOUBLE;
547:   #endif

549:   /* Create the dataset with default properties and close filespace */
550:   PetscCall(PetscObjectGetName((PetscObject)xin, &vecname));
551:   PetscCall(PetscStrlen(vecname, &len));
552:   PetscCheck(len, PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Object must be named");
553:   if (H5Lexists(group, vecname, H5P_DEFAULT) < 1) {
554:     /* Create chunk */
555:     PetscCallHDF5Return(chunkspace, H5Pcreate, (H5P_DATASET_CREATE));
556:     PetscCallHDF5(H5Pset_chunk, (chunkspace, (int)dim, chunkDims));

558:     PetscCallHDF5Return(dset_id, H5Dcreate2, (group, vecname, filescalartype, filespace, H5P_DEFAULT, chunkspace, H5P_DEFAULT));
559:     PetscCallHDF5(H5Pclose, (chunkspace));
560:   } else {
561:     PetscCallHDF5Return(dset_id, H5Dopen2, (group, vecname, H5P_DEFAULT));
562:     PetscCallHDF5(H5Dset_extent, (dset_id, dims));
563:   }
564:   PetscCallHDF5(H5Sclose, (filespace));

566:   /* Each process defines a dataset and writes it to the hyperslab in the file */
567:   dim = 0;
568:   if (timestep >= 0) {
569:     count[dim] = 1;
570:     ++dim;
571:   }
572:   PetscCall(PetscHDF5IntCast(xin->map->n / bs, count + dim));
573:   ++dim;
574:   if (bs > 1 || dim2) {
575:     count[dim] = bs;
576:     ++dim;
577:   }
578:   #if defined(PETSC_USE_COMPLEX)
579:   count[dim] = 2;
580:   ++dim;
581:   #endif
582:   if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
583:     PetscCallHDF5Return(memspace, H5Screate_simple, ((int)dim, count, NULL));
584:   } else {
585:     /* Can't create dataspace with zero for any dimension, so create null dataspace. */
586:     PetscCallHDF5Return(memspace, H5Screate, (H5S_NULL));
587:   }

589:   /* Select hyperslab in the file */
590:   PetscCall(VecGetOwnershipRange(xin, &low, NULL));
591:   dim = 0;
592:   if (timestep >= 0) {
593:     offset[dim] = timestep;
594:     ++dim;
595:   }
596:   PetscCall(PetscHDF5IntCast(low / bs, offset + dim));
597:   ++dim;
598:   if (bs > 1 || dim2) {
599:     offset[dim] = 0;
600:     ++dim;
601:   }
602:   #if defined(PETSC_USE_COMPLEX)
603:   offset[dim] = 0;
604:   ++dim;
605:   #endif
606:   if (xin->map->n > 0 || H5_VERSION_GE(1, 10, 0)) {
607:     PetscCallHDF5Return(filespace, H5Dget_space, (dset_id));
608:     PetscCallHDF5(H5Sselect_hyperslab, (filespace, H5S_SELECT_SET, offset, NULL, count, NULL));
609:   } else {
610:     /* Create null filespace to match null memspace. */
611:     PetscCallHDF5Return(filespace, H5Screate, (H5S_NULL));
612:   }

614:   PetscCall(VecGetArrayRead(xin, &x));
615:   PetscCallHDF5(H5Dwrite, (dset_id, memscalartype, memspace, filespace, hdf5->dxpl_id, x));
616:   PetscCallHDF5(H5Fflush, (file_id, H5F_SCOPE_GLOBAL));
617:   PetscCall(VecRestoreArrayRead(xin, &x));

619:   /* Close/release resources */
620:   PetscCallHDF5(H5Gclose, (group));
621:   PetscCallHDF5(H5Sclose, (filespace));
622:   PetscCallHDF5(H5Sclose, (memspace));
623:   PetscCallHDF5(H5Dclose, (dset_id));

625:   #if defined(PETSC_USE_COMPLEX)
626:   {
627:     PetscBool tru = PETSC_TRUE;
628:     PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "complex", PETSC_BOOL, &tru));
629:   }
630:   #endif
631:   if (timestepping) PetscCall(PetscViewerHDF5WriteObjectAttribute(viewer, (PetscObject)xin, "timestepping", PETSC_BOOL, &timestepping));
632:   PetscCall(PetscInfo(xin, "Wrote Vec object with name %s\n", vecname));
633:   PetscFunctionReturn(PETSC_SUCCESS);
634: }
635: #endif
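/*
  A minimal sketch (illustrative, not part of pdvec.c) of writing a named parallel vector to HDF5,
  which lands in VecView_MPI_HDF5() above; the file name "solution.h5" and dataset name are arbitrary.
*/
#if 0
static PetscErrorCode ExampleViewHDF5(Vec x)
{
  PetscViewer viewer;

  PetscFunctionBeginUser;
  PetscCall(PetscObjectSetName((PetscObject)x, "solution")); /* becomes the HDF5 dataset name; unnamed vectors are rejected above */
  PetscCall(PetscViewerHDF5Open(PetscObjectComm((PetscObject)x), "solution.h5", FILE_MODE_WRITE, &viewer));
  PetscCall(VecView(x, viewer));
  PetscCall(PetscViewerDestroy(&viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif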

637: PetscErrorCode VecView_MPI(Vec xin, PetscViewer viewer)
638: {
639:   PetscBool iascii, isbinary, isdraw;
640: #if defined(PETSC_HAVE_MATHEMATICA)
641:   PetscBool ismathematica;
642: #endif
643: #if defined(PETSC_HAVE_HDF5)
644:   PetscBool ishdf5;
645: #endif
646: #if defined(PETSC_HAVE_MATLAB)
647:   PetscBool ismatlab;
648: #endif
649: #if defined(PETSC_HAVE_ADIOS)
650:   PetscBool isadios;
651: #endif
652:   PetscBool isglvis;

654:   PetscFunctionBegin;
655:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
656:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
657:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
658: #if defined(PETSC_HAVE_MATHEMATICA)
659:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATHEMATICA, &ismathematica));
660: #endif
661: #if defined(PETSC_HAVE_HDF5)
662:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
663: #endif
664: #if defined(PETSC_HAVE_MATLAB)
665:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERMATLAB, &ismatlab));
666: #endif
667:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERGLVIS, &isglvis));
668: #if defined(PETSC_HAVE_ADIOS)
669:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERADIOS, &isadios));
670: #endif
671:   if (iascii) {
672:     PetscCall(VecView_MPI_ASCII(xin, viewer));
673:   } else if (isbinary) {
674:     PetscCall(VecView_MPI_Binary(xin, viewer));
675:   } else if (isdraw) {
676:     PetscViewerFormat format;
677:     PetscCall(PetscViewerGetFormat(viewer, &format));
678:     if (format == PETSC_VIEWER_DRAW_LG) {
679:       PetscCall(VecView_MPI_Draw_LG(xin, viewer));
680:     } else {
681:       PetscCall(VecView_MPI_Draw(xin, viewer));
682:     }
683: #if defined(PETSC_HAVE_MATHEMATICA)
684:   } else if (ismathematica) {
685:     PetscCall(PetscViewerMathematicaPutVector(viewer, xin));
686: #endif
687: #if defined(PETSC_HAVE_HDF5)
688:   } else if (ishdf5) {
689:     PetscCall(VecView_MPI_HDF5(xin, viewer));
690: #endif
691: #if defined(PETSC_HAVE_ADIOS)
692:   } else if (isadios) {
693:     PetscCall(VecView_MPI_ADIOS(xin, viewer));
694: #endif
695: #if defined(PETSC_HAVE_MATLAB)
696:   } else if (ismatlab) {
697:     PetscCall(VecView_MPI_Matlab(xin, viewer));
698: #endif
699:   } else if (isglvis) PetscCall(VecView_GLVis(xin, viewer));
700:   PetscFunctionReturn(PETSC_SUCCESS);
701: }

703: PetscErrorCode VecGetSize_MPI(Vec xin, PetscInt *N)
704: {
705:   PetscFunctionBegin;
706:   *N = xin->map->N;
707:   PetscFunctionReturn(PETSC_SUCCESS);
708: }

710: PetscErrorCode VecGetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], PetscScalar y[])
711: {
712:   const PetscScalar *xx;
713:   const PetscInt     start = xin->map->range[xin->stash.rank];

715:   PetscFunctionBegin;
716:   PetscCall(VecGetArrayRead(xin, &xx));
717:   for (PetscInt i = 0; i < ni; i++) {
718:     if (xin->stash.ignorenegidx && ix[i] < 0) continue;
719:     const PetscInt tmp = ix[i] - start;

721:     PetscCheck(tmp >= 0 && tmp < xin->map->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Can only get local values, trying %" PetscInt_FMT, ix[i]);
722:     y[i] = xx[tmp];
723:   }
724:   PetscCall(VecRestoreArrayRead(xin, &xx));
725:   PetscFunctionReturn(PETSC_SUCCESS);
726: }
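/*
  A minimal sketch (illustrative, not part of pdvec.c): VecGetValues_MPI() above only serves indices
  owned by the calling rank, so callers typically stay inside their ownership range.
*/
#if 0
static PetscErrorCode ExampleGetLocalValue(Vec x)
{
  PetscInt    rstart, rend;
  PetscScalar first;

  PetscFunctionBeginUser;
  PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
  if (rend > rstart) PetscCall(VecGetValues(x, 1, &rstart, &first)); /* only query locally owned indices */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif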

728: PetscErrorCode VecSetValues_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar y[], InsertMode addv)
729: {
730:   const PetscBool   ignorenegidx = xin->stash.ignorenegidx;
731:   const PetscBool   donotstash   = xin->stash.donotstash;
732:   const PetscMPIInt rank         = xin->stash.rank;
733:   const PetscInt   *owners       = xin->map->range;
734:   const PetscInt    start = owners[rank], end = owners[rank + 1];
735:   PetscScalar      *xx;

737:   PetscFunctionBegin;
738:   if (PetscDefined(USE_DEBUG)) {
739:     PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
740:     PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
741:   }
742:   PetscCall(VecGetArray(xin, &xx));
743:   xin->stash.insertmode = addv;
744:   for (PetscInt i = 0; i < ni; ++i) {
745:     PetscInt row;

747:     if (ignorenegidx && ix[i] < 0) continue;
748:     PetscCheck(ix[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT " cannot be negative", ix[i]);
749:     if ((row = ix[i]) >= start && row < end) {
750:       if (addv == INSERT_VALUES) {
751:         xx[row - start] = y[i];
752:       } else {
753:         xx[row - start] += y[i];
754:       }
755:     } else if (!donotstash) {
756:       PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
757:       PetscCall(VecStashValue_Private(&xin->stash, row, y[i]));
758:     }
759:   }
760:   PetscCall(VecRestoreArray(xin, &xx));
761:   PetscFunctionReturn(PETSC_SUCCESS);
762: }
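/*
  A minimal sketch (illustrative, not part of pdvec.c): off-process entries handed to VecSetValues()
  are stashed by VecSetValues_MPI() above and only reach their owners during assembly, so the call
  sequence must end with VecAssemblyBegin()/VecAssemblyEnd(). Indices and values are arbitrary.
*/
#if 0
static PetscErrorCode ExampleSetValuesWithAssembly(Vec x)
{
  const PetscInt    ix[2] = {0, 5}; /* index 5 may be owned by another rank */
  const PetscScalar y[2]  = {1.0, 2.0};

  PetscFunctionBeginUser;
  PetscCall(VecSetValues(x, 2, ix, y, ADD_VALUES));
  PetscCall(VecAssemblyBegin(x)); /* scatters the stash, see VecAssemblyBegin_MPI() below */
  PetscCall(VecAssemblyEnd(x));   /* applies the received entries, see VecAssemblyEnd_MPI() below */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif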

764: PetscErrorCode VecSetValuesBlocked_MPI(Vec xin, PetscInt ni, const PetscInt ix[], const PetscScalar yin[], InsertMode addv)
765: {
766:   PetscMPIInt  rank   = xin->stash.rank;
767:   PetscInt    *owners = xin->map->range, start = owners[rank];
768:   PetscInt     end = owners[rank + 1], i, row, bs = xin->map->bs, j;
769:   PetscScalar *xx, *y = (PetscScalar *)yin;

771:   PetscFunctionBegin;
772:   PetscCall(VecGetArray(xin, &xx));
773:   if (PetscDefined(USE_DEBUG)) {
774:     PetscCheck(xin->stash.insertmode != INSERT_VALUES || addv != ADD_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already inserted values; you cannot now add");
775:     PetscCheck(xin->stash.insertmode != ADD_VALUES || addv != INSERT_VALUES, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "You have already added values; you cannot now insert");
776:   }
777:   xin->stash.insertmode = addv;

779:   if (addv == INSERT_VALUES) {
780:     for (i = 0; i < ni; i++) {
781:       if ((row = bs * ix[i]) >= start && row < end) {
782:         for (j = 0; j < bs; j++) xx[row - start + j] = y[j];
783:       } else if (!xin->stash.donotstash) {
784:         if (ix[i] < 0) {
785:           y += bs;
786:           continue;
787:         }
788:         PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
789:         PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
790:       }
791:       y += bs;
792:     }
793:   } else {
794:     for (i = 0; i < ni; i++) {
795:       if ((row = bs * ix[i]) >= start && row < end) {
796:         for (j = 0; j < bs; j++) xx[row - start + j] += y[j];
797:       } else if (!xin->stash.donotstash) {
798:         if (ix[i] < 0) {
799:           y += bs;
800:           continue;
801:         }
802:         PetscCheck(ix[i] < xin->map->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Out of range index value %" PetscInt_FMT ", should be less than %" PetscInt_FMT, ix[i], xin->map->N);
803:         PetscCall(VecStashValuesBlocked_Private(&xin->bstash, ix[i], y));
804:       }
805:       y += bs;
806:     }
807:   }
808:   PetscCall(VecRestoreArray(xin, &xx));
809:   PetscFunctionReturn(PETSC_SUCCESS);
810: }
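/*
  A minimal sketch (illustrative, not part of pdvec.c): with the blocked interface the indices are
  block indices, so each index carries bs consecutive values; here the vector is assumed to have
  block size 2.
*/
#if 0
static PetscErrorCode ExampleSetValuesBlocked(Vec x) /* x assumed to have block size 2 */
{
  const PetscInt    block = 1; /* block index, i.e. entries 2 and 3 of the vector */
  const PetscScalar y[2]  = {3.0, 4.0};

  PetscFunctionBeginUser;
  PetscCall(VecSetValuesBlocked(x, 1, &block, y, INSERT_VALUES));
  PetscCall(VecAssemblyBegin(x));
  PetscCall(VecAssemblyEnd(x));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif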

812: /*
813:    Since nsends or nreceives may be zero we add 1 in certain mallocs
814: to make sure we never malloc an empty one.
815: */
816: PetscErrorCode VecAssemblyBegin_MPI(Vec xin)
817: {
818:   PetscInt   *owners = xin->map->range, *bowners, i, bs, nstash, reallocs;
819:   PetscMPIInt size;
820:   InsertMode  addv;
821:   MPI_Comm    comm;

823:   PetscFunctionBegin;
824:   PetscCall(PetscObjectGetComm((PetscObject)xin, &comm));
825:   if (xin->stash.donotstash) PetscFunctionReturn(PETSC_SUCCESS);

827:   PetscCallMPI(MPIU_Allreduce((PetscEnum *)&xin->stash.insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, comm));
828:   PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), comm, PETSC_ERR_ARG_NOTSAMETYPE, "Some processors inserted values while others added");
829:   xin->stash.insertmode  = addv; /* in case this processor had no cache */
830:   xin->bstash.insertmode = addv; /* Block stash implicitly tracks InsertMode of scalar stash */

832:   PetscCall(VecGetBlockSize(xin, &bs));
833:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)xin), &size));
834:   if (!xin->bstash.bowners && xin->map->bs != -1) {
835:     PetscCall(PetscMalloc1(size + 1, &bowners));
836:     for (i = 0; i < size + 1; i++) bowners[i] = owners[i] / bs;
837:     xin->bstash.bowners = bowners;
838:   } else bowners = xin->bstash.bowners;

840:   PetscCall(VecStashScatterBegin_Private(&xin->stash, owners));
841:   PetscCall(VecStashScatterBegin_Private(&xin->bstash, bowners));
842:   PetscCall(VecStashGetInfo_Private(&xin->stash, &nstash, &reallocs));
843:   PetscCall(PetscInfo(xin, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
844:   PetscCall(VecStashGetInfo_Private(&xin->bstash, &nstash, &reallocs));
845:   PetscCall(PetscInfo(xin, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
846:   PetscFunctionReturn(PETSC_SUCCESS);
847: }

849: PetscErrorCode VecAssemblyEnd_MPI(Vec vec)
850: {
851:   PetscInt     base, i, j, *row, flg, bs;
852:   PetscMPIInt  n;
853:   PetscScalar *val, *vv, *array, *xarray;

855:   PetscFunctionBegin;
856:   if (!vec->stash.donotstash) {
857:     PetscCall(VecGetArray(vec, &xarray));
858:     PetscCall(VecGetBlockSize(vec, &bs));
859:     base = vec->map->range[vec->stash.rank];

861:     /* Process the stash */
862:     while (1) {
863:       PetscCall(VecStashScatterGetMesg_Private(&vec->stash, &n, &row, &val, &flg));
864:       if (!flg) break;
865:       if (vec->stash.insertmode == ADD_VALUES) {
866:         for (i = 0; i < n; i++) xarray[row[i] - base] += val[i];
867:       } else if (vec->stash.insertmode == INSERT_VALUES) {
868:         for (i = 0; i < n; i++) xarray[row[i] - base] = val[i];
869:       } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
870:     }
871:     PetscCall(VecStashScatterEnd_Private(&vec->stash));

873:     /* now process the block-stash */
874:     while (1) {
875:       PetscCall(VecStashScatterGetMesg_Private(&vec->bstash, &n, &row, &val, &flg));
876:       if (!flg) break;
877:       for (i = 0; i < n; i++) {
878:         array = xarray + row[i] * bs - base;
879:         vv    = val + i * bs;
880:         if (vec->stash.insertmode == ADD_VALUES) {
881:           for (j = 0; j < bs; j++) array[j] += vv[j];
882:         } else if (vec->stash.insertmode == INSERT_VALUES) {
883:           for (j = 0; j < bs; j++) array[j] = vv[j];
884:         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Insert mode is not set correctly; corrupted vector");
885:       }
886:     }
887:     PetscCall(VecStashScatterEnd_Private(&vec->bstash));
888:     PetscCall(VecRestoreArray(vec, &xarray));
889:   }
890:   vec->stash.insertmode = NOT_SET_VALUES;
891:   PetscFunctionReturn(PETSC_SUCCESS);
892: }

894: PetscErrorCode VecSetPreallocationCOO_MPI(Vec x, PetscCount coo_n, const PetscInt coo_i[])
895: {
896:   PetscInt    m, M, rstart, rend;
897:   Vec_MPI    *vmpi = (Vec_MPI *)x->data;
898:   PetscCount  k, p, q, rem; /* Loop variables over coo arrays */
899:   PetscMPIInt size;
900:   MPI_Comm    comm;

902:   PetscFunctionBegin;
903:   PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
904:   PetscCallMPI(MPI_Comm_size(comm, &size));
905:   PetscCall(VecResetPreallocationCOO_MPI(x));

907:   PetscCall(PetscLayoutSetUp(x->map));
908:   PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
909:   PetscCall(VecGetLocalSize(x, &m));
910:   PetscCall(VecGetSize(x, &M));

912:   /* ---------------------------------------------------------------------------*/
913:   /* Sort COOs along with a permutation array, so that negative indices come    */
914:   /* first, then local ones, then remote ones.                                  */
915:   /* ---------------------------------------------------------------------------*/
916:   PetscCount n1 = coo_n, nneg, *perm;
917:   PetscInt  *i1; /* Copy of input COOs along with a permutation array */
918:   PetscCall(PetscMalloc1(n1, &i1));
919:   PetscCall(PetscMalloc1(n1, &perm));
920:   PetscCall(PetscArraycpy(i1, coo_i, n1)); /* Make a copy since we'll modify it */
921:   for (k = 0; k < n1; k++) perm[k] = k;

923:   /* Manipulate i1[] so that entries with negative indices will have the smallest
924:      index, local entries will have greater but negative indices, and remote entries
925:      will have positive indices.
926:   */
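  /* Illustrative worked example of the shift: with rstart = 4, rend = 8, ignorenegidx set and
     coo_i[] = {9, 5, -1, 5}, i1[] becomes {9, 5 - PETSC_INT_MAX, PETSC_INT_MIN, 5 - PETSC_INT_MAX};
     after sorting, the ignored entry comes first, the two local entries (both encoding row 5)
     follow, and the remote entry 9 ends up last. */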
927:   for (k = 0; k < n1; k++) {
928:     if (i1[k] < 0) {
929:       if (x->stash.ignorenegidx) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, the smallest PetscInt, so these entries sort to the front */
930:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found a negative index in VecSetPreallocationCOO() but VEC_IGNORE_NEGATIVE_INDICES was not set");
931:     } else if (i1[k] >= rstart && i1[k] < rend) {
932:       i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
933:     } else {
934:       PetscCheck(i1[k] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Found index %" PetscInt_FMT " in VecSetPreallocationCOO() larger than the global size %" PetscInt_FMT, i1[k], M);
935:       if (x->stash.donotstash) i1[k] = PETSC_INT_MIN; /* Ignore off-proc indices as if they were negative */
936:     }
937:   }

939:   /* Sort the indices, after that, [0,nneg) have ignored entries, [nneg,rem) have local entries and [rem,n1) have remote entries */
940:   PetscCall(PetscSortIntWithCountArray(n1, i1, perm));
941:   for (k = 0; k < n1; k++) {
942:     if (i1[k] > PETSC_INT_MIN) break;
943:   } /* Advance k to the first entry we need to take care of */
944:   nneg = k;
945:   PetscCall(PetscSortedIntUpperBound(i1, nneg, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is one past the last local entry */
946:   for (k = nneg; k < rem; k++) i1[k] += PETSC_INT_MAX;                               /* Revert indices of local entries */

948:   /* ---------------------------------------------------------------------------*/
949:   /*           Build stuff for local entries                                    */
950:   /* ---------------------------------------------------------------------------*/
951:   PetscCount tot1, *jmap1, *perm1;
952:   PetscCall(PetscCalloc1(m + 1, &jmap1));
953:   for (k = nneg; k < rem; k++) jmap1[i1[k] - rstart + 1]++; /* Count repeats of each local entry */
954:   for (k = 0; k < m; k++) jmap1[k + 1] += jmap1[k];         /* Transform jmap1[] to CSR-like data structure */
955:   tot1 = jmap1[m];
956:   PetscAssert(tot1 == rem - nneg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected errors in VecSetPreallocationCOO_MPI");
957:   PetscCall(PetscMalloc1(tot1, &perm1));
958:   PetscCall(PetscArraycpy(perm1, perm + nneg, tot1));
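  /* At this point jmap1[] is a CSR-style offset array over the local rows: the repeats of local
     row i sit at perm1[jmap1[i]] .. perm1[jmap1[i+1]-1], and each perm1[] entry is a position in
     the caller's original coo_i[]/v[] arrays. VecSetValuesCOO_MPI() below sums v[] through exactly
     this indirection. */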

960:   /* ---------------------------------------------------------------------------*/
961:   /*        Record the permutation array for filling the send buffer            */
962:   /* ---------------------------------------------------------------------------*/
963:   PetscCount *Cperm;
964:   PetscCall(PetscMalloc1(n1 - rem, &Cperm));
965:   PetscCall(PetscArraycpy(Cperm, perm + rem, n1 - rem));
966:   PetscCall(PetscFree(perm));

968:   /* ---------------------------------------------------------------------------*/
969:   /*           Send remote entries to their owner                                  */
970:   /* ---------------------------------------------------------------------------*/
971:   /* Find which entries should be sent to which remote ranks*/
972:   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
973:   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
974:   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
975:   const PetscInt *ranges;
976:   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */

978:   PetscCall(PetscLayoutGetRanges(x->map, &ranges));
979:   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
980:   for (k = rem; k < n1;) {
981:     PetscMPIInt owner;
982:     PetscInt    firstRow, lastRow;

984:     /* Locate a row range */
985:     firstRow = i1[k]; /* first row of this owner */
986:     PetscCall(PetscLayoutFindOwner(x->map, firstRow, &owner));
987:     lastRow = ranges[owner + 1] - 1; /* last row of this owner */

989:     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
990:     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));

992:     /* All entries in [k,p) belong to this remote owner */
993:     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
994:       PetscMPIInt *sendto2;
995:       PetscInt    *nentries2;
996:       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;

998:       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
999:       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
1000:       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
1001:       PetscCall(PetscFree2(sendto, nentries));
1002:       sendto   = sendto2;
1003:       nentries = nentries2;
1004:       maxNsend = maxNsend2;
1005:     }
1006:     sendto[nsend] = owner;
1007:     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
1008:     nsend++;
1009:     k = p;
1010:   }

1012:   /* Build 1st SF to know offsets on remote to send data */
1013:   PetscSF      sf1;
1014:   PetscInt     nroots = 1, nroots2 = 0;
1015:   PetscInt     nleaves = nsend, nleaves2 = 0;
1016:   PetscInt    *offsets;
1017:   PetscSFNode *iremote;

1019:   PetscCall(PetscSFCreate(comm, &sf1));
1020:   PetscCall(PetscMalloc1(nsend, &iremote));
1021:   PetscCall(PetscMalloc1(nsend, &offsets));
1022:   for (k = 0; k < nsend; k++) {
1023:     iremote[k].rank  = sendto[k];
1024:     iremote[k].index = 0;
1025:     nleaves2 += nentries[k];
1026:     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
1027:   }
1028:   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
1029:   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
1030:   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 overflowed, the offsets[] check below would catch it */
1031:   PetscCall(PetscSFDestroy(&sf1));
1032:   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);

1034:   /* Build 2nd SF to send remote COOs to their owner */
1035:   PetscSF sf2;
1036:   nroots  = nroots2;
1037:   nleaves = nleaves2;
1038:   PetscCall(PetscSFCreate(comm, &sf2));
1039:   PetscCall(PetscSFSetFromOptions(sf2));
1040:   PetscCall(PetscMalloc1(nleaves, &iremote));
1041:   p = 0;
1042:   for (k = 0; k < nsend; k++) {
1043:     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
1044:     for (q = 0; q < nentries[k]; q++, p++) {
1045:       iremote[p].rank = sendto[k];
1046:       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
1047:     }
1048:   }
1049:   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

1051:   /* Send the remote COOs to their owner */
1052:   PetscInt    n2 = nroots, *i2; /* Buffers for received COOs from other ranks, along with a permutation array */
1053:   PetscCount *perm2;
1054:   PetscCall(PetscMalloc1(n2, &i2));
1055:   PetscCall(PetscMalloc1(n2, &perm2));
1056:   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
1057:   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));

1059:   PetscCall(PetscFree(i1));
1060:   PetscCall(PetscFree(offsets));
1061:   PetscCall(PetscFree2(sendto, nentries));

1063:   /* ---------------------------------------------------------------*/
1064:   /* Sort received COOs along with a permutation array            */
1065:   /* ---------------------------------------------------------------*/
1066:   PetscCount  *imap2;
1067:   PetscCount  *jmap2, nnz2;
1068:   PetscScalar *sendbuf, *recvbuf;
1069:   PetscInt     old;
1070:   PetscCount   sendlen = n1 - rem, recvlen = n2;

1072:   for (k = 0; k < n2; k++) perm2[k] = k;
1073:   PetscCall(PetscSortIntWithCountArray(n2, i2, perm2));

1075:   /* nnz2 will be # of unique entries in the recvbuf */
1076:   nnz2 = n2;
1077:   for (k = 1; k < n2; k++) {
1078:     if (i2[k] == i2[k - 1]) nnz2--;
1079:   }

1081:   /* Build imap2[] and jmap2[] for each unique entry */
1082:   PetscCall(PetscMalloc4(nnz2, &imap2, nnz2 + 1, &jmap2, sendlen, &sendbuf, recvlen, &recvbuf));
1083:   p        = -1;
1084:   old      = -1;
1085:   jmap2[0] = 0;
1086:   jmap2++;
1087:   for (k = 0; k < n2; k++) {
1088:     if (i2[k] != old) { /* Meet a new entry */
1089:       p++;
1090:       imap2[p] = i2[k] - rstart;
1091:       jmap2[p] = 1;
1092:       old      = i2[k];
1093:     } else {
1094:       jmap2[p]++;
1095:     }
1096:   }
1097:   jmap2--;
1098:   for (k = 0; k < nnz2; k++) jmap2[k + 1] += jmap2[k];
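  /* At this point imap2[p] is the local row (relative to rstart) of the p-th unique received index,
     and its repeats sit at perm2[jmap2[p]] .. perm2[jmap2[p+1]-1], which index into recvbuf[]. This
     mirrors the jmap1[]/perm1[] layout built for the local entries above. */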

1100:   PetscCall(PetscFree(i2));

1102:   vmpi->coo_n = coo_n;
1103:   vmpi->tot1  = tot1;
1104:   vmpi->jmap1 = jmap1;
1105:   vmpi->perm1 = perm1;
1106:   vmpi->nnz2  = nnz2;
1107:   vmpi->imap2 = imap2;
1108:   vmpi->jmap2 = jmap2;
1109:   vmpi->perm2 = perm2;

1111:   vmpi->Cperm   = Cperm;
1112:   vmpi->sendbuf = sendbuf;
1113:   vmpi->recvbuf = recvbuf;
1114:   vmpi->sendlen = sendlen;
1115:   vmpi->recvlen = recvlen;
1116:   vmpi->coo_sf  = sf2;
1117:   PetscFunctionReturn(PETSC_SUCCESS);
1118: }

1120: PetscErrorCode VecSetValuesCOO_MPI(Vec x, const PetscScalar v[], InsertMode imode)
1121: {
1122:   Vec_MPI          *vmpi = (Vec_MPI *)x->data;
1123:   PetscInt          m;
1124:   PetscScalar      *a, *sendbuf = vmpi->sendbuf, *recvbuf = vmpi->recvbuf;
1125:   const PetscCount *jmap1 = vmpi->jmap1;
1126:   const PetscCount *perm1 = vmpi->perm1;
1127:   const PetscCount *imap2 = vmpi->imap2;
1128:   const PetscCount *jmap2 = vmpi->jmap2;
1129:   const PetscCount *perm2 = vmpi->perm2;
1130:   const PetscCount *Cperm = vmpi->Cperm;
1131:   const PetscCount  nnz2  = vmpi->nnz2;

1133:   PetscFunctionBegin;
1134:   PetscCall(VecGetLocalSize(x, &m));
1135:   PetscCall(VecGetArray(x, &a));

1137:   /* Pack entries to be sent to remote */
1138:   for (PetscInt i = 0; i < vmpi->sendlen; i++) sendbuf[i] = v[Cperm[i]];

1140:   /* Send remote entries to their owner and overlap the communication with local computation */
1141:   PetscCall(PetscSFReduceWithMemTypeBegin(vmpi->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
1142:   /* Add local entries to A and B */
1143:   for (PetscInt i = 0; i < m; i++) { /* All entries in a[] are either zero'ed or added with a value (i.e., initialized) */
1144:     PetscScalar sum = 0.0;           /* Do partial summation first to improve numerical stability */
1145:     for (PetscCount k = jmap1[i]; k < jmap1[i + 1]; k++) sum += v[perm1[k]];
1146:     a[i] = (imode == INSERT_VALUES ? 0.0 : a[i]) + sum;
1147:   }
1148:   PetscCall(PetscSFReduceEnd(vmpi->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

1150:   /* Add received remote entries to A and B */
1151:   for (PetscInt i = 0; i < nnz2; i++) {
1152:     for (PetscCount k = jmap2[i]; k < jmap2[i + 1]; k++) a[imap2[i]] += recvbuf[perm2[k]];
1153:   }

1155:   PetscCall(VecRestoreArray(x, &a));
1156:   PetscFunctionReturn(PETSC_SUCCESS);
1157: }
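/*
  A minimal sketch (illustrative, not part of pdvec.c) of the COO interface implemented by
  VecSetPreallocationCOO_MPI() and VecSetValuesCOO_MPI() above; repeated indices are summed and the
  indices may be off-process. The index and value arrays are arbitrary.
*/
#if 0
static PetscErrorCode ExampleCOOAssembly(Vec x)
{
  const PetscInt    coo_i[4] = {0, 5, 5, 7};         /* entries may repeat and need not be locally owned */
  const PetscScalar coo_v[4] = {1.0, 2.0, 3.0, 4.0}; /* the two contributions to index 5 are summed */

  PetscFunctionBeginUser;
  PetscCall(VecSetPreallocationCOO(x, 4, coo_i));      /* analyzes and communicates the pattern once */
  PetscCall(VecSetValuesCOO(x, coo_v, INSERT_VALUES)); /* may be called repeatedly with new values */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif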