Actual source code: fdda.c


  2: #include <petsc/private/dmdaimpl.h>
  3: #include <petscmat.h>
  4: #include <petscbt.h>

  6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
  8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
  9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

 11: /*
 12:    For ghost i that may be negative or greater than the upper bound this
 13:   maps it into the 0:m-1 range using periodicity
 14: */
 15: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))

 17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
 18: {
 19:   PetscInt i, j, nz, *fill;

 21:   PetscFunctionBegin;
 22:   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

 24:   /* count number nonzeros */
 25:   nz = 0;
 26:   for (i = 0; i < w; i++) {
 27:     for (j = 0; j < w; j++) {
 28:       if (dfill[w * i + j]) nz++;
 29:     }
 30:   }
 31:   PetscCall(PetscMalloc1(nz + w + 1, &fill));
 32:   /* construct modified CSR storage of nonzero structure */
 33:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 34:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 35:   nz = w + 1;
 36:   for (i = 0; i < w; i++) {
 37:     fill[i] = nz;
 38:     for (j = 0; j < w; j++) {
 39:       if (dfill[w * i + j]) {
 40:         fill[nz] = j;
 41:         nz++;
 42:       }
 43:     }
 44:   }
 45:   fill[w] = nz;

 47:   *rfill = fill;
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
 52: {
 53:   PetscInt nz;

 55:   PetscFunctionBegin;
 56:   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

 58:   /* Determine number of non-zeros */
 59:   nz = (dfillsparse[w] - w - 1);

 61:   /* Allocate space for our copy of the given sparse matrix representation. */
 62:   PetscCall(PetscMalloc1(nz + w + 1, rfill));
 63:   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
 64:   PetscFunctionReturn(PETSC_SUCCESS);
 65: }

 67: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 68: {
 69:   PetscInt i, k, cnt = 1;

 71:   PetscFunctionBegin;

 73:   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
 74:    columns to the left with any nonzeros in them plus 1 */
 75:   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
 76:   for (i = 0; i < dd->w; i++) {
 77:     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 78:   }
 79:   for (i = 0; i < dd->w; i++) {
 80:     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
 81:   }
 82:   PetscFunctionReturn(PETSC_SUCCESS);
 83: }

 85: /*@
 86:     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 87:     of the matrix returned by `DMCreateMatrix()`.

 89:     Logically Collective

 91:     Input Parameters:
 92: +   da - the distributed array
 93: .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
 94: -   ofill - the fill pattern in the off-diagonal blocks

 96:     Level: developer

 98:     Notes:
 99:     This only makes sense when you are doing multicomponent problems but using the
100:        `MATMPIAIJ` matrix format

102:            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
103:        representing coupling and 0 entries for missing coupling. For example
104: .vb
105:             dfill[9] = {1, 0, 0,
106:                         1, 1, 0,
107:                         0, 1, 1}
108: .ve
109:        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110:        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111:        diagonal block).

113:      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
114:      can be represented in the dfill, ofill format

116:    Contributed by Glenn Hammond

118: .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119: @*/
120: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121: {
122:   DM_DA *dd = (DM_DA *)da->data;

124:   PetscFunctionBegin;
125:   /* save the given dfill and ofill information */
126:   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127:   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

129:   /* count nonzeros in ofill columns */
130:   PetscCall(DMDASetBlockFills_Private2(dd));
131:   PetscFunctionReturn(PETSC_SUCCESS);
132: }

134: /*@
135:     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136:     of the matrix returned by `DMCreateMatrix()`, using sparse representations
137:     of fill patterns.

139:     Logically Collective

141:     Input Parameters:
142: +   da - the distributed array
143: .   dfill - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
144: -   ofill - the sparse fill pattern in the off-diagonal blocks

146:     Level: developer

148:     Notes:
149:     This only makes sense when you are doing multicomponent problems but using the
150:        `MATMPIAIJ` matrix format

152:            The format for `dfill` and `ofill` is a sparse representation of a
153:            dof-by-dof matrix with 1 entries representing coupling and 0 entries
154:            for missing coupling.  The sparse representation is a 1 dimensional
155:            array of length nz + dof + 1, where nz is the number of non-zeros in
156:            the matrix.  The first dof entries in the array give the
157:            starting array indices of each row's items in the rest of the array,
158:            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159:            and the remaining nz items give the column indices of each of
160:            the 1s within the logical 2D matrix.  Each row's items within
161:            the array are the column indices of the 1s within that row
162:            of the 2D matrix.  PETSc developers may recognize that this is the
163:            same format as that computed by the `DMDASetBlockFills_Private()`
164:            function from a dense 2D matrix representation.

166:      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
167:      can be represented in the `dfill`, `ofill` format

169:    Contributed by Philip C. Roth

171: .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172: @*/
173: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174: {
175:   DM_DA *dd = (DM_DA *)da->data;

177:   PetscFunctionBegin;
178:   /* save the given dfill and ofill information */
179:   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180:   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

182:   /* count nonzeros in ofill columns */
183:   PetscCall(DMDASetBlockFills_Private2(dd));
184:   PetscFunctionReturn(PETSC_SUCCESS);
185: }

187: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188: {
189:   PetscInt       dim, m, n, p, nc;
190:   DMBoundaryType bx, by, bz;
191:   MPI_Comm       comm;
192:   PetscMPIInt    size;
193:   PetscBool      isBAIJ;
194:   DM_DA         *dd = (DM_DA *)da->data;

196:   PetscFunctionBegin;
197:   /*
198:                                   m
199:           ------------------------------------------------------
200:          |                                                     |
201:          |                                                     |
202:          |               ----------------------                |
203:          |               |                    |                |
204:       n  |           yn  |                    |                |
205:          |               |                    |                |
206:          |               .---------------------                |
207:          |             (xs,ys)     xn                          |
208:          |            .                                        |
209:          |         (gxs,gys)                                   |
210:          |                                                     |
211:           -----------------------------------------------------
212:   */

214:   /*
215:          nc - number of components per grid point
216:          col - number of colors needed in one direction for single component problem

218:   */
219:   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

221:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222:   PetscCallMPI(MPI_Comm_size(comm, &size));
223:   if (ctype == IS_COLORING_LOCAL) {
224:     if (size == 1) {
225:       ctype = IS_COLORING_GLOBAL;
226:     } else {
227:       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
228:     }
229:   }

231:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
232:      matrices is for the blocks, not the individual matrix elements  */
233:   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
234:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
235:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
236:   if (isBAIJ) {
237:     dd->w  = 1;
238:     dd->xs = dd->xs / nc;
239:     dd->xe = dd->xe / nc;
240:     dd->Xs = dd->Xs / nc;
241:     dd->Xe = dd->Xe / nc;
242:   }

244:   /*
245:      We do not provide a getcoloring function in the DMDA operations because
246:    the basic DMDA does not know about matrices. We think of DMDA as being
247:    more low-level then matrices.
248:   */
249:   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
250:   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
251:   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
252:   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
253:   if (isBAIJ) {
254:     dd->w  = nc;
255:     dd->xs = dd->xs * nc;
256:     dd->xe = dd->xe * nc;
257:     dd->Xs = dd->Xs * nc;
258:     dd->Xe = dd->Xe * nc;
259:   }
260:   PetscFunctionReturn(PETSC_SUCCESS);
261: }

263: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
264: {
265:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
266:   PetscInt         ncolors = 0;
267:   MPI_Comm         comm;
268:   DMBoundaryType   bx, by;
269:   DMDAStencilType  st;
270:   ISColoringValue *colors;
271:   DM_DA           *dd = (DM_DA *)da->data;

273:   PetscFunctionBegin;
274:   /*
275:          nc - number of components per grid point
276:          col - number of colors needed in one direction for single component problem

278:   */
279:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
280:   col = 2 * s + 1;
281:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
282:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
283:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

285:   /* special case as taught to us by Paul Hovland */
286:   if (st == DMDA_STENCIL_STAR && s == 1) {
287:     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
288:   } else {
289:     if (ctype == IS_COLORING_GLOBAL) {
290:       if (!dd->localcoloring) {
291:         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
292:         ii = 0;
293:         for (j = ys; j < ys + ny; j++) {
294:           for (i = xs; i < xs + nx; i++) {
295:             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
296:           }
297:         }
298:         ncolors = nc + nc * (col - 1 + col * (col - 1));
299:         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
300:       }
301:       *coloring = dd->localcoloring;
302:     } else if (ctype == IS_COLORING_LOCAL) {
303:       if (!dd->ghostedcoloring) {
304:         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
305:         ii = 0;
306:         for (j = gys; j < gys + gny; j++) {
307:           for (i = gxs; i < gxs + gnx; i++) {
308:             for (k = 0; k < nc; k++) {
309:               /* the complicated stuff is to handle periodic boundaries */
310:               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
311:             }
312:           }
313:         }
314:         ncolors = nc + nc * (col - 1 + col * (col - 1));
315:         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
316:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

318:         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
319:       }
320:       *coloring = dd->ghostedcoloring;
321:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
322:   }
323:   PetscCall(ISColoringReference(*coloring));
324:   PetscFunctionReturn(PETSC_SUCCESS);
325: }

327: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
328: {
329:   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
330:   PetscInt         ncolors;
331:   MPI_Comm         comm;
332:   DMBoundaryType   bx, by, bz;
333:   DMDAStencilType  st;
334:   ISColoringValue *colors;
335:   DM_DA           *dd = (DM_DA *)da->data;

337:   PetscFunctionBegin;
338:   /*
339:          nc - number of components per grid point
340:          col - number of colors needed in one direction for single component problem
341:   */
342:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
343:   col = 2 * s + 1;
344:   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
345:                  by 2*stencil_width + 1\n");
346:   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
347:                  by 2*stencil_width + 1\n");
348:   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
349:                  by 2*stencil_width + 1\n");

351:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
352:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
353:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

355:   /* create the coloring */
356:   if (ctype == IS_COLORING_GLOBAL) {
357:     if (!dd->localcoloring) {
358:       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
359:       ii = 0;
360:       for (k = zs; k < zs + nz; k++) {
361:         for (j = ys; j < ys + ny; j++) {
362:           for (i = xs; i < xs + nx; i++) {
363:             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
364:           }
365:         }
366:       }
367:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
368:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
369:     }
370:     *coloring = dd->localcoloring;
371:   } else if (ctype == IS_COLORING_LOCAL) {
372:     if (!dd->ghostedcoloring) {
373:       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
374:       ii = 0;
375:       for (k = gzs; k < gzs + gnz; k++) {
376:         for (j = gys; j < gys + gny; j++) {
377:           for (i = gxs; i < gxs + gnx; i++) {
378:             for (l = 0; l < nc; l++) {
379:               /* the complicated stuff is to handle periodic boundaries */
380:               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
381:             }
382:           }
383:         }
384:       }
385:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
386:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
387:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
388:     }
389:     *coloring = dd->ghostedcoloring;
390:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
391:   PetscCall(ISColoringReference(*coloring));
392:   PetscFunctionReturn(PETSC_SUCCESS);
393: }

395: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
396: {
397:   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
398:   PetscInt         ncolors;
399:   MPI_Comm         comm;
400:   DMBoundaryType   bx;
401:   ISColoringValue *colors;
402:   DM_DA           *dd = (DM_DA *)da->data;

404:   PetscFunctionBegin;
405:   /*
406:          nc - number of components per grid point
407:          col - number of colors needed in one direction for single component problem
408:   */
409:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
410:   col = 2 * s + 1;
411:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
412:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
413:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

415:   /* create the coloring */
416:   if (ctype == IS_COLORING_GLOBAL) {
417:     if (!dd->localcoloring) {
418:       PetscCall(PetscMalloc1(nc * nx, &colors));
419:       if (dd->ofillcols) {
420:         PetscInt tc = 0;
421:         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
422:         i1 = 0;
423:         for (i = xs; i < xs + nx; i++) {
424:           for (l = 0; l < nc; l++) {
425:             if (dd->ofillcols[l] && (i % col)) {
426:               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
427:             } else {
428:               colors[i1++] = l;
429:             }
430:           }
431:         }
432:         ncolors = nc + 2 * s * tc;
433:       } else {
434:         i1 = 0;
435:         for (i = xs; i < xs + nx; i++) {
436:           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
437:         }
438:         ncolors = nc + nc * (col - 1);
439:       }
440:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
441:     }
442:     *coloring = dd->localcoloring;
443:   } else if (ctype == IS_COLORING_LOCAL) {
444:     if (!dd->ghostedcoloring) {
445:       PetscCall(PetscMalloc1(nc * gnx, &colors));
446:       i1 = 0;
447:       for (i = gxs; i < gxs + gnx; i++) {
448:         for (l = 0; l < nc; l++) {
449:           /* the complicated stuff is to handle periodic boundaries */
450:           colors[i1++] = l + nc * (SetInRange(i, m) % col);
451:         }
452:       }
453:       ncolors = nc + nc * (col - 1);
454:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
455:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
456:     }
457:     *coloring = dd->ghostedcoloring;
458:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
459:   PetscCall(ISColoringReference(*coloring));
460:   PetscFunctionReturn(PETSC_SUCCESS);
461: }

463: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
464: {
465:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
466:   PetscInt         ncolors;
467:   MPI_Comm         comm;
468:   DMBoundaryType   bx, by;
469:   ISColoringValue *colors;
470:   DM_DA           *dd = (DM_DA *)da->data;

472:   PetscFunctionBegin;
473:   /*
474:          nc - number of components per grid point
475:          col - number of colors needed in one direction for single component problem
476:   */
477:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
478:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
479:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
480:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
481:   /* create the coloring */
482:   if (ctype == IS_COLORING_GLOBAL) {
483:     if (!dd->localcoloring) {
484:       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
485:       ii = 0;
486:       for (j = ys; j < ys + ny; j++) {
487:         for (i = xs; i < xs + nx; i++) {
488:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
489:         }
490:       }
491:       ncolors = 5 * nc;
492:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
493:     }
494:     *coloring = dd->localcoloring;
495:   } else if (ctype == IS_COLORING_LOCAL) {
496:     if (!dd->ghostedcoloring) {
497:       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
498:       ii = 0;
499:       for (j = gys; j < gys + gny; j++) {
500:         for (i = gxs; i < gxs + gnx; i++) {
501:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
502:         }
503:       }
504:       ncolors = 5 * nc;
505:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
506:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
507:     }
508:     *coloring = dd->ghostedcoloring;
509:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
510:   PetscFunctionReturn(PETSC_SUCCESS);
511: }

513: /* =========================================================================== */
514: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
522: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
523: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
524: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
525: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
526: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
527: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

529: /*@C
530:    MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix

532:    Logically Collective

534:    Input Parameters:
535: +  mat - the matrix
536: -  da - the da

538:    Level: intermediate

540: .seealso: `DMDA`, `Mat`, `MatSetUp()`
541: @*/
542: PetscErrorCode MatSetupDM(Mat mat, DM da)
543: {
544:   PetscFunctionBegin;
547:   PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
548:   PetscFunctionReturn(PETSC_SUCCESS);
549: }

551: PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
552: {
553:   DM                da;
554:   const char       *prefix;
555:   Mat               Anatural;
556:   AO                ao;
557:   PetscInt          rstart, rend, *petsc, i;
558:   IS                is;
559:   MPI_Comm          comm;
560:   PetscViewerFormat format;

562:   PetscFunctionBegin;
563:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
564:   PetscCall(PetscViewerGetFormat(viewer, &format));
565:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

567:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
568:   PetscCall(MatGetDM(A, &da));
569:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

571:   PetscCall(DMDAGetAO(da, &ao));
572:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
573:   PetscCall(PetscMalloc1(rend - rstart, &petsc));
574:   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
575:   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
576:   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

578:   /* call viewer on natural ordering */
579:   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
580:   PetscCall(ISDestroy(&is));
581:   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
582:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
583:   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
584:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
585:   PetscCall(MatView(Anatural, viewer));
586:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
587:   PetscCall(MatDestroy(&Anatural));
588:   PetscFunctionReturn(PETSC_SUCCESS);
589: }

591: PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
592: {
593:   DM       da;
594:   Mat      Anatural, Aapp;
595:   AO       ao;
596:   PetscInt rstart, rend, *app, i, m, n, M, N;
597:   IS       is;
598:   MPI_Comm comm;

600:   PetscFunctionBegin;
601:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
602:   PetscCall(MatGetDM(A, &da));
603:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

605:   /* Load the matrix in natural ordering */
606:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
607:   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
608:   PetscCall(MatGetSize(A, &M, &N));
609:   PetscCall(MatGetLocalSize(A, &m, &n));
610:   PetscCall(MatSetSizes(Anatural, m, n, M, N));
611:   PetscCall(MatLoad(Anatural, viewer));

613:   /* Map natural ordering to application ordering and create IS */
614:   PetscCall(DMDAGetAO(da, &ao));
615:   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
616:   PetscCall(PetscMalloc1(rend - rstart, &app));
617:   for (i = rstart; i < rend; i++) app[i - rstart] = i;
618:   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
619:   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

621:   /* Do permutation and replace header */
622:   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
623:   PetscCall(MatHeaderReplace(A, &Aapp));
624:   PetscCall(ISDestroy(&is));
625:   PetscCall(MatDestroy(&Anatural));
626:   PetscFunctionReturn(PETSC_SUCCESS);
627: }

629: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
630: {
631:   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
632:   Mat         A;
633:   MPI_Comm    comm;
634:   MatType     Atype;
635:   MatType     mtype;
636:   PetscMPIInt size;
637:   DM_DA      *dd    = (DM_DA *)da->data;
638:   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

640:   PetscFunctionBegin;
641:   PetscCall(MatInitializePackage());
642:   mtype = da->mattype;

644:   /*
645:                                   m
646:           ------------------------------------------------------
647:          |                                                     |
648:          |                                                     |
649:          |               ----------------------                |
650:          |               |                    |                |
651:       n  |           ny  |                    |                |
652:          |               |                    |                |
653:          |               .---------------------                |
654:          |             (xs,ys)     nx                          |
655:          |            .                                        |
656:          |         (gxs,gys)                                   |
657:          |                                                     |
658:           -----------------------------------------------------
659:   */

661:   /*
662:          nc - number of components per grid point
663:          col - number of colors needed in one direction for single component problem
664:   */
665:   M   = dd->M;
666:   N   = dd->N;
667:   P   = dd->P;
668:   dim = da->dim;
669:   dof = dd->w;
670:   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
671:   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
672:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
673:   PetscCall(MatCreate(comm, &A));
674:   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
675:   PetscCall(MatSetType(A, mtype));
676:   PetscCall(MatSetFromOptions(A));
677:   if (dof * nx * ny * nz < da->bind_below) {
678:     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
679:     PetscCall(MatBindToCPU(A, PETSC_TRUE));
680:   }
681:   PetscCall(MatSetDM(A, da));
682:   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
683:   PetscCall(MatGetType(A, &Atype));
684:   /*
685:      We do not provide a getmatrix function in the DMDA operations because
686:    the basic DMDA does not know about matrices. We think of DMDA as being more
687:    more low-level than matrices. This is kind of cheating but, cause sometimes
688:    we think of DMDA has higher level than matrices.

690:      We could switch based on Atype (or mtype), but we do not since the
691:    specialized setting routines depend only on the particular preallocation
692:    details of the matrix, not the type itself.
693:   */
694:   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
695:   if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
696:   if (!aij) {
697:     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
698:     if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
699:     if (!baij) {
700:       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
701:       if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
702:       if (!sbaij) {
703:         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
704:         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
705:       }
706:       if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
707:     }
708:   }
709:   if (aij) {
710:     if (dim == 1) {
711:       if (dd->ofill) {
712:         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
713:       } else {
714:         DMBoundaryType bx;
715:         PetscMPIInt    size;
716:         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
717:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
718:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
719:           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
720:         } else {
721:           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
722:         }
723:       }
724:     } else if (dim == 2) {
725:       if (dd->ofill) {
726:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
727:       } else {
728:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
729:       }
730:     } else if (dim == 3) {
731:       if (dd->ofill) {
732:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
733:       } else {
734:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
735:       }
736:     }
737:   } else if (baij) {
738:     if (dim == 2) {
739:       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
740:     } else if (dim == 3) {
741:       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
742:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
743:   } else if (sbaij) {
744:     if (dim == 2) {
745:       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
746:     } else if (dim == 3) {
747:       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
748:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
749:   } else if (sell) {
750:     if (dim == 2) {
751:       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
752:     } else if (dim == 3) {
753:       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
754:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
755:   } else if (is) {
756:     PetscCall(DMCreateMatrix_DA_IS(da, A));
757:   } else {
758:     ISLocalToGlobalMapping ltog;

760:     PetscCall(MatSetBlockSize(A, dof));
761:     PetscCall(MatSetUp(A));
762:     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
763:     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
764:   }
765:   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
766:   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
767:   PetscCall(MatSetDM(A, da));
768:   PetscCallMPI(MPI_Comm_size(comm, &size));
769:   if (size > 1) {
770:     /* change viewer to display matrix in natural ordering */
771:     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
772:     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
773:   }
774:   *J = A;
775:   PetscFunctionReturn(PETSC_SUCCESS);
776: }

778: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

780: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
781: {
782:   DM_DA                 *da = (DM_DA *)dm->data;
783:   Mat                    lJ, P;
784:   ISLocalToGlobalMapping ltog;
785:   IS                     is;
786:   PetscBT                bt;
787:   const PetscInt        *e_loc, *idx;
788:   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

790:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
791:      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
792:   PetscFunctionBegin;
793:   dof = da->w;
794:   PetscCall(MatSetBlockSize(J, dof));
795:   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

797:   /* flag local elements indices in local DMDA numbering */
798:   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
799:   PetscCall(PetscBTCreate(nv / dof, &bt));
800:   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
801:   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

803:   /* filter out (set to -1) the global indices not used by the local elements */
804:   PetscCall(PetscMalloc1(nv / dof, &gidx));
805:   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
806:   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
807:   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
808:   for (i = 0; i < nv / dof; i++)
809:     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
810:   PetscCall(PetscBTDestroy(&bt));
811:   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
812:   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
813:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
814:   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
815:   PetscCall(ISDestroy(&is));

817:   /* Preallocation */
818:   if (dm->prealloc_skip) {
819:     PetscCall(MatSetUp(J));
820:   } else {
821:     PetscCall(MatISGetLocalMat(J, &lJ));
822:     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
823:     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
824:     PetscCall(MatSetType(P, MATPREALLOCATOR));
825:     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
826:     PetscCall(MatGetSize(lJ, &N, NULL));
827:     PetscCall(MatGetLocalSize(lJ, &n, NULL));
828:     PetscCall(MatSetSizes(P, n, n, N, N));
829:     PetscCall(MatSetBlockSize(P, dof));
830:     PetscCall(MatSetUp(P));
831:     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
832:     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
833:     PetscCall(MatISRestoreLocalMat(J, &lJ));
834:     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
835:     PetscCall(MatDestroy(&P));

837:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
838:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
839:   }
840:   PetscFunctionReturn(PETSC_SUCCESS);
841: }

843: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
844: {
845:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
846:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
847:   MPI_Comm               comm;
848:   PetscScalar           *values;
849:   DMBoundaryType         bx, by;
850:   ISLocalToGlobalMapping ltog;
851:   DMDAStencilType        st;

853:   PetscFunctionBegin;
854:   /*
855:          nc - number of components per grid point
856:          col - number of colors needed in one direction for single component problem
857:   */
858:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
859:   col = 2 * s + 1;
860:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
861:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
862:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

864:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
865:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

867:   PetscCall(MatSetBlockSize(J, nc));
868:   /* determine the matrix preallocation information */
869:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
870:   for (i = xs; i < xs + nx; i++) {
871:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
872:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

874:     for (j = ys; j < ys + ny; j++) {
875:       slot = i - gxs + gnx * (j - gys);

877:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
878:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

880:       cnt = 0;
881:       for (k = 0; k < nc; k++) {
882:         for (l = lstart; l < lend + 1; l++) {
883:           for (p = pstart; p < pend + 1; p++) {
884:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
885:               cols[cnt++] = k + nc * (slot + gnx * l + p);
886:             }
887:           }
888:         }
889:         rows[k] = k + nc * (slot);
890:       }
891:       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
892:     }
893:   }
894:   PetscCall(MatSetBlockSize(J, nc));
895:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
896:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
897:   MatPreallocateEnd(dnz, onz);

899:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

901:   /*
902:     For each node in the grid: we get the neighbors in the local (on processor ordering
903:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
904:     PETSc ordering.
905:   */
906:   if (!da->prealloc_only) {
907:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
908:     for (i = xs; i < xs + nx; i++) {
909:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
910:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

912:       for (j = ys; j < ys + ny; j++) {
913:         slot = i - gxs + gnx * (j - gys);

915:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
916:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

918:         cnt = 0;
919:         for (k = 0; k < nc; k++) {
920:           for (l = lstart; l < lend + 1; l++) {
921:             for (p = pstart; p < pend + 1; p++) {
922:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
923:                 cols[cnt++] = k + nc * (slot + gnx * l + p);
924:               }
925:             }
926:           }
927:           rows[k] = k + nc * (slot);
928:         }
929:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
930:       }
931:     }
932:     PetscCall(PetscFree(values));
933:     /* do not copy values to GPU since they are all zero and not yet needed there */
934:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
935:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
936:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
937:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
938:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
939:   }
940:   PetscCall(PetscFree2(rows, cols));
941:   PetscFunctionReturn(PETSC_SUCCESS);
942: }

944: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
945: {
946:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
947:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
948:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
949:   MPI_Comm               comm;
950:   PetscScalar           *values;
951:   DMBoundaryType         bx, by, bz;
952:   ISLocalToGlobalMapping ltog;
953:   DMDAStencilType        st;

955:   PetscFunctionBegin;
956:   /*
957:          nc - number of components per grid point
958:          col - number of colors needed in one direction for single component problem
959:   */
960:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
961:   col = 2 * s + 1;
962:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
963:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
964:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

966:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
967:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

969:   PetscCall(MatSetBlockSize(J, nc));
970:   /* determine the matrix preallocation information */
971:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
972:   for (i = xs; i < xs + nx; i++) {
973:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
974:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
975:     for (j = ys; j < ys + ny; j++) {
976:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
977:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
978:       for (k = zs; k < zs + nz; k++) {
979:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
980:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

982:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

984:         cnt = 0;
985:         for (l = 0; l < nc; l++) {
986:           for (ii = istart; ii < iend + 1; ii++) {
987:             for (jj = jstart; jj < jend + 1; jj++) {
988:               for (kk = kstart; kk < kend + 1; kk++) {
989:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
990:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
991:                 }
992:               }
993:             }
994:           }
995:           rows[l] = l + nc * (slot);
996:         }
997:         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
998:       }
999:     }
1000:   }
1001:   PetscCall(MatSetBlockSize(J, nc));
1002:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1003:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1004:   MatPreallocateEnd(dnz, onz);
1005:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1007:   /*
1008:     For each node in the grid: we get the neighbors in the local (on processor ordering
1009:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1010:     PETSc ordering.
1011:   */
1012:   if (!da->prealloc_only) {
1013:     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1014:     for (i = xs; i < xs + nx; i++) {
1015:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1016:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1017:       for (j = ys; j < ys + ny; j++) {
1018:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1019:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1020:         for (k = zs; k < zs + nz; k++) {
1021:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1022:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1024:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1026:           cnt = 0;
1027:           for (l = 0; l < nc; l++) {
1028:             for (ii = istart; ii < iend + 1; ii++) {
1029:               for (jj = jstart; jj < jend + 1; jj++) {
1030:                 for (kk = kstart; kk < kend + 1; kk++) {
1031:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1032:                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1033:                   }
1034:                 }
1035:               }
1036:             }
1037:             rows[l] = l + nc * (slot);
1038:           }
1039:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1040:         }
1041:       }
1042:     }
1043:     PetscCall(PetscFree(values));
1044:     /* do not copy values to GPU since they are all zero and not yet needed there */
1045:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1046:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1047:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1048:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1049:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1050:   }
1051:   PetscCall(PetscFree2(rows, cols));
1052:   PetscFunctionReturn(PETSC_SUCCESS);
1053: }

1055: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1056: {
1057:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1058:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1059:   MPI_Comm               comm;
1060:   DMBoundaryType         bx, by;
1061:   ISLocalToGlobalMapping ltog, mltog;
1062:   DMDAStencilType        st;
1063:   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

1065:   PetscFunctionBegin;
1066:   /*
1067:          nc - number of components per grid point
1068:          col - number of colors needed in one direction for single component problem
1069:   */
1070:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1071:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1072:   col = 2 * s + 1;
1073:   /*
1074:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1075:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1076:   */
1077:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1078:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1079:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1080:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1081:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1083:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1084:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1086:   PetscCall(MatSetBlockSize(J, nc));
1087:   /* determine the matrix preallocation information */
1088:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1089:   for (i = xs; i < xs + nx; i++) {
1090:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1091:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1093:     for (j = ys; j < ys + ny; j++) {
1094:       slot = i - gxs + gnx * (j - gys);

1096:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1097:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1099:       cnt = 0;
1100:       for (k = 0; k < nc; k++) {
1101:         for (l = lstart; l < lend + 1; l++) {
1102:           for (p = pstart; p < pend + 1; p++) {
1103:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1104:               cols[cnt++] = k + nc * (slot + gnx * l + p);
1105:             }
1106:           }
1107:         }
1108:         rows[k] = k + nc * (slot);
1109:       }
1110:       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1111:       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1112:     }
1113:   }
1114:   PetscCall(MatSetBlockSize(J, nc));
1115:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1116:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1117:   MatPreallocateEnd(dnz, onz);
1118:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1119:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1121:   /*
1122:     For each node in the grid: we get the neighbors in the local (on processor ordering
1123:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1124:     PETSc ordering.
1125:   */
1126:   if (!da->prealloc_only) {
1127:     for (i = xs; i < xs + nx; i++) {
1128:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1129:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1131:       for (j = ys; j < ys + ny; j++) {
1132:         slot = i - gxs + gnx * (j - gys);

1134:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1135:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1137:         cnt = 0;
1138:         for (l = lstart; l < lend + 1; l++) {
1139:           for (p = pstart; p < pend + 1; p++) {
1140:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1141:               cols[cnt++] = nc * (slot + gnx * l + p);
1142:               for (k = 1; k < nc; k++) {
1143:                 cols[cnt] = 1 + cols[cnt - 1];
1144:                 cnt++;
1145:               }
1146:             }
1147:           }
1148:         }
1149:         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1150:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1151:       }
1152:     }
1153:     /* do not copy values to GPU since they are all zero and not yet needed there */
1154:     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1155:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1156:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1157:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1158:     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1159:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1160:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1161:   }
1162:   PetscCall(PetscFree2(rows, cols));
1163:   PetscFunctionReturn(PETSC_SUCCESS);
1164: }

1166: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1167: {
1168:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1169:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1170:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1171:   DM_DA                 *dd = (DM_DA *)da->data;
1172:   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1173:   MPI_Comm               comm;
1174:   DMBoundaryType         bx, by;
1175:   ISLocalToGlobalMapping ltog;
1176:   DMDAStencilType        st;
1177:   PetscBool              removedups = PETSC_FALSE;

1179:   PetscFunctionBegin;
1180:   /*
1181:          nc - number of components per grid point
1182:          col - number of colors needed in one direction for single component problem
1183:   */
1184:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1185:   col = 2 * s + 1;
1186:   /*
1187:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1188:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1189:   */
1190:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1191:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1192:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1193:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1194:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1196:   PetscCall(PetscMalloc1(col * col * nc, &cols));
1197:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1199:   PetscCall(MatSetBlockSize(J, nc));
1200:   /* determine the matrix preallocation information */
1201:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1202:   for (i = xs; i < xs + nx; i++) {
1203:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1204:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1206:     for (j = ys; j < ys + ny; j++) {
1207:       slot = i - gxs + gnx * (j - gys);

1209:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1210:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1212:       for (k = 0; k < nc; k++) {
1213:         cnt = 0;
1214:         for (l = lstart; l < lend + 1; l++) {
1215:           for (p = pstart; p < pend + 1; p++) {
1216:             if (l || p) {
1217:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1218:                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1219:               }
1220:             } else {
1221:               if (dfill) {
1222:                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1223:               } else {
1224:                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1225:               }
1226:             }
1227:           }
1228:         }
1229:         row    = k + nc * (slot);
1230:         maxcnt = PetscMax(maxcnt, cnt);
1231:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1232:         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1233:       }
1234:     }
1235:   }
1236:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1237:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1238:   MatPreallocateEnd(dnz, onz);
1239:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1241:   /*
1242:     For each node in the grid: we get the neighbors in the local (on processor ordering
1243:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1244:     PETSc ordering.
1245:   */
1246:   if (!da->prealloc_only) {
1247:     for (i = xs; i < xs + nx; i++) {
1248:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1249:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1251:       for (j = ys; j < ys + ny; j++) {
1252:         slot = i - gxs + gnx * (j - gys);

1254:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1255:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1257:         for (k = 0; k < nc; k++) {
1258:           cnt = 0;
1259:           for (l = lstart; l < lend + 1; l++) {
1260:             for (p = pstart; p < pend + 1; p++) {
1261:               if (l || p) {
1262:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1263:                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1264:                 }
1265:               } else {
1266:                 if (dfill) {
1267:                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1268:                 } else {
1269:                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1270:                 }
1271:               }
1272:             }
1273:           }
1274:           row = k + nc * (slot);
1275:           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1276:         }
1277:       }
1278:     }
1279:     /* do not copy values to GPU since they are all zero and not yet needed there */
1280:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1281:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1282:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1283:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1284:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1285:   }
1286:   PetscCall(PetscFree(cols));
1287:   PetscFunctionReturn(PETSC_SUCCESS);
1288: }

1290: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1291: {
1292:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1293:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1294:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1295:   MPI_Comm               comm;
1296:   DMBoundaryType         bx, by, bz;
1297:   ISLocalToGlobalMapping ltog, mltog;
1298:   DMDAStencilType        st;
1299:   PetscBool              removedups = PETSC_FALSE;

1301:   PetscFunctionBegin;
1302:   /*
1303:          nc - number of components per grid point
1304:          col - number of colors needed in one direction for single component problem
1305:   */
1306:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1307:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1308:   col = 2 * s + 1;

1310:   /*
1311:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1312:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1313:   */
1314:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1315:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1316:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

1318:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1319:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1320:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1322:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1323:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1325:   PetscCall(MatSetBlockSize(J, nc));
1326:   /* determine the matrix preallocation information */
1327:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1328:   for (i = xs; i < xs + nx; i++) {
1329:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1330:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1331:     for (j = ys; j < ys + ny; j++) {
1332:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1333:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1334:       for (k = zs; k < zs + nz; k++) {
1335:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1336:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1338:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1340:         cnt = 0;
1341:         for (l = 0; l < nc; l++) {
1342:           for (ii = istart; ii < iend + 1; ii++) {
1343:             for (jj = jstart; jj < jend + 1; jj++) {
1344:               for (kk = kstart; kk < kend + 1; kk++) {
1345:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1346:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1347:                 }
1348:               }
1349:             }
1350:           }
1351:           rows[l] = l + nc * (slot);
1352:         }
1353:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1354:         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1355:       }
1356:     }
1357:   }
1358:   PetscCall(MatSetBlockSize(J, nc));
1359:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1360:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1361:   MatPreallocateEnd(dnz, onz);
1362:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1363:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1365:   /*
1366:     For each node in the grid: we get the neighbors in the local (on processor ordering
1367:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1368:     PETSc ordering.
1369:   */
1370:   if (!da->prealloc_only) {
1371:     for (i = xs; i < xs + nx; i++) {
1372:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1373:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1374:       for (j = ys; j < ys + ny; j++) {
1375:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1376:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1377:         for (k = zs; k < zs + nz; k++) {
1378:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1379:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1381:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1383:           cnt = 0;
1384:           for (kk = kstart; kk < kend + 1; kk++) {
1385:             for (jj = jstart; jj < jend + 1; jj++) {
1386:               for (ii = istart; ii < iend + 1; ii++) {
1387:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1388:                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1389:                   for (l = 1; l < nc; l++) {
1390:                     cols[cnt] = 1 + cols[cnt - 1];
1391:                     cnt++;
1392:                   }
1393:                 }
1394:               }
1395:             }
1396:           }
1397:           rows[0] = nc * (slot);
1398:           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1399:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1400:         }
1401:       }
1402:     }
1403:     /* do not copy values to GPU since they are all zero and not yet needed there */
1404:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1405:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1406:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1407:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1408:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1409:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1410:   }
1411:   PetscCall(PetscFree2(rows, cols));
1412:   PetscFunctionReturn(PETSC_SUCCESS);
1413: }

1415: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1416: {
1417:   DM_DA                 *dd = (DM_DA *)da->data;
1418:   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1419:   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1420:   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1421:   DMBoundaryType         bx;
1422:   ISLocalToGlobalMapping ltog;
1423:   PetscMPIInt            rank, size;

1425:   PetscFunctionBegin;
1426:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1427:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

1429:   /*
1430:          nc - number of components per grid point
1431:   */
1432:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1433:   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1434:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1435:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1437:   PetscCall(MatSetBlockSize(J, nc));
1438:   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

1440:   /*
1441:         note should be smaller for first and last process with no periodic
1442:         does not handle dfill
1443:   */
1444:   cnt = 0;
1445:   /* coupling with process to the left */
1446:   for (i = 0; i < s; i++) {
1447:     for (j = 0; j < nc; j++) {
1448:       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1449:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1450:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1451:         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1452:         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1453:       }
1454:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1455:       cnt++;
1456:     }
1457:   }
1458:   for (i = s; i < nx - s; i++) {
1459:     for (j = 0; j < nc; j++) {
1460:       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1461:       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1462:       cnt++;
1463:     }
1464:   }
1465:   /* coupling with process to the right */
1466:   for (i = nx - s; i < nx; i++) {
1467:     for (j = 0; j < nc; j++) {
1468:       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1469:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1470:       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1471:         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1472:         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1473:       }
1474:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1475:       cnt++;
1476:     }
1477:   }

1479:   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1480:   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1481:   PetscCall(PetscFree2(cols, ocols));

1483:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1484:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1486:   /*
1487:     For each node in the grid: we get the neighbors in the local (on processor ordering
1488:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1489:     PETSc ordering.
1490:   */
1491:   if (!da->prealloc_only) {
1492:     PetscCall(PetscMalloc1(maxcnt, &cols));
1493:     row = xs * nc;
1494:     /* coupling with process to the left */
1495:     for (i = xs; i < xs + s; i++) {
1496:       for (j = 0; j < nc; j++) {
1497:         cnt = 0;
1498:         if (rank) {
1499:           for (l = 0; l < s; l++) {
1500:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1501:           }
1502:         }
1503:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1504:           for (l = 0; l < s; l++) {
1505:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1506:           }
1507:         }
1508:         if (dfill) {
1509:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1510:         } else {
1511:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1512:         }
1513:         for (l = 0; l < s; l++) {
1514:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1515:         }
1516:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1517:         row++;
1518:       }
1519:     }
1520:     for (i = xs + s; i < xs + nx - s; i++) {
1521:       for (j = 0; j < nc; j++) {
1522:         cnt = 0;
1523:         for (l = 0; l < s; l++) {
1524:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1525:         }
1526:         if (dfill) {
1527:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1528:         } else {
1529:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1530:         }
1531:         for (l = 0; l < s; l++) {
1532:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1533:         }
1534:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1535:         row++;
1536:       }
1537:     }
1538:     /* coupling with process to the right */
1539:     for (i = xs + nx - s; i < xs + nx; i++) {
1540:       for (j = 0; j < nc; j++) {
1541:         cnt = 0;
1542:         for (l = 0; l < s; l++) {
1543:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1544:         }
1545:         if (dfill) {
1546:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1547:         } else {
1548:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1549:         }
1550:         if (rank < size - 1) {
1551:           for (l = 0; l < s; l++) {
1552:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1553:           }
1554:         }
1555:         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1556:           for (l = 0; l < s; l++) {
1557:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1558:           }
1559:         }
1560:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1561:         row++;
1562:       }
1563:     }
1564:     PetscCall(PetscFree(cols));
1565:     /* do not copy values to GPU since they are all zero and not yet needed there */
1566:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1567:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1568:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1569:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1570:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1571:   }
1572:   PetscFunctionReturn(PETSC_SUCCESS);
1573: }

1575: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1576: {
1577:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1578:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1579:   PetscInt               istart, iend;
1580:   DMBoundaryType         bx;
1581:   ISLocalToGlobalMapping ltog, mltog;

1583:   PetscFunctionBegin;
1584:   /*
1585:          nc - number of components per grid point
1586:          col - number of colors needed in one direction for single component problem
1587:   */
1588:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1589:   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1590:   col = 2 * s + 1;

1592:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1593:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1595:   PetscCall(MatSetBlockSize(J, nc));
1596:   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1597:   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

1599:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1600:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1601:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1603:   /*
1604:     For each node in the grid: we get the neighbors in the local (on processor ordering
1605:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1606:     PETSc ordering.
1607:   */
1608:   if (!da->prealloc_only) {
1609:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1610:     for (i = xs; i < xs + nx; i++) {
1611:       istart = PetscMax(-s, gxs - i);
1612:       iend   = PetscMin(s, gxs + gnx - i - 1);
1613:       slot   = i - gxs;

1615:       cnt = 0;
1616:       for (i1 = istart; i1 < iend + 1; i1++) {
1617:         cols[cnt++] = nc * (slot + i1);
1618:         for (l = 1; l < nc; l++) {
1619:           cols[cnt] = 1 + cols[cnt - 1];
1620:           cnt++;
1621:         }
1622:       }
1623:       rows[0] = nc * (slot);
1624:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1625:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1626:     }
1627:     /* do not copy values to GPU since they are all zero and not yet needed there */
1628:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1629:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1630:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1631:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1632:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1633:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1634:     PetscCall(PetscFree2(rows, cols));
1635:   }
1636:   PetscFunctionReturn(PETSC_SUCCESS);
1637: }

1639: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1640: {
1641:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1642:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1643:   PetscInt               istart, iend;
1644:   DMBoundaryType         bx;
1645:   ISLocalToGlobalMapping ltog, mltog;

1647:   PetscFunctionBegin;
1648:   /*
1649:          nc - number of components per grid point
1650:          col - number of colors needed in one direction for single component problem
1651:   */
1652:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1653:   col = 2 * s + 1;

1655:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1656:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1658:   PetscCall(MatSetBlockSize(J, nc));
1659:   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

1661:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1662:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1663:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1665:   /*
1666:     For each node in the grid: we get the neighbors in the local (on processor ordering
1667:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1668:     PETSc ordering.
1669:   */
1670:   if (!da->prealloc_only) {
1671:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1672:     for (i = xs; i < xs + nx; i++) {
1673:       istart = PetscMax(-s, gxs - i);
1674:       iend   = PetscMin(s, gxs + gnx - i - 1);
1675:       slot   = i - gxs;

1677:       cnt = 0;
1678:       for (i1 = istart; i1 < iend + 1; i1++) {
1679:         cols[cnt++] = nc * (slot + i1);
1680:         for (l = 1; l < nc; l++) {
1681:           cols[cnt] = 1 + cols[cnt - 1];
1682:           cnt++;
1683:         }
1684:       }
1685:       rows[0] = nc * (slot);
1686:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1687:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1688:     }
1689:     /* do not copy values to GPU since they are all zero and not yet needed there */
1690:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1691:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1692:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1693:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1694:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1695:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1696:     PetscCall(PetscFree2(rows, cols));
1697:   }
1698:   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1699:   PetscFunctionReturn(PETSC_SUCCESS);
1700: }

1702: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1703: {
1704:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1705:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1706:   PetscInt               istart, iend, jstart, jend, ii, jj;
1707:   MPI_Comm               comm;
1708:   PetscScalar           *values;
1709:   DMBoundaryType         bx, by;
1710:   DMDAStencilType        st;
1711:   ISLocalToGlobalMapping ltog;

1713:   PetscFunctionBegin;
1714:   /*
1715:      nc - number of components per grid point
1716:      col - number of colors needed in one direction for single component problem
1717:   */
1718:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1719:   col = 2 * s + 1;

1721:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1722:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1723:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1725:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1727:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1729:   /* determine the matrix preallocation information */
1730:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1731:   for (i = xs; i < xs + nx; i++) {
1732:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1733:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1734:     for (j = ys; j < ys + ny; j++) {
1735:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1736:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1737:       slot   = i - gxs + gnx * (j - gys);

1739:       /* Find block columns in block row */
1740:       cnt = 0;
1741:       for (ii = istart; ii < iend + 1; ii++) {
1742:         for (jj = jstart; jj < jend + 1; jj++) {
1743:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1744:             cols[cnt++] = slot + ii + gnx * jj;
1745:           }
1746:         }
1747:       }
1748:       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1749:     }
1750:   }
1751:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1752:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1753:   MatPreallocateEnd(dnz, onz);

1755:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1757:   /*
1758:     For each node in the grid: we get the neighbors in the local (on processor ordering
1759:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1760:     PETSc ordering.
1761:   */
1762:   if (!da->prealloc_only) {
1763:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1764:     for (i = xs; i < xs + nx; i++) {
1765:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1766:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1767:       for (j = ys; j < ys + ny; j++) {
1768:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1769:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1770:         slot   = i - gxs + gnx * (j - gys);
1771:         cnt    = 0;
1772:         for (ii = istart; ii < iend + 1; ii++) {
1773:           for (jj = jstart; jj < jend + 1; jj++) {
1774:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1775:               cols[cnt++] = slot + ii + gnx * jj;
1776:             }
1777:           }
1778:         }
1779:         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1780:       }
1781:     }
1782:     PetscCall(PetscFree(values));
1783:     /* do not copy values to GPU since they are all zero and not yet needed there */
1784:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1785:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1786:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1787:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1788:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1789:   }
1790:   PetscCall(PetscFree(cols));
1791:   PetscFunctionReturn(PETSC_SUCCESS);
1792: }

1794: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1795: {
1796:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1797:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1798:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1799:   MPI_Comm               comm;
1800:   PetscScalar           *values;
1801:   DMBoundaryType         bx, by, bz;
1802:   DMDAStencilType        st;
1803:   ISLocalToGlobalMapping ltog;

1805:   PetscFunctionBegin;
1806:   /*
1807:          nc - number of components per grid point
1808:          col - number of colors needed in one direction for single component problem
1809:   */
1810:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1811:   col = 2 * s + 1;

1813:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1814:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1815:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1817:   PetscCall(PetscMalloc1(col * col * col, &cols));

1819:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1821:   /* determine the matrix preallocation information */
1822:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1823:   for (i = xs; i < xs + nx; i++) {
1824:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1825:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1826:     for (j = ys; j < ys + ny; j++) {
1827:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1828:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1829:       for (k = zs; k < zs + nz; k++) {
1830:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1831:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1833:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1835:         /* Find block columns in block row */
1836:         cnt = 0;
1837:         for (ii = istart; ii < iend + 1; ii++) {
1838:           for (jj = jstart; jj < jend + 1; jj++) {
1839:             for (kk = kstart; kk < kend + 1; kk++) {
1840:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1841:                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1842:               }
1843:             }
1844:           }
1845:         }
1846:         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1847:       }
1848:     }
1849:   }
1850:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1851:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1852:   MatPreallocateEnd(dnz, onz);

1854:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1856:   /*
1857:     For each node in the grid: we get the neighbors in the local (on processor ordering
1858:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1859:     PETSc ordering.
1860:   */
1861:   if (!da->prealloc_only) {
1862:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1863:     for (i = xs; i < xs + nx; i++) {
1864:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1865:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1866:       for (j = ys; j < ys + ny; j++) {
1867:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1868:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1869:         for (k = zs; k < zs + nz; k++) {
1870:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1871:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1873:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1875:           cnt = 0;
1876:           for (ii = istart; ii < iend + 1; ii++) {
1877:             for (jj = jstart; jj < jend + 1; jj++) {
1878:               for (kk = kstart; kk < kend + 1; kk++) {
1879:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1880:                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1881:                 }
1882:               }
1883:             }
1884:           }
1885:           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1886:         }
1887:       }
1888:     }
1889:     PetscCall(PetscFree(values));
1890:     /* do not copy values to GPU since they are all zero and not yet needed there */
1891:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1892:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1893:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1894:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1895:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1896:   }
1897:   PetscCall(PetscFree(cols));
1898:   PetscFunctionReturn(PETSC_SUCCESS);
1899: }

1901: /*
1902:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1903:   identify in the local ordering with periodic domain.
1904: */
1905: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1906: {
1907:   PetscInt i, n;

1909:   PetscFunctionBegin;
1910:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1911:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1912:   for (i = 0, n = 0; i < *cnt; i++) {
1913:     if (col[i] >= *row) col[n++] = col[i];
1914:   }
1915:   *cnt = n;
1916:   PetscFunctionReturn(PETSC_SUCCESS);
1917: }

1919: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1920: {
1921:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1922:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1923:   PetscInt               istart, iend, jstart, jend, ii, jj;
1924:   MPI_Comm               comm;
1925:   PetscScalar           *values;
1926:   DMBoundaryType         bx, by;
1927:   DMDAStencilType        st;
1928:   ISLocalToGlobalMapping ltog;

1930:   PetscFunctionBegin;
1931:   /*
1932:      nc - number of components per grid point
1933:      col - number of colors needed in one direction for single component problem
1934:   */
1935:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1936:   col = 2 * s + 1;

1938:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1939:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1940:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1942:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1944:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1946:   /* determine the matrix preallocation information */
1947:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1948:   for (i = xs; i < xs + nx; i++) {
1949:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1950:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1951:     for (j = ys; j < ys + ny; j++) {
1952:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1953:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1954:       slot   = i - gxs + gnx * (j - gys);

1956:       /* Find block columns in block row */
1957:       cnt = 0;
1958:       for (ii = istart; ii < iend + 1; ii++) {
1959:         for (jj = jstart; jj < jend + 1; jj++) {
1960:           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1961:         }
1962:       }
1963:       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1964:       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1965:     }
1966:   }
1967:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1968:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1969:   MatPreallocateEnd(dnz, onz);

1971:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1973:   /*
1974:     For each node in the grid: we get the neighbors in the local (on processor ordering
1975:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1976:     PETSc ordering.
1977:   */
1978:   if (!da->prealloc_only) {
1979:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1980:     for (i = xs; i < xs + nx; i++) {
1981:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1982:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1983:       for (j = ys; j < ys + ny; j++) {
1984:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1985:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1986:         slot   = i - gxs + gnx * (j - gys);

1988:         /* Find block columns in block row */
1989:         cnt = 0;
1990:         for (ii = istart; ii < iend + 1; ii++) {
1991:           for (jj = jstart; jj < jend + 1; jj++) {
1992:             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1993:           }
1994:         }
1995:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1996:         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1997:       }
1998:     }
1999:     PetscCall(PetscFree(values));
2000:     /* do not copy values to GPU since they are all zero and not yet needed there */
2001:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2002:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2003:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2004:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2005:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2006:   }
2007:   PetscCall(PetscFree(cols));
2008:   PetscFunctionReturn(PETSC_SUCCESS);
2009: }

2011: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2012: {
2013:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2014:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2015:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2016:   MPI_Comm               comm;
2017:   PetscScalar           *values;
2018:   DMBoundaryType         bx, by, bz;
2019:   DMDAStencilType        st;
2020:   ISLocalToGlobalMapping ltog;

2022:   PetscFunctionBegin;
2023:   /*
2024:      nc - number of components per grid point
2025:      col - number of colors needed in one direction for single component problem
2026:   */
2027:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2028:   col = 2 * s + 1;

2030:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2031:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2032:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2034:   /* create the matrix */
2035:   PetscCall(PetscMalloc1(col * col * col, &cols));

2037:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2039:   /* determine the matrix preallocation information */
2040:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2041:   for (i = xs; i < xs + nx; i++) {
2042:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2043:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2044:     for (j = ys; j < ys + ny; j++) {
2045:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2046:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2047:       for (k = zs; k < zs + nz; k++) {
2048:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2049:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2051:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2053:         /* Find block columns in block row */
2054:         cnt = 0;
2055:         for (ii = istart; ii < iend + 1; ii++) {
2056:           for (jj = jstart; jj < jend + 1; jj++) {
2057:             for (kk = kstart; kk < kend + 1; kk++) {
2058:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2059:             }
2060:           }
2061:         }
2062:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2063:         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2064:       }
2065:     }
2066:   }
2067:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2068:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2069:   MatPreallocateEnd(dnz, onz);

2071:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2073:   /*
2074:     For each node in the grid: we get the neighbors in the local (on processor ordering
2075:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2076:     PETSc ordering.
2077:   */
2078:   if (!da->prealloc_only) {
2079:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2080:     for (i = xs; i < xs + nx; i++) {
2081:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2082:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2083:       for (j = ys; j < ys + ny; j++) {
2084:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2085:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2086:         for (k = zs; k < zs + nz; k++) {
2087:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2088:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2090:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2092:           cnt = 0;
2093:           for (ii = istart; ii < iend + 1; ii++) {
2094:             for (jj = jstart; jj < jend + 1; jj++) {
2095:               for (kk = kstart; kk < kend + 1; kk++) {
2096:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2097:               }
2098:             }
2099:           }
2100:           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2101:           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2102:         }
2103:       }
2104:     }
2105:     PetscCall(PetscFree(values));
2106:     /* do not copy values to GPU since they are all zero and not yet needed there */
2107:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2108:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2109:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2110:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2111:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2112:   }
2113:   PetscCall(PetscFree(cols));
2114:   PetscFunctionReturn(PETSC_SUCCESS);
2115: }

2117: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2118: {
2119:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2120:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2121:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2122:   DM_DA                 *dd = (DM_DA *)da->data;
2123:   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2124:   MPI_Comm               comm;
2125:   PetscScalar           *values;
2126:   DMBoundaryType         bx, by, bz;
2127:   ISLocalToGlobalMapping ltog;
2128:   DMDAStencilType        st;
2129:   PetscBool              removedups = PETSC_FALSE;

2131:   PetscFunctionBegin;
2132:   /*
2133:          nc - number of components per grid point
2134:          col - number of colors needed in one direction for single component problem
2135:   */
2136:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2137:   col = 2 * s + 1;

2139:   /*
2140:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2141:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2142:   */
2143:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2144:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2145:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

2147:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2148:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2149:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2151:   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2152:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2154:   /* determine the matrix preallocation information */
2155:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

2157:   PetscCall(MatSetBlockSize(J, nc));
2158:   for (i = xs; i < xs + nx; i++) {
2159:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2160:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2161:     for (j = ys; j < ys + ny; j++) {
2162:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2163:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2164:       for (k = zs; k < zs + nz; k++) {
2165:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2166:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2168:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2170:         for (l = 0; l < nc; l++) {
2171:           cnt = 0;
2172:           for (ii = istart; ii < iend + 1; ii++) {
2173:             for (jj = jstart; jj < jend + 1; jj++) {
2174:               for (kk = kstart; kk < kend + 1; kk++) {
2175:                 if (ii || jj || kk) {
2176:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2177:                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2178:                   }
2179:                 } else {
2180:                   if (dfill) {
2181:                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2182:                   } else {
2183:                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2184:                   }
2185:                 }
2186:               }
2187:             }
2188:           }
2189:           row    = l + nc * (slot);
2190:           maxcnt = PetscMax(maxcnt, cnt);
2191:           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2192:           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2193:         }
2194:       }
2195:     }
2196:   }
2197:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2198:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2199:   MatPreallocateEnd(dnz, onz);
2200:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2202:   /*
2203:     For each node in the grid: we get the neighbors in the local (on processor ordering
2204:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2205:     PETSc ordering.
2206:   */
2207:   if (!da->prealloc_only) {
2208:     PetscCall(PetscCalloc1(maxcnt, &values));
2209:     for (i = xs; i < xs + nx; i++) {
2210:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2211:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2212:       for (j = ys; j < ys + ny; j++) {
2213:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2214:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2215:         for (k = zs; k < zs + nz; k++) {
2216:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2217:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2219:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2221:           for (l = 0; l < nc; l++) {
2222:             cnt = 0;
2223:             for (ii = istart; ii < iend + 1; ii++) {
2224:               for (jj = jstart; jj < jend + 1; jj++) {
2225:                 for (kk = kstart; kk < kend + 1; kk++) {
2226:                   if (ii || jj || kk) {
2227:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2228:                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2229:                     }
2230:                   } else {
2231:                     if (dfill) {
2232:                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2233:                     } else {
2234:                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2235:                     }
2236:                   }
2237:                 }
2238:               }
2239:             }
2240:             row = l + nc * (slot);
2241:             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2242:           }
2243:         }
2244:       }
2245:     }
2246:     PetscCall(PetscFree(values));
2247:     /* do not copy values to GPU since they are all zero and not yet needed there */
2248:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2249:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2250:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2251:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2252:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2253:   }
2254:   PetscCall(PetscFree(cols));
2255:   PetscFunctionReturn(PETSC_SUCCESS);
2256: }