Actual source code: fdda.c

  1: #include <petsc/private/dmdaimpl.h>
  2: #include <petscmat.h>
  3: #include <petscbt.h>

  5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
  6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
  8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

 10: /*
 11:    For a ghost index i that may be negative or greater than the upper bound, this
 12:    maps it into the range 0:m-1 using periodicity
 13: */
 14: #define SetInRange(i, m) (((i) < 0) ? (m) + (i) : (((i) >= (m)) ? (i) - (m) : (i)))
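/* e.g., with m = 10: SetInRange(-1, 10) yields 9 and SetInRange(10, 10) yields 0; indices already in 0:m-1 are returned unchanged */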

 16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
 17: {
 18:   PetscInt i, j, nz, *fill;

 20:   PetscFunctionBegin;
 21:   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

 23:   /* count the number of nonzeros */
 24:   nz = 0;
 25:   for (i = 0; i < w; i++) {
 26:     for (j = 0; j < w; j++) {
 27:       if (dfill[w * i + j]) nz++;
 28:     }
 29:   }
 30:   PetscCall(PetscMalloc1(nz + w + 1, &fill));
 31:   /* construct modified CSR storage of nonzero structure */
 32:   /*  fill[0 .. w] marks the start of each row of column indices (and the end of the last row)
 33:    so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
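  /* e.g., for w = 3 and dfill = {1,0,0, 1,1,0, 0,1,1} this produces fill = {4,5,7,9, 0, 0,1, 1,2} */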
 34:   nz = w + 1;
 35:   for (i = 0; i < w; i++) {
 36:     fill[i] = nz;
 37:     for (j = 0; j < w; j++) {
 38:       if (dfill[w * i + j]) {
 39:         fill[nz] = j;
 40:         nz++;
 41:       }
 42:     }
 43:   }
 44:   fill[w] = nz;

 46:   *rfill = fill;
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }

 50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
 51: {
 52:   PetscInt nz;

 54:   PetscFunctionBegin;
 55:   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

 57:   /* Determine number of non-zeros */
 58:   nz = (dfillsparse[w] - w - 1);

 60:   /* Allocate space for our copy of the given sparse matrix representation. */
 61:   PetscCall(PetscMalloc1(nz + w + 1, rfill));
 62:   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
 63:   PetscFunctionReturn(PETSC_SUCCESS);
 64: }

 66: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 67: {
 68:   PetscInt i, k, cnt = 1;

 70:   PetscFunctionBegin;
 71:   /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
 72:    columns to the left with any nonzeros in them plus 1 */
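  /* e.g., if ofill couples each component only with itself (the identity pattern), every column has a nonzero and ofillcols becomes {1, 2, ..., w} */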
 73:   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
 74:   for (i = 0; i < dd->w; i++) {
 75:     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 76:   }
 77:   for (i = 0; i < dd->w; i++) {
 78:     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
 79:   }
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }

 83: /*@
 84:   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 85:   of the matrix returned by `DMCreateMatrix()`.

 87:   Logically Collective

 89:   Input Parameters:
 90: + da    - the `DMDA`
 91: . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
 92: - ofill - the fill pattern in the off-diagonal blocks

 94:   Level: developer

 96:   Notes:
 97:   This only makes sense for multicomponent problems when using the
 98:   `MATMPIAIJ` matrix format.

100:   The format for `dfill` and `ofill` is a two-dimensional dof-by-dof matrix with 1 entries
101:   representing coupling and 0 entries for missing coupling. For example
102: .vb
103:             dfill[9] = {1, 0, 0,
104:                         1, 1, 0,
105:                         0, 1, 1}
106: .ve
107:   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108:   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109:   diagonal block).

111:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
112:   can be represented in the `dfill`, `ofill` format

114:   Contributed by\:
115:   Glenn Hammond

117: .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118: @*/
119: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120: {
121:   DM_DA *dd = (DM_DA *)da->data;

123:   PetscFunctionBegin;
124:   /* save the given dfill and ofill information */
125:   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126:   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

128:   /* count nonzeros in ofill columns */
129:   PetscCall(DMDASetBlockFills_Private2(dd));
130:   PetscFunctionReturn(PETSC_SUCCESS);
131: }
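
/* A minimal usage sketch (editorial addition, not part of the original source): the helper name
   ExampleSetBlockFills is hypothetical, and the DMDA is assumed to have been created with dof = 3.
   It applies the dfill pattern from the manual page above and an ofill pattern that couples each
   component only with itself across block boundaries. */
static PetscErrorCode ExampleSetBlockFills(DM da)
{
  const PetscInt dfill[9] = {1, 0, 0,
                             1, 1, 0,
                             0, 1, 1};
  const PetscInt ofill[9] = {1, 0, 0,
                             0, 1, 0,
                             0, 0, 1};

  PetscFunctionBegin;
  /* must be called before DMCreateMatrix() so the reduced fill pattern is used for preallocation */
  PetscCall(DMDASetBlockFills(da, dfill, ofill));
  PetscFunctionReturn(PETSC_SUCCESS);
}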

133: /*@
134:   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135:   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136:   of fill patterns.

138:   Logically Collective

140:   Input Parameters:
141: + da          - the `DMDA`
142: . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143: - ofillsparse - the sparse fill pattern in the off-diagonal blocks

145:   Level: developer

147:   Notes:
148:   This only makes sense for multicomponent problems when using the
149:   `MATMPIAIJ` matrix format.

151:   The format for `dfillsparse` and `ofillsparse` is a sparse representation of a
152:   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153:   for missing coupling.  The sparse representation is a one-dimensional
154:   array of length nz + dof + 1, where nz is the number of non-zeros in
155:   the matrix.  The first dof entries in the array give the
156:   starting array indices of each row's items in the rest of the array,
157:   the (dof+1)st entry contains the value nz + dof + 1 (i.e. the entire length of the array),
158:   and the remaining nz entries give the column indices of each of
159:   the 1s within the logical 2D matrix.  Each row's items within
160:   the array are the column indices of the 1s within that row
161:   of the 2D matrix.  PETSc developers may recognize that this is the
162:   same format as that computed by the `DMDASetBlockFills_Private()`
163:   function from a dense 2D matrix representation.

165:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
166:   can be represented in the `dfill`, `ofill` format

168:   Contributed by\:
169:   Philip C. Roth

171: .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172: @*/
173: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174: {
175:   DM_DA *dd = (DM_DA *)da->data;

177:   PetscFunctionBegin;
178:   /* save the given dfill and ofill information */
179:   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180:   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

182:   /* count nonzeros in ofill columns */
183:   PetscCall(DMDASetBlockFills_Private2(dd));
184:   PetscFunctionReturn(PETSC_SUCCESS);
185: }
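
/* A minimal sketch (editorial addition): the sparse encoding of the dense dfill example shown for
   DMDASetBlockFills(), assuming a DMDA with dof = 3. Here nz = 5, so the array has length
   nz + dof + 1 = 9: entries 0..2 are the row starts {4,5,7}, entry 3 is the total length 9, and the
   remaining entries are the column indices {0, 0,1, 1,2} of rows 0, 1 and 2. The helper name is
   hypothetical, and the same pattern is reused for the off-diagonal blocks purely for illustration. */
static PetscErrorCode ExampleSetBlockFillsSparse(DM da)
{
  const PetscInt fillsparse[9] = {4, 5, 7, 9, 0, 0, 1, 1, 2};

  PetscFunctionBegin;
  PetscCall(DMDASetBlockFillsSparse(da, fillsparse, fillsparse));
  PetscFunctionReturn(PETSC_SUCCESS);
}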

187: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188: {
189:   PetscInt       dim, m, n, p, nc;
190:   DMBoundaryType bx, by, bz;
191:   MPI_Comm       comm;
192:   PetscMPIInt    size;
193:   PetscBool      isBAIJ;
194:   DM_DA         *dd = (DM_DA *)da->data;

196:   PetscFunctionBegin;
197:   /*
198:                                   m
199:           ------------------------------------------------------
200:          |                                                     |
201:          |                                                     |
202:          |               ----------------------                |
203:          |               |                    |                |
204:       n  |           yn  |                    |                |
205:          |               |                    |                |
206:          |               .---------------------                |
207:          |             (xs,ys)     xn                          |
208:          |            .                                        |
209:          |         (gxs,gys)                                   |
210:          |                                                     |
211:           -----------------------------------------------------
212:   */

214:   /*
215:          nc - number of components per grid point
216:          col - number of colors needed in one direction for single component problem

218:   */
219:   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

221:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222:   PetscCallMPI(MPI_Comm_size(comm, &size));
223:   if (ctype == IS_COLORING_LOCAL) {
224:     if (size == 1) {
225:       ctype = IS_COLORING_GLOBAL;
226:     } else {
227:       PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
228:     }
229:   }

231:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
232:      matrices is for the blocks, not the individual matrix elements  */
233:   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
234:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
235:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
236:   if (isBAIJ) {
237:     dd->w  = 1;
238:     dd->xs = dd->xs / nc;
239:     dd->xe = dd->xe / nc;
240:     dd->Xs = dd->Xs / nc;
241:     dd->Xe = dd->Xe / nc;
242:   }

244:   /*
245:      We do not provide a getcoloring function in the DMDA operations because
246:    the basic DMDA does not know about matrices. We think of DMDA as being
 247:    more low-level than matrices.
248:   */
249:   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
250:   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
251:   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
252:   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
253:   if (isBAIJ) {
254:     dd->w  = nc;
255:     dd->xs = dd->xs * nc;
256:     dd->xe = dd->xe * nc;
257:     dd->Xs = dd->Xs * nc;
258:     dd->Xe = dd->Xe * nc;
259:   }
260:   PetscFunctionReturn(PETSC_SUCCESS);
261: }
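
/* A typical way these colorings are consumed (editorial sketch): building a finite-difference
   Jacobian with MatFDColoring. The helper name is hypothetical, and the matrix J is assumed to
   come from DMCreateMatrix(da, &J). */
static PetscErrorCode ExampleColorJacobian(DM da, Mat J)
{
  ISColoring    coloring;
  MatFDColoring fdcoloring;

  PetscFunctionBegin;
  PetscCall(DMCreateColoring(da, IS_COLORING_GLOBAL, &coloring));
  PetscCall(MatFDColoringCreate(J, coloring, &fdcoloring));
  /* in real use, MatFDColoringSetFunction() would be called here to attach the residual routine */
  PetscCall(MatFDColoringSetFromOptions(fdcoloring));
  PetscCall(MatFDColoringSetUp(J, coloring, fdcoloring));
  PetscCall(ISColoringDestroy(&coloring));
  /* fdcoloring would normally be kept and used, e.g. with SNESComputeJacobianDefaultColor() */
  PetscCall(MatFDColoringDestroy(&fdcoloring));
  PetscFunctionReturn(PETSC_SUCCESS);
}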

263: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
264: {
265:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
266:   PetscInt         ncolors = 0;
267:   MPI_Comm         comm;
268:   DMBoundaryType   bx, by;
269:   DMDAStencilType  st;
270:   ISColoringValue *colors;
271:   DM_DA           *dd = (DM_DA *)da->data;

273:   PetscFunctionBegin;
274:   /*
275:          nc - number of components per grid point
276:          col - number of colors needed in one direction for single component problem

278:   */
279:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
280:   col = 2 * s + 1;
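  /* e.g., a box stencil with s = 1 gives col = 3, so at most nc*col*col = 9*nc colors are used in 2d */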
281:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
282:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
283:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

285:   /* special case as taught to us by Paul Hovland */
286:   if (st == DMDA_STENCIL_STAR && s == 1) {
287:     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
288:   } else {
289:     if (ctype == IS_COLORING_GLOBAL) {
290:       if (!dd->localcoloring) {
291:         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
292:         ii = 0;
293:         for (j = ys; j < ys + ny; j++) {
294:           for (i = xs; i < xs + nx; i++) {
295:             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
296:           }
297:         }
298:         ncolors = nc + nc * (col - 1 + col * (col - 1));
299:         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
300:       }
301:       *coloring = dd->localcoloring;
302:     } else if (ctype == IS_COLORING_LOCAL) {
303:       if (!dd->ghostedcoloring) {
304:         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
305:         ii = 0;
306:         for (j = gys; j < gys + gny; j++) {
307:           for (i = gxs; i < gxs + gnx; i++) {
308:             for (k = 0; k < nc; k++) {
309:               /* the complicated stuff is to handle periodic boundaries */
310:               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
311:             }
312:           }
313:         }
314:         ncolors = nc + nc * (col - 1 + col * (col - 1));
315:         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
316:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

318:         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
319:       }
320:       *coloring = dd->ghostedcoloring;
321:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
322:   }
323:   PetscCall(ISColoringReference(*coloring));
324:   PetscFunctionReturn(PETSC_SUCCESS);
325: }

327: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
328: {
329:   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
330:   PetscInt         ncolors;
331:   MPI_Comm         comm;
332:   DMBoundaryType   bx, by, bz;
333:   DMDAStencilType  st;
334:   ISColoringValue *colors;
335:   DM_DA           *dd = (DM_DA *)da->data;

337:   PetscFunctionBegin;
338:   /*
339:          nc - number of components per grid point
340:          col - number of colors needed in one direction for single component problem
341:   */
342:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
343:   col = 2 * s + 1;
344:   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
345:   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
346:   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");

348:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
349:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
350:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

352:   /* create the coloring */
353:   if (ctype == IS_COLORING_GLOBAL) {
354:     if (!dd->localcoloring) {
355:       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
356:       ii = 0;
357:       for (k = zs; k < zs + nz; k++) {
358:         for (j = ys; j < ys + ny; j++) {
359:           for (i = xs; i < xs + nx; i++) {
360:             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
361:           }
362:         }
363:       }
364:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
365:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
366:     }
367:     *coloring = dd->localcoloring;
368:   } else if (ctype == IS_COLORING_LOCAL) {
369:     if (!dd->ghostedcoloring) {
370:       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
371:       ii = 0;
372:       for (k = gzs; k < gzs + gnz; k++) {
373:         for (j = gys; j < gys + gny; j++) {
374:           for (i = gxs; i < gxs + gnx; i++) {
375:             for (l = 0; l < nc; l++) {
376:               /* the complicated stuff is to handle periodic boundaries */
377:               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
378:             }
379:           }
380:         }
381:       }
382:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
383:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
384:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
385:     }
386:     *coloring = dd->ghostedcoloring;
387:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
388:   PetscCall(ISColoringReference(*coloring));
389:   PetscFunctionReturn(PETSC_SUCCESS);
390: }

392: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
393: {
394:   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
395:   PetscInt         ncolors;
396:   MPI_Comm         comm;
397:   DMBoundaryType   bx;
398:   ISColoringValue *colors;
399:   DM_DA           *dd = (DM_DA *)da->data;

401:   PetscFunctionBegin;
402:   /*
403:          nc - number of components per grid point
404:          col - number of colors needed in one direction for single component problem
405:   */
406:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
407:   col = 2 * s + 1;
408:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
409:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
410:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

412:   /* create the coloring */
413:   if (ctype == IS_COLORING_GLOBAL) {
414:     if (!dd->localcoloring) {
415:       PetscCall(PetscMalloc1(nc * nx, &colors));
416:       if (dd->ofillcols) {
417:         PetscInt tc = 0;
418:         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
419:         i1 = 0;
420:         for (i = xs; i < xs + nx; i++) {
421:           for (l = 0; l < nc; l++) {
422:             if (dd->ofillcols[l] && (i % col)) {
423:               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
424:             } else {
425:               colors[i1++] = l;
426:             }
427:           }
428:         }
429:         ncolors = nc + 2 * s * tc;
430:       } else {
431:         i1 = 0;
432:         for (i = xs; i < xs + nx; i++) {
433:           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
434:         }
435:         ncolors = nc + nc * (col - 1);
436:       }
437:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
438:     }
439:     *coloring = dd->localcoloring;
440:   } else if (ctype == IS_COLORING_LOCAL) {
441:     if (!dd->ghostedcoloring) {
442:       PetscCall(PetscMalloc1(nc * gnx, &colors));
443:       i1 = 0;
444:       for (i = gxs; i < gxs + gnx; i++) {
445:         for (l = 0; l < nc; l++) {
446:           /* the complicated stuff is to handle periodic boundaries */
447:           colors[i1++] = l + nc * (SetInRange(i, m) % col);
448:         }
449:       }
450:       ncolors = nc + nc * (col - 1);
451:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
452:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
453:     }
454:     *coloring = dd->ghostedcoloring;
455:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
456:   PetscCall(ISColoringReference(*coloring));
457:   PetscFunctionReturn(PETSC_SUCCESS);
458: }

460: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
461: {
462:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
463:   PetscInt         ncolors;
464:   MPI_Comm         comm;
465:   DMBoundaryType   bx, by;
466:   ISColoringValue *colors;
467:   DM_DA           *dd = (DM_DA *)da->data;

469:   PetscFunctionBegin;
470:   /*
471:          nc - number of components per grid point
472:          col - number of colors needed in one direction for single component problem
473:   */
474:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
475:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
476:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
477:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
478:   /* create the coloring */
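  /* the (3*j + i) mod 5 rule is a perfect coloring for the 5-point stencil: any two nodes whose
     stencils overlap differ by a nonzero value mod 5, hence never share a color */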
479:   if (ctype == IS_COLORING_GLOBAL) {
480:     if (!dd->localcoloring) {
481:       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
482:       ii = 0;
483:       for (j = ys; j < ys + ny; j++) {
484:         for (i = xs; i < xs + nx; i++) {
485:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
486:         }
487:       }
488:       ncolors = 5 * nc;
489:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
490:     }
491:     *coloring = dd->localcoloring;
492:   } else if (ctype == IS_COLORING_LOCAL) {
493:     if (!dd->ghostedcoloring) {
494:       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
495:       ii = 0;
496:       for (j = gys; j < gys + gny; j++) {
497:         for (i = gxs; i < gxs + gnx; i++) {
498:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
499:         }
500:       }
501:       ncolors = 5 * nc;
502:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
503:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
504:     }
505:     *coloring = dd->ghostedcoloring;
506:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
507:   PetscFunctionReturn(PETSC_SUCCESS);
508: }

510: /* =========================================================================== */
511: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
512: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
513: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
514: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
522: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
523: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
524: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

526: static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
527: {
528:   DM                da;
529:   const char       *prefix;
530:   Mat               AA, Anatural;
531:   AO                ao;
532:   PetscInt          rstart, rend, *petsc, i;
533:   IS                is;
534:   MPI_Comm          comm;
535:   PetscViewerFormat format;
536:   PetscBool         flag;

538:   PetscFunctionBegin;
539:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
540:   PetscCall(PetscViewerGetFormat(viewer, &format));
541:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

543:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
544:   PetscCall(MatGetDM(A, &da));
545:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

547:   PetscCall(DMDAGetAO(da, &ao));
548:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
549:   PetscCall(PetscMalloc1(rend - rstart, &petsc));
550:   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
551:   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
552:   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

554:   /* call viewer on natural ordering */
555:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
556:   if (flag) {
557:     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
558:     A = AA;
559:   }
560:   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
561:   PetscCall(ISDestroy(&is));
562:   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
563:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
564:   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
565:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
566:   PetscCall(MatView(Anatural, viewer));
567:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
568:   PetscCall(MatDestroy(&Anatural));
569:   if (flag) PetscCall(MatDestroy(&AA));
570:   PetscFunctionReturn(PETSC_SUCCESS);
571: }

573: static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
574: {
575:   DM       da;
576:   Mat      Anatural, Aapp;
577:   AO       ao;
578:   PetscInt rstart, rend, *app, i, m, n, M, N;
579:   IS       is;
580:   MPI_Comm comm;

582:   PetscFunctionBegin;
583:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
584:   PetscCall(MatGetDM(A, &da));
585:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

587:   /* Load the matrix in natural ordering */
588:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
589:   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
590:   PetscCall(MatGetSize(A, &M, &N));
591:   PetscCall(MatGetLocalSize(A, &m, &n));
592:   PetscCall(MatSetSizes(Anatural, m, n, M, N));
593:   PetscCall(MatLoad(Anatural, viewer));

595:   /* Map natural ordering to application ordering and create IS */
596:   PetscCall(DMDAGetAO(da, &ao));
597:   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
598:   PetscCall(PetscMalloc1(rend - rstart, &app));
599:   for (i = rstart; i < rend; i++) app[i - rstart] = i;
600:   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
601:   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

603:   /* Do permutation and replace header */
604:   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
605:   PetscCall(MatHeaderReplace(A, &Aapp));
606:   PetscCall(ISDestroy(&is));
607:   PetscCall(MatDestroy(&Anatural));
608:   PetscFunctionReturn(PETSC_SUCCESS);
609: }
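
/* Editorial sketch: in parallel, MatView() and MatLoad() on a matrix obtained from DMCreateMatrix()
   dispatch to the two routines above, so binary I/O happens in the natural grid ordering. The
   helper and file names are hypothetical. */
static PetscErrorCode ExampleSaveDMDAMatrix(Mat A)
{
  PetscViewer viewer;

  PetscFunctionBegin;
  PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), "jacobian.dat", FILE_MODE_WRITE, &viewer));
  PetscCall(MatView(A, viewer)); /* invokes MatView_MPI_DA() when run on more than one process */
  PetscCall(PetscViewerDestroy(&viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}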

611: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
612: {
613:   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
614:   Mat         A;
615:   MPI_Comm    comm;
616:   MatType     Atype;
617:   MatType     mtype;
618:   PetscMPIInt size;
619:   DM_DA      *dd    = (DM_DA *)da->data;
620:   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

622:   PetscFunctionBegin;
623:   PetscCall(MatInitializePackage());
624:   mtype = da->mattype;

626:   /*
627:                                   m
628:           ------------------------------------------------------
629:          |                                                     |
630:          |                                                     |
631:          |               ----------------------                |
632:          |               |                    |                |
633:       n  |           ny  |                    |                |
634:          |               |                    |                |
635:          |               .---------------------                |
636:          |             (xs,ys)     nx                          |
637:          |            .                                        |
638:          |         (gxs,gys)                                   |
639:          |                                                     |
640:           -----------------------------------------------------
641:   */

643:   /*
644:          nc - number of components per grid point
645:          col - number of colors needed in one direction for single component problem
646:   */
647:   M   = dd->M;
648:   N   = dd->N;
649:   P   = dd->P;
650:   dim = da->dim;
651:   dof = dd->w;
652:   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
653:   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
654:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
655:   PetscCall(MatCreate(comm, &A));
656:   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
657:   PetscCall(MatSetType(A, mtype));
658:   PetscCall(MatSetFromOptions(A));
659:   if (dof * nx * ny * nz < da->bind_below) {
660:     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
661:     PetscCall(MatBindToCPU(A, PETSC_TRUE));
662:   }
663:   PetscCall(MatSetDM(A, da));
664:   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
665:   PetscCall(MatGetType(A, &Atype));
666:   /*
667:      We do not provide a getmatrix function in the DMDA operations because
 668:    the basic DMDA does not know about matrices. We think of DMDA as being
 669:    more low-level than matrices. This is kind of cheating because sometimes
 670:    we think of DMDA as higher level than matrices.

672:      We could switch based on Atype (or mtype), but we do not since the
673:    specialized setting routines depend only on the particular preallocation
674:    details of the matrix, not the type itself.
675:   */
676:   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
677:     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
678:     if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
679:     if (!aij) {
680:       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
681:       if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
682:       if (!baij) {
683:         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
684:         if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
685:         if (!sbaij) {
686:           PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
687:           if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
688:         }
689:         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
690:       }
691:     }
692:   }
693:   if (aij) {
694:     if (dim == 1) {
695:       if (dd->ofill) {
696:         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
697:       } else {
698:         DMBoundaryType bx;
699:         PetscMPIInt    size;
700:         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
701:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
702:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
703:           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
704:         } else {
705:           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
706:         }
707:       }
708:     } else if (dim == 2) {
709:       if (dd->ofill) {
710:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
711:       } else {
712:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
713:       }
714:     } else if (dim == 3) {
715:       if (dd->ofill) {
716:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
717:       } else {
718:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
719:       }
720:     }
721:   } else if (baij) {
722:     if (dim == 2) {
723:       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
724:     } else if (dim == 3) {
725:       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
 726:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
727:   } else if (sbaij) {
728:     if (dim == 2) {
729:       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
730:     } else if (dim == 3) {
731:       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
 732:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
733:   } else if (sell) {
734:     if (dim == 2) {
735:       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
736:     } else if (dim == 3) {
737:       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
 738:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for matrix type %s in %" PetscInt_FMT " dimensions! Send mail to petsc-maint@mcs.anl.gov for code", Atype, dim);
739:   } else if (is) {
740:     PetscCall(DMCreateMatrix_DA_IS(da, A));
741:   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
742:     ISLocalToGlobalMapping ltog;

744:     PetscCall(MatSetBlockSize(A, dof));
745:     PetscCall(MatSetUp(A));
746:     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
747:     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
748:   }
749:   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
750:   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
751:   PetscCall(MatSetDM(A, da));
752:   PetscCallMPI(MPI_Comm_size(comm, &size));
753:   if (size > 1) {
754:     /* change viewer to display matrix in natural ordering */
755:     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
756:     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
757:   }
758:   *J = A;
759:   PetscFunctionReturn(PETSC_SUCCESS);
760: }
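
/* Editorial sketch: because DMCreateMatrix_DA() attaches the grid stencil via MatSetStencil(), the
   returned matrix accepts grid-based indexing through MatSetValuesStencil(). This hypothetical
   helper inserts a 5-point Laplacian row for an interior node, assuming a 2d DMDA with dof = 1;
   the numerical values are illustrative only. */
static PetscErrorCode ExampleFillInteriorRow(Mat J, PetscInt i, PetscInt j)
{
  MatStencil  row, cols[5];
  PetscScalar v[5] = {4, -1, -1, -1, -1};

  PetscFunctionBegin;
  row.i = i;  row.j = j;
  cols[0].i = i;      cols[0].j = j;
  cols[1].i = i - 1;  cols[1].j = j;
  cols[2].i = i + 1;  cols[2].j = j;
  cols[3].i = i;      cols[3].j = j - 1;
  cols[4].i = i;      cols[4].j = j + 1;
  PetscCall(MatSetValuesStencil(J, 1, &row, 5, cols, v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}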

762: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

764: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
765: {
766:   DM_DA                 *da = (DM_DA *)dm->data;
767:   Mat                    lJ, P;
768:   ISLocalToGlobalMapping ltog;
769:   IS                     is;
770:   PetscBT                bt;
771:   const PetscInt        *e_loc, *idx;
772:   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

 774:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted).
775:      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
776:   PetscFunctionBegin;
777:   dof = da->w;
778:   PetscCall(MatSetBlockSize(J, dof));
779:   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

 781:   /* flag local element indices in local DMDA numbering */
782:   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
783:   PetscCall(PetscBTCreate(nv / dof, &bt));
784:   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
785:   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

787:   /* filter out (set to -1) the global indices not used by the local elements */
788:   PetscCall(PetscMalloc1(nv / dof, &gidx));
789:   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
790:   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
791:   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
792:   for (i = 0; i < nv / dof; i++)
793:     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
794:   PetscCall(PetscBTDestroy(&bt));
795:   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
796:   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
797:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
798:   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
799:   PetscCall(ISDestroy(&is));

801:   /* Preallocation */
802:   if (dm->prealloc_skip) {
803:     PetscCall(MatSetUp(J));
804:   } else {
805:     PetscCall(MatISGetLocalMat(J, &lJ));
806:     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
807:     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
808:     PetscCall(MatSetType(P, MATPREALLOCATOR));
809:     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
810:     PetscCall(MatGetSize(lJ, &N, NULL));
811:     PetscCall(MatGetLocalSize(lJ, &n, NULL));
812:     PetscCall(MatSetSizes(P, n, n, N, N));
813:     PetscCall(MatSetBlockSize(P, dof));
814:     PetscCall(MatSetUp(P));
815:     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
816:     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
817:     PetscCall(MatISRestoreLocalMat(J, &lJ));
818:     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
819:     PetscCall(MatDestroy(&P));

821:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
822:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
823:   }
824:   PetscFunctionReturn(PETSC_SUCCESS);
825: }

827: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
828: {
829:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
830:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
831:   MPI_Comm               comm;
832:   PetscScalar           *values;
833:   DMBoundaryType         bx, by;
834:   ISLocalToGlobalMapping ltog;
835:   DMDAStencilType        st;

837:   PetscFunctionBegin;
838:   /*
839:          nc - number of components per grid point
840:          col - number of colors needed in one direction for single component problem
841:   */
842:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
843:   col = 2 * s + 1;
844:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
845:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
846:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

848:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
849:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

851:   PetscCall(MatSetBlockSize(J, nc));
852:   /* determine the matrix preallocation information */
853:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
854:   for (i = xs; i < xs + nx; i++) {
855:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
856:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

858:     for (j = ys; j < ys + ny; j++) {
859:       slot = i - gxs + gnx * (j - gys);

861:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
862:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

864:       cnt = 0;
865:       for (k = 0; k < nc; k++) {
866:         for (l = lstart; l < lend + 1; l++) {
867:           for (p = pstart; p < pend + 1; p++) {
868:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
869:               cols[cnt++] = k + nc * (slot + gnx * l + p);
870:             }
871:           }
872:         }
873:         rows[k] = k + nc * (slot);
874:       }
875:       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
876:     }
877:   }
878:   PetscCall(MatSetBlockSize(J, nc));
879:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
880:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
881:   MatPreallocateEnd(dnz, onz);

883:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

885:   /*
 886:     For each node in the grid we get the neighbors in the local (on-process) ordering
 887:     that includes the ghost points; then MatSetValuesLocal() maps those indices to the global
888:     PETSc ordering.
889:   */
890:   if (!da->prealloc_only) {
891:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
892:     for (i = xs; i < xs + nx; i++) {
893:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
894:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

896:       for (j = ys; j < ys + ny; j++) {
897:         slot = i - gxs + gnx * (j - gys);

899:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
900:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

902:         cnt = 0;
903:         for (k = 0; k < nc; k++) {
904:           for (l = lstart; l < lend + 1; l++) {
905:             for (p = pstart; p < pend + 1; p++) {
906:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
907:                 cols[cnt++] = k + nc * (slot + gnx * l + p);
908:               }
909:             }
910:           }
911:           rows[k] = k + nc * (slot);
912:         }
913:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
914:       }
915:     }
916:     PetscCall(PetscFree(values));
917:     /* do not copy values to GPU since they are all zero and not yet needed there */
918:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
919:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
920:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
921:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
922:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
923:   }
924:   PetscCall(PetscFree2(rows, cols));
925:   PetscFunctionReturn(PETSC_SUCCESS);
926: }

928: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
929: {
930:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
931:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
932:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
933:   MPI_Comm               comm;
934:   PetscScalar           *values;
935:   DMBoundaryType         bx, by, bz;
936:   ISLocalToGlobalMapping ltog;
937:   DMDAStencilType        st;

939:   PetscFunctionBegin;
940:   /*
941:          nc - number of components per grid point
942:          col - number of colors needed in one direction for single component problem
943:   */
944:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
945:   col = 2 * s + 1;
946:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
947:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
948:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

950:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
951:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

953:   PetscCall(MatSetBlockSize(J, nc));
954:   /* determine the matrix preallocation information */
955:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
956:   for (i = xs; i < xs + nx; i++) {
957:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
958:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
959:     for (j = ys; j < ys + ny; j++) {
960:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
961:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
962:       for (k = zs; k < zs + nz; k++) {
963:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
964:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

966:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

968:         cnt = 0;
969:         for (l = 0; l < nc; l++) {
970:           for (ii = istart; ii < iend + 1; ii++) {
971:             for (jj = jstart; jj < jend + 1; jj++) {
972:               for (kk = kstart; kk < kend + 1; kk++) {
 973:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
974:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
975:                 }
976:               }
977:             }
978:           }
979:           rows[l] = l + nc * (slot);
980:         }
981:         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
982:       }
983:     }
984:   }
985:   PetscCall(MatSetBlockSize(J, nc));
986:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
987:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
988:   MatPreallocateEnd(dnz, onz);
989:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

991:   /*
 992:     For each node in the grid we get the neighbors in the local (on-process) ordering
 993:     that includes the ghost points; then MatSetValuesLocal() maps those indices to the global
994:     PETSc ordering.
995:   */
996:   if (!da->prealloc_only) {
997:     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
998:     for (i = xs; i < xs + nx; i++) {
999:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1000:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1001:       for (j = ys; j < ys + ny; j++) {
1002:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1003:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1004:         for (k = zs; k < zs + nz; k++) {
1005:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1006:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1008:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1010:           cnt = 0;
1011:           for (l = 0; l < nc; l++) {
1012:             for (ii = istart; ii < iend + 1; ii++) {
1013:               for (jj = jstart; jj < jend + 1; jj++) {
1014:                 for (kk = kstart; kk < kend + 1; kk++) {
1015:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1016:                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1017:                   }
1018:                 }
1019:               }
1020:             }
1021:             rows[l] = l + nc * (slot);
1022:           }
1023:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1024:         }
1025:       }
1026:     }
1027:     PetscCall(PetscFree(values));
1028:     /* do not copy values to GPU since they are all zero and not yet needed there */
1029:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1030:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1031:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1032:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1033:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1034:   }
1035:   PetscCall(PetscFree2(rows, cols));
1036:   PetscFunctionReturn(PETSC_SUCCESS);
1037: }

1039: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1040: {
1041:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1042:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1043:   MPI_Comm               comm;
1044:   DMBoundaryType         bx, by;
1045:   ISLocalToGlobalMapping ltog, mltog;
1046:   DMDAStencilType        st;
1047:   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

1049:   PetscFunctionBegin;
1050:   /*
1051:          nc - number of components per grid point
1052:          col - number of colors needed in one direction for single component problem
1053:   */
1054:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1055:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1056:   col = 2 * s + 1;
1057:   /*
1058:        With one process and a skinny periodic dimension, the code will label nonzero columns multiple times
1059:        because "wrapping" around the end of the domain hits an entry already counted from the other direction.
1060:   */
1061:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1062:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1063:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1064:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1065:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1067:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1068:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1070:   PetscCall(MatSetBlockSize(J, nc));
1071:   /* determine the matrix preallocation information */
1072:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1073:   for (i = xs; i < xs + nx; i++) {
1074:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1075:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1077:     for (j = ys; j < ys + ny; j++) {
1078:       slot = i - gxs + gnx * (j - gys);

1080:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1081:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1083:       cnt = 0;
1084:       for (k = 0; k < nc; k++) {
1085:         for (l = lstart; l < lend + 1; l++) {
1086:           for (p = pstart; p < pend + 1; p++) {
1087:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1088:               cols[cnt++] = k + nc * (slot + gnx * l + p);
1089:             }
1090:           }
1091:         }
1092:         rows[k] = k + nc * (slot);
1093:       }
1094:       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1095:       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1096:     }
1097:   }
1098:   PetscCall(MatSetBlockSize(J, nc));
1099:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1100:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1101:   MatPreallocateEnd(dnz, onz);
1102:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1103:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1105:   /*
1106:     For each node in the grid we get the neighbors in the local (on-process) ordering
1107:     that includes the ghost points; then MatSetValuesLocal() maps those indices to the global
1108:     PETSc ordering.
1109:   */
1110:   if (!da->prealloc_only) {
1111:     for (i = xs; i < xs + nx; i++) {
1112:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1113:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1115:       for (j = ys; j < ys + ny; j++) {
1116:         slot = i - gxs + gnx * (j - gys);

1118:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1119:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1121:         cnt = 0;
1122:         for (l = lstart; l < lend + 1; l++) {
1123:           for (p = pstart; p < pend + 1; p++) {
1124:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1125:               cols[cnt++] = nc * (slot + gnx * l + p);
1126:               for (k = 1; k < nc; k++) {
1127:                 cols[cnt] = 1 + cols[cnt - 1];
1128:                 cnt++;
1129:               }
1130:             }
1131:           }
1132:         }
1133:         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1134:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1135:       }
1136:     }
1137:     /* do not copy values to GPU since they are all zero and not yet needed there */
1138:     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1139:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1140:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1141:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1142:     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1143:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1144:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1145:   }
1146:   PetscCall(PetscFree2(rows, cols));
1147:   PetscFunctionReturn(PETSC_SUCCESS);
1148: }

1150: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1151: {
1152:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1153:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1154:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1155:   DM_DA                 *dd = (DM_DA *)da->data;
1156:   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1157:   MPI_Comm               comm;
1158:   DMBoundaryType         bx, by;
1159:   ISLocalToGlobalMapping ltog;
1160:   DMDAStencilType        st;
1161:   PetscBool              removedups = PETSC_FALSE;

1163:   PetscFunctionBegin;
1164:   /*
1165:          nc - number of components per grid point
1166:          col - number of colors needed in one direction for single component problem
1167:   */
1168:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1169:   col = 2 * s + 1;
1170:   /*
1171:        With one process and a skinny periodic dimension, the code will label nonzero columns multiple times
1172:        because "wrapping" around the end of the domain hits an entry already counted from the other direction.
1173:   */
1174:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1175:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1176:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1177:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1178:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1180:   PetscCall(PetscMalloc1(col * col * nc, &cols));
1181:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1183:   PetscCall(MatSetBlockSize(J, nc));
1184:   /* determine the matrix preallocation information */
1185:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1186:   for (i = xs; i < xs + nx; i++) {
1187:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1188:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1190:     for (j = ys; j < ys + ny; j++) {
1191:       slot = i - gxs + gnx * (j - gys);

1193:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1194:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1196:       for (k = 0; k < nc; k++) {
1197:         cnt = 0;
1198:         for (l = lstart; l < lend + 1; l++) {
1199:           for (p = pstart; p < pend + 1; p++) {
1200:             if (l || p) {
1201:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1202:                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1203:               }
1204:             } else {
1205:               if (dfill) {
1206:                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1207:               } else {
1208:                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1209:               }
1210:             }
1211:           }
1212:         }
1213:         row    = k + nc * (slot);
1214:         maxcnt = PetscMax(maxcnt, cnt);
1215:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1216:         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1217:       }
1218:     }
1219:   }
1220:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1221:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1222:   MatPreallocateEnd(dnz, onz);
1223:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1225:   /*
1226:     For each node in the grid we get the neighbors in the local (on-process) ordering
1227:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1228:     global PETSc ordering.
1229:   */
1230:   if (!da->prealloc_only) {
1231:     for (i = xs; i < xs + nx; i++) {
1232:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1233:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1235:       for (j = ys; j < ys + ny; j++) {
1236:         slot = i - gxs + gnx * (j - gys);

1238:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1239:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1241:         for (k = 0; k < nc; k++) {
1242:           cnt = 0;
1243:           for (l = lstart; l < lend + 1; l++) {
1244:             for (p = pstart; p < pend + 1; p++) {
1245:               if (l || p) {
1246:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1247:                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1248:                 }
1249:               } else {
1250:                 if (dfill) {
1251:                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1252:                 } else {
1253:                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1254:                 }
1255:               }
1256:             }
1257:           }
1258:           row = k + nc * (slot);
1259:           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1260:         }
1261:       }
1262:     }
1263:     /* do not copy values to GPU since they are all zero and not yet needed there */
1264:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1265:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1266:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1267:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1268:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1269:   }
1270:   PetscCall(PetscFree(cols));
1271:   PetscFunctionReturn(PETSC_SUCCESS);
1272: }
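/*
   A minimal usage sketch for the fill-pattern path above (illustrative, not part of this
   file): a 2d DMDA with 2 degrees of freedom per node where each component couples only
   with itself across nodes. With a fill pattern set, DMCreateMatrix() should reach the
   routine above for a 2d MPIAIJ matrix.

     PetscInt dfill[4] = {1, 1,
                          1, 1};  // dense coupling within a node
     PetscInt ofill[4] = {1, 0,
                          0, 1};  // each component couples only with itself across nodes
     PetscCall(DMDASetBlockFills(da, dfill, ofill));
     PetscCall(DMCreateMatrix(da, &J));
*/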

1274: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1275: {
1276:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1277:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1278:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1279:   MPI_Comm               comm;
1280:   DMBoundaryType         bx, by, bz;
1281:   ISLocalToGlobalMapping ltog, mltog;
1282:   DMDAStencilType        st;
1283:   PetscBool              removedups = PETSC_FALSE;

1285:   PetscFunctionBegin;
1286:   /*
1287:          nc - number of components per grid point
1288:          col - number of colors needed in one direction for a single-component problem
1289:   */
1290:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1291:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1292:   col = 2 * s + 1;

1294:   /*
1295:        With a single process in a skinny periodic dimension the stencil "wraps" around the end of the domain
1296:        and can hit entries already counted from the other direction, labelling nonzero columns multiple times.
1297:   */
1298:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1299:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1300:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

1302:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1303:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1304:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1306:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1307:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1309:   PetscCall(MatSetBlockSize(J, nc));
1310:   /* determine the matrix preallocation information */
1311:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1312:   for (i = xs; i < xs + nx; i++) {
1313:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1314:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1315:     for (j = ys; j < ys + ny; j++) {
1316:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1317:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1318:       for (k = zs; k < zs + nz; k++) {
1319:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1320:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1322:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1324:         cnt = 0;
1325:         for (l = 0; l < nc; l++) {
1326:           for (ii = istart; ii < iend + 1; ii++) {
1327:             for (jj = jstart; jj < jend + 1; jj++) {
1328:               for (kk = kstart; kk < kend + 1; kk++) {
1329:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1330:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1331:                 }
1332:               }
1333:             }
1334:           }
1335:           rows[l] = l + nc * (slot);
1336:         }
1337:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1338:         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1339:       }
1340:     }
1341:   }
1342:   PetscCall(MatSetBlockSize(J, nc));
1343:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1344:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1345:   MatPreallocateEnd(dnz, onz);
1346:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1347:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1349:   /*
1350:     For each node in the grid we get the neighbors in the local (on-process) ordering
1351:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1352:     global PETSc ordering.
1353:   */
1354:   if (!da->prealloc_only) {
1355:     for (i = xs; i < xs + nx; i++) {
1356:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1357:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1358:       for (j = ys; j < ys + ny; j++) {
1359:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1360:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1361:         for (k = zs; k < zs + nz; k++) {
1362:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1363:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1365:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1367:           cnt = 0;
1368:           for (kk = kstart; kk < kend + 1; kk++) {
1369:             for (jj = jstart; jj < jend + 1; jj++) {
1370:               for (ii = istart; ii < iend + 1; ii++) {
1371:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1372:                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1373:                   for (l = 1; l < nc; l++) {
1374:                     cols[cnt] = 1 + cols[cnt - 1];
1375:                     cnt++;
1376:                   }
1377:                 }
1378:               }
1379:             }
1380:           }
1381:           rows[0] = nc * (slot);
1382:           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1383:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1384:         }
1385:       }
1386:     }
1387:     /* do not copy values to GPU since they are all zero and not yet needed there */
1388:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1389:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1390:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1391:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1392:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1393:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1394:   }
1395:   PetscCall(PetscFree2(rows, cols));
1396:   PetscFunctionReturn(PETSC_SUCCESS);
1397: }
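/*
   The star/box test used above keeps an offset (ii,jj,kk) for DMDA_STENCIL_STAR only when
   at least two of the three offsets are zero, i.e. the point lies on one of the stencil's
   coordinate axes. A small standalone check of that predicate (illustrative only):

     int cnt = 0;
     for (int ii = -1; ii <= 1; ii++)
       for (int jj = -1; jj <= 1; jj++)
         for (int kk = -1; kk <= 1; kk++)
           if ((!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cnt++;
     // cnt == 7: the center plus its 6 axis neighbors, the 3d star stencil for s = 1
*/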

1399: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1400: {
1401:   DM_DA                 *dd = (DM_DA *)da->data;
1402:   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1403:   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1404:   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1405:   DMBoundaryType         bx;
1406:   ISLocalToGlobalMapping ltog;
1407:   PetscMPIInt            rank, size;

1409:   PetscFunctionBegin;
1410:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1411:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

1413:   /*
1414:          nc - number of components per grid point
1415:   */
1416:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1417:   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1418:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1419:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1421:   PetscCall(MatSetBlockSize(J, nc));
1422:   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

1424:   /*
1425:         note: the counts could be smaller for the first and last process when the boundary is not periodic;
1426:         the counting below requires a non-NULL dfill (only the fill loop farther down handles the dense NULL case)
1427:   */
1428:   cnt = 0;
1429:   /* coupling with process to the left */
1430:   for (i = 0; i < s; i++) {
1431:     for (j = 0; j < nc; j++) {
1432:       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1433:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1434:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1435:         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1436:         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1437:       }
1438:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1439:       cnt++;
1440:     }
1441:   }
1442:   for (i = s; i < nx - s; i++) {
1443:     for (j = 0; j < nc; j++) {
1444:       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1445:       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1446:       cnt++;
1447:     }
1448:   }
1449:   /* coupling with process to the right */
1450:   for (i = nx - s; i < nx; i++) {
1451:     for (j = 0; j < nc; j++) {
1452:       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1453:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1454:       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1455:         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1456:         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1457:       }
1458:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1459:       cnt++;
1460:     }
1461:   }

1463:   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1464:   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1465:   PetscCall(PetscFree2(cols, ocols));

1467:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1468:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1470:   /*
1471:     For each node in the grid we compute the global indices of its neighbors directly
1472:     (in 1d the global index of component c at node i is simply i*nc + c) and insert the
1473:     entries with MatSetValues().
1474:   */
1475:   if (!da->prealloc_only) {
1476:     PetscCall(PetscMalloc1(maxcnt, &cols));
1477:     row = xs * nc;
1478:     /* coupling with process to the left */
1479:     for (i = xs; i < xs + s; i++) {
1480:       for (j = 0; j < nc; j++) {
1481:         cnt = 0;
1482:         if (rank) {
1483:           for (l = 0; l < s; l++) {
1484:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1485:           }
1486:         }
1487:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1488:           for (l = 0; l < s; l++) {
1489:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1490:           }
1491:         }
1492:         if (dfill) {
1493:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1494:         } else {
1495:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1496:         }
1497:         for (l = 0; l < s; l++) {
1498:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1499:         }
1500:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1501:         row++;
1502:       }
1503:     }
1504:     for (i = xs + s; i < xs + nx - s; i++) {
1505:       for (j = 0; j < nc; j++) {
1506:         cnt = 0;
1507:         for (l = 0; l < s; l++) {
1508:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1509:         }
1510:         if (dfill) {
1511:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1512:         } else {
1513:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1514:         }
1515:         for (l = 0; l < s; l++) {
1516:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1517:         }
1518:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1519:         row++;
1520:       }
1521:     }
1522:     /* coupling with process to the right */
1523:     for (i = xs + nx - s; i < xs + nx; i++) {
1524:       for (j = 0; j < nc; j++) {
1525:         cnt = 0;
1526:         for (l = 0; l < s; l++) {
1527:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1528:         }
1529:         if (dfill) {
1530:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1531:         } else {
1532:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1533:         }
1534:         if (rank < size - 1) {
1535:           for (l = 0; l < s; l++) {
1536:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1537:           }
1538:         }
1539:         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1540:           for (l = 0; l < s; l++) {
1541:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1542:           }
1543:         }
1544:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1545:         row++;
1546:       }
1547:     }
1548:     PetscCall(PetscFree(cols));
1549:     /* do not copy values to GPU since they are all zero and not yet needed there */
1550:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1551:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1552:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1553:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1554:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1555:   }
1556:   PetscFunctionReturn(PETSC_SUCCESS);
1557: }
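/*
   The preallocation arithmetic above, written out for component j of an interior node
   (a sketch using the modified CSR fill arrays, where fill[j+1] - fill[j] is the number
   of nonzero columns in row j of the block):

     PetscInt ndiag = dfill[j + 1] - dfill[j]; // couplings inside the node's own block
     PetscInt noff  = ofill[j + 1] - ofill[j]; // couplings per neighboring node
     PetscInt nrow  = ndiag + 2 * s * noff;    // s neighbors on each side of the node
*/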

1559: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1560: {
1561:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1562:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1563:   PetscInt               istart, iend;
1564:   DMBoundaryType         bx;
1565:   ISLocalToGlobalMapping ltog, mltog;

1567:   PetscFunctionBegin;
1568:   /*
1569:          nc - number of components per grid point
1570:          col - number of colors needed in one direction for a single-component problem
1571:   */
1572:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1573:   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1574:   col = 2 * s + 1;

1576:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1577:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1579:   PetscCall(MatSetBlockSize(J, nc));
1580:   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1581:   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

1583:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1584:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1585:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1587:   /*
1588:     For each node in the grid we get the neighbors in the local (on-process) ordering
1589:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1590:     global PETSc ordering.
1591:   */
1592:   if (!da->prealloc_only) {
1593:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1594:     for (i = xs; i < xs + nx; i++) {
1595:       istart = PetscMax(-s, gxs - i);
1596:       iend   = PetscMin(s, gxs + gnx - i - 1);
1597:       slot   = i - gxs;

1599:       cnt = 0;
1600:       for (i1 = istart; i1 < iend + 1; i1++) {
1601:         cols[cnt++] = nc * (slot + i1);
1602:         for (l = 1; l < nc; l++) {
1603:           cols[cnt] = 1 + cols[cnt - 1];
1604:           cnt++;
1605:         }
1606:       }
1607:       rows[0] = nc * (slot);
1608:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1609:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1610:     }
1611:     /* do not copy values to GPU since they are all zero and not yet needed there */
1612:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1613:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1614:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1615:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1616:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1617:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1618:     PetscCall(PetscFree2(rows, cols));
1619:   }
1620:   PetscFunctionReturn(PETSC_SUCCESS);
1621: }
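/*
   A minimal driver for the 1d routine above; a sketch only, with error handling and
   cleanup omitted:

     DM  da;
     Mat J;
     PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 128, 2, 1, NULL, &da));
     PetscCall(DMSetUp(da));
     PetscCall(DMCreateMatrix(da, &J)); // each row preallocated with col*nc = (2*s+1)*nc entries
*/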

1623: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1624: {
1625:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1626:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1627:   PetscInt               istart, iend;
1628:   DMBoundaryType         bx;
1629:   ISLocalToGlobalMapping ltog, mltog;

1631:   PetscFunctionBegin;
1632:   /*
1633:          nc - number of components per grid point
1634:          col - number of colors needed in one direction for a single-component problem
1635:   */
1636:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1637:   col = 2 * s + 1;

1639:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1640:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1642:   PetscCall(MatSetBlockSize(J, nc));
1643:   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

1645:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1646:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1647:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1649:   /*
1650:     For each node in the grid we get the neighbors in the local (on-process) ordering
1651:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1652:     global PETSc ordering.
1653:   */
1654:   if (!da->prealloc_only) {
1655:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1656:     for (i = xs; i < xs + nx; i++) {
1657:       istart = PetscMax(-s, gxs - i);
1658:       iend   = PetscMin(s, gxs + gnx - i - 1);
1659:       slot   = i - gxs;

1661:       cnt = 0;
1662:       for (i1 = istart; i1 < iend + 1; i1++) {
1663:         cols[cnt++] = nc * (slot + i1);
1664:         for (l = 1; l < nc; l++) {
1665:           cols[cnt] = 1 + cols[cnt - 1];
1666:           cnt++;
1667:         }
1668:       }
1669:       rows[0] = nc * (slot);
1670:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1671:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1672:     }
1673:     /* do not copy values to GPU since they are all zero and not yet needed there */
1674:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1675:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1676:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1677:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1678:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1679:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1680:     PetscCall(PetscFree2(rows, cols));
1681:   }
1682:   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1683:   PetscFunctionReturn(PETSC_SUCCESS);
1684: }
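/*
   MatSeqAIJSetTotalPreallocation(), used above, reserves one pool of entries for the whole
   matrix instead of per-row counts; together with MAT_SORTED_FULL each fully sorted row can
   be stored directly as it is set. The bound used above, written out (illustrative):

     PetscInt total = (nx * nc) * ((2 * s + 1) * nc); // local rows times maximum columns per row
*/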

1686: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1687: {
1688:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1689:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1690:   PetscInt               istart, iend, jstart, jend, ii, jj;
1691:   MPI_Comm               comm;
1692:   PetscScalar           *values;
1693:   DMBoundaryType         bx, by;
1694:   DMDAStencilType        st;
1695:   ISLocalToGlobalMapping ltog;

1697:   PetscFunctionBegin;
1698:   /*
1699:      nc - number of components per grid point
1700:      col - number of colors needed in one direction for a single-component problem
1701:   */
1702:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1703:   col = 2 * s + 1;

1705:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1706:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1707:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1709:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1711:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1713:   /* determine the matrix preallocation information */
1714:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1715:   for (i = xs; i < xs + nx; i++) {
1716:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1717:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1718:     for (j = ys; j < ys + ny; j++) {
1719:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1720:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1721:       slot   = i - gxs + gnx * (j - gys);

1723:       /* Find block columns in block row */
1724:       cnt = 0;
1725:       for (ii = istart; ii < iend + 1; ii++) {
1726:         for (jj = jstart; jj < jend + 1; jj++) {
1727:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1728:             cols[cnt++] = slot + ii + gnx * jj;
1729:           }
1730:         }
1731:       }
1732:       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1733:     }
1734:   }
1735:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1736:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1737:   MatPreallocateEnd(dnz, onz);

1739:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1741:   /*
1742:     For each node in the grid we get the neighboring block columns in the local (on-process)
1743:     ordering that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1744:     indices to the global PETSc ordering.
1745:   */
1746:   if (!da->prealloc_only) {
1747:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1748:     for (i = xs; i < xs + nx; i++) {
1749:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1750:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1751:       for (j = ys; j < ys + ny; j++) {
1752:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1753:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1754:         slot   = i - gxs + gnx * (j - gys);
1755:         cnt    = 0;
1756:         for (ii = istart; ii < iend + 1; ii++) {
1757:           for (jj = jstart; jj < jend + 1; jj++) {
1758:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1759:               cols[cnt++] = slot + ii + gnx * jj;
1760:             }
1761:           }
1762:         }
1763:         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1764:       }
1765:     }
1766:     PetscCall(PetscFree(values));
1767:     /* do not copy values to GPU since they are all zero and not yet needed there */
1768:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1769:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1770:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1771:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1772:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1773:   }
1774:   PetscCall(PetscFree(cols));
1775:   PetscFunctionReturn(PETSC_SUCCESS);
1776: }
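/*
   In the BAIJ routines every index is a block index: each (block row, block column) pair
   receives a dense nc*nc block of values. The relationship to scalar indices, as a sketch:

     PetscInt scalar_row = nc * block_row + c; // component c of block row, 0 <= c < nc
     // MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES) thus fills
     // cnt blocks of nc*nc zeros in block row slot
*/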

1778: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1779: {
1780:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1781:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1782:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1783:   MPI_Comm               comm;
1784:   PetscScalar           *values;
1785:   DMBoundaryType         bx, by, bz;
1786:   DMDAStencilType        st;
1787:   ISLocalToGlobalMapping ltog;

1789:   PetscFunctionBegin;
1790:   /*
1791:          nc - number of components per grid point
1792:          col - number of colors needed in one direction for a single-component problem
1793:   */
1794:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1795:   col = 2 * s + 1;

1797:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1798:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1799:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1801:   PetscCall(PetscMalloc1(col * col * col, &cols));

1803:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1805:   /* determine the matrix preallocation information */
1806:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1807:   for (i = xs; i < xs + nx; i++) {
1808:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1809:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1810:     for (j = ys; j < ys + ny; j++) {
1811:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1812:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1813:       for (k = zs; k < zs + nz; k++) {
1814:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1815:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1817:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1819:         /* Find block columns in block row */
1820:         cnt = 0;
1821:         for (ii = istart; ii < iend + 1; ii++) {
1822:           for (jj = jstart; jj < jend + 1; jj++) {
1823:             for (kk = kstart; kk < kend + 1; kk++) {
1824:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1825:                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1826:               }
1827:             }
1828:           }
1829:         }
1830:         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1831:       }
1832:     }
1833:   }
1834:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1835:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1836:   MatPreallocateEnd(dnz, onz);

1838:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1840:   /*
1841:     For each node in the grid we get the neighboring block columns in the local (on-process)
1842:     ordering that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1843:     indices to the global PETSc ordering.
1844:   */
1845:   if (!da->prealloc_only) {
1846:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1847:     for (i = xs; i < xs + nx; i++) {
1848:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1849:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1850:       for (j = ys; j < ys + ny; j++) {
1851:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1852:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1853:         for (k = zs; k < zs + nz; k++) {
1854:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1855:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1857:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1859:           cnt = 0;
1860:           for (ii = istart; ii < iend + 1; ii++) {
1861:             for (jj = jstart; jj < jend + 1; jj++) {
1862:               for (kk = kstart; kk < kend + 1; kk++) {
1863:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1864:                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1865:                 }
1866:               }
1867:             }
1868:           }
1869:           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1870:         }
1871:       }
1872:     }
1873:     PetscCall(PetscFree(values));
1874:     /* do not copy values to GPU since they are all zero and not yet needed there */
1875:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1876:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1877:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1878:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1879:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1880:   }
1881:   PetscCall(PetscFree(cols));
1882:   PetscFunctionReturn(PETSC_SUCCESS);
1883: }

1885: /*
1886:   This helper is for SBAIJ preallocation, to discard the lower-triangular values, which are difficult to
1887:   identify in the local ordering with a periodic domain.
1888: */
1889: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1890: {
1891:   PetscInt i, n;

1893:   PetscFunctionBegin;
1894:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1895:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1896:   for (i = 0, n = 0; i < *cnt; i++) {
1897:     if (col[i] >= *row) col[n++] = col[i];
1898:   }
1899:   *cnt = n;
1900:   PetscFunctionReturn(PETSC_SUCCESS);
1901: }
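/*
   A worked example of the filter above, assuming for simplicity an identity
   local-to-global mapping:

     PetscInt row = 5, cnt = 3, cols[3] = {3, 5, 7};
     PetscCall(L2GFilterUpperTriangular(ltog, &row, &cnt, cols));
     // now cnt == 2 and cols = {5, 7}: the entry below the diagonal was discarded,
     // leaving only the upper triangle (diagonal included) that SBAIJ stores
*/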

1903: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1904: {
1905:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1906:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1907:   PetscInt               istart, iend, jstart, jend, ii, jj;
1908:   MPI_Comm               comm;
1909:   PetscScalar           *values;
1910:   DMBoundaryType         bx, by;
1911:   DMDAStencilType        st;
1912:   ISLocalToGlobalMapping ltog;

1914:   PetscFunctionBegin;
1915:   /*
1916:      nc - number of components per grid point
1917:      col - number of colors needed in one direction for a single-component problem
1918:   */
1919:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1920:   col = 2 * s + 1;

1922:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1923:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1924:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1926:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1928:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1930:   /* determine the matrix preallocation information */
1931:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1932:   for (i = xs; i < xs + nx; i++) {
1933:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1934:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1935:     for (j = ys; j < ys + ny; j++) {
1936:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1937:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1938:       slot   = i - gxs + gnx * (j - gys);

1940:       /* Find block columns in block row */
1941:       cnt = 0;
1942:       for (ii = istart; ii < iend + 1; ii++) {
1943:         for (jj = jstart; jj < jend + 1; jj++) {
1944:           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1945:         }
1946:       }
1947:       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1948:       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1949:     }
1950:   }
1951:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1952:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1953:   MatPreallocateEnd(dnz, onz);

1955:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1957:   /*
1958:     For each node in the grid we get the neighboring block columns in the local ordering;
1959:     L2GFilterUpperTriangular() maps them to the global PETSc ordering and drops the lower
1960:     triangle, and MatSetValuesBlocked() then inserts the zero blocks.
1961:   */
1962:   if (!da->prealloc_only) {
1963:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1964:     for (i = xs; i < xs + nx; i++) {
1965:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1966:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1967:       for (j = ys; j < ys + ny; j++) {
1968:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1969:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1970:         slot   = i - gxs + gnx * (j - gys);

1972:         /* Find block columns in block row */
1973:         cnt = 0;
1974:         for (ii = istart; ii < iend + 1; ii++) {
1975:           for (jj = jstart; jj < jend + 1; jj++) {
1976:             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1977:           }
1978:         }
1979:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1980:         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1981:       }
1982:     }
1983:     PetscCall(PetscFree(values));
1984:     /* do not copy values to GPU since they are all zero and not yet needed there */
1985:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1986:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1987:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1988:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1989:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1990:   }
1991:   PetscCall(PetscFree(cols));
1992:   PetscFunctionReturn(PETSC_SUCCESS);
1993: }
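/*
   Note that, unlike the BAIJ routines, the code above uses the global variants
   MatPreallocateSymmetricSetBlock() and MatSetValuesBlocked(): L2GFilterUpperTriangular()
   has already mapped the block indices to the global ordering, and only columns with
   cols[] >= row remain, matching the upper-triangular storage of SBAIJ.
*/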

1995: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1996: {
1997:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1998:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1999:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2000:   MPI_Comm               comm;
2001:   PetscScalar           *values;
2002:   DMBoundaryType         bx, by, bz;
2003:   DMDAStencilType        st;
2004:   ISLocalToGlobalMapping ltog;

2006:   PetscFunctionBegin;
2007:   /*
2008:      nc - number of components per grid point
2009:      col - number of colors needed in one direction for a single-component problem
2010:   */
2011:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2012:   col = 2 * s + 1;

2014:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2015:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2016:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2018:   /* workspace for the block column indices */
2019:   PetscCall(PetscMalloc1(col * col * col, &cols));

2021:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2023:   /* determine the matrix preallocation information */
2024:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2025:   for (i = xs; i < xs + nx; i++) {
2026:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2027:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2028:     for (j = ys; j < ys + ny; j++) {
2029:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2030:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2031:       for (k = zs; k < zs + nz; k++) {
2032:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2033:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2035:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2037:         /* Find block columns in block row */
2038:         cnt = 0;
2039:         for (ii = istart; ii < iend + 1; ii++) {
2040:           for (jj = jstart; jj < jend + 1; jj++) {
2041:             for (kk = kstart; kk < kend + 1; kk++) {
2042:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2043:             }
2044:           }
2045:         }
2046:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2047:         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2048:       }
2049:     }
2050:   }
2051:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2052:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2053:   MatPreallocateEnd(dnz, onz);

2055:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2057:   /*
2058:     For each node in the grid we get the neighboring block columns in the local ordering;
2059:     L2GFilterUpperTriangular() maps them to the global PETSc ordering and drops the lower
2060:     triangle, and MatSetValuesBlocked() then inserts the zero blocks.
2061:   */
2062:   if (!da->prealloc_only) {
2063:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2064:     for (i = xs; i < xs + nx; i++) {
2065:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2066:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2067:       for (j = ys; j < ys + ny; j++) {
2068:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2069:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2070:         for (k = zs; k < zs + nz; k++) {
2071:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2072:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2074:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2076:           cnt = 0;
2077:           for (ii = istart; ii < iend + 1; ii++) {
2078:             for (jj = jstart; jj < jend + 1; jj++) {
2079:               for (kk = kstart; kk < kend + 1; kk++) {
2080:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2081:               }
2082:             }
2083:           }
2084:           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2085:           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2086:         }
2087:       }
2088:     }
2089:     PetscCall(PetscFree(values));
2090:     /* do not copy values to GPU since they are all zero and not yet needed there */
2091:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2092:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2093:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2094:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2095:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2096:   }
2097:   PetscCall(PetscFree(cols));
2098:   PetscFunctionReturn(PETSC_SUCCESS);
2099: }

2101: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2102: {
2103:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2104:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2105:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2106:   DM_DA                 *dd = (DM_DA *)da->data;
2107:   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2108:   MPI_Comm               comm;
2109:   PetscScalar           *values;
2110:   DMBoundaryType         bx, by, bz;
2111:   ISLocalToGlobalMapping ltog;
2112:   DMDAStencilType        st;
2113:   PetscBool              removedups = PETSC_FALSE;

2115:   PetscFunctionBegin;
2116:   /*
2117:          nc - number of components per grid point
2118:          col - number of colors needed in one direction for a single-component problem
2119:   */
2120:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2121:   col = 2 * s + 1;

2123:   /*
2124:        With a single process in a skinny periodic dimension the stencil "wraps" around the end of the domain
2125:        and can hit entries already counted from the other direction, labelling nonzero columns multiple times.
2126:   */
2127:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2128:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2129:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

2131:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2132:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2133:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2135:   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2136:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2138:   /* determine the matrix preallocation information */
2139:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

2141:   PetscCall(MatSetBlockSize(J, nc));
2142:   for (i = xs; i < xs + nx; i++) {
2143:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2144:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2145:     for (j = ys; j < ys + ny; j++) {
2146:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2147:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2148:       for (k = zs; k < zs + nz; k++) {
2149:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2150:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2152:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2154:         for (l = 0; l < nc; l++) {
2155:           cnt = 0;
2156:           for (ii = istart; ii < iend + 1; ii++) {
2157:             for (jj = jstart; jj < jend + 1; jj++) {
2158:               for (kk = kstart; kk < kend + 1; kk++) {
2159:                 if (ii || jj || kk) {
2160:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2161:                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2162:                   }
2163:                 } else {
2164:                   if (dfill) {
2165:                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2166:                   } else {
2167:                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2168:                   }
2169:                 }
2170:               }
2171:             }
2172:           }
2173:           row    = l + nc * (slot);
2174:           maxcnt = PetscMax(maxcnt, cnt);
2175:           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2176:           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2177:         }
2178:       }
2179:     }
2180:   }
2181:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2182:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2183:   MatPreallocateEnd(dnz, onz);
2184:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2186:   /*
2187:     For each node in the grid we get the neighbors in the local (on-process) ordering
2188:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
2189:     global PETSc ordering.
2190:   */
2191:   if (!da->prealloc_only) {
2192:     PetscCall(PetscCalloc1(maxcnt, &values));
2193:     for (i = xs; i < xs + nx; i++) {
2194:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2195:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2196:       for (j = ys; j < ys + ny; j++) {
2197:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2198:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2199:         for (k = zs; k < zs + nz; k++) {
2200:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2201:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2203:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2205:           for (l = 0; l < nc; l++) {
2206:             cnt = 0;
2207:             for (ii = istart; ii < iend + 1; ii++) {
2208:               for (jj = jstart; jj < jend + 1; jj++) {
2209:                 for (kk = kstart; kk < kend + 1; kk++) {
2210:                   if (ii || jj || kk) {
2211:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
2212:                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2213:                     }
2214:                   } else {
2215:                     if (dfill) {
2216:                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2217:                     } else {
2218:                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2219:                     }
2220:                   }
2221:                 }
2222:               }
2223:             }
2224:             row = l + nc * (slot);
2225:             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2226:           }
2227:         }
2228:       }
2229:     }
2230:     PetscCall(PetscFree(values));
2231:     /* do not copy values to GPU since they are all zero and not yet needed there */
2232:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2233:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2234:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2235:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2236:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2237:   }
2238:   PetscCall(PetscFree(cols));
2239:   PetscFunctionReturn(PETSC_SUCCESS);
2240: }
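/*
   A worked example of the removedups logic used in the routines above: with m = 3 grid
   points in a periodic direction, stencil width s = 2, and one process in that direction
   (M == 1), we have 2*s = 4 >= m, so a node's stencil wraps clear around the domain and
   revisits columns already counted from the other side; MatPreallocateSetLocalRemoveDups()
   then discards the duplicates so the rows are not over-allocated.
*/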