Actual source code: fdda.c

  1: #include <petsc/private/dmdaimpl.h>
  2: #include <petscmat.h>
  3: #include <petscbt.h>

  5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
  6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
  8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

 10: /*
 11:    For ghost i that may be negative or greater than the upper bound this
 12:   maps it into the 0:m-1 range using periodicity
 13: */
 14: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))

 16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
 17: {
 18:   PetscInt i, j, nz, *fill;

 20:   PetscFunctionBegin;
 21:   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

 23:   /* count number nonzeros */
 24:   nz = 0;
 25:   for (i = 0; i < w; i++) {
 26:     for (j = 0; j < w; j++) {
 27:       if (dfill[w * i + j]) nz++;
 28:     }
 29:   }
 30:   PetscCall(PetscMalloc1(nz + w + 1, &fill));
 31:   /* construct modified CSR storage of nonzero structure */
 32:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 33:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 34:   nz = w + 1;
 35:   for (i = 0; i < w; i++) {
 36:     fill[i] = nz;
 37:     for (j = 0; j < w; j++) {
 38:       if (dfill[w * i + j]) {
 39:         fill[nz] = j;
 40:         nz++;
 41:       }
 42:     }
 43:   }
 44:   fill[w] = nz;

 46:   *rfill = fill;
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }

 50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
 51: {
 52:   PetscInt nz;

 54:   PetscFunctionBegin;
 55:   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

 57:   /* Determine number of non-zeros */
 58:   nz = (dfillsparse[w] - w - 1);

 60:   /* Allocate space for our copy of the given sparse matrix representation. */
 61:   PetscCall(PetscMalloc1(nz + w + 1, rfill));
 62:   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
 63:   PetscFunctionReturn(PETSC_SUCCESS);
 64: }

 66: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 67: {
 68:   PetscInt i, k, cnt = 1;

 70:   PetscFunctionBegin;
 71:   /* ofillcount tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
 72:    columns to the left with any nonzeros in them plus 1 */
 73:   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
 74:   for (i = 0; i < dd->w; i++) {
 75:     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 76:   }
 77:   for (i = 0; i < dd->w; i++) {
 78:     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
 79:   }
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }

 83: /*@
 84:   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 85:   of the matrix returned by `DMCreateMatrix()`.

 87:   Logically Collective

 89:   Input Parameters:
 90: + da    - the `DMDA`
 91: . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
 92: - ofill - the fill pattern in the off-diagonal blocks

 94:   Level: developer

 96:   Notes:
 97:   This only makes sense when you are doing multicomponent problems but using the
 98:   `MATMPIAIJ` matrix format

100:   The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101:   representing coupling and 0 entries for missing coupling. For example
102: .vb
103:             dfill[9] = {1, 0, 0,
104:                         1, 1, 0,
105:                         0, 1, 1}
106: .ve
107:   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108:   itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
109:   diagonal block).

111:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
112:   can be represented in the `dfill`, `ofill` format

114:   Contributed by\:
115:   Glenn Hammond

117: .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118: @*/
119: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120: {
121:   DM_DA *dd = (DM_DA *)da->data;

123:   PetscFunctionBegin;
124:   /* save the given dfill and ofill information */
125:   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126:   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

128:   /* count nonzeros in ofill columns */
129:   PetscCall(DMDASetBlockFills_Private2(dd));
130:   PetscFunctionReturn(PETSC_SUCCESS);
131: }

133: /*@
134:   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135:   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136:   of fill patterns.

138:   Logically Collective

140:   Input Parameters:
141: + da          - the `DMDA`
142: . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143: - ofillsparse - the sparse fill pattern in the off-diagonal blocks

145:   Level: developer

147:   Notes:
148:   This only makes sense when you are doing multicomponent problems but using the
149:   `MATMPIAIJ` matrix format

151:   The format for `dfill` and `ofill` is a sparse representation of a
152:   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153:   for missing coupling.  The sparse representation is a 1 dimensional
154:   array of length nz + dof + 1, where nz is the number of non-zeros in
155:   the matrix.  The first dof entries in the array give the
156:   starting array indices of each row's items in the rest of the array,
157:   the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
158:   and the remaining nz items give the column indices of each of
159:   the 1s within the logical 2D matrix.  Each row's items within
160:   the array are the column indices of the 1s within that row
161:   of the 2D matrix.  PETSc developers may recognize that this is the
162:   same format as that computed by the `DMDASetBlockFills_Private()`
163:   function from a dense 2D matrix representation.

165:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
166:   can be represented in the `dfill`, `ofill` format

168:   Contributed by\:
169:   Philip C. Roth

171: .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172: @*/
173: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174: {
175:   DM_DA *dd = (DM_DA *)da->data;

177:   PetscFunctionBegin;
178:   /* save the given dfill and ofill information */
179:   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180:   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

182:   /* count nonzeros in ofill columns */
183:   PetscCall(DMDASetBlockFills_Private2(dd));
184:   PetscFunctionReturn(PETSC_SUCCESS);
185: }

187: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188: {
189:   PetscInt       dim, m, n, p, nc;
190:   DMBoundaryType bx, by, bz;
191:   MPI_Comm       comm;
192:   PetscMPIInt    size;
193:   PetscBool      isBAIJ;
194:   DM_DA         *dd = (DM_DA *)da->data;

196:   PetscFunctionBegin;
197:   /*
198:                                   m
199:           ------------------------------------------------------
200:          |                                                     |
201:          |                                                     |
202:          |               ----------------------                |
203:          |               |                    |                |
204:       n  |           yn  |                    |                |
205:          |               |                    |                |
206:          |               .---------------------                |
207:          |             (xs,ys)     xn                          |
208:          |            .                                        |
209:          |         (gxs,gys)                                   |
210:          |                                                     |
211:           -----------------------------------------------------
212:   */

214:   /*
215:          nc - number of components per grid point
216:          col - number of colors needed in one direction for single component problem

218:   */
219:   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

221:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222:   PetscCallMPI(MPI_Comm_size(comm, &size));
223:   if (ctype == IS_COLORING_LOCAL) {
224:     PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both sides of the domain on the same process");
225:   }

227:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
228:      matrices is for the blocks, not the individual matrix elements  */
229:   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
230:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
231:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
232:   if (isBAIJ) {
233:     dd->w  = 1;
234:     dd->xs = dd->xs / nc;
235:     dd->xe = dd->xe / nc;
236:     dd->Xs = dd->Xs / nc;
237:     dd->Xe = dd->Xe / nc;
238:   }

240:   /*
241:      We do not provide a getcoloring function in the DMDA operations because
242:    the basic DMDA does not know about matrices. We think of DMDA as being
243:    more low-level then matrices.
244:   */
245:   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
246:   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
247:   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
248:   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
249:   if (isBAIJ) {
250:     dd->w  = nc;
251:     dd->xs = dd->xs * nc;
252:     dd->xe = dd->xe * nc;
253:     dd->Xs = dd->Xs * nc;
254:     dd->Xe = dd->Xe * nc;
255:   }
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
260: {
261:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
262:   PetscInt         ncolors = 0;
263:   MPI_Comm         comm;
264:   DMBoundaryType   bx, by;
265:   DMDAStencilType  st;
266:   ISColoringValue *colors;
267:   DM_DA           *dd = (DM_DA *)da->data;

269:   PetscFunctionBegin;
270:   /*
271:          nc - number of components per grid point
272:          col - number of colors needed in one direction for single component problem

274:   */
275:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
276:   col = 2 * s + 1;
277:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
278:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
279:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

281:   /* special case as taught to us by Paul Hovland */
282:   if (st == DMDA_STENCIL_STAR && s == 1) {
283:     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
284:   } else {
285:     if (ctype == IS_COLORING_GLOBAL) {
286:       if (!dd->localcoloring) {
287:         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
288:         ii = 0;
289:         for (j = ys; j < ys + ny; j++) {
290:           for (i = xs; i < xs + nx; i++) {
291:             for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((i % col) + col * (j % col)), colors + ii++));
292:           }
293:         }
294:         ncolors = nc + nc * (col - 1 + col * (col - 1));
295:         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
296:       }
297:       *coloring = dd->localcoloring;
298:     } else if (ctype == IS_COLORING_LOCAL) {
299:       if (!dd->ghostedcoloring) {
300:         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
301:         ii = 0;
302:         for (j = gys; j < gys + gny; j++) {
303:           for (i = gxs; i < gxs + gnx; i++) {
304:             for (k = 0; k < nc; k++) {
305:               /* the complicated stuff is to handle periodic boundaries */
306:               PetscCall(ISColoringValueCast(k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)), colors + ii++));
307:             }
308:           }
309:         }
310:         ncolors = nc + nc * (col - 1 + col * (col - 1));
311:         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
312:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

314:         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
315:       }
316:       *coloring = dd->ghostedcoloring;
317:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318:   }
319:   PetscCall(ISColoringReference(*coloring));
320:   PetscFunctionReturn(PETSC_SUCCESS);
321: }

323: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
324: {
325:   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
326:   PetscInt         ncolors;
327:   MPI_Comm         comm;
328:   DMBoundaryType   bx, by, bz;
329:   DMDAStencilType  st;
330:   ISColoringValue *colors;
331:   DM_DA           *dd = (DM_DA *)da->data;

333:   PetscFunctionBegin;
334:   /*
335:          nc - number of components per grid point
336:          col - number of colors needed in one direction for single component problem
337:   */
338:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
339:   col = 2 * s + 1;
340:   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
341:   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
342:   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");

344:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
345:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
346:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

348:   /* create the coloring */
349:   if (ctype == IS_COLORING_GLOBAL) {
350:     if (!dd->localcoloring) {
351:       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
352:       ii = 0;
353:       for (k = zs; k < zs + nz; k++) {
354:         for (j = ys; j < ys + ny; j++) {
355:           for (i = xs; i < xs + nx; i++) {
356:             for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * ((i % col) + col * (j % col) + col * col * (k % col)), colors + ii++));
357:           }
358:         }
359:       }
360:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
361:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
362:     }
363:     *coloring = dd->localcoloring;
364:   } else if (ctype == IS_COLORING_LOCAL) {
365:     if (!dd->ghostedcoloring) {
366:       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
367:       ii = 0;
368:       for (k = gzs; k < gzs + gnz; k++) {
369:         for (j = gys; j < gys + gny; j++) {
370:           for (i = gxs; i < gxs + gnx; i++) {
371:             for (l = 0; l < nc; l++) {
372:               /* the complicated stuff is to handle periodic boundaries */
373:               PetscCall(ISColoringValueCast(l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)), colors + ii++));
374:             }
375:           }
376:         }
377:       }
378:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
379:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
380:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
381:     }
382:     *coloring = dd->ghostedcoloring;
383:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
384:   PetscCall(ISColoringReference(*coloring));
385:   PetscFunctionReturn(PETSC_SUCCESS);
386: }

388: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
389: {
390:   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
391:   PetscInt         ncolors;
392:   MPI_Comm         comm;
393:   DMBoundaryType   bx;
394:   ISColoringValue *colors;
395:   DM_DA           *dd = (DM_DA *)da->data;

397:   PetscFunctionBegin;
398:   /*
399:          nc - number of components per grid point
400:          col - number of colors needed in one direction for single component problem
401:   */
402:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
403:   col = 2 * s + 1;
404:   PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col);
405:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
406:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
407:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

409:   /* create the coloring */
410:   if (ctype == IS_COLORING_GLOBAL) {
411:     if (!dd->localcoloring) {
412:       PetscCall(PetscMalloc1(nc * nx, &colors));
413:       if (dd->ofillcols) {
414:         PetscInt tc = 0;
415:         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
416:         i1 = 0;
417:         for (i = xs; i < xs + nx; i++) {
418:           for (l = 0; l < nc; l++) {
419:             if (dd->ofillcols[l] && (i % col)) {
420:               PetscCall(ISColoringValueCast(nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l], colors + i1++));
421:             } else {
422:               PetscCall(ISColoringValueCast(l, colors + i1++));
423:             }
424:           }
425:         }
426:         ncolors = nc + 2 * s * tc;
427:       } else {
428:         i1 = 0;
429:         for (i = xs; i < xs + nx; i++) {
430:           for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * (i % col), colors + i1++));
431:         }
432:         ncolors = nc + nc * (col - 1);
433:       }
434:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
435:     }
436:     *coloring = dd->localcoloring;
437:   } else if (ctype == IS_COLORING_LOCAL) {
438:     if (!dd->ghostedcoloring) {
439:       PetscCall(PetscMalloc1(nc * gnx, &colors));
440:       i1 = 0;
441:       for (i = gxs; i < gxs + gnx; i++) {
442:         for (l = 0; l < nc; l++) {
443:           /* the complicated stuff is to handle periodic boundaries */
444:           PetscCall(ISColoringValueCast(l + nc * (SetInRange(i, m) % col), colors + i1++));
445:         }
446:       }
447:       ncolors = nc + nc * (col - 1);
448:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
449:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
450:     }
451:     *coloring = dd->ghostedcoloring;
452:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
453:   PetscCall(ISColoringReference(*coloring));
454:   PetscFunctionReturn(PETSC_SUCCESS);
455: }

457: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
458: {
459:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
460:   PetscInt         ncolors;
461:   MPI_Comm         comm;
462:   DMBoundaryType   bx, by;
463:   ISColoringValue *colors;
464:   DM_DA           *dd = (DM_DA *)da->data;

466:   PetscFunctionBegin;
467:   /*
468:          nc - number of components per grid point
469:          col - number of colors needed in one direction for single component problem
470:   */
471:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
472:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
473:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
474:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
475:   /* create the coloring */
476:   if (ctype == IS_COLORING_GLOBAL) {
477:     if (!dd->localcoloring) {
478:       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
479:       ii = 0;
480:       for (j = ys; j < ys + ny; j++) {
481:         for (i = xs; i < xs + nx; i++) {
482:           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * j + i) % 5), colors + ii++));
483:         }
484:       }
485:       ncolors = 5 * nc;
486:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
487:     }
488:     *coloring = dd->localcoloring;
489:   } else if (ctype == IS_COLORING_LOCAL) {
490:     if (!dd->ghostedcoloring) {
491:       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
492:       ii = 0;
493:       for (j = gys; j < gys + gny; j++) {
494:         for (i = gxs; i < gxs + gnx; i++) {
495:           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5), colors + ii++));
496:         }
497:       }
498:       ncolors = 5 * nc;
499:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
500:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
501:     }
502:     *coloring = dd->ghostedcoloring;
503:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
504:   PetscFunctionReturn(PETSC_SUCCESS);
505: }

507: /* =========================================================================== */
508: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
509: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
510: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
511: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
512: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
513: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
514: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

523: static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
524: {
525:   DM                da;
526:   const char       *prefix;
527:   Mat               AA, Anatural;
528:   AO                ao;
529:   PetscInt          rstart, rend, *petsc, i;
530:   IS                is;
531:   MPI_Comm          comm;
532:   PetscViewerFormat format;
533:   PetscBool         flag;

535:   PetscFunctionBegin;
536:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
537:   PetscCall(PetscViewerGetFormat(viewer, &format));
538:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

540:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
541:   PetscCall(MatGetDM(A, &da));
542:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

544:   PetscCall(DMDAGetAO(da, &ao));
545:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
546:   PetscCall(PetscMalloc1(rend - rstart, &petsc));
547:   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
548:   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
549:   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

551:   /* call viewer on natural ordering */
552:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
553:   if (flag) {
554:     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
555:     A = AA;
556:   }
557:   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
558:   PetscCall(ISDestroy(&is));
559:   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
560:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
561:   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
562:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
563:   PetscCall(MatView(Anatural, viewer));
564:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
565:   PetscCall(MatDestroy(&Anatural));
566:   if (flag) PetscCall(MatDestroy(&AA));
567:   PetscFunctionReturn(PETSC_SUCCESS);
568: }

570: static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
571: {
572:   DM       da;
573:   Mat      Anatural, Aapp;
574:   AO       ao;
575:   PetscInt rstart, rend, *app, i, m, n, M, N;
576:   IS       is;
577:   MPI_Comm comm;

579:   PetscFunctionBegin;
580:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
581:   PetscCall(MatGetDM(A, &da));
582:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

584:   /* Load the matrix in natural ordering */
585:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
586:   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
587:   PetscCall(MatGetSize(A, &M, &N));
588:   PetscCall(MatGetLocalSize(A, &m, &n));
589:   PetscCall(MatSetSizes(Anatural, m, n, M, N));
590:   PetscCall(MatLoad(Anatural, viewer));

592:   /* Map natural ordering to application ordering and create IS */
593:   PetscCall(DMDAGetAO(da, &ao));
594:   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
595:   PetscCall(PetscMalloc1(rend - rstart, &app));
596:   for (i = rstart; i < rend; i++) app[i - rstart] = i;
597:   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
598:   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

600:   /* Do permutation and replace header */
601:   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
602:   PetscCall(MatHeaderReplace(A, &Aapp));
603:   PetscCall(ISDestroy(&is));
604:   PetscCall(MatDestroy(&Anatural));
605:   PetscFunctionReturn(PETSC_SUCCESS);
606: }

608: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
609: {
610:   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
611:   Mat         A;
612:   MPI_Comm    comm;
613:   MatType     Atype;
614:   MatType     mtype;
615:   PetscMPIInt size;
616:   DM_DA      *dd  = (DM_DA *)da->data;
617:   PetscBool   aij = PETSC_FALSE, baij = PETSC_FALSE, sbaij = PETSC_FALSE, sell = PETSC_FALSE, is = PETSC_FALSE;

619:   PetscFunctionBegin;
620:   PetscCall(MatInitializePackage());
621:   mtype = da->mattype;

623:   /*
624:                                   m
625:           ------------------------------------------------------
626:          |                                                     |
627:          |                                                     |
628:          |               ----------------------                |
629:          |               |                    |                |
630:       n  |           ny  |                    |                |
631:          |               |                    |                |
632:          |               .---------------------                |
633:          |             (xs,ys)     nx                          |
634:          |            .                                        |
635:          |         (gxs,gys)                                   |
636:          |                                                     |
637:           -----------------------------------------------------
638:   */

640:   /*
641:          nc - number of components per grid point
642:          col - number of colors needed in one direction for single component problem
643:   */
644:   M   = dd->M;
645:   N   = dd->N;
646:   P   = dd->P;
647:   dim = da->dim;
648:   dof = dd->w;
649:   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
650:   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
651:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
652:   PetscCall(MatCreate(comm, &A));
653:   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
654:   PetscCall(MatSetType(A, mtype));
655:   PetscCall(MatSetFromOptions(A));
656:   if (dof * nx * ny * nz < da->bind_below) {
657:     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
658:     PetscCall(MatBindToCPU(A, PETSC_TRUE));
659:   }
660:   PetscCall(MatSetDM(A, da));
661:   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
662:   PetscCall(MatGetType(A, &Atype));
663:   /*
664:      We do not provide a getmatrix function in the DMDA operations because
665:    the basic DMDA does not know about matrices. We think of DMDA as being more
666:    more low-level than matrices. This is kind of cheating but, cause sometimes
667:    we think of DMDA has higher level than matrices.

669:      We could switch based on Atype (or mtype), but we do not since the
670:    specialized setting routines depend only on the particular preallocation
671:    details of the matrix, not the type itself.
672:   */
673:   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
674:     PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
675:     if (!aij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
676:     if (!aij) {
677:       PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
678:       if (!baij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
679:       if (!baij) {
680:         PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
681:         if (!sbaij) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
682:         if (!sbaij) {
683:           PetscCall(PetscObjectHasFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
684:           if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
685:         }
686:         if (!sell) PetscCall(PetscObjectHasFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
687:       }
688:     }
689:   }
690:   if (aij) {
691:     if (dim == 1) {
692:       if (dd->ofill) {
693:         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
694:       } else {
695:         DMBoundaryType bx;
696:         PetscMPIInt    size;
697:         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
698:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
699:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
700:           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
701:         } else {
702:           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
703:         }
704:       }
705:     } else if (dim == 2) {
706:       if (dd->ofill) {
707:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
708:       } else {
709:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
710:       }
711:     } else if (dim == 3) {
712:       if (dd->ofill) {
713:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
714:       } else {
715:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
716:       }
717:     }
718:   } else if (baij) {
719:     if (dim == 2) {
720:       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
721:     } else if (dim == 3) {
722:       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
723:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
724:   } else if (sbaij) {
725:     if (dim == 2) {
726:       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
727:     } else if (dim == 3) {
728:       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
729:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
730:   } else if (sell) {
731:     if (dim == 2) {
732:       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
733:     } else if (dim == 3) {
734:       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
735:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
736:   } else if (is) {
737:     PetscCall(DMCreateMatrix_DA_IS(da, A));
738:   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
739:     ISLocalToGlobalMapping ltog;

741:     PetscCall(MatSetBlockSize(A, dof));
742:     PetscCall(MatSetUp(A));
743:     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
744:     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
745:   }
746:   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
747:   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
748:   PetscCall(MatSetDM(A, da));
749:   PetscCallMPI(MPI_Comm_size(comm, &size));
750:   if (size > 1) {
751:     /* change viewer to display matrix in natural ordering */
752:     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
753:     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
754:   }
755:   *J = A;
756:   PetscFunctionReturn(PETSC_SUCCESS);
757: }

759: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
760: {
761:   DM_DA                 *da = (DM_DA *)dm->data;
762:   Mat                    lJ, P;
763:   ISLocalToGlobalMapping ltog;
764:   IS                     is;
765:   PetscBT                bt;
766:   const PetscInt        *e_loc, *idx;
767:   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

769:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
770:      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
771:   PetscFunctionBegin;
772:   dof = da->w;
773:   PetscCall(MatSetBlockSize(J, dof));
774:   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

776:   /* flag local elements indices in local DMDA numbering */
777:   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
778:   PetscCall(PetscBTCreate(nv / dof, &bt));
779:   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
780:   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

782:   /* filter out (set to -1) the global indices not used by the local elements */
783:   PetscCall(PetscMalloc1(nv / dof, &gidx));
784:   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
785:   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
786:   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
787:   for (i = 0; i < nv / dof; i++)
788:     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
789:   PetscCall(PetscBTDestroy(&bt));
790:   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
791:   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
792:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
793:   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
794:   PetscCall(ISDestroy(&is));

796:   /* Preallocation */
797:   if (dm->prealloc_skip) {
798:     PetscCall(MatSetUp(J));
799:   } else {
800:     PetscCall(MatISGetLocalMat(J, &lJ));
801:     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
802:     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
803:     PetscCall(MatSetType(P, MATPREALLOCATOR));
804:     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
805:     PetscCall(MatGetSize(lJ, &N, NULL));
806:     PetscCall(MatGetLocalSize(lJ, &n, NULL));
807:     PetscCall(MatSetSizes(P, n, n, N, N));
808:     PetscCall(MatSetBlockSize(P, dof));
809:     PetscCall(MatSetUp(P));
810:     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
811:     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
812:     PetscCall(MatISRestoreLocalMat(J, &lJ));
813:     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
814:     PetscCall(MatDestroy(&P));

816:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
817:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
818:   }
819:   PetscFunctionReturn(PETSC_SUCCESS);
820: }

822: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
823: {
824:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
825:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
826:   MPI_Comm               comm;
827:   PetscScalar           *values;
828:   DMBoundaryType         bx, by;
829:   ISLocalToGlobalMapping ltog;
830:   DMDAStencilType        st;

832:   PetscFunctionBegin;
833:   /*
834:          nc - number of components per grid point
835:          col - number of colors needed in one direction for single component problem
836:   */
837:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
838:   col = 2 * s + 1;
839:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
840:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
841:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

843:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
844:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

846:   PetscCall(MatSetBlockSize(J, nc));
847:   /* determine the matrix preallocation information */
848:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
849:   for (i = xs; i < xs + nx; i++) {
850:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
851:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

853:     for (j = ys; j < ys + ny; j++) {
854:       slot = i - gxs + gnx * (j - gys);

856:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
857:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

859:       cnt = 0;
860:       for (k = 0; k < nc; k++) {
861:         for (l = lstart; l < lend + 1; l++) {
862:           for (p = pstart; p < pend + 1; p++) {
863:             if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
864:               cols[cnt++] = k + nc * (slot + gnx * l + p);
865:             }
866:           }
867:         }
868:         rows[k] = k + nc * (slot);
869:       }
870:       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
871:     }
872:   }
873:   PetscCall(MatSetBlockSize(J, nc));
874:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
875:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
876:   MatPreallocateEnd(dnz, onz);

878:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

880:   /*
881:     For each node in the grid: we get the neighbors in the local (on processor ordering
882:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
883:     PETSc ordering.
884:   */
885:   if (!da->prealloc_only) {
886:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
887:     for (i = xs; i < xs + nx; i++) {
888:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
889:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

891:       for (j = ys; j < ys + ny; j++) {
892:         slot = i - gxs + gnx * (j - gys);

894:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
895:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

897:         cnt = 0;
898:         for (k = 0; k < nc; k++) {
899:           for (l = lstart; l < lend + 1; l++) {
900:             for (p = pstart; p < pend + 1; p++) {
901:               if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
902:                 cols[cnt++] = k + nc * (slot + gnx * l + p);
903:               }
904:             }
905:           }
906:           rows[k] = k + nc * (slot);
907:         }
908:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
909:       }
910:     }
911:     PetscCall(PetscFree(values));
912:     /* do not copy values to GPU since they are all zero and not yet needed there */
913:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
914:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
915:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
916:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
917:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
918:   }
919:   PetscCall(PetscFree2(rows, cols));
920:   PetscFunctionReturn(PETSC_SUCCESS);
921: }

923: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
924: {
925:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
926:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
927:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
928:   MPI_Comm               comm;
929:   PetscScalar           *values;
930:   DMBoundaryType         bx, by, bz;
931:   ISLocalToGlobalMapping ltog;
932:   DMDAStencilType        st;

934:   PetscFunctionBegin;
935:   /*
936:          nc - number of components per grid point
937:          col - number of colors needed in one direction for single component problem
938:   */
939:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
940:   col = 2 * s + 1;
941:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
942:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
943:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

945:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
946:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

948:   PetscCall(MatSetBlockSize(J, nc));
949:   /* determine the matrix preallocation information */
950:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
951:   for (i = xs; i < xs + nx; i++) {
952:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
953:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
954:     for (j = ys; j < ys + ny; j++) {
955:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
956:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
957:       for (k = zs; k < zs + nz; k++) {
958:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
959:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

961:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

963:         cnt = 0;
964:         for (l = 0; l < nc; l++) {
965:           for (ii = istart; ii < iend + 1; ii++) {
966:             for (jj = jstart; jj < jend + 1; jj++) {
967:               for (kk = kstart; kk < kend + 1; kk++) {
968:                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
969:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
970:                 }
971:               }
972:             }
973:           }
974:           rows[l] = l + nc * (slot);
975:         }
976:         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
977:       }
978:     }
979:   }
980:   PetscCall(MatSetBlockSize(J, nc));
981:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
982:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
983:   MatPreallocateEnd(dnz, onz);
984:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

986:   /*
987:     For each node in the grid: we get the neighbors in the local (on processor ordering
988:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
989:     PETSc ordering.
990:   */
991:   if (!da->prealloc_only) {
992:     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
993:     for (i = xs; i < xs + nx; i++) {
994:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
995:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
996:       for (j = ys; j < ys + ny; j++) {
997:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
998:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
999:         for (k = zs; k < zs + nz; k++) {
1000:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1001:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1003:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1005:           cnt = 0;
1006:           for (l = 0; l < nc; l++) {
1007:             for (ii = istart; ii < iend + 1; ii++) {
1008:               for (jj = jstart; jj < jend + 1; jj++) {
1009:                 for (kk = kstart; kk < kend + 1; kk++) {
1010:                   if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1011:                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1012:                   }
1013:                 }
1014:               }
1015:             }
1016:             rows[l] = l + nc * (slot);
1017:           }
1018:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1019:         }
1020:       }
1021:     }
1022:     PetscCall(PetscFree(values));
1023:     /* do not copy values to GPU since they are all zero and not yet needed there */
1024:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1025:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1026:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1027:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1028:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1029:   }
1030:   PetscCall(PetscFree2(rows, cols));
1031:   PetscFunctionReturn(PETSC_SUCCESS);
1032: }

1034: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1035: {
1036:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1037:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1038:   MPI_Comm               comm;
1039:   DMBoundaryType         bx, by;
1040:   ISLocalToGlobalMapping ltog, mltog;
1041:   DMDAStencilType        st;
1042:   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

1044:   PetscFunctionBegin;
1045:   /*
1046:          nc - number of components per grid point
1047:          col - number of colors needed in one direction for single component problem
1048:   */
1049:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1050:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1051:   col = 2 * s + 1;
1052:   /*
1053:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1054:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1055:   */
1056:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1057:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1058:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1059:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1060:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1062:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1063:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1065:   PetscCall(MatSetBlockSize(J, nc));
1066:   /* determine the matrix preallocation information */
1067:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1068:   for (i = xs; i < xs + nx; i++) {
1069:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1070:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1072:     for (j = ys; j < ys + ny; j++) {
1073:       slot = i - gxs + gnx * (j - gys);

1075:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1076:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1078:       cnt = 0;
1079:       for (k = 0; k < nc; k++) {
1080:         for (l = lstart; l < lend + 1; l++) {
1081:           for (p = pstart; p < pend + 1; p++) {
1082:             if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
1083:               cols[cnt++] = k + nc * (slot + gnx * l + p);
1084:             }
1085:           }
1086:         }
1087:         rows[k] = k + nc * (slot);
1088:       }
1089:       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1090:       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1091:     }
1092:   }
1093:   PetscCall(MatSetBlockSize(J, nc));
1094:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1095:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1096:   MatPreallocateEnd(dnz, onz);
1097:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1098:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1100:   /*
1101:     For each node in the grid: we get the neighbors in the local (on processor ordering
1102:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1103:     PETSc ordering.
1104:   */
1105:   if (!da->prealloc_only) {
1106:     for (i = xs; i < xs + nx; i++) {
1107:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1108:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1110:       for (j = ys; j < ys + ny; j++) {
1111:         slot = i - gxs + gnx * (j - gys);

1113:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1114:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1116:         cnt = 0;
1117:         for (l = lstart; l < lend + 1; l++) {
1118:           for (p = pstart; p < pend + 1; p++) {
1119:             if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star have either l = 0 or p = 0 */
1120:               cols[cnt++] = nc * (slot + gnx * l + p);
1121:               for (k = 1; k < nc; k++) {
1122:                 cols[cnt] = 1 + cols[cnt - 1];
1123:                 cnt++;
1124:               }
1125:             }
1126:           }
1127:         }
1128:         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1129:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1130:       }
1131:     }
1132:     /* do not copy values to GPU since they are all zero and not yet needed there */
1133:     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1134:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1135:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1136:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1137:     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1138:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1139:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1140:   }
1141:   PetscCall(PetscFree2(rows, cols));
1142:   PetscFunctionReturn(PETSC_SUCCESS);
1143: }

1145: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1146: {
1147:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1148:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1149:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1150:   DM_DA                 *dd = (DM_DA *)da->data;
1151:   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1152:   MPI_Comm               comm;
1153:   DMBoundaryType         bx, by;
1154:   ISLocalToGlobalMapping ltog;
1155:   DMDAStencilType        st;
1156:   PetscBool              removedups = PETSC_FALSE;

1158:   PetscFunctionBegin;
1159:   /*
1160:          nc - number of components per grid point
1161:          col - number of colors needed in one direction for single component problem
1162:   */
1163:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1164:   col = 2 * s + 1;
1165:   /*
1166:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1167:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1168:   */
1169:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1170:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1171:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1172:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1173:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1175:   PetscCall(PetscMalloc1(col * col * nc, &cols));
1176:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1178:   PetscCall(MatSetBlockSize(J, nc));
1179:   /* determine the matrix preallocation information */
1180:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1181:   for (i = xs; i < xs + nx; i++) {
1182:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1183:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1185:     for (j = ys; j < ys + ny; j++) {
1186:       slot = i - gxs + gnx * (j - gys);

1188:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1189:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1191:       for (k = 0; k < nc; k++) {
1192:         cnt = 0;
1193:         for (l = lstart; l < lend + 1; l++) {
1194:           for (p = pstart; p < pend + 1; p++) {
1195:             if (l || p) {
1196:               if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */
1197:                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1198:               }
1199:             } else {
1200:               if (dfill) {
1201:                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1202:               } else {
1203:                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1204:               }
1205:             }
1206:           }
1207:         }
1208:         row    = k + nc * (slot);
1209:         maxcnt = PetscMax(maxcnt, cnt);
1210:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1211:         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1212:       }
1213:     }
1214:   }
1215:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1216:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1217:   MatPreallocateEnd(dnz, onz);
1218:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1220:   /*
1221:     For each node in the grid: we get the neighbors in the local (on processor ordering
1222:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1223:     PETSc ordering.
1224:   */
1225:   if (!da->prealloc_only) {
1226:     for (i = xs; i < xs + nx; i++) {
1227:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1228:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1230:       for (j = ys; j < ys + ny; j++) {
1231:         slot = i - gxs + gnx * (j - gys);

1233:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1234:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1236:         for (k = 0; k < nc; k++) {
1237:           cnt = 0;
1238:           for (l = lstart; l < lend + 1; l++) {
1239:             for (p = pstart; p < pend + 1; p++) {
1240:               if (l || p) {
1241:                 if (st == DMDA_STENCIL_BOX || !l || !p) { /* entries on star */
1242:                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1243:                 }
1244:               } else {
1245:                 if (dfill) {
1246:                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1247:                 } else {
1248:                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1249:                 }
1250:               }
1251:             }
1252:           }
1253:           row = k + nc * (slot);
1254:           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1255:         }
1256:       }
1257:     }
1258:     /* do not copy values to GPU since they are all zero and not yet needed there */
1259:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1260:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1261:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1262:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1263:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1264:   }
1265:   PetscCall(PetscFree(cols));
1266:   PetscFunctionReturn(PETSC_SUCCESS);
1267: }

1269: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1270: {
1271:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1272:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1273:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1274:   MPI_Comm               comm;
1275:   DMBoundaryType         bx, by, bz;
1276:   ISLocalToGlobalMapping ltog, mltog;
1277:   DMDAStencilType        st;
1278:   PetscBool              removedups = PETSC_FALSE;

1280:   PetscFunctionBegin;
1281:   /*
1282:          nc - number of components per grid point
1283:          col - number of colors needed in one direction for single component problem
1284:   */
1285:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1286:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1287:   col = 2 * s + 1;

1289:   /*
1290:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1291:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1292:   */
1293:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1294:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1295:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

1297:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1298:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1299:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1301:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1302:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1304:   PetscCall(MatSetBlockSize(J, nc));
1305:   /* determine the matrix preallocation information */
1306:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1307:   for (i = xs; i < xs + nx; i++) {
1308:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1309:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1310:     for (j = ys; j < ys + ny; j++) {
1311:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1312:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1313:       for (k = zs; k < zs + nz; k++) {
1314:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1315:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1317:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1319:         cnt = 0;
1320:         for (l = 0; l < nc; l++) {
1321:           for (ii = istart; ii < iend + 1; ii++) {
1322:             for (jj = jstart; jj < jend + 1; jj++) {
1323:               for (kk = kstart; kk < kend + 1; kk++) {
1324:                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1325:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1326:                 }
1327:               }
1328:             }
1329:           }
1330:           rows[l] = l + nc * (slot);
1331:         }
1332:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1333:         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1334:       }
1335:     }
1336:   }
1337:   PetscCall(MatSetBlockSize(J, nc));
1338:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1339:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1340:   MatPreallocateEnd(dnz, onz);
1341:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1342:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1344:   /*
1345:     For each node in the grid: we get the neighbors in the local (on processor ordering
1346:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1347:     PETSc ordering.
1348:   */
1349:   if (!da->prealloc_only) {
1350:     for (i = xs; i < xs + nx; i++) {
1351:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1352:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1353:       for (j = ys; j < ys + ny; j++) {
1354:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1355:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1356:         for (k = zs; k < zs + nz; k++) {
1357:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1358:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1360:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1362:           cnt = 0;
1363:           for (kk = kstart; kk < kend + 1; kk++) {
1364:             for (jj = jstart; jj < jend + 1; jj++) {
1365:               for (ii = istart; ii < iend + 1; ii++) {
1366:                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1367:                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1368:                   for (l = 1; l < nc; l++) {
1369:                     cols[cnt] = 1 + cols[cnt - 1];
1370:                     cnt++;
1371:                   }
1372:                 }
1373:               }
1374:             }
1375:           }
1376:           rows[0] = nc * (slot);
1377:           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1378:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1379:         }
1380:       }
1381:     }
1382:     /* do not copy values to GPU since they are all zero and not yet needed there */
1383:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1384:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1385:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1386:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1387:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1388:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1389:   }
1390:   PetscCall(PetscFree2(rows, cols));
1391:   PetscFunctionReturn(PETSC_SUCCESS);
1392: }

1394: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1395: {
1396:   DM_DA                 *dd = (DM_DA *)da->data;
1397:   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1398:   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1399:   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1400:   DMBoundaryType         bx;
1401:   ISLocalToGlobalMapping ltog;
1402:   PetscMPIInt            rank, size;

1404:   PetscFunctionBegin;
1405:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1406:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

1408:   /*
1409:          nc - number of components per grid point
1410:   */
1411:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1412:   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1413:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1414:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1416:   PetscCall(MatSetBlockSize(J, nc));
1417:   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

1419:   /*
1420:         note should be smaller for first and last process with no periodic
1421:         does not handle dfill
1422:   */
1423:   cnt = 0;
1424:   /* coupling with process to the left */
1425:   for (i = 0; i < s; i++) {
1426:     for (j = 0; j < nc; j++) {
1427:       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1428:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1429:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1430:         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1431:         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1432:       }
1433:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1434:       cnt++;
1435:     }
1436:   }
1437:   for (i = s; i < nx - s; i++) {
1438:     for (j = 0; j < nc; j++) {
1439:       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1440:       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1441:       cnt++;
1442:     }
1443:   }
1444:   /* coupling with process to the right */
1445:   for (i = nx - s; i < nx; i++) {
1446:     for (j = 0; j < nc; j++) {
1447:       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1448:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1449:       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1450:         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1451:         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1452:       }
1453:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1454:       cnt++;
1455:     }
1456:   }

1458:   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1459:   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1460:   PetscCall(PetscFree2(cols, ocols));

1462:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1463:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1465:   /*
1466:     For each node in the grid: we get the neighbors in the local (on processor ordering
1467:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1468:     PETSc ordering.
1469:   */
1470:   if (!da->prealloc_only) {
1471:     PetscCall(PetscMalloc1(maxcnt, &cols));
1472:     row = xs * nc;
1473:     /* coupling with process to the left */
1474:     for (i = xs; i < xs + s; i++) {
1475:       for (j = 0; j < nc; j++) {
1476:         cnt = 0;
1477:         if (rank) {
1478:           for (l = 0; l < s; l++) {
1479:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1480:           }
1481:         }
1482:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1483:           for (l = 0; l < s; l++) {
1484:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1485:           }
1486:         }
1487:         if (dfill) {
1488:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1489:         } else {
1490:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1491:         }
1492:         for (l = 0; l < s; l++) {
1493:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1494:         }
1495:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1496:         row++;
1497:       }
1498:     }
1499:     for (i = xs + s; i < xs + nx - s; i++) {
1500:       for (j = 0; j < nc; j++) {
1501:         cnt = 0;
1502:         for (l = 0; l < s; l++) {
1503:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1504:         }
1505:         if (dfill) {
1506:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1507:         } else {
1508:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1509:         }
1510:         for (l = 0; l < s; l++) {
1511:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1512:         }
1513:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1514:         row++;
1515:       }
1516:     }
1517:     /* coupling with process to the right */
1518:     for (i = xs + nx - s; i < xs + nx; i++) {
1519:       for (j = 0; j < nc; j++) {
1520:         cnt = 0;
1521:         for (l = 0; l < s; l++) {
1522:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1523:         }
1524:         if (dfill) {
1525:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1526:         } else {
1527:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1528:         }
1529:         if (rank < size - 1) {
1530:           for (l = 0; l < s; l++) {
1531:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1532:           }
1533:         }
1534:         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1535:           for (l = 0; l < s; l++) {
1536:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1537:           }
1538:         }
1539:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1540:         row++;
1541:       }
1542:     }
1543:     PetscCall(PetscFree(cols));
1544:     /* do not copy values to GPU since they are all zero and not yet needed there */
1545:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1546:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1547:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1548:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1549:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1550:   }
1551:   PetscFunctionReturn(PETSC_SUCCESS);
1552: }

1554: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1555: {
1556:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1557:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1558:   PetscInt               istart, iend;
1559:   DMBoundaryType         bx;
1560:   ISLocalToGlobalMapping ltog, mltog;

1562:   PetscFunctionBegin;
1563:   /*
1564:          nc - number of components per grid point
1565:          col - number of colors needed in one direction for single component problem
1566:   */
1567:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1568:   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1569:   col = 2 * s + 1;

1571:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1572:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1574:   PetscCall(MatSetBlockSize(J, nc));
1575:   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1576:   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

1578:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1579:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1580:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1582:   /*
1583:     For each node in the grid: we get the neighbors in the local (on processor ordering
1584:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1585:     PETSc ordering.
1586:   */
1587:   if (!da->prealloc_only) {
1588:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1589:     for (i = xs; i < xs + nx; i++) {
1590:       istart = PetscMax(-s, gxs - i);
1591:       iend   = PetscMin(s, gxs + gnx - i - 1);
1592:       slot   = i - gxs;

1594:       cnt = 0;
1595:       for (i1 = istart; i1 < iend + 1; i1++) {
1596:         cols[cnt++] = nc * (slot + i1);
1597:         for (l = 1; l < nc; l++) {
1598:           cols[cnt] = 1 + cols[cnt - 1];
1599:           cnt++;
1600:         }
1601:       }
1602:       rows[0] = nc * (slot);
1603:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1604:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1605:     }
1606:     /* do not copy values to GPU since they are all zero and not yet needed there */
1607:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1608:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1609:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1610:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1611:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1612:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1613:     PetscCall(PetscFree2(rows, cols));
1614:   }
1615:   PetscFunctionReturn(PETSC_SUCCESS);
1616: }

1618: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1619: {
1620:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1621:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1622:   PetscInt               istart, iend;
1623:   DMBoundaryType         bx;
1624:   ISLocalToGlobalMapping ltog, mltog;

1626:   PetscFunctionBegin;
1627:   /*
1628:          nc - number of components per grid point
1629:          col - number of colors needed in one direction for single component problem
1630:   */
1631:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1632:   col = 2 * s + 1;

1634:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1635:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1637:   PetscCall(MatSetBlockSize(J, nc));
1638:   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

1640:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1641:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1642:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1644:   /*
1645:     For each node in the grid: we get the neighbors in the local (on processor ordering
1646:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1647:     PETSc ordering.
1648:   */
1649:   if (!da->prealloc_only) {
1650:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1651:     for (i = xs; i < xs + nx; i++) {
1652:       istart = PetscMax(-s, gxs - i);
1653:       iend   = PetscMin(s, gxs + gnx - i - 1);
1654:       slot   = i - gxs;

1656:       cnt = 0;
1657:       for (i1 = istart; i1 < iend + 1; i1++) {
1658:         cols[cnt++] = nc * (slot + i1);
1659:         for (l = 1; l < nc; l++) {
1660:           cols[cnt] = 1 + cols[cnt - 1];
1661:           cnt++;
1662:         }
1663:       }
1664:       rows[0] = nc * (slot);
1665:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1666:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1667:     }
1668:     /* do not copy values to GPU since they are all zero and not yet needed there */
1669:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1670:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1671:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1672:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1673:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1674:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1675:     PetscCall(PetscFree2(rows, cols));
1676:   }
1677:   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1678:   PetscFunctionReturn(PETSC_SUCCESS);
1679: }

1681: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1682: {
1683:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1684:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1685:   PetscInt               istart, iend, jstart, jend, ii, jj;
1686:   MPI_Comm               comm;
1687:   PetscScalar           *values;
1688:   DMBoundaryType         bx, by;
1689:   DMDAStencilType        st;
1690:   ISLocalToGlobalMapping ltog;

1692:   PetscFunctionBegin;
1693:   /*
1694:      nc - number of components per grid point
1695:      col - number of colors needed in one direction for single component problem
1696:   */
1697:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1698:   col = 2 * s + 1;

1700:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1701:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1702:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1704:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1706:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1708:   /* determine the matrix preallocation information */
1709:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1710:   for (i = xs; i < xs + nx; i++) {
1711:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1712:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1713:     for (j = ys; j < ys + ny; j++) {
1714:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1715:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1716:       slot   = i - gxs + gnx * (j - gys);

1718:       /* Find block columns in block row */
1719:       cnt = 0;
1720:       for (ii = istart; ii < iend + 1; ii++) {
1721:         for (jj = jstart; jj < jend + 1; jj++) {
1722:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1723:             cols[cnt++] = slot + ii + gnx * jj;
1724:           }
1725:         }
1726:       }
1727:       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1728:     }
1729:   }
1730:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1731:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1732:   MatPreallocateEnd(dnz, onz);

1734:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1736:   /*
1737:     For each node in the grid: we get the neighbors in the local (on processor ordering
1738:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1739:     PETSc ordering.
1740:   */
1741:   if (!da->prealloc_only) {
1742:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1743:     for (i = xs; i < xs + nx; i++) {
1744:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1745:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1746:       for (j = ys; j < ys + ny; j++) {
1747:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1748:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1749:         slot   = i - gxs + gnx * (j - gys);
1750:         cnt    = 0;
1751:         for (ii = istart; ii < iend + 1; ii++) {
1752:           for (jj = jstart; jj < jend + 1; jj++) {
1753:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1754:               cols[cnt++] = slot + ii + gnx * jj;
1755:             }
1756:           }
1757:         }
1758:         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1759:       }
1760:     }
1761:     PetscCall(PetscFree(values));
1762:     /* do not copy values to GPU since they are all zero and not yet needed there */
1763:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1764:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1765:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1766:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1767:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1768:   }
1769:   PetscCall(PetscFree(cols));
1770:   PetscFunctionReturn(PETSC_SUCCESS);
1771: }

1773: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1774: {
1775:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1776:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1777:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1778:   MPI_Comm               comm;
1779:   PetscScalar           *values;
1780:   DMBoundaryType         bx, by, bz;
1781:   DMDAStencilType        st;
1782:   ISLocalToGlobalMapping ltog;

1784:   PetscFunctionBegin;
1785:   /*
1786:          nc - number of components per grid point
1787:          col - number of colors needed in one direction for single component problem
1788:   */
1789:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1790:   col = 2 * s + 1;

1792:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1793:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1794:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1796:   PetscCall(PetscMalloc1(col * col * col, &cols));

1798:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1800:   /* determine the matrix preallocation information */
1801:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1802:   for (i = xs; i < xs + nx; i++) {
1803:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1804:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1805:     for (j = ys; j < ys + ny; j++) {
1806:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1807:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1808:       for (k = zs; k < zs + nz; k++) {
1809:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1810:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1812:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1814:         /* Find block columns in block row */
1815:         cnt = 0;
1816:         for (ii = istart; ii < iend + 1; ii++) {
1817:           for (jj = jstart; jj < jend + 1; jj++) {
1818:             for (kk = kstart; kk < kend + 1; kk++) {
1819:               if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1820:                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1821:               }
1822:             }
1823:           }
1824:         }
1825:         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1826:       }
1827:     }
1828:   }
1829:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1830:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1831:   MatPreallocateEnd(dnz, onz);

1833:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1835:   /*
1836:     For each node in the grid: we get the neighbors in the local (on processor ordering
1837:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1838:     PETSc ordering.
1839:   */
1840:   if (!da->prealloc_only) {
1841:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1842:     for (i = xs; i < xs + nx; i++) {
1843:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1844:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1845:       for (j = ys; j < ys + ny; j++) {
1846:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1847:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1848:         for (k = zs; k < zs + nz; k++) {
1849:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1850:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1852:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1854:           cnt = 0;
1855:           for (ii = istart; ii < iend + 1; ii++) {
1856:             for (jj = jstart; jj < jend + 1; jj++) {
1857:               for (kk = kstart; kk < kend + 1; kk++) {
1858:                 if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1859:                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1860:                 }
1861:               }
1862:             }
1863:           }
1864:           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1865:         }
1866:       }
1867:     }
1868:     PetscCall(PetscFree(values));
1869:     /* do not copy values to GPU since they are all zero and not yet needed there */
1870:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1871:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1872:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1873:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1874:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1875:   }
1876:   PetscCall(PetscFree(cols));
1877:   PetscFunctionReturn(PETSC_SUCCESS);
1878: }

1880: /*
1881:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1882:   identify in the local ordering with periodic domain.
1883: */
1884: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1885: {
1886:   PetscInt i, n;

1888:   PetscFunctionBegin;
1889:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1890:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1891:   for (i = 0, n = 0; i < *cnt; i++) {
1892:     if (col[i] >= *row) col[n++] = col[i];
1893:   }
1894:   *cnt = n;
1895:   PetscFunctionReturn(PETSC_SUCCESS);
1896: }

1898: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1899: {
1900:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1901:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1902:   PetscInt               istart, iend, jstart, jend, ii, jj;
1903:   MPI_Comm               comm;
1904:   PetscScalar           *values;
1905:   DMBoundaryType         bx, by;
1906:   DMDAStencilType        st;
1907:   ISLocalToGlobalMapping ltog;

1909:   PetscFunctionBegin;
1910:   /*
1911:      nc - number of components per grid point
1912:      col - number of colors needed in one direction for single component problem
1913:   */
1914:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1915:   col = 2 * s + 1;

1917:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1918:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1919:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1921:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1923:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1925:   /* determine the matrix preallocation information */
1926:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1927:   for (i = xs; i < xs + nx; i++) {
1928:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1929:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1930:     for (j = ys; j < ys + ny; j++) {
1931:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1932:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1933:       slot   = i - gxs + gnx * (j - gys);

1935:       /* Find block columns in block row */
1936:       cnt = 0;
1937:       for (ii = istart; ii < iend + 1; ii++) {
1938:         for (jj = jstart; jj < jend + 1; jj++) {
1939:           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1940:         }
1941:       }
1942:       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1943:       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1944:     }
1945:   }
1946:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1947:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1948:   MatPreallocateEnd(dnz, onz);

1950:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1952:   /*
1953:     For each node in the grid: we get the neighbors in the local (on processor ordering
1954:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1955:     PETSc ordering.
1956:   */
1957:   if (!da->prealloc_only) {
1958:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1959:     for (i = xs; i < xs + nx; i++) {
1960:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1961:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1962:       for (j = ys; j < ys + ny; j++) {
1963:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1964:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1965:         slot   = i - gxs + gnx * (j - gys);

1967:         /* Find block columns in block row */
1968:         cnt = 0;
1969:         for (ii = istart; ii < iend + 1; ii++) {
1970:           for (jj = jstart; jj < jend + 1; jj++) {
1971:             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1972:           }
1973:         }
1974:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1975:         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1976:       }
1977:     }
1978:     PetscCall(PetscFree(values));
1979:     /* do not copy values to GPU since they are all zero and not yet needed there */
1980:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1981:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1982:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1983:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1984:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1985:   }
1986:   PetscCall(PetscFree(cols));
1987:   PetscFunctionReturn(PETSC_SUCCESS);
1988: }

1990: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1991: {
1992:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1993:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1994:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1995:   MPI_Comm               comm;
1996:   PetscScalar           *values;
1997:   DMBoundaryType         bx, by, bz;
1998:   DMDAStencilType        st;
1999:   ISLocalToGlobalMapping ltog;

2001:   PetscFunctionBegin;
2002:   /*
2003:      nc - number of components per grid point
2004:      col - number of colors needed in one direction for single component problem
2005:   */
2006:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2007:   col = 2 * s + 1;

2009:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2010:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2011:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2013:   /* create the matrix */
2014:   PetscCall(PetscMalloc1(col * col * col, &cols));

2016:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2018:   /* determine the matrix preallocation information */
2019:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2020:   for (i = xs; i < xs + nx; i++) {
2021:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2022:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2023:     for (j = ys; j < ys + ny; j++) {
2024:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2025:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2026:       for (k = zs; k < zs + nz; k++) {
2027:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2028:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2030:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2032:         /* Find block columns in block row */
2033:         cnt = 0;
2034:         for (ii = istart; ii < iend + 1; ii++) {
2035:           for (jj = jstart; jj < jend + 1; jj++) {
2036:             for (kk = kstart; kk < kend + 1; kk++) {
2037:               if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2038:             }
2039:           }
2040:         }
2041:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2042:         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2043:       }
2044:     }
2045:   }
2046:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2047:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2048:   MatPreallocateEnd(dnz, onz);

2050:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2052:   /*
2053:     For each node in the grid: we get the neighbors in the local (on processor ordering
2054:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2055:     PETSc ordering.
2056:   */
2057:   if (!da->prealloc_only) {
2058:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2059:     for (i = xs; i < xs + nx; i++) {
2060:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2061:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2062:       for (j = ys; j < ys + ny; j++) {
2063:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2064:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2065:         for (k = zs; k < zs + nz; k++) {
2066:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2067:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2069:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2071:           cnt = 0;
2072:           for (ii = istart; ii < iend + 1; ii++) {
2073:             for (jj = jstart; jj < jend + 1; jj++) {
2074:               for (kk = kstart; kk < kend + 1; kk++) {
2075:                 if (st == DMDA_STENCIL_BOX || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2076:               }
2077:             }
2078:           }
2079:           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2080:           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2081:         }
2082:       }
2083:     }
2084:     PetscCall(PetscFree(values));
2085:     /* do not copy values to GPU since they are all zero and not yet needed there */
2086:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2087:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2088:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2089:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2090:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2091:   }
2092:   PetscCall(PetscFree(cols));
2093:   PetscFunctionReturn(PETSC_SUCCESS);
2094: }

2096: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2097: {
2098:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2099:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2100:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2101:   DM_DA                 *dd = (DM_DA *)da->data;
2102:   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2103:   MPI_Comm               comm;
2104:   PetscScalar           *values;
2105:   DMBoundaryType         bx, by, bz;
2106:   ISLocalToGlobalMapping ltog;
2107:   DMDAStencilType        st;
2108:   PetscBool              removedups = PETSC_FALSE;

2110:   PetscFunctionBegin;
2111:   /*
2112:          nc - number of components per grid point
2113:          col - number of colors needed in one direction for single component problem
2114:   */
2115:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2116:   col = 2 * s + 1;

2118:   /*
2119:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2120:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2121:   */
2122:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2123:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2124:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

2126:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2127:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2128:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2130:   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2131:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2133:   /* determine the matrix preallocation information */
2134:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

2136:   PetscCall(MatSetBlockSize(J, nc));
2137:   for (i = xs; i < xs + nx; i++) {
2138:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2139:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2140:     for (j = ys; j < ys + ny; j++) {
2141:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2142:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2143:       for (k = zs; k < zs + nz; k++) {
2144:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2145:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2147:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2149:         for (l = 0; l < nc; l++) {
2150:           cnt = 0;
2151:           for (ii = istart; ii < iend + 1; ii++) {
2152:             for (jj = jstart; jj < jend + 1; jj++) {
2153:               for (kk = kstart; kk < kend + 1; kk++) {
2154:                 if (ii || jj || kk) {
2155:                   if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2156:                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2157:                   }
2158:                 } else {
2159:                   if (dfill) {
2160:                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2161:                   } else {
2162:                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2163:                   }
2164:                 }
2165:               }
2166:             }
2167:           }
2168:           row    = l + nc * (slot);
2169:           maxcnt = PetscMax(maxcnt, cnt);
2170:           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2171:           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2172:         }
2173:       }
2174:     }
2175:   }
2176:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2177:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2178:   MatPreallocateEnd(dnz, onz);
2179:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2181:   /*
2182:     For each node in the grid: we get the neighbors in the local (on processor ordering
2183:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2184:     PETSc ordering.
2185:   */
2186:   if (!da->prealloc_only) {
2187:     PetscCall(PetscCalloc1(maxcnt, &values));
2188:     for (i = xs; i < xs + nx; i++) {
2189:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2190:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2191:       for (j = ys; j < ys + ny; j++) {
2192:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2193:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2194:         for (k = zs; k < zs + nz; k++) {
2195:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2196:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2198:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2200:           for (l = 0; l < nc; l++) {
2201:             cnt = 0;
2202:             for (ii = istart; ii < iend + 1; ii++) {
2203:               for (jj = jstart; jj < jend + 1; jj++) {
2204:                 for (kk = kstart; kk < kend + 1; kk++) {
2205:                   if (ii || jj || kk) {
2206:                     if (st == DMDA_STENCIL_BOX || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2207:                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2208:                     }
2209:                   } else {
2210:                     if (dfill) {
2211:                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2212:                     } else {
2213:                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2214:                     }
2215:                   }
2216:                 }
2217:               }
2218:             }
2219:             row = l + nc * (slot);
2220:             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2221:           }
2222:         }
2223:       }
2224:     }
2225:     PetscCall(PetscFree(values));
2226:     /* do not copy values to GPU since they are all zero and not yet needed there */
2227:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2228:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2229:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2230:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2231:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2232:   }
2233:   PetscCall(PetscFree(cols));
2234:   PetscFunctionReturn(PETSC_SUCCESS);
2235: }