Actual source code: fdda.c

  1: #include <petsc/private/dmdaimpl.h>
  2: #include <petscmat.h>
  3: #include <petscbt.h>

  5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
  6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
  8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

 10: /*
 11:    Maps a ghost index i, which may be negative or greater than the upper
 12:    bound, into the range 0:m-1 using periodicity
 13: */
 14: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
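/* e.g. with m = 10: SetInRange(-1, 10) = 9, SetInRange(10, 10) = 0, SetInRange(3, 10) = 3 */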

 16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
 17: {
 18:   PetscInt i, j, nz, *fill;

 20:   PetscFunctionBegin;
 21:   if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);

 23:   /* count the number of nonzeros */
 24:   nz = 0;
 25:   for (i = 0; i < w; i++) {
 26:     for (j = 0; j < w; j++) {
 27:       if (dfill[w * i + j]) nz++;
 28:     }
 29:   }
 30:   PetscCall(PetscMalloc1(nz + w + 1, &fill));
 31:   /* construct modified CSR storage of nonzero structure */
 32:   /*  fill[0] -- fill[w] mark the starts of each row of column indices (and the end of the last row),
 33:    so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
 34:   nz = w + 1;
 35:   for (i = 0; i < w; i++) {
 36:     fill[i] = nz;
 37:     for (j = 0; j < w; j++) {
 38:       if (dfill[w * i + j]) {
 39:         fill[nz] = j;
 40:         nz++;
 41:       }
 42:     }
 43:   }
 44:   fill[w] = nz;

 46:   *rfill = fill;
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }
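
/*
   Example: with w = 3 and the dense pattern
       dfill[9] = {1, 0, 0,
                   1, 1, 0,
                   0, 1, 1}
   the routine above returns
       fill[] = {4, 5, 7, 9, 0, 0, 1, 1, 2}
   where fill[0..2] = {4, 5, 7} are the row starts, fill[3] = 9 is the total
   length nz + w + 1, and the trailing entries are the column indices of the
   nonzeros in rows 0, 1, 2 respectively.
*/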

 50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
 51: {
 52:   PetscInt nz;

 54:   PetscFunctionBegin;
 55:   if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);

 57:   /* Determine number of non-zeros */
 58:   nz = (dfillsparse[w] - w - 1);

 60:   /* Allocate space for our copy of the given sparse matrix representation. */
 61:   PetscCall(PetscMalloc1(nz + w + 1, rfill));
 62:   PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
 63:   PetscFunctionReturn(PETSC_SUCCESS);
 64: }

 66: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 67: {
 68:   PetscInt i, k, cnt = 1;

 70:   PetscFunctionBegin;
 71:   /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
 72:    columns to the left with any nonzeros in them plus 1 */
 73:   PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
 74:   for (i = 0; i < dd->w; i++) {
 75:     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 76:   }
 77:   for (i = 0; i < dd->w; i++) {
 78:     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
 79:   }
 80:   PetscFunctionReturn(PETSC_SUCCESS);
 81: }
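
/* Example: an ofill pattern whose nonzeros touch only columns 0 and 2 yields
   ofillcols = {1, 0, 2}: column 1 is empty so it keeps the value 0, while the
   occupied columns are numbered consecutively starting at 1 */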

 83: /*@
 84:   DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 85:   of the matrix returned by `DMCreateMatrix()`.

 87:   Logically Collective

 89:   Input Parameters:
 90: + da    - the `DMDA`
 91: . dfill - the fill pattern in the diagonal block (may be `NULL`, means use dense block)
 92: - ofill - the fill pattern in the off-diagonal blocks

 94:   Level: developer

 96:   Notes:
 97:   This only makes sense when you are doing multicomponent problems but using the
 98:   `MATMPIAIJ` matrix format

100:   The format for `dfill` and `ofill` is a 2 dimensional dof by dof matrix with 1 entries
101:   representing coupling and 0 entries for missing coupling. For example
102: .vb
103:             dfill[9] = {1, 0, 0,
104:                         1, 1, 0,
105:                         0, 1, 1}
106: .ve
107:   means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
108:   itself and row 0 (in the diagonal block), and row 2 is coupled with itself and row 1 (in the
109:   diagonal block).

111:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
112:   can be represented in the `dfill`, `ofill` format
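
  A minimal usage sketch (assuming a `DMDA` `da` with 3 degrees of freedom per grid point and a `Mat` `A` to be created):
.vb
      PetscInt dfill[9] = {1, 0, 0,
                           1, 1, 0,
                           0, 1, 1};
      PetscInt ofill[9] = {1, 0, 0,
                           0, 1, 0,
                           0, 0, 1};
      PetscCall(DMDASetBlockFills(da, dfill, ofill));
      PetscCall(DMCreateMatrix(da, &A));
.ve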

114:   Contributed by\:
115:   Glenn Hammond

117: .seealso: [](sec_struct), `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`, `DMDASetBlockFillsSparse()`
118: @*/
119: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
120: {
121:   DM_DA *dd = (DM_DA *)da->data;

123:   PetscFunctionBegin;
124:   /* save the given dfill and ofill information */
125:   PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
126:   PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));

128:   /* count nonzeros in ofill columns */
129:   PetscCall(DMDASetBlockFills_Private2(dd));
130:   PetscFunctionReturn(PETSC_SUCCESS);
131: }

133: /*@
134:   DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
135:   of the matrix returned by `DMCreateMatrix()`, using sparse representations
136:   of fill patterns.

138:   Logically Collective

140:   Input Parameters:
141: + da          - the `DMDA`
142: . dfillsparse - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
143: - ofillsparse - the sparse fill pattern in the off-diagonal blocks

145:   Level: developer

147:   Notes:
148:   This only makes sense when you are doing multicomponent problems but using the
149:   `MATMPIAIJ` matrix format

151:   The format for `dfillsparse` and `ofillsparse` is a sparse representation of a
152:   dof-by-dof matrix with 1 entries representing coupling and 0 entries
153:   for missing coupling.  The sparse representation is a 1 dimensional
154:   array of length nz + dof + 1, where nz is the number of non-zeros in
155:   the matrix.  The first dof entries in the array give the
156:   starting array indices of each row's items in the rest of the array,
157:   the (dof+1)st entry contains the value nz + dof + 1 (i.e. the entire length of the array),
158:   and the remaining nz items give the column indices of each of
159:   the 1s within the logical 2D matrix.  Each row's items within
160:   the array are the column indices of the 1s within that row
161:   of the 2D matrix.  PETSc developers may recognize that this is the
162:   same format as that computed by the `DMDASetBlockFills_Private()`
163:   function from a dense 2D matrix representation.
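
  For example, the dense pattern shown in `DMDASetBlockFills()`,
.vb
            dfill[9] = {1, 0, 0,
                        1, 1, 0,
                        0, 1, 1}
.ve
  has dof = 3 and nz = 5, so its sparse representation is the length-9 array
.vb
            dfillsparse[9] = {4, 5, 7, 9, 0, 0, 1, 1, 2}
.ve
  where 4, 5, 7 are the starting indices of rows 0, 1, 2, the 9 is the total array
  length, and the trailing 0, 0, 1, 1, 2 are the column indices of the 1s row by row.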

165:   `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns than
166:   can be represented in the `dfill`, `ofill` format

168:   Contributed by\:
169:   Philip C. Roth

171: .seealso: [](sec_struct), `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172: @*/
173: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174: {
175:   DM_DA *dd = (DM_DA *)da->data;

177:   PetscFunctionBegin;
178:   /* save the given dfill and ofill information */
179:   PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180:   PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));

182:   /* count nonzeros in ofill columns */
183:   PetscCall(DMDASetBlockFills_Private2(dd));
184:   PetscFunctionReturn(PETSC_SUCCESS);
185: }

187: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188: {
189:   PetscInt       dim, m, n, p, nc;
190:   DMBoundaryType bx, by, bz;
191:   MPI_Comm       comm;
192:   PetscMPIInt    size;
193:   PetscBool      isBAIJ;
194:   DM_DA         *dd = (DM_DA *)da->data;

196:   PetscFunctionBegin;
197:   /*
198:                                   m
199:           ------------------------------------------------------
200:          |                                                     |
201:          |                                                     |
202:          |               ----------------------                |
203:          |               |                    |                |
204:       n  |           yn  |                    |                |
205:          |               |                    |                |
206:          |               .---------------------                |
207:          |             (xs,ys)     xn                          |
208:          |            .                                        |
209:          |         (gxs,gys)                                   |
210:          |                                                     |
211:           -----------------------------------------------------
212:   */

214:   /*
215:          nc - number of components per grid point
216:          col - number of colors needed in one direction for single component problem

218:   */
219:   PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));

221:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222:   PetscCallMPI(MPI_Comm_size(comm, &size));
223:   if (ctype == IS_COLORING_LOCAL) {
224:     PetscCheck(!((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both sides of the domain on the same process");
225:   }

227:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
228:      matrices is for the blocks, not the individual matrix elements  */
229:   PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
230:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
231:   if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
232:   if (isBAIJ) {
233:     dd->w  = 1;
234:     dd->xs = dd->xs / nc;
235:     dd->xe = dd->xe / nc;
236:     dd->Xs = dd->Xs / nc;
237:     dd->Xe = dd->Xe / nc;
238:   }

240:   /*
241:      We do not provide a getcoloring function in the DMDA operations because
242:    the basic DMDA does not know about matrices. We think of DMDA as being
 243:    more low-level than matrices.
244:   */
245:   if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
246:   else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
247:   else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
248:   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
249:   if (isBAIJ) {
250:     dd->w  = nc;
251:     dd->xs = dd->xs * nc;
252:     dd->xe = dd->xe * nc;
253:     dd->Xs = dd->Xs * nc;
254:     dd->Xe = dd->Xe * nc;
255:   }
256:   PetscFunctionReturn(PETSC_SUCCESS);
257: }

259: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
260: {
261:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
262:   PetscInt         ncolors = 0;
263:   MPI_Comm         comm;
264:   DMBoundaryType   bx, by;
265:   DMDAStencilType  st;
266:   ISColoringValue *colors;
267:   DM_DA           *dd = (DM_DA *)da->data;

269:   PetscFunctionBegin;
270:   /*
271:          nc - number of components per grid point
272:          col - number of colors needed in one direction for single component problem

274:   */
275:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
276:   col = 2 * s + 1;
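  /* e.g. stencil width s = 1 gives col = 3: grid point (i,j), component k, receives
     color k + nc*((i%3) + 3*(j%3)), for nc*col*col = 9*nc colors in total, so that
     any two points lying in a common box stencil have distinct colors */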
277:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
278:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
279:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

281:   /* special case as taught to us by Paul Hovland */
282:   if (st == DMDA_STENCIL_STAR && s == 1) {
283:     PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
284:   } else {
285:     if (ctype == IS_COLORING_GLOBAL) {
286:       if (!dd->localcoloring) {
287:         PetscCall(PetscMalloc1(nc * nx * ny, &colors));
288:         ii = 0;
289:         for (j = ys; j < ys + ny; j++) {
290:           for (i = xs; i < xs + nx; i++) {
291:             for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((i % col) + col * (j % col)), colors + ii++));
292:           }
293:         }
294:         ncolors = nc + nc * (col - 1 + col * (col - 1));
295:         PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
296:       }
297:       *coloring = dd->localcoloring;
298:     } else if (ctype == IS_COLORING_LOCAL) {
299:       if (!dd->ghostedcoloring) {
300:         PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
301:         ii = 0;
302:         for (j = gys; j < gys + gny; j++) {
303:           for (i = gxs; i < gxs + gnx; i++) {
304:             for (k = 0; k < nc; k++) {
305:               /* the complicated stuff is to handle periodic boundaries */
306:               PetscCall(ISColoringValueCast(k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col)), colors + ii++));
307:             }
308:           }
309:         }
310:         ncolors = nc + nc * (col - 1 + col * (col - 1));
311:         PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
312:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

314:         PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
315:       }
316:       *coloring = dd->ghostedcoloring;
317:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
318:   }
319:   PetscCall(ISColoringReference(*coloring));
320:   PetscFunctionReturn(PETSC_SUCCESS);
321: }

323: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
324: {
325:   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
326:   PetscInt         ncolors;
327:   MPI_Comm         comm;
328:   DMBoundaryType   bx, by, bz;
329:   DMDAStencilType  st;
330:   ISColoringValue *colors;
331:   DM_DA           *dd = (DM_DA *)da->data;

333:   PetscFunctionBegin;
334:   /*
335:          nc - number of components per grid point
336:          col - number of colors needed in one direction for single component problem
337:   */
338:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
339:   col = 2 * s + 1;
340:   PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible by 2*stencil_width + 1");
341:   PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible by 2*stencil_width + 1");
342:   PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible by 2*stencil_width + 1");

344:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
345:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
346:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

348:   /* create the coloring */
349:   if (ctype == IS_COLORING_GLOBAL) {
350:     if (!dd->localcoloring) {
351:       PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
352:       ii = 0;
353:       for (k = zs; k < zs + nz; k++) {
354:         for (j = ys; j < ys + ny; j++) {
355:           for (i = xs; i < xs + nx; i++) {
356:             for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * ((i % col) + col * (j % col) + col * col * (k % col)), colors + ii++));
357:           }
358:         }
359:       }
360:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
361:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
362:     }
363:     *coloring = dd->localcoloring;
364:   } else if (ctype == IS_COLORING_LOCAL) {
365:     if (!dd->ghostedcoloring) {
366:       PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
367:       ii = 0;
368:       for (k = gzs; k < gzs + gnz; k++) {
369:         for (j = gys; j < gys + gny; j++) {
370:           for (i = gxs; i < gxs + gnx; i++) {
371:             for (l = 0; l < nc; l++) {
372:               /* the complicated stuff is to handle periodic boundaries */
373:               PetscCall(ISColoringValueCast(l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col)), colors + ii++));
374:             }
375:           }
376:         }
377:       }
378:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
379:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
380:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
381:     }
382:     *coloring = dd->ghostedcoloring;
383:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
384:   PetscCall(ISColoringReference(*coloring));
385:   PetscFunctionReturn(PETSC_SUCCESS);
386: }

388: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
389: {
390:   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
391:   PetscInt         ncolors;
392:   MPI_Comm         comm;
393:   DMBoundaryType   bx;
394:   ISColoringValue *colors;
395:   DM_DA           *dd = (DM_DA *)da->data;

397:   PetscFunctionBegin;
398:   /*
399:          nc - number of components per grid point
400:          col - number of colors needed in one direction for single component problem
401:   */
402:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
403:   col = 2 * s + 1;
404:   PetscCheck(bx != DM_BOUNDARY_PERIODIC || !(m % col), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_GLOBAL can only be used for periodic boundary conditions if the number of grid points %" PetscInt_FMT " is divisible by the number of colors %" PetscInt_FMT, m, col);
405:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
406:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
407:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

409:   /* create the coloring */
410:   if (ctype == IS_COLORING_GLOBAL) {
411:     if (!dd->localcoloring) {
412:       PetscCall(PetscMalloc1(nc * nx, &colors));
413:       if (dd->ofillcols) {
414:         PetscInt tc = 0;
415:         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
416:         i1 = 0;
417:         for (i = xs; i < xs + nx; i++) {
418:           for (l = 0; l < nc; l++) {
419:             if (dd->ofillcols[l] && (i % col)) {
420:               PetscCall(ISColoringValueCast(nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l], colors + i1++));
421:             } else {
422:               PetscCall(ISColoringValueCast(l, colors + i1++));
423:             }
424:           }
425:         }
426:         ncolors = nc + 2 * s * tc;
427:       } else {
428:         i1 = 0;
429:         for (i = xs; i < xs + nx; i++) {
430:           for (l = 0; l < nc; l++) PetscCall(ISColoringValueCast(l + nc * (i % col), colors + i1++));
431:         }
432:         ncolors = nc + nc * (col - 1);
433:       }
434:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
435:     }
436:     *coloring = dd->localcoloring;
437:   } else if (ctype == IS_COLORING_LOCAL) {
438:     if (!dd->ghostedcoloring) {
439:       PetscCall(PetscMalloc1(nc * gnx, &colors));
440:       i1 = 0;
441:       for (i = gxs; i < gxs + gnx; i++) {
442:         for (l = 0; l < nc; l++) {
443:           /* the complicated stuff is to handle periodic boundaries */
444:           PetscCall(ISColoringValueCast(l + nc * (SetInRange(i, m) % col), colors + i1++));
445:         }
446:       }
447:       ncolors = nc + nc * (col - 1);
448:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
449:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
450:     }
451:     *coloring = dd->ghostedcoloring;
452:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
453:   PetscCall(ISColoringReference(*coloring));
454:   PetscFunctionReturn(PETSC_SUCCESS);
455: }

457: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
458: {
459:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
460:   PetscInt         ncolors;
461:   MPI_Comm         comm;
462:   DMBoundaryType   bx, by;
463:   ISColoringValue *colors;
464:   DM_DA           *dd = (DM_DA *)da->data;

466:   PetscFunctionBegin;
467:   /*
468:          nc - number of components per grid point
469:          col - number of colors needed in one direction for single component problem
470:   */
471:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
472:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
473:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
474:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
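  /* Why 5 colors suffice for the 5-point star: the color (3*j + i) mod 5 of a grid
     point differs from the colors of its four neighbors, which are offset by +-1 mod 5
     (for i +- 1) and +-3 mod 5 (for j +- 1), and these four offsets are pairwise distinct */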
475:   /* create the coloring */
476:   if (ctype == IS_COLORING_GLOBAL) {
477:     if (!dd->localcoloring) {
478:       PetscCall(PetscMalloc1(nc * nx * ny, &colors));
479:       ii = 0;
480:       for (j = ys; j < ys + ny; j++) {
481:         for (i = xs; i < xs + nx; i++) {
482:           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * j + i) % 5), colors + ii++));
483:         }
484:       }
485:       ncolors = 5 * nc;
486:       PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
487:     }
488:     *coloring = dd->localcoloring;
489:   } else if (ctype == IS_COLORING_LOCAL) {
490:     if (!dd->ghostedcoloring) {
491:       PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
492:       ii = 0;
493:       for (j = gys; j < gys + gny; j++) {
494:         for (i = gxs; i < gxs + gnx; i++) {
495:           for (k = 0; k < nc; k++) PetscCall(ISColoringValueCast(k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5), colors + ii++));
496:         }
497:       }
498:       ncolors = 5 * nc;
499:       PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
500:       PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
501:     }
502:     *coloring = dd->ghostedcoloring;
503:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
504:   PetscFunctionReturn(PETSC_SUCCESS);
505: }

507: /* =========================================================================== */
508: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
509: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
510: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
511: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
512: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
513: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
514: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

523: static PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
524: {
525:   DM                da;
526:   const char       *prefix;
527:   Mat               AA, Anatural;
528:   AO                ao;
529:   PetscInt          rstart, rend, *petsc, i;
530:   IS                is;
531:   MPI_Comm          comm;
532:   PetscViewerFormat format;
533:   PetscBool         flag;

535:   PetscFunctionBegin;
536:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
537:   PetscCall(PetscViewerGetFormat(viewer, &format));
538:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);

540:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
541:   PetscCall(MatGetDM(A, &da));
542:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

544:   PetscCall(DMDAGetAO(da, &ao));
545:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
546:   PetscCall(PetscMalloc1(rend - rstart, &petsc));
547:   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
548:   PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
549:   PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));

551:   /* call viewer on natural ordering */
552:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPISELL, &flag));
553:   if (flag) {
554:     PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &AA));
555:     A = AA;
556:   }
557:   PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
558:   PetscCall(ISDestroy(&is));
559:   PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
560:   PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
561:   PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
562:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
563:   PetscCall(MatView(Anatural, viewer));
564:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
565:   PetscCall(MatDestroy(&Anatural));
566:   if (flag) PetscCall(MatDestroy(&AA));
567:   PetscFunctionReturn(PETSC_SUCCESS);
568: }

570: static PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
571: {
572:   DM       da;
573:   Mat      Anatural, Aapp;
574:   AO       ao;
575:   PetscInt rstart, rend, *app, i, m, n, M, N;
576:   IS       is;
577:   MPI_Comm comm;

579:   PetscFunctionBegin;
580:   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
581:   PetscCall(MatGetDM(A, &da));
582:   PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");

584:   /* Load the matrix in natural ordering */
585:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
586:   PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
587:   PetscCall(MatGetSize(A, &M, &N));
588:   PetscCall(MatGetLocalSize(A, &m, &n));
589:   PetscCall(MatSetSizes(Anatural, m, n, M, N));
590:   PetscCall(MatLoad(Anatural, viewer));

592:   /* Map natural ordering to application ordering and create IS */
593:   PetscCall(DMDAGetAO(da, &ao));
594:   PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
595:   PetscCall(PetscMalloc1(rend - rstart, &app));
596:   for (i = rstart; i < rend; i++) app[i - rstart] = i;
597:   PetscCall(AOPetscToApplication(ao, rend - rstart, app));
598:   PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));

600:   /* Do permutation and replace header */
601:   PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
602:   PetscCall(MatHeaderReplace(A, &Aapp));
603:   PetscCall(ISDestroy(&is));
604:   PetscCall(MatDestroy(&Anatural));
605:   PetscFunctionReturn(PETSC_SUCCESS);
606: }
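
/*
   Usage sketch (hypothetical viewer and file name): on more than one MPI rank the two
   routines above are installed as the matrix's view/load operations, so that
       PetscCall(PetscViewerBinaryOpen(comm, "A.dat", FILE_MODE_WRITE, &viewer));
       PetscCall(MatView(A, viewer));
       PetscCall(PetscViewerDestroy(&viewer));
   writes A in the DMDA's natural ordering rather than the parallel PETSc ordering.
*/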

608: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
609: {
610:   PetscInt    dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
611:   Mat         A;
612:   MPI_Comm    comm;
613:   MatType     Atype;
614:   MatType     mtype;
615:   PetscMPIInt size;
616:   DM_DA      *dd    = (DM_DA *)da->data;
617:   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;

619:   PetscFunctionBegin;
620:   PetscCall(MatInitializePackage());
621:   mtype = da->mattype;

623:   /*
624:                                   m
625:           ------------------------------------------------------
626:          |                                                     |
627:          |                                                     |
628:          |               ----------------------                |
629:          |               |                    |                |
630:       n  |           ny  |                    |                |
631:          |               |                    |                |
632:          |               .---------------------                |
633:          |             (xs,ys)     nx                          |
634:          |            .                                        |
635:          |         (gxs,gys)                                   |
636:          |                                                     |
637:           -----------------------------------------------------
638:   */

640:   /*
641:          nc - number of components per grid point
642:          col - number of colors needed in one direction for single component problem
643:   */
644:   M   = dd->M;
645:   N   = dd->N;
646:   P   = dd->P;
647:   dim = da->dim;
648:   dof = dd->w;
649:   /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
650:   PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
651:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
652:   PetscCall(MatCreate(comm, &A));
653:   PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
654:   PetscCall(MatSetType(A, mtype));
655:   PetscCall(MatSetFromOptions(A));
656:   if (dof * nx * ny * nz < da->bind_below) {
657:     PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
658:     PetscCall(MatBindToCPU(A, PETSC_TRUE));
659:   }
660:   PetscCall(MatSetDM(A, da));
661:   if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
662:   PetscCall(MatGetType(A, &Atype));
663:   /*
664:      We do not provide a getmatrix function in the DMDA operations because
 665:    the basic DMDA does not know about matrices. We think of DMDA as being
 666:    more low-level than matrices. This is kind of cheating because sometimes
 667:    we think of DMDA as higher level than matrices.

669:      We could switch based on Atype (or mtype), but we do not since the
670:    specialized setting routines depend only on the particular preallocation
671:    details of the matrix, not the type itself.
672:   */
673:   if (!da->prealloc_skip) { // Flag is likely set when user intends to use MatSetPreallocationCOO()
674:     PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
675:     if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
676:     if (!aij) {
677:       PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
678:       if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
679:       if (!baij) {
680:         PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
681:         if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
682:         if (!sbaij) {
683:           PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
684:           if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
685:         }
686:         if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
687:       }
688:     }
689:   }
690:   if (aij) {
691:     if (dim == 1) {
692:       if (dd->ofill) {
693:         PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
694:       } else {
695:         DMBoundaryType bx;
696:         PetscMPIInt    size;
697:         PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
698:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
699:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
700:           PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
701:         } else {
702:           PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
703:         }
704:       }
705:     } else if (dim == 2) {
706:       if (dd->ofill) {
707:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
708:       } else {
709:         PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
710:       }
711:     } else if (dim == 3) {
712:       if (dd->ofill) {
713:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
714:       } else {
715:         PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
716:       }
717:     }
718:   } else if (baij) {
719:     if (dim == 2) {
720:       PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
721:     } else if (dim == 3) {
722:       PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
 723:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimensions and matrix type %s! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype);
724:   } else if (sbaij) {
725:     if (dim == 2) {
726:       PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
727:     } else if (dim == 3) {
728:       PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
 729:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimensions and matrix type %s! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype);
730:   } else if (sell) {
731:     if (dim == 2) {
732:       PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
733:     } else if (dim == 3) {
734:       PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
 735:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimensions and matrix type %s! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype);
736:   } else if (is) {
737:     PetscCall(DMCreateMatrix_DA_IS(da, A));
738:   } else { // unknown type or da->prealloc_skip so structural information may be needed, but no prealloc
739:     ISLocalToGlobalMapping ltog;

741:     PetscCall(MatSetBlockSize(A, dof));
742:     PetscCall(MatSetUp(A));
743:     PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
744:     PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
745:   }
746:   PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
747:   PetscCall(MatSetStencil(A, dim, dims, starts, dof));
748:   PetscCall(MatSetDM(A, da));
749:   PetscCallMPI(MPI_Comm_size(comm, &size));
750:   if (size > 1) {
751:     /* change viewer to display matrix in natural ordering */
752:     PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
753:     PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
754:   }
755:   *J = A;
756:   PetscFunctionReturn(PETSC_SUCCESS);
757: }

759: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

761: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
762: {
763:   DM_DA                 *da = (DM_DA *)dm->data;
764:   Mat                    lJ, P;
765:   ISLocalToGlobalMapping ltog;
766:   IS                     is;
767:   PetscBT                bt;
768:   const PetscInt        *e_loc, *idx;
769:   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

 771:   /* The l2g map of the DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted).
 772:      We need to filter out the local indices that are not represented through the DMDAGetElements() decomposition */
773:   PetscFunctionBegin;
774:   dof = da->w;
775:   PetscCall(MatSetBlockSize(J, dof));
776:   PetscCall(DMGetLocalToGlobalMapping(dm, &ltog));

778:   /* flag local elements indices in local DMDA numbering */
779:   PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
780:   PetscCall(PetscBTCreate(nv / dof, &bt));
781:   PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
782:   for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));

784:   /* filter out (set to -1) the global indices not used by the local elements */
785:   PetscCall(PetscMalloc1(nv / dof, &gidx));
786:   PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
787:   PetscCall(PetscArraycpy(gidx, idx, nv / dof));
788:   PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
789:   for (i = 0; i < nv / dof; i++)
790:     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
791:   PetscCall(PetscBTDestroy(&bt));
792:   PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
793:   PetscCall(ISLocalToGlobalMappingCreateIS(is, &ltog));
794:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
795:   PetscCall(ISLocalToGlobalMappingDestroy(&ltog));
796:   PetscCall(ISDestroy(&is));

798:   /* Preallocation */
799:   if (dm->prealloc_skip) {
800:     PetscCall(MatSetUp(J));
801:   } else {
802:     PetscCall(MatISGetLocalMat(J, &lJ));
803:     PetscCall(MatGetLocalToGlobalMapping(lJ, &ltog, NULL));
804:     PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
805:     PetscCall(MatSetType(P, MATPREALLOCATOR));
806:     PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
807:     PetscCall(MatGetSize(lJ, &N, NULL));
808:     PetscCall(MatGetLocalSize(lJ, &n, NULL));
809:     PetscCall(MatSetSizes(P, n, n, N, N));
810:     PetscCall(MatSetBlockSize(P, dof));
811:     PetscCall(MatSetUp(P));
812:     for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
813:     PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
814:     PetscCall(MatISRestoreLocalMat(J, &lJ));
815:     PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
816:     PetscCall(MatDestroy(&P));

818:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
819:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
820:   }
821:   PetscFunctionReturn(PETSC_SUCCESS);
822: }

824: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
825: {
826:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
827:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
828:   MPI_Comm               comm;
829:   PetscScalar           *values;
830:   DMBoundaryType         bx, by;
831:   ISLocalToGlobalMapping ltog;
832:   DMDAStencilType        st;

834:   PetscFunctionBegin;
835:   /*
836:          nc - number of components per grid point
837:          col - number of colors needed in one direction for single component problem
838:   */
839:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
840:   col = 2 * s + 1;
841:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
842:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
843:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

845:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
846:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

848:   PetscCall(MatSetBlockSize(J, nc));
849:   /* determine the matrix preallocation information */
850:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
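  /* MatPreallocateBegin() allocates and zeroes dnz[] and onz[], which accumulate for
     each local row the number of nonzeros in the diagonal and off-diagonal blocks;
     MatPreallocateEnd() frees them after the preallocation calls below */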
851:   for (i = xs; i < xs + nx; i++) {
852:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
853:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

855:     for (j = ys; j < ys + ny; j++) {
856:       slot = i - gxs + gnx * (j - gys);

858:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
859:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

861:       cnt = 0;
862:       for (k = 0; k < nc; k++) {
863:         for (l = lstart; l < lend + 1; l++) {
864:           for (p = pstart; p < pend + 1; p++) {
865:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
866:               cols[cnt++] = k + nc * (slot + gnx * l + p);
867:             }
868:           }
869:         }
870:         rows[k] = k + nc * (slot);
871:       }
872:       PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
873:     }
874:   }
875:   PetscCall(MatSetBlockSize(J, nc));
876:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
877:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
878:   MatPreallocateEnd(dnz, onz);

880:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

882:   /*
 883:     For each node in the grid: we get the neighbors in the local (on-process ordering
 884:     that includes the ghost points), then MatSetValuesLocal() maps those indices to the global
885:     PETSc ordering.
886:   */
887:   if (!da->prealloc_only) {
888:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
889:     for (i = xs; i < xs + nx; i++) {
890:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
891:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

893:       for (j = ys; j < ys + ny; j++) {
894:         slot = i - gxs + gnx * (j - gys);

896:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
897:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

899:         cnt = 0;
900:         for (k = 0; k < nc; k++) {
901:           for (l = lstart; l < lend + 1; l++) {
902:             for (p = pstart; p < pend + 1; p++) {
903:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
904:                 cols[cnt++] = k + nc * (slot + gnx * l + p);
905:               }
906:             }
907:           }
908:           rows[k] = k + nc * (slot);
909:         }
910:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
911:       }
912:     }
913:     PetscCall(PetscFree(values));
914:     /* do not copy values to GPU since they are all zero and not yet needed there */
915:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
916:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
917:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
918:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
919:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
920:   }
921:   PetscCall(PetscFree2(rows, cols));
922:   PetscFunctionReturn(PETSC_SUCCESS);
923: }

925: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
926: {
927:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
928:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
929:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
930:   MPI_Comm               comm;
931:   PetscScalar           *values;
932:   DMBoundaryType         bx, by, bz;
933:   ISLocalToGlobalMapping ltog;
934:   DMDAStencilType        st;

936:   PetscFunctionBegin;
937:   /*
938:          nc - number of components per grid point
939:          col - number of colors needed in one direction for single component problem
940:   */
941:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
942:   col = 2 * s + 1;
943:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
944:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
945:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

947:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
948:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

950:   PetscCall(MatSetBlockSize(J, nc));
951:   /* determine the matrix preallocation information */
952:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
953:   for (i = xs; i < xs + nx; i++) {
954:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
955:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
956:     for (j = ys; j < ys + ny; j++) {
957:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
958:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
959:       for (k = zs; k < zs + nz; k++) {
960:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
961:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

963:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

965:         cnt = 0;
966:         for (l = 0; l < nc; l++) {
967:           for (ii = istart; ii < iend + 1; ii++) {
968:             for (jj = jstart; jj < jend + 1; jj++) {
969:               for (kk = kstart; kk < kend + 1; kk++) {
 970:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
971:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
972:                 }
973:               }
974:             }
975:           }
976:           rows[l] = l + nc * (slot);
977:         }
978:         PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
979:       }
980:     }
981:   }
982:   PetscCall(MatSetBlockSize(J, nc));
983:   PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
984:   PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
985:   MatPreallocateEnd(dnz, onz);
986:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

988:   /*
 989:     For each node in the grid: we get the neighbors in the local (on-process ordering
 990:     that includes the ghost points), then MatSetValuesLocal() maps those indices to the global
991:     PETSc ordering.
992:   */
993:   if (!da->prealloc_only) {
994:     PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
995:     for (i = xs; i < xs + nx; i++) {
996:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
997:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
998:       for (j = ys; j < ys + ny; j++) {
999:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1000:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1001:         for (k = zs; k < zs + nz; k++) {
1002:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1003:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1005:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1007:           cnt = 0;
1008:           for (l = 0; l < nc; l++) {
1009:             for (ii = istart; ii < iend + 1; ii++) {
1010:               for (jj = jstart; jj < jend + 1; jj++) {
1011:                 for (kk = kstart; kk < kend + 1; kk++) {
1012:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star */
1013:                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1014:                   }
1015:                 }
1016:               }
1017:             }
1018:             rows[l] = l + nc * (slot);
1019:           }
1020:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1021:         }
1022:       }
1023:     }
1024:     PetscCall(PetscFree(values));
1025:     /* do not copy values to GPU since they are all zero and not yet needed there */
1026:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1027:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1028:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1029:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1030:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1031:   }
1032:   PetscCall(PetscFree2(rows, cols));
1033:   PetscFunctionReturn(PETSC_SUCCESS);
1034: }

1036: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1037: {
1038:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1039:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1040:   MPI_Comm               comm;
1041:   DMBoundaryType         bx, by;
1042:   ISLocalToGlobalMapping ltog, mltog;
1043:   DMDAStencilType        st;
1044:   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

1046:   PetscFunctionBegin;
1047:   /*
1048:          nc - number of components per grid point
1049:          col - number of colors needed in one direction for single component problem
1050:   */
1051:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1052:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1053:   col = 2 * s + 1;
1054:   /*
1055:        With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1056:        because "wrapping" around the end of the domain hits an entry already counted in the other direction.
1057:   */
1058:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1059:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1060:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1061:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1062:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1064:   PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1065:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1067:   PetscCall(MatSetBlockSize(J, nc));
1068:   /* determine the matrix preallocation information */
1069:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1070:   for (i = xs; i < xs + nx; i++) {
1071:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1072:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1074:     for (j = ys; j < ys + ny; j++) {
1075:       slot = i - gxs + gnx * (j - gys);

1077:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1078:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1080:       cnt = 0;
1081:       for (k = 0; k < nc; k++) {
1082:         for (l = lstart; l < lend + 1; l++) {
1083:           for (p = pstart; p < pend + 1; p++) {
1084:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1085:               cols[cnt++] = k + nc * (slot + gnx * l + p);
1086:             }
1087:           }
1088:         }
1089:         rows[k] = k + nc * (slot);
1090:       }
1091:       if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1092:       else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1093:     }
1094:   }
1095:   PetscCall(MatSetBlockSize(J, nc));
1096:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1097:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1098:   MatPreallocateEnd(dnz, onz);
1099:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1100:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1102:   /*
1103:     For each node in the grid: we get the neighbors in the local (on-process ordering
1104:     that includes the ghost points), then MatSetValuesLocal() maps those indices to the global
1105:     PETSc ordering.
1106:   */
1107:   if (!da->prealloc_only) {
1108:     for (i = xs; i < xs + nx; i++) {
1109:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1110:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1112:       for (j = ys; j < ys + ny; j++) {
1113:         slot = i - gxs + gnx * (j - gys);

1115:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1116:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1118:         cnt = 0;
1119:         for (l = lstart; l < lend + 1; l++) {
1120:           for (p = pstart; p < pend + 1; p++) {
1121:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1122:               cols[cnt++] = nc * (slot + gnx * l + p);
1123:               for (k = 1; k < nc; k++) {
1124:                 cols[cnt] = 1 + cols[cnt - 1];
1125:                 cnt++;
1126:               }
1127:             }
1128:           }
1129:         }
1130:         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1131:         PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1132:       }
1133:     }
1134:     /* do not copy values to GPU since they are all zero and not yet needed there */
1135:     PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1136:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1137:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1138:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1139:     if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1140:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1141:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1142:   }
1143:   PetscCall(PetscFree2(rows, cols));
1144:   PetscFunctionReturn(PETSC_SUCCESS);
1145: }

1147: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1148: {
1149:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1150:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1151:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1152:   DM_DA                 *dd = (DM_DA *)da->data;
1153:   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1154:   MPI_Comm               comm;
1155:   DMBoundaryType         bx, by;
1156:   ISLocalToGlobalMapping ltog;
1157:   DMDAStencilType        st;
1158:   PetscBool              removedups = PETSC_FALSE;

1160:   PetscFunctionBegin;
1161:   /*
1162:          nc - number of components per grid point
1163:          col - number of colors needed in one direction for single component problem
1164:   */
1165:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1166:   col = 2 * s + 1;
1167:   /*
1168:        With one process and a periodic domain in a skinny dimension, the code will label nonzero columns multiple times
1169:        because "wrapping" around the end of the domain hits an entry already counted in the other direction.
1170:   */
1171:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1172:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1173:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1174:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1175:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1177:   PetscCall(PetscMalloc1(col * col * nc, &cols));
1178:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1180:   PetscCall(MatSetBlockSize(J, nc));
1181:   /* determine the matrix preallocation information */
1182:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1183:   for (i = xs; i < xs + nx; i++) {
1184:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1185:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1187:     for (j = ys; j < ys + ny; j++) {
1188:       slot = i - gxs + gnx * (j - gys);

1190:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1191:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1193:       for (k = 0; k < nc; k++) {
1194:         cnt = 0;
1195:         for (l = lstart; l < lend + 1; l++) {
1196:           for (p = pstart; p < pend + 1; p++) {
1197:             if (l || p) {
1198:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1199:                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1200:               }
1201:             } else {
1202:               if (dfill) {
1203:                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1204:               } else {
1205:                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1206:               }
1207:             }
1208:           }
1209:         }
1210:         row    = k + nc * (slot);
1211:         maxcnt = PetscMax(maxcnt, cnt);
1212:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1213:         else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1214:       }
1215:     }
1216:   }
1217:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1218:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1219:   MatPreallocateEnd(dnz, onz);
1220:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1222:   /*
1223:     For each node in the grid we get the neighbors in the local (on-process) ordering
1224:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1225:     global PETSc ordering.
1226:   */
1227:   if (!da->prealloc_only) {
1228:     for (i = xs; i < xs + nx; i++) {
1229:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1230:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1232:       for (j = ys; j < ys + ny; j++) {
1233:         slot = i - gxs + gnx * (j - gys);

1235:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1236:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1238:         for (k = 0; k < nc; k++) {
1239:           cnt = 0;
1240:           for (l = lstart; l < lend + 1; l++) {
1241:             for (p = pstart; p < pend + 1; p++) {
1242:               if (l || p) {
1243:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1244:                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1245:                 }
1246:               } else {
1247:                 if (dfill) {
1248:                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1249:                 } else {
1250:                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1251:                 }
1252:               }
1253:             }
1254:           }
1255:           row = k + nc * (slot);
1256:           PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1257:         }
1258:       }
1259:     }
1260:     /* do not copy values to GPU since they are all zero and not yet needed there */
1261:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1262:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1263:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1264:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1265:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1266:   }
1267:   PetscCall(PetscFree(cols));
1268:   PetscFunctionReturn(PETSC_SUCCESS);
1269: }
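
A minimal usage sketch (not part of fdda.c) of how the dfill/ofill pattern consumed by DMCreateMatrix_DA_2d_MPIAIJ_Fill() is typically supplied through the public API; the grid size and the 2-component fill arrays below are illustrative assumptions:

#include <petscdmda.h>

int main(int argc, char **argv)
{
  DM       da;
  Mat      J;
  PetscInt dfill[4] = {1, 1,  /* row i flags which components j appear in  */
                       0, 1}; /* component i's coupling at the same point  */
  PetscInt ofill[4] = {1, 0,  /* across grid points each component couples */
                       0, 1}; /* only to itself                            */

  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 8, 8, PETSC_DECIDE, PETSC_DECIDE, 2, 1, NULL, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMDASetBlockFills(da, dfill, ofill));
  PetscCall(DMCreateMatrix(da, &J)); /* with fills set, the _Fill code path is used */
  PetscCall(MatDestroy(&J));
  PetscCall(DMDestroy(&da));
  PetscCall(PetscFinalize());
  return 0;
}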

1271: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1272: {
1273:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1274:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1275:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1276:   MPI_Comm               comm;
1277:   DMBoundaryType         bx, by, bz;
1278:   ISLocalToGlobalMapping ltog, mltog;
1279:   DMDAStencilType        st;
1280:   PetscBool              removedups = PETSC_FALSE;

1282:   PetscFunctionBegin;
1283:   /*
1284:          nc - number of components per grid point
1285:          col - number of colors needed in one direction for single component problem
1286:   */
1287:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1288:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1289:   col = 2 * s + 1;

1291:   /*
1292:        With one process in a skinny periodic dimension the code will label nonzero columns multiple times
1293:        because the stencil "wraps" around the end of the domain and hits an entry already counted from the other direction.
1294:   */
1295:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1296:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1297:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

1299:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1300:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1301:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1303:   PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1304:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1306:   PetscCall(MatSetBlockSize(J, nc));
1307:   /* determine the matrix preallocation information */
1308:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1309:   for (i = xs; i < xs + nx; i++) {
1310:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1311:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1312:     for (j = ys; j < ys + ny; j++) {
1313:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1314:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1315:       for (k = zs; k < zs + nz; k++) {
1316:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1317:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1319:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1321:         cnt = 0;
1322:         for (l = 0; l < nc; l++) {
1323:           for (ii = istart; ii < iend + 1; ii++) {
1324:             for (jj = jstart; jj < jend + 1; jj++) {
1325:               for (kk = kstart; kk < kend + 1; kk++) {
1326:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1327:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1328:                 }
1329:               }
1330:             }
1331:           }
1332:           rows[l] = l + nc * (slot);
1333:         }
1334:         if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1335:         else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1336:       }
1337:     }
1338:   }
1339:   PetscCall(MatSetBlockSize(J, nc));
1340:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1341:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1342:   MatPreallocateEnd(dnz, onz);
1343:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1344:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1346:   /*
1347:     For each node in the grid we get the neighbors in the local (on-process) ordering
1348:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1349:     global PETSc ordering.
1350:   */
1351:   if (!da->prealloc_only) {
1352:     for (i = xs; i < xs + nx; i++) {
1353:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1354:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1355:       for (j = ys; j < ys + ny; j++) {
1356:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1357:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1358:         for (k = zs; k < zs + nz; k++) {
1359:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1360:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1362:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1364:           cnt = 0;
1365:           for (kk = kstart; kk < kend + 1; kk++) {
1366:             for (jj = jstart; jj < jend + 1; jj++) {
1367:               for (ii = istart; ii < iend + 1; ii++) {
1368:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1369:                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1370:                   for (l = 1; l < nc; l++) {
1371:                     cols[cnt] = 1 + cols[cnt - 1];
1372:                     cnt++;
1373:                   }
1374:                 }
1375:               }
1376:             }
1377:           }
1378:           rows[0] = nc * (slot);
1379:           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1380:           PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1381:         }
1382:       }
1383:     }
1384:     /* do not copy values to GPU since they are all zero and not yet needed there */
1385:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1386:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1387:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1388:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1389:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1390:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1391:   }
1392:   PetscCall(PetscFree2(rows, cols));
1393:   PetscFunctionReturn(PETSC_SUCCESS);
1394: }
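
The star-stencil test above keeps an offset (ii,jj,kk) only when at least two of its components are zero. A standalone sketch enumerating which offsets survive for stencil width 1:

#include <stdio.h>

/* mirrors the test in the loops above */
static int on_star(int ii, int jj, int kk)
{
  return (!ii && !jj) || (!jj && !kk) || (!ii && !kk);
}

int main(void)
{
  int cnt = 0;
  for (int ii = -1; ii <= 1; ii++) {
    for (int jj = -1; jj <= 1; jj++) {
      for (int kk = -1; kk <= 1; kk++) {
        if (on_star(ii, jj, kk)) {
          printf("(%2d,%2d,%2d)\n", ii, jj, kk);
          cnt++;
        }
      }
    }
  }
  printf("%d offsets on the star\n", cnt); /* 7: the center plus its 6 face neighbors */
  return 0;
}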

1396: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1397: {
1398:   DM_DA                 *dd = (DM_DA *)da->data;
1399:   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1400:   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1401:   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1402:   DMBoundaryType         bx;
1403:   ISLocalToGlobalMapping ltog;
1404:   PetscMPIInt            rank, size;

1406:   PetscFunctionBegin;
1407:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1408:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));

1410:   /*
1411:          nc - number of components per grid point
1412:   */
1413:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1414:   PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1415:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1416:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1418:   PetscCall(MatSetBlockSize(J, nc));
1419:   PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));

1421:   /*
1422:         note: the counts should be smaller for the first and last process when the boundary is not periodic;
1423:         the counting below assumes dfill is provided (a NULL dfill is not handled here)
1424:   */
1425:   cnt = 0;
1426:   /* coupling with process to the left */
1427:   for (i = 0; i < s; i++) {
1428:     for (j = 0; j < nc; j++) {
1429:       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1430:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1431:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1432:         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1433:         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1434:       }
1435:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1436:       cnt++;
1437:     }
1438:   }
1439:   for (i = s; i < nx - s; i++) {
1440:     for (j = 0; j < nc; j++) {
1441:       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1442:       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1443:       cnt++;
1444:     }
1445:   }
1446:   /* coupling with process to the right */
1447:   for (i = nx - s; i < nx; i++) {
1448:     for (j = 0; j < nc; j++) {
1449:       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1450:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1451:       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1452:         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1453:         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1454:       }
1455:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1456:       cnt++;
1457:     }
1458:   }

1460:   PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1461:   PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1462:   PetscCall(PetscFree2(cols, ocols));

1464:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1465:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1467:   /*
1468:     For each node in the grid we compute the global indices of its neighbors directly
1469:     (this routine builds global column indices rather than ghosted local ones) and
1470:     insert the entries with MatSetValues().
1471:   */
1472:   if (!da->prealloc_only) {
1473:     PetscCall(PetscMalloc1(maxcnt, &cols));
1474:     row = xs * nc;
1475:     /* coupling with process to the left */
1476:     for (i = xs; i < xs + s; i++) {
1477:       for (j = 0; j < nc; j++) {
1478:         cnt = 0;
1479:         if (rank) {
1480:           for (l = 0; l < s; l++) {
1481:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1482:           }
1483:         }
1484:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1485:           for (l = 0; l < s; l++) {
1486:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1487:           }
1488:         }
1489:         if (dfill) {
1490:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1491:         } else {
1492:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1493:         }
1494:         for (l = 0; l < s; l++) {
1495:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1496:         }
1497:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1498:         row++;
1499:       }
1500:     }
1501:     for (i = xs + s; i < xs + nx - s; i++) {
1502:       for (j = 0; j < nc; j++) {
1503:         cnt = 0;
1504:         for (l = 0; l < s; l++) {
1505:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1506:         }
1507:         if (dfill) {
1508:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1509:         } else {
1510:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1511:         }
1512:         for (l = 0; l < s; l++) {
1513:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1514:         }
1515:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1516:         row++;
1517:       }
1518:     }
1519:     /* coupling with process to the right */
1520:     for (i = xs + nx - s; i < xs + nx; i++) {
1521:       for (j = 0; j < nc; j++) {
1522:         cnt = 0;
1523:         for (l = 0; l < s; l++) {
1524:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1525:         }
1526:         if (dfill) {
1527:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1528:         } else {
1529:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1530:         }
1531:         if (rank < size - 1) {
1532:           for (l = 0; l < s; l++) {
1533:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1534:           }
1535:         }
1536:         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1537:           for (l = 0; l < s; l++) {
1538:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1539:           }
1540:         }
1541:         PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1542:         row++;
1543:       }
1544:     }
1545:     PetscCall(PetscFree(cols));
1546:     /* do not copy values to GPU since they are all zero and not yet needed there */
1547:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1548:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1549:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1550:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1551:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1552:   }
1553:   PetscFunctionReturn(PETSC_SUCCESS);
1554: }
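
The ofill[j] .. ofill[j+1] loops above rely on a modified CSR layout: for w components, the first w+1 entries of the array are row pointers into the array itself, and the column indices follow. A standalone sketch decoding an illustrative 3-component pattern:

#include <stdio.h>

int main(void)
{
  /* nonzeros at (0,0), (1,0), (1,1), (2,2); entries 0..3 are row pointers
     (fill[w] marks the end), column indices start at index w+1 = 4 */
  int w      = 3;
  int fill[] = {4, 5, 7, 8, 0, 0, 1, 2};

  for (int j = 0; j < w; j++) {
    printf("component %d couples to:", j);
    for (int k = fill[j]; k < fill[j + 1]; k++) printf(" %d", fill[k]);
    printf("\n");
  }
  return 0;
}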

1556: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1557: {
1558:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1559:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1560:   PetscInt               istart, iend;
1561:   DMBoundaryType         bx;
1562:   ISLocalToGlobalMapping ltog, mltog;

1564:   PetscFunctionBegin;
1565:   /*
1566:          nc - number of components per grid point
1567:          col - number of colors needed in one direction for single component problem
1568:   */
1569:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1570:   if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1571:   col = 2 * s + 1;

1573:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1574:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1576:   PetscCall(MatSetBlockSize(J, nc));
1577:   PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1578:   PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));

1580:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1581:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1582:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1584:   /*
1585:     For each node in the grid we get the neighbors in the local (on-process) ordering
1586:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1587:     global PETSc ordering.
1588:   */
1589:   if (!da->prealloc_only) {
1590:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1591:     for (i = xs; i < xs + nx; i++) {
1592:       istart = PetscMax(-s, gxs - i);
1593:       iend   = PetscMin(s, gxs + gnx - i - 1);
1594:       slot   = i - gxs;

1596:       cnt = 0;
1597:       for (i1 = istart; i1 < iend + 1; i1++) {
1598:         cols[cnt++] = nc * (slot + i1);
1599:         for (l = 1; l < nc; l++) {
1600:           cols[cnt] = 1 + cols[cnt - 1];
1601:           cnt++;
1602:         }
1603:       }
1604:       rows[0] = nc * (slot);
1605:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1606:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1607:     }
1608:     /* do not copy values to GPU since they are all zero and not yet needed there */
1609:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1610:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1611:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1612:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1613:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1614:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1615:     PetscCall(PetscFree2(rows, cols));
1616:   }
1617:   PetscFunctionReturn(PETSC_SUCCESS);
1618: }
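
A minimal driver sketch (sizes are illustrative) that exercises the routine above: a periodic 1d DMDA with 2 components and stencil width 1; on more than one MPI process DMCreateMatrix() ends up in a 1d MPIAIJ code path like this one:

#include <petscdmda.h>

int main(int argc, char **argv)
{
  DM  da;
  Mat J;

  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_PERIODIC, 16, 2, 1, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMCreateMatrix(da, &J)); /* preallocated and assembled with zeros */
  PetscCall(MatDestroy(&J));
  PetscCall(DMDestroy(&da));
  PetscCall(PetscFinalize());
  return 0;
}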

1620: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1621: {
1622:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1623:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1624:   PetscInt               istart, iend;
1625:   DMBoundaryType         bx;
1626:   ISLocalToGlobalMapping ltog, mltog;

1628:   PetscFunctionBegin;
1629:   /*
1630:          nc - number of components per grid point
1631:          col - number of colors needed in one direction for single component problem
1632:   */
1633:   PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1634:   col = 2 * s + 1;

1636:   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1637:   PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));

1639:   PetscCall(MatSetBlockSize(J, nc));
1640:   PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));

1642:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));
1643:   PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1644:   if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1646:   /*
1647:     For each node in the grid we get the neighbors in the local (on-process) ordering
1648:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
1649:     global PETSc ordering.
1650:   */
1651:   if (!da->prealloc_only) {
1652:     PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1653:     for (i = xs; i < xs + nx; i++) {
1654:       istart = PetscMax(-s, gxs - i);
1655:       iend   = PetscMin(s, gxs + gnx - i - 1);
1656:       slot   = i - gxs;

1658:       cnt = 0;
1659:       for (i1 = istart; i1 < iend + 1; i1++) {
1660:         cols[cnt++] = nc * (slot + i1);
1661:         for (l = 1; l < nc; l++) {
1662:           cols[cnt] = 1 + cols[cnt - 1];
1663:           cnt++;
1664:         }
1665:       }
1666:       rows[0] = nc * (slot);
1667:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1668:       PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1669:     }
1670:     /* do not copy values to GPU since they are all zero and not yet needed there */
1671:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1672:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1673:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1674:     if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1675:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1676:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1677:     PetscCall(PetscFree2(rows, cols));
1678:   }
1679:   PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1680:   PetscFunctionReturn(PETSC_SUCCESS);
1681: }
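
The no-preallocation variant above relies on MatSeqAIJSetTotalPreallocation() plus MAT_SORTED_FULL: rows are inserted in order, each exactly once with its complete, sorted column list. A minimal standalone sketch of that contract on a small tridiagonal matrix (sizes are illustrative):

#include <petscmat.h>

int main(int argc, char **argv)
{
  Mat         A;
  PetscInt    row, n, cols[3];
  PetscScalar vals[3] = {0.0, 0.0, 0.0};

  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(MatCreate(PETSC_COMM_SELF, &A));
  PetscCall(MatSetSizes(A, 4, 4, 4, 4));
  PetscCall(MatSetType(A, MATSEQAIJ));
  PetscCall(MatSeqAIJSetTotalPreallocation(A, 10)); /* one bound for the whole matrix */
  PetscCall(MatSetOption(A, MAT_SORTED_FULL, PETSC_TRUE));
  for (row = 0; row < 4; row++) { /* each row set once, columns already sorted */
    n = 0;
    if (row > 0) cols[n++] = row - 1;
    cols[n++] = row;
    if (row < 3) cols[n++] = row + 1;
    PetscCall(MatSetValues(A, 1, &row, n, cols, vals, INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFinalize());
  return 0;
}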

1683: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1684: {
1685:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1686:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1687:   PetscInt               istart, iend, jstart, jend, ii, jj;
1688:   MPI_Comm               comm;
1689:   PetscScalar           *values;
1690:   DMBoundaryType         bx, by;
1691:   DMDAStencilType        st;
1692:   ISLocalToGlobalMapping ltog;

1694:   PetscFunctionBegin;
1695:   /*
1696:      nc - number of components per grid point
1697:      col - number of colors needed in one direction for single component problem
1698:   */
1699:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1700:   col = 2 * s + 1;

1702:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1703:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1704:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1706:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1708:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1710:   /* determine the matrix preallocation information */
1711:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1712:   for (i = xs; i < xs + nx; i++) {
1713:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1714:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1715:     for (j = ys; j < ys + ny; j++) {
1716:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1717:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1718:       slot   = i - gxs + gnx * (j - gys);

1720:       /* Find block columns in block row */
1721:       cnt = 0;
1722:       for (ii = istart; ii < iend + 1; ii++) {
1723:         for (jj = jstart; jj < jend + 1; jj++) {
1724:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1725:             cols[cnt++] = slot + ii + gnx * jj;
1726:           }
1727:         }
1728:       }
1729:       PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1730:     }
1731:   }
1732:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1733:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1734:   MatPreallocateEnd(dnz, onz);

1736:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1738:   /*
1739:     For each node in the grid we get the neighbors in the local (on-process) ordering
1740:     that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1741:     indices to the global PETSc ordering.
1742:   */
1743:   if (!da->prealloc_only) {
1744:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1745:     for (i = xs; i < xs + nx; i++) {
1746:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1747:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1748:       for (j = ys; j < ys + ny; j++) {
1749:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1750:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1751:         slot   = i - gxs + gnx * (j - gys);
1752:         cnt    = 0;
1753:         for (ii = istart; ii < iend + 1; ii++) {
1754:           for (jj = jstart; jj < jend + 1; jj++) {
1755:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1756:               cols[cnt++] = slot + ii + gnx * jj;
1757:             }
1758:           }
1759:         }
1760:         PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1761:       }
1762:     }
1763:     PetscCall(PetscFree(values));
1764:     /* do not copy values to GPU since they are all zero and not yet needed there */
1765:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1766:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1767:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1768:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1769:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1770:   }
1771:   PetscCall(PetscFree(cols));
1772:   PetscFunctionReturn(PETSC_SUCCESS);
1773: }

1775: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1776: {
1777:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1778:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1779:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1780:   MPI_Comm               comm;
1781:   PetscScalar           *values;
1782:   DMBoundaryType         bx, by, bz;
1783:   DMDAStencilType        st;
1784:   ISLocalToGlobalMapping ltog;

1786:   PetscFunctionBegin;
1787:   /*
1788:          nc - number of components per grid point
1789:          col - number of colors needed in one direction for single component problem
1790:   */
1791:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1792:   col = 2 * s + 1;

1794:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1795:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1796:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1798:   PetscCall(PetscMalloc1(col * col * col, &cols));

1800:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1802:   /* determine the matrix preallocation information */
1803:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1804:   for (i = xs; i < xs + nx; i++) {
1805:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1806:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1807:     for (j = ys; j < ys + ny; j++) {
1808:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1809:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1810:       for (k = zs; k < zs + nz; k++) {
1811:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1812:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1814:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1816:         /* Find block columns in block row */
1817:         cnt = 0;
1818:         for (ii = istart; ii < iend + 1; ii++) {
1819:           for (jj = jstart; jj < jend + 1; jj++) {
1820:             for (kk = kstart; kk < kend + 1; kk++) {
1821:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1822:                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1823:               }
1824:             }
1825:           }
1826:         }
1827:         PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1828:       }
1829:     }
1830:   }
1831:   PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1832:   PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1833:   MatPreallocateEnd(dnz, onz);

1835:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1837:   /*
1838:     For each node in the grid we get the neighbors in the local (on-process) ordering
1839:     that includes the ghost points; MatSetValuesBlockedLocal() then maps those block
1840:     indices to the global PETSc ordering.
1841:   */
1842:   if (!da->prealloc_only) {
1843:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1844:     for (i = xs; i < xs + nx; i++) {
1845:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1846:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1847:       for (j = ys; j < ys + ny; j++) {
1848:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1849:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1850:         for (k = zs; k < zs + nz; k++) {
1851:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1852:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1854:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1856:           cnt = 0;
1857:           for (ii = istart; ii < iend + 1; ii++) {
1858:             for (jj = jstart; jj < jend + 1; jj++) {
1859:               for (kk = kstart; kk < kend + 1; kk++) {
1860:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1861:                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1862:                 }
1863:               }
1864:             }
1865:           }
1866:           PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1867:         }
1868:       }
1869:     }
1870:     PetscCall(PetscFree(values));
1871:     /* do not copy values to GPU since they are all zero and not yet needed there */
1872:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1873:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1874:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1875:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1876:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1877:   }
1878:   PetscCall(PetscFree(cols));
1879:   PetscFunctionReturn(PETSC_SUCCESS);
1880: }

1882: /*
1883:   This helper is for SBAIJ preallocation: it discards the lower-triangular values, which are difficult to
1884:   identify in the local ordering with a periodic domain.
1885: */
1886: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1887: {
1888:   PetscInt i, n;

1890:   PetscFunctionBegin;
1891:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1892:   PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1893:   for (i = 0, n = 0; i < *cnt; i++) {
1894:     if (col[i] >= *row) col[n++] = col[i];
1895:   }
1896:   *cnt = n;
1897:   PetscFunctionReturn(PETSC_SUCCESS);
1898: }
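
A standalone sketch of the filtering step above once the block indices are in the global ordering: only columns at or above the (block) diagonal survive, so each symmetric entry is stored once. The sample indices are made up:

#include <stdio.h>

int main(void)
{
  int row   = 5;
  int col[] = {3, 5, 7, 4, 9};
  int cnt   = 5, i, n;

  /* compact in place, keeping only upper-triangular columns */
  for (i = 0, n = 0; i < cnt; i++) {
    if (col[i] >= row) col[n++] = col[i];
  }
  cnt = n;

  for (i = 0; i < cnt; i++) printf("%d ", col[i]); /* prints: 5 7 9 */
  printf("\n");
  return 0;
}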

1900: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1901: {
1902:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1903:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1904:   PetscInt               istart, iend, jstart, jend, ii, jj;
1905:   MPI_Comm               comm;
1906:   PetscScalar           *values;
1907:   DMBoundaryType         bx, by;
1908:   DMDAStencilType        st;
1909:   ISLocalToGlobalMapping ltog;

1911:   PetscFunctionBegin;
1912:   /*
1913:      nc - number of components per grid point
1914:      col - number of colors needed in one direction for single component problem
1915:   */
1916:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1917:   col = 2 * s + 1;

1919:   PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1920:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1921:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

1923:   PetscCall(PetscMalloc1(col * col * nc * nc, &cols));

1925:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

1927:   /* determine the matrix preallocation information */
1928:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1929:   for (i = xs; i < xs + nx; i++) {
1930:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1931:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1932:     for (j = ys; j < ys + ny; j++) {
1933:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1934:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1935:       slot   = i - gxs + gnx * (j - gys);

1937:       /* Find block columns in block row */
1938:       cnt = 0;
1939:       for (ii = istart; ii < iend + 1; ii++) {
1940:         for (jj = jstart; jj < jend + 1; jj++) {
1941:           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1942:         }
1943:       }
1944:       PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1945:       PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1946:     }
1947:   }
1948:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1949:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1950:   MatPreallocateEnd(dnz, onz);

1952:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

1954:   /*
1955:     For each node in the grid we get the neighbors in the local (on-process) ordering that
1956:     includes the ghost points; L2GFilterUpperTriangular() then maps them to the global
1957:     ordering and keeps the upper triangle before MatSetValuesBlocked() inserts them.
1958:   */
1959:   if (!da->prealloc_only) {
1960:     PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1961:     for (i = xs; i < xs + nx; i++) {
1962:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1963:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1964:       for (j = ys; j < ys + ny; j++) {
1965:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1966:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1967:         slot   = i - gxs + gnx * (j - gys);

1969:         /* Find block columns in block row */
1970:         cnt = 0;
1971:         for (ii = istart; ii < iend + 1; ii++) {
1972:           for (jj = jstart; jj < jend + 1; jj++) {
1973:             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1974:           }
1975:         }
1976:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1977:         PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1978:       }
1979:     }
1980:     PetscCall(PetscFree(values));
1981:     /* do not copy values to GPU since they are all zero and not yet needed there */
1982:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
1983:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1984:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1985:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
1986:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1987:   }
1988:   PetscCall(PetscFree(cols));
1989:   PetscFunctionReturn(PETSC_SUCCESS);
1990: }
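
A minimal driver sketch (sizes are illustrative) that reaches the SBAIJ routine above by requesting a symmetric block format before DMCreateMatrix():

#include <petscdmda.h>

int main(int argc, char **argv)
{
  DM  da;
  Mat J;

  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 8, 8, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMSetMatType(da, MATSBAIJ)); /* only the upper triangle is stored */
  PetscCall(DMCreateMatrix(da, &J));
  PetscCall(MatDestroy(&J));
  PetscCall(DMDestroy(&da));
  PetscCall(PetscFinalize());
  return 0;
}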

1992: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
1993: {
1994:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1995:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1996:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1997:   MPI_Comm               comm;
1998:   PetscScalar           *values;
1999:   DMBoundaryType         bx, by, bz;
2000:   DMDAStencilType        st;
2001:   ISLocalToGlobalMapping ltog;

2003:   PetscFunctionBegin;
2004:   /*
2005:      nc - number of components per grid point
2006:      col - number of colors needed in one direction for single component problem
2007:   */
2008:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2009:   col = 2 * s + 1;

2011:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2012:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2013:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2015:   /* allocate workspace for the block column indices */
2016:   PetscCall(PetscMalloc1(col * col * col, &cols));

2018:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2020:   /* determine the matrix preallocation information */
2021:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2022:   for (i = xs; i < xs + nx; i++) {
2023:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2024:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2025:     for (j = ys; j < ys + ny; j++) {
2026:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2027:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2028:       for (k = zs; k < zs + nz; k++) {
2029:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2030:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2032:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2034:         /* Find block columns in block row */
2035:         cnt = 0;
2036:         for (ii = istart; ii < iend + 1; ii++) {
2037:           for (jj = jstart; jj < jend + 1; jj++) {
2038:             for (kk = kstart; kk < kend + 1; kk++) {
2039:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2040:             }
2041:           }
2042:         }
2043:         PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2044:         PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2045:       }
2046:     }
2047:   }
2048:   PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2049:   PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2050:   MatPreallocateEnd(dnz, onz);

2052:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2054:   /*
2055:     For each node in the grid we get the neighbors in the local (on-process) ordering that
2056:     includes the ghost points; L2GFilterUpperTriangular() then maps them to the global
2057:     ordering and keeps the upper triangle before MatSetValuesBlocked() inserts them.
2058:   */
2059:   if (!da->prealloc_only) {
2060:     PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2061:     for (i = xs; i < xs + nx; i++) {
2062:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2063:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2064:       for (j = ys; j < ys + ny; j++) {
2065:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2066:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2067:         for (k = zs; k < zs + nz; k++) {
2068:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2069:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2071:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2073:           cnt = 0;
2074:           for (ii = istart; ii < iend + 1; ii++) {
2075:             for (jj = jstart; jj < jend + 1; jj++) {
2076:               for (kk = kstart; kk < kend + 1; kk++) {
2077:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2078:               }
2079:             }
2080:           }
2081:           PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2082:           PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2083:         }
2084:       }
2085:     }
2086:     PetscCall(PetscFree(values));
2087:     /* do not copy values to GPU since they are all zero and not yet needed there */
2088:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2089:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2090:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2091:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2092:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2093:   }
2094:   PetscCall(PetscFree(cols));
2095:   PetscFunctionReturn(PETSC_SUCCESS);
2096: }

2098: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2099: {
2100:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2101:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2102:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2103:   DM_DA                 *dd = (DM_DA *)da->data;
2104:   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2105:   MPI_Comm               comm;
2106:   PetscScalar           *values;
2107:   DMBoundaryType         bx, by, bz;
2108:   ISLocalToGlobalMapping ltog;
2109:   DMDAStencilType        st;
2110:   PetscBool              removedups = PETSC_FALSE;

2112:   PetscFunctionBegin;
2113:   /*
2114:          nc - number of components per grid point
2115:          col - number of colors needed in one direction for single component problem
2116:   */
2117:   PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2118:   col = 2 * s + 1;

2120:   /*
2121:        With one process in a skinny periodic dimension the code will label nonzero columns multiple times
2122:        because the stencil "wraps" around the end of the domain and hits an entry already counted from the other direction.
2123:   */
2124:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2125:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2126:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

2128:   PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2129:   PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2130:   PetscCall(PetscObjectGetComm((PetscObject)da, &comm));

2132:   PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2133:   PetscCall(DMGetLocalToGlobalMapping(da, &ltog));

2135:   /* determine the matrix preallocation information */
2136:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

2138:   PetscCall(MatSetBlockSize(J, nc));
2139:   for (i = xs; i < xs + nx; i++) {
2140:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2141:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2142:     for (j = ys; j < ys + ny; j++) {
2143:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2144:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2145:       for (k = zs; k < zs + nz; k++) {
2146:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2147:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2149:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2151:         for (l = 0; l < nc; l++) {
2152:           cnt = 0;
2153:           for (ii = istart; ii < iend + 1; ii++) {
2154:             for (jj = jstart; jj < jend + 1; jj++) {
2155:               for (kk = kstart; kk < kend + 1; kk++) {
2156:                 if (ii || jj || kk) {
2157:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2158:                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2159:                   }
2160:                 } else {
2161:                   if (dfill) {
2162:                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2163:                   } else {
2164:                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2165:                   }
2166:                 }
2167:               }
2168:             }
2169:           }
2170:           row    = l + nc * (slot);
2171:           maxcnt = PetscMax(maxcnt, cnt);
2172:           if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2173:           else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2174:         }
2175:       }
2176:     }
2177:   }
2178:   PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2179:   PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2180:   MatPreallocateEnd(dnz, onz);
2181:   PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));

2183:   /*
2184:     For each node in the grid we get the neighbors in the local (on-process) ordering
2185:     that includes the ghost points; MatSetValuesLocal() then maps those indices to the
2186:     global PETSc ordering.
2187:   */
2188:   if (!da->prealloc_only) {
2189:     PetscCall(PetscCalloc1(maxcnt, &values));
2190:     for (i = xs; i < xs + nx; i++) {
2191:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2192:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2193:       for (j = ys; j < ys + ny; j++) {
2194:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2195:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2196:         for (k = zs; k < zs + nz; k++) {
2197:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2198:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2200:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2202:           for (l = 0; l < nc; l++) {
2203:             cnt = 0;
2204:             for (ii = istart; ii < iend + 1; ii++) {
2205:               for (jj = jstart; jj < jend + 1; jj++) {
2206:                 for (kk = kstart; kk < kend + 1; kk++) {
2207:                   if (ii || jj || kk) {
2208:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2209:                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2210:                     }
2211:                   } else {
2212:                     if (dfill) {
2213:                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2214:                     } else {
2215:                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2216:                     }
2217:                   }
2218:                 }
2219:               }
2220:             }
2221:             row = l + nc * (slot);
2222:             PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2223:           }
2224:         }
2225:       }
2226:     }
2227:     PetscCall(PetscFree(values));
2228:     /* do not copy values to GPU since they are all zero and not yet needed there */
2229:     PetscCall(MatBindToCPU(J, PETSC_TRUE));
2230:     PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2231:     PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2232:     PetscCall(MatBindToCPU(J, PETSC_FALSE));
2233:     PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2234:   }
2235:   PetscCall(PetscFree(cols));
2236:   PetscFunctionReturn(PETSC_SUCCESS);
2237: }
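
Finally, a usage sketch for the 3d fill routine above. It assumes the public setter DMDASetBlockFillsSparse(), which takes the same modified CSR layout decoded earlier (row pointers for w components followed by column indices); all sizes and patterns are illustrative:

#include <petscdmda.h>

int main(int argc, char **argv)
{
  DM       da;
  Mat      J;
  /* 2-component identity pattern in modified CSR form: entries 0..2 are row
     pointers into this same array, the column indices follow */
  PetscInt dfillsparse[5] = {3, 4, 5, 0, 1};
  PetscInt ofillsparse[5] = {3, 4, 5, 0, 1};

  PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
  PetscCall(DMDACreate3d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 6, 6, 6, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 2, 1, NULL, NULL, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMDASetBlockFillsSparse(da, dfillsparse, ofillsparse));
  PetscCall(DMCreateMatrix(da, &J));
  PetscCall(MatDestroy(&J));
  PetscCall(DMDestroy(&da));
  PetscCall(PetscFinalize());
  return 0;
}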