Actual source code: fdda.c


  2: #include <petsc/private/dmdaimpl.h>
  3: #include <petscmat.h>
  4: #include <petscbt.h>

  6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
  8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
  9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);

 11: /*
 12:    For ghost i that may be negative or greater than the upper bound this
 13:   maps it into the 0:m-1 range using periodicity
 14: */
 15: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))

 17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
 18: {
 19:   PetscInt i, j, nz, *fill;

 21:   if (!dfill) return 0;

 23:   /* count number nonzeros */
 24:   nz = 0;
 25:   for (i = 0; i < w; i++) {
 26:     for (j = 0; j < w; j++) {
 27:       if (dfill[w * i + j]) nz++;
 28:     }
 29:   }
 30:   PetscMalloc1(nz + w + 1, &fill);
 31:   /* construct modified CSR storage of nonzero structure */
 32:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 33:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 34:   nz = w + 1;
 35:   for (i = 0; i < w; i++) {
 36:     fill[i] = nz;
 37:     for (j = 0; j < w; j++) {
 38:       if (dfill[w * i + j]) {
 39:         fill[nz] = j;
 40:         nz++;
 41:       }
 42:     }
 43:   }
 44:   fill[w] = nz;

 46:   *rfill = fill;
 47:   return 0;
 48: }

 50: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
 51: {
 52:   PetscInt nz;

 54:   if (!dfillsparse) return 0;

 56:   /* Determine number of non-zeros */
 57:   nz = (dfillsparse[w] - w - 1);

 59:   /* Allocate space for our copy of the given sparse matrix representation. */
 60:   PetscMalloc1(nz + w + 1, rfill);
 61:   PetscArraycpy(*rfill, dfillsparse, nz + w + 1);
 62:   return 0;
 63: }

 65: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 66: {
 67:   PetscInt i, k, cnt = 1;


 70:   /* ofillcount tracks the columns of ofill that have any nonzero in thems; the value in each location is the number of
 71:    columns to the left with any nonzeros in them plus 1 */
 72:   PetscCalloc1(dd->w, &dd->ofillcols);
 73:   for (i = 0; i < dd->w; i++) {
 74:     for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
 75:   }
 76:   for (i = 0; i < dd->w; i++) {
 77:     if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
 78:   }
 79:   return 0;
 80: }

 82: /*@
 83:     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 84:     of the matrix returned by `DMCreateMatrix()`.

 86:     Logically Collective on da

 88:     Input Parameters:
 89: +   da - the distributed array
 90: .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
 91: -   ofill - the fill pattern in the off-diagonal blocks

 93:     Level: developer

 95:     Notes:
 96:     This only makes sense when you are doing multicomponent problems but using the
 97:        `MATMPIAIJ` matrix format

 99:            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
100:        representing coupling and 0 entries for missing coupling. For example
101: .vb
102:             dfill[9] = {1, 0, 0,
103:                         1, 1, 0,
104:                         0, 1, 1}
105: .ve
106:        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
107:        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
108:        diagonal block).

110:      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
111:      can be represented in the dfill, ofill format

113:    Contributed by Glenn Hammond

115: .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
116: @*/
117: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
118: {
119:   DM_DA *dd = (DM_DA *)da->data;

121:   /* save the given dfill and ofill information */
122:   DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill);
123:   DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill);

125:   /* count nonzeros in ofill columns */
126:   DMDASetBlockFills_Private2(dd);
127:   return 0;
128: }

130: /*@
131:     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
132:     of the matrix returned by `DMCreateMatrix()`, using sparse representations
133:     of fill patterns.

135:     Logically Collective on da

137:     Input Parameters:
138: +   da - the distributed array
139: .   dfill - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
140: -   ofill - the sparse fill pattern in the off-diagonal blocks

142:     Level: developer

144:     Notes:
145:     This only makes sense when you are doing multicomponent problems but using the
146:        `MATMPIAIJ` matrix format

148:            The format for dfill and ofill is a sparse representation of a
149:            dof-by-dof matrix with 1 entries representing coupling and 0 entries
150:            for missing coupling.  The sparse representation is a 1 dimensional
151:            array of length nz + dof + 1, where nz is the number of non-zeros in
152:            the matrix.  The first dof entries in the array give the
153:            starting array indices of each row's items in the rest of the array,
154:            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
155:            and the remaining nz items give the column indices of each of
156:            the 1s within the logical 2D matrix.  Each row's items within
157:            the array are the column indices of the 1s within that row
158:            of the 2D matrix.  PETSc developers may recognize that this is the
159:            same format as that computed by the `DMDASetBlockFills_Private()`
160:            function from a dense 2D matrix representation.

162:      `DMDASetGetMatrix()` allows you to provide general code for those more complicated nonzero patterns then
163:      can be represented in the dfill, ofill format

165:    Contributed by Philip C. Roth

167: .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
168: @*/
169: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
170: {
171:   DM_DA *dd = (DM_DA *)da->data;

173:   /* save the given dfill and ofill information */
174:   DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill);
175:   DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill);

177:   /* count nonzeros in ofill columns */
178:   DMDASetBlockFills_Private2(dd);
179:   return 0;
180: }

182: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
183: {
184:   PetscInt       dim, m, n, p, nc;
185:   DMBoundaryType bx, by, bz;
186:   MPI_Comm       comm;
187:   PetscMPIInt    size;
188:   PetscBool      isBAIJ;
189:   DM_DA         *dd = (DM_DA *)da->data;

191:   /*
192:                                   m
193:           ------------------------------------------------------
194:          |                                                     |
195:          |                                                     |
196:          |               ----------------------                |
197:          |               |                    |                |
198:       n  |           yn  |                    |                |
199:          |               |                    |                |
200:          |               .---------------------                |
201:          |             (xs,ys)     xn                          |
202:          |            .                                        |
203:          |         (gxs,gys)                                   |
204:          |                                                     |
205:           -----------------------------------------------------
206:   */

208:   /*
209:          nc - number of components per grid point
210:          col - number of colors needed in one direction for single component problem

212:   */
213:   DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL);

215:   PetscObjectGetComm((PetscObject)da, &comm);
216:   MPI_Comm_size(comm, &size);
217:   if (ctype == IS_COLORING_LOCAL) {
218:     if (size == 1) {
219:       ctype = IS_COLORING_GLOBAL;
220:     } else if (dim > 1) {
221:       if ((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)) {
222:         SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
223:       }
224:     }
225:   }

227:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
228:      matrices is for the blocks, not the individual matrix elements  */
229:   PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ);
230:   if (!isBAIJ) PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ);
231:   if (!isBAIJ) PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ);
232:   if (isBAIJ) {
233:     dd->w  = 1;
234:     dd->xs = dd->xs / nc;
235:     dd->xe = dd->xe / nc;
236:     dd->Xs = dd->Xs / nc;
237:     dd->Xe = dd->Xe / nc;
238:   }

240:   /*
241:      We do not provide a getcoloring function in the DMDA operations because
242:    the basic DMDA does not know about matrices. We think of DMDA as being
243:    more low-level then matrices.
244:   */
245:   if (dim == 1) DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring);
246:   else if (dim == 2) DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring);
247:   else if (dim == 3) DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring);
248:   else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
249:   if (isBAIJ) {
250:     dd->w  = nc;
251:     dd->xs = dd->xs * nc;
252:     dd->xe = dd->xe * nc;
253:     dd->Xs = dd->Xs * nc;
254:     dd->Xe = dd->Xe * nc;
255:   }
256:   return 0;
257: }

259: /* ---------------------------------------------------------------------------------*/

261: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
262: {
263:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
264:   PetscInt         ncolors;
265:   MPI_Comm         comm;
266:   DMBoundaryType   bx, by;
267:   DMDAStencilType  st;
268:   ISColoringValue *colors;
269:   DM_DA           *dd = (DM_DA *)da->data;

271:   /*
272:          nc - number of components per grid point
273:          col - number of colors needed in one direction for single component problem

275:   */
276:   DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
277:   col = 2 * s + 1;
278:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
279:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
280:   PetscObjectGetComm((PetscObject)da, &comm);

282:   /* special case as taught to us by Paul Hovland */
283:   if (st == DMDA_STENCIL_STAR && s == 1) {
284:     DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring);
285:   } else {
286:     if (ctype == IS_COLORING_GLOBAL) {
287:       if (!dd->localcoloring) {
288:         PetscMalloc1(nc * nx * ny, &colors);
289:         ii = 0;
290:         for (j = ys; j < ys + ny; j++) {
291:           for (i = xs; i < xs + nx; i++) {
292:             for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
293:           }
294:         }
295:         ncolors = nc + nc * (col - 1 + col * (col - 1));
296:         ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring);
297:       }
298:       *coloring = dd->localcoloring;
299:     } else if (ctype == IS_COLORING_LOCAL) {
300:       if (!dd->ghostedcoloring) {
301:         PetscMalloc1(nc * gnx * gny, &colors);
302:         ii = 0;
303:         for (j = gys; j < gys + gny; j++) {
304:           for (i = gxs; i < gxs + gnx; i++) {
305:             for (k = 0; k < nc; k++) {
306:               /* the complicated stuff is to handle periodic boundaries */
307:               colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
308:             }
309:           }
310:         }
311:         ncolors = nc + nc * (col - 1 + col * (col - 1));
312:         ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
313:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

315:         ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
316:       }
317:       *coloring = dd->ghostedcoloring;
318:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
319:   }
320:   ISColoringReference(*coloring);
321:   return 0;
322: }

324: /* ---------------------------------------------------------------------------------*/

326: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
327: {
328:   PetscInt         xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
329:   PetscInt         ncolors;
330:   MPI_Comm         comm;
331:   DMBoundaryType   bx, by, bz;
332:   DMDAStencilType  st;
333:   ISColoringValue *colors;
334:   DM_DA           *dd = (DM_DA *)da->data;

336:   /*
337:          nc - number of components per grid point
338:          col - number of colors needed in one direction for single component problem

340:   */
341:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
342:   col = 2 * s + 1;
343:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
344:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
345:   PetscObjectGetComm((PetscObject)da, &comm);

347:   /* create the coloring */
348:   if (ctype == IS_COLORING_GLOBAL) {
349:     if (!dd->localcoloring) {
350:       PetscMalloc1(nc * nx * ny * nz, &colors);
351:       ii = 0;
352:       for (k = zs; k < zs + nz; k++) {
353:         for (j = ys; j < ys + ny; j++) {
354:           for (i = xs; i < xs + nx; i++) {
355:             for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
356:           }
357:         }
358:       }
359:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
360:       ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring);
361:     }
362:     *coloring = dd->localcoloring;
363:   } else if (ctype == IS_COLORING_LOCAL) {
364:     if (!dd->ghostedcoloring) {
365:       PetscMalloc1(nc * gnx * gny * gnz, &colors);
366:       ii = 0;
367:       for (k = gzs; k < gzs + gnz; k++) {
368:         for (j = gys; j < gys + gny; j++) {
369:           for (i = gxs; i < gxs + gnx; i++) {
370:             for (l = 0; l < nc; l++) {
371:               /* the complicated stuff is to handle periodic boundaries */
372:               colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
373:             }
374:           }
375:         }
376:       }
377:       ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
378:       ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
379:       ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
380:     }
381:     *coloring = dd->ghostedcoloring;
382:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
383:   ISColoringReference(*coloring);
384:   return 0;
385: }

387: /* ---------------------------------------------------------------------------------*/

389: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
390: {
391:   PetscInt         xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
392:   PetscInt         ncolors;
393:   MPI_Comm         comm;
394:   DMBoundaryType   bx;
395:   ISColoringValue *colors;
396:   DM_DA           *dd = (DM_DA *)da->data;

398:   /*
399:          nc - number of components per grid point
400:          col - number of colors needed in one direction for single component problem

402:   */
403:   DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
404:   col = 2 * s + 1;
405:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
406:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);
407:   PetscObjectGetComm((PetscObject)da, &comm);

409:   /* create the coloring */
410:   if (ctype == IS_COLORING_GLOBAL) {
411:     if (!dd->localcoloring) {
412:       PetscMalloc1(nc * nx, &colors);
413:       if (dd->ofillcols) {
414:         PetscInt tc = 0;
415:         for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
416:         i1 = 0;
417:         for (i = xs; i < xs + nx; i++) {
418:           for (l = 0; l < nc; l++) {
419:             if (dd->ofillcols[l] && (i % col)) {
420:               colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
421:             } else {
422:               colors[i1++] = l;
423:             }
424:           }
425:         }
426:         ncolors = nc + 2 * s * tc;
427:       } else {
428:         i1 = 0;
429:         for (i = xs; i < xs + nx; i++) {
430:           for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
431:         }
432:         ncolors = nc + nc * (col - 1);
433:       }
434:       ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring);
435:     }
436:     *coloring = dd->localcoloring;
437:   } else if (ctype == IS_COLORING_LOCAL) {
438:     if (!dd->ghostedcoloring) {
439:       PetscMalloc1(nc * gnx, &colors);
440:       i1 = 0;
441:       for (i = gxs; i < gxs + gnx; i++) {
442:         for (l = 0; l < nc; l++) {
443:           /* the complicated stuff is to handle periodic boundaries */
444:           colors[i1++] = l + nc * (SetInRange(i, m) % col);
445:         }
446:       }
447:       ncolors = nc + nc * (col - 1);
448:       ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
449:       ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
450:     }
451:     *coloring = dd->ghostedcoloring;
452:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
453:   ISColoringReference(*coloring);
454:   return 0;
455: }

457: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
458: {
459:   PetscInt         xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
460:   PetscInt         ncolors;
461:   MPI_Comm         comm;
462:   DMBoundaryType   bx, by;
463:   ISColoringValue *colors;
464:   DM_DA           *dd = (DM_DA *)da->data;

466:   /*
467:          nc - number of components per grid point
468:          col - number of colors needed in one direction for single component problem

470:   */
471:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL);
472:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
473:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
474:   PetscObjectGetComm((PetscObject)da, &comm);
475:   /* create the coloring */
476:   if (ctype == IS_COLORING_GLOBAL) {
477:     if (!dd->localcoloring) {
478:       PetscMalloc1(nc * nx * ny, &colors);
479:       ii = 0;
480:       for (j = ys; j < ys + ny; j++) {
481:         for (i = xs; i < xs + nx; i++) {
482:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
483:         }
484:       }
485:       ncolors = 5 * nc;
486:       ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring);
487:     }
488:     *coloring = dd->localcoloring;
489:   } else if (ctype == IS_COLORING_LOCAL) {
490:     if (!dd->ghostedcoloring) {
491:       PetscMalloc1(nc * gnx * gny, &colors);
492:       ii = 0;
493:       for (j = gys; j < gys + gny; j++) {
494:         for (i = gxs; i < gxs + gnx; i++) {
495:           for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
496:         }
497:       }
498:       ncolors = 5 * nc;
499:       ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring);
500:       ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL);
501:     }
502:     *coloring = dd->ghostedcoloring;
503:   } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
504:   return 0;
505: }

507: /* =========================================================================== */
508: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
509: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
510: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
511: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
512: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
513: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
514: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);

523: /*@C
524:    MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix

526:    Logically Collective on mat

528:    Input Parameters:
529: +  mat - the matrix
530: -  da - the da

532:    Level: intermediate

534: .seealso: `Mat`, `MatSetUp()`
535: @*/
536: PetscErrorCode MatSetupDM(Mat mat, DM da)
537: {
540:   PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
541:   return 0;
542: }

544: PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
545: {
546:   DM                da;
547:   const char       *prefix;
548:   Mat               Anatural;
549:   AO                ao;
550:   PetscInt          rstart, rend, *petsc, i;
551:   IS                is;
552:   MPI_Comm          comm;
553:   PetscViewerFormat format;

555:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
556:   PetscViewerGetFormat(viewer, &format);
557:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return 0;

559:   PetscObjectGetComm((PetscObject)A, &comm);
560:   MatGetDM(A, &da);

563:   DMDAGetAO(da, &ao);
564:   MatGetOwnershipRange(A, &rstart, &rend);
565:   PetscMalloc1(rend - rstart, &petsc);
566:   for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
567:   AOApplicationToPetsc(ao, rend - rstart, petsc);
568:   ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is);

570:   /* call viewer on natural ordering */
571:   MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural);
572:   ISDestroy(&is);
573:   PetscObjectGetOptionsPrefix((PetscObject)A, &prefix);
574:   PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix);
575:   PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name);
576:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
577:   MatView(Anatural, viewer);
578:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
579:   MatDestroy(&Anatural);
580:   return 0;
581: }

583: PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
584: {
585:   DM       da;
586:   Mat      Anatural, Aapp;
587:   AO       ao;
588:   PetscInt rstart, rend, *app, i, m, n, M, N;
589:   IS       is;
590:   MPI_Comm comm;

592:   PetscObjectGetComm((PetscObject)A, &comm);
593:   MatGetDM(A, &da);

596:   /* Load the matrix in natural ordering */
597:   MatCreate(PetscObjectComm((PetscObject)A), &Anatural);
598:   MatSetType(Anatural, ((PetscObject)A)->type_name);
599:   MatGetSize(A, &M, &N);
600:   MatGetLocalSize(A, &m, &n);
601:   MatSetSizes(Anatural, m, n, M, N);
602:   MatLoad(Anatural, viewer);

604:   /* Map natural ordering to application ordering and create IS */
605:   DMDAGetAO(da, &ao);
606:   MatGetOwnershipRange(Anatural, &rstart, &rend);
607:   PetscMalloc1(rend - rstart, &app);
608:   for (i = rstart; i < rend; i++) app[i - rstart] = i;
609:   AOPetscToApplication(ao, rend - rstart, app);
610:   ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is);

612:   /* Do permutation and replace header */
613:   MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp);
614:   MatHeaderReplace(A, &Aapp);
615:   ISDestroy(&is);
616:   MatDestroy(&Anatural);
617:   return 0;
618: }

620: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
621: {
622:   PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
623:   Mat      A;
624:   MPI_Comm comm;
625:   MatType  Atype;
626:   void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
627:   MatType     mtype;
628:   PetscMPIInt size;
629:   DM_DA      *dd = (DM_DA *)da->data;

631:   MatInitializePackage();
632:   mtype = da->mattype;

634:   /*
635:                                   m
636:           ------------------------------------------------------
637:          |                                                     |
638:          |                                                     |
639:          |               ----------------------                |
640:          |               |                    |                |
641:       n  |           ny  |                    |                |
642:          |               |                    |                |
643:          |               .---------------------                |
644:          |             (xs,ys)     nx                          |
645:          |            .                                        |
646:          |         (gxs,gys)                                   |
647:          |                                                     |
648:           -----------------------------------------------------
649:   */

651:   /*
652:          nc - number of components per grid point
653:          col - number of colors needed in one direction for single component problem

655:   */
656:   M   = dd->M;
657:   N   = dd->N;
658:   P   = dd->P;
659:   dim = da->dim;
660:   dof = dd->w;
661:   /* DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL); */
662:   DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz);
663:   PetscObjectGetComm((PetscObject)da, &comm);
664:   MatCreate(comm, &A);
665:   MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P);
666:   MatSetType(A, mtype);
667:   MatSetFromOptions(A);
668:   if (dof * nx * ny * nz < da->bind_below) {
669:     MatSetBindingPropagates(A, PETSC_TRUE);
670:     MatBindToCPU(A, PETSC_TRUE);
671:   }
672:   MatSetDM(A, da);
673:   if (da->structure_only) MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE);
674:   MatGetType(A, &Atype);
675:   /*
676:      We do not provide a getmatrix function in the DMDA operations because
677:    the basic DMDA does not know about matrices. We think of DMDA as being more
678:    more low-level than matrices. This is kind of cheating but, cause sometimes
679:    we think of DMDA has higher level than matrices.

681:      We could switch based on Atype (or mtype), but we do not since the
682:    specialized setting routines depend only on the particular preallocation
683:    details of the matrix, not the type itself.
684:   */
685:   PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij);
686:   if (!aij) PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij);
687:   if (!aij) {
688:     PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij);
689:     if (!baij) PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij);
690:     if (!baij) {
691:       PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij);
692:       if (!sbaij) PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij);
693:       if (!sbaij) {
694:         PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell);
695:         if (!sell) PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell);
696:       }
697:       if (!sell) PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is);
698:     }
699:   }
700:   if (aij) {
701:     if (dim == 1) {
702:       if (dd->ofill) {
703:         DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A);
704:       } else {
705:         DMBoundaryType bx;
706:         PetscMPIInt    size;
707:         DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL);
708:         MPI_Comm_size(PetscObjectComm((PetscObject)da), &size);
709:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
710:           DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A);
711:         } else {
712:           DMCreateMatrix_DA_1d_MPIAIJ(da, A);
713:         }
714:       }
715:     } else if (dim == 2) {
716:       if (dd->ofill) {
717:         DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A);
718:       } else {
719:         DMCreateMatrix_DA_2d_MPIAIJ(da, A);
720:       }
721:     } else if (dim == 3) {
722:       if (dd->ofill) {
723:         DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A);
724:       } else {
725:         DMCreateMatrix_DA_3d_MPIAIJ(da, A);
726:       }
727:     }
728:   } else if (baij) {
729:     if (dim == 2) {
730:       DMCreateMatrix_DA_2d_MPIBAIJ(da, A);
731:     } else if (dim == 3) {
732:       DMCreateMatrix_DA_3d_MPIBAIJ(da, A);
733:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
734:   } else if (sbaij) {
735:     if (dim == 2) {
736:       DMCreateMatrix_DA_2d_MPISBAIJ(da, A);
737:     } else if (dim == 3) {
738:       DMCreateMatrix_DA_3d_MPISBAIJ(da, A);
739:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
740:   } else if (sell) {
741:     if (dim == 2) {
742:       DMCreateMatrix_DA_2d_MPISELL(da, A);
743:     } else if (dim == 3) {
744:       DMCreateMatrix_DA_3d_MPISELL(da, A);
745:     } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
746:   } else if (is) {
747:     DMCreateMatrix_DA_IS(da, A);
748:   } else {
749:     ISLocalToGlobalMapping ltog;

751:     MatSetBlockSize(A, dof);
752:     MatSetUp(A);
753:     DMGetLocalToGlobalMapping(da, &ltog);
754:     MatSetLocalToGlobalMapping(A, ltog, ltog);
755:   }
756:   DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]);
757:   MatSetStencil(A, dim, dims, starts, dof);
758:   MatSetDM(A, da);
759:   MPI_Comm_size(comm, &size);
760:   if (size > 1) {
761:     /* change viewer to display matrix in natural ordering */
762:     MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
763:     MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
764:   }
765:   *J = A;
766:   return 0;
767: }

769: /* ---------------------------------------------------------------------------------*/
770: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);

772: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
773: {
774:   DM_DA                 *da = (DM_DA *)dm->data;
775:   Mat                    lJ, P;
776:   ISLocalToGlobalMapping ltog;
777:   IS                     is;
778:   PetscBT                bt;
779:   const PetscInt        *e_loc, *idx;
780:   PetscInt               i, nel, nen, nv, dof, *gidx, n, N;

782:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
783:      We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
784:   dof = da->w;
785:   MatSetBlockSize(J, dof);
786:   DMGetLocalToGlobalMapping(dm, &ltog);

788:   /* flag local elements indices in local DMDA numbering */
789:   ISLocalToGlobalMappingGetSize(ltog, &nv);
790:   PetscBTCreate(nv / dof, &bt);
791:   DMDAGetElements(dm, &nel, &nen, &e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
792:   for (i = 0; i < nel * nen; i++) PetscBTSet(bt, e_loc[i]);

794:   /* filter out (set to -1) the global indices not used by the local elements */
795:   PetscMalloc1(nv / dof, &gidx);
796:   ISLocalToGlobalMappingGetBlockIndices(ltog, &idx);
797:   PetscArraycpy(gidx, idx, nv / dof);
798:   ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx);
799:   for (i = 0; i < nv / dof; i++)
800:     if (!PetscBTLookup(bt, i)) gidx[i] = -1;
801:   PetscBTDestroy(&bt);
802:   ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is);
803:   ISLocalToGlobalMappingCreateIS(is, &ltog);
804:   MatSetLocalToGlobalMapping(J, ltog, ltog);
805:   ISLocalToGlobalMappingDestroy(&ltog);
806:   ISDestroy(&is);

808:   /* Preallocation */
809:   if (dm->prealloc_skip) {
810:     MatSetUp(J);
811:   } else {
812:     MatISGetLocalMat(J, &lJ);
813:     MatGetLocalToGlobalMapping(lJ, &ltog, NULL);
814:     MatCreate(PetscObjectComm((PetscObject)lJ), &P);
815:     MatSetType(P, MATPREALLOCATOR);
816:     MatSetLocalToGlobalMapping(P, ltog, ltog);
817:     MatGetSize(lJ, &N, NULL);
818:     MatGetLocalSize(lJ, &n, NULL);
819:     MatSetSizes(P, n, n, N, N);
820:     MatSetBlockSize(P, dof);
821:     MatSetUp(P);
822:     for (i = 0; i < nel; i++) MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES);
823:     MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ);
824:     MatISRestoreLocalMat(J, &lJ);
825:     DMDARestoreElements(dm, &nel, &nen, &e_loc);
826:     MatDestroy(&P);

828:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
829:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
830:   }
831:   return 0;
832: }

834: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
835: {
836:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
837:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
838:   MPI_Comm               comm;
839:   PetscScalar           *values;
840:   DMBoundaryType         bx, by;
841:   ISLocalToGlobalMapping ltog;
842:   DMDAStencilType        st;

844:   /*
845:          nc - number of components per grid point
846:          col - number of colors needed in one direction for single component problem

848:   */
849:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
850:   col = 2 * s + 1;
851:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
852:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
853:   PetscObjectGetComm((PetscObject)da, &comm);

855:   PetscMalloc2(nc, &rows, col * col * nc * nc, &cols);
856:   DMGetLocalToGlobalMapping(da, &ltog);

858:   MatSetBlockSize(J, nc);
859:   /* determine the matrix preallocation information */
860:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
861:   for (i = xs; i < xs + nx; i++) {
862:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
863:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

865:     for (j = ys; j < ys + ny; j++) {
866:       slot = i - gxs + gnx * (j - gys);

868:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
869:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

871:       cnt = 0;
872:       for (k = 0; k < nc; k++) {
873:         for (l = lstart; l < lend + 1; l++) {
874:           for (p = pstart; p < pend + 1; p++) {
875:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
876:               cols[cnt++] = k + nc * (slot + gnx * l + p);
877:             }
878:           }
879:         }
880:         rows[k] = k + nc * (slot);
881:       }
882:       MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
883:     }
884:   }
885:   MatSetBlockSize(J, nc);
886:   MatSeqSELLSetPreallocation(J, 0, dnz);
887:   MatMPISELLSetPreallocation(J, 0, dnz, 0, onz);
888:   MatPreallocateEnd(dnz, onz);

890:   MatSetLocalToGlobalMapping(J, ltog, ltog);

892:   /*
893:     For each node in the grid: we get the neighbors in the local (on processor ordering
894:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
895:     PETSc ordering.
896:   */
897:   if (!da->prealloc_only) {
898:     PetscCalloc1(col * col * nc * nc, &values);
899:     for (i = xs; i < xs + nx; i++) {
900:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
901:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

903:       for (j = ys; j < ys + ny; j++) {
904:         slot = i - gxs + gnx * (j - gys);

906:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
907:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

909:         cnt = 0;
910:         for (k = 0; k < nc; k++) {
911:           for (l = lstart; l < lend + 1; l++) {
912:             for (p = pstart; p < pend + 1; p++) {
913:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
914:                 cols[cnt++] = k + nc * (slot + gnx * l + p);
915:               }
916:             }
917:           }
918:           rows[k] = k + nc * (slot);
919:         }
920:         MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES);
921:       }
922:     }
923:     PetscFree(values);
924:     /* do not copy values to GPU since they are all zero and not yet needed there */
925:     MatBindToCPU(J, PETSC_TRUE);
926:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
927:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
928:     MatBindToCPU(J, PETSC_FALSE);
929:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
930:   }
931:   PetscFree2(rows, cols);
932:   return 0;
933: }

935: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
936: {
937:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
938:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
939:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
940:   MPI_Comm               comm;
941:   PetscScalar           *values;
942:   DMBoundaryType         bx, by, bz;
943:   ISLocalToGlobalMapping ltog;
944:   DMDAStencilType        st;

946:   /*
947:          nc - number of components per grid point
948:          col - number of colors needed in one direction for single component problem

950:   */
951:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
952:   col = 2 * s + 1;
953:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
954:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
955:   PetscObjectGetComm((PetscObject)da, &comm);

957:   PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols);
958:   DMGetLocalToGlobalMapping(da, &ltog);

960:   MatSetBlockSize(J, nc);
961:   /* determine the matrix preallocation information */
962:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
963:   for (i = xs; i < xs + nx; i++) {
964:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
965:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
966:     for (j = ys; j < ys + ny; j++) {
967:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
968:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
969:       for (k = zs; k < zs + nz; k++) {
970:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
971:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

973:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

975:         cnt = 0;
976:         for (l = 0; l < nc; l++) {
977:           for (ii = istart; ii < iend + 1; ii++) {
978:             for (jj = jstart; jj < jend + 1; jj++) {
979:               for (kk = kstart; kk < kend + 1; kk++) {
980:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
981:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
982:                 }
983:               }
984:             }
985:           }
986:           rows[l] = l + nc * (slot);
987:         }
988:         MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
989:       }
990:     }
991:   }
992:   MatSetBlockSize(J, nc);
993:   MatSeqSELLSetPreallocation(J, 0, dnz);
994:   MatMPISELLSetPreallocation(J, 0, dnz, 0, onz);
995:   MatPreallocateEnd(dnz, onz);
996:   MatSetLocalToGlobalMapping(J, ltog, ltog);

998:   /*
999:     For each node in the grid: we get the neighbors in the local (on processor ordering
1000:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1001:     PETSc ordering.
1002:   */
1003:   if (!da->prealloc_only) {
1004:     PetscCalloc1(col * col * col * nc * nc * nc, &values);
1005:     for (i = xs; i < xs + nx; i++) {
1006:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1007:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1008:       for (j = ys; j < ys + ny; j++) {
1009:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1010:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1011:         for (k = zs; k < zs + nz; k++) {
1012:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1013:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1015:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1017:           cnt = 0;
1018:           for (l = 0; l < nc; l++) {
1019:             for (ii = istart; ii < iend + 1; ii++) {
1020:               for (jj = jstart; jj < jend + 1; jj++) {
1021:                 for (kk = kstart; kk < kend + 1; kk++) {
1022:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1023:                     cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1024:                   }
1025:                 }
1026:               }
1027:             }
1028:             rows[l] = l + nc * (slot);
1029:           }
1030:           MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES);
1031:         }
1032:       }
1033:     }
1034:     PetscFree(values);
1035:     /* do not copy values to GPU since they are all zero and not yet needed there */
1036:     MatBindToCPU(J, PETSC_TRUE);
1037:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1038:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1039:     MatBindToCPU(J, PETSC_FALSE);
1040:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1041:   }
1042:   PetscFree2(rows, cols);
1043:   return 0;
1044: }

1046: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1047: {
1048:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1049:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1050:   MPI_Comm               comm;
1051:   DMBoundaryType         bx, by;
1052:   ISLocalToGlobalMapping ltog, mltog;
1053:   DMDAStencilType        st;
1054:   PetscBool              removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;

1056:   /*
1057:          nc - number of components per grid point
1058:          col - number of colors needed in one direction for single component problem

1060:   */
1061:   DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
1062:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1063:   col = 2 * s + 1;
1064:   /*
1065:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1066:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1067:   */
1068:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1069:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1070:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1071:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1072:   PetscObjectGetComm((PetscObject)da, &comm);

1074:   PetscMalloc2(nc, &rows, col * col * nc * nc, &cols);
1075:   DMGetLocalToGlobalMapping(da, &ltog);

1077:   MatSetBlockSize(J, nc);
1078:   /* determine the matrix preallocation information */
1079:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1080:   for (i = xs; i < xs + nx; i++) {
1081:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1082:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1084:     for (j = ys; j < ys + ny; j++) {
1085:       slot = i - gxs + gnx * (j - gys);

1087:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1088:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1090:       cnt = 0;
1091:       for (k = 0; k < nc; k++) {
1092:         for (l = lstart; l < lend + 1; l++) {
1093:           for (p = pstart; p < pend + 1; p++) {
1094:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1095:               cols[cnt++] = k + nc * (slot + gnx * l + p);
1096:             }
1097:           }
1098:         }
1099:         rows[k] = k + nc * (slot);
1100:       }
1101:       if (removedups) MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1102:       else MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1103:     }
1104:   }
1105:   MatSetBlockSize(J, nc);
1106:   MatSeqAIJSetPreallocation(J, 0, dnz);
1107:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1108:   MatPreallocateEnd(dnz, onz);
1109:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1110:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1112:   /*
1113:     For each node in the grid: we get the neighbors in the local (on processor ordering
1114:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1115:     PETSc ordering.
1116:   */
1117:   if (!da->prealloc_only) {
1118:     for (i = xs; i < xs + nx; i++) {
1119:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1120:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1122:       for (j = ys; j < ys + ny; j++) {
1123:         slot = i - gxs + gnx * (j - gys);

1125:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1126:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1128:         cnt = 0;
1129:         for (l = lstart; l < lend + 1; l++) {
1130:           for (p = pstart; p < pend + 1; p++) {
1131:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1132:               cols[cnt++] = nc * (slot + gnx * l + p);
1133:               for (k = 1; k < nc; k++) {
1134:                 cols[cnt] = 1 + cols[cnt - 1];
1135:                 cnt++;
1136:               }
1137:             }
1138:           }
1139:         }
1140:         for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1141:         MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1142:       }
1143:     }
1144:     /* do not copy values to GPU since they are all zero and not yet needed there */
1145:     MatBoundToCPU(J, &alreadyboundtocpu);
1146:     MatBindToCPU(J, PETSC_TRUE);
1147:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1148:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1149:     if (!alreadyboundtocpu) MatBindToCPU(J, PETSC_FALSE);
1150:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1151:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1152:   }
1153:   PetscFree2(rows, cols);
1154:   return 0;
1155: }

1157: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1158: {
1159:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1160:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1161:   PetscInt               lstart, lend, pstart, pend, *dnz, *onz;
1162:   DM_DA                 *dd = (DM_DA *)da->data;
1163:   PetscInt               ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1164:   MPI_Comm               comm;
1165:   DMBoundaryType         bx, by;
1166:   ISLocalToGlobalMapping ltog;
1167:   DMDAStencilType        st;
1168:   PetscBool              removedups = PETSC_FALSE;

1170:   /*
1171:          nc - number of components per grid point
1172:          col - number of colors needed in one direction for single component problem

1174:   */
1175:   DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st);
1176:   col = 2 * s + 1;
1177:   /*
1178:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1179:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1180:   */
1181:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1182:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1183:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1184:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1185:   PetscObjectGetComm((PetscObject)da, &comm);

1187:   PetscMalloc1(col * col * nc, &cols);
1188:   DMGetLocalToGlobalMapping(da, &ltog);

1190:   MatSetBlockSize(J, nc);
1191:   /* determine the matrix preallocation information */
1192:   MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1193:   for (i = xs; i < xs + nx; i++) {
1194:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1195:     pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1197:     for (j = ys; j < ys + ny; j++) {
1198:       slot = i - gxs + gnx * (j - gys);

1200:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1201:       lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1203:       for (k = 0; k < nc; k++) {
1204:         cnt = 0;
1205:         for (l = lstart; l < lend + 1; l++) {
1206:           for (p = pstart; p < pend + 1; p++) {
1207:             if (l || p) {
1208:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1209:                 for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1210:               }
1211:             } else {
1212:               if (dfill) {
1213:                 for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1214:               } else {
1215:                 for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1216:               }
1217:             }
1218:           }
1219:         }
1220:         row    = k + nc * (slot);
1221:         maxcnt = PetscMax(maxcnt, cnt);
1222:         if (removedups) MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
1223:         else MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
1224:       }
1225:     }
1226:   }
1227:   MatSeqAIJSetPreallocation(J, 0, dnz);
1228:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1229:   MatPreallocateEnd(dnz, onz);
1230:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1232:   /*
1233:     For each node in the grid: we get the neighbors in the local (on processor ordering
1234:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1235:     PETSc ordering.
1236:   */
1237:   if (!da->prealloc_only) {
1238:     for (i = xs; i < xs + nx; i++) {
1239:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1240:       pend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));

1242:       for (j = ys; j < ys + ny; j++) {
1243:         slot = i - gxs + gnx * (j - gys);

1245:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1246:         lend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));

1248:         for (k = 0; k < nc; k++) {
1249:           cnt = 0;
1250:           for (l = lstart; l < lend + 1; l++) {
1251:             for (p = pstart; p < pend + 1; p++) {
1252:               if (l || p) {
1253:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1254:                   for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1255:                 }
1256:               } else {
1257:                 if (dfill) {
1258:                   for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1259:                 } else {
1260:                   for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1261:                 }
1262:               }
1263:             }
1264:           }
1265:           row = k + nc * (slot);
1266:           MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1267:         }
1268:       }
1269:     }
1270:     /* do not copy values to GPU since they are all zero and not yet needed there */
1271:     MatBindToCPU(J, PETSC_TRUE);
1272:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1273:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1274:     MatBindToCPU(J, PETSC_FALSE);
1275:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1276:   }
1277:   PetscFree(cols);
1278:   return 0;
1279: }

1281: /* ---------------------------------------------------------------------------------*/

1283: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1284: {
1285:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1286:   PetscInt               m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1287:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1288:   MPI_Comm               comm;
1289:   DMBoundaryType         bx, by, bz;
1290:   ISLocalToGlobalMapping ltog, mltog;
1291:   DMDAStencilType        st;
1292:   PetscBool              removedups = PETSC_FALSE;

1294:   /*
1295:          nc - number of components per grid point
1296:          col - number of colors needed in one direction for single component problem

1298:   */
1299:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
1300:   if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1301:   col = 2 * s + 1;

1303:   /*
1304:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1305:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1306:   */
1307:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1308:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1309:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

1311:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
1312:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
1313:   PetscObjectGetComm((PetscObject)da, &comm);

1315:   PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols);
1316:   DMGetLocalToGlobalMapping(da, &ltog);

1318:   MatSetBlockSize(J, nc);
1319:   /* determine the matrix preallocation information */
1320:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1321:   for (i = xs; i < xs + nx; i++) {
1322:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1323:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1324:     for (j = ys; j < ys + ny; j++) {
1325:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1326:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1327:       for (k = zs; k < zs + nz; k++) {
1328:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1329:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1331:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1333:         cnt = 0;
1334:         for (l = 0; l < nc; l++) {
1335:           for (ii = istart; ii < iend + 1; ii++) {
1336:             for (jj = jstart; jj < jend + 1; jj++) {
1337:               for (kk = kstart; kk < kend + 1; kk++) {
1338:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1339:                   cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1340:                 }
1341:               }
1342:             }
1343:           }
1344:           rows[l] = l + nc * (slot);
1345:         }
1346:         if (removedups) MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1347:         else MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz);
1348:       }
1349:     }
1350:   }
1351:   MatSetBlockSize(J, nc);
1352:   MatSeqAIJSetPreallocation(J, 0, dnz);
1353:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
1354:   MatPreallocateEnd(dnz, onz);
1355:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1356:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1358:   /*
1359:     For each node in the grid: we get the neighbors in the local (on processor ordering
1360:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1361:     PETSc ordering.
1362:   */
1363:   if (!da->prealloc_only) {
1364:     for (i = xs; i < xs + nx; i++) {
1365:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1366:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1367:       for (j = ys; j < ys + ny; j++) {
1368:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1369:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1370:         for (k = zs; k < zs + nz; k++) {
1371:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1372:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1374:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1376:           cnt = 0;
1377:           for (kk = kstart; kk < kend + 1; kk++) {
1378:             for (jj = jstart; jj < jend + 1; jj++) {
1379:               for (ii = istart; ii < iend + 1; ii++) {
1380:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1381:                   cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1382:                   for (l = 1; l < nc; l++) {
1383:                     cols[cnt] = 1 + cols[cnt - 1];
1384:                     cnt++;
1385:                   }
1386:                 }
1387:               }
1388:             }
1389:           }
1390:           rows[0] = nc * (slot);
1391:           for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1392:           MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1393:         }
1394:       }
1395:     }
1396:     /* do not copy values to GPU since they are all zero and not yet needed there */
1397:     MatBindToCPU(J, PETSC_TRUE);
1398:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1399:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1400:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1401:     MatBindToCPU(J, PETSC_FALSE);
1402:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1403:   }
1404:   PetscFree2(rows, cols);
1405:   return 0;
1406: }

1408: /* ---------------------------------------------------------------------------------*/

1410: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1411: {
1412:   DM_DA                 *dd = (DM_DA *)da->data;
1413:   PetscInt               xs, nx, i, j, gxs, gnx, row, k, l;
1414:   PetscInt               m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1415:   PetscInt              *ofill = dd->ofill, *dfill = dd->dfill;
1416:   DMBoundaryType         bx;
1417:   ISLocalToGlobalMapping ltog;
1418:   PetscMPIInt            rank, size;

1420:   MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank);
1421:   MPI_Comm_size(PetscObjectComm((PetscObject)da), &size);

1423:   /*
1424:          nc - number of components per grid point

1426:   */
1427:   DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1429:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1430:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);

1432:   MatSetBlockSize(J, nc);
1433:   PetscCalloc2(nx * nc, &cols, nx * nc, &ocols);

1435:   /*
1436:         note should be smaller for first and last process with no periodic
1437:         does not handle dfill
1438:   */
1439:   cnt = 0;
1440:   /* coupling with process to the left */
1441:   for (i = 0; i < s; i++) {
1442:     for (j = 0; j < nc; j++) {
1443:       ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1444:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1445:       if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1446:         if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1447:         else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1448:       }
1449:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1450:       cnt++;
1451:     }
1452:   }
1453:   for (i = s; i < nx - s; i++) {
1454:     for (j = 0; j < nc; j++) {
1455:       cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1456:       maxcnt    = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1457:       cnt++;
1458:     }
1459:   }
1460:   /* coupling with process to the right */
1461:   for (i = nx - s; i < nx; i++) {
1462:     for (j = 0; j < nc; j++) {
1463:       ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1464:       cols[cnt]  = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1465:       if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1466:         if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1467:         else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1468:       }
1469:       maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1470:       cnt++;
1471:     }
1472:   }

1474:   MatSeqAIJSetPreallocation(J, 0, cols);
1475:   MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols);
1476:   PetscFree2(cols, ocols);

1478:   DMGetLocalToGlobalMapping(da, &ltog);
1479:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1481:   /*
1482:     For each node in the grid: we get the neighbors in the local (on processor ordering
1483:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1484:     PETSc ordering.
1485:   */
1486:   if (!da->prealloc_only) {
1487:     PetscMalloc1(maxcnt, &cols);
1488:     row = xs * nc;
1489:     /* coupling with process to the left */
1490:     for (i = xs; i < xs + s; i++) {
1491:       for (j = 0; j < nc; j++) {
1492:         cnt = 0;
1493:         if (rank) {
1494:           for (l = 0; l < s; l++) {
1495:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1496:           }
1497:         }
1498:         if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1499:           for (l = 0; l < s; l++) {
1500:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1501:           }
1502:         }
1503:         if (dfill) {
1504:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1505:         } else {
1506:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1507:         }
1508:         for (l = 0; l < s; l++) {
1509:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1510:         }
1511:         MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1512:         row++;
1513:       }
1514:     }
1515:     for (i = xs + s; i < xs + nx - s; i++) {
1516:       for (j = 0; j < nc; j++) {
1517:         cnt = 0;
1518:         for (l = 0; l < s; l++) {
1519:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1520:         }
1521:         if (dfill) {
1522:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1523:         } else {
1524:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1525:         }
1526:         for (l = 0; l < s; l++) {
1527:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1528:         }
1529:         MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1530:         row++;
1531:       }
1532:     }
1533:     /* coupling with process to the right */
1534:     for (i = xs + nx - s; i < xs + nx; i++) {
1535:       for (j = 0; j < nc; j++) {
1536:         cnt = 0;
1537:         for (l = 0; l < s; l++) {
1538:           for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1539:         }
1540:         if (dfill) {
1541:           for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1542:         } else {
1543:           for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1544:         }
1545:         if (rank < size - 1) {
1546:           for (l = 0; l < s; l++) {
1547:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1548:           }
1549:         }
1550:         if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1551:           for (l = 0; l < s; l++) {
1552:             for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1553:           }
1554:         }
1555:         MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES);
1556:         row++;
1557:       }
1558:     }
1559:     PetscFree(cols);
1560:     /* do not copy values to GPU since they are all zero and not yet needed there */
1561:     MatBindToCPU(J, PETSC_TRUE);
1562:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1563:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1564:     MatBindToCPU(J, PETSC_FALSE);
1565:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1566:   }
1567:   return 0;
1568: }

1570: /* ---------------------------------------------------------------------------------*/

1572: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1573: {
1574:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1575:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1576:   PetscInt               istart, iend;
1577:   DMBoundaryType         bx;
1578:   ISLocalToGlobalMapping ltog, mltog;

1580:   /*
1581:          nc - number of components per grid point
1582:          col - number of colors needed in one direction for single component problem

1584:   */
1585:   DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1586:   if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE);
1587:   col = 2 * s + 1;

1589:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1590:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);

1592:   MatSetBlockSize(J, nc);
1593:   MatSeqAIJSetPreallocation(J, col * nc, NULL);
1594:   MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL);

1596:   DMGetLocalToGlobalMapping(da, &ltog);
1597:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1598:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1600:   /*
1601:     For each node in the grid: we get the neighbors in the local (on processor ordering
1602:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1603:     PETSc ordering.
1604:   */
1605:   if (!da->prealloc_only) {
1606:     PetscMalloc2(nc, &rows, col * nc * nc, &cols);
1607:     for (i = xs; i < xs + nx; i++) {
1608:       istart = PetscMax(-s, gxs - i);
1609:       iend   = PetscMin(s, gxs + gnx - i - 1);
1610:       slot   = i - gxs;

1612:       cnt = 0;
1613:       for (i1 = istart; i1 < iend + 1; i1++) {
1614:         cols[cnt++] = nc * (slot + i1);
1615:         for (l = 1; l < nc; l++) {
1616:           cols[cnt] = 1 + cols[cnt - 1];
1617:           cnt++;
1618:         }
1619:       }
1620:       rows[0] = nc * (slot);
1621:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1622:       MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1623:     }
1624:     /* do not copy values to GPU since they are all zero and not yet needed there */
1625:     MatBindToCPU(J, PETSC_TRUE);
1626:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1627:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1628:     if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1629:     MatBindToCPU(J, PETSC_FALSE);
1630:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1631:     PetscFree2(rows, cols);
1632:   }
1633:   return 0;
1634: }

1636: /* ---------------------------------------------------------------------------------*/

1638: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1639: {
1640:   PetscInt               xs, nx, i, i1, slot, gxs, gnx;
1641:   PetscInt               m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1642:   PetscInt               istart, iend;
1643:   DMBoundaryType         bx;
1644:   ISLocalToGlobalMapping ltog, mltog;

1646:   /*
1647:          nc - number of components per grid point
1648:          col - number of colors needed in one direction for single component problem
1649:   */
1650:   DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL);
1651:   col = 2 * s + 1;

1653:   DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL);
1654:   DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL);

1656:   MatSetBlockSize(J, nc);
1657:   MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc);

1659:   DMGetLocalToGlobalMapping(da, &ltog);
1660:   MatGetLocalToGlobalMapping(J, &mltog, NULL);
1661:   if (!mltog) MatSetLocalToGlobalMapping(J, ltog, ltog);

1663:   /*
1664:     For each node in the grid: we get the neighbors in the local (on processor ordering
1665:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1666:     PETSc ordering.
1667:   */
1668:   if (!da->prealloc_only) {
1669:     PetscMalloc2(nc, &rows, col * nc * nc, &cols);
1670:     for (i = xs; i < xs + nx; i++) {
1671:       istart = PetscMax(-s, gxs - i);
1672:       iend   = PetscMin(s, gxs + gnx - i - 1);
1673:       slot   = i - gxs;

1675:       cnt = 0;
1676:       for (i1 = istart; i1 < iend + 1; i1++) {
1677:         cols[cnt++] = nc * (slot + i1);
1678:         for (l = 1; l < nc; l++) {
1679:           cols[cnt] = 1 + cols[cnt - 1];
1680:           cnt++;
1681:         }
1682:       }
1683:       rows[0] = nc * (slot);
1684:       for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1685:       MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES);
1686:     }
1687:     /* do not copy values to GPU since they are all zero and not yet needed there */
1688:     MatBindToCPU(J, PETSC_TRUE);
1689:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1690:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1691:     if (bx == DM_BOUNDARY_NONE) MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1692:     MatBindToCPU(J, PETSC_FALSE);
1693:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1694:     PetscFree2(rows, cols);
1695:   }
1696:   MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE);
1697:   return 0;
1698: }

1700: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1701: {
1702:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1703:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1704:   PetscInt               istart, iend, jstart, jend, ii, jj;
1705:   MPI_Comm               comm;
1706:   PetscScalar           *values;
1707:   DMBoundaryType         bx, by;
1708:   DMDAStencilType        st;
1709:   ISLocalToGlobalMapping ltog;

1711:   /*
1712:      nc - number of components per grid point
1713:      col - number of colors needed in one direction for single component problem
1714:   */
1715:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
1716:   col = 2 * s + 1;

1718:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1719:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1720:   PetscObjectGetComm((PetscObject)da, &comm);

1722:   PetscMalloc1(col * col * nc * nc, &cols);

1724:   DMGetLocalToGlobalMapping(da, &ltog);

1726:   /* determine the matrix preallocation information */
1727:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1728:   for (i = xs; i < xs + nx; i++) {
1729:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1730:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1731:     for (j = ys; j < ys + ny; j++) {
1732:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1733:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1734:       slot   = i - gxs + gnx * (j - gys);

1736:       /* Find block columns in block row */
1737:       cnt = 0;
1738:       for (ii = istart; ii < iend + 1; ii++) {
1739:         for (jj = jstart; jj < jend + 1; jj++) {
1740:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1741:             cols[cnt++] = slot + ii + gnx * jj;
1742:           }
1743:         }
1744:       }
1745:       MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz);
1746:     }
1747:   }
1748:   MatSeqBAIJSetPreallocation(J, nc, 0, dnz);
1749:   MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1750:   MatPreallocateEnd(dnz, onz);

1752:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1754:   /*
1755:     For each node in the grid: we get the neighbors in the local (on processor ordering
1756:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1757:     PETSc ordering.
1758:   */
1759:   if (!da->prealloc_only) {
1760:     PetscCalloc1(col * col * nc * nc, &values);
1761:     for (i = xs; i < xs + nx; i++) {
1762:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1763:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1764:       for (j = ys; j < ys + ny; j++) {
1765:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1766:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1767:         slot   = i - gxs + gnx * (j - gys);
1768:         cnt    = 0;
1769:         for (ii = istart; ii < iend + 1; ii++) {
1770:           for (jj = jstart; jj < jend + 1; jj++) {
1771:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1772:               cols[cnt++] = slot + ii + gnx * jj;
1773:             }
1774:           }
1775:         }
1776:         MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1777:       }
1778:     }
1779:     PetscFree(values);
1780:     /* do not copy values to GPU since they are all zero and not yet needed there */
1781:     MatBindToCPU(J, PETSC_TRUE);
1782:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1783:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1784:     MatBindToCPU(J, PETSC_FALSE);
1785:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1786:   }
1787:   PetscFree(cols);
1788:   return 0;
1789: }

1791: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1792: {
1793:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1794:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1795:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1796:   MPI_Comm               comm;
1797:   PetscScalar           *values;
1798:   DMBoundaryType         bx, by, bz;
1799:   DMDAStencilType        st;
1800:   ISLocalToGlobalMapping ltog;

1802:   /*
1803:          nc - number of components per grid point
1804:          col - number of colors needed in one direction for single component problem

1806:   */
1807:   DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st);
1808:   col = 2 * s + 1;

1810:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
1811:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
1812:   PetscObjectGetComm((PetscObject)da, &comm);

1814:   PetscMalloc1(col * col * col, &cols);

1816:   DMGetLocalToGlobalMapping(da, &ltog);

1818:   /* determine the matrix preallocation information */
1819:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1820:   for (i = xs; i < xs + nx; i++) {
1821:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1822:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1823:     for (j = ys; j < ys + ny; j++) {
1824:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1825:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1826:       for (k = zs; k < zs + nz; k++) {
1827:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1828:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1830:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1832:         /* Find block columns in block row */
1833:         cnt = 0;
1834:         for (ii = istart; ii < iend + 1; ii++) {
1835:           for (jj = jstart; jj < jend + 1; jj++) {
1836:             for (kk = kstart; kk < kend + 1; kk++) {
1837:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1838:                 cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1839:               }
1840:             }
1841:           }
1842:         }
1843:         MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz);
1844:       }
1845:     }
1846:   }
1847:   MatSeqBAIJSetPreallocation(J, nc, 0, dnz);
1848:   MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1849:   MatPreallocateEnd(dnz, onz);

1851:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1853:   /*
1854:     For each node in the grid: we get the neighbors in the local (on processor ordering
1855:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1856:     PETSc ordering.
1857:   */
1858:   if (!da->prealloc_only) {
1859:     PetscCalloc1(col * col * col * nc * nc, &values);
1860:     for (i = xs; i < xs + nx; i++) {
1861:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1862:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1863:       for (j = ys; j < ys + ny; j++) {
1864:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1865:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1866:         for (k = zs; k < zs + nz; k++) {
1867:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1868:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

1870:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

1872:           cnt = 0;
1873:           for (ii = istart; ii < iend + 1; ii++) {
1874:             for (jj = jstart; jj < jend + 1; jj++) {
1875:               for (kk = kstart; kk < kend + 1; kk++) {
1876:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1877:                   cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1878:                 }
1879:               }
1880:             }
1881:           }
1882:           MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1883:         }
1884:       }
1885:     }
1886:     PetscFree(values);
1887:     /* do not copy values to GPU since they are all zero and not yet needed there */
1888:     MatBindToCPU(J, PETSC_TRUE);
1889:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1890:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1891:     MatBindToCPU(J, PETSC_FALSE);
1892:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
1893:   }
1894:   PetscFree(cols);
1895:   return 0;
1896: }

1898: /*
1899:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
1900:   identify in the local ordering with periodic domain.
1901: */
1902: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1903: {
1904:   PetscInt i, n;

1906:   ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row);
1907:   ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col);
1908:   for (i = 0, n = 0; i < *cnt; i++) {
1909:     if (col[i] >= *row) col[n++] = col[i];
1910:   }
1911:   *cnt = n;
1912:   return 0;
1913: }

1915: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1916: {
1917:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1918:   PetscInt               m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1919:   PetscInt               istart, iend, jstart, jend, ii, jj;
1920:   MPI_Comm               comm;
1921:   PetscScalar           *values;
1922:   DMBoundaryType         bx, by;
1923:   DMDAStencilType        st;
1924:   ISLocalToGlobalMapping ltog;

1926:   /*
1927:      nc - number of components per grid point
1928:      col - number of colors needed in one direction for single component problem
1929:   */
1930:   DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st);
1931:   col = 2 * s + 1;

1933:   DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL);
1934:   DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL);
1935:   PetscObjectGetComm((PetscObject)da, &comm);

1937:   PetscMalloc1(col * col * nc * nc, &cols);

1939:   DMGetLocalToGlobalMapping(da, &ltog);

1941:   /* determine the matrix preallocation information */
1942:   MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1943:   for (i = xs; i < xs + nx; i++) {
1944:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1945:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1946:     for (j = ys; j < ys + ny; j++) {
1947:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1948:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1949:       slot   = i - gxs + gnx * (j - gys);

1951:       /* Find block columns in block row */
1952:       cnt = 0;
1953:       for (ii = istart; ii < iend + 1; ii++) {
1954:         for (jj = jstart; jj < jend + 1; jj++) {
1955:           if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1956:         }
1957:       }
1958:       L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
1959:       MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz);
1960:     }
1961:   }
1962:   MatSeqSBAIJSetPreallocation(J, nc, 0, dnz);
1963:   MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
1964:   MatPreallocateEnd(dnz, onz);

1966:   MatSetLocalToGlobalMapping(J, ltog, ltog);

1968:   /*
1969:     For each node in the grid: we get the neighbors in the local (on processor ordering
1970:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1971:     PETSc ordering.
1972:   */
1973:   if (!da->prealloc_only) {
1974:     PetscCalloc1(col * col * nc * nc, &values);
1975:     for (i = xs; i < xs + nx; i++) {
1976:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1977:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1978:       for (j = ys; j < ys + ny; j++) {
1979:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1980:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1981:         slot   = i - gxs + gnx * (j - gys);

1983:         /* Find block columns in block row */
1984:         cnt = 0;
1985:         for (ii = istart; ii < iend + 1; ii++) {
1986:           for (jj = jstart; jj < jend + 1; jj++) {
1987:             if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1988:           }
1989:         }
1990:         L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
1991:         MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
1992:       }
1993:     }
1994:     PetscFree(values);
1995:     /* do not copy values to GPU since they are all zero and not yet needed there */
1996:     MatBindToCPU(J, PETSC_TRUE);
1997:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
1998:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
1999:     MatBindToCPU(J, PETSC_FALSE);
2000:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2001:   }
2002:   PetscFree(cols);
2003:   return 0;
2004: }

2006: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2007: {
2008:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2009:   PetscInt               m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2010:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2011:   MPI_Comm               comm;
2012:   PetscScalar           *values;
2013:   DMBoundaryType         bx, by, bz;
2014:   DMDAStencilType        st;
2015:   ISLocalToGlobalMapping ltog;

2017:   /*
2018:      nc - number of components per grid point
2019:      col - number of colors needed in one direction for single component problem
2020:   */
2021:   DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st);
2022:   col = 2 * s + 1;

2024:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
2025:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
2026:   PetscObjectGetComm((PetscObject)da, &comm);

2028:   /* create the matrix */
2029:   PetscMalloc1(col * col * col, &cols);

2031:   DMGetLocalToGlobalMapping(da, &ltog);

2033:   /* determine the matrix preallocation information */
2034:   MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2035:   for (i = xs; i < xs + nx; i++) {
2036:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2037:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2038:     for (j = ys; j < ys + ny; j++) {
2039:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2040:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2041:       for (k = zs; k < zs + nz; k++) {
2042:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2043:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2045:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2047:         /* Find block columns in block row */
2048:         cnt = 0;
2049:         for (ii = istart; ii < iend + 1; ii++) {
2050:           for (jj = jstart; jj < jend + 1; jj++) {
2051:             for (kk = kstart; kk < kend + 1; kk++) {
2052:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2053:             }
2054:           }
2055:         }
2056:         L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
2057:         MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz);
2058:       }
2059:     }
2060:   }
2061:   MatSeqSBAIJSetPreallocation(J, nc, 0, dnz);
2062:   MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz);
2063:   MatPreallocateEnd(dnz, onz);

2065:   MatSetLocalToGlobalMapping(J, ltog, ltog);

2067:   /*
2068:     For each node in the grid: we get the neighbors in the local (on processor ordering
2069:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2070:     PETSc ordering.
2071:   */
2072:   if (!da->prealloc_only) {
2073:     PetscCalloc1(col * col * col * nc * nc, &values);
2074:     for (i = xs; i < xs + nx; i++) {
2075:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2076:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2077:       for (j = ys; j < ys + ny; j++) {
2078:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2079:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2080:         for (k = zs; k < zs + nz; k++) {
2081:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2082:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2084:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2086:           cnt = 0;
2087:           for (ii = istart; ii < iend + 1; ii++) {
2088:             for (jj = jstart; jj < jend + 1; jj++) {
2089:               for (kk = kstart; kk < kend + 1; kk++) {
2090:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2091:               }
2092:             }
2093:           }
2094:           L2GFilterUpperTriangular(ltog, &slot, &cnt, cols);
2095:           MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES);
2096:         }
2097:       }
2098:     }
2099:     PetscFree(values);
2100:     /* do not copy values to GPU since they are all zero and not yet needed there */
2101:     MatBindToCPU(J, PETSC_TRUE);
2102:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
2103:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
2104:     MatBindToCPU(J, PETSC_FALSE);
2105:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2106:   }
2107:   PetscFree(cols);
2108:   return 0;
2109: }

2111: /* ---------------------------------------------------------------------------------*/

2113: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2114: {
2115:   PetscInt               xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2116:   PetscInt               m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2117:   PetscInt               istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2118:   DM_DA                 *dd = (DM_DA *)da->data;
2119:   PetscInt               ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2120:   MPI_Comm               comm;
2121:   PetscScalar           *values;
2122:   DMBoundaryType         bx, by, bz;
2123:   ISLocalToGlobalMapping ltog;
2124:   DMDAStencilType        st;
2125:   PetscBool              removedups = PETSC_FALSE;

2127:   /*
2128:          nc - number of components per grid point
2129:          col - number of colors needed in one direction for single component problem

2131:   */
2132:   DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st);
2133:   col = 2 * s + 1;
2135:                  by 2*stencil_width + 1\n");
2137:                  by 2*stencil_width + 1\n");
2139:                  by 2*stencil_width + 1\n");

2141:   /*
2142:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2143:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2144:   */
2145:   if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2146:   if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2147:   if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;

2149:   DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz);
2150:   DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz);
2151:   PetscObjectGetComm((PetscObject)da, &comm);

2153:   PetscMalloc1(col * col * col * nc, &cols);
2154:   DMGetLocalToGlobalMapping(da, &ltog);

2156:   /* determine the matrix preallocation information */
2157:   MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);

2159:   MatSetBlockSize(J, nc);
2160:   for (i = xs; i < xs + nx; i++) {
2161:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2162:     iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2163:     for (j = ys; j < ys + ny; j++) {
2164:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2165:       jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2166:       for (k = zs; k < zs + nz; k++) {
2167:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2168:         kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2170:         slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2172:         for (l = 0; l < nc; l++) {
2173:           cnt = 0;
2174:           for (ii = istart; ii < iend + 1; ii++) {
2175:             for (jj = jstart; jj < jend + 1; jj++) {
2176:               for (kk = kstart; kk < kend + 1; kk++) {
2177:                 if (ii || jj || kk) {
2178:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2179:                     for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2180:                   }
2181:                 } else {
2182:                   if (dfill) {
2183:                     for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2184:                   } else {
2185:                     for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2186:                   }
2187:                 }
2188:               }
2189:             }
2190:           }
2191:           row    = l + nc * (slot);
2192:           maxcnt = PetscMax(maxcnt, cnt);
2193:           if (removedups) MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
2194:           else MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz);
2195:         }
2196:       }
2197:     }
2198:   }
2199:   MatSeqAIJSetPreallocation(J, 0, dnz);
2200:   MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz);
2201:   MatPreallocateEnd(dnz, onz);
2202:   MatSetLocalToGlobalMapping(J, ltog, ltog);

2204:   /*
2205:     For each node in the grid: we get the neighbors in the local (on processor ordering
2206:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2207:     PETSc ordering.
2208:   */
2209:   if (!da->prealloc_only) {
2210:     PetscCalloc1(maxcnt, &values);
2211:     for (i = xs; i < xs + nx; i++) {
2212:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2213:       iend   = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2214:       for (j = ys; j < ys + ny; j++) {
2215:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2216:         jend   = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2217:         for (k = zs; k < zs + nz; k++) {
2218:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2219:           kend   = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));

2221:           slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);

2223:           for (l = 0; l < nc; l++) {
2224:             cnt = 0;
2225:             for (ii = istart; ii < iend + 1; ii++) {
2226:               for (jj = jstart; jj < jend + 1; jj++) {
2227:                 for (kk = kstart; kk < kend + 1; kk++) {
2228:                   if (ii || jj || kk) {
2229:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2230:                       for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2231:                     }
2232:                   } else {
2233:                     if (dfill) {
2234:                       for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2235:                     } else {
2236:                       for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2237:                     }
2238:                   }
2239:                 }
2240:               }
2241:             }
2242:             row = l + nc * (slot);
2243:             MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES);
2244:           }
2245:         }
2246:       }
2247:     }
2248:     PetscFree(values);
2249:     /* do not copy values to GPU since they are all zero and not yet needed there */
2250:     MatBindToCPU(J, PETSC_TRUE);
2251:     MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
2252:     MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
2253:     MatBindToCPU(J, PETSC_FALSE);
2254:     MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE);
2255:   }
2256:   PetscFree(cols);
2257:   return 0;
2258: }