Actual source code: fdda.c
2: #include <petsc/private/dmdaimpl.h>
3: #include <petscmat.h>
4: #include <petscbt.h>
6: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM, ISColoringType, ISColoring *);
7: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM, ISColoringType, ISColoring *);
8: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM, ISColoringType, ISColoring *);
9: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM, ISColoringType, ISColoring *);
11: /*
12: For a ghost index i that may be negative or greater than the upper bound, this
13: maps it into the range 0:m-1 using periodicity
14: */
15: #define SetInRange(i, m) ((i < 0) ? m + i : ((i >= m) ? i - m : i))
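/*
   For example (assuming |i| stays within one period, which the stencil width guarantees):
   SetInRange(-1, m) yields m - 1 and SetInRange(m, m) yields 0, while indices already in 0:m-1 are unchanged.
*/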
17: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill, PetscInt w, PetscInt **rfill)
18: {
19: PetscInt i, j, nz, *fill;
21: PetscFunctionBegin;
22: if (!dfill) PetscFunctionReturn(PETSC_SUCCESS);
24: /* count the number of nonzeros */
25: nz = 0;
26: for (i = 0; i < w; i++) {
27: for (j = 0; j < w; j++) {
28: if (dfill[w * i + j]) nz++;
29: }
30: }
31: PetscCall(PetscMalloc1(nz + w + 1, &fill));
32: /* construct modified CSR storage of nonzero structure */
33: /* fill[0 .. w] marks the start of each row of column indices (and the end of the last row),
34: so fill[1] - fill[0] gives the number of nonzeros in the first row, etc. */
35: nz = w + 1;
36: for (i = 0; i < w; i++) {
37: fill[i] = nz;
38: for (j = 0; j < w; j++) {
39: if (dfill[w * i + j]) {
40: fill[nz] = j;
41: nz++;
42: }
43: }
44: }
45: fill[w] = nz;
47: *rfill = fill;
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
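/*
   A worked example of the layout built above (an illustration, not extra library code): for w = 3 and the dense
   pattern dfill = {1,0,0, 1,1,0, 0,1,1} there are nz = 5 nonzeros, so the routine produces
       fill = {4, 5, 7, 9,  0,  0, 1,  1, 2}
   that is, row starts 4, 5, 7, the terminator 9 = nz + w + 1, and then the column indices of each row.
*/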
51: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse, PetscInt w, PetscInt **rfill)
52: {
53: PetscInt nz;
55: PetscFunctionBegin;
56: if (!dfillsparse) PetscFunctionReturn(PETSC_SUCCESS);
58: /* Determine number of non-zeros */
59: nz = (dfillsparse[w] - w - 1);
61: /* Allocate space for our copy of the given sparse matrix representation. */
62: PetscCall(PetscMalloc1(nz + w + 1, rfill));
63: PetscCall(PetscArraycpy(*rfill, dfillsparse, nz + w + 1));
64: PetscFunctionReturn(PETSC_SUCCESS);
65: }
67: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
68: {
69: PetscInt i, k, cnt = 1;
71: PetscFunctionBegin;
73: /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
74: columns to the left with any nonzeros in them, plus 1 */
75: PetscCall(PetscCalloc1(dd->w, &dd->ofillcols));
76: for (i = 0; i < dd->w; i++) {
77: for (k = dd->ofill[i]; k < dd->ofill[i + 1]; k++) dd->ofillcols[dd->ofill[k]] = 1;
78: }
79: for (i = 0; i < dd->w; i++) {
80: if (dd->ofillcols[i]) dd->ofillcols[i] = cnt++;
81: }
82: PetscFunctionReturn(PETSC_SUCCESS);
83: }
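/*
   For example, if only columns 0 and 2 of ofill contain a nonzero, the loop above produces
   ofillcols = {1, 0, 2}: each nonzero column gets its 1-based rank among the nonzero columns, the rest stay 0.
*/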
85: /*@
86: DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
87: of the matrix returned by `DMCreateMatrix()`.
89: Logically Collective
91: Input Parameters:
92: + da - the distributed array
93: . dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
94: - ofill - the fill pattern in the off-diagonal blocks
96: Level: developer
98: Notes:
99: This only makes sense when you are doing multicomponent problems but using the
100: `MATMPIAIJ` matrix format
102: The format for dfill and ofill is a two-dimensional dof-by-dof matrix with 1 entries
103: representing coupling and 0 entries for missing coupling. For example
104: .vb
105: dfill[9] = {1, 0, 0,
106: 1, 1, 0,
107: 0, 1, 1}
108: .ve
109: means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
110: itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
111: diagonal block).
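   A minimal usage sketch (assuming `da` is a `DMDA` with 3 degrees of freedom per point and `J` is a `Mat`; the
   `ofill` pattern below is purely illustrative) is
.vb
   PetscInt dfill[9] = {1, 0, 0,
                        1, 1, 0,
                        0, 1, 1};
   PetscInt ofill[9] = {1, 0, 0,
                        0, 1, 0,
                        0, 0, 1};
   PetscCall(DMDASetBlockFills(da, dfill, ofill));
   PetscCall(DMCreateMatrix(da, &J));
.ve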
113: `DMDASetGetMatrix()` allows you to provide general code for more complicated nonzero patterns than
114: can be represented in the dfill, ofill format.
116: Contributed by Glenn Hammond
118: .seealso: `DM`, `DMDA`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
119: @*/
120: PetscErrorCode DMDASetBlockFills(DM da, const PetscInt *dfill, const PetscInt *ofill)
121: {
122: DM_DA *dd = (DM_DA *)da->data;
124: PetscFunctionBegin;
125: /* save the given dfill and ofill information */
126: PetscCall(DMDASetBlockFills_Private(dfill, dd->w, &dd->dfill));
127: PetscCall(DMDASetBlockFills_Private(ofill, dd->w, &dd->ofill));
129: /* count nonzeros in ofill columns */
130: PetscCall(DMDASetBlockFills_Private2(dd));
131: PetscFunctionReturn(PETSC_SUCCESS);
132: }
134: /*@
135: DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
136: of the matrix returned by `DMCreateMatrix()`, using sparse representations
137: of fill patterns.
139: Logically Collective
141: Input Parameters:
142: + da - the distributed array
143: . dfill - the sparse fill pattern in the diagonal block (may be `NULL`, means use dense block)
144: - ofill - the sparse fill pattern in the off-diagonal blocks
146: Level: developer
148: Notes:
149: This only makes sense when you are doing multicomponent problems but using the
150: `MATMPIAIJ` matrix format
152: The format for `dfill` and `ofill` is a sparse representation of a
153: dof-by-dof matrix with 1 entries representing coupling and 0 entries
154: for missing coupling. The sparse representation is a 1 dimensional
155: array of length nz + dof + 1, where nz is the number of non-zeros in
156: the matrix. The first dof entries in the array give the
157: starting array indices of each row's items in the rest of the array,
158: the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
159: and the remaining nz items give the column indices of each of
160: the 1s within the logical 2D matrix. Each row's items within
161: the array are the column indices of the 1s within that row
162: of the 2D matrix. PETSc developers may recognize that this is the
163: same format as that computed by the `DMDASetBlockFills_Private()`
164: function from a dense 2D matrix representation.
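   For example (matching the dense `dfill` shown in `DMDASetBlockFills()`), with dof = 3 the pattern
.vb
   1 0 0
   1 1 0
   0 1 1
.ve
   has nz = 5 and is passed in sparse form as
.vb
   dfillsparse[9] = {4, 5, 7, 9, 0, 0, 1, 1, 2}
.ve
   where 4, 5, 7 are the row starting indices, 9 = nz + dof + 1 is the total length, and the remaining five
   entries are the column indices of the 1s, row by row.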
166: `DMDASetGetMatrix()` allows you to provide general code for more complicated nonzero patterns than
167: can be represented in the `dfill`, `ofill` format.
169: Contributed by Philip C. Roth
171: .seealso: `DM`, `DMDA`, `DMDASetBlockFills()`, `DMCreateMatrix()`, `DMDASetGetMatrix()`, `DMSetMatrixPreallocateOnly()`
172: @*/
173: PetscErrorCode DMDASetBlockFillsSparse(DM da, const PetscInt *dfillsparse, const PetscInt *ofillsparse)
174: {
175: DM_DA *dd = (DM_DA *)da->data;
177: PetscFunctionBegin;
178: /* save the given dfill and ofill information */
179: PetscCall(DMDASetBlockFillsSparse_Private(dfillsparse, dd->w, &dd->dfill));
180: PetscCall(DMDASetBlockFillsSparse_Private(ofillsparse, dd->w, &dd->ofill));
182: /* count nonzeros in ofill columns */
183: PetscCall(DMDASetBlockFills_Private2(dd));
184: PetscFunctionReturn(PETSC_SUCCESS);
185: }
187: PetscErrorCode DMCreateColoring_DA(DM da, ISColoringType ctype, ISColoring *coloring)
188: {
189: PetscInt dim, m, n, p, nc;
190: DMBoundaryType bx, by, bz;
191: MPI_Comm comm;
192: PetscMPIInt size;
193: PetscBool isBAIJ;
194: DM_DA *dd = (DM_DA *)da->data;
196: PetscFunctionBegin;
197: /*
198: m
199: ------------------------------------------------------
200: | |
201: | |
202: | ---------------------- |
203: | | | |
204: n | yn | | |
205: | | | |
206: | .--------------------- |
207: | (xs,ys) xn |
208: | . |
209: | (gxs,gys) |
210: | |
211: -----------------------------------------------------
212: */
214: /*
215: nc - number of components per grid point
216: col - number of colors needed in one direction for single component problem
218: */
219: PetscCall(DMDAGetInfo(da, &dim, NULL, NULL, NULL, &m, &n, &p, &nc, NULL, &bx, &by, &bz, NULL));
221: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
222: PetscCallMPI(MPI_Comm_size(comm, &size));
223: if (ctype == IS_COLORING_LOCAL) {
224: if (size == 1) {
225: ctype = IS_COLORING_GLOBAL;
226: } else {
227: PetscCheck((dim == 1) || !((m == 1 && bx == DM_BOUNDARY_PERIODIC) || (n == 1 && by == DM_BOUNDARY_PERIODIC) || (p == 1 && bz == DM_BOUNDARY_PERIODIC)), PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain on the same process");
228: }
229: }
231: /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
232: matrices is for the blocks, not the individual matrix elements */
233: PetscCall(PetscStrbeginswith(da->mattype, MATBAIJ, &isBAIJ));
234: if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATMPIBAIJ, &isBAIJ));
235: if (!isBAIJ) PetscCall(PetscStrbeginswith(da->mattype, MATSEQBAIJ, &isBAIJ));
236: if (isBAIJ) {
237: dd->w = 1;
238: dd->xs = dd->xs / nc;
239: dd->xe = dd->xe / nc;
240: dd->Xs = dd->Xs / nc;
241: dd->Xe = dd->Xe / nc;
242: }
244: /*
245: We do not provide a getcoloring function in the DMDA operations because
246: the basic DMDA does not know about matrices. We think of DMDA as being
247: more low-level than matrices.
248: */
249: if (dim == 1) PetscCall(DMCreateColoring_DA_1d_MPIAIJ(da, ctype, coloring));
250: else if (dim == 2) PetscCall(DMCreateColoring_DA_2d_MPIAIJ(da, ctype, coloring));
251: else if (dim == 3) PetscCall(DMCreateColoring_DA_3d_MPIAIJ(da, ctype, coloring));
252: else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not done for %" PetscInt_FMT " dimension, send us mail petsc-maint@mcs.anl.gov for code", dim);
253: if (isBAIJ) {
254: dd->w = nc;
255: dd->xs = dd->xs * nc;
256: dd->xe = dd->xe * nc;
257: dd->Xs = dd->Xs * nc;
258: dd->Xe = dd->Xe * nc;
259: }
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
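/*
   A typical use of the colorings produced here (a sketch; assumes J was obtained from DMCreateMatrix(da, &J)):

     ISColoring    coloring;
     MatFDColoring fdcoloring;
     PetscCall(DMCreateColoring(da, IS_COLORING_GLOBAL, &coloring));
     PetscCall(MatFDColoringCreate(J, coloring, &fdcoloring));
     PetscCall(MatFDColoringSetFromOptions(fdcoloring));
     PetscCall(MatFDColoringSetUp(J, coloring, fdcoloring));
     PetscCall(ISColoringDestroy(&coloring));

   After supplying the function to difference with MatFDColoringSetFunction(), MatFDColoringApply() can then
   compute a finite-difference Jacobian into J.
*/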
263: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
264: {
265: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, M, N, dim, s, k, nc, col;
266: PetscInt ncolors = 0;
267: MPI_Comm comm;
268: DMBoundaryType bx, by;
269: DMDAStencilType st;
270: ISColoringValue *colors;
271: DM_DA *dd = (DM_DA *)da->data;
273: PetscFunctionBegin;
274: /*
275: nc - number of components per grid point
276: col - number of colors needed in one direction for single component problem
278: */
279: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
280: col = 2 * s + 1;
281: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
282: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
283: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
285: /* special case as taught to us by Paul Hovland */
286: if (st == DMDA_STENCIL_STAR && s == 1) {
287: PetscCall(DMCreateColoring_DA_2d_5pt_MPIAIJ(da, ctype, coloring));
288: } else {
289: if (ctype == IS_COLORING_GLOBAL) {
290: if (!dd->localcoloring) {
291: PetscCall(PetscMalloc1(nc * nx * ny, &colors));
292: ii = 0;
293: for (j = ys; j < ys + ny; j++) {
294: for (i = xs; i < xs + nx; i++) {
295: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((i % col) + col * (j % col));
296: }
297: }
298: ncolors = nc + nc * (col - 1 + col * (col - 1));
299: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
300: }
301: *coloring = dd->localcoloring;
302: } else if (ctype == IS_COLORING_LOCAL) {
303: if (!dd->ghostedcoloring) {
304: PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
305: ii = 0;
306: for (j = gys; j < gys + gny; j++) {
307: for (i = gxs; i < gxs + gnx; i++) {
308: for (k = 0; k < nc; k++) {
309: /* the complicated stuff is to handle periodic boundaries */
310: colors[ii++] = k + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col));
311: }
312: }
313: }
314: ncolors = nc + nc * (col - 1 + col * (col - 1));
315: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
316: /* PetscIntView(ncolors,(PetscInt*)colors,0); */
318: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
319: }
320: *coloring = dd->ghostedcoloring;
321: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
322: }
323: PetscCall(ISColoringReference(*coloring));
324: PetscFunctionReturn(PETSC_SUCCESS);
325: }
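/*
   A quick check of the counts above: with stencil width s = 1 a box stencil gives col = 3, so a
   single-component problem uses ncolors = nc * col * col = 9 colors, one per (i mod 3, j mod 3) pair.
*/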
327: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
328: {
329: PetscInt xs, ys, nx, ny, i, j, gxs, gys, gnx, gny, m, n, p, dim, s, k, nc, col, zs, gzs, ii, l, nz, gnz, M, N, P;
330: PetscInt ncolors;
331: MPI_Comm comm;
332: DMBoundaryType bx, by, bz;
333: DMDAStencilType st;
334: ISColoringValue *colors;
335: DM_DA *dd = (DM_DA *)da->data;
337: PetscFunctionBegin;
338: /*
339: nc - number of components per grid point
340: col - number of colors needed in one direction for single component problem
341: */
342: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
343: col = 2 * s + 1;
344: PetscCheck(bx != DM_BOUNDARY_PERIODIC || (m % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in X is divisible\n\
345: by 2*stencil_width + 1\n");
346: PetscCheck(by != DM_BOUNDARY_PERIODIC || (n % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Y is divisible\n\
347: by 2*stencil_width + 1\n");
348: PetscCheck(bz != DM_BOUNDARY_PERIODIC || (p % col) == 0, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "For coloring efficiency ensure number of grid points in Z is divisible\n\
349: by 2*stencil_width + 1\n");
351: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
352: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
353: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
355: /* create the coloring */
356: if (ctype == IS_COLORING_GLOBAL) {
357: if (!dd->localcoloring) {
358: PetscCall(PetscMalloc1(nc * nx * ny * nz, &colors));
359: ii = 0;
360: for (k = zs; k < zs + nz; k++) {
361: for (j = ys; j < ys + ny; j++) {
362: for (i = xs; i < xs + nx; i++) {
363: for (l = 0; l < nc; l++) colors[ii++] = l + nc * ((i % col) + col * (j % col) + col * col * (k % col));
364: }
365: }
366: }
367: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
368: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny * nz, colors, PETSC_OWN_POINTER, &dd->localcoloring));
369: }
370: *coloring = dd->localcoloring;
371: } else if (ctype == IS_COLORING_LOCAL) {
372: if (!dd->ghostedcoloring) {
373: PetscCall(PetscMalloc1(nc * gnx * gny * gnz, &colors));
374: ii = 0;
375: for (k = gzs; k < gzs + gnz; k++) {
376: for (j = gys; j < gys + gny; j++) {
377: for (i = gxs; i < gxs + gnx; i++) {
378: for (l = 0; l < nc; l++) {
379: /* the complicated stuff is to handle periodic boundaries */
380: colors[ii++] = l + nc * ((SetInRange(i, m) % col) + col * (SetInRange(j, n) % col) + col * col * (SetInRange(k, p) % col));
381: }
382: }
383: }
384: }
385: ncolors = nc + nc * (col - 1 + col * (col - 1) + col * col * (col - 1));
386: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny * gnz, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
387: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
388: }
389: *coloring = dd->ghostedcoloring;
390: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
391: PetscCall(ISColoringReference(*coloring));
392: PetscFunctionReturn(PETSC_SUCCESS);
393: }
395: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
396: {
397: PetscInt xs, nx, i, i1, gxs, gnx, l, m, M, dim, s, nc, col;
398: PetscInt ncolors;
399: MPI_Comm comm;
400: DMBoundaryType bx;
401: ISColoringValue *colors;
402: DM_DA *dd = (DM_DA *)da->data;
404: PetscFunctionBegin;
405: /*
406: nc - number of components per grid point
407: col - number of colors needed in one direction for single component problem
408: */
409: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, &M, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
410: col = 2 * s + 1;
411: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
412: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
413: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
415: /* create the coloring */
416: if (ctype == IS_COLORING_GLOBAL) {
417: if (!dd->localcoloring) {
418: PetscCall(PetscMalloc1(nc * nx, &colors));
419: if (dd->ofillcols) {
420: PetscInt tc = 0;
421: for (i = 0; i < nc; i++) tc += (PetscInt)(dd->ofillcols[i] > 0);
422: i1 = 0;
423: for (i = xs; i < xs + nx; i++) {
424: for (l = 0; l < nc; l++) {
425: if (dd->ofillcols[l] && (i % col)) {
426: colors[i1++] = nc - 1 + tc * ((i % col) - 1) + dd->ofillcols[l];
427: } else {
428: colors[i1++] = l;
429: }
430: }
431: }
432: ncolors = nc + 2 * s * tc;
433: } else {
434: i1 = 0;
435: for (i = xs; i < xs + nx; i++) {
436: for (l = 0; l < nc; l++) colors[i1++] = l + nc * (i % col);
437: }
438: ncolors = nc + nc * (col - 1);
439: }
440: PetscCall(ISColoringCreate(comm, ncolors, nc * nx, colors, PETSC_OWN_POINTER, &dd->localcoloring));
441: }
442: *coloring = dd->localcoloring;
443: } else if (ctype == IS_COLORING_LOCAL) {
444: if (!dd->ghostedcoloring) {
445: PetscCall(PetscMalloc1(nc * gnx, &colors));
446: i1 = 0;
447: for (i = gxs; i < gxs + gnx; i++) {
448: for (l = 0; l < nc; l++) {
449: /* the complicated stuff is to handle periodic boundaries */
450: colors[i1++] = l + nc * (SetInRange(i, m) % col);
451: }
452: }
453: ncolors = nc + nc * (col - 1);
454: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
455: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
456: }
457: *coloring = dd->ghostedcoloring;
458: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
459: PetscCall(ISColoringReference(*coloring));
460: PetscFunctionReturn(PETSC_SUCCESS);
461: }
463: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da, ISColoringType ctype, ISColoring *coloring)
464: {
465: PetscInt xs, ys, nx, ny, i, j, ii, gxs, gys, gnx, gny, m, n, dim, s, k, nc;
466: PetscInt ncolors;
467: MPI_Comm comm;
468: DMBoundaryType bx, by;
469: ISColoringValue *colors;
470: DM_DA *dd = (DM_DA *)da->data;
472: PetscFunctionBegin;
473: /*
474: nc - number of components per grid point
475: col - number of colors needed in one direction for single component problem
476: */
477: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, NULL));
478: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
479: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
480: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
481: /* create the coloring */
482: if (ctype == IS_COLORING_GLOBAL) {
483: if (!dd->localcoloring) {
484: PetscCall(PetscMalloc1(nc * nx * ny, &colors));
485: ii = 0;
486: for (j = ys; j < ys + ny; j++) {
487: for (i = xs; i < xs + nx; i++) {
488: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * j + i) % 5);
489: }
490: }
491: ncolors = 5 * nc;
492: PetscCall(ISColoringCreate(comm, ncolors, nc * nx * ny, colors, PETSC_OWN_POINTER, &dd->localcoloring));
493: }
494: *coloring = dd->localcoloring;
495: } else if (ctype == IS_COLORING_LOCAL) {
496: if (!dd->ghostedcoloring) {
497: PetscCall(PetscMalloc1(nc * gnx * gny, &colors));
498: ii = 0;
499: for (j = gys; j < gys + gny; j++) {
500: for (i = gxs; i < gxs + gnx; i++) {
501: for (k = 0; k < nc; k++) colors[ii++] = k + nc * ((3 * SetInRange(j, n) + SetInRange(i, m)) % 5);
502: }
503: }
504: ncolors = 5 * nc;
505: PetscCall(ISColoringCreate(comm, ncolors, nc * gnx * gny, colors, PETSC_OWN_POINTER, &dd->ghostedcoloring));
506: PetscCall(ISColoringSetType(dd->ghostedcoloring, IS_COLORING_LOCAL));
507: }
508: *coloring = dd->ghostedcoloring;
509: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_ARG_WRONG, "Unknown ISColoringType %d", (int)ctype);
510: PetscFunctionReturn(PETSC_SUCCESS);
511: }
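/*
   For this 5-point star stencil the coloring needs only 5 colors per component, color (3*j + i) mod 5,
   instead of the 9 the general box formula would use: a point and its four neighbors (i +- 1, j) and
   (i, j +- 1) differ from it by +-1 and +-3 mod 5, so all five residues are distinct.
*/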
513: /* =========================================================================== */
514: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM, Mat);
515: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM, Mat);
516: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM, Mat);
517: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM, Mat);
518: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM, Mat);
519: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM, Mat);
520: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM, Mat);
521: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM, Mat);
522: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM, Mat);
523: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM, Mat);
524: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM, Mat);
525: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM, Mat);
526: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM, Mat);
527: extern PetscErrorCode DMCreateMatrix_DA_IS(DM, Mat);
529: /*@C
530: MatSetupDM - Sets the `DMDA` that is to be used by the HYPRE_StructMatrix PETSc matrix
532: Logically Collective
534: Input Parameters:
535: + mat - the matrix
536: - da - the da
538: Level: intermediate
540: .seealso: `DMDA`, `Mat`, `MatSetUp()`
541: @*/
542: PetscErrorCode MatSetupDM(Mat mat, DM da)
543: {
544: PetscFunctionBegin;
547: PetscTryMethod(mat, "MatSetupDM_C", (Mat, DM), (mat, da));
548: PetscFunctionReturn(PETSC_SUCCESS);
549: }
551: PetscErrorCode MatView_MPI_DA(Mat A, PetscViewer viewer)
552: {
553: DM da;
554: const char *prefix;
555: Mat Anatural;
556: AO ao;
557: PetscInt rstart, rend, *petsc, i;
558: IS is;
559: MPI_Comm comm;
560: PetscViewerFormat format;
562: PetscFunctionBegin;
563: /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
564: PetscCall(PetscViewerGetFormat(viewer, &format));
565: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
567: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
568: PetscCall(MatGetDM(A, &da));
569: PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
571: PetscCall(DMDAGetAO(da, &ao));
572: PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
573: PetscCall(PetscMalloc1(rend - rstart, &petsc));
574: for (i = rstart; i < rend; i++) petsc[i - rstart] = i;
575: PetscCall(AOApplicationToPetsc(ao, rend - rstart, petsc));
576: PetscCall(ISCreateGeneral(comm, rend - rstart, petsc, PETSC_OWN_POINTER, &is));
578: /* call viewer on natural ordering */
579: PetscCall(MatCreateSubMatrix(A, is, is, MAT_INITIAL_MATRIX, &Anatural));
580: PetscCall(ISDestroy(&is));
581: PetscCall(PetscObjectGetOptionsPrefix((PetscObject)A, &prefix));
582: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)Anatural, prefix));
583: PetscCall(PetscObjectSetName((PetscObject)Anatural, ((PetscObject)A)->name));
584: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE;
585: PetscCall(MatView(Anatural, viewer));
586: ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
587: PetscCall(MatDestroy(&Anatural));
588: PetscFunctionReturn(PETSC_SUCCESS);
589: }
591: PetscErrorCode MatLoad_MPI_DA(Mat A, PetscViewer viewer)
592: {
593: DM da;
594: Mat Anatural, Aapp;
595: AO ao;
596: PetscInt rstart, rend, *app, i, m, n, M, N;
597: IS is;
598: MPI_Comm comm;
600: PetscFunctionBegin;
601: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
602: PetscCall(MatGetDM(A, &da));
603: PetscCheck(da, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix not generated from a DMDA");
605: /* Load the matrix in natural ordering */
606: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Anatural));
607: PetscCall(MatSetType(Anatural, ((PetscObject)A)->type_name));
608: PetscCall(MatGetSize(A, &M, &N));
609: PetscCall(MatGetLocalSize(A, &m, &n));
610: PetscCall(MatSetSizes(Anatural, m, n, M, N));
611: PetscCall(MatLoad(Anatural, viewer));
613: /* Map natural ordering to application ordering and create IS */
614: PetscCall(DMDAGetAO(da, &ao));
615: PetscCall(MatGetOwnershipRange(Anatural, &rstart, &rend));
616: PetscCall(PetscMalloc1(rend - rstart, &app));
617: for (i = rstart; i < rend; i++) app[i - rstart] = i;
618: PetscCall(AOPetscToApplication(ao, rend - rstart, app));
619: PetscCall(ISCreateGeneral(comm, rend - rstart, app, PETSC_OWN_POINTER, &is));
621: /* Do permutation and replace header */
622: PetscCall(MatCreateSubMatrix(Anatural, is, is, MAT_INITIAL_MATRIX, &Aapp));
623: PetscCall(MatHeaderReplace(A, &Aapp));
624: PetscCall(ISDestroy(&is));
625: PetscCall(MatDestroy(&Anatural));
626: PetscFunctionReturn(PETSC_SUCCESS);
627: }
629: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
630: {
631: PetscInt dim, dof, nx, ny, nz, dims[3], starts[3], M, N, P;
632: Mat A;
633: MPI_Comm comm;
634: MatType Atype;
635: MatType mtype;
636: PetscMPIInt size;
637: DM_DA *dd = (DM_DA *)da->data;
638: void (*aij)(void) = NULL, (*baij)(void) = NULL, (*sbaij)(void) = NULL, (*sell)(void) = NULL, (*is)(void) = NULL;
640: PetscFunctionBegin;
641: PetscCall(MatInitializePackage());
642: mtype = da->mattype;
644: /*
645: m
646: ------------------------------------------------------
647: | |
648: | |
649: | ---------------------- |
650: | | | |
651: n | ny | | |
652: | | | |
653: | .--------------------- |
654: | (xs,ys) nx |
655: | . |
656: | (gxs,gys) |
657: | |
658: -----------------------------------------------------
659: */
661: /*
662: nc - number of components per grid point
663: col - number of colors needed in one direction for single component problem
664: */
665: M = dd->M;
666: N = dd->N;
667: P = dd->P;
668: dim = da->dim;
669: dof = dd->w;
670: /* PetscCall(DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL)); */
671: PetscCall(DMDAGetCorners(da, NULL, NULL, NULL, &nx, &ny, &nz));
672: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
673: PetscCall(MatCreate(comm, &A));
674: PetscCall(MatSetSizes(A, dof * nx * ny * nz, dof * nx * ny * nz, dof * M * N * P, dof * M * N * P));
675: PetscCall(MatSetType(A, mtype));
676: PetscCall(MatSetFromOptions(A));
677: if (dof * nx * ny * nz < da->bind_below) {
678: PetscCall(MatSetBindingPropagates(A, PETSC_TRUE));
679: PetscCall(MatBindToCPU(A, PETSC_TRUE));
680: }
681: PetscCall(MatSetDM(A, da));
682: if (da->structure_only) PetscCall(MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE));
683: PetscCall(MatGetType(A, &Atype));
684: /*
685: We do not provide a getmatrix function in the DMDA operations because
686: the basic DMDA does not know about matrices. We think of DMDA as being
687: more low-level than matrices. This is somewhat of a cheat, because sometimes
688: we think of DMDA as higher level than matrices.
690: We could switch based on Atype (or mtype), but we do not since the
691: specialized setting routines depend only on the particular preallocation
692: details of the matrix, not the type itself.
693: */
694: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJSetPreallocation_C", &aij));
695: if (!aij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJSetPreallocation_C", &aij));
696: if (!aij) {
697: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIBAIJSetPreallocation_C", &baij));
698: if (!baij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", &baij));
699: if (!baij) {
700: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISBAIJSetPreallocation_C", &sbaij));
701: if (!sbaij) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSBAIJSetPreallocation_C", &sbaij));
702: if (!sbaij) {
703: PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPISELLSetPreallocation_C", &sell));
704: if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqSELLSetPreallocation_C", &sell));
705: }
706: if (!sell) PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatISSetPreallocation_C", &is));
707: }
708: }
709: if (aij) {
710: if (dim == 1) {
711: if (dd->ofill) {
712: PetscCall(DMCreateMatrix_DA_1d_MPIAIJ_Fill(da, A));
713: } else {
714: DMBoundaryType bx;
715: PetscMPIInt size;
716: PetscCall(DMDAGetInfo(da, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &bx, NULL, NULL, NULL));
717: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
718: if (size == 1 && bx == DM_BOUNDARY_NONE) {
719: PetscCall(DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da, A));
720: } else {
721: PetscCall(DMCreateMatrix_DA_1d_MPIAIJ(da, A));
722: }
723: }
724: } else if (dim == 2) {
725: if (dd->ofill) {
726: PetscCall(DMCreateMatrix_DA_2d_MPIAIJ_Fill(da, A));
727: } else {
728: PetscCall(DMCreateMatrix_DA_2d_MPIAIJ(da, A));
729: }
730: } else if (dim == 3) {
731: if (dd->ofill) {
732: PetscCall(DMCreateMatrix_DA_3d_MPIAIJ_Fill(da, A));
733: } else {
734: PetscCall(DMCreateMatrix_DA_3d_MPIAIJ(da, A));
735: }
736: }
737: } else if (baij) {
738: if (dim == 2) {
739: PetscCall(DMCreateMatrix_DA_2d_MPIBAIJ(da, A));
740: } else if (dim == 3) {
741: PetscCall(DMCreateMatrix_DA_3d_MPIBAIJ(da, A));
742: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
743: } else if (sbaij) {
744: if (dim == 2) {
745: PetscCall(DMCreateMatrix_DA_2d_MPISBAIJ(da, A));
746: } else if (dim == 3) {
747: PetscCall(DMCreateMatrix_DA_3d_MPISBAIJ(da, A));
748: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
749: } else if (sell) {
750: if (dim == 2) {
751: PetscCall(DMCreateMatrix_DA_2d_MPISELL(da, A));
752: } else if (dim == 3) {
753: PetscCall(DMCreateMatrix_DA_3d_MPISELL(da, A));
754: } else SETERRQ(PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Not implemented for %" PetscInt_FMT " dimension and Matrix Type: %s in %" PetscInt_FMT " dimension! Send mail to petsc-maint@mcs.anl.gov for code", dim, Atype, dim);
755: } else if (is) {
756: PetscCall(DMCreateMatrix_DA_IS(da, A));
757: } else {
758: ISLocalToGlobalMapping ltog;
760: PetscCall(MatSetBlockSize(A, dof));
761: PetscCall(MatSetUp(A));
762: PetscCall(DMGetLocalToGlobalMapping(da, <og));
763: PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog));
764: }
765: PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2]));
766: PetscCall(MatSetStencil(A, dim, dims, starts, dof));
767: PetscCall(MatSetDM(A, da));
768: PetscCallMPI(MPI_Comm_size(comm, &size));
769: if (size > 1) {
770: /* change viewer to display matrix in natural ordering */
771: PetscCall(MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA));
772: PetscCall(MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA));
773: }
774: *J = A;
775: PetscFunctionReturn(PETSC_SUCCESS);
776: }
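/*
   Typical user-level use of the dispatch above (a sketch; da is assumed to be a fully set-up DMDA):

     Mat J;
     PetscCall(DMSetMatType(da, MATAIJ));            (or MATBAIJ, MATSBAIJ, MATSELL, MATIS, ...)
     PetscCall(DMCreateMatrix(da, &J));

   The matrix is returned preallocated for the DMDA stencil with a local-to-global mapping and stencil
   information attached, so MatSetValuesLocal() and MatSetValuesStencil() can be used immediately.
*/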
778: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]);
780: PetscErrorCode DMCreateMatrix_DA_IS(DM dm, Mat J)
781: {
782: DM_DA *da = (DM_DA *)dm->data;
783: Mat lJ, P;
784: ISLocalToGlobalMapping ltog;
785: IS is;
786: PetscBT bt;
787: const PetscInt *e_loc, *idx;
788: PetscInt i, nel, nen, nv, dof, *gidx, n, N;
790: /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted).
791: We need to filter out the local indices that are not represented through the DMDAGetElements decomposition */
792: PetscFunctionBegin;
793: dof = da->w;
794: PetscCall(MatSetBlockSize(J, dof));
795: PetscCall(DMGetLocalToGlobalMapping(dm, <og));
797: /* flag local element indices in the local DMDA numbering */
798: PetscCall(ISLocalToGlobalMappingGetSize(ltog, &nv));
799: PetscCall(PetscBTCreate(nv / dof, &bt));
800: PetscCall(DMDAGetElements(dm, &nel, &nen, &e_loc)); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
801: for (i = 0; i < nel * nen; i++) PetscCall(PetscBTSet(bt, e_loc[i]));
803: /* filter out (set to -1) the global indices not used by the local elements */
804: PetscCall(PetscMalloc1(nv / dof, &gidx));
805: PetscCall(ISLocalToGlobalMappingGetBlockIndices(ltog, &idx));
806: PetscCall(PetscArraycpy(gidx, idx, nv / dof));
807: PetscCall(ISLocalToGlobalMappingRestoreBlockIndices(ltog, &idx));
808: for (i = 0; i < nv / dof; i++)
809: if (!PetscBTLookup(bt, i)) gidx[i] = -1;
810: PetscCall(PetscBTDestroy(&bt));
811: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)dm), dof, nv / dof, gidx, PETSC_OWN_POINTER, &is));
812: PetscCall(ISLocalToGlobalMappingCreateIS(is, <og));
813: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
814: PetscCall(ISLocalToGlobalMappingDestroy(<og));
815: PetscCall(ISDestroy(&is));
817: /* Preallocation */
818: if (dm->prealloc_skip) {
819: PetscCall(MatSetUp(J));
820: } else {
821: PetscCall(MatISGetLocalMat(J, &lJ));
822: PetscCall(MatGetLocalToGlobalMapping(lJ, <og, NULL));
823: PetscCall(MatCreate(PetscObjectComm((PetscObject)lJ), &P));
824: PetscCall(MatSetType(P, MATPREALLOCATOR));
825: PetscCall(MatSetLocalToGlobalMapping(P, ltog, ltog));
826: PetscCall(MatGetSize(lJ, &N, NULL));
827: PetscCall(MatGetLocalSize(lJ, &n, NULL));
828: PetscCall(MatSetSizes(P, n, n, N, N));
829: PetscCall(MatSetBlockSize(P, dof));
830: PetscCall(MatSetUp(P));
831: for (i = 0; i < nel; i++) PetscCall(MatSetValuesBlockedLocal(P, nen, e_loc + i * nen, nen, e_loc + i * nen, NULL, INSERT_VALUES));
832: PetscCall(MatPreallocatorPreallocate(P, (PetscBool)!da->prealloc_only, lJ));
833: PetscCall(MatISRestoreLocalMat(J, &lJ));
834: PetscCall(DMDARestoreElements(dm, &nel, &nen, &e_loc));
835: PetscCall(MatDestroy(&P));
837: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
838: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
839: }
840: PetscFunctionReturn(PETSC_SUCCESS);
841: }
843: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da, Mat J)
844: {
845: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p;
846: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
847: MPI_Comm comm;
848: PetscScalar *values;
849: DMBoundaryType bx, by;
850: ISLocalToGlobalMapping ltog;
851: DMDAStencilType st;
853: PetscFunctionBegin;
854: /*
855: nc - number of components per grid point
856: col - number of colors needed in one direction for single component problem
857: */
858: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
859: col = 2 * s + 1;
860: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
861: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
862: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
864: PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
865: PetscCall(DMGetLocalToGlobalMapping(da, <og));
867: PetscCall(MatSetBlockSize(J, nc));
868: /* determine the matrix preallocation information */
869: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
870: for (i = xs; i < xs + nx; i++) {
871: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
872: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
874: for (j = ys; j < ys + ny; j++) {
875: slot = i - gxs + gnx * (j - gys);
877: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
878: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
880: cnt = 0;
881: for (k = 0; k < nc; k++) {
882: for (l = lstart; l < lend + 1; l++) {
883: for (p = pstart; p < pend + 1; p++) {
884: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
885: cols[cnt++] = k + nc * (slot + gnx * l + p);
886: }
887: }
888: }
889: rows[k] = k + nc * (slot);
890: }
891: PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
892: }
893: }
894: PetscCall(MatSetBlockSize(J, nc));
895: PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
896: PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
897: MatPreallocateEnd(dnz, onz);
899: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
901: /*
902: For each node in the grid, we get the neighbors in the local (on-process) ordering
903: that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
904: PETSc ordering.
905: */
906: if (!da->prealloc_only) {
907: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
908: for (i = xs; i < xs + nx; i++) {
909: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
910: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
912: for (j = ys; j < ys + ny; j++) {
913: slot = i - gxs + gnx * (j - gys);
915: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
916: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
918: cnt = 0;
919: for (k = 0; k < nc; k++) {
920: for (l = lstart; l < lend + 1; l++) {
921: for (p = pstart; p < pend + 1; p++) {
922: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
923: cols[cnt++] = k + nc * (slot + gnx * l + p);
924: }
925: }
926: }
927: rows[k] = k + nc * (slot);
928: }
929: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
930: }
931: }
932: PetscCall(PetscFree(values));
933: /* do not copy values to GPU since they are all zero and not yet needed there */
934: PetscCall(MatBindToCPU(J, PETSC_TRUE));
935: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
936: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
937: PetscCall(MatBindToCPU(J, PETSC_FALSE));
938: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
939: }
940: PetscCall(PetscFree2(rows, cols));
941: PetscFunctionReturn(PETSC_SUCCESS);
942: }
944: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da, Mat J)
945: {
946: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
947: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
948: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
949: MPI_Comm comm;
950: PetscScalar *values;
951: DMBoundaryType bx, by, bz;
952: ISLocalToGlobalMapping ltog;
953: DMDAStencilType st;
955: PetscFunctionBegin;
956: /*
957: nc - number of components per grid point
958: col - number of colors needed in one direction for single component problem
959: */
960: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
961: col = 2 * s + 1;
962: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
963: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
964: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
966: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
967: PetscCall(DMGetLocalToGlobalMapping(da, <og));
969: PetscCall(MatSetBlockSize(J, nc));
970: /* determine the matrix preallocation information */
971: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
972: for (i = xs; i < xs + nx; i++) {
973: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
974: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
975: for (j = ys; j < ys + ny; j++) {
976: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
977: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
978: for (k = zs; k < zs + nz; k++) {
979: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
980: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
982: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
984: cnt = 0;
985: for (l = 0; l < nc; l++) {
986: for (ii = istart; ii < iend + 1; ii++) {
987: for (jj = jstart; jj < jend + 1; jj++) {
988: for (kk = kstart; kk < kend + 1; kk++) {
989: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
990: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
991: }
992: }
993: }
994: }
995: rows[l] = l + nc * (slot);
996: }
997: PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
998: }
999: }
1000: }
1001: PetscCall(MatSetBlockSize(J, nc));
1002: PetscCall(MatSeqSELLSetPreallocation(J, 0, dnz));
1003: PetscCall(MatMPISELLSetPreallocation(J, 0, dnz, 0, onz));
1004: MatPreallocateEnd(dnz, onz);
1005: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1007: /*
1008: For each node in the grid, we get the neighbors in the local (on-process) ordering
1009: that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
1010: PETSc ordering.
1011: */
1012: if (!da->prealloc_only) {
1013: PetscCall(PetscCalloc1(col * col * col * nc * nc * nc, &values));
1014: for (i = xs; i < xs + nx; i++) {
1015: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1016: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1017: for (j = ys; j < ys + ny; j++) {
1018: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1019: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1020: for (k = zs; k < zs + nz; k++) {
1021: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1022: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1024: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1026: cnt = 0;
1027: for (l = 0; l < nc; l++) {
1028: for (ii = istart; ii < iend + 1; ii++) {
1029: for (jj = jstart; jj < jend + 1; jj++) {
1030: for (kk = kstart; kk < kend + 1; kk++) {
1031: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1032: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1033: }
1034: }
1035: }
1036: }
1037: rows[l] = l + nc * (slot);
1038: }
1039: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, values, INSERT_VALUES));
1040: }
1041: }
1042: }
1043: PetscCall(PetscFree(values));
1044: /* do not copy values to GPU since they are all zero and not yet needed there */
1045: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1046: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1047: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1048: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1049: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1050: }
1051: PetscCall(PetscFree2(rows, cols));
1052: PetscFunctionReturn(PETSC_SUCCESS);
1053: }
1055: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da, Mat J)
1056: {
1057: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny, m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, M, N;
1058: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1059: MPI_Comm comm;
1060: DMBoundaryType bx, by;
1061: ISLocalToGlobalMapping ltog, mltog;
1062: DMDAStencilType st;
1063: PetscBool removedups = PETSC_FALSE, alreadyboundtocpu = PETSC_TRUE;
1065: PetscFunctionBegin;
1066: /*
1067: nc - number of components per grid point
1068: col - number of colors needed in one direction for single component problem
1069: */
1070: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1071: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1072: col = 2 * s + 1;
1073: /*
1074: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1075: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1076: */
1077: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1078: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1079: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1080: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1081: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1083: PetscCall(PetscMalloc2(nc, &rows, col * col * nc * nc, &cols));
1084: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1086: PetscCall(MatSetBlockSize(J, nc));
1087: /* determine the matrix preallocation information */
1088: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1089: for (i = xs; i < xs + nx; i++) {
1090: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1091: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1093: for (j = ys; j < ys + ny; j++) {
1094: slot = i - gxs + gnx * (j - gys);
1096: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1097: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1099: cnt = 0;
1100: for (k = 0; k < nc; k++) {
1101: for (l = lstart; l < lend + 1; l++) {
1102: for (p = pstart; p < pend + 1; p++) {
1103: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1104: cols[cnt++] = k + nc * (slot + gnx * l + p);
1105: }
1106: }
1107: }
1108: rows[k] = k + nc * (slot);
1109: }
1110: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1111: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1112: }
1113: }
1114: PetscCall(MatSetBlockSize(J, nc));
1115: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1116: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1117: MatPreallocateEnd(dnz, onz);
1118: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1119: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1121: /*
1122: For each node in the grid, we get the neighbors in the local (on-process) ordering
1123: that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
1124: PETSc ordering.
1125: */
1126: if (!da->prealloc_only) {
1127: for (i = xs; i < xs + nx; i++) {
1128: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1129: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1131: for (j = ys; j < ys + ny; j++) {
1132: slot = i - gxs + gnx * (j - gys);
1134: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1135: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1137: cnt = 0;
1138: for (l = lstart; l < lend + 1; l++) {
1139: for (p = pstart; p < pend + 1; p++) {
1140: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star have either l = 0 or p = 0 */
1141: cols[cnt++] = nc * (slot + gnx * l + p);
1142: for (k = 1; k < nc; k++) {
1143: cols[cnt] = 1 + cols[cnt - 1];
1144: cnt++;
1145: }
1146: }
1147: }
1148: }
1149: for (k = 0; k < nc; k++) rows[k] = k + nc * (slot);
1150: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1151: }
1152: }
1153: /* do not copy values to GPU since they are all zero and not yet needed there */
1154: PetscCall(MatBoundToCPU(J, &alreadyboundtocpu));
1155: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1156: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1157: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1158: if (!alreadyboundtocpu) PetscCall(MatBindToCPU(J, PETSC_FALSE));
1159: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1160: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1161: }
1162: PetscCall(PetscFree2(rows, cols));
1163: PetscFunctionReturn(PETSC_SUCCESS);
1164: }
1166: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da, Mat J)
1167: {
1168: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1169: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, M, N;
1170: PetscInt lstart, lend, pstart, pend, *dnz, *onz;
1171: DM_DA *dd = (DM_DA *)da->data;
1172: PetscInt ifill_col, *ofill = dd->ofill, *dfill = dd->dfill;
1173: MPI_Comm comm;
1174: DMBoundaryType bx, by;
1175: ISLocalToGlobalMapping ltog;
1176: DMDAStencilType st;
1177: PetscBool removedups = PETSC_FALSE;
1179: PetscFunctionBegin;
1180: /*
1181: nc - number of components per grid point
1182: col - number of colors needed in one direction for single component problem
1183: */
1184: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, &M, &N, NULL, &nc, &s, &bx, &by, NULL, &st));
1185: col = 2 * s + 1;
1186: /*
1187: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1188: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1189: */
1190: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1191: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1192: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1193: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1194: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1196: PetscCall(PetscMalloc1(col * col * nc, &cols));
1197: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1199: PetscCall(MatSetBlockSize(J, nc));
1200: /* determine the matrix preallocation information */
1201: MatPreallocateBegin(comm, nc * nx * ny, nc * nx * ny, dnz, onz);
1202: for (i = xs; i < xs + nx; i++) {
1203: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1204: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1206: for (j = ys; j < ys + ny; j++) {
1207: slot = i - gxs + gnx * (j - gys);
1209: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1210: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1212: for (k = 0; k < nc; k++) {
1213: cnt = 0;
1214: for (l = lstart; l < lend + 1; l++) {
1215: for (p = pstart; p < pend + 1; p++) {
1216: if (l || p) {
1217: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1218: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1219: }
1220: } else {
1221: if (dfill) {
1222: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1223: } else {
1224: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1225: }
1226: }
1227: }
1228: }
1229: row = k + nc * (slot);
1230: maxcnt = PetscMax(maxcnt, cnt);
1231: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1232: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
1233: }
1234: }
1235: }
1236: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1237: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1238: MatPreallocateEnd(dnz, onz);
1239: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1241: /*
1242: For each node in the grid, we get the neighbors in the local (on-process) ordering
1243: that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
1244: PETSc ordering.
1245: */
1246: if (!da->prealloc_only) {
1247: for (i = xs; i < xs + nx; i++) {
1248: pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1249: pend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1251: for (j = ys; j < ys + ny; j++) {
1252: slot = i - gxs + gnx * (j - gys);
1254: lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1255: lend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1257: for (k = 0; k < nc; k++) {
1258: cnt = 0;
1259: for (l = lstart; l < lend + 1; l++) {
1260: for (p = pstart; p < pend + 1; p++) {
1261: if (l || p) {
1262: if ((st == DMDA_STENCIL_BOX) || (!l || !p)) { /* entries on star */
1263: for (ifill_col = ofill[k]; ifill_col < ofill[k + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + gnx * l + p);
1264: }
1265: } else {
1266: if (dfill) {
1267: for (ifill_col = dfill[k]; ifill_col < dfill[k + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + gnx * l + p);
1268: } else {
1269: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + gnx * l + p);
1270: }
1271: }
1272: }
1273: }
1274: row = k + nc * (slot);
1275: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1276: }
1277: }
1278: }
1279: /* do not copy values to GPU since they are all zero and not yet needed there */
1280: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1281: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1282: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1283: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1284: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1285: }
1286: PetscCall(PetscFree(cols));
1287: PetscFunctionReturn(PETSC_SUCCESS);
1288: }
1290: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da, Mat J)
1291: {
1292: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1293: PetscInt m, n, dim, s, *cols = NULL, k, nc, *rows = NULL, col, cnt, l, p, *dnz = NULL, *onz = NULL;
1294: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
1295: MPI_Comm comm;
1296: DMBoundaryType bx, by, bz;
1297: ISLocalToGlobalMapping ltog, mltog;
1298: DMDAStencilType st;
1299: PetscBool removedups = PETSC_FALSE;
1301: PetscFunctionBegin;
1302: /*
1303: nc - number of components per grid point
1304: col - number of colors needed in one direction for single component problem
1305: */
1306: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
1307: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1308: col = 2 * s + 1;
1310: /*
1311: With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1312: because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1313: */
1314: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
1315: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
1316: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
1318: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1319: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1320: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1322: PetscCall(PetscMalloc2(nc, &rows, col * col * col * nc * nc, &cols));
1323: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1325: PetscCall(MatSetBlockSize(J, nc));
1326: /* determine the matrix preallocation information */
1327: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
1328: for (i = xs; i < xs + nx; i++) {
1329: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1330: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1331: for (j = ys; j < ys + ny; j++) {
1332: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1333: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1334: for (k = zs; k < zs + nz; k++) {
1335: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1336: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1338: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1340: cnt = 0;
1341: for (l = 0; l < nc; l++) {
1342: for (ii = istart; ii < iend + 1; ii++) {
1343: for (jj = jstart; jj < jend + 1; jj++) {
1344: for (kk = kstart; kk < kend + 1; kk++) {
1345: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1346: cols[cnt++] = l + nc * (slot + ii + gnx * jj + gnx * gny * kk);
1347: }
1348: }
1349: }
1350: }
1351: rows[l] = l + nc * (slot);
1352: }
1353: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1354: else PetscCall(MatPreallocateSetLocal(ltog, nc, rows, ltog, cnt, cols, dnz, onz));
1355: }
1356: }
1357: }
1358: PetscCall(MatSetBlockSize(J, nc));
1359: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
1360: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
1361: MatPreallocateEnd(dnz, onz);
1362: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1363: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1365: /*
1366: For each node in the grid, we get the neighbors in the local (on-process) ordering
1367: that includes the ghost points; MatSetValuesLocal() then maps those indices to the global
1368: PETSc ordering.
1369: */
1370: if (!da->prealloc_only) {
1371: for (i = xs; i < xs + nx; i++) {
1372: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1373: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1374: for (j = ys; j < ys + ny; j++) {
1375: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1376: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1377: for (k = zs; k < zs + nz; k++) {
1378: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1379: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1381: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1383: cnt = 0;
1384: for (kk = kstart; kk < kend + 1; kk++) {
1385: for (jj = jstart; jj < jend + 1; jj++) {
1386: for (ii = istart; ii < iend + 1; ii++) {
1387: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1388: cols[cnt++] = nc * (slot + ii + gnx * jj + gnx * gny * kk);
1389: for (l = 1; l < nc; l++) {
1390: cols[cnt] = 1 + cols[cnt - 1];
1391: cnt++;
1392: }
1393: }
1394: }
1395: }
1396: }
1397: rows[0] = nc * (slot);
1398: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1399: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1400: }
1401: }
1402: }
1403: /* do not copy values to GPU since they are all zero and not yet needed there */
1404: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1405: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1406: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1407: if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1408: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1409: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1410: }
1411: PetscCall(PetscFree2(rows, cols));
1412: PetscFunctionReturn(PETSC_SUCCESS);
1413: }
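/*
  Usage sketch (illustrative only; the helper name and grid sizes are arbitrary examples): the routine
  above is typically reached through DMCreateMatrix() on a 3d DMDA whose matrix type is the default AIJ.
*/
#if 0
static PetscErrorCode ExampleCreate3dAIJ(MPI_Comm comm)
{
  DM  da;
  Mat J;

  PetscFunctionBegin;
  /* 8x8x8 grid, 2 dof per grid point, box stencil of width 1 */
  PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_BOX, 8, 8, 8, PETSC_DECIDE, PETSC_DECIDE, PETSC_DECIDE, 2, 1, NULL, NULL, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMCreateMatrix(da, &J)); /* preallocates and zeroes the stencil pattern as in the routine above */
  PetscCall(MatDestroy(&J));
  PetscCall(DMDestroy(&da));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif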
1415: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da, Mat J)
1416: {
1417: DM_DA *dd = (DM_DA *)da->data;
1418: PetscInt xs, nx, i, j, gxs, gnx, row, k, l;
1419: PetscInt m, dim, s, *cols = NULL, nc, cnt, maxcnt = 0, *ocols;
1420: PetscInt *ofill = dd->ofill, *dfill = dd->dfill;
1421: DMBoundaryType bx;
1422: ISLocalToGlobalMapping ltog;
1423: PetscMPIInt rank, size;
1425: PetscFunctionBegin;
1426: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)da), &rank));
1427: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)da), &size));
1429: /*
1430: nc - number of components per grid point
1431: */
1432: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1433: PetscCheck(s <= 1, PetscObjectComm((PetscObject)da), PETSC_ERR_SUP, "Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1434: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1435: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1437: PetscCall(MatSetBlockSize(J, nc));
1438: PetscCall(PetscCalloc2(nx * nc, &cols, nx * nc, &ocols));
1440: /*
1441: note: the counts should be smaller for the first and last process when the domain is not periodic
1442: does not handle dfill
1443: */
1444: cnt = 0;
1445: /* coupling with process to the left */
1446: for (i = 0; i < s; i++) {
1447: for (j = 0; j < nc; j++) {
1448: ocols[cnt] = ((rank == 0) ? 0 : (s - i) * (ofill[j + 1] - ofill[j]));
1449: cols[cnt] = dfill[j + 1] - dfill[j] + (s + i) * (ofill[j + 1] - ofill[j]);
1450: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1451: if (size > 1) ocols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1452: else cols[cnt] += (s - i) * (ofill[j + 1] - ofill[j]);
1453: }
1454: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1455: cnt++;
1456: }
1457: }
1458: for (i = s; i < nx - s; i++) {
1459: for (j = 0; j < nc; j++) {
1460: cols[cnt] = dfill[j + 1] - dfill[j] + 2 * s * (ofill[j + 1] - ofill[j]);
1461: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1462: cnt++;
1463: }
1464: }
1465: /* coupling with process to the right */
1466: for (i = nx - s; i < nx; i++) {
1467: for (j = 0; j < nc; j++) {
1468: ocols[cnt] = ((rank == (size - 1)) ? 0 : (i - nx + s + 1) * (ofill[j + 1] - ofill[j]));
1469: cols[cnt] = dfill[j + 1] - dfill[j] + (s + nx - i - 1) * (ofill[j + 1] - ofill[j]);
1470: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1471: if (size > 1) ocols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1472: else cols[cnt] += (i - nx + s + 1) * (ofill[j + 1] - ofill[j]);
1473: }
1474: maxcnt = PetscMax(maxcnt, ocols[cnt] + cols[cnt]);
1475: cnt++;
1476: }
1477: }
1479: PetscCall(MatSeqAIJSetPreallocation(J, 0, cols));
1480: PetscCall(MatMPIAIJSetPreallocation(J, 0, cols, 0, ocols));
1481: PetscCall(PetscFree2(cols, ocols));
1483: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1484: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1486: /*
1487: For each node in the grid: we compute the global indices of the coupled neighbors (handling
1488: the periodic wrap explicitly) and insert them directly with MatSetValues() in the global
1489: PETSc ordering.
1490: */
1491: if (!da->prealloc_only) {
1492: PetscCall(PetscMalloc1(maxcnt, &cols));
1493: row = xs * nc;
1494: /* coupling with process to the left */
1495: for (i = xs; i < xs + s; i++) {
1496: for (j = 0; j < nc; j++) {
1497: cnt = 0;
1498: if (rank) {
1499: for (l = 0; l < s; l++) {
1500: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1501: }
1502: }
1503: if (rank == 0 && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1504: for (l = 0; l < s; l++) {
1505: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (m + i - s - l) * nc + ofill[k];
1506: }
1507: }
1508: if (dfill) {
1509: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1510: } else {
1511: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1512: }
1513: for (l = 0; l < s; l++) {
1514: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1515: }
1516: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1517: row++;
1518: }
1519: }
1520: for (i = xs + s; i < xs + nx - s; i++) {
1521: for (j = 0; j < nc; j++) {
1522: cnt = 0;
1523: for (l = 0; l < s; l++) {
1524: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1525: }
1526: if (dfill) {
1527: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1528: } else {
1529: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1530: }
1531: for (l = 0; l < s; l++) {
1532: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1533: }
1534: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1535: row++;
1536: }
1537: }
1538: /* coupling with process to the right */
1539: for (i = xs + nx - s; i < xs + nx; i++) {
1540: for (j = 0; j < nc; j++) {
1541: cnt = 0;
1542: for (l = 0; l < s; l++) {
1543: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s + l) * nc + ofill[k];
1544: }
1545: if (dfill) {
1546: for (k = dfill[j]; k < dfill[j + 1]; k++) cols[cnt++] = i * nc + dfill[k];
1547: } else {
1548: for (k = 0; k < nc; k++) cols[cnt++] = i * nc + k;
1549: }
1550: if (rank < size - 1) {
1551: for (l = 0; l < s; l++) {
1552: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i + s - l) * nc + ofill[k];
1553: }
1554: }
1555: if ((rank == size - 1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1556: for (l = 0; l < s; l++) {
1557: for (k = ofill[j]; k < ofill[j + 1]; k++) cols[cnt++] = (i - s - l - m + 2) * nc + ofill[k];
1558: }
1559: }
1560: PetscCall(MatSetValues(J, 1, &row, cnt, cols, NULL, INSERT_VALUES));
1561: row++;
1562: }
1563: }
1564: PetscCall(PetscFree(cols));
1565: /* do not copy values to GPU since they are all zero and not yet needed there */
1566: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1567: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1568: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1569: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1570: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1571: }
1572: PetscFunctionReturn(PETSC_SUCCESS);
1573: }
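/*
  Illustrative sketch (the helper name and sizes are arbitrary examples): this _Fill variant is
  typically selected when block fill patterns have been supplied with DMDASetBlockFills(). Here, with
  dof = 2, each component couples only with itself within a grid point, and only component 0 couples
  to neighboring grid points.
*/
#if 0
static PetscErrorCode ExampleCreate1dAIJWithFill(MPI_Comm comm)
{
  DM             da;
  Mat            J;
  const PetscInt dfill[4] = {1, 0, 0, 1}; /* diagonal block: each component coupled to itself */
  const PetscInt ofill[4] = {1, 0, 0, 0}; /* off-diagonal blocks: only component 0 couples to neighbors */

  PetscFunctionBegin;
  PetscCall(DMDACreate1d(comm, DM_BOUNDARY_NONE, 16, 2, 1, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMDASetBlockFills(da, dfill, ofill));
  PetscCall(DMCreateMatrix(da, &J)); /* preallocation now honors dfill/ofill as in the routine above */
  PetscCall(MatDestroy(&J));
  PetscCall(DMDestroy(&da));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif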
1575: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da, Mat J)
1576: {
1577: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1578: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1579: PetscInt istart, iend;
1580: DMBoundaryType bx;
1581: ISLocalToGlobalMapping ltog, mltog;
1583: PetscFunctionBegin;
1584: /*
1585: nc - number of components per grid point
1586: col - number of colors needed in one direction for single component problem
1587: */
1588: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1589: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_TRUE));
1590: col = 2 * s + 1;
1592: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1593: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1595: PetscCall(MatSetBlockSize(J, nc));
1596: PetscCall(MatSeqAIJSetPreallocation(J, col * nc, NULL));
1597: PetscCall(MatMPIAIJSetPreallocation(J, col * nc, NULL, col * nc, NULL));
1599: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1600: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1601: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1603: /*
1604: For each node in the grid: we get the neighbors in the local (on-process) ordering that
1605: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1606: PETSc ordering.
1607: */
1608: if (!da->prealloc_only) {
1609: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1610: for (i = xs; i < xs + nx; i++) {
1611: istart = PetscMax(-s, gxs - i);
1612: iend = PetscMin(s, gxs + gnx - i - 1);
1613: slot = i - gxs;
1615: cnt = 0;
1616: for (i1 = istart; i1 < iend + 1; i1++) {
1617: cols[cnt++] = nc * (slot + i1);
1618: for (l = 1; l < nc; l++) {
1619: cols[cnt] = 1 + cols[cnt - 1];
1620: cnt++;
1621: }
1622: }
1623: rows[0] = nc * (slot);
1624: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1625: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1626: }
1627: /* do not copy values to GPU since they are all zero and not yet needed there */
1628: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1629: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1630: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1631: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1632: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1633: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1634: PetscCall(PetscFree2(rows, cols));
1635: }
1636: PetscFunctionReturn(PETSC_SUCCESS);
1637: }
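/*
  Assembly sketch (illustrative only; assumes a non-periodic, 1-component 1d DMDA named da and the
  matrix J obtained from DMCreateMatrix() as above): values are typically inserted with
  MatSetValuesStencil(), which uses the same local-to-global information attached to J.
*/
#if 0
static PetscErrorCode ExampleAssemble1dLaplacian(DM da, Mat J)
{
  PetscInt xs, nx, i, M;

  PetscFunctionBegin;
  PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
  PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
  for (i = xs; i < xs + nx; i++) {
    MatStencil  row = {0}, col[3] = {{0}, {0}, {0}};
    PetscScalar v[3];
    PetscInt    ncols = 0;

    row.i = i;
    if (i > 0) {
      col[ncols].i = i - 1;
      v[ncols++]   = -1.0;
    }
    col[ncols].i = i;
    v[ncols++]   = 2.0;
    if (i < M - 1) {
      col[ncols].i = i + 1;
      v[ncols++]   = -1.0;
    }
    PetscCall(MatSetValuesStencil(J, 1, &row, ncols, col, v, INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif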
1639: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da, Mat J)
1640: {
1641: PetscInt xs, nx, i, i1, slot, gxs, gnx;
1642: PetscInt m, dim, s, *cols = NULL, nc, *rows = NULL, col, cnt, l;
1643: PetscInt istart, iend;
1644: DMBoundaryType bx;
1645: ISLocalToGlobalMapping ltog, mltog;
1647: PetscFunctionBegin;
1648: /*
1649: nc - number of components per grid point
1650: col - number of colors needed in one direction for single component problem
1651: */
1652: PetscCall(DMDAGetInfo(da, &dim, &m, NULL, NULL, NULL, NULL, NULL, &nc, &s, &bx, NULL, NULL, NULL));
1653: col = 2 * s + 1;
1655: PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &nx, NULL, NULL));
1656: PetscCall(DMDAGetGhostCorners(da, &gxs, NULL, NULL, &gnx, NULL, NULL));
1658: PetscCall(MatSetBlockSize(J, nc));
1659: PetscCall(MatSeqAIJSetTotalPreallocation(J, nx * nc * col * nc));
1661: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1662: PetscCall(MatGetLocalToGlobalMapping(J, &mltog, NULL));
1663: if (!mltog) PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1665: /*
1666: For each node in the grid: we get the neighbors in the local (on-process) ordering that
1667: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
1668: PETSc ordering.
1669: */
1670: if (!da->prealloc_only) {
1671: PetscCall(PetscMalloc2(nc, &rows, col * nc * nc, &cols));
1672: for (i = xs; i < xs + nx; i++) {
1673: istart = PetscMax(-s, gxs - i);
1674: iend = PetscMin(s, gxs + gnx - i - 1);
1675: slot = i - gxs;
1677: cnt = 0;
1678: for (i1 = istart; i1 < iend + 1; i1++) {
1679: cols[cnt++] = nc * (slot + i1);
1680: for (l = 1; l < nc; l++) {
1681: cols[cnt] = 1 + cols[cnt - 1];
1682: cnt++;
1683: }
1684: }
1685: rows[0] = nc * (slot);
1686: for (l = 1; l < nc; l++) rows[l] = 1 + rows[l - 1];
1687: PetscCall(MatSetValuesLocal(J, nc, rows, cnt, cols, NULL, INSERT_VALUES));
1688: }
1689: /* do not copy values to GPU since they are all zero and not yet needed there */
1690: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1691: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1692: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1693: if (bx == DM_BOUNDARY_NONE) PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1694: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1695: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1696: PetscCall(PetscFree2(rows, cols));
1697: }
1698: PetscCall(MatSetOption(J, MAT_SORTED_FULL, PETSC_FALSE));
1699: PetscFunctionReturn(PETSC_SUCCESS);
1700: }
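/*
  Sketch of the da->prealloc_only flag used throughout this file (illustrative only): a caller that
  will insert its own nonzero pattern can ask the DM to stop after preallocation, so the loops above
  that insert zero values with MatSetValuesLocal() are skipped.
*/
#if 0
static PetscErrorCode ExamplePreallocateOnly(DM da, Mat *J)
{
  PetscFunctionBegin;
  PetscCall(DMSetMatrixPreallocateOnly(da, PETSC_TRUE)); /* sets the flag checked as da->prealloc_only above */
  PetscCall(DMCreateMatrix(da, J));                      /* preallocated, but no entries are inserted or assembled */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif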
1702: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da, Mat J)
1703: {
1704: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1705: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1706: PetscInt istart, iend, jstart, jend, ii, jj;
1707: MPI_Comm comm;
1708: PetscScalar *values;
1709: DMBoundaryType bx, by;
1710: DMDAStencilType st;
1711: ISLocalToGlobalMapping ltog;
1713: PetscFunctionBegin;
1714: /*
1715: nc - number of components per grid point
1716: col - number of colors needed in one direction for single component problem
1717: */
1718: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1719: col = 2 * s + 1;
1721: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1722: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1723: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1725: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1727: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1729: /* determine the matrix preallocation information */
1730: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1731: for (i = xs; i < xs + nx; i++) {
1732: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1733: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1734: for (j = ys; j < ys + ny; j++) {
1735: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1736: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1737: slot = i - gxs + gnx * (j - gys);
1739: /* Find block columns in block row */
1740: cnt = 0;
1741: for (ii = istart; ii < iend + 1; ii++) {
1742: for (jj = jstart; jj < jend + 1; jj++) {
1743: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1744: cols[cnt++] = slot + ii + gnx * jj;
1745: }
1746: }
1747: }
1748: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1749: }
1750: }
1751: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1752: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1753: MatPreallocateEnd(dnz, onz);
1755: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1757: /*
1758: For each node in the grid: we get the neighbors in the local (on-process) ordering that
1759: includes the ghost points, then MatSetValuesBlockedLocal() maps those block indices to the
1760: global PETSc ordering.
1761: */
1762: if (!da->prealloc_only) {
1763: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1764: for (i = xs; i < xs + nx; i++) {
1765: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1766: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1767: for (j = ys; j < ys + ny; j++) {
1768: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1769: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1770: slot = i - gxs + gnx * (j - gys);
1771: cnt = 0;
1772: for (ii = istart; ii < iend + 1; ii++) {
1773: for (jj = jstart; jj < jend + 1; jj++) {
1774: if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1775: cols[cnt++] = slot + ii + gnx * jj;
1776: }
1777: }
1778: }
1779: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1780: }
1781: }
1782: PetscCall(PetscFree(values));
1783: /* do not copy values to GPU since they are all zero and not yet needed there */
1784: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1785: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1786: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1787: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1788: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1789: }
1790: PetscCall(PetscFree(cols));
1791: PetscFunctionReturn(PETSC_SUCCESS);
1792: }
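/*
  Usage sketch (illustrative only; grid sizes and dof are arbitrary examples): the BAIJ routines in
  this file are selected by setting the DM's matrix type before DMCreateMatrix(), either with
  DMSetMatType() or the -dm_mat_type baij command line option.
*/
#if 0
static PetscErrorCode ExampleCreate2dBAIJ(MPI_Comm comm)
{
  DM  da;
  Mat J;

  PetscFunctionBegin;
  PetscCall(DMDACreate2d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 16, 16, PETSC_DECIDE, PETSC_DECIDE, 3, 1, NULL, NULL, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(DMSetMatType(da, MATBAIJ)); /* one dof x dof block per pair of coupled grid points */
  PetscCall(DMCreateMatrix(da, &J));
  PetscCall(MatDestroy(&J));
  PetscCall(DMDestroy(&da));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif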
1794: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da, Mat J)
1795: {
1796: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1797: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
1798: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
1799: MPI_Comm comm;
1800: PetscScalar *values;
1801: DMBoundaryType bx, by, bz;
1802: DMDAStencilType st;
1803: ISLocalToGlobalMapping ltog;
1805: PetscFunctionBegin;
1806: /*
1807: nc - number of components per grid point
1808: col - number of colors needed in one direction for single component problem
1809: */
1810: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
1811: col = 2 * s + 1;
1813: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
1814: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
1815: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1817: PetscCall(PetscMalloc1(col * col * col, &cols));
1819: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1821: /* determine the matrix preallocation information */
1822: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
1823: for (i = xs; i < xs + nx; i++) {
1824: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1825: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1826: for (j = ys; j < ys + ny; j++) {
1827: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1828: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1829: for (k = zs; k < zs + nz; k++) {
1830: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1831: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1833: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1835: /* Find block columns in block row */
1836: cnt = 0;
1837: for (ii = istart; ii < iend + 1; ii++) {
1838: for (jj = jstart; jj < jend + 1; jj++) {
1839: for (kk = kstart; kk < kend + 1; kk++) {
1840: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1841: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1842: }
1843: }
1844: }
1845: }
1846: PetscCall(MatPreallocateSetLocalBlock(ltog, 1, &slot, ltog, cnt, cols, dnz, onz));
1847: }
1848: }
1849: }
1850: PetscCall(MatSeqBAIJSetPreallocation(J, nc, 0, dnz));
1851: PetscCall(MatMPIBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1852: MatPreallocateEnd(dnz, onz);
1854: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1856: /*
1857: For each node in the grid: we get the neighbors in the local (on-process) ordering that
1858: includes the ghost points, then MatSetValuesBlockedLocal() maps those block indices to the
1859: global PETSc ordering.
1860: */
1861: if (!da->prealloc_only) {
1862: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
1863: for (i = xs; i < xs + nx; i++) {
1864: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1865: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1866: for (j = ys; j < ys + ny; j++) {
1867: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1868: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1869: for (k = zs; k < zs + nz; k++) {
1870: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
1871: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
1873: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
1875: cnt = 0;
1876: for (ii = istart; ii < iend + 1; ii++) {
1877: for (jj = jstart; jj < jend + 1; jj++) {
1878: for (kk = kstart; kk < kend + 1; kk++) {
1879: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
1880: cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
1881: }
1882: }
1883: }
1884: }
1885: PetscCall(MatSetValuesBlockedLocal(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1886: }
1887: }
1888: }
1889: PetscCall(PetscFree(values));
1890: /* do not copy values to GPU since they are all zero and not yet needed there */
1891: PetscCall(MatBindToCPU(J, PETSC_TRUE));
1892: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1893: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1894: PetscCall(MatBindToCPU(J, PETSC_FALSE));
1895: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
1896: }
1897: PetscCall(PetscFree(cols));
1898: PetscFunctionReturn(PETSC_SUCCESS);
1899: }
1901: /*
1902: This helper is for SBAIJ preallocation: it discards the lower-triangular values, which are difficult to
1903: identify in the local ordering with a periodic domain.
1904: */
1905: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog, PetscInt *row, PetscInt *cnt, PetscInt col[])
1906: {
1907: PetscInt i, n;
1909: PetscFunctionBegin;
1910: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, 1, row, row));
1911: PetscCall(ISLocalToGlobalMappingApplyBlock(ltog, *cnt, col, col));
1912: for (i = 0, n = 0; i < *cnt; i++) {
1913: if (col[i] >= *row) col[n++] = col[i];
1914: }
1915: *cnt = n;
1916: PetscFunctionReturn(PETSC_SUCCESS);
1917: }
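/*
  For example (illustrative values): if the block row maps to global block 12 and the mapped block
  columns are {3, 12, 18, 7, 25}, the filter above compacts the list to {12, 18, 25} and sets
  *cnt = 3, so only the upper triangle (column >= row) is kept for SBAIJ preallocation/insertion.
*/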
1919: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da, Mat J)
1920: {
1921: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
1922: PetscInt m, n, dim, s, *cols, nc, col, cnt, *dnz, *onz;
1923: PetscInt istart, iend, jstart, jend, ii, jj;
1924: MPI_Comm comm;
1925: PetscScalar *values;
1926: DMBoundaryType bx, by;
1927: DMDAStencilType st;
1928: ISLocalToGlobalMapping ltog;
1930: PetscFunctionBegin;
1931: /*
1932: nc - number of components per grid point
1933: col - number of colors needed in one direction for single component problem
1934: */
1935: PetscCall(DMDAGetInfo(da, &dim, &m, &n, NULL, NULL, NULL, NULL, &nc, &s, &bx, &by, NULL, &st));
1936: col = 2 * s + 1;
1938: PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &nx, &ny, NULL));
1939: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, NULL, &gnx, &gny, NULL));
1940: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
1942: PetscCall(PetscMalloc1(col * col * nc * nc, &cols));
1944: PetscCall(DMGetLocalToGlobalMapping(da, <og));
1946: /* determine the matrix preallocation information */
1947: MatPreallocateBegin(comm, nx * ny, nx * ny, dnz, onz);
1948: for (i = xs; i < xs + nx; i++) {
1949: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1950: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1951: for (j = ys; j < ys + ny; j++) {
1952: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1953: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1954: slot = i - gxs + gnx * (j - gys);
1956: /* Find block columns in block row */
1957: cnt = 0;
1958: for (ii = istart; ii < iend + 1; ii++) {
1959: for (jj = jstart; jj < jend + 1; jj++) {
1960: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1961: }
1962: }
1963: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1964: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
1965: }
1966: }
1967: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
1968: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
1969: MatPreallocateEnd(dnz, onz);
1971: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
1973: /*
1974: For each node in the grid: we get the neighbors in the local (on-process) ordering that
1975: includes the ghost points, map them to the global PETSc ordering (keeping only the upper
1976: triangle), and insert them with MatSetValuesBlocked().
1977: */
1978: if (!da->prealloc_only) {
1979: PetscCall(PetscCalloc1(col * col * nc * nc, &values));
1980: for (i = xs; i < xs + nx; i++) {
1981: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
1982: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
1983: for (j = ys; j < ys + ny; j++) {
1984: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
1985: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
1986: slot = i - gxs + gnx * (j - gys);
1988: /* Find block columns in block row */
1989: cnt = 0;
1990: for (ii = istart; ii < iend + 1; ii++) {
1991: for (jj = jstart; jj < jend + 1; jj++) {
1992: if (st == DMDA_STENCIL_BOX || !ii || !jj) cols[cnt++] = slot + ii + gnx * jj;
1993: }
1994: }
1995: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
1996: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
1997: }
1998: }
1999: PetscCall(PetscFree(values));
2000: /* do not copy values to GPU since they are all zero and not yet needed there */
2001: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2002: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2003: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2004: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2005: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2006: }
2007: PetscCall(PetscFree(cols));
2008: PetscFunctionReturn(PETSC_SUCCESS);
2009: }
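/*
  Usage sketch (illustrative only): the SBAIJ routines are selected the same way, e.g. with
  DMSetMatType(da, MATSBAIJ) or -dm_mat_type sbaij. Because only the upper triangle is stored, a
  caller that assembles a full stencil may also want MAT_IGNORE_LOWER_TRIANGULAR so insertions below
  the diagonal are silently dropped instead of generating an error.
*/
#if 0
static PetscErrorCode ExampleCreate2dSBAIJ(DM da, Mat *J)
{
  PetscFunctionBegin;
  PetscCall(DMSetMatType(da, MATSBAIJ));
  PetscCall(DMCreateMatrix(da, J));
  PetscCall(MatSetOption(*J, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif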
2011: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da, Mat J)
2012: {
2013: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2014: PetscInt m, n, dim, s, *cols, k, nc, col, cnt, p, *dnz, *onz;
2015: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk;
2016: MPI_Comm comm;
2017: PetscScalar *values;
2018: DMBoundaryType bx, by, bz;
2019: DMDAStencilType st;
2020: ISLocalToGlobalMapping ltog;
2022: PetscFunctionBegin;
2023: /*
2024: nc - number of components per grid point
2025: col - number of colors needed in one direction for single component problem
2026: */
2027: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, NULL, NULL, NULL, &nc, &s, &bx, &by, &bz, &st));
2028: col = 2 * s + 1;
2030: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2031: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2032: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2034: /* work space for the block column indices */
2035: PetscCall(PetscMalloc1(col * col * col, &cols));
2037: PetscCall(DMGetLocalToGlobalMapping(da, <og));
2039: /* determine the matrix preallocation information */
2040: MatPreallocateBegin(comm, nx * ny * nz, nx * ny * nz, dnz, onz);
2041: for (i = xs; i < xs + nx; i++) {
2042: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2043: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2044: for (j = ys; j < ys + ny; j++) {
2045: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2046: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2047: for (k = zs; k < zs + nz; k++) {
2048: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2049: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2051: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2053: /* Find block columns in block row */
2054: cnt = 0;
2055: for (ii = istart; ii < iend + 1; ii++) {
2056: for (jj = jstart; jj < jend + 1; jj++) {
2057: for (kk = kstart; kk < kend + 1; kk++) {
2058: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2059: }
2060: }
2061: }
2062: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2063: PetscCall(MatPreallocateSymmetricSetBlock(slot, cnt, cols, dnz, onz));
2064: }
2065: }
2066: }
2067: PetscCall(MatSeqSBAIJSetPreallocation(J, nc, 0, dnz));
2068: PetscCall(MatMPISBAIJSetPreallocation(J, nc, 0, dnz, 0, onz));
2069: MatPreallocateEnd(dnz, onz);
2071: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2073: /*
2074: For each node in the grid: we get the neighbors in the local (on-process) ordering that
2075: includes the ghost points, map them to the global PETSc ordering (keeping only the upper
2076: triangle), and insert them with MatSetValuesBlocked().
2077: */
2078: if (!da->prealloc_only) {
2079: PetscCall(PetscCalloc1(col * col * col * nc * nc, &values));
2080: for (i = xs; i < xs + nx; i++) {
2081: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2082: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2083: for (j = ys; j < ys + ny; j++) {
2084: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2085: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2086: for (k = zs; k < zs + nz; k++) {
2087: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2088: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2090: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2092: cnt = 0;
2093: for (ii = istart; ii < iend + 1; ii++) {
2094: for (jj = jstart; jj < jend + 1; jj++) {
2095: for (kk = kstart; kk < kend + 1; kk++) {
2096: if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) cols[cnt++] = slot + ii + gnx * jj + gnx * gny * kk;
2097: }
2098: }
2099: }
2100: PetscCall(L2GFilterUpperTriangular(ltog, &slot, &cnt, cols));
2101: PetscCall(MatSetValuesBlocked(J, 1, &slot, cnt, cols, values, INSERT_VALUES));
2102: }
2103: }
2104: }
2105: PetscCall(PetscFree(values));
2106: /* do not copy values to GPU since they are all zero and not yet needed there */
2107: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2108: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2109: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2110: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2111: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2112: }
2113: PetscCall(PetscFree(cols));
2114: PetscFunctionReturn(PETSC_SUCCESS);
2115: }
2117: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da, Mat J)
2118: {
2119: PetscInt xs, ys, nx, ny, i, j, slot, gxs, gys, gnx, gny;
2120: PetscInt m, n, dim, s, *cols, k, nc, row, col, cnt, maxcnt = 0, l, p, *dnz, *onz;
2121: PetscInt istart, iend, jstart, jend, kstart, kend, zs, nz, gzs, gnz, ii, jj, kk, M, N, P;
2122: DM_DA *dd = (DM_DA *)da->data;
2123: PetscInt ifill_col, *dfill = dd->dfill, *ofill = dd->ofill;
2124: MPI_Comm comm;
2125: PetscScalar *values;
2126: DMBoundaryType bx, by, bz;
2127: ISLocalToGlobalMapping ltog;
2128: DMDAStencilType st;
2129: PetscBool removedups = PETSC_FALSE;
2131: PetscFunctionBegin;
2132: /*
2133: nc - number of components per grid point
2134: col - number of colors needed in one direction for single component problem
2135: */
2136: PetscCall(DMDAGetInfo(da, &dim, &m, &n, &p, &M, &N, &P, &nc, &s, &bx, &by, &bz, &st));
2137: col = 2 * s + 1;
2139: /*
2140: With only one process in a periodic, skinny dimension the code will label nonzero columns multiple times
2141: because the stencil "wraps" around the end of the domain and hits entries already counted from the other direction.
2142: */
2143: if (M == 1 && 2 * s >= m) removedups = PETSC_TRUE;
2144: if (N == 1 && 2 * s >= n) removedups = PETSC_TRUE;
2145: if (P == 1 && 2 * s >= p) removedups = PETSC_TRUE;
2147: PetscCall(DMDAGetCorners(da, &xs, &ys, &zs, &nx, &ny, &nz));
2148: PetscCall(DMDAGetGhostCorners(da, &gxs, &gys, &gzs, &gnx, &gny, &gnz));
2149: PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
2151: PetscCall(PetscMalloc1(col * col * col * nc, &cols));
2152: PetscCall(DMGetLocalToGlobalMapping(da, <og));
2154: /* determine the matrix preallocation information */
2155: MatPreallocateBegin(comm, nc * nx * ny * nz, nc * nx * ny * nz, dnz, onz);
2157: PetscCall(MatSetBlockSize(J, nc));
2158: for (i = xs; i < xs + nx; i++) {
2159: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2160: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2161: for (j = ys; j < ys + ny; j++) {
2162: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2163: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2164: for (k = zs; k < zs + nz; k++) {
2165: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2166: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2168: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2170: for (l = 0; l < nc; l++) {
2171: cnt = 0;
2172: for (ii = istart; ii < iend + 1; ii++) {
2173: for (jj = jstart; jj < jend + 1; jj++) {
2174: for (kk = kstart; kk < kend + 1; kk++) {
2175: if (ii || jj || kk) {
2176: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2177: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2178: }
2179: } else {
2180: if (dfill) {
2181: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2182: } else {
2183: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2184: }
2185: }
2186: }
2187: }
2188: }
2189: row = l + nc * (slot);
2190: maxcnt = PetscMax(maxcnt, cnt);
2191: if (removedups) PetscCall(MatPreallocateSetLocalRemoveDups(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2192: else PetscCall(MatPreallocateSetLocal(ltog, 1, &row, ltog, cnt, cols, dnz, onz));
2193: }
2194: }
2195: }
2196: }
2197: PetscCall(MatSeqAIJSetPreallocation(J, 0, dnz));
2198: PetscCall(MatMPIAIJSetPreallocation(J, 0, dnz, 0, onz));
2199: MatPreallocateEnd(dnz, onz);
2200: PetscCall(MatSetLocalToGlobalMapping(J, ltog, ltog));
2202: /*
2203: For each node in the grid: we get the neighbors in the local (on-process) ordering that
2204: includes the ghost points, then MatSetValuesLocal() maps those indices to the global
2205: PETSc ordering.
2206: */
2207: if (!da->prealloc_only) {
2208: PetscCall(PetscCalloc1(maxcnt, &values));
2209: for (i = xs; i < xs + nx; i++) {
2210: istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -i));
2211: iend = (bx == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, m - i - 1));
2212: for (j = ys; j < ys + ny; j++) {
2213: jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -j));
2214: jend = (by == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, n - j - 1));
2215: for (k = zs; k < zs + nz; k++) {
2216: kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s, -k));
2217: kend = (bz == DM_BOUNDARY_PERIODIC) ? s : (PetscMin(s, p - k - 1));
2219: slot = i - gxs + gnx * (j - gys) + gnx * gny * (k - gzs);
2221: for (l = 0; l < nc; l++) {
2222: cnt = 0;
2223: for (ii = istart; ii < iend + 1; ii++) {
2224: for (jj = jstart; jj < jend + 1; jj++) {
2225: for (kk = kstart; kk < kend + 1; kk++) {
2226: if (ii || jj || kk) {
2227: if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) { /* entries on star*/
2228: for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2229: }
2230: } else {
2231: if (dfill) {
2232: for (ifill_col = dfill[l]; ifill_col < dfill[l + 1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2233: } else {
2234: for (ifill_col = 0; ifill_col < nc; ifill_col++) cols[cnt++] = ifill_col + nc * (slot + ii + gnx * jj + gnx * gny * kk);
2235: }
2236: }
2237: }
2238: }
2239: }
2240: row = l + nc * (slot);
2241: PetscCall(MatSetValuesLocal(J, 1, &row, cnt, cols, values, INSERT_VALUES));
2242: }
2243: }
2244: }
2245: }
2246: PetscCall(PetscFree(values));
2247: /* do not copy values to GPU since they are all zero and not yet needed there */
2248: PetscCall(MatBindToCPU(J, PETSC_TRUE));
2249: PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
2250: PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
2251: PetscCall(MatBindToCPU(J, PETSC_FALSE));
2252: PetscCall(MatSetOption(J, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2253: }
2254: PetscCall(PetscFree(cols));
2255: PetscFunctionReturn(PETSC_SUCCESS);
2256: }
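/*
  Layout note (illustrative example): dfill/ofill above use the modified CSR form built by the block
  fill helpers, so the loop "for (ifill_col = ofill[l]; ifill_col < ofill[l + 1]; ifill_col++)" walks
  the component indices ofill[ifill_col] coupled to component l. For instance, assuming dof = 2 and a
  dense off-diagonal pattern {0, 1, 1, 0} (each component couples only to the other component of a
  neighboring point), the stored form would be ofill = {3, 4, 5, 1, 0}: ofill[0..1] are the row
  starts, ofill[2] the end, and ofill[3], ofill[4] are the column indices 1 and 0.
*/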