Actual source code: stag2d.c

  1: /* Functions specific to the 2-dimensional implementation of DMStag */
  2: #include <petsc/private/dmstagimpl.h>

  4: /*@
  5:   DMStagCreate2d - Create an object to manage data living on the elements, faces, and vertices of a parallelized regular 2D grid.

  7:   Collective

  9:   Input Parameters:
 10: + comm         - MPI communicator
. bndx         - x boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
 13: . bndy         - y boundary type, `DM_BOUNDARY_NONE`, `DM_BOUNDARY_PERIODIC`, or `DM_BOUNDARY_GHOSTED`
 14: . M            - global number of elements in x direction
 15: . N            - global number of elements in y direction
 16: . m            - number of ranks in the x direction (may be `PETSC_DECIDE`)
 17: . n            - number of ranks in the y direction (may be `PETSC_DECIDE`)
 18: . dof0         - number of degrees of freedom per vertex/0-cell
 19: . dof1         - number of degrees of freedom per face/1-cell
 20: . dof2         - number of degrees of freedom per element/2-cell
 21: . stencilType  - ghost/halo region type: `DMSTAG_STENCIL_NONE`, `DMSTAG_STENCIL_BOX`, or `DMSTAG_STENCIL_STAR`
 22: . stencilWidth - width, in elements, of halo/ghost region
 23: . lx           - array of local x element counts, of length equal to `m`, summing to `M`, or `NULL`
 24: - ly           - array of local y element counts, of length equal to `n`, summing to `N`, or `NULL`

 26:   Output Parameter:
 27: . dm - the new `DMSTAG` object

 29:   Options Database Keys:
 30: + -dm_view                                      - calls `DMViewFromOptions()` at the conclusion of `DMSetUp()`
 31: . -stag_grid_x <nx>                             - number of elements in the x direction
 32: . -stag_grid_y <ny>                             - number of elements in the y direction
 33: . -stag_ranks_x <rx>                            - number of ranks in the x direction
 34: . -stag_ranks_y <ry>                            - number of ranks in the y direction
 35: . -stag_ghost_stencil_width                     - width of ghost region, in elements
 36: . -stag_boundary_type_x <none,ghosted,periodic> - `DMBoundaryType` value
 37: - -stag_boundary_type_y <none,ghosted,periodic> - `DMBoundaryType` value

 39:   Level: beginner

 41:   Notes:
 42:   You must call `DMSetUp()` after this call, before using the `DM`.
 43:   If you wish to use the options database (see the keys above) to change values in the `DMSTAG`, you must call
 44:   `DMSetFromOptions()` after this function but before `DMSetUp()`.

 46: .seealso: [](ch_stag), `DMSTAG`, `DMStagCreate1d()`, `DMStagCreate3d()`, `DMDestroy()`, `DMView()`, `DMCreateGlobalVector()`, `DMCreateLocalVector()`, `DMLocalToGlobalBegin()`, `DMDACreate2d()`
 47: @*/
 48: PetscErrorCode DMStagCreate2d(MPI_Comm comm, DMBoundaryType bndx, DMBoundaryType bndy, PetscInt M, PetscInt N, PetscInt m, PetscInt n, PetscInt dof0, PetscInt dof1, PetscInt dof2, DMStagStencilType stencilType, PetscInt stencilWidth, const PetscInt lx[], const PetscInt ly[], DM *dm)
 49: {
 50:   PetscFunctionBegin;
 51:   PetscCall(DMCreate(comm, dm));
 52:   PetscCall(DMSetDimension(*dm, 2));
 53:   PetscCall(DMStagInitialize(bndx, bndy, DM_BOUNDARY_NONE, M, N, 0, m, n, 0, dof0, dof1, dof2, 0, stencilType, stencilWidth, lx, ly, NULL, *dm));
 54:   PetscFunctionReturn(PETSC_SUCCESS);
 55: }

 57: PETSC_INTERN PetscErrorCode DMStagRestrictSimple_2d(DM dmf, Vec xf_local, DM dmc, Vec xc_local)
 58: {
 59:   PetscInt             Mf, Nf, Mc, Nc, factorx, factory, dof[3];
 60:   PetscInt             xc, yc, mc, nc, nExtraxc, nExtrayc, i, j, d;
 61:   PetscInt             idownleftf, ileftf, idownf, ielemf, idownleftc, ileftc, idownc, ielemc;
 62:   const PetscScalar ***arrf;
 63:   PetscScalar       ***arrc;

 65:   PetscFunctionBegin;
 66:   PetscCall(DMStagGetGlobalSizes(dmf, &Mf, &Nf, NULL));
 67:   PetscCall(DMStagGetGlobalSizes(dmc, &Mc, &Nc, NULL));
 68:   factorx = Mf / Mc;
 69:   factory = Nf / Nc;
 70:   PetscCall(DMStagGetDOF(dmc, &dof[0], &dof[1], &dof[2], NULL));

 72:   PetscCall(DMStagGetCorners(dmc, &xc, &yc, NULL, &mc, &nc, NULL, &nExtraxc, &nExtrayc, NULL));
 73:   PetscCall(VecZeroEntries(xc_local));
 74:   PetscCall(DMStagVecGetArray(dmf, xf_local, &arrf));
 75:   PetscCall(DMStagVecGetArray(dmc, xc_local, &arrc));
 76:   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN_LEFT, 0, &idownleftf));
 77:   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_LEFT, 0, &ileftf));
 78:   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_DOWN, 0, &idownf));
 79:   PetscCall(DMStagGetLocationSlot(dmf, DMSTAG_ELEMENT, 0, &ielemf));
 80:   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN_LEFT, 0, &idownleftc));
 81:   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_LEFT, 0, &ileftc));
 82:   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_DOWN, 0, &idownc));
 83:   PetscCall(DMStagGetLocationSlot(dmc, DMSTAG_ELEMENT, 0, &ielemc));

 85:   for (d = 0; d < dof[0]; ++d)
 86:     for (j = yc; j < yc + nc + nExtrayc; ++j)
 87:       for (i = xc; i < xc + mc + nExtraxc; ++i) {
 88:         const PetscInt ii = factorx * i, jj = factory * j;

 90:         arrc[j][i][idownleftc + d] = arrf[jj][ii][idownleftf + d];
 91:       }

 93:   for (d = 0; d < dof[1]; ++d)
 94:     for (j = yc; j < yc + nc; ++j)
 95:       for (i = xc; i < xc + mc + nExtraxc; ++i) {
 96:         const PetscInt ii = factorx * i, jj = factory * j + factory / 2;

 98:         if (factory % 2 == 0) arrc[j][i][ileftc + d] = 0.5 * (arrf[jj - 1][ii][ileftf + d] + arrf[jj][ii][ileftf + d]);
 99:         else arrc[j][i][ileftc + d] = arrf[jj][ii][ileftf + d];
100:       }

102:   for (d = 0; d < dof[1]; ++d)
103:     for (j = yc; j < yc + nc + nExtrayc; ++j)
104:       for (i = xc; i < xc + mc; ++i) {
105:         const PetscInt ii = factorx * i + factorx / 2, jj = factory * j;

107:         if (factorx % 2 == 0) arrc[j][i][idownc + d] = 0.5 * (arrf[jj][ii - 1][idownf + d] + arrf[jj][ii][idownf + d]);
108:         else arrc[j][i][idownc + d] = arrf[jj][ii][idownf + d];
109:       }

111:   for (d = 0; d < dof[2]; ++d)
112:     for (j = yc; j < yc + nc; ++j)
113:       for (i = xc; i < xc + mc; ++i) {
114:         const PetscInt ii = factorx * i + factorx / 2, jj = factory * j + factory / 2;

116:         if (factorx % 2 == 0 && factory % 2 == 0) arrc[j][i][ielemc + d] = 0.25 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj][ii - 1][ielemf + d] + arrf[jj - 1][ii][ielemf + d] + arrf[jj][ii][ielemf + d]);
117:         else if (factorx % 2 == 0) arrc[j][i][ielemc + d] = 0.5 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj][ii - 1][ielemf + d]);
118:         else if (factory % 2 == 0) arrc[j][i][ielemc + d] = 0.5 * (arrf[jj - 1][ii - 1][ielemf + d] + arrf[jj - 1][ii][ielemf + d]);
119:         else arrc[j][i][ielemc + d] = arrf[jj][ii][ielemf + d];
120:       }

122:   PetscCall(DMStagVecRestoreArray(dmf, xf_local, &arrf));
123:   PetscCall(DMStagVecRestoreArray(dmc, xc_local, &arrc));
124:   PetscFunctionReturn(PETSC_SUCCESS);
125: }

127: PETSC_INTERN PetscErrorCode DMStagSetUniformCoordinatesExplicit_2d(DM dm, PetscReal xmin, PetscReal xmax, PetscReal ymin, PetscReal ymax)
128: {
129:   DM_Stag       *stagCoord;
130:   DM             dmCoord;
131:   Vec            coordLocal;
132:   PetscReal      h[2], min[2];
133:   PetscScalar ***arr;
134:   PetscInt       ind[2], start_ghost[2], n_ghost[2], s, c;
135:   PetscInt       idownleft, idown, ileft, ielement;

137:   PetscFunctionBegin;
138:   PetscCall(DMGetCoordinateDM(dm, &dmCoord));
139:   stagCoord = (DM_Stag *)dmCoord->data;
140:   for (s = 0; s < 3; ++s) {
141:     PetscCheck(stagCoord->dof[s] == 0 || stagCoord->dof[s] == 2, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Coordinate DM in 2 dimensions must have 0 or 2 dof on each stratum, but stratum %" PetscInt_FMT " has %" PetscInt_FMT " dof", s,
142:                stagCoord->dof[s]);
143:   }
144:   PetscCall(DMCreateLocalVector(dmCoord, &coordLocal));

146:   PetscCall(DMStagVecGetArray(dmCoord, coordLocal, &arr));
147:   if (stagCoord->dof[0]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN_LEFT, 0, &idownleft));
148:   if (stagCoord->dof[1]) {
149:     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_DOWN, 0, &idown));
150:     PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_LEFT, 0, &ileft));
151:   }
152:   if (stagCoord->dof[2]) PetscCall(DMStagGetLocationSlot(dmCoord, DMSTAG_ELEMENT, 0, &ielement));
153:   PetscCall(DMStagGetGhostCorners(dmCoord, &start_ghost[0], &start_ghost[1], NULL, &n_ghost[0], &n_ghost[1], NULL));

155:   min[0] = xmin;
156:   min[1] = ymin;
157:   h[0]   = (xmax - xmin) / stagCoord->N[0];
158:   h[1]   = (ymax - ymin) / stagCoord->N[1];

160:   for (ind[1] = start_ghost[1]; ind[1] < start_ghost[1] + n_ghost[1]; ++ind[1]) {
161:     for (ind[0] = start_ghost[0]; ind[0] < start_ghost[0] + n_ghost[0]; ++ind[0]) {
162:       if (stagCoord->dof[0]) {
163:         const PetscReal offs[2] = {0.0, 0.0};
164:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idownleft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
165:       }
166:       if (stagCoord->dof[1]) {
167:         const PetscReal offs[2] = {0.5, 0.0};
168:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][idown + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
169:       }
170:       if (stagCoord->dof[1]) {
171:         const PetscReal offs[2] = {0.0, 0.5};
172:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ileft + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
173:       }
174:       if (stagCoord->dof[2]) {
175:         const PetscReal offs[2] = {0.5, 0.5};
176:         for (c = 0; c < 2; ++c) arr[ind[1]][ind[0]][ielement + c] = min[c] + ((PetscReal)ind[c] + offs[c]) * h[c];
177:       }
178:     }
179:   }
180:   PetscCall(DMStagVecRestoreArray(dmCoord, coordLocal, &arr));
181:   PetscCall(DMSetCoordinatesLocal(dm, coordLocal));
182:   PetscCall(VecDestroy(&coordLocal));
183:   PetscFunctionReturn(PETSC_SUCCESS);
184: }

186: /* Helper functions used in DMSetUp_Stag() */
187: static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM);
188: static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM);
189: static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM, PetscInt **);
190: static PetscErrorCode DMStagComputeLocationOffsets_2d(DM);

192: PETSC_INTERN PetscErrorCode DMSetUp_Stag_2d(DM dm)
193: {
194:   DM_Stag *const stag = (DM_Stag *)dm->data;
195:   PetscMPIInt    size, rank;
196:   PetscInt       i, j, d, entriesPerElementRowGhost, entriesPerCorner, entriesPerFace, entriesPerElementRow;
197:   MPI_Comm       comm;
198:   PetscInt      *globalOffsets;
199:   PetscBool      star, dummyStart[2], dummyEnd[2];
200:   const PetscInt dim = 2;

202:   PetscFunctionBegin;
203:   PetscCall(PetscObjectGetComm((PetscObject)dm, &comm));
204:   PetscCallMPI(MPI_Comm_size(comm, &size));
205:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

207:   /* Rank grid sizes (populates stag->nRanks) */
208:   PetscCall(DMStagSetUpBuildRankGrid_2d(dm));

210:   /* Determine location of rank in grid (these get extra boundary points on the last element)
211:      Order is x-fast, as usual */
212:   stag->rank[0] = rank % stag->nRanks[0];
213:   stag->rank[1] = rank / stag->nRanks[0];
214:   for (i = 0; i < dim; ++i) {
215:     stag->firstRank[i] = PetscNot(stag->rank[i]);
216:     stag->lastRank[i]  = (PetscBool)(stag->rank[i] == stag->nRanks[i] - 1);
217:   }

219:   /* Determine Locally owned region

   Divide equally, giving lower ranks in each dimension an extra element if need be.

223:    Note that this uses O(P) storage. If this ever becomes an issue, this could
224:    be refactored to not keep this data around.  */
225:   for (i = 0; i < dim; ++i) {
226:     if (!stag->l[i]) {
227:       const PetscInt Ni = stag->N[i], nRanksi = stag->nRanks[i];
228:       PetscCall(PetscMalloc1(stag->nRanks[i], &stag->l[i]));
229:       for (j = 0; j < stag->nRanks[i]; ++j) stag->l[i][j] = Ni / nRanksi + ((Ni % nRanksi) > j);
230:     }
231:   }

233:   /* Retrieve local size in stag->n */
234:   for (i = 0; i < dim; ++i) stag->n[i] = stag->l[i][stag->rank[i]];
235:   if (PetscDefined(USE_DEBUG)) {
236:     for (i = 0; i < dim; ++i) {
237:       PetscInt Ncheck, j;
238:       Ncheck = 0;
239:       for (j = 0; j < stag->nRanks[i]; ++j) Ncheck += stag->l[i][j];
240:       PetscCheck(Ncheck == stag->N[i], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local sizes in dimension %" PetscInt_FMT " don't add up. %" PetscInt_FMT " != %" PetscInt_FMT, i, Ncheck, stag->N[i]);
241:     }
242:   }

244:   /* Compute starting elements */
245:   for (i = 0; i < dim; ++i) {
246:     stag->start[i] = 0;
247:     for (j = 0; j < stag->rank[i]; ++j) stag->start[i] += stag->l[i][j];
248:   }

250:   /* Determine ranks of neighbors, using DMDA's convention

252:      n6 n7 n8
253:      n3    n5
254:      n0 n1 n2                                               */
255:   PetscCall(DMStagSetUpBuildNeighbors_2d(dm));

257:   /* Determine whether the ghost region includes dummies or not. This is currently
258:        equivalent to having a non-periodic boundary. If not, then
259:        ghostOffset{Start,End}[d] elements correspond to elements on the neighbor.
260:        If true, then
261:        - at the start, there are ghostOffsetStart[d] ghost elements
262:        - at the end, there is a layer of extra "physical" points inside a layer of
263:          ghostOffsetEnd[d] ghost elements
264:        Note that this computation should be updated if any boundary types besides
265:        NONE, GHOSTED, and PERIODIC are supported.  */
266:   for (d = 0; d < 2; ++d) dummyStart[d] = (PetscBool)(stag->firstRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
267:   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);

269:   /* Define useful sizes */
270:   stag->entriesPerElement = stag->dof[0] + 2 * stag->dof[1] + stag->dof[2];
271:   entriesPerFace          = stag->dof[0] + stag->dof[1];
272:   entriesPerCorner        = stag->dof[0];
273:   entriesPerElementRow    = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
274:   stag->entries           = stag->n[1] * entriesPerElementRow + (dummyEnd[1] ? stag->n[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);

276:   /* Compute offsets for each rank into global vectors
277:      This again requires O(P) storage, which could be replaced with some global
278:      communication.  */
279:   PetscCall(DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsets));

281:   for (d = 0; d < dim; ++d)
282:     PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type");

284:   /* Define ghosted/local sizes */
285:   if (stag->stencilType != DMSTAG_STENCIL_NONE && (stag->n[0] < stag->stencilWidth || stag->n[1] < stag->stencilWidth)) {
286:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "DMStag 2d setup does not support local sizes (%" PetscInt_FMT " x %" PetscInt_FMT ") smaller than the elementwise stencil width (%" PetscInt_FMT ")", stag->n[0], stag->n[1], stag->stencilWidth);
287:   }
288:   for (d = 0; d < dim; ++d) {
289:     switch (stag->boundaryType[d]) {
290:     case DM_BOUNDARY_NONE:
      /* Note: for an elements-only DMStag, the extra elements on the faces aren't necessary but we include them anyway */
292:       switch (stag->stencilType) {
293:       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
294:         stag->nGhost[d]     = stag->n[d];
295:         stag->startGhost[d] = stag->start[d];
296:         if (stag->lastRank[d]) stag->nGhost[d] += 1;
297:         break;
298:       case DMSTAG_STENCIL_STAR: /* allocate the corners but don't use them */
299:       case DMSTAG_STENCIL_BOX:
300:         stag->nGhost[d]     = stag->n[d];
301:         stag->startGhost[d] = stag->start[d];
302:         if (!stag->firstRank[d]) {
303:           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
304:           stag->startGhost[d] -= stag->stencilWidth;
305:         }
306:         if (!stag->lastRank[d]) {
307:           stag->nGhost[d] += stag->stencilWidth; /* add interior ghost elements */
308:         } else {
309:           stag->nGhost[d] += 1; /* one element on the boundary to complete blocking */
310:         }
311:         break;
312:       default:
313:         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
314:       }
315:       break;
316:     case DM_BOUNDARY_GHOSTED:
317:       switch (stag->stencilType) {
318:       case DMSTAG_STENCIL_NONE:
319:         stag->startGhost[d] = stag->start[d];
320:         stag->nGhost[d]     = stag->n[d] + (stag->lastRank[d] ? 1 : 0);
321:         break;
322:       case DMSTAG_STENCIL_STAR:
323:       case DMSTAG_STENCIL_BOX:
324:         stag->startGhost[d] = stag->start[d] - stag->stencilWidth; /* This value may be negative */
325:         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth + (stag->lastRank[d] && stag->stencilWidth == 0 ? 1 : 0);
326:         break;
327:       default:
328:         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
329:       }
330:       break;
331:     case DM_BOUNDARY_PERIODIC:
332:       switch (stag->stencilType) {
333:       case DMSTAG_STENCIL_NONE: /* only the extra one on the right/top faces */
334:         stag->nGhost[d]     = stag->n[d];
335:         stag->startGhost[d] = stag->start[d];
336:         break;
337:       case DMSTAG_STENCIL_STAR:
338:       case DMSTAG_STENCIL_BOX:
339:         stag->nGhost[d]     = stag->n[d] + 2 * stag->stencilWidth;
340:         stag->startGhost[d] = stag->start[d] - stag->stencilWidth;
341:         break;
342:       default:
343:         SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unrecognized ghost stencil type %d", stag->stencilType);
344:       }
345:       break;
346:     default:
347:       SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported boundary type in dimension %" PetscInt_FMT, d);
348:     }
349:   }
350:   stag->entriesGhost        = stag->nGhost[0] * stag->nGhost[1] * stag->entriesPerElement;
351:   entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;

353:   /* Create global-->local VecScatter and local->global ISLocalToGlobalMapping

355:      We iterate over all local points twice. First, we iterate over each neighbor, populating
356:      1. idxLocal[] : the subset of points, in local numbering ("S" from 0 on all points including ghosts), which correspond to global points. That is, the set of all non-dummy points in the ghosted representation
357:      2. idxGlobal[]: the corresponding global points, in global numbering (Nested "S"s - ranks then non-ghost points in each rank)

359:      Next, we iterate over all points in the local ordering, populating
360:      3. idxGlobalAll[] : entry i is the global point corresponding to local point i, or -1 if local point i is a dummy.

362:      Note further here that the local/ghosted vectors:
363:      - Are always an integral number of elements-worth of points, in all directions.
364:      - Contain three flavors of points:
365:      1. Points which "live here" in the global representation
366:      2. Ghost points which correspond to points on other ranks in the global representation
367:      3. Ghost points, which we call "dummy points," which do not correspond to any point in the global representation

369:      Dummy ghost points arise in at least three ways:
370:      1. As padding for the right, top, and front physical boundaries, to complete partial elements
371:      2. As unused space in the "corners" on interior ranks when using a star stencil
372:      3. As additional work space on all physical boundaries, when DM_BOUNDARY_GHOSTED is used

374:      Note that, because of the boundary dummies,
375:      with a stencil width of zero, on 1 rank, local and global vectors
376:      are still different!

378:      We assume that the size on each rank is greater than or equal to the
379:      stencil width.
380:      */

382:   /* Check stencil type */
383:   PetscCheck(stag->stencilType == DMSTAG_STENCIL_NONE || stag->stencilType == DMSTAG_STENCIL_BOX || stag->stencilType == DMSTAG_STENCIL_STAR, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Unsupported stencil type %s", DMStagStencilTypes[stag->stencilType]);
384:   star = (PetscBool)(stag->stencilType == DMSTAG_STENCIL_STAR || stag->stencilType == DMSTAG_STENCIL_NONE);

386:   {
387:     PetscInt *idxLocal, *idxGlobal, *idxGlobalAll;
388:     PetscInt  count, countAll, entriesToTransferTotal, i, j, d, ghostOffsetStart[2], ghostOffsetEnd[2];
389:     IS        isLocal, isGlobal;
390:     PetscInt  jghost, ighost;
391:     PetscInt  nNeighbors[9][2];
392:     PetscBool nextToDummyEnd[2];

394:     /* Compute numbers of elements on each neighbor */
395:     for (i = 0; i < 9; ++i) {
396:       const PetscInt neighborRank = stag->neighbors[i];
397:       if (neighborRank >= 0) { /* note we copy the values for our own rank (neighbor 4) */
398:         nNeighbors[i][0] = stag->l[0][neighborRank % stag->nRanks[0]];
399:         nNeighbors[i][1] = stag->l[1][neighborRank / stag->nRanks[0]];
400:       } else {
401:         nNeighbors[i][0] = 0;
402:         nNeighbors[i][1] = 0;
403:       }
404:     }

406:     /* These offsets should always be non-negative, and describe how many
407:        ghost elements exist at each boundary. These are not always equal to the stencil width,
408:        because we may have different numbers of ghost elements at the boundaries. In particular,
409:        we always have at least one ghost (dummy) element at the right/top/front. */
410:     for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
411:     for (d = 0; d < 2; ++d) ghostOffsetEnd[d] = stag->startGhost[d] + stag->nGhost[d] - (stag->start[d] + stag->n[d]);

413:     /* Compute whether the next rank has an extra point (only used in x direction) */
414:     for (d = 0; d < 2; ++d) nextToDummyEnd[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC && stag->rank[d] == stag->nRanks[d] - 2);

416:     /* Compute the number of local entries which correspond to any global entry */
417:     {
418:       PetscInt nNonDummyGhost[2];
419:       for (d = 0; d < 2; ++d) nNonDummyGhost[d] = stag->nGhost[d] - (dummyStart[d] ? ghostOffsetStart[d] : 0) - (dummyEnd[d] ? ghostOffsetEnd[d] : 0);
420:       if (star) {
421:         entriesToTransferTotal = (nNonDummyGhost[0] * stag->n[1] + stag->n[0] * nNonDummyGhost[1] - stag->n[0] * stag->n[1]) * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
422:       } else {
423:         entriesToTransferTotal = nNonDummyGhost[0] * nNonDummyGhost[1] * stag->entriesPerElement + (dummyEnd[0] ? nNonDummyGhost[1] * entriesPerFace : 0) + (dummyEnd[1] ? nNonDummyGhost[0] * entriesPerFace : 0) + (dummyEnd[0] && dummyEnd[1] ? entriesPerCorner : 0);
424:       }
425:     }

427:     /* Allocate arrays to populate */
428:     PetscCall(PetscMalloc1(entriesToTransferTotal, &idxLocal));
429:     PetscCall(PetscMalloc1(entriesToTransferTotal, &idxGlobal));

431:     /* Counts into idxLocal/idxGlobal */
432:     count = 0;

434:     /* Here and below, we work with (i,j) describing element numbers within a neighboring rank's global ordering,
435:        to be offset by that rank's global offset,
       and (ighost,jghost) referring to element numbers within this rank's local (ghosted) ordering */

438:     /* Neighbor 0 (down left) */
439:     if (!star && !dummyStart[0] && !dummyStart[1]) {
440:       const PetscInt        neighbor                     = 0;
441:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
442:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
443:       const PetscInt        entriesPerElementRowNeighbor = stag->entriesPerElement * nNeighbor[0];
444:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
445:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
446:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
447:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
448:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
449:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
450:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
451:           }
452:         }
453:       }
454:     }

456:     /* Neighbor 1 (down) */
457:     if (!dummyStart[1]) {
458:       /* We may be a ghosted boundary in x, in which case the neighbor is also */
459:       const PetscInt        neighbor                     = 1;
460:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
461:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
462:       const PetscInt        entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
463:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
464:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
465:         for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
466:           const PetscInt i = ighost - ghostOffsetStart[0];
467:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
468:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
469:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
470:           }
471:         }
472:         if (dummyEnd[0]) {
473:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
474:           const PetscInt i      = stag->n[0];
475:           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
476:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
477:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
478:           }
479:           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
480:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
481:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
482:           }
483:         }
484:       }
485:     }

487:     /* Neighbor 2 (down right) */
488:     if (!star && !dummyEnd[0] && !dummyStart[1]) {
489:       const PetscInt        neighbor                     = 2;
490:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
491:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
492:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
493:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
494:         const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost;
495:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
496:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
497:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
498:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
499:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
500:           }
501:         }
502:       }
503:     }

505:     /* Neighbor 3 (left) */
506:     if (!dummyStart[0]) {
507:       /* Our neighbor is never a ghosted boundary in x, but we may be
508:          Here, we may be a ghosted boundary in y and thus so will our neighbor be */
509:       const PetscInt        neighbor                     = 3;
510:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
511:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
512:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
513:       for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
514:         const PetscInt j = jghost - ghostOffsetStart[1];
515:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
516:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
517:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
518:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
519:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
520:           }
521:         }
522:       }
523:       if (dummyEnd[1]) {
524:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
525:         const PetscInt j      = stag->n[1];
526:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
527:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
528:           for (d = 0; d < entriesPerFace; ++d, ++count) {                                                /* only vertices and horizontal face (which are the first dof) */
529:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* i moves by face here */
530:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
531:           }
532:         }
533:       }
534:     }

536:     /* Interior/Resident-here-in-global elements ("Neighbor 4" - same rank)
537:        *including* entries from boundary dummy elements */
538:     {
539:       const PetscInt neighbor     = 4;
540:       const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
541:       for (j = 0; j < stag->n[1]; ++j) {
542:         const PetscInt jghost = j + ghostOffsetStart[1];
543:         for (i = 0; i < stag->n[0]; ++i) {
544:           const PetscInt ighost = i + ghostOffsetStart[0];
545:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
546:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
547:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
548:           }
549:         }
550:         if (dummyEnd[0]) {
551:           const PetscInt ighost = i + ghostOffsetStart[0];
552:           i                     = stag->n[0];
553:           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
554:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
555:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
556:           }
557:           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
558:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
559:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
560:           }
561:         }
562:       }
563:       if (dummyEnd[1]) {
564:         const PetscInt jghost = j + ghostOffsetStart[1];
565:         j                     = stag->n[1];
566:         for (i = 0; i < stag->n[0]; ++i) {
567:           const PetscInt ighost = i + ghostOffsetStart[0];
568:           for (d = 0; d < entriesPerFace; ++d, ++count) {                                        /* vertex and bottom face (which are the first entries) */
569:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
570:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
571:           }
572:         }
573:         if (dummyEnd[0]) {
574:           const PetscInt ighost = i + ghostOffsetStart[0];
575:           i                     = stag->n[0];
576:           for (d = 0; d < entriesPerCorner; ++d, ++count) {                                      /* vertex only */
577:             idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
578:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
579:           }
580:         }
581:       }
582:     }

584:     /* Neighbor 5 (right) */
585:     if (!dummyEnd[0]) {
586:       /* We can never be right boundary, but we may be a top boundary, along with the right neighbor */
587:       const PetscInt        neighbor                     = 5;
588:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
589:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
590:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
591:       for (jghost = ghostOffsetStart[1]; jghost < stag->nGhost[1] - ghostOffsetEnd[1]; ++jghost) {
592:         const PetscInt j = jghost - ghostOffsetStart[1];
593:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
594:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
595:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
596:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
597:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
598:           }
599:         }
600:       }
601:       if (dummyEnd[1]) {
602:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1];
603:         const PetscInt j      = nNeighbor[1];
604:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
605:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
606:           for (d = 0; d < entriesPerFace; ++d, ++count) {                                                /* only vertices and horizontal face (which are the first dof) */
607:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * entriesPerFace + d; /* Note i increment by entriesPerFace */
608:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
609:           }
610:         }
611:       }
612:     }

614:     /* Neighbor 6 (up left) */
615:     if (!star && !dummyStart[0] && !dummyEnd[1]) {
616:       /* We can never be a top boundary, but our neighbor may be
617:        We may be a right boundary, but our neighbor cannot be */
618:       const PetscInt        neighbor                     = 6;
619:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
620:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
621:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement;
622:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
623:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
624:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
625:           const PetscInt i = nNeighbor[0] - ghostOffsetStart[0] + ighost;
626:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
627:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
628:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
629:           }
630:         }
631:       }
632:     }

634:     /* Neighbor 7 (up) */
635:     if (!dummyEnd[1]) {
636:       /* We cannot be the last rank in y, though our neighbor may be
637:        We may be the last rank in x, in which case our neighbor is also */
638:       const PetscInt        neighbor                     = 7;
639:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
640:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
641:       const PetscInt        entriesPerElementRowNeighbor = entriesPerElementRow; /* same as here */
642:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
643:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
644:         for (ighost = ghostOffsetStart[0]; ighost < stag->nGhost[0] - ghostOffsetEnd[0]; ++ighost) {
645:           const PetscInt i = ighost - ghostOffsetStart[0];
646:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
647:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
648:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
649:           }
650:         }
651:         if (dummyEnd[0]) {
652:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0];
653:           const PetscInt i      = nNeighbor[0];
654:           for (d = 0; d < stag->dof[0]; ++d, ++count) { /* Vertex */
655:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
656:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
657:           }
658:           for (d = 0; d < stag->dof[1]; ++d, ++count) { /* Face */
659:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + stag->dof[0] + d;
660:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
661:           }
662:         }
663:       }
664:     }

666:     /* Neighbor 8 (up right) */
667:     if (!star && !dummyEnd[0] && !dummyEnd[1]) {
668:       /* We can never be a ghosted boundary
669:          Our neighbor may be a top boundary, a right boundary, or both */
670:       const PetscInt        neighbor                     = 8;
671:       const PetscInt        globalOffset                 = globalOffsets[stag->neighbors[neighbor]];
672:       const PetscInt *const nNeighbor                    = nNeighbors[neighbor];
673:       const PetscInt        entriesPerElementRowNeighbor = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
674:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
675:         const PetscInt jghost = stag->nGhost[1] - ghostOffsetEnd[1] + j;
676:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
677:           const PetscInt ighost = stag->nGhost[0] - ghostOffsetEnd[0] + i;
678:           for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
679:             idxGlobal[count] = globalOffset + j * entriesPerElementRowNeighbor + i * stag->entriesPerElement + d;
680:             idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
681:           }
682:         }
683:       }
684:     }

686:     PetscCheck(count == entriesToTransferTotal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in gtol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, entriesToTransferTotal);

688:     /* Create Local and Global ISs (transferring pointer ownership) */
689:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxLocal, PETSC_OWN_POINTER, &isLocal));
690:     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), entriesToTransferTotal, idxGlobal, PETSC_OWN_POINTER, &isGlobal));

692:     /* Create stag->gtol. The order is computed as PETSc ordering, and doesn't include dummy entries */
693:     {
694:       Vec local, global;
695:       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
696:       PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, stag->entriesPerElement, stag->entriesGhost, NULL, &local));
697:       PetscCall(VecScatterCreate(global, isGlobal, local, isLocal, &stag->gtol));
698:       PetscCall(VecDestroy(&global));
699:       PetscCall(VecDestroy(&local));
700:     }

702:     /* Destroy ISs */
703:     PetscCall(ISDestroy(&isLocal));
704:     PetscCall(ISDestroy(&isGlobal));

706:     /* Next, we iterate over the local entries  again, in local order, recording the global entry to which each maps,
707:        or -1 if there is none */
708:     PetscCall(PetscMalloc1(stag->entriesGhost, &idxGlobalAll));

710:     countAll = 0;

712:     /* Loop over rows 1/3 : down */
713:     if (!dummyStart[1]) {
714:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
715:         /* Loop over columns 1/3 : down left */
716:         if (!star && !dummyStart[0]) {
717:           const PetscInt        neighbor     = 0;
718:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
719:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
720:           const PetscInt j = nNeighbor[1] - ghostOffsetStart[1] + jghost; /* Note: this is actually the same value for the whole row of ranks below, so recomputing it for the next two ranks is redundant, and one could even get rid of jghost entirely if desired */
721:           const PetscInt eprNeighbor = nNeighbor[0] * stag->entriesPerElement;
722:           for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
723:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
724:           }
725:         } else {
726:           /* Down Left dummies */
727:           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
728:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
729:           }
730:         }

732:         /* Loop over columns 2/3 : down middle */
733:         {
734:           const PetscInt        neighbor     = 1;
735:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
736:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
737:           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
738:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here */
739:           for (i = 0; i < nNeighbor[0]; ++i) {
740:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
741:           }
742:         }

744:         /* Loop over columns 3/3 : down right */
745:         if (!star && !dummyEnd[0]) {
746:           const PetscInt        neighbor     = 2;
747:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
748:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
749:           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
750:           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
751:           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
752:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
753:           }
754:         } else if (dummyEnd[0]) {
755:           /* Down right partial dummy elements, living on the *down* rank */
756:           const PetscInt        neighbor     = 1;
757:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
758:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
759:           const PetscInt        j            = nNeighbor[1] - ghostOffsetStart[1] + jghost;
760:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here */
761:           PetscInt              dGlobal;
762:           i = nNeighbor[0];
763:           for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
764:           for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy down face point */ }
765:           for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
766:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy element point */ }
767:           ++i;
768:           for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
769:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
770:           }
771:         } else {
772:           /* Down Right dummies */
773:           for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
774:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
775:           }
776:         }
777:       }
778:     } else {
779:       /* Down dummies row */
780:       for (jghost = 0; jghost < ghostOffsetStart[1]; ++jghost) {
781:         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
782:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
783:         }
784:       }
785:     }

787:     /* Loop over rows 2/3 : center */
788:     for (j = 0; j < stag->n[1]; ++j) {
789:       /* Loop over columns 1/3 : left */
790:       if (!dummyStart[0]) {
791:         const PetscInt        neighbor     = 3;
792:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
793:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
794:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
795:         for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
796:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
797:         }
798:       } else {
799:         /* (Middle) Left dummies */
800:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
801:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
802:         }
803:       }

805:       /* Loop over columns 2/3 : here (the "neighbor" is ourselves, here) */
806:       {
807:         const PetscInt neighbor     = 4;
808:         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
809:         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
810:         for (i = 0; i < stag->n[0]; ++i) {
811:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
812:         }
813:       }

815:       /* Loop over columns 3/3 : right */
816:       if (!dummyEnd[0]) {
817:         const PetscInt        neighbor     = 5;
818:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
819:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
820:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
821:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
822:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
823:         }
824:       } else {
825:         /* -1's for right layer of partial dummies, living on *this* rank */
826:         const PetscInt        neighbor     = 4;
827:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
828:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
829:         const PetscInt        eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
830:         PetscInt              dGlobal;
831:         i = nNeighbor[0];
832:         for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
833:         for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy down face point */ }
834:         for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
835:         for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy element point */ }
836:         ++i;
837:         for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
838:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
839:         }
840:       }
841:     }

843:     /* Loop over rows 3/3 : up */
844:     if (!dummyEnd[1]) {
845:       for (j = 0; j < ghostOffsetEnd[1]; ++j) {
846:         /* Loop over columns 1/3 : up left */
847:         if (!star && !dummyStart[0]) {
848:           const PetscInt        neighbor     = 6;
849:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
850:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
851:           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
852:           for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
853:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
854:           }
855:         } else {
856:           /* Up Left dummies */
857:           for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
858:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
859:           }
860:         }

862:         /* Loop over columns 2/3 : up */
863:         {
864:           const PetscInt        neighbor     = 7;
865:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
866:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
867:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* Same as here */
868:           for (i = 0; i < nNeighbor[0]; ++i) {
869:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
870:           }
871:         }

873:         /* Loop over columns 3/3 : up right */
874:         if (!star && !dummyEnd[0]) {
875:           const PetscInt        neighbor     = 8;
876:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
877:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
878:           const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
879:           for (i = 0; i < ghostOffsetEnd[0]; ++i) {
880:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + d;
881:           }
882:         } else if (dummyEnd[0]) {
883:           /* -1's for right layer of partial dummies, living on rank above */
884:           const PetscInt        neighbor     = 7;
885:           const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
886:           const PetscInt *const nNeighbor    = nNeighbors[neighbor];
887:           const PetscInt        eprNeighbor  = entriesPerElementRow; /* Same as here */
888:           PetscInt              dGlobal;
889:           i = nNeighbor[0];
890:           for (d = 0, dGlobal = 0; d < stag->dof[0]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
891:           for (; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy down face point */ }
892:           for (; d < stag->dof[0] + 2 * stag->dof[1]; ++d, ++dGlobal, ++countAll) idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * stag->entriesPerElement + dGlobal;
893:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy element point */ }
894:           ++i;
895:           for (; i < nNeighbor[0] + ghostOffsetEnd[0]; ++i) {
896:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
897:           }
898:         } else {
899:           /* Up Right dummies */
900:           for (ighost = 0; ighost < ghostOffsetEnd[0]; ++ighost) {
901:             for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
902:           }
903:         }
904:       }
905:     } else {
906:       j = stag->n[1];
907:       /* Top layer of partial dummies */

909:       /* up left partial dummies layer : Loop over columns 1/3 : living on *left* neighbor */
910:       if (!dummyStart[0]) {
911:         const PetscInt        neighbor     = 3;
912:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
913:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
914:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement;
915:         for (i = nNeighbor[0] - ghostOffsetStart[0]; i < nNeighbor[0]; ++i) {
916:           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */ }
917:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy left face and element points */ }
918:         }
919:       } else {
920:         for (ighost = 0; ighost < ghostOffsetStart[0]; ++ighost) {
921:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
922:         }
923:       }

925:       /* up partial dummies layer : Loop over columns 2/3 : living on *this* rank */
926:       {
927:         const PetscInt neighbor     = 4;
928:         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
929:         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
930:         for (i = 0; i < stag->n[0]; ++i) {
931:           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */ }
932:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy left face and element points */ }
933:         }
934:       }

936:       if (!dummyEnd[0]) {
937:         /* up right partial dummies layer : Loop over columns 3/3 :  living on *right* neighbor */
938:         const PetscInt        neighbor     = 5;
939:         const PetscInt        globalOffset = globalOffsets[stag->neighbors[neighbor]];
940:         const PetscInt *const nNeighbor    = nNeighbors[neighbor];
941:         const PetscInt        eprNeighbor  = nNeighbor[0] * stag->entriesPerElement + (nextToDummyEnd[0] ? entriesPerFace : 0);
942:         for (i = 0; i < ghostOffsetEnd[0]; ++i) {
943:           for (d = 0; d < stag->dof[0] + stag->dof[1]; ++d, ++countAll) { idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */ }
944:           for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy left face and element points */ }
945:         }
946:       } else {
947:         /* Top partial dummies layer : Loop over columns 3/3 : right, living *here* */
948:         const PetscInt neighbor     = 4;
949:         const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
950:         const PetscInt eprNeighbor  = entriesPerElementRow; /* same as here (obviously) */
951:         i                           = stag->n[0];
952:         for (d = 0; d < stag->dof[0]; ++d, ++countAll) {                                    /* Note just the vertex here */
953:           idxGlobalAll[countAll] = globalOffset + j * eprNeighbor + i * entriesPerFace + d; /* Note entriesPerFace here */
954:         }
955:         for (; d < stag->entriesPerElement; ++d, ++countAll) { idxGlobalAll[countAll] = -1; /* dummy bottom face, left face and element points */ }
956:         ++i;
957:         for (; i < stag->n[0] + ghostOffsetEnd[0]; ++i) {
958:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
959:         }
960:       }
961:       ++j;
962:       /* Additional top dummy layers */
963:       for (; j < stag->n[1] + ghostOffsetEnd[1]; ++j) {
964:         for (ighost = 0; ighost < stag->nGhost[0]; ++ighost) {
965:           for (d = 0; d < stag->entriesPerElement; ++d, ++countAll) idxGlobalAll[countAll] = -1;
966:         }
967:       }
968:     }

970:     /* Create local-to-global map (in local ordering, includes maps to -1 for dummy points) */
971:     PetscCall(ISLocalToGlobalMappingCreate(comm, 1, stag->entriesGhost, idxGlobalAll, PETSC_OWN_POINTER, &dm->ltogmap));
972:   }

974:   /* In special cases, create a dedicated injective local-to-global map */
975:   if ((stag->boundaryType[0] == DM_BOUNDARY_PERIODIC && stag->nRanks[0] == 1) || (stag->boundaryType[1] == DM_BOUNDARY_PERIODIC && stag->nRanks[1] == 1)) PetscCall(DMStagPopulateLocalToGlobalInjective(dm));

977:   /* Free global offsets */
978:   PetscCall(PetscFree(globalOffsets));

980:   /* Precompute location offsets */
981:   PetscCall(DMStagComputeLocationOffsets_2d(dm));

983:   /* View from Options */
984:   PetscCall(DMViewFromOptions(dm, NULL, "-dm_view"));
985:   PetscFunctionReturn(PETSC_SUCCESS);
986: }

988: /* adapted from da2.c */
989: static PetscErrorCode DMStagSetUpBuildRankGrid_2d(DM dm)
990: {
991:   DM_Stag *const stag = (DM_Stag *)dm->data;
992:   PetscMPIInt    rank, size, m, n;
993:   const PetscInt M = stag->N[0];
994:   const PetscInt N = stag->N[1];

996:   PetscFunctionBegin;
997:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
998:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)dm), &rank));
999:   m = stag->nRanks[0];
1000:   n = stag->nRanks[1];
1001:   if (m != PETSC_DECIDE) {
1002:     PetscCheck(m >= 1, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of ranks in X direction: %d", m);
1003:     PetscCheck(m <= size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Too many ranks in X direction: %d %d", m, size);
1004:   }
1005:   if (n != PETSC_DECIDE) {
1006:     PetscCheck(n >= 1, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Non-positive number of ranks in Y direction: %d", n);
1007:     PetscCheck(n <= size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Too many ranks in Y direction: %d %d", n, size);
1008:   }
1009:   if (m == PETSC_DECIDE || n == PETSC_DECIDE) {
1010:     if (n != PETSC_DECIDE) {
1011:       m = size / n;
1012:     } else if (m != PETSC_DECIDE) {
1013:       n = size / m;
1014:     } else {
1015:       /* try for squarish distribution */
1016:       m = (PetscMPIInt)(0.5 + PetscSqrtReal(((PetscReal)M) * ((PetscReal)size) / ((PetscReal)N)));
1017:       if (!m) m = 1;
1018:       while (m > 0) {
1019:         n = size / m;
1020:         if (m * n == size) break;
1021:         m--;
1022:       }
1023:       if (M > N && m < n) {
1024:         PetscMPIInt _m = m;
1025:         m              = n;
1026:         n              = _m;
1027:       }
1028:     }
1029:     PetscCheck(m * n == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_PLIB, "Unable to create partition, check the size of the communicator and input m and n ");
1030:   } else PetscCheck(m * n == size, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Given Bad partition. Product of sizes (%d) does not equal communicator size (%d)", m * n, size);
1031:   PetscCheck(M >= m, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in x direction is too fine! %" PetscInt_FMT " %d", M, m);
1032:   PetscCheck(N >= n, PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Partition in y direction is too fine! %" PetscInt_FMT " %d", N, n);
1033:   stag->nRanks[0] = m;
1034:   stag->nRanks[1] = n;
1035:   PetscFunctionReturn(PETSC_SUCCESS);
1036: }

1038: static PetscErrorCode DMStagSetUpBuildNeighbors_2d(DM dm)
1039: {
1040:   DM_Stag *const stag = (DM_Stag *)dm->data;
1041:   PetscInt       d, i;
1042:   PetscBool      per[2], first[2], last[2];
1043:   PetscMPIInt    neighborRank[9][2], r[2], n[2];
1044:   const PetscInt dim = 2;

1046:   PetscFunctionBegin;
1047:   for (d = 0; d < dim; ++d)
1048:     PetscCheck(stag->boundaryType[d] == DM_BOUNDARY_NONE || stag->boundaryType[d] == DM_BOUNDARY_PERIODIC || stag->boundaryType[d] == DM_BOUNDARY_GHOSTED, PetscObjectComm((PetscObject)dm), PETSC_ERR_SUP, "Neighbor determination not implemented for %s",
1049:                DMBoundaryTypes[stag->boundaryType[d]]);

1051:   /* Assemble some convenience variables */
1052:   for (d = 0; d < dim; ++d) {
1053:     per[d]   = (PetscBool)(stag->boundaryType[d] == DM_BOUNDARY_PERIODIC);
1054:     first[d] = stag->firstRank[d];
1055:     last[d]  = stag->lastRank[d];
1056:     r[d]     = stag->rank[d];
1057:     n[d]     = stag->nRanks[d];
1058:   }

1060:   /* First, compute the position in the rank grid for all neighbors */
1061:   neighborRank[0][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  down */
1062:   neighborRank[0][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;

1064:   neighborRank[1][0] = r[0]; /*       down */
1065:   neighborRank[1][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;

1067:   neighborRank[2][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right down */
1068:   neighborRank[2][1] = first[1] ? (per[1] ? n[1] - 1 : -1) : r[1] - 1;

1070:   neighborRank[3][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left       */
1071:   neighborRank[3][1] = r[1];

1073:   neighborRank[4][0] = r[0];
1074:   neighborRank[4][1] = r[1];

1076:   neighborRank[5][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right      */
1077:   neighborRank[5][1] = r[1];

1079:   neighborRank[6][0] = first[0] ? (per[0] ? n[0] - 1 : -1) : r[0] - 1; /* left  up   */
1080:   neighborRank[6][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;

1082:   neighborRank[7][0] = r[0]; /*       up   */
1083:   neighborRank[7][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;

1085:   neighborRank[8][0] = last[0] ? (per[0] ? 0 : -1) : r[0] + 1; /* right up   */
1086:   neighborRank[8][1] = last[1] ? (per[1] ? 0 : -1) : r[1] + 1;

1088:   /* Then, compute the rank of each in the linear ordering */
1089:   PetscCall(PetscMalloc1(9, &stag->neighbors));
1090:   for (i = 0; i < 9; ++i) {
1091:     if (neighborRank[i][0] >= 0 && neighborRank[i][1] >= 0) {
1092:       stag->neighbors[i] = neighborRank[i][0] + n[0] * neighborRank[i][1];
1093:     } else {
1094:       stag->neighbors[i] = -1;
1095:     }
1096:   }
1097:   PetscFunctionReturn(PETSC_SUCCESS);
1098: }

1100: static PetscErrorCode DMStagSetUpBuildGlobalOffsets_2d(DM dm, PetscInt **pGlobalOffsets)
1101: {
1102:   const DM_Stag *const stag = (DM_Stag *)dm->data;
1103:   PetscInt            *globalOffsets;
1104:   PetscInt             i, j, d, entriesPerFace, count;
1105:   PetscMPIInt          size;
1106:   PetscBool            extra[2];

1108:   PetscFunctionBegin;
1109:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)dm), &size));
1110:   for (d = 0; d < 2; ++d) extra[d] = (PetscBool)(stag->boundaryType[d] != DM_BOUNDARY_PERIODIC); /* Extra points in global rep */
1111:   entriesPerFace = stag->dof[0] + stag->dof[1];
1112:   PetscCall(PetscMalloc1(size, pGlobalOffsets));
1113:   globalOffsets    = *pGlobalOffsets;
1114:   globalOffsets[0] = 0;
1115:   count            = 1; /* note the count is offset by 1 here. We add the size of the previous rank */
1116:   for (j = 0; j < stag->nRanks[1] - 1; ++j) {
1117:     const PetscInt nnj = stag->l[1][j];
1118:     for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1119:       const PetscInt nni   = stag->l[0][i];
1120:       globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement; /* No right/top/front boundaries */
1121:       ++count;
1122:     }
1123:     {
1124:       /* i = stag->nRanks[0]-1; */
1125:       const PetscInt nni   = stag->l[0][i];
1126:       globalOffsets[count] = globalOffsets[count - 1] + nnj * nni * stag->entriesPerElement + (extra[0] ? nnj * entriesPerFace : 0); /* Extra faces on the right */
1127:       ++count;
1128:     }
1129:   }
1130:   {
1131:     /* j = stag->nRanks[1]-1; */
1132:     const PetscInt nnj = stag->l[1][j];
1133:     for (i = 0; i < stag->nRanks[0] - 1; ++i) {
1134:       const PetscInt nni   = stag->l[0][i];
1135:       globalOffsets[count] = globalOffsets[count - 1] + nni * nnj * stag->entriesPerElement + (extra[1] ? nni * entriesPerFace : 0); /* Extra faces on the top */
1136:       ++count;
1137:     }
1138:     /* Don't need to compute entries in last element */
1139:   }
1140:   PetscFunctionReturn(PETSC_SUCCESS);
1141: }

1143: static PetscErrorCode DMStagComputeLocationOffsets_2d(DM dm)
1144: {
1145:   DM_Stag *const stag = (DM_Stag *)dm->data;
1146:   const PetscInt epe  = stag->entriesPerElement;
1147:   const PetscInt epr  = stag->nGhost[0] * epe;

1149:   PetscFunctionBegin;
1150:   PetscCall(PetscMalloc1(DMSTAG_NUMBER_LOCATIONS, &stag->locationOffsets));
1151:   stag->locationOffsets[DMSTAG_DOWN_LEFT]  = 0;
1152:   stag->locationOffsets[DMSTAG_DOWN]       = stag->locationOffsets[DMSTAG_DOWN_LEFT] + stag->dof[0];
1153:   stag->locationOffsets[DMSTAG_DOWN_RIGHT] = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epe;
1154:   stag->locationOffsets[DMSTAG_LEFT]       = stag->locationOffsets[DMSTAG_DOWN] + stag->dof[1];
1155:   stag->locationOffsets[DMSTAG_ELEMENT]    = stag->locationOffsets[DMSTAG_LEFT] + stag->dof[1];
1156:   stag->locationOffsets[DMSTAG_RIGHT]      = stag->locationOffsets[DMSTAG_LEFT] + epe;
1157:   stag->locationOffsets[DMSTAG_UP_LEFT]    = stag->locationOffsets[DMSTAG_DOWN_LEFT] + epr;
1158:   stag->locationOffsets[DMSTAG_UP]         = stag->locationOffsets[DMSTAG_DOWN] + epr;
1159:   stag->locationOffsets[DMSTAG_UP_RIGHT]   = stag->locationOffsets[DMSTAG_UP_LEFT] + epe;
1160:   PetscFunctionReturn(PETSC_SUCCESS);
1161: }

1163: PETSC_INTERN PetscErrorCode DMStagPopulateLocalToGlobalInjective_2d(DM dm)
1164: {
1165:   DM_Stag *const  stag = (DM_Stag *)dm->data;
1166:   PetscInt       *idxLocal, *idxGlobal, *globalOffsetsRecomputed;
1167:   const PetscInt *globalOffsets;
1168:   PetscInt        i, j, d, count, entriesPerCorner, entriesPerFace, entriesPerElementRowGhost, entriesPerElementRow, ghostOffsetStart[2];
1169:   IS              isLocal, isGlobal;
1170:   PetscBool       dummyEnd[2];

1172:   PetscFunctionBegin;
1173:   PetscCall(DMStagSetUpBuildGlobalOffsets_2d(dm, &globalOffsetsRecomputed)); /* note that we don't actually use all of these. An available optimization is to pass them, when available */
1174:   globalOffsets = globalOffsetsRecomputed;
1175:   PetscCall(PetscMalloc1(stag->entries, &idxLocal));
1176:   PetscCall(PetscMalloc1(stag->entries, &idxGlobal));
1177:   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1178:   entriesPerCorner          = stag->dof[0];
1179:   entriesPerFace            = stag->dof[0] + stag->dof[1];
1180:   entriesPerElementRow      = stag->n[0] * stag->entriesPerElement + (dummyEnd[0] ? entriesPerFace : 0);
1181:   entriesPerElementRowGhost = stag->nGhost[0] * stag->entriesPerElement;
1182:   count                     = 0;
1183:   for (d = 0; d < 2; ++d) ghostOffsetStart[d] = stag->start[d] - stag->startGhost[d];
1184:   {
1185:     const PetscInt neighbor     = 4;
1186:     const PetscInt globalOffset = globalOffsets[stag->neighbors[neighbor]];
1187:     for (j = 0; j < stag->n[1]; ++j) {
1188:       const PetscInt jghost = j + ghostOffsetStart[1];
1189:       for (i = 0; i < stag->n[0]; ++i) {
1190:         const PetscInt ighost = i + ghostOffsetStart[0];
1191:         for (d = 0; d < stag->entriesPerElement; ++d, ++count) {
1192:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1193:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1194:         }
1195:       }
1196:       if (dummyEnd[0]) {
1197:         const PetscInt ighost = i + ghostOffsetStart[0];
1198:         i                     = stag->n[0];
1199:         for (d = 0; d < stag->dof[0]; ++d, ++count) { /* vertex first */
1200:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + d;
1201:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1202:         }
1203:         for (d = 0; d < stag->dof[1]; ++d, ++count) { /* then left edge (skipping bottom face) */
1204:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * stag->entriesPerElement + stag->dof[0] + d;
1205:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + stag->dof[0] + stag->dof[1] + d;
1206:         }
1207:       }
1208:     }
1209:     if (dummyEnd[1]) {
1210:       const PetscInt jghost = j + ghostOffsetStart[1];
1211:       j                     = stag->n[1];
1212:       for (i = 0; i < stag->n[0]; ++i) {
1213:         const PetscInt ighost = i + ghostOffsetStart[0];
1214:         for (d = 0; d < entriesPerFace; ++d, ++count) {                                        /* vertex and bottom face (which are the first entries) */
1215:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1216:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1217:         }
1218:       }
1219:       if (dummyEnd[0]) {
1220:         const PetscInt ighost = i + ghostOffsetStart[0];
1221:         i                     = stag->n[0];
1222:         for (d = 0; d < entriesPerCorner; ++d, ++count) {                                      /* vertex only */
1223:           idxGlobal[count] = globalOffset + j * entriesPerElementRow + i * entriesPerFace + d; /* note i increment by entriesPerFace */
1224:           idxLocal[count]  = jghost * entriesPerElementRowGhost + ighost * stag->entriesPerElement + d;
1225:         }
1226:       }
1227:     }
1228:   }
1229:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxLocal, PETSC_OWN_POINTER, &isLocal));
1230:   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)dm), stag->entries, idxGlobal, PETSC_OWN_POINTER, &isGlobal));
1231:   {
1232:     Vec local, global;
1233:     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)dm), 1, stag->entries, PETSC_DECIDE, NULL, &global));
1234:     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, stag->entriesPerElement, stag->entriesGhost, NULL, &local));
1235:     PetscCall(VecScatterCreate(local, isLocal, global, isGlobal, &stag->ltog_injective));
1236:     PetscCall(VecDestroy(&global));
1237:     PetscCall(VecDestroy(&local));
1238:   }
1239:   PetscCall(ISDestroy(&isLocal));
1240:   PetscCall(ISDestroy(&isGlobal));
1241:   if (globalOffsetsRecomputed) PetscCall(PetscFree(globalOffsetsRecomputed));
1242:   PetscFunctionReturn(PETSC_SUCCESS);
1243: }

1245: PETSC_INTERN PetscErrorCode DMStagPopulateLocalToLocal2d_Internal(DM dm)
1246: {
1247:   DM_Stag *const stag = (DM_Stag *)dm->data;
1248:   PetscInt      *idxRemap;
1249:   PetscBool      dummyEnd[2];
1250:   PetscInt       i, j, d, count, leftGhostElements, downGhostElements, entriesPerRowGhost, iOffset, jOffset;
1251:   PetscInt       dOffset[4] = {0};

1253:   PetscFunctionBegin;
1254:   PetscCall(VecScatterCopy(stag->gtol, &stag->ltol));
1255:   PetscCall(PetscMalloc1(stag->entries, &idxRemap));

1257:   for (d = 0; d < 2; ++d) dummyEnd[d] = (PetscBool)(stag->lastRank[d] && stag->boundaryType[d] != DM_BOUNDARY_PERIODIC);
1258:   leftGhostElements  = stag->start[0] - stag->startGhost[0];
1259:   downGhostElements  = stag->start[1] - stag->startGhost[1];
1260:   entriesPerRowGhost = stag->nGhost[0] * stag->entriesPerElement;
1261:   dOffset[1]         = dOffset[0] + stag->dof[0];
1262:   dOffset[2]         = dOffset[1] + stag->dof[1];
1263:   dOffset[3]         = dOffset[2] + stag->dof[1];

1265:   count = 0;
1266:   for (j = 0; j < stag->n[1]; ++j) {
1267:     jOffset = entriesPerRowGhost * (downGhostElements + j);
1268:     for (i = 0; i < stag->n[0]; ++i) {
1269:       iOffset = stag->entriesPerElement * (leftGhostElements + i);
1270:       // all
1271:       for (d = 0; d < stag->entriesPerElement; ++d) idxRemap[count++] = jOffset + iOffset + d;
1272:     }
1273:     if (dummyEnd[0]) {
1274:       iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
1275:       // down left, left
1276:       for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1277:       for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[2] + d;
1278:     }
1279:   }
1280:   if (dummyEnd[1]) {
1281:     jOffset = entriesPerRowGhost * (downGhostElements + stag->n[1]);
1282:     for (i = 0; i < stag->n[0]; ++i) {
1283:       iOffset = stag->entriesPerElement * (leftGhostElements + i);
1284:       // down left, down
1285:       for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1286:       for (d = 0; d < stag->dof[1]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[1] + d;
1287:     }
1288:     if (dummyEnd[0]) {
1289:       iOffset = stag->entriesPerElement * (leftGhostElements + stag->n[0]);
1290:       // down left
1291:       for (d = 0; d < stag->dof[0]; ++d) idxRemap[count++] = jOffset + iOffset + dOffset[0] + d;
1292:     }
1293:   }

1295:   PetscCheck(count == stag->entries, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Number of entries computed in ltol (%" PetscInt_FMT ") is not as expected (%" PetscInt_FMT ")", count, stag->entries);

1297:   PetscCall(VecScatterRemap(stag->ltol, idxRemap, NULL));
1298:   PetscCall(PetscFree(idxRemap));
1299:   PetscFunctionReturn(PETSC_SUCCESS);
1300: }

1302: PETSC_INTERN PetscErrorCode DMCreateMatrix_Stag_2D_AIJ_Assemble(DM dm, Mat A)
1303: {
1304:   PetscInt          entries, dof[DMSTAG_MAX_STRATA], epe, stencil_width, N[2], start[2], n[2], n_extra[2];
1305:   DMStagStencilType stencil_type;
1306:   DMBoundaryType    boundary_type[2];

1308:   PetscFunctionBegin;
1309:   PetscCall(DMStagGetDOF(dm, &dof[0], &dof[1], &dof[2], NULL));
1310:   PetscCall(DMStagGetStencilType(dm, &stencil_type));
1311:   PetscCall(DMStagGetStencilWidth(dm, &stencil_width));
1312:   PetscCall(DMStagGetEntries(dm, &entries));
1313:   PetscCall(DMStagGetEntriesPerElement(dm, &epe));
1314:   PetscCall(DMStagGetCorners(dm, &start[0], &start[1], NULL, &n[0], &n[1], NULL, &n_extra[0], &n_extra[1], NULL));
1315:   PetscCall(DMStagGetGlobalSizes(dm, &N[0], &N[1], NULL));
1316:   PetscCall(DMStagGetBoundaryTypes(dm, &boundary_type[0], &boundary_type[1], NULL));

1318:   if (stencil_type == DMSTAG_STENCIL_NONE) {
1319:     /* Couple all DOF at each location to each other */
1320:     DMStagStencil *row_vertex, *row_face_down, *row_face_left, *row_element;

1322:     PetscCall(PetscMalloc1(dof[0], &row_vertex));
1323:     for (PetscInt c = 0; c < dof[0]; ++c) {
1324:       row_vertex[c].loc = DMSTAG_DOWN_LEFT;
1325:       row_vertex[c].c   = c;
1326:     }

1328:     PetscCall(PetscMalloc1(dof[1], &row_face_down));
1329:     for (PetscInt c = 0; c < dof[1]; ++c) {
1330:       row_face_down[c].loc = DMSTAG_DOWN;
1331:       row_face_down[c].c   = c;
1332:     }

1334:     PetscCall(PetscMalloc1(dof[1], &row_face_left));
1335:     for (PetscInt c = 0; c < dof[1]; ++c) {
1336:       row_face_left[c].loc = DMSTAG_LEFT;
1337:       row_face_left[c].c   = c;
1338:     }

1340:     PetscCall(PetscMalloc1(dof[2], &row_element));
1341:     for (PetscInt c = 0; c < dof[2]; ++c) {
1342:       row_element[c].loc = DMSTAG_ELEMENT;
1343:       row_element[c].c   = c;
1344:     }

1346:     for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1347:       for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1348:         {
1349:           for (PetscInt c = 0; c < dof[0]; ++c) {
1350:             row_vertex[c].i = ex;
1351:             row_vertex[c].j = ey;
1352:           }
1353:           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[0], row_vertex, dof[0], row_vertex, NULL, INSERT_VALUES));
1354:         }
1355:         if (ex < N[0]) {
1356:           for (PetscInt c = 0; c < dof[1]; ++c) {
1357:             row_face_down[c].i = ex;
1358:             row_face_down[c].j = ey;
1359:           }
1360:           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_face_down, dof[1], row_face_down, NULL, INSERT_VALUES));
1361:         }
1362:         if (ey < N[1]) {
1363:           for (PetscInt c = 0; c < dof[1]; ++c) {
1364:             row_face_left[c].i = ex;
1365:             row_face_left[c].j = ey;
1366:           }
1367:           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[1], row_face_left, dof[1], row_face_left, NULL, INSERT_VALUES));
1368:         }
1369:         if (ex < N[0] && ey < N[1]) {
1370:           for (PetscInt c = 0; c < dof[2]; ++c) {
1371:             row_element[c].i = ex;
1372:             row_element[c].j = ey;
1373:           }
1374:           PetscCall(DMStagMatSetValuesStencil(dm, A, dof[2], row_element, dof[2], row_element, NULL, INSERT_VALUES));
1375:         }
1376:       }
1377:     }
1378:     PetscCall(PetscFree(row_vertex));
1379:     PetscCall(PetscFree(row_face_left));
1380:     PetscCall(PetscFree(row_face_down));
1381:     PetscCall(PetscFree(row_element));
1382:   } else if (stencil_type == DMSTAG_STENCIL_STAR || stencil_type == DMSTAG_STENCIL_BOX) {
1383:     DMStagStencil *col, *row;

1385:     PetscCall(PetscMalloc1(epe, &row));
1386:     {
1387:       PetscInt nrows = 0;

1389:       for (PetscInt c = 0; c < dof[0]; ++c) {
1390:         row[nrows].c   = c;
1391:         row[nrows].loc = DMSTAG_DOWN_LEFT;
1392:         ++nrows;
1393:       }
1394:       for (PetscInt c = 0; c < dof[1]; ++c) {
1395:         row[nrows].c   = c;
1396:         row[nrows].loc = DMSTAG_LEFT;
1397:         ++nrows;
1398:       }
1399:       for (PetscInt c = 0; c < dof[1]; ++c) {
1400:         row[nrows].c   = c;
1401:         row[nrows].loc = DMSTAG_DOWN;
1402:         ++nrows;
1403:       }
1404:       for (PetscInt c = 0; c < dof[2]; ++c) {
1405:         row[nrows].c   = c;
1406:         row[nrows].loc = DMSTAG_ELEMENT;
1407:         ++nrows;
1408:       }
1409:     }

1411:     PetscCall(PetscMalloc1(epe, &col));
1412:     {
1413:       PetscInt ncols = 0;

1415:       for (PetscInt c = 0; c < dof[0]; ++c) {
1416:         col[ncols].c   = c;
1417:         col[ncols].loc = DMSTAG_DOWN_LEFT;
1418:         ++ncols;
1419:       }
1420:       for (PetscInt c = 0; c < dof[1]; ++c) {
1421:         col[ncols].c   = c;
1422:         col[ncols].loc = DMSTAG_LEFT;
1423:         ++ncols;
1424:       }
1425:       for (PetscInt c = 0; c < dof[1]; ++c) {
1426:         col[ncols].c   = c;
1427:         col[ncols].loc = DMSTAG_DOWN;
1428:         ++ncols;
1429:       }
1430:       for (PetscInt c = 0; c < dof[2]; ++c) {
1431:         col[ncols].c   = c;
1432:         col[ncols].loc = DMSTAG_ELEMENT;
1433:         ++ncols;
1434:       }
1435:     }

1437:     for (PetscInt ey = start[1]; ey < start[1] + n[1] + n_extra[1]; ++ey) {
1438:       for (PetscInt ex = start[0]; ex < start[0] + n[0] + n_extra[0]; ++ex) {
1439:         for (PetscInt i = 0; i < epe; ++i) {
1440:           row[i].i = ex;
1441:           row[i].j = ey;
1442:         }
1443:         for (PetscInt offset_y = -stencil_width; offset_y <= stencil_width; ++offset_y) {
1444:           const PetscInt ey_offset = ey + offset_y;
1445:           for (PetscInt offset_x = -stencil_width; offset_x <= stencil_width; ++offset_x) {
1446:             const PetscInt ex_offset = ex + offset_x;
1447:             /* Only set values corresponding to elements which can have non-dummy entries,
1448:                meaning those that map to unknowns in the global representation. In the periodic
1449:                case, this is the entire stencil, but in all other cases, only includes a single
1450:                "extra" element which is partially outside the physical domain (those points in the
1451:                global representation */
1452:             if ((stencil_type == DMSTAG_STENCIL_BOX || offset_x == 0 || offset_y == 0) && (boundary_type[0] == DM_BOUNDARY_PERIODIC || (ex_offset < N[0] + 1 && ex_offset >= 0)) && (boundary_type[1] == DM_BOUNDARY_PERIODIC || (ey_offset < N[1] + 1 && ey_offset >= 0))) {
1453:               for (PetscInt i = 0; i < epe; ++i) {
1454:                 col[i].i = ex_offset;
1455:                 col[i].j = ey_offset;
1456:               }
1457:               PetscCall(DMStagMatSetValuesStencil(dm, A, epe, row, epe, col, NULL, INSERT_VALUES));
1458:             }
1459:           }
1460:         }
1461:       }
1462:     }
1463:     PetscCall(PetscFree(row));
1464:     PetscCall(PetscFree(col));
1465:   } else SETERRQ(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Unsupported stencil type %s", DMStagStencilTypes[stencil_type]);
1466:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1467:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1468:   PetscFunctionReturn(PETSC_SUCCESS);
1469: }