Actual source code: gasm.c

  1: /*
  2:   This file defines an "generalized" additive Schwarz preconditioner for any Mat implementation.
  3:   In this version, each MPI process may intersect multiple subdomains and any subdomain may
  4:   intersect multiple MPI processes.  Intersections of subdomains with MPI processes are called *local
  5:   subdomains*.

  7:        N    - total number of distinct global subdomains  (set explicitly in PCGASMSetTotalSubdomains() or implicitly PCGASMSetSubdomains() and then calculated in PCSetUp_GASM())
  8:        n    - actual number of local subdomains on this process (set in `PCGASMSetSubdomains()` or calculated in `PCGASMSetTotalSubdomains()`)
  9:        nmax - maximum number of local subdomains per process (calculated in PCSetUp_GASM())
 10: */
 11: #include <petsc/private/pcimpl.h>
 12: #include <petscdm.h>

 14: typedef struct {
 15:   PetscInt   N, n, nmax;
 16:   PetscInt   overlap;                /* overlap requested by user */
 17:   PCGASMType type;                   /* use reduced interpolation, restriction or both */
 18:   PetscBool  type_set;               /* if user set this value (so won't change it for symmetric problems) */
 19:   PetscBool  same_subdomain_solvers; /* flag indicating whether all local solvers are same */
 20:   PetscBool  sort_indices;           /* flag to sort subdomain indices */
 21:   PetscBool  user_subdomains;        /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
 22:   PetscBool  dm_subdomains;          /* whether DM is allowed to define subdomains */
 23:   PetscBool  hierarchicalpartitioning;
 24:   IS        *ois;           /* index sets that define the outer (conceptually, overlapping) subdomains */
 25:   IS        *iis;           /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
 26:   KSP       *ksp;           /* linear solvers for each subdomain */
 27:   Mat       *pmat;          /* subdomain block matrices */
 28:   Vec        gx, gy;        /* Merged work vectors */
 29:   Vec       *x, *y;         /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
 30:   VecScatter gorestriction; /* merged restriction to disjoint union of outer subdomains */
 31:   VecScatter girestriction; /* merged restriction to disjoint union of inner subdomains */
 32:   VecScatter pctoouter;
 33:   IS         permutationIS;
 34:   Mat        permutationP;
 35:   Mat        pcmat;
 36:   Vec        pcx, pcy;
 37: } PC_GASM;

 39: static PetscErrorCode PCGASMComputeGlobalSubdomainNumbering_Private(PC pc, PetscInt **numbering, PetscInt **permutation)
 40: {
 41:   PC_GASM *osm = (PC_GASM *)pc->data;
 42:   PetscInt i;

 44:   PetscFunctionBegin;
 45:   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
 46:   PetscCall(PetscMalloc2(osm->n, numbering, osm->n, permutation));
 47:   PetscCall(PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc), osm->n, (PetscObject *)osm->iis, NULL, *numbering));
 48:   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
 49:   PetscCall(PetscSortIntWithPermutation(osm->n, *numbering, *permutation));
 50:   PetscFunctionReturn(PETSC_SUCCESS);
 51: }

 53: static PetscErrorCode PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
 54: {
 55:   PC_GASM        *osm = (PC_GASM *)pc->data;
 56:   PetscInt        j, nidx;
 57:   const PetscInt *idx;
 58:   PetscViewer     sviewer;
 59:   char           *cidx;

 61:   PetscFunctionBegin;
 62:   PetscCheck(i >= -1 && i < osm->n, PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %" PetscInt_FMT ": must nonnegative and less than %" PetscInt_FMT, i, osm->n);

 64:   /* Inner subdomains. */
 65:   /*
 66:    No more than 15 characters per index plus a space.
 67:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 68:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
 69:    For nidx == 0, the whole string 16 '\0'.
 70:    */
 71:   PetscCall(PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n"));
 72:   PetscCall(PetscViewerFlush(viewer));
 73:   PetscCall(PetscViewerASCIIPushSynchronized(viewer));
 74:   if (i > -1) {
 75:     PetscCall(ISGetLocalSize(osm->iis[i], &nidx));
 76:     PetscCall(PetscMalloc1(16 * (nidx + 1) + 1, &cidx));
 77:     PetscCall(PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16 * (nidx + 1) + 1, &sviewer));
 78:     PetscCall(ISGetIndices(osm->iis[i], &idx));
 79:     for (j = 0; j < nidx; ++j) PetscCall(PetscViewerStringSPrintf(sviewer, "%" PetscInt_FMT " ", idx[j]));
 80:     PetscCall(ISRestoreIndices(osm->iis[i], &idx));
 81:     PetscCall(PetscViewerDestroy(&sviewer));
 82:     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx));
 83:     PetscCall(PetscFree(cidx));
 84:   }
 85:   PetscCall(PetscViewerFlush(viewer));
 86:   PetscCall(PetscViewerASCIIPopSynchronized(viewer));
 87:   PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
 88:   PetscCall(PetscViewerFlush(viewer));

 90:   /* Outer subdomains. */
 91:   /*
 92:    No more than 15 characters per index plus a space.
 93:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 94:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
 95:    For nidx == 0, the whole string 16 '\0'.
 96:    */
 97:   PetscCall(PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n"));
 98:   PetscCall(PetscViewerFlush(viewer));
 99:   PetscCall(PetscViewerASCIIPushSynchronized(viewer));
100:   if (i > -1) {
101:     PetscCall(ISGetLocalSize(osm->ois[i], &nidx));
102:     PetscCall(PetscMalloc1(16 * (nidx + 1) + 1, &cidx));
103:     PetscCall(PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16 * (nidx + 1) + 1, &sviewer));
104:     PetscCall(ISGetIndices(osm->ois[i], &idx));
105:     for (j = 0; j < nidx; ++j) PetscCall(PetscViewerStringSPrintf(sviewer, "%" PetscInt_FMT " ", idx[j]));
106:     PetscCall(PetscViewerDestroy(&sviewer));
107:     PetscCall(ISRestoreIndices(osm->ois[i], &idx));
108:     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx));
109:     PetscCall(PetscFree(cidx));
110:   }
111:   PetscCall(PetscViewerFlush(viewer));
112:   PetscCall(PetscViewerASCIIPopSynchronized(viewer));
113:   PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
114:   PetscCall(PetscViewerFlush(viewer));
115:   PetscFunctionReturn(PETSC_SUCCESS);
116: }

118: static PetscErrorCode PCGASMPrintSubdomains(PC pc)
119: {
120:   PC_GASM    *osm = (PC_GASM *)pc->data;
121:   const char *prefix;
122:   char        fname[PETSC_MAX_PATH_LEN + 1];
123:   PetscInt    l, d, count;
124:   PetscBool   found;
125:   PetscViewer viewer;
126:   PetscInt   *numbering, *permutation; /* global numbering of locally-supported subdomains and the permutation from the local ordering */

128:   PetscFunctionBegin;
129:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
130:   PetscCall(PetscOptionsHasName(NULL, prefix, "-pc_gasm_print_subdomains", &found));
131:   if (!found) PetscFunctionReturn(PETSC_SUCCESS);
132:   PetscCall(PetscOptionsGetString(NULL, prefix, "-pc_gasm_print_subdomains", fname, sizeof(fname), &found));
133:   if (!found) PetscCall(PetscStrncpy(fname, "stdout", sizeof(fname)));
134:   PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc), fname, &viewer));
135:   /*
136:    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
137:    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
138:   */
139:   PetscCall(PetscObjectName((PetscObject)viewer));
140:   l = 0;
141:   PetscCall(PCGASMComputeGlobalSubdomainNumbering_Private(pc, &numbering, &permutation));
142:   for (count = 0; count < osm->N; ++count) {
143:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
144:     if (l < osm->n) {
145:       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
146:       if (numbering[d] == count) l++;
147:       else d = -1;
148:     } else d = -1;
149:     PetscCall(PCGASMSubdomainView_Private(pc, d, viewer));
150:   }
151:   PetscCall(PetscFree2(numbering, permutation));
152:   PetscCall(PetscViewerDestroy(&viewer));
153:   PetscFunctionReturn(PETSC_SUCCESS);
154: }

156: static PetscErrorCode PCView_GASM(PC pc, PetscViewer viewer)
157: {
158:   PC_GASM    *osm = (PC_GASM *)pc->data;
159:   const char *prefix;
160:   PetscMPIInt rank, size;
161:   PetscInt    bsz;
162:   PetscBool   iascii, view_subdomains = PETSC_FALSE;
163:   PetscViewer sviewer;
164:   PetscInt    count, l;
165:   char        overlap[256]     = "user-defined overlap";
166:   char        gsubdomains[256] = "unknown total number of subdomains";
167:   char        msubdomains[256] = "unknown max number of local subdomains";
168:   PetscInt   *numbering, *permutation; /* global numbering of locally-supported subdomains and the permutation from the local ordering */

170:   PetscFunctionBegin;
171:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
172:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));

174:   if (osm->overlap >= 0) PetscCall(PetscSNPrintf(overlap, sizeof(overlap), "requested amount of overlap = %" PetscInt_FMT, osm->overlap));
175:   if (osm->N != PETSC_DETERMINE) PetscCall(PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %" PetscInt_FMT, osm->N));
176:   if (osm->nmax != PETSC_DETERMINE) PetscCall(PetscSNPrintf(msubdomains, sizeof(msubdomains), "max number of local subdomains = %" PetscInt_FMT, osm->nmax));

178:   PetscCall(PCGetOptionsPrefix(pc, &prefix));
179:   PetscCall(PetscOptionsGetBool(NULL, prefix, "-pc_gasm_view_subdomains", &view_subdomains, NULL));

181:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
182:   if (iascii) {
183:     /*
184:      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
185:      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
186:      collectively on the comm.
187:      */
188:     PetscCall(PetscObjectName((PetscObject)viewer));
189:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Restriction/interpolation type: %s\n", PCGASMTypes[osm->type]));
190:     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", overlap));
191:     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", gsubdomains));
192:     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", msubdomains));
193:     PetscCall(PetscViewerASCIIPushSynchronized(viewer));
194:     PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d|%d] number of locally-supported subdomains = %" PetscInt_FMT "\n", rank, size, osm->n));
195:     PetscCall(PetscViewerFlush(viewer));
196:     PetscCall(PetscViewerASCIIPopSynchronized(viewer));
197:     /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
198:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Subdomain solver info is as follows:\n"));
199:     PetscCall(PetscViewerASCIIPushTab(viewer));
200:     PetscCall(PetscViewerASCIIPrintf(viewer, "  - - - - - - - - - - - - - - - - - -\n"));
201:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
202:     PetscCall(PCGASMComputeGlobalSubdomainNumbering_Private(pc, &numbering, &permutation));
203:     l = 0;
204:     for (count = 0; count < osm->N; ++count) {
205:       PetscMPIInt srank, ssize;
206:       if (l < osm->n) {
207:         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
208:         if (numbering[d] == count) {
209:           PetscCallMPI(MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize));
210:           PetscCallMPI(MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank));
211:           PetscCall(PetscViewerGetSubViewer(viewer, ((PetscObject)osm->ois[d])->comm, &sviewer));
212:           PetscCall(ISGetLocalSize(osm->ois[d], &bsz));
213:           PetscCall(PetscViewerASCIISynchronizedPrintf(sviewer, "  [%d|%d] (subcomm [%d|%d]) local subdomain number %" PetscInt_FMT ", local size = %" PetscInt_FMT "\n", rank, size, srank, ssize, d, bsz));
214:           PetscCall(PetscViewerFlush(sviewer));
215:           PetscCall(PetscViewerASCIIPushTab(sviewer));
216:           if (view_subdomains) PetscCall(PCGASMSubdomainView_Private(pc, d, sviewer));
217:           if (!pc->setupcalled) {
218:             PetscCall(PetscViewerASCIISynchronizedPrintf(sviewer, "  Solver not set up yet: PCSetUp() not yet called\n"));
219:           } else {
220:             PetscCall(KSPView(osm->ksp[d], sviewer));
221:           }
222:           PetscCall(PetscViewerASCIIPopTab(sviewer));
223:           PetscCall(PetscViewerASCIIPrintf(sviewer, "  - - - - - - - - - - - - - - - - - -\n"));
224:           PetscCall(PetscViewerFlush(sviewer));
225:           PetscCall(PetscViewerRestoreSubViewer(viewer, ((PetscObject)osm->ois[d])->comm, &sviewer));
226:           ++l;
227:         } else {
228:           PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
229:           PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
230:         }
231:       } else {
232:         PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
233:         PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
234:       }
235:     }
236:     PetscCall(PetscFree2(numbering, permutation));
237:     PetscCall(PetscViewerASCIIPopTab(viewer));
238:     PetscCall(PetscViewerFlush(viewer));
239:     /* this line is needed to match the extra PetscViewerASCIIPushSynchronized() in PetscViewerGetSubViewer() */
240:     PetscCall(PetscViewerASCIIPopSynchronized(viewer));
241:   }
242:   PetscFunctionReturn(PETSC_SUCCESS);
243: }

245: PETSC_INTERN PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);

247: static PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
248: {
249:   PC_GASM        *osm = (PC_GASM *)pc->data;
250:   MatPartitioning part;
251:   MPI_Comm        comm;
252:   PetscMPIInt     size;
253:   PetscInt        nlocalsubdomains, fromrows_localsize;
254:   IS              partitioning, fromrows, isn;
255:   Vec             outervec;

257:   PetscFunctionBegin;
258:   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
259:   PetscCallMPI(MPI_Comm_size(comm, &size));
260:   /* we do not need a hierarchical partitioning when
261:     * the total number of subdomains is consistent with
262:     * the number of MPI tasks.
263:     * For the following cases, we do not need to use HP
264:     * */
265:   if (osm->N == PETSC_DETERMINE || osm->N >= size || osm->N == 1) PetscFunctionReturn(PETSC_SUCCESS);
266:   PetscCheck(size % osm->N == 0, PETSC_COMM_WORLD, PETSC_ERR_ARG_INCOMP, "have to specify the total number of subdomains %" PetscInt_FMT " to be a factor of the number of ranks %d ", osm->N, size);
267:   nlocalsubdomains = size / osm->N;
268:   osm->n           = 1;
269:   PetscCall(MatPartitioningCreate(comm, &part));
270:   PetscCall(MatPartitioningSetAdjacency(part, pc->pmat));
271:   PetscCall(MatPartitioningSetType(part, MATPARTITIONINGHIERARCH));
272:   PetscCall(MatPartitioningHierarchicalSetNcoarseparts(part, osm->N));
273:   PetscCall(MatPartitioningHierarchicalSetNfineparts(part, nlocalsubdomains));
274:   PetscCall(MatPartitioningSetFromOptions(part));
275:   /* get new rank owner number of each vertex */
276:   PetscCall(MatPartitioningApply(part, &partitioning));
277:   PetscCall(ISBuildTwoSided(partitioning, NULL, &fromrows));
278:   PetscCall(ISPartitioningToNumbering(partitioning, &isn));
279:   PetscCall(ISDestroy(&isn));
280:   PetscCall(ISGetLocalSize(fromrows, &fromrows_localsize));
281:   PetscCall(MatPartitioningDestroy(&part));
282:   PetscCall(MatCreateVecs(pc->pmat, &outervec, NULL));
283:   PetscCall(VecCreateMPI(comm, fromrows_localsize, PETSC_DETERMINE, &osm->pcx));
284:   PetscCall(VecDuplicate(osm->pcx, &osm->pcy));
285:   PetscCall(VecScatterCreate(osm->pcx, NULL, outervec, fromrows, &osm->pctoouter));
286:   PetscCall(MatCreateSubMatrix(pc->pmat, fromrows, fromrows, MAT_INITIAL_MATRIX, &osm->permutationP));
287:   PetscCall(PetscObjectReference((PetscObject)fromrows));
288:   osm->permutationIS = fromrows;
289:   osm->pcmat         = pc->pmat;
290:   PetscCall(PetscObjectReference((PetscObject)osm->permutationP));
291:   pc->pmat = osm->permutationP;
292:   PetscCall(VecDestroy(&outervec));
293:   PetscCall(ISDestroy(&fromrows));
294:   PetscCall(ISDestroy(&partitioning));
295:   osm->n = PETSC_DETERMINE;
296:   PetscFunctionReturn(PETSC_SUCCESS);
297: }

299: static PetscErrorCode PCSetUp_GASM(PC pc)
300: {
301:   PC_GASM        *osm = (PC_GASM *)pc->data;
302:   PetscInt        i, nInnerIndices, nTotalInnerIndices;
303:   PetscMPIInt     rank, size;
304:   MatReuse        scall = MAT_REUSE_MATRIX;
305:   KSP             ksp;
306:   PC              subpc;
307:   const char     *prefix, *pprefix;
308:   Vec             x, y;
309:   PetscInt        oni;   /* Number of indices in the i-th local outer subdomain.               */
310:   const PetscInt *oidxi; /* Indices from the i-th subdomain local outer subdomain.             */
311:   PetscInt        on;    /* Number of indices in the disjoint union of local outer subdomains. */
312:   PetscInt       *oidx;  /* Indices in the disjoint union of local outer subdomains. */
313:   IS              gois;  /* Disjoint union the global indices of outer subdomains.             */
314:   IS              goid;  /* Identity IS of the size of the disjoint union of outer subdomains. */
315:   PetscScalar    *gxarray, *gyarray;
316:   PetscInt        gostart; /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
317:   PetscInt        num_subdomains  = 0;
318:   DM             *subdomain_dm    = NULL;
319:   char          **subdomain_names = NULL;
320:   PetscInt       *numbering;

322:   PetscFunctionBegin;
323:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
324:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
325:   if (!pc->setupcalled) {
326:     /* use a hierarchical partitioning */
327:     if (osm->hierarchicalpartitioning) PetscCall(PCGASMSetHierarchicalPartitioning(pc));
328:     if (osm->n == PETSC_DETERMINE) {
329:       if (osm->N != PETSC_DETERMINE) {
330:         /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
331:         PetscCall(PCGASMCreateSubdomains(pc->pmat, osm->N, &osm->n, &osm->iis));
332:       } else if (osm->dm_subdomains && pc->dm) {
333:         /* try pc->dm next, if allowed */
334:         PetscInt d;
335:         IS      *inner_subdomain_is, *outer_subdomain_is;
336:         PetscCall(DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm));
337:         if (num_subdomains) PetscCall(PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is));
338:         for (d = 0; d < num_subdomains; ++d) {
339:           if (inner_subdomain_is) PetscCall(ISDestroy(&inner_subdomain_is[d]));
340:           if (outer_subdomain_is) PetscCall(ISDestroy(&outer_subdomain_is[d]));
341:         }
342:         PetscCall(PetscFree(inner_subdomain_is));
343:         PetscCall(PetscFree(outer_subdomain_is));
344:       } else {
345:         /* still no subdomains; use one per rank */
346:         osm->nmax = osm->n = 1;
347:         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
348:         osm->N = size;
349:         PetscCall(PCGASMCreateLocalSubdomains(pc->pmat, osm->n, &osm->iis));
350:       }
351:     }
352:     if (!osm->iis) {
353:       /*
354:        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
355:        We create the requisite number of local inner subdomains and then expand them into
356:        out subdomains, if necessary.
357:        */
358:       PetscCall(PCGASMCreateLocalSubdomains(pc->pmat, osm->n, &osm->iis));
359:     }
360:     if (!osm->ois) {
361:       /*
362:             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
363:             has been requested, copy the inner subdomains over so they can be modified.
364:       */
365:       PetscCall(PetscMalloc1(osm->n, &osm->ois));
366:       for (i = 0; i < osm->n; ++i) {
367:         if (osm->overlap > 0 && osm->N > 1) { /* With positive overlap, osm->iis[i] will be modified */
368:           PetscCall(ISDuplicate(osm->iis[i], (osm->ois) + i));
369:           PetscCall(ISCopy(osm->iis[i], osm->ois[i]));
370:         } else {
371:           PetscCall(PetscObjectReference((PetscObject)osm->iis[i]));
372:           osm->ois[i] = osm->iis[i];
373:         }
374:       }
375:       if (osm->overlap > 0 && osm->N > 1) {
376:         /* Extend the "overlapping" regions by a number of steps */
377:         PetscCall(MatIncreaseOverlapSplit(pc->pmat, osm->n, osm->ois, osm->overlap));
378:       }
379:     }

381:     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
382:     if (osm->nmax == PETSC_DETERMINE) {
383:       PetscInt inwork, outwork;
384:       /* determine global number of subdomains and the max number of local subdomains */
385:       inwork = osm->n;
386:       PetscCallMPI(MPIU_Allreduce(&inwork, &outwork, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
387:       osm->nmax = outwork;
388:     }
389:     if (osm->N == PETSC_DETERMINE) {
390:       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
391:       PetscCall(PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc), osm->n, (PetscObject *)osm->ois, &osm->N, NULL));
392:     }

394:     if (osm->sort_indices) {
395:       for (i = 0; i < osm->n; i++) {
396:         PetscCall(ISSort(osm->ois[i]));
397:         PetscCall(ISSort(osm->iis[i]));
398:       }
399:     }
400:     PetscCall(PCGetOptionsPrefix(pc, &prefix));
401:     PetscCall(PCGASMPrintSubdomains(pc));

403:     /*
404:        Merge the ISs, create merged vectors and restrictions.
405:      */
406:     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
407:     on = 0;
408:     for (i = 0; i < osm->n; i++) {
409:       PetscCall(ISGetLocalSize(osm->ois[i], &oni));
410:       on += oni;
411:     }
412:     PetscCall(PetscMalloc1(on, &oidx));
413:     on = 0;
414:     /* Merge local indices together */
415:     for (i = 0; i < osm->n; i++) {
416:       PetscCall(ISGetLocalSize(osm->ois[i], &oni));
417:       PetscCall(ISGetIndices(osm->ois[i], &oidxi));
418:       PetscCall(PetscArraycpy(oidx + on, oidxi, oni));
419:       PetscCall(ISRestoreIndices(osm->ois[i], &oidxi));
420:       on += oni;
421:     }
422:     PetscCall(ISCreateGeneral(((PetscObject)pc)->comm, on, oidx, PETSC_OWN_POINTER, &gois));
423:     nTotalInnerIndices = 0;
424:     for (i = 0; i < osm->n; i++) {
425:       PetscCall(ISGetLocalSize(osm->iis[i], &nInnerIndices));
426:       nTotalInnerIndices += nInnerIndices;
427:     }
428:     PetscCall(VecCreateMPI(((PetscObject)pc)->comm, nTotalInnerIndices, PETSC_DETERMINE, &x));
429:     PetscCall(VecDuplicate(x, &y));

431:     PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), on, PETSC_DECIDE, &osm->gx));
432:     PetscCall(VecDuplicate(osm->gx, &osm->gy));
433:     PetscCall(VecGetOwnershipRange(osm->gx, &gostart, NULL));
434:     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), on, gostart, 1, &goid));
435:     /* gois might indices not on local */
436:     PetscCall(VecScatterCreate(x, gois, osm->gx, goid, &osm->gorestriction));
437:     PetscCall(PetscMalloc1(osm->n, &numbering));
438:     PetscCall(PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc), osm->n, (PetscObject *)osm->ois, NULL, numbering));
439:     PetscCall(VecDestroy(&x));
440:     PetscCall(ISDestroy(&gois));

442:     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
443:     {
444:       PetscInt        ini;   /* Number of indices the i-th a local inner subdomain. */
445:       PetscInt        in;    /* Number of indices in the disjoint union of local inner subdomains. */
446:       PetscInt       *iidx;  /* Global indices in the merged local inner subdomain. */
447:       PetscInt       *ioidx; /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
448:       IS              giis;  /* IS for the disjoint union of inner subdomains. */
449:       IS              giois; /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
450:       PetscScalar    *array;
451:       const PetscInt *indices;
452:       PetscInt        k;
453:       on = 0;
454:       for (i = 0; i < osm->n; i++) {
455:         PetscCall(ISGetLocalSize(osm->ois[i], &oni));
456:         on += oni;
457:       }
458:       PetscCall(PetscMalloc1(on, &iidx));
459:       PetscCall(PetscMalloc1(on, &ioidx));
460:       PetscCall(VecGetArray(y, &array));
461:       /* set communicator id to determine where overlap is */
462:       in = 0;
463:       for (i = 0; i < osm->n; i++) {
464:         PetscCall(ISGetLocalSize(osm->iis[i], &ini));
465:         for (k = 0; k < ini; ++k) array[in + k] = numbering[i];
466:         in += ini;
467:       }
468:       PetscCall(VecRestoreArray(y, &array));
469:       PetscCall(VecScatterBegin(osm->gorestriction, y, osm->gy, INSERT_VALUES, SCATTER_FORWARD));
470:       PetscCall(VecScatterEnd(osm->gorestriction, y, osm->gy, INSERT_VALUES, SCATTER_FORWARD));
471:       PetscCall(VecGetOwnershipRange(osm->gy, &gostart, NULL));
472:       PetscCall(VecGetArray(osm->gy, &array));
473:       on = 0;
474:       in = 0;
475:       for (i = 0; i < osm->n; i++) {
476:         PetscCall(ISGetLocalSize(osm->ois[i], &oni));
477:         PetscCall(ISGetIndices(osm->ois[i], &indices));
478:         for (k = 0; k < oni; k++) {
479:           /*  skip overlapping indices to get inner domain */
480:           if (PetscRealPart(array[on + k]) != numbering[i]) continue;
481:           iidx[in]    = indices[k];
482:           ioidx[in++] = gostart + on + k;
483:         }
484:         PetscCall(ISRestoreIndices(osm->ois[i], &indices));
485:         on += oni;
486:       }
487:       PetscCall(VecRestoreArray(osm->gy, &array));
488:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), in, iidx, PETSC_OWN_POINTER, &giis));
489:       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), in, ioidx, PETSC_OWN_POINTER, &giois));
490:       PetscCall(VecScatterCreate(y, giis, osm->gy, giois, &osm->girestriction));
491:       PetscCall(VecDestroy(&y));
492:       PetscCall(ISDestroy(&giis));
493:       PetscCall(ISDestroy(&giois));
494:     }
495:     PetscCall(ISDestroy(&goid));
496:     PetscCall(PetscFree(numbering));

498:     /* Create the subdomain work vectors. */
499:     PetscCall(PetscMalloc1(osm->n, &osm->x));
500:     PetscCall(PetscMalloc1(osm->n, &osm->y));
501:     PetscCall(VecGetArray(osm->gx, &gxarray));
502:     PetscCall(VecGetArray(osm->gy, &gyarray));
503:     for (i = 0, on = 0; i < osm->n; ++i, on += oni) {
504:       PetscInt oNi;
505:       PetscCall(ISGetLocalSize(osm->ois[i], &oni));
506:       /* on a sub communicator */
507:       PetscCall(ISGetSize(osm->ois[i], &oNi));
508:       PetscCall(VecCreateMPIWithArray(((PetscObject)osm->ois[i])->comm, 1, oni, oNi, gxarray + on, &osm->x[i]));
509:       PetscCall(VecCreateMPIWithArray(((PetscObject)osm->ois[i])->comm, 1, oni, oNi, gyarray + on, &osm->y[i]));
510:     }
511:     PetscCall(VecRestoreArray(osm->gx, &gxarray));
512:     PetscCall(VecRestoreArray(osm->gy, &gyarray));
513:     /* Create the subdomain solvers */
514:     PetscCall(PetscMalloc1(osm->n, &osm->ksp));
515:     for (i = 0; i < osm->n; i++) {
516:       char subprefix[PETSC_MAX_PATH_LEN + 1];
517:       PetscCall(KSPCreate(((PetscObject)osm->ois[i])->comm, &ksp));
518:       PetscCall(KSPSetNestLevel(ksp, pc->kspnestlevel));
519:       PetscCall(KSPSetErrorIfNotConverged(ksp, pc->erroriffailure));
520:       PetscCall(PetscObjectIncrementTabLevel((PetscObject)ksp, (PetscObject)pc, 1));
521:       PetscCall(KSPSetType(ksp, KSPPREONLY));
522:       PetscCall(KSPGetPC(ksp, &subpc)); /* Why do we need this here? */
523:       if (subdomain_dm) {
524:         PetscCall(KSPSetDM(ksp, subdomain_dm[i]));
525:         PetscCall(DMDestroy(subdomain_dm + i));
526:       }
527:       PetscCall(PCGetOptionsPrefix(pc, &prefix));
528:       PetscCall(KSPSetOptionsPrefix(ksp, prefix));
529:       if (subdomain_names && subdomain_names[i]) {
530:         PetscCall(PetscSNPrintf(subprefix, PETSC_MAX_PATH_LEN, "sub_%s_", subdomain_names[i]));
531:         PetscCall(KSPAppendOptionsPrefix(ksp, subprefix));
532:         PetscCall(PetscFree(subdomain_names[i]));
533:       }
534:       PetscCall(KSPAppendOptionsPrefix(ksp, "sub_"));
535:       osm->ksp[i] = ksp;
536:     }
537:     PetscCall(PetscFree(subdomain_dm));
538:     PetscCall(PetscFree(subdomain_names));
539:     scall = MAT_INITIAL_MATRIX;
540:   } else { /* if (pc->setupcalled) */
541:     /*
542:        Destroy the submatrices from the previous iteration
543:     */
544:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
545:       PetscCall(MatDestroyMatrices(osm->n, &osm->pmat));
546:       scall = MAT_INITIAL_MATRIX;
547:     }
548:     if (osm->permutationIS) {
549:       PetscCall(MatCreateSubMatrix(pc->pmat, osm->permutationIS, osm->permutationIS, scall, &osm->permutationP));
550:       PetscCall(PetscObjectReference((PetscObject)osm->permutationP));
551:       osm->pcmat = pc->pmat;
552:       pc->pmat   = osm->permutationP;
553:     }
554:   }

556:   /*
557:      Extract the submatrices.
558:   */
559:   if (size > 1) {
560:     PetscCall(MatCreateSubMatricesMPI(pc->pmat, osm->n, osm->ois, osm->ois, scall, &osm->pmat));
561:   } else {
562:     PetscCall(MatCreateSubMatrices(pc->pmat, osm->n, osm->ois, osm->ois, scall, &osm->pmat));
563:   }
564:   if (scall == MAT_INITIAL_MATRIX) {
565:     PetscCall(PetscObjectGetOptionsPrefix((PetscObject)pc->pmat, &pprefix));
566:     for (i = 0; i < osm->n; i++) PetscCall(PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i], pprefix));
567:   }

569:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
570:      different boundary conditions for the submatrices than for the global problem) */
571:   PetscCall(PCModifySubMatrices(pc, osm->n, osm->ois, osm->ois, osm->pmat, pc->modifysubmatricesP));

573:   /*
574:      Loop over submatrices putting them into local ksps
575:   */
576:   for (i = 0; i < osm->n; i++) {
577:     PetscCall(KSPSetOperators(osm->ksp[i], osm->pmat[i], osm->pmat[i]));
578:     PetscCall(KSPGetOptionsPrefix(osm->ksp[i], &prefix));
579:     PetscCall(MatSetOptionsPrefix(osm->pmat[i], prefix));
580:     if (!pc->setupcalled) PetscCall(KSPSetFromOptions(osm->ksp[i]));
581:   }
582:   if (osm->pcmat) {
583:     PetscCall(MatDestroy(&pc->pmat));
584:     pc->pmat   = osm->pcmat;
585:     osm->pcmat = NULL;
586:   }
587:   PetscFunctionReturn(PETSC_SUCCESS);
588: }

590: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
591: {
592:   PC_GASM *osm = (PC_GASM *)pc->data;
593:   PetscInt i;

595:   PetscFunctionBegin;
596:   for (i = 0; i < osm->n; i++) PetscCall(KSPSetUp(osm->ksp[i]));
597:   PetscFunctionReturn(PETSC_SUCCESS);
598: }

600: static PetscErrorCode PCApply_GASM(PC pc, Vec xin, Vec yout)
601: {
602:   PC_GASM    *osm = (PC_GASM *)pc->data;
603:   PetscInt    i;
604:   Vec         x, y;
605:   ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE;

607:   PetscFunctionBegin;
608:   if (osm->pctoouter) {
609:     PetscCall(VecScatterBegin(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE));
610:     PetscCall(VecScatterEnd(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE));
611:     x = osm->pcx;
612:     y = osm->pcy;
613:   } else {
614:     x = xin;
615:     y = yout;
616:   }
617:   /*
618:      support for limiting the restriction or interpolation only to the inner
619:      subdomain values (leaving the other values 0).
620:   */
621:   if (!(osm->type & PC_GASM_RESTRICT)) {
622:     /* have to zero the work RHS since scatter may leave some slots empty */
623:     PetscCall(VecZeroEntries(osm->gx));
624:     PetscCall(VecScatterBegin(osm->girestriction, x, osm->gx, INSERT_VALUES, forward));
625:   } else {
626:     PetscCall(VecScatterBegin(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward));
627:   }
628:   PetscCall(VecZeroEntries(osm->gy));
629:   if (!(osm->type & PC_GASM_RESTRICT)) {
630:     PetscCall(VecScatterEnd(osm->girestriction, x, osm->gx, INSERT_VALUES, forward));
631:   } else {
632:     PetscCall(VecScatterEnd(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward));
633:   }
634:   /* do the subdomain solves */
635:   for (i = 0; i < osm->n; ++i) {
636:     PetscCall(KSPSolve(osm->ksp[i], osm->x[i], osm->y[i]));
637:     PetscCall(KSPCheckSolve(osm->ksp[i], pc, osm->y[i]));
638:   }
639:   /* do we need to zero y? */
640:   PetscCall(VecZeroEntries(y));
641:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
642:     PetscCall(VecScatterBegin(osm->girestriction, osm->gy, y, ADD_VALUES, reverse));
643:     PetscCall(VecScatterEnd(osm->girestriction, osm->gy, y, ADD_VALUES, reverse));
644:   } else {
645:     PetscCall(VecScatterBegin(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse));
646:     PetscCall(VecScatterEnd(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse));
647:   }
648:   if (osm->pctoouter) {
649:     PetscCall(VecScatterBegin(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD));
650:     PetscCall(VecScatterEnd(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD));
651:   }
652:   PetscFunctionReturn(PETSC_SUCCESS);
653: }

655: static PetscErrorCode PCMatApply_GASM(PC pc, Mat Xin, Mat Yout)
656: {
657:   PC_GASM    *osm = (PC_GASM *)pc->data;
658:   Mat         X, Y, O = NULL, Z, W;
659:   Vec         x, y;
660:   PetscInt    i, m, M, N;
661:   ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE;

663:   PetscFunctionBegin;
664:   PetscCheck(osm->n == 1, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Not yet implemented");
665:   PetscCall(MatGetSize(Xin, NULL, &N));
666:   if (osm->pctoouter) {
667:     PetscCall(VecGetLocalSize(osm->pcx, &m));
668:     PetscCall(VecGetSize(osm->pcx, &M));
669:     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]), m, PETSC_DECIDE, M, N, NULL, &O));
670:     for (i = 0; i < N; ++i) {
671:       PetscCall(MatDenseGetColumnVecRead(Xin, i, &x));
672:       PetscCall(MatDenseGetColumnVecWrite(O, i, &y));
673:       PetscCall(VecScatterBegin(osm->pctoouter, x, y, INSERT_VALUES, SCATTER_REVERSE));
674:       PetscCall(VecScatterEnd(osm->pctoouter, x, y, INSERT_VALUES, SCATTER_REVERSE));
675:       PetscCall(MatDenseRestoreColumnVecWrite(O, i, &y));
676:       PetscCall(MatDenseRestoreColumnVecRead(Xin, i, &x));
677:     }
678:     X = Y = O;
679:   } else {
680:     X = Xin;
681:     Y = Yout;
682:   }
683:   /*
684:      support for limiting the restriction or interpolation only to the inner
685:      subdomain values (leaving the other values 0).
686:   */
687:   PetscCall(VecGetLocalSize(osm->x[0], &m));
688:   PetscCall(VecGetSize(osm->x[0], &M));
689:   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]), m, PETSC_DECIDE, M, N, NULL, &Z));
690:   for (i = 0; i < N; ++i) {
691:     PetscCall(MatDenseGetColumnVecRead(X, i, &x));
692:     PetscCall(MatDenseGetColumnVecWrite(Z, i, &y));
693:     if (!(osm->type & PC_GASM_RESTRICT)) {
694:       /* have to zero the work RHS since scatter may leave some slots empty */
695:       PetscCall(VecZeroEntries(y));
696:       PetscCall(VecScatterBegin(osm->girestriction, x, y, INSERT_VALUES, forward));
697:       PetscCall(VecScatterEnd(osm->girestriction, x, y, INSERT_VALUES, forward));
698:     } else {
699:       PetscCall(VecScatterBegin(osm->gorestriction, x, y, INSERT_VALUES, forward));
700:       PetscCall(VecScatterEnd(osm->gorestriction, x, y, INSERT_VALUES, forward));
701:     }
702:     PetscCall(MatDenseRestoreColumnVecWrite(Z, i, &y));
703:     PetscCall(MatDenseRestoreColumnVecRead(X, i, &x));
704:   }
705:   PetscCall(MatCreateDense(PetscObjectComm((PetscObject)osm->ois[0]), m, PETSC_DECIDE, M, N, NULL, &W));
706:   PetscCall(MatSetOption(Z, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
707:   PetscCall(MatAssemblyBegin(Z, MAT_FINAL_ASSEMBLY));
708:   PetscCall(MatAssemblyEnd(Z, MAT_FINAL_ASSEMBLY));
709:   /* do the subdomain solve */
710:   PetscCall(KSPMatSolve(osm->ksp[0], Z, W));
711:   PetscCall(KSPCheckSolve(osm->ksp[0], pc, NULL));
712:   PetscCall(MatDestroy(&Z));
713:   /* do we need to zero y? */
714:   PetscCall(MatZeroEntries(Y));
715:   for (i = 0; i < N; ++i) {
716:     PetscCall(MatDenseGetColumnVecWrite(Y, i, &y));
717:     PetscCall(MatDenseGetColumnVecRead(W, i, &x));
718:     if (!(osm->type & PC_GASM_INTERPOLATE)) {
719:       PetscCall(VecScatterBegin(osm->girestriction, x, y, ADD_VALUES, reverse));
720:       PetscCall(VecScatterEnd(osm->girestriction, x, y, ADD_VALUES, reverse));
721:     } else {
722:       PetscCall(VecScatterBegin(osm->gorestriction, x, y, ADD_VALUES, reverse));
723:       PetscCall(VecScatterEnd(osm->gorestriction, x, y, ADD_VALUES, reverse));
724:     }
725:     PetscCall(MatDenseRestoreColumnVecRead(W, i, &x));
726:     if (osm->pctoouter) {
727:       PetscCall(MatDenseGetColumnVecWrite(Yout, i, &x));
728:       PetscCall(VecScatterBegin(osm->pctoouter, y, x, INSERT_VALUES, SCATTER_FORWARD));
729:       PetscCall(VecScatterEnd(osm->pctoouter, y, x, INSERT_VALUES, SCATTER_FORWARD));
730:       PetscCall(MatDenseRestoreColumnVecRead(Yout, i, &x));
731:     }
732:     PetscCall(MatDenseRestoreColumnVecWrite(Y, i, &y));
733:   }
734:   PetscCall(MatDestroy(&W));
735:   PetscCall(MatDestroy(&O));
736:   PetscFunctionReturn(PETSC_SUCCESS);
737: }

739: static PetscErrorCode PCApplyTranspose_GASM(PC pc, Vec xin, Vec yout)
740: {
741:   PC_GASM    *osm = (PC_GASM *)pc->data;
742:   PetscInt    i;
743:   Vec         x, y;
744:   ScatterMode forward = SCATTER_FORWARD, reverse = SCATTER_REVERSE;

746:   PetscFunctionBegin;
747:   if (osm->pctoouter) {
748:     PetscCall(VecScatterBegin(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE));
749:     PetscCall(VecScatterEnd(osm->pctoouter, xin, osm->pcx, INSERT_VALUES, SCATTER_REVERSE));
750:     x = osm->pcx;
751:     y = osm->pcy;
752:   } else {
753:     x = xin;
754:     y = yout;
755:   }
756:   /*
757:      Support for limiting the restriction or interpolation to only local
758:      subdomain values (leaving the other values 0).

760:      Note: these are reversed from the PCApply_GASM() because we are applying the
761:      transpose of the three terms
762:   */
763:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
764:     /* have to zero the work RHS since scatter may leave some slots empty */
765:     PetscCall(VecZeroEntries(osm->gx));
766:     PetscCall(VecScatterBegin(osm->girestriction, x, osm->gx, INSERT_VALUES, forward));
767:   } else {
768:     PetscCall(VecScatterBegin(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward));
769:   }
770:   PetscCall(VecZeroEntries(osm->gy));
771:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
772:     PetscCall(VecScatterEnd(osm->girestriction, x, osm->gx, INSERT_VALUES, forward));
773:   } else {
774:     PetscCall(VecScatterEnd(osm->gorestriction, x, osm->gx, INSERT_VALUES, forward));
775:   }
776:   /* do the local solves */
777:   for (i = 0; i < osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
778:     PetscCall(KSPSolveTranspose(osm->ksp[i], osm->x[i], osm->y[i]));
779:     PetscCall(KSPCheckSolve(osm->ksp[i], pc, osm->y[i]));
780:   }
781:   PetscCall(VecZeroEntries(y));
782:   if (!(osm->type & PC_GASM_RESTRICT)) {
783:     PetscCall(VecScatterBegin(osm->girestriction, osm->gy, y, ADD_VALUES, reverse));
784:     PetscCall(VecScatterEnd(osm->girestriction, osm->gy, y, ADD_VALUES, reverse));
785:   } else {
786:     PetscCall(VecScatterBegin(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse));
787:     PetscCall(VecScatterEnd(osm->gorestriction, osm->gy, y, ADD_VALUES, reverse));
788:   }
789:   if (osm->pctoouter) {
790:     PetscCall(VecScatterBegin(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD));
791:     PetscCall(VecScatterEnd(osm->pctoouter, y, yout, INSERT_VALUES, SCATTER_FORWARD));
792:   }
793:   PetscFunctionReturn(PETSC_SUCCESS);
794: }

796: static PetscErrorCode PCReset_GASM(PC pc)
797: {
798:   PC_GASM *osm = (PC_GASM *)pc->data;
799:   PetscInt i;

801:   PetscFunctionBegin;
802:   if (osm->ksp) {
803:     for (i = 0; i < osm->n; i++) PetscCall(KSPReset(osm->ksp[i]));
804:   }
805:   if (osm->pmat) {
806:     if (osm->n > 0) {
807:       PetscMPIInt size;
808:       PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
809:       if (size > 1) {
810:         /* osm->pmat is created by MatCreateSubMatricesMPI(), cannot use MatDestroySubMatrices() */
811:         PetscCall(MatDestroyMatrices(osm->n, &osm->pmat));
812:       } else {
813:         PetscCall(MatDestroySubMatrices(osm->n, &osm->pmat));
814:       }
815:     }
816:   }
817:   if (osm->x) {
818:     for (i = 0; i < osm->n; i++) {
819:       PetscCall(VecDestroy(&osm->x[i]));
820:       PetscCall(VecDestroy(&osm->y[i]));
821:     }
822:   }
823:   PetscCall(VecDestroy(&osm->gx));
824:   PetscCall(VecDestroy(&osm->gy));

826:   PetscCall(VecScatterDestroy(&osm->gorestriction));
827:   PetscCall(VecScatterDestroy(&osm->girestriction));
828:   if (!osm->user_subdomains) {
829:     PetscCall(PCGASMDestroySubdomains(osm->n, &osm->ois, &osm->iis));
830:     osm->N    = PETSC_DETERMINE;
831:     osm->nmax = PETSC_DETERMINE;
832:   }
833:   if (osm->pctoouter) PetscCall(VecScatterDestroy(&osm->pctoouter));
834:   if (osm->permutationIS) PetscCall(ISDestroy(&osm->permutationIS));
835:   if (osm->pcx) PetscCall(VecDestroy(&osm->pcx));
836:   if (osm->pcy) PetscCall(VecDestroy(&osm->pcy));
837:   if (osm->permutationP) PetscCall(MatDestroy(&osm->permutationP));
838:   if (osm->pcmat) PetscCall(MatDestroy(&osm->pcmat));
839:   PetscFunctionReturn(PETSC_SUCCESS);
840: }

842: static PetscErrorCode PCDestroy_GASM(PC pc)
843: {
844:   PC_GASM *osm = (PC_GASM *)pc->data;
845:   PetscInt i;

847:   PetscFunctionBegin;
848:   PetscCall(PCReset_GASM(pc));
849:   /* PCReset will not destroy subdomains, if user_subdomains is true. */
850:   PetscCall(PCGASMDestroySubdomains(osm->n, &osm->ois, &osm->iis));
851:   if (osm->ksp) {
852:     for (i = 0; i < osm->n; i++) PetscCall(KSPDestroy(&osm->ksp[i]));
853:     PetscCall(PetscFree(osm->ksp));
854:   }
855:   PetscCall(PetscFree(osm->x));
856:   PetscCall(PetscFree(osm->y));
857:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSubdomains_C", NULL));
858:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetOverlap_C", NULL));
859:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetType_C", NULL));
860:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSortIndices_C", NULL));
861:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMGetSubKSP_C", NULL));
862:   PetscCall(PetscFree(pc->data));
863:   PetscFunctionReturn(PETSC_SUCCESS);
864: }

866: static PetscErrorCode PCSetFromOptions_GASM(PC pc, PetscOptionItems *PetscOptionsObject)
867: {
868:   PC_GASM   *osm = (PC_GASM *)pc->data;
869:   PetscInt   blocks, ovl;
870:   PetscBool  flg;
871:   PCGASMType gasmtype;

873:   PetscFunctionBegin;
874:   PetscOptionsHeadBegin(PetscOptionsObject, "Generalized additive Schwarz options");
875:   PetscCall(PetscOptionsBool("-pc_gasm_use_dm_subdomains", "If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.", "PCGASMSetUseDMSubdomains", osm->dm_subdomains, &osm->dm_subdomains, &flg));
876:   PetscCall(PetscOptionsInt("-pc_gasm_total_subdomains", "Total number of subdomains across communicator", "PCGASMSetTotalSubdomains", osm->N, &blocks, &flg));
877:   if (flg) PetscCall(PCGASMSetTotalSubdomains(pc, blocks));
878:   PetscCall(PetscOptionsInt("-pc_gasm_overlap", "Number of overlapping degrees of freedom", "PCGASMSetOverlap", osm->overlap, &ovl, &flg));
879:   if (flg) {
880:     PetscCall(PCGASMSetOverlap(pc, ovl));
881:     osm->dm_subdomains = PETSC_FALSE;
882:   }
883:   flg = PETSC_FALSE;
884:   PetscCall(PetscOptionsEnum("-pc_gasm_type", "Type of restriction/extension", "PCGASMSetType", PCGASMTypes, (PetscEnum)osm->type, (PetscEnum *)&gasmtype, &flg));
885:   if (flg) PetscCall(PCGASMSetType(pc, gasmtype));
886:   PetscCall(PetscOptionsBool("-pc_gasm_use_hierachical_partitioning", "use hierarchical partitioning", NULL, osm->hierarchicalpartitioning, &osm->hierarchicalpartitioning, &flg));
887:   PetscOptionsHeadEnd();
888:   PetscFunctionReturn(PETSC_SUCCESS);
889: }

891: /*@
892:   PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the communicator for `PCGASM`

894:   Logically Collective

896:   Input Parameters:
897: + pc - the preconditioner
898: - N  - total number of subdomains

900:   Level: beginner

902: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMSetOverlap()`
903:           `PCGASMCreateSubdomains2D()`
904: @*/
905: PetscErrorCode PCGASMSetTotalSubdomains(PC pc, PetscInt N)
906: {
907:   PC_GASM    *osm = (PC_GASM *)pc->data;
908:   PetscMPIInt size, rank;

910:   PetscFunctionBegin;
911:   PetscCheck(N >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Total number of subdomains must be 1 or more, got N = %" PetscInt_FMT, N);
912:   PetscCheck(!pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");

914:   PetscCall(PCGASMDestroySubdomains(osm->n, &osm->iis, &osm->ois));
915:   osm->ois = osm->iis = NULL;

917:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
918:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
919:   osm->N             = N;
920:   osm->n             = PETSC_DETERMINE;
921:   osm->nmax          = PETSC_DETERMINE;
922:   osm->dm_subdomains = PETSC_FALSE;
923:   PetscFunctionReturn(PETSC_SUCCESS);
924: }

926: static PetscErrorCode PCGASMSetSubdomains_GASM(PC pc, PetscInt n, IS iis[], IS ois[])
927: {
928:   PC_GASM *osm = (PC_GASM *)pc->data;
929:   PetscInt i;

931:   PetscFunctionBegin;
932:   PetscCheck(n >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Each MPI rank must have 1 or more subdomains, got n = %" PetscInt_FMT, n);
933:   PetscCheck(!pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "PCGASMSetSubdomains() should be called before calling PCSetUp().");

935:   PetscCall(PCGASMDestroySubdomains(osm->n, &osm->iis, &osm->ois));
936:   osm->iis = osm->ois = NULL;
937:   osm->n              = n;
938:   osm->N              = PETSC_DETERMINE;
939:   osm->nmax           = PETSC_DETERMINE;
940:   if (ois) {
941:     PetscCall(PetscMalloc1(n, &osm->ois));
942:     for (i = 0; i < n; i++) {
943:       PetscCall(PetscObjectReference((PetscObject)ois[i]));
944:       osm->ois[i] = ois[i];
945:     }
946:     /*
947:        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
948:        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
949:     */
950:     osm->overlap = -1;
951:     /* inner subdomains must be provided  */
952:     PetscCheck(iis, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "inner indices have to be provided ");
953:   } /* end if */
954:   if (iis) {
955:     PetscCall(PetscMalloc1(n, &osm->iis));
956:     for (i = 0; i < n; i++) {
957:       PetscCall(PetscObjectReference((PetscObject)iis[i]));
958:       osm->iis[i] = iis[i];
959:     }
960:     if (!ois) {
961:       osm->ois = NULL;
962:       /* if user does not provide outer indices, we will create the corresponding outer indices using  osm->overlap =1 in PCSetUp_GASM */
963:     }
964:   }
965:   if (PetscDefined(USE_DEBUG)) {
966:     PetscInt        j, rstart, rend, *covered, lsize;
967:     const PetscInt *indices;
968:     /* check if the inner indices cover and only cover the local portion of the preconditioning matrix */
969:     PetscCall(MatGetOwnershipRange(pc->pmat, &rstart, &rend));
970:     PetscCall(PetscCalloc1(rend - rstart, &covered));
971:     /* check if the current MPI process owns indices from others */
972:     for (i = 0; i < n; i++) {
973:       PetscCall(ISGetIndices(osm->iis[i], &indices));
974:       PetscCall(ISGetLocalSize(osm->iis[i], &lsize));
975:       for (j = 0; j < lsize; j++) {
976:         PetscCheck(indices[j] >= rstart && indices[j] < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "inner subdomains can not own an index %" PetscInt_FMT " from other ranks", indices[j]);
977:         PetscCheck(covered[indices[j] - rstart] != 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "inner subdomains can not have an overlapping index %" PetscInt_FMT " ", indices[j]);
978:         covered[indices[j] - rstart] = 1;
979:       }
980:       PetscCall(ISRestoreIndices(osm->iis[i], &indices));
981:     }
982:     /* check if we miss any indices */
983:     for (i = rstart; i < rend; i++) PetscCheck(covered[i - rstart], PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "local entity %" PetscInt_FMT " was not covered by inner subdomains", i);
984:     PetscCall(PetscFree(covered));
985:   }
986:   if (iis) osm->user_subdomains = PETSC_TRUE;
987:   PetscFunctionReturn(PETSC_SUCCESS);
988: }

990: static PetscErrorCode PCGASMSetOverlap_GASM(PC pc, PetscInt ovl)
991: {
992:   PC_GASM *osm = (PC_GASM *)pc->data;

994:   PetscFunctionBegin;
995:   PetscCheck(ovl >= 0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Negative overlap value requested");
996:   PetscCheck(!pc->setupcalled || ovl == osm->overlap, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "PCGASMSetOverlap() should be called before PCSetUp().");
997:   if (!pc->setupcalled) osm->overlap = ovl;
998:   PetscFunctionReturn(PETSC_SUCCESS);
999: }

1001: static PetscErrorCode PCGASMSetType_GASM(PC pc, PCGASMType type)
1002: {
1003:   PC_GASM *osm = (PC_GASM *)pc->data;

1005:   PetscFunctionBegin;
1006:   osm->type     = type;
1007:   osm->type_set = PETSC_TRUE;
1008:   PetscFunctionReturn(PETSC_SUCCESS);
1009: }

1011: static PetscErrorCode PCGASMSetSortIndices_GASM(PC pc, PetscBool doSort)
1012: {
1013:   PC_GASM *osm = (PC_GASM *)pc->data;

1015:   PetscFunctionBegin;
1016:   osm->sort_indices = doSort;
1017:   PetscFunctionReturn(PETSC_SUCCESS);
1018: }

1020: /*
1021:    FIXME: This routine might need to be modified now that multiple processes per subdomain are allowed.
1022:         In particular, it would upset the global subdomain number calculation.
1023: */
1024: static PetscErrorCode PCGASMGetSubKSP_GASM(PC pc, PetscInt *n, PetscInt *first, KSP **ksp)
1025: {
1026:   PC_GASM *osm = (PC_GASM *)pc->data;

1028:   PetscFunctionBegin;
1029:   PetscCheck(osm->n >= 1, PetscObjectComm((PetscObject)pc), PETSC_ERR_ORDER, "Need to call PCSetUp() on PC (or KSPSetUp() on the outer KSP object) before calling here");

1031:   if (n) *n = osm->n;
1032:   if (first) {
1033:     PetscCallMPI(MPI_Scan(&osm->n, first, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
1034:     *first -= osm->n;
1035:   }
1036:   if (ksp) {
1037:     /* Assume that local solves are now different; not necessarily
1038:        true, though!  This flag is used only for PCView_GASM() */
1039:     *ksp                        = osm->ksp;
1040:     osm->same_subdomain_solvers = PETSC_FALSE;
1041:   }
1042:   PetscFunctionReturn(PETSC_SUCCESS);
1043: } /* PCGASMGetSubKSP_GASM() */

1045: /*@
1046:   PCGASMSetSubdomains - Sets the subdomains for this MPI process
1047:   for the additive Schwarz preconditioner with multiple MPI processes per subdomain, `PCGASM`

1049:   Collective

1051:   Input Parameters:
1052: + pc  - the preconditioner object
1053: . n   - the number of subdomains for this MPI process
1054: . iis - the index sets that define the inner subdomains (or `NULL` for PETSc to determine subdomains), the `iis` array is
1055:         copied so may be freed after this call.
1056: - ois - the index sets that define the outer subdomains (or `NULL` to use the same as `iis`, or to construct by expanding `iis` by
1057:         the requested overlap), the `ois` array is copied so may be freed after this call.

1059:   Level: advanced

1061:   Notes:
1062:   The `IS` indices use the parallel, global numbering of the vector entries.

1064:   Inner subdomains are those where the correction is applied.

1066:   Outer subdomains are those where the residual necessary to obtain the
1067:   corrections is obtained (see `PCGASMType` for the use of inner/outer subdomains).

1069:   Both inner and outer subdomains can extend over several MPI processes.
1070:   This process' portion of a subdomain is known as a local subdomain.

1072:   Inner subdomains can not overlap with each other, do not have any entities from remote processes,
1073:   and  have to cover the entire local subdomain owned by the current process. The index sets on each
1074:   process should be ordered such that the ith local subdomain is connected to the ith remote subdomain
1075:   on another MPI process.

1077:   By default the `PGASM` preconditioner uses 1 (local) subdomain per MPI process.

1079:   The `iis` and `ois` arrays may be freed after this call using `PCGASMDestroySubdomains()`

1081: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetOverlap()`, `PCGASMGetSubKSP()`, `PCGASMDestroySubdomains()`,
1082:           `PCGASMCreateSubdomains2D()`, `PCGASMGetSubdomains()`
1083: @*/
1084: PetscErrorCode PCGASMSetSubdomains(PC pc, PetscInt n, IS iis[], IS ois[])
1085: {
1086:   PC_GASM *osm = (PC_GASM *)pc->data;

1088:   PetscFunctionBegin;
1090:   PetscTryMethod(pc, "PCGASMSetSubdomains_C", (PC, PetscInt, IS[], IS[]), (pc, n, iis, ois));
1091:   osm->dm_subdomains = PETSC_FALSE;
1092:   PetscFunctionReturn(PETSC_SUCCESS);
1093: }

1095: /*@
1096:   PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1097:   additive Schwarz preconditioner `PCGASM`.  Either all or no MPI processes in the
1098:   pc communicator must call this routine.

1100:   Logically Collective

1102:   Input Parameters:
1103: + pc  - the preconditioner context
1104: - ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

1106:   Options Database Key:
1107: . -pc_gasm_overlap <overlap> - Sets overlap

1109:   Level: intermediate

1111:   Notes:
1112:   By default the `PCGASM` preconditioner uses 1 subdomain per process.  To use
1113:   multiple subdomain per perocessor or "straddling" subdomains that intersect
1114:   multiple processes use `PCGASMSetSubdomains()` (or option `-pc_gasm_total_subdomains` <n>).

1116:   The overlap defaults to 0, so if one desires that no additional
1117:   overlap be computed beyond what may have been set with a call to
1118:   `PCGASMSetSubdomains()`, then `ovl` must be set to be 0.  In particular, if one does
1119:   not explicitly set the subdomains in application code, then all overlap would be computed
1120:   internally by PETSc, and using an overlap of 0 would result in an `PCGASM`
1121:   variant that is equivalent to the block Jacobi preconditioner.

1123:   One can define initial index sets with any overlap via
1124:   `PCGASMSetSubdomains()`; the routine `PCGASMSetOverlap()` merely allows
1125:   PETSc to extend that overlap further, if desired.

1127: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`,
1128:           `PCGASMCreateSubdomains2D()`, `PCGASMGetSubdomains()`
1129: @*/
1130: PetscErrorCode PCGASMSetOverlap(PC pc, PetscInt ovl)
1131: {
1132:   PC_GASM *osm = (PC_GASM *)pc->data;

1134:   PetscFunctionBegin;
1137:   PetscTryMethod(pc, "PCGASMSetOverlap_C", (PC, PetscInt), (pc, ovl));
1138:   osm->dm_subdomains = PETSC_FALSE;
1139:   PetscFunctionReturn(PETSC_SUCCESS);
1140: }

1142: /*@
1143:   PCGASMSetType - Sets the type of restriction and interpolation used
1144:   for local problems in the `PCGASM` additive Schwarz method.

1146:   Logically Collective

1148:   Input Parameters:
1149: + pc   - the preconditioner context
1150: - type - variant of `PCGASM`, one of
1151: .vb
1152:       `PC_GASM_BASIC`       - full interpolation and restriction
1153:       `PC_GASM_RESTRICT`    - full restriction, local MPI process interpolation
1154:       `PC_GASM_INTERPOLATE` - full interpolation, local MPI process restriction
1155:       `PC_GASM_NONE`        - local MPI process restriction and interpolation
1156: .ve

1158:   Options Database Key:
1159: . -pc_gasm_type [basic,restrict,interpolate,none] - Sets `PCGASM` type

1161:   Level: intermediate

1163: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`,
1164:           `PCGASMCreateSubdomains2D()`, `PCASM`, `PCASMSetType()`
1165: @*/
1166: PetscErrorCode PCGASMSetType(PC pc, PCGASMType type)
1167: {
1168:   PetscFunctionBegin;
1171:   PetscTryMethod(pc, "PCGASMSetType_C", (PC, PCGASMType), (pc, type));
1172:   PetscFunctionReturn(PETSC_SUCCESS);
1173: }

1175: /*@
1176:   PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

1178:   Logically Collective

1180:   Input Parameters:
1181: + pc     - the preconditioner context
1182: - doSort - sort the subdomain indices

1184:   Level: intermediate

1186: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`,
1187:           `PCGASMCreateSubdomains2D()`
1188: @*/
1189: PetscErrorCode PCGASMSetSortIndices(PC pc, PetscBool doSort)
1190: {
1191:   PetscFunctionBegin;
1194:   PetscTryMethod(pc, "PCGASMSetSortIndices_C", (PC, PetscBool), (pc, doSort));
1195:   PetscFunctionReturn(PETSC_SUCCESS);
1196: }

1198: /*@C
1199:   PCGASMGetSubKSP - Gets the local `KSP` contexts for all subdomains on this MPI process.

1201:   Collective iff first_local is requested

1203:   Input Parameter:
1204: . pc - the preconditioner context

1206:   Output Parameters:
1207: + n_local     - the number of blocks on this MPI process or `NULL`
1208: . first_local - the global number of the first block on this process or `NULL`, all processes must request or all must pass `NULL`
1209: - ksp         - the array of `KSP` contexts

1211:   Level: advanced

1213:   Note:
1214:   After `PCGASMGetSubKSP()` the array of `KSP`es is not to be freed

1216:   Currently for some matrix implementations only 1 block per MPI process
1217:   is supported.

1219:   You must call `KSPSetUp()` before calling `PCGASMGetSubKSP()`.

1221: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMSetOverlap()`,
1222:           `PCGASMCreateSubdomains2D()`,
1223: @*/
1224: PetscErrorCode PCGASMGetSubKSP(PC pc, PetscInt *n_local, PetscInt *first_local, KSP *ksp[])
1225: {
1226:   PetscFunctionBegin;
1228:   PetscUseMethod(pc, "PCGASMGetSubKSP_C", (PC, PetscInt *, PetscInt *, KSP **), (pc, n_local, first_local, ksp));
1229:   PetscFunctionReturn(PETSC_SUCCESS);
1230: }

1232: /*MC
1233:    PCGASM - Use the (restricted) additive Schwarz method, each block is (approximately) solved with
1234:            its own `KSP` object on a subset of MPI processes

1236:    Options Database Keys:
1237: +  -pc_gasm_total_subdomains <n>                   - Sets total number of local subdomains to be distributed among the MPI processes
1238: .  -pc_gasm_view_subdomains                        - activates the printing of subdomain indices in `PCView()`, -ksp_view or -snes_view
1239: .  -pc_gasm_print_subdomains                       - activates the printing of subdomain indices in `PCSetUp()`
1240: .  -pc_gasm_overlap <ovl>                          - Sets overlap by which to (automatically) extend local subdomains
1241: -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets `PCGASMType`

1243:    Level: beginner

1245:    Notes:
1246:    To set options on the solvers for each block append `-sub_` to all the `KSP`, and `PC`
1247:    options database keys. For example, `-sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly`

1249:    To set the options on the solvers separate for each block call `PCGASMGetSubKSP()`
1250:    and set the options directly on the resulting `KSP` object (you can access its `PC`
1251:    with `KSPGetPC()`)

1253:    See {cite}`dryja1987additive` and {cite}`1sbg` for details on additive Schwarz algorithms

1255: .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCASM`, `PCGASMType`, `PCGASMSetType()`,
1256:           `PCBJACOBI`, `PCGASMGetSubKSP()`, `PCGASMSetSubdomains()`,
1257:           `PCSetModifySubMatrices()`, `PCGASMSetOverlap()`, `PCGASMSetType()`
1258: M*/

1260: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1261: {
1262:   PC_GASM *osm;

1264:   PetscFunctionBegin;
1265:   PetscCall(PetscNew(&osm));

1267:   osm->N                        = PETSC_DETERMINE;
1268:   osm->n                        = PETSC_DECIDE;
1269:   osm->nmax                     = PETSC_DETERMINE;
1270:   osm->overlap                  = 0;
1271:   osm->ksp                      = NULL;
1272:   osm->gorestriction            = NULL;
1273:   osm->girestriction            = NULL;
1274:   osm->pctoouter                = NULL;
1275:   osm->gx                       = NULL;
1276:   osm->gy                       = NULL;
1277:   osm->x                        = NULL;
1278:   osm->y                        = NULL;
1279:   osm->pcx                      = NULL;
1280:   osm->pcy                      = NULL;
1281:   osm->permutationIS            = NULL;
1282:   osm->permutationP             = NULL;
1283:   osm->pcmat                    = NULL;
1284:   osm->ois                      = NULL;
1285:   osm->iis                      = NULL;
1286:   osm->pmat                     = NULL;
1287:   osm->type                     = PC_GASM_RESTRICT;
1288:   osm->same_subdomain_solvers   = PETSC_TRUE;
1289:   osm->sort_indices             = PETSC_TRUE;
1290:   osm->dm_subdomains            = PETSC_FALSE;
1291:   osm->hierarchicalpartitioning = PETSC_FALSE;

1293:   pc->data                 = (void *)osm;
1294:   pc->ops->apply           = PCApply_GASM;
1295:   pc->ops->matapply        = PCMatApply_GASM;
1296:   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1297:   pc->ops->setup           = PCSetUp_GASM;
1298:   pc->ops->reset           = PCReset_GASM;
1299:   pc->ops->destroy         = PCDestroy_GASM;
1300:   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1301:   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1302:   pc->ops->view            = PCView_GASM;
1303:   pc->ops->applyrichardson = NULL;

1305:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSubdomains_C", PCGASMSetSubdomains_GASM));
1306:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetOverlap_C", PCGASMSetOverlap_GASM));
1307:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetType_C", PCGASMSetType_GASM));
1308:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMSetSortIndices_C", PCGASMSetSortIndices_GASM));
1309:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGASMGetSubKSP_C", PCGASMGetSubKSP_GASM));
1310:   PetscFunctionReturn(PETSC_SUCCESS);
1311: }

1313: PetscErrorCode PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1314: {
1315:   MatPartitioning mpart;
1316:   const char     *prefix;
1317:   PetscInt        i, j, rstart, rend, bs;
1318:   PetscBool       hasop, isbaij = PETSC_FALSE, foundpart = PETSC_FALSE;
1319:   Mat             Ad = NULL, adj;
1320:   IS              ispart, isnumb, *is;

1322:   PetscFunctionBegin;
1323:   PetscCheck(nloc >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "number of local subdomains must > 0, got nloc = %" PetscInt_FMT, nloc);

1325:   /* Get prefix, row distribution, and block size */
1326:   PetscCall(MatGetOptionsPrefix(A, &prefix));
1327:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
1328:   PetscCall(MatGetBlockSize(A, &bs));
1329:   PetscCheck(rstart / bs * bs == rstart && rend / bs * bs == rend, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "bad row distribution [%" PetscInt_FMT ",%" PetscInt_FMT ") for matrix block size %" PetscInt_FMT, rstart, rend, bs);

1331:   /* Get diagonal block from matrix if possible */
1332:   PetscCall(MatHasOperation(A, MATOP_GET_DIAGONAL_BLOCK, &hasop));
1333:   if (hasop) PetscCall(MatGetDiagonalBlock(A, &Ad));
1334:   if (Ad) {
1335:     PetscCall(PetscObjectBaseTypeCompare((PetscObject)Ad, MATSEQBAIJ, &isbaij));
1336:     if (!isbaij) PetscCall(PetscObjectBaseTypeCompare((PetscObject)Ad, MATSEQSBAIJ, &isbaij));
1337:   }
1338:   if (Ad && nloc > 1) {
1339:     PetscBool match, done;
1340:     /* Try to setup a good matrix partitioning if available */
1341:     PetscCall(MatPartitioningCreate(PETSC_COMM_SELF, &mpart));
1342:     PetscCall(PetscObjectSetOptionsPrefix((PetscObject)mpart, prefix));
1343:     PetscCall(MatPartitioningSetFromOptions(mpart));
1344:     PetscCall(PetscObjectTypeCompare((PetscObject)mpart, MATPARTITIONINGCURRENT, &match));
1345:     if (!match) PetscCall(PetscObjectTypeCompare((PetscObject)mpart, MATPARTITIONINGSQUARE, &match));
1346:     if (!match) { /* assume a "good" partitioner is available */
1347:       PetscInt        na;
1348:       const PetscInt *ia, *ja;
1349:       PetscCall(MatGetRowIJ(Ad, 0, PETSC_TRUE, isbaij, &na, &ia, &ja, &done));
1350:       if (done) {
1351:         /* Build adjacency matrix by hand. Unfortunately a call to
1352:            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1353:            remove the block-aij structure and we cannot expect
1354:            MatPartitioning to split vertices as we need */
1355:         PetscInt        i, j, len, nnz, cnt, *iia = NULL, *jja = NULL;
1356:         const PetscInt *row;
1357:         nnz = 0;
1358:         for (i = 0; i < na; i++) { /* count number of nonzeros */
1359:           len = ia[i + 1] - ia[i];
1360:           row = ja + ia[i];
1361:           for (j = 0; j < len; j++) {
1362:             if (row[j] == i) { /* don't count diagonal */
1363:               len--;
1364:               break;
1365:             }
1366:           }
1367:           nnz += len;
1368:         }
1369:         PetscCall(PetscMalloc1(na + 1, &iia));
1370:         PetscCall(PetscMalloc1(nnz, &jja));
1371:         nnz    = 0;
1372:         iia[0] = 0;
1373:         for (i = 0; i < na; i++) { /* fill adjacency */
1374:           cnt = 0;
1375:           len = ia[i + 1] - ia[i];
1376:           row = ja + ia[i];
1377:           for (j = 0; j < len; j++) {
1378:             if (row[j] != i) jja[nnz + cnt++] = row[j]; /* if not diagonal */
1379:           }
1380:           nnz += cnt;
1381:           iia[i + 1] = nnz;
1382:         }
1383:         /* Partitioning of the adjacency matrix */
1384:         PetscCall(MatCreateMPIAdj(PETSC_COMM_SELF, na, na, iia, jja, NULL, &adj));
1385:         PetscCall(MatPartitioningSetAdjacency(mpart, adj));
1386:         PetscCall(MatPartitioningSetNParts(mpart, nloc));
1387:         PetscCall(MatPartitioningApply(mpart, &ispart));
1388:         PetscCall(ISPartitioningToNumbering(ispart, &isnumb));
1389:         PetscCall(MatDestroy(&adj));
1390:         foundpart = PETSC_TRUE;
1391:       }
1392:       PetscCall(MatRestoreRowIJ(Ad, 0, PETSC_TRUE, isbaij, &na, &ia, &ja, &done));
1393:     }
1394:     PetscCall(MatPartitioningDestroy(&mpart));
1395:   }
1396:   PetscCall(PetscMalloc1(nloc, &is));
1397:   if (!foundpart) {
1398:     /* Partitioning by contiguous chunks of rows */

1400:     PetscInt mbs   = (rend - rstart) / bs;
1401:     PetscInt start = rstart;
1402:     for (i = 0; i < nloc; i++) {
1403:       PetscInt count = (mbs / nloc + ((mbs % nloc) > i)) * bs;
1404:       PetscCall(ISCreateStride(PETSC_COMM_SELF, count, start, 1, &is[i]));
1405:       start += count;
1406:     }

1408:   } else {
1409:     /* Partitioning by adjacency of diagonal block  */

1411:     const PetscInt *numbering;
1412:     PetscInt       *count, nidx, *indices, *newidx, start = 0;
1413:     /* Get node count in each partition */
1414:     PetscCall(PetscMalloc1(nloc, &count));
1415:     PetscCall(ISPartitioningCount(ispart, nloc, count));
1416:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1417:       for (i = 0; i < nloc; i++) count[i] *= bs;
1418:     }
1419:     /* Build indices from node numbering */
1420:     PetscCall(ISGetLocalSize(isnumb, &nidx));
1421:     PetscCall(PetscMalloc1(nidx, &indices));
1422:     for (i = 0; i < nidx; i++) indices[i] = i; /* needs to be initialized */
1423:     PetscCall(ISGetIndices(isnumb, &numbering));
1424:     PetscCall(PetscSortIntWithPermutation(nidx, numbering, indices));
1425:     PetscCall(ISRestoreIndices(isnumb, &numbering));
1426:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1427:       PetscCall(PetscMalloc1(nidx * bs, &newidx));
1428:       for (i = 0; i < nidx; i++) {
1429:         for (j = 0; j < bs; j++) newidx[i * bs + j] = indices[i] * bs + j;
1430:       }
1431:       PetscCall(PetscFree(indices));
1432:       nidx *= bs;
1433:       indices = newidx;
1434:     }
1435:     /* Shift to get global indices */
1436:     for (i = 0; i < nidx; i++) indices[i] += rstart;

1438:     /* Build the index sets for each block */
1439:     for (i = 0; i < nloc; i++) {
1440:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count[i], &indices[start], PETSC_COPY_VALUES, &is[i]));
1441:       PetscCall(ISSort(is[i]));
1442:       start += count[i];
1443:     }

1445:     PetscCall(PetscFree(count));
1446:     PetscCall(PetscFree(indices));
1447:     PetscCall(ISDestroy(&isnumb));
1448:     PetscCall(ISDestroy(&ispart));
1449:   }
1450:   *iis = is;
1451:   PetscFunctionReturn(PETSC_SUCCESS);
1452: }

1454: PETSC_INTERN PetscErrorCode PCGASMCreateStraddlingSubdomains(Mat A, PetscInt N, PetscInt *n, IS *iis[])
1455: {
1456:   PetscFunctionBegin;
1457:   PetscCall(MatSubdomainsCreateCoalesce(A, N, n, iis));
1458:   PetscFunctionReturn(PETSC_SUCCESS);
1459: }

1461: /*@C
1462:   PCGASMCreateSubdomains - Creates `n` index sets defining `n` nonoverlapping subdomains on this MPI process for the `PCGASM` additive
1463:   Schwarz preconditioner for a any problem based on its matrix.

1465:   Collective

1467:   Input Parameters:
1468: + A - The global matrix operator
1469: - N - the number of global subdomains requested

1471:   Output Parameters:
1472: + n   - the number of subdomains created on this MPI process
1473: - iis - the array of index sets defining the local inner subdomains (on which the correction is applied)

1475:   Level: advanced

1477:   Notes:
1478:   When `N` >= A's communicator size, each subdomain is local -- contained within a single MPI process.
1479:   When `N` < size, the subdomains are 'straddling' (process boundaries) and are no longer local.
1480:   The resulting subdomains can be use in `PCGASMSetSubdomains`(pc,n,iss,`NULL`).  The overlapping
1481:   outer subdomains will be automatically generated from these according to the requested amount of
1482:   overlap; this is currently supported only with local subdomains.

1484:   Use `PCGASMDestroySubdomains()` to free the array and the list of index sets.

1486: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMDestroySubdomains()`
1487: @*/
1488: PetscErrorCode PCGASMCreateSubdomains(Mat A, PetscInt N, PetscInt *n, IS *iis[])
1489: {
1490:   PetscMPIInt size;

1492:   PetscFunctionBegin;
1494:   PetscAssertPointer(iis, 4);

1496:   PetscCheck(N >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Number of subdomains must be > 0, N = %" PetscInt_FMT, N);
1497:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1498:   if (N >= size) {
1499:     *n = N / size + (N % size);
1500:     PetscCall(PCGASMCreateLocalSubdomains(A, *n, iis));
1501:   } else {
1502:     PetscCall(PCGASMCreateStraddlingSubdomains(A, N, n, iis));
1503:   }
1504:   PetscFunctionReturn(PETSC_SUCCESS);
1505: }

1507: /*@C
1508:   PCGASMDestroySubdomains - Destroys the index sets created with
1509:   `PCGASMCreateSubdomains()` or `PCGASMCreateSubdomains2D()`. Should be
1510:   called after setting subdomains with `PCGASMSetSubdomains()`.

1512:   Collective

1514:   Input Parameters:
1515: + n   - the number of index sets
1516: . iis - the array of inner subdomains
1517: - ois - the array of outer subdomains, can be `NULL`

1519:   Level: intermediate

1521:   Note:
1522:   This is a convenience subroutine that walks each list,
1523:   destroys each `IS` on the list, and then frees the list. At the end the
1524:   list pointers are set to `NULL`.

1526:   Fortran Note:
1527:   The arrays are not freed, only the `IS` within the arrays are destroyed

1529: .seealso: [](ch_ksp), `PCGASM`, `PCGASMCreateSubdomains()`, `PCGASMSetSubdomains()`
1530: @*/
1531: PetscErrorCode PCGASMDestroySubdomains(PetscInt n, IS *iis[], IS *ois[])
1532: {
1533:   PetscInt i;

1535:   PetscFunctionBegin;
1536:   if (n <= 0) PetscFunctionReturn(PETSC_SUCCESS);
1537:   if (ois) {
1538:     PetscAssertPointer(ois, 3);
1539:     if (*ois) {
1540:       PetscAssertPointer(*ois, 3);
1541:       for (i = 0; i < n; i++) PetscCall(ISDestroy(&(*ois)[i]));
1542:       PetscCall(PetscFree(*ois));
1543:     }
1544:   }
1545:   if (iis) {
1546:     PetscAssertPointer(iis, 2);
1547:     if (*iis) {
1548:       PetscAssertPointer(*iis, 2);
1549:       for (i = 0; i < n; i++) PetscCall(ISDestroy(&(*iis)[i]));
1550:       PetscCall(PetscFree(*iis));
1551:     }
1552:   }
1553:   PetscFunctionReturn(PETSC_SUCCESS);
1554: }

1556: #define PCGASMLocalSubdomainBounds2D(M, N, xleft, ylow, xright, yhigh, first, last, xleft_loc, ylow_loc, xright_loc, yhigh_loc, n) \
1557:   do { \
1558:     PetscInt first_row = first / M, last_row = last / M + 1; \
1559:     /*                                                                                                    \
1560:      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1561:      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1562:      subdomain).                                                                                          \
1563:      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1564:      of the intersection.                                                                                 \
1565:     */ \
1566:     /* ylow_loc is the grid row containing the first element of the local sumbdomain */ \
1567:     *ylow_loc = PetscMax(first_row, ylow); \
1568:     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1569:     *xleft_loc = *ylow_loc == first_row ? PetscMax(first % M, xleft) : xleft; \
1570:     /* yhigh_loc is the grid row above the last local subdomain element */ \
1571:     *yhigh_loc = PetscMin(last_row, yhigh); \
1572:     /* xright is the offset of the end of the  local subdomain within its grid row (might actually be outside the local subdomain) */ \
1573:     *xright_loc = *yhigh_loc == last_row ? PetscMin(xright, last % M) : xright; \
1574:     /* Now compute the size of the local subdomain n. */ \
1575:     *n = 0; \
1576:     if (*ylow_loc < *yhigh_loc) { \
1577:       PetscInt width = xright - xleft; \
1578:       *n += width * (*yhigh_loc - *ylow_loc - 1); \
1579:       *n += PetscMin(PetscMax(*xright_loc - xleft, 0), width); \
1580:       *n -= PetscMin(PetscMax(*xleft_loc - xleft, 0), width); \
1581:     } \
1582:   } while (0)

1584: /*@C
1585:   PCGASMCreateSubdomains2D - Creates the index sets for the `PCGASM` overlapping Schwarz
1586:   preconditioner for a two-dimensional problem on a regular grid.

1588:   Collective

1590:   Input Parameters:
1591: + pc       - the preconditioner context
1592: . M        - the global number of grid points in the x direction
1593: . N        - the global number of grid points in the y direction
1594: . Mdomains - the global number of subdomains in the x direction
1595: . Ndomains - the global number of subdomains in the y direction
1596: . dof      - degrees of freedom per node
1597: - overlap  - overlap in mesh lines

1599:   Output Parameters:
1600: + nsub - the number of local subdomains created
1601: . iis  - array of index sets defining inner (nonoverlapping) subdomains
1602: - ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains

1604:   Level: advanced

1606:   Note:
1607:   Use `PCGASMDestroySubdomains()` to free the index sets and the arrays

1609:   Fortran Notes:
1610:   The `IS` must be declared as an array of length long enough to hold `Nsub` entries

1612: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetSubdomains()`, `PCGASMGetSubKSP()`, `PCGASMSetOverlap()`, `PCASMCreateSubdomains2D()`,
1613:           `PCGASMDestroySubdomains()`
1614: @*/
1615: PetscErrorCode PCGASMCreateSubdomains2D(PC pc, PetscInt M, PetscInt N, PetscInt Mdomains, PetscInt Ndomains, PetscInt dof, PetscInt overlap, PetscInt *nsub, IS *iis[], IS *ois[])
1616: {
1617:   PetscMPIInt size, rank;
1618:   PetscInt    i, j;
1619:   PetscInt    maxheight, maxwidth;
1620:   PetscInt    xstart, xleft, xright, xleft_loc, xright_loc;
1621:   PetscInt    ystart, ylow, yhigh, ylow_loc, yhigh_loc;
1622:   PetscInt    x[2][2], y[2][2], n[2];
1623:   PetscInt    first, last;
1624:   PetscInt    nidx, *idx;
1625:   PetscInt    ii, jj, s, q, d;
1626:   PetscInt    k, kk;
1627:   PetscMPIInt color;
1628:   MPI_Comm    comm, subcomm;
1629:   IS        **xis = NULL, **is = ois, **is_local = iis;

1631:   PetscFunctionBegin;
1632:   PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
1633:   PetscCallMPI(MPI_Comm_size(comm, &size));
1634:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1635:   PetscCall(MatGetOwnershipRange(pc->pmat, &first, &last));
1636:   PetscCheck((first % dof) == 0 && (last % dof) == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE,
1637:              "Matrix row partitioning unsuitable for domain decomposition: local row range (%" PetscInt_FMT ",%" PetscInt_FMT ") "
1638:              "does not respect the number of degrees of freedom per grid point %" PetscInt_FMT,
1639:              first, last, dof);

1641:   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1642:   s      = 0;
1643:   ystart = 0;
1644:   for (j = 0; j < Ndomains; ++j) {
1645:     maxheight = N / Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1646:     PetscCheck(maxheight >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many %" PetscInt_FMT " subdomains in the vertical direction for mesh height %" PetscInt_FMT, Ndomains, N);
1647:     /* Vertical domain limits with an overlap. */
1648:     ylow   = PetscMax(ystart - overlap, 0);
1649:     yhigh  = PetscMin(ystart + maxheight + overlap, N);
1650:     xstart = 0;
1651:     for (i = 0; i < Mdomains; ++i) {
1652:       maxwidth = M / Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1653:       PetscCheck(maxwidth >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many %" PetscInt_FMT " subdomains in the horizontal direction for mesh width %" PetscInt_FMT, Mdomains, M);
1654:       /* Horizontal domain limits with an overlap. */
1655:       xleft  = PetscMax(xstart - overlap, 0);
1656:       xright = PetscMin(xstart + maxwidth + overlap, M);
1657:       /*
1658:          Determine whether this subdomain intersects this rank's ownership range of pc->pmat.
1659:       */
1660:       PCGASMLocalSubdomainBounds2D(M, N, xleft, ylow, xright, yhigh, first, last, (&xleft_loc), (&ylow_loc), (&xright_loc), (&yhigh_loc), (&nidx));
1661:       if (nidx) ++s;
1662:       xstart += maxwidth;
1663:     } /* for (i = 0; i < Mdomains; ++i) */
1664:     ystart += maxheight;
1665:   } /* for (j = 0; j < Ndomains; ++j) */

1667:   /* Now we can allocate the necessary number of ISs. */
1668:   *nsub = s;
1669:   PetscCall(PetscMalloc1(*nsub, is));
1670:   PetscCall(PetscMalloc1(*nsub, is_local));
1671:   s      = 0;
1672:   ystart = 0;
1673:   for (j = 0; j < Ndomains; ++j) {
1674:     maxheight = N / Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1675:     PetscCheck(maxheight >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many %" PetscInt_FMT " subdomains in the vertical direction for mesh height %" PetscInt_FMT, Ndomains, N);
1676:     /* Vertical domain limits with an overlap. */
1677:     y[0][0] = PetscMax(ystart - overlap, 0);
1678:     y[0][1] = PetscMin(ystart + maxheight + overlap, N);
1679:     /* Vertical domain limits without an overlap. */
1680:     y[1][0] = ystart;
1681:     y[1][1] = PetscMin(ystart + maxheight, N);
1682:     xstart  = 0;
1683:     for (i = 0; i < Mdomains; ++i) {
1684:       maxwidth = M / Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1685:       PetscCheck(maxwidth >= 2, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Too many %" PetscInt_FMT " subdomains in the horizontal direction for mesh width %" PetscInt_FMT, Mdomains, M);
1686:       /* Horizontal domain limits with an overlap. */
1687:       x[0][0] = PetscMax(xstart - overlap, 0);
1688:       x[0][1] = PetscMin(xstart + maxwidth + overlap, M);
1689:       /* Horizontal domain limits without an overlap. */
1690:       x[1][0] = xstart;
1691:       x[1][1] = PetscMin(xstart + maxwidth, M);
1692:       /*
1693:          Determine whether this domain intersects this rank's ownership range of pc->pmat.
1694:          Do this twice: first for the domains with overlaps, and once without.
1695:          During the first pass create the subcommunicators, and use them on the second pass as well.
1696:       */
1697:       for (q = 0; q < 2; ++q) {
1698:         PetscBool split = PETSC_FALSE;
1699:         /*
1700:           domain limits, (xleft, xright) and (ylow, yheigh) are adjusted
1701:           according to whether the domain with an overlap or without is considered.
1702:         */
1703:         xleft  = x[q][0];
1704:         xright = x[q][1];
1705:         ylow   = y[q][0];
1706:         yhigh  = y[q][1];
1707:         PCGASMLocalSubdomainBounds2D(M, N, xleft, ylow, xright, yhigh, first, last, (&xleft_loc), (&ylow_loc), (&xright_loc), (&yhigh_loc), (&nidx));
1708:         nidx *= dof;
1709:         n[q] = nidx;
1710:         /*
1711:          Based on the counted number of indices in the local domain *with an overlap*,
1712:          construct a subcommunicator of all the MPI ranks supporting this domain.
1713:          Observe that a domain with an overlap might have nontrivial local support,
1714:          while the domain without an overlap might not.  Hence, the decision to participate
1715:          in the subcommunicator must be based on the domain with an overlap.
1716:          */
1717:         if (q == 0) {
1718:           if (nidx) color = 1;
1719:           else color = MPI_UNDEFINED;
1720:           PetscCallMPI(MPI_Comm_split(comm, color, rank, &subcomm));
1721:           split = PETSC_TRUE;
1722:         }
1723:         /*
1724:          Proceed only if the number of local indices *with an overlap* is nonzero.
1725:          */
1726:         if (n[0]) {
1727:           if (q == 0) xis = is;
1728:           if (q == 1) {
1729:             /*
1730:              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1731:              Moreover, if the overlap is zero, the two ISs are identical.
1732:              */
1733:             if (overlap == 0) {
1734:               (*is_local)[s] = (*is)[s];
1735:               PetscCall(PetscObjectReference((PetscObject)(*is)[s]));
1736:               continue;
1737:             } else {
1738:               xis     = is_local;
1739:               subcomm = ((PetscObject)(*is)[s])->comm;
1740:             }
1741:           } /* if (q == 1) */
1742:           idx = NULL;
1743:           PetscCall(PetscMalloc1(nidx, &idx));
1744:           if (nidx) {
1745:             k = 0;
1746:             for (jj = ylow_loc; jj < yhigh_loc; ++jj) {
1747:               PetscInt x0 = (jj == ylow_loc) ? xleft_loc : xleft;
1748:               PetscInt x1 = (jj == yhigh_loc - 1) ? xright_loc : xright;
1749:               kk          = dof * (M * jj + x0);
1750:               for (ii = x0; ii < x1; ++ii) {
1751:                 for (d = 0; d < dof; ++d) idx[k++] = kk++;
1752:               }
1753:             }
1754:           }
1755:           PetscCall(ISCreateGeneral(subcomm, nidx, idx, PETSC_OWN_POINTER, (*xis) + s));
1756:           if (split) PetscCallMPI(MPI_Comm_free(&subcomm));
1757:         } /* if (n[0]) */
1758:       } /* for (q = 0; q < 2; ++q) */
1759:       if (n[0]) ++s;
1760:       xstart += maxwidth;
1761:     } /* for (i = 0; i < Mdomains; ++i) */
1762:     ystart += maxheight;
1763:   } /* for (j = 0; j < Ndomains; ++j) */
1764:   PetscFunctionReturn(PETSC_SUCCESS);
1765: }

1767: /*@C
1768:   PCGASMGetSubdomains - Gets the subdomains supported on this MPI process
1769:   for the `PCGASM` additive Schwarz preconditioner.

1771:   Not Collective

1773:   Input Parameter:
1774: . pc - the preconditioner context

1776:   Output Parameters:
1777: + n   - the number of subdomains for this MPI process (default value = 1)
1778: . iis - the index sets that define the inner subdomains (without overlap) supported on this process (can be `NULL`)
1779: - ois - the index sets that define the outer subdomains (with overlap) supported on this process (can be `NULL`)

1781:   Level: advanced

1783:   Notes:
1784:   The user is responsible for destroying the `IS`s and freeing the returned arrays, this can be done with
1785:   `PCGASMDestroySubdomains()`

1787:   The `IS` numbering is in the parallel, global numbering of the vector.

1789: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetOverlap()`, `PCGASMGetSubKSP()`, `PCGASMCreateSubdomains2D()`,
1790:           `PCGASMSetSubdomains()`, `PCGASMGetSubmatrices()`, `PCGASMDestroySubdomains()`
1791: @*/
1792: PetscErrorCode PCGASMGetSubdomains(PC pc, PetscInt *n, IS *iis[], IS *ois[])
1793: {
1794:   PC_GASM  *osm;
1795:   PetscBool match;
1796:   PetscInt  i;

1798:   PetscFunctionBegin;
1800:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match));
1801:   PetscCheck(match, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1802:   osm = (PC_GASM *)pc->data;
1803:   if (n) *n = osm->n;
1804:   if (iis) PetscCall(PetscMalloc1(osm->n, iis));
1805:   if (ois) PetscCall(PetscMalloc1(osm->n, ois));
1806:   if (iis || ois) {
1807:     for (i = 0; i < osm->n; ++i) {
1808:       if (iis) (*iis)[i] = osm->iis[i];
1809:       if (ois) (*ois)[i] = osm->ois[i];
1810:     }
1811:   }
1812:   PetscFunctionReturn(PETSC_SUCCESS);
1813: }

1815: /*@C
1816:   PCGASMGetSubmatrices - Gets the local submatrices (for this MPI process
1817:   only) for the `PCGASM` additive Schwarz preconditioner.

1819:   Not Collective

1821:   Input Parameter:
1822: . pc - the preconditioner context

1824:   Output Parameters:
1825: + n   - the number of matrices for this MPI process (default value = 1)
1826: - mat - the matrices

1828:   Level: advanced

1830:   Note:
1831:   Matrices returned by this routine have the same communicators as the index sets (`IS`)
1832:   used to define subdomains in `PCGASMSetSubdomains()`

1834: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetOverlap()`, `PCGASMGetSubKSP()`,
1835:           `PCGASMCreateSubdomains2D()`, `PCGASMSetSubdomains()`, `PCGASMGetSubdomains()`
1836: @*/
1837: PetscErrorCode PCGASMGetSubmatrices(PC pc, PetscInt *n, Mat *mat[])
1838: {
1839:   PC_GASM  *osm;
1840:   PetscBool match;

1842:   PetscFunctionBegin;
1844:   PetscAssertPointer(n, 2);
1845:   if (mat) PetscAssertPointer(mat, 3);
1846:   PetscCheck(pc->setupcalled, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONGSTATE, "Must call after KSPSetUp() or PCSetUp().");
1847:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match));
1848:   PetscCheck(match, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1849:   osm = (PC_GASM *)pc->data;
1850:   if (n) *n = osm->n;
1851:   if (mat) *mat = osm->pmat;
1852:   PetscFunctionReturn(PETSC_SUCCESS);
1853: }

1855: /*@
1856:   PCGASMSetUseDMSubdomains - Indicates whether to use `DMCreateDomainDecomposition()` to define the subdomains, whenever possible for `PCGASM`

1858:   Logically Collective

1860:   Input Parameters:
1861: + pc  - the preconditioner
1862: - flg - boolean indicating whether to use subdomains defined by the `DM`

1864:   Options Database Key:
1865: + -pc_gasm_dm_subdomains    - configure subdomains
1866: . -pc_gasm_overlap          - set overlap
1867: - -pc_gasm_total_subdomains - set number of subdomains

1869:   Level: intermediate

1871:   Note:
1872:   `PCGASMSetSubdomains()`, `PCGASMSetTotalSubdomains()` or `PCGASMSetOverlap()` take precedence over `PCGASMSetUseDMSubdomains()`,
1873:   so setting `PCGASMSetSubdomains()` with nontrivial subdomain ISs or any of `PCGASMSetTotalSubdomains()` and `PCGASMSetOverlap()`
1874:   automatically turns the latter off.

1876: .seealso: [](ch_ksp), `PCGASM`, `PCGASMGetUseDMSubdomains()`, `PCGASMSetSubdomains()`, `PCGASMSetOverlap()`
1877:           `PCGASMCreateSubdomains2D()`
1878: @*/
1879: PetscErrorCode PCGASMSetUseDMSubdomains(PC pc, PetscBool flg)
1880: {
1881:   PC_GASM  *osm = (PC_GASM *)pc->data;
1882:   PetscBool match;

1884:   PetscFunctionBegin;
1887:   PetscCheck(!pc->setupcalled, ((PetscObject)pc)->comm, PETSC_ERR_ARG_WRONGSTATE, "Not for a setup PC.");
1888:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match));
1889:   if (match) {
1890:     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) osm->dm_subdomains = flg;
1891:   }
1892:   PetscFunctionReturn(PETSC_SUCCESS);
1893: }

1895: /*@
1896:   PCGASMGetUseDMSubdomains - Returns flag indicating whether to use `DMCreateDomainDecomposition()` to define the subdomains, whenever possible with `PCGASM`

1898:   Not Collective

1900:   Input Parameter:
1901: . pc - the preconditioner

1903:   Output Parameter:
1904: . flg - boolean indicating whether to use subdomains defined by the `DM`

1906:   Level: intermediate

1908: .seealso: [](ch_ksp), `PCGASM`, `PCGASMSetUseDMSubdomains()`, `PCGASMSetOverlap()`
1909:           `PCGASMCreateSubdomains2D()`
1910: @*/
1911: PetscErrorCode PCGASMGetUseDMSubdomains(PC pc, PetscBool *flg)
1912: {
1913:   PC_GASM  *osm = (PC_GASM *)pc->data;
1914:   PetscBool match;

1916:   PetscFunctionBegin;
1918:   PetscAssertPointer(flg, 2);
1919:   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCGASM, &match));
1920:   if (match) {
1921:     if (flg) *flg = osm->dm_subdomains;
1922:   }
1923:   PetscFunctionReturn(PETSC_SUCCESS);
1924: }