Actual source code: mspart.c

  1: #include <petscsf.h>
  2: #include <petscdmplex.h>
  3: #include <petsc/private/dmimpl.h>
  4: #include <petsc/private/dmpleximpl.h>
  5: #include <petsc/private/partitionerimpl.h>

  7: PetscBool  MSPartitionerCite       = PETSC_FALSE;
  8: const char MSPartitionerCitation[] = "@article{PETScMSPart2021,\n"
  9:                                      "  author  = {Parsani, Matteo and Boukharfane, Radouan and Nolasco, Irving Reyna and Fern{\'a}ndez, David C Del Rey and Zampini, Stefano and Hadri, Bilel and Dalcin, Lisandro},\n"
 10:                                      "  title   = {High-order accurate entropy-stable discontinuous collocated Galerkin methods with the summation-by-parts property for compressible {CFD} frameworks: Scalable {SSDC} algorithms and flow solver},\n"
 11:                                      "  journal = {Journal of Computational Physics},\n"
 12:                                      "  volume  = {424},\n"
 13:                                      "  pages   = {109844},\n"
 14:                                      "  year    = {2021}\n"
 15:                                      "}\n";

 17: PetscLogEvent PetscPartitioner_MS_SetUp;
 18: PetscLogEvent PetscPartitioner_MS_Stage[PETSCPARTITIONER_MS_NUMSTAGE];

 20: typedef struct {
 21:   PetscInt   levels;
 22:   MPI_Group *lgroup;
 23:   /* Need access to the DM in inner stages */
 24:   PetscInt stage;
 25:   DM       stagedm;
 26:   /* Stage partitioners */
 27:   PetscPartitioner *ppart;
 28:   /* Diagnostic */
 29:   PetscInt *edgeCut;
 30:   /* Target partition weights */
 31:   PetscBool     view_tpwgs;
 32:   PetscSection *tpwgs;
 33:   PetscBool     compute_tpwgs;
 34: } PetscPartitioner_MS;

 36: const char *const PetscPartitionerMultistageStrategyList[] = {"NODE", "MSECTION", "PetscPartitionerMultistageStrategy", "PETSCPARTITIONER_MS_", NULL};

 38: static void PetscLCM_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
 39: {
 40:   PetscInt  count = *cnt;
 41:   PetscInt *xin = (PetscInt *)in, *xout = (PetscInt *)out;

 43:   PetscFunctionBegin;
 44:   if (*datatype != MPIU_INT) {
 45:     (void)(*PetscErrorPrintf)("Can only handle MPIU_INT");
 46:     PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
 47:   }
 48:   for (PetscInt i = 0; i < count; i++) xout[i] = PetscLCM(xin[i], xout[i]);
 49:   PetscFunctionReturnVoid();
 50: }

 52: static PetscErrorCode PetscPartitionerView_Multistage(PetscPartitioner part, PetscViewer viewer)
 53: {
 54:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
 55:   PetscViewerFormat    format;
 56:   PetscBool            iascii;

 58:   PetscFunctionBegin;
 59:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
 60:   PetscCall(PetscViewerGetFormat(viewer, &format));
 61:   if (iascii) {
 62:     MPI_Comm  comm;
 63:     MPI_Group group;

 65:     PetscCall(PetscViewerASCIIPushTab(viewer));
 66:     if (!p->stagedm || p->stage == 0) PetscCall(PetscViewerASCIIPrintf(viewer, "Multistage graph partitioner: total stages %" PetscInt_FMT "\n", p->levels));
 67:     comm = PetscObjectComm((PetscObject)part);
 68:     PetscCallMPI(MPI_Comm_group(comm, &group));
 69:     for (PetscInt l = 0; l < p->levels; l++) {
 70:       PetscPartitioner ppart  = p->ppart[l];
 71:       MPI_Comm         pcomm  = PetscObjectComm((PetscObject)ppart);
 72:       MPI_Group        pgroup = MPI_GROUP_EMPTY;

 74:       PetscCall(PetscViewerASCIIPushTab(viewer));
 75:       if (l) {
 76:         if (pcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
 77:       } else {
 78:         pgroup = p->lgroup[0];
 79:       }

 81:       if (l) {
 82:         IS          is;
 83:         PetscMPIInt gr, gem = 1;
 84:         PetscInt    uniq;

 86:         PetscCallMPI(MPI_Group_size(group, &gr));
 87:         if (pgroup != MPI_GROUP_EMPTY) {
 88:           gem = 0;
 89:           PetscCallMPI(MPI_Group_translate_ranks(pgroup, 1, &gem, group, &gr));
 90:         }
 91:         PetscCall(ISCreateStride(PetscObjectComm((PetscObject)part), 1, gr, 1, &is));
 92:         PetscCall(ISRenumber(is, NULL, &uniq, NULL));
 93:         PetscCall(ISDestroy(&is));
 94:         PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gem, 1, MPI_INT, MPI_SUM, PetscObjectComm((PetscObject)part)));
 95:         if (gem) uniq--;
 96:         if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioners (%" PetscInt_FMT " unique groups, %d empty processes)\n", l, uniq, gem));
 97:       } else {
 98:         PetscMPIInt psize;
 99:         PetscCallMPI(MPI_Group_size(pgroup, &psize));
100:         if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioner to %d processes\n", l, psize));
101:       }
102:       PetscCall(PetscViewerFlush(viewer));
103:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL && (!p->stagedm || l == p->stage)) {
104:         PetscViewer pviewer;

106:         if (pcomm == MPI_COMM_NULL) pcomm = PETSC_COMM_SELF;
107:         PetscCall(PetscViewerGetSubViewer(viewer, pcomm, &pviewer));
108:         if (ppart) {
109:           PetscMPIInt size, *ranks, *granks;
110:           char        tstr[16], strranks[3072]; /* I'm lazy: max 12 chars (including spaces) per rank -> 256 ranks max*/

112:           PetscCallMPI(MPI_Group_size(pgroup, &size));
113:           PetscCall(PetscMalloc2(size, &ranks, size, &granks));
114:           for (PetscMPIInt i = 0; i < size; i++) ranks[i] = i;
115:           PetscCallMPI(MPI_Group_translate_ranks(pgroup, size, ranks, group, granks));
116:           if (size <= 256) {
117:             PetscCall(PetscStrncpy(strranks, "", sizeof(strranks)));
118:             for (PetscInt i = 0; i < size; i++) {
119:               PetscCall(PetscSNPrintf(tstr, sizeof(tstr), " %d", granks[i]));
120:               PetscCall(PetscStrlcat(strranks, tstr, sizeof(strranks)));
121:             }
122:           } else PetscCall(PetscStrncpy(strranks, " not showing > 256", sizeof(strranks))); /* LCOV_EXCL_LINE */
123:           PetscCall(PetscFree2(ranks, granks));
124:           if (!l) {
125:             PetscCall(PetscViewerASCIIPrintf(pviewer, "Destination ranks:%s\n", strranks));
126:             PetscCall(PetscPartitionerView(ppart, pviewer));
127:           } else {
128:             PetscCall(PetscPartitionerView(ppart, pviewer));
129:             PetscCall(PetscViewerASCIIPrintf(pviewer, "  Participating ranks:%s\n", strranks));
130:           }
131:           if (p->view_tpwgs) PetscCall(PetscSectionView(p->tpwgs[l], pviewer));
132:         }
133:         PetscCall(PetscViewerRestoreSubViewer(viewer, pcomm, &pviewer));
134:       }
135:       PetscCall(PetscViewerFlush(viewer));

137:       if (l && pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
138:     }
139:     for (PetscInt l = 0; l < p->levels; l++) PetscCall(PetscViewerASCIIPopTab(viewer));
140:     PetscCall(PetscViewerASCIIPopTab(viewer));
141:     PetscCallMPI(MPI_Group_free(&group));
142:   }
143:   PetscFunctionReturn(PETSC_SUCCESS);
144: }

146: static PetscErrorCode PetscPartitionerMultistage_CreateStages(MPI_Comm comm, const PetscInt *options, PetscInt *nlevels, MPI_Group *levels[])
147: {
148:   MPI_Comm     ncomm;
149:   MPI_Group   *lgroup;
150:   MPI_Group    ggroup, ngroup;
151:   PetscMPIInt *ranks, *granks;
152:   PetscMPIInt  size, nsize, isize, rank, nrank, i, l, n, m;
153:   PetscInt     strategy = options ? options[0] : (PetscInt)PETSCPARTITIONER_MS_STRATEGY_NODE;

155:   PetscFunctionBegin;
156:   PetscCallMPI(MPI_Comm_size(comm, &size));
157:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

159:   if (size == 1) {
160:     PetscCall(PetscMalloc1(1, &lgroup));
161:     PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
162:     *nlevels = 1;
163:     *levels  = lgroup;
164:     PetscFunctionReturn(PETSC_SUCCESS);
165:   }

167:   switch (strategy) {
168:   case PETSCPARTITIONER_MS_STRATEGY_NODE:
169:     /* create groups (in global rank ordering of comm) for the 2-level partitioner */
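    /* stage 0 (internode): one master process per node; stage 1 (intranode): all the ranks of the local node */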
170:     if (options && options[1] > 0) {
171:       PetscMPIInt node_size = (PetscMPIInt)options[1];
172:       if (node_size > 1) {
173:         PetscMPIInt color;
174:         if (options[2]) { /* interleaved */
175:           PetscMPIInt ngroups = size / node_size;

177:           if (size % node_size) ngroups += 1;
178:           color = rank % ngroups;
179:         } else {
180:           color = rank / node_size;
181:         }
182:         PetscCallMPI(MPI_Comm_split(comm, color, rank, &ncomm));
183:       } else {
184:         PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
185:       }
186:     } else {
187: #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
188:       PetscCallMPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &ncomm));
189: #else
190:       /* if the user does not specify the node size and MPI_Comm_split_type is not available, default to the same comm */
191:       PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
192: #endif
193:     }

195:     PetscCallMPI(MPI_Comm_size(ncomm, &nsize));
196:     if (nsize == size) { /* one node */
197:       PetscCall(PetscMalloc1(1, &lgroup));
198:       PetscCallMPI(MPI_Comm_group(ncomm, &lgroup[0]));
199:       PetscCallMPI(MPI_Comm_free(&ncomm));

201:       *nlevels = 1;
202:       *levels  = lgroup;
203:       break;
204:     }

206:     PetscCall(PetscMalloc1(2, &lgroup));

208:     /* intranode group (in terms of global rank) */
209:     PetscCall(PetscMalloc2(size, &ranks, nsize, &granks));
210:     for (i = 0; i < nsize; i++) ranks[i] = i;
211:     PetscCallMPI(MPI_Comm_group(comm, &ggroup));
212:     PetscCallMPI(MPI_Comm_group(ncomm, &ngroup));
213:     PetscCallMPI(MPI_Group_translate_ranks(ngroup, nsize, ranks, ggroup, granks));
214:     PetscCallMPI(MPI_Group_incl(ggroup, nsize, granks, &lgroup[1]));

216:     /* internode group (master processes on the nodes only)
217:        this group must be specified by all processes in the comm
218:        we need to gather those master ranks */
219:     PetscCallMPI(MPI_Group_rank(ngroup, &nrank));
220:     granks[0] = !nrank ? rank : -1;
221:     PetscCallMPI(MPI_Allgather(granks, 1, MPI_INT, ranks, 1, MPI_INT, comm));
222:     for (i = 0, isize = 0; i < size; i++)
223:       if (ranks[i] >= 0) ranks[isize++] = ranks[i];
224:     PetscCallMPI(MPI_Group_incl(ggroup, isize, ranks, &lgroup[0]));

226:     PetscCall(PetscFree2(ranks, granks));
227:     PetscCallMPI(MPI_Group_free(&ggroup));
228:     PetscCallMPI(MPI_Group_free(&ngroup));
229:     PetscCallMPI(MPI_Comm_free(&ncomm));

231:     *nlevels = 2;
232:     *levels  = lgroup;
233:     break;

235:   case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
236:     /* recursive m-section (m = 2 is bisection) */
237:     m = options ? (PetscMPIInt)options[1] : 2;
238:     PetscCheck(m >= 2, comm, PETSC_ERR_SUP, "Invalid split %d, must be greater than one", m);
239:     l = 0;
240:     n = 1;
241:     while (n <= size) {
242:       l++;
243:       n *= m;
244:     }
245:     l -= 1;
246:     n /= m;

248:     PetscCheck(l != 0, comm, PETSC_ERR_SUP, "Invalid split %d with communicator size %d", m, size);
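    /* n = m^l is now the largest power of m not exceeding size; with more than one stage, stage 0
       targets the m anchor ranks of the first n processes (plus any leftover ranks >= n), while each
       later stage splits every block of the first n ranks into m smaller blocks */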
249:     PetscCall(PetscMalloc1(l, &lgroup));
250:     for (i = 1; i < l; i++) lgroup[i] = MPI_GROUP_EMPTY;

252:     if (l > 1) {
253:       PetscMPIInt rem = size - n;
254:       PetscCall(PetscMalloc1((m + rem), &ranks));
255:       for (i = 0; i < m; i++) ranks[i] = i * (n / m);
256:       for (i = 0; i < rem; i++) ranks[i + m] = n + i;
257:       PetscCallMPI(MPI_Comm_group(comm, &ggroup));
258:       PetscCallMPI(MPI_Group_incl(ggroup, m + rem, ranks, &lgroup[0]));
259:       if (rank < n) {
260:         for (i = 1; i < l; i++) {
261:           PetscMPIInt inc = (PetscMPIInt)PetscPowInt(m, l - i - 1);
262:           if (rank % inc == 0) {
263:             PetscMPIInt anch = (rank - rank % (inc * m)), r;
264:             for (r = 0; r < m; r++) ranks[r] = anch + r * inc;
265:             PetscCallMPI(MPI_Group_incl(ggroup, m, ranks, &lgroup[i]));
266:           }
267:         }
268:       }
269:       PetscCall(PetscFree(ranks));
270:       PetscCallMPI(MPI_Group_free(&ggroup));
271:     } else {
272:       PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
273:     }

275:     *nlevels = l;
276:     *levels  = lgroup;
277:     break;

279:   default:
280:     SETERRQ(comm, PETSC_ERR_SUP, "Not implemented"); /* LCOV_EXCL_LINE */
281:   }
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

285: static PetscErrorCode PetscPartitionerMultistage_DestroyStages(PetscInt nstages, MPI_Group *groups[])
286: {
287:   PetscFunctionBegin;
288:   for (PetscInt l = 0; l < nstages; l++) {
289:     MPI_Group group = (*groups)[l];
290:     if (group != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&group));
291:   }
292:   PetscCall(PetscFree(*groups));
293:   PetscFunctionReturn(PETSC_SUCCESS);
294: }

296: static PetscErrorCode PetscPartitionerReset_Multistage(PetscPartitioner part)
297: {
298:   PetscPartitioner_MS *p       = (PetscPartitioner_MS *)part->data;
299:   PetscInt             nstages = p->levels;

301:   PetscFunctionBegin;
302:   p->levels = 0;
303:   PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &p->lgroup));
304:   for (PetscInt l = 0; l < nstages; l++) {
305:     PetscCall(PetscPartitionerDestroy(&p->ppart[l]));
306:     PetscCall(PetscSectionDestroy(&p->tpwgs[l]));
307:   }
308:   PetscCall(PetscFree(p->ppart));
309:   PetscCall(PetscFree(p->tpwgs));
310:   PetscCall(PetscFree(p->edgeCut));
311:   PetscFunctionReturn(PETSC_SUCCESS);
312: }

314: static PetscErrorCode PetscPartitionerDestroy_Multistage(PetscPartitioner part)
315: {
316:   PetscFunctionBegin;
317:   PetscCall(PetscPartitionerReset_Multistage(part));
318:   PetscCall(PetscFree(part->data));
319:   PetscFunctionReturn(PETSC_SUCCESS);
320: }

322: /*@
323:   PetscPartitionerMultistageSetStages - Sets stages for the partitioning

325:   Collective

327:   Input Parameters:
328: + part   - the `PetscPartitioner` object.
329: . levels - the number of stages.
330: - lgroup - array of `MPI_Group`s for each stage.

332:   Level: advanced

334:   Notes:
335:   `MPI_Comm_create(comm, lgroup[l], &lcomm)` is used to compute the communicator for the stage partitioner at level `l`.
336:   The groups must be specified in the process numbering of the partitioner communicator.
337:   `lgroup[0]` must be collectively specified and it must represent a proper subset of the communicator associated with the original partitioner.
 338:   For each level, a rank can be listed in at most one group (but it can be listed at different levels).
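
  Example Usage:
  A minimal sketch of a two-stage setup (first across nodes, then within each node). Here `part` is assumed
  to be a `PETSCPARTITIONERMULTISTAGE` partitioner created on `comm`, and the fixed `node_size` (with the
  communicator size assumed to be a multiple of it) is only illustrative:
.vb
  MPI_Group   wgroup, stages[2];
  PetscMPIInt size, rank, node_size = 4, nnodes, *ranks;

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_group(comm, &wgroup));
  nnodes = size / node_size;
  PetscCall(PetscMalloc1(PetscMax(nnodes, node_size), &ranks));
  for (PetscMPIInt i = 0; i < nnodes; i++) ranks[i] = i * node_size; // stage 0: one master rank per node
  PetscCallMPI(MPI_Group_incl(wgroup, nnodes, ranks, &stages[0]));
  for (PetscMPIInt i = 0; i < node_size; i++) ranks[i] = (rank / node_size) * node_size + i; // stage 1: the ranks of this node
  PetscCallMPI(MPI_Group_incl(wgroup, node_size, ranks, &stages[1]));
  PetscCall(PetscPartitionerMultistageSetStages(part, 2, stages));
  PetscCallMPI(MPI_Group_free(&stages[0])); // SetStages duplicates the groups, so they can be freed
  PetscCallMPI(MPI_Group_free(&stages[1]));
  PetscCallMPI(MPI_Group_free(&wgroup));
  PetscCall(PetscFree(ranks));
.ve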

340: .seealso: `PetscPartitionerSetType()`, `PetscPartitionerDestroy()`, `PETSCPARTITIONERMULTISTAGE`
341: @*/
342: PetscErrorCode PetscPartitionerMultistageSetStages(PetscPartitioner part, PetscInt levels, MPI_Group lgroup[])
343: {
344:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
345:   MPI_Comm             comm;
346:   MPI_Group            wgroup;
347:   PetscMPIInt        **lparts = NULL;
348:   PetscMPIInt          rank, size;
349:   PetscBool            touched, isms;

351:   PetscFunctionBegin;
354:   PetscCheck(levels >= 0, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Number of levels must be non-negative");
355:   if (levels) PetscAssertPointer(lgroup, 3);
356:   PetscCall(PetscObjectTypeCompare((PetscObject)part, PETSCPARTITIONERMULTISTAGE, &isms));
357:   if (!isms) PetscFunctionReturn(PETSC_SUCCESS);
358:   PetscCall(PetscLogEventBegin(PetscPartitioner_MS_SetUp, part, 0, 0, 0));

360:   PetscCall(PetscPartitionerReset_Multistage(part));

362:   PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
363:   PetscCallMPI(MPI_Comm_group(comm, &wgroup));
364:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
365:   PetscCallMPI(MPI_Comm_size(comm, &size));

367:   p->levels = levels;
368:   PetscCall(PetscCalloc1(p->levels, &p->ppart));
369:   PetscCall(PetscCalloc1(p->levels, &p->lgroup));
370:   PetscCall(PetscCalloc1(p->levels, &p->tpwgs));
371:   PetscCall(PetscCalloc1(p->levels, &p->edgeCut));

373:   /* support for target partition weights */
374:   touched = PETSC_FALSE;
375:   if (p->compute_tpwgs) PetscCall(PetscMalloc1(p->levels, &lparts));

377:   for (PetscInt l = 0; l < p->levels; l++) {
378:     const char *prefix;
379:     char        aprefix[256];
380:     MPI_Comm    lcomm;

382:     if (l) { /* let MPI complain/hang if the user did not specify the groups properly */
383:       PetscCallMPI(MPI_Comm_create(comm, lgroup[l], &lcomm));
384:     } else { /* in debug mode, check that the initial group is consistently (collectively) specified on comm */
385: #if defined(PETSC_USE_DEBUG)
386:       MPI_Group    group, igroup = lgroup[0];
387:       PetscMPIInt *ranks, *granks;
388:       PetscMPIInt  b[2], b2[2], csize, gsize;

390:       PetscCallMPI(MPI_Group_size(igroup, &gsize));
391:       b[0] = -gsize;
392:       b[1] = +gsize;
393:       PetscCallMPI(MPIU_Allreduce(b, b2, 2, MPI_INT, MPI_MAX, comm));
394:       PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
395:       PetscCallMPI(MPI_Comm_group(comm, &group));
396:       PetscCallMPI(MPI_Group_size(group, &csize));
397:       PetscCall(PetscMalloc2(gsize, &ranks, (csize * gsize), &granks));
398:       for (PetscMPIInt i = 0; i < gsize; i++) granks[i] = i;
399:       PetscCallMPI(MPI_Group_translate_ranks(igroup, gsize, granks, group, ranks));
400:       PetscCallMPI(MPI_Group_free(&group));
401:       PetscCallMPI(MPI_Allgather(ranks, gsize, MPI_INT, granks, gsize, MPI_INT, comm));
402:       for (PetscMPIInt i = 0; i < gsize; i++) {
403:         PetscMPIInt chkr = granks[i];
404:         for (PetscMPIInt j = 0; j < csize; j++) {
405:           PetscMPIInt shft = j * gsize + i;
406:           PetscCheck(chkr == granks[shft], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
407:         }
408:       }
409:       PetscCall(PetscFree2(ranks, granks));
410: #endif
411:       lcomm = comm;
412:     }
413:     PetscCall(PetscObjectGetOptionsPrefix((PetscObject)part, &prefix));
414:     if (lcomm != MPI_COMM_NULL) {
415:       /* there is no MPI_Group_dup(); the union with the empty group duplicates lgroup[l] */
416:       PetscCallMPI(MPI_Group_union(lgroup[l], MPI_GROUP_EMPTY, &p->lgroup[l]));
417:       PetscCall(PetscPartitionerCreate(lcomm, &p->ppart[l]));
418:       PetscCall(PetscObjectSetOptionsPrefix((PetscObject)p->ppart[l], prefix));
419:       PetscCall(PetscSNPrintf(aprefix, sizeof(aprefix), "petscpartitioner_multistage_levels_%" PetscInt_FMT "_", l));
420:       PetscCall(PetscObjectAppendOptionsPrefix((PetscObject)p->ppart[l], aprefix));
421:     } else {
422:       PetscCheck(l != 0, PetscObjectComm((PetscObject)part), PETSC_ERR_USER, "Invalid first group");
423:       p->lgroup[l] = MPI_GROUP_EMPTY;
424:     }
425:     if (lcomm != comm && lcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&lcomm));

427:     /* compute the number of partitions per level and detect whether a process takes part in the partitioning (at any level) */
428:     if (p->compute_tpwgs) {
429:       PetscMPIInt gsize;
430:       PetscCall(PetscMalloc1(size, &lparts[l]));
431:       PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
432:       if (!l) {
433:         PetscMPIInt tr;
434:         PetscCallMPI(MPI_Group_translate_ranks(wgroup, 1, &rank, p->lgroup[0], &tr));
435:         if (tr == MPI_UNDEFINED) gsize = 0;
436:       }
437:       if (touched && !gsize) gsize = 1;
438:       PetscCallMPI(MPI_Allgather(&gsize, 1, MPI_INT, lparts[l], 1, MPI_INT, comm));
439:       if (lparts[l][rank]) touched = PETSC_TRUE;
440:     }
441:   }

443:   /* determine weights (bottom-up) */
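  /* the weight assigned to a stage target rank is proportional to the number of active (touched)
     processes its part will eventually be split into, so that, e.g., the internode stage produces
     parts sized according to the number of participating ranks on each node */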
444:   if (p->compute_tpwgs) {
445:     PetscMPIInt *tranks;
446:     PetscInt    *lwgts, wgt;
447:     MPI_Op       MPIU_LCM; /* XXX this is actually recreated at every setup */

449:     PetscCall(PetscMalloc1((2 * size), &tranks));

451:     /* we need to compute the least common multiple across processes */
452:     PetscCallMPI(MPI_Op_create(PetscLCM_Local, 1, &MPIU_LCM));

454:     /* final target has to have all ones as weights (if the process gets touched) */
455:     wgt = touched ? 1 : 0;
456:     PetscCall(PetscMalloc1(size, &lwgts));
457:     PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));

459:     /* now go up the hierarchy and populate the PetscSection describing the partition weights */
460:     for (PetscInt l = p->levels - 1; l >= 0; l--) {
461:       MPI_Comm    pcomm;
462:       MPI_Group   igroup;
463:       PetscMPIInt isize, isizer = 0;
464:       PetscInt    a, b, wgtr;
465:       PetscMPIInt gsize;
466:       PetscBool   usep = PETSC_FALSE;

468:       if (p->ppart[l]) {
469:         PetscCall(PetscObjectGetComm((PetscObject)p->ppart[l], &pcomm));
470:       } else {
471:         pcomm = PETSC_COMM_SELF;
472:       }

474:       PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
475:       if (gsize) {
476:         usep = PETSC_TRUE;
477:         for (PetscMPIInt i = 0; i < gsize; i++) tranks[i] = i;
478:         PetscCallMPI(MPI_Group_translate_ranks(p->lgroup[l], gsize, tranks, wgroup, tranks + size));
479:       } else gsize = lparts[l][rank];
480:       PetscCall(PetscFree(lparts[l]));
481:       PetscCall(PetscSectionCreate(pcomm, &p->tpwgs[l]));
482:       PetscCall(PetscObjectSetName((PetscObject)p->tpwgs[l], "Target partition weights"));
483:       PetscCall(PetscSectionSetChart(p->tpwgs[l], 0, gsize));

485:       if (usep) {
486:         PetscMPIInt *tr = tranks + size;
487:         for (PetscMPIInt i = 0; i < gsize; i++) PetscCall(PetscSectionSetDof(p->tpwgs[l], i, lwgts[tr[i]]));
488:       } else if (gsize) {
489:         PetscCall(PetscSectionSetDof(p->tpwgs[l], 0, 1));
490:       }
491:       PetscCall(PetscSectionSetUp(p->tpwgs[l]));
492:       if (!l) break;

494:       /* determine number of processes shared by two consecutive levels */
495:       PetscCallMPI(MPI_Group_intersection(p->lgroup[l], p->lgroup[l - 1], &igroup));
496:       PetscCallMPI(MPI_Group_size(igroup, &isize));
497:       PetscCallMPI(MPI_Group_free(&igroup));
498:       if (!p->ppart[l] && touched) isize = 1; /* if this process gets touched, it needs to propagate its data from one level to the next */

500:       /* reduce the maximum size of igroup on the level partitioner comm */
501:       PetscCallMPI(MPIU_Allreduce(&isize, &isizer, 1, MPI_INT, MPI_MAX, pcomm));

503:       /* sum previously computed partition weights on the level comm */
504:       wgt  = lwgts[rank];
505:       wgtr = wgt;
506:       PetscCallMPI(MPIU_Allreduce(&wgt, &wgtr, 1, MPIU_INT, MPI_SUM, pcomm));

508:       /* partition weights are given with integers; to properly compute these and be able to propagate them to the next level,
509:          we need to compute the least common multiple of isizer across the global comm */
510:       a = isizer ? isizer : 1;
511:       b = a;
512:       PetscCallMPI(MPIU_Allreduce(&a, &b, 1, MPIU_INT, MPIU_LCM, comm));

514:       /* finally share this process's weight with all the other processes */
515:       wgt = isizer ? (b * wgtr) / isizer : 0;
516:       PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));
517:     }
518:     PetscCall(PetscFree(lwgts));
519:     PetscCall(PetscFree(tranks));
520:     PetscCall(PetscFree(lparts));
521:     PetscCallMPI(MPI_Op_free(&MPIU_LCM));
522:   }
523:   PetscCallMPI(MPI_Group_free(&wgroup));
524:   PetscCall(PetscLogEventEnd(PetscPartitioner_MS_SetUp, part, 0, 0, 0));
525:   PetscFunctionReturn(PETSC_SUCCESS);
526: }

528: PetscErrorCode PetscPartitionerMultistageGetStages_Multistage(PetscPartitioner part, PetscInt *levels, MPI_Group *lgroup[])
529: {
530:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;

532:   PetscFunctionBegin;
533:   PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
535:   if (levels) *levels = p->levels;
536:   if (lgroup) *lgroup = p->lgroup;
537:   PetscFunctionReturn(PETSC_SUCCESS);
538: }

540: PetscErrorCode PetscPartitionerMultistageSetStage_Multistage(PetscPartitioner part, PetscInt stage, PetscObject odm)
541: {
542:   DM                   dm = (DM)odm;
543:   PetscPartitioner_MS *p  = (PetscPartitioner_MS *)part->data;

545:   PetscFunctionBegin;
546:   PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
547:   PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
548:   PetscCheck(stage >= 0 && stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", stage, p->levels);

550:   p->stage = stage;
551:   PetscCall(PetscObjectReference((PetscObject)dm));
552:   PetscCall(DMDestroy(&p->stagedm));
553:   p->stagedm = dm;
554:   PetscFunctionReturn(PETSC_SUCCESS);
555: }

557: PetscErrorCode PetscPartitionerMultistageGetStage_Multistage(PetscPartitioner part, PetscInt *stage, PetscObject *odm)
558: {
559:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;

561:   PetscFunctionBegin;
562:   PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
563:   if (stage) *stage = p->stage;
564:   if (odm) *odm = (PetscObject)p->stagedm;
565:   PetscFunctionReturn(PETSC_SUCCESS);
566: }

568: static PetscErrorCode PetscPartitionerSetUp_Multistage(PetscPartitioner part)
569: {
570:   PetscPartitioner_MS *p    = (PetscPartitioner_MS *)part->data;
571:   MPI_Comm             comm = PetscObjectComm((PetscObject)part);
572:   PetscInt             nstages;
573:   MPI_Group           *groups;

575:   PetscFunctionBegin;
576:   if (p->levels) PetscFunctionReturn(PETSC_SUCCESS);
577:   PetscCall(PetscPartitionerMultistage_CreateStages(comm, NULL, &nstages, &groups));
578:   PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
579:   PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
580:   PetscFunctionReturn(PETSC_SUCCESS);
581: }

583: /* the targetSection argument is unused; target partition weights are computed internally */
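/* Each call performs a single stage: the adjacency graph is restricted to the communicator of the
   current stage partitioner when needed, partitioned there, and the resulting target ranks are
   translated back to the process numbering of the full partitioner communicator */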
584: static PetscErrorCode PetscPartitionerPartition_Multistage(PetscPartitioner part, PetscInt nparts, PetscInt numVertices, PetscInt start[], PetscInt adjacency[], PetscSection vertSection, PetscSection edgeSection, PETSC_UNUSED PetscSection targetSection, PetscSection partSection, IS *partition)
585: {
586:   PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
587:   PetscPartitioner     ppart;
588:   PetscSection         ppartSection = NULL;
589:   IS                   lpartition;
590:   const PetscInt      *idxs;
591:   MPI_Comm             comm, pcomm;
592:   MPI_Group            group, lgroup, pgroup;
593:   PetscInt            *pstart, *padjacency;
594:   PetscInt             nid, i, pedgeCut;
595:   PetscMPIInt          sameComm, pparts;
596:   PetscBool            freeadj = PETSC_FALSE;

598:   PetscFunctionBegin;
599:   PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
600:   PetscCheck(p->stage >= 0 && p->stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", p->stage, p->levels);
601:   PetscCall(PetscLogEventBegin(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));

603:   /* Group for current stage (size of the group defines number of "local" partitions) */
604:   lgroup = p->lgroup[p->stage];
605:   PetscCallMPI(MPI_Group_size(lgroup, &pparts));

607:   /* Current stage partitioner */
608:   ppart = p->ppart[p->stage];
609:   if (ppart) {
610:     PetscCall(PetscObjectGetComm((PetscObject)ppart, &pcomm));
611:     PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
612:   } else {
613:     pcomm  = PETSC_COMM_SELF;
614:     pgroup = MPI_GROUP_EMPTY;
615:     pparts = -1;
616:   }

618:   /* Global comm of partitioner */
619:   PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
620:   PetscCallMPI(MPI_Comm_group(comm, &group));

622:   /* Get adjacency */
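  /* if the stage partitioner group has the same members as the full communicator group
     (MPI_IDENT or MPI_SIMILAR), the CSR graph can be reused as is; otherwise it is
     renumbered on the stage communicator below */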
623:   PetscCallMPI(MPI_Group_compare(group, pgroup, &sameComm));
624:   if (sameComm != MPI_UNEQUAL) {
625:     pstart     = start;
626:     padjacency = adjacency;
627:   } else { /* restrict to partitioner comm */
628:     ISLocalToGlobalMapping l2g, g2l;
629:     IS                     gid, rid;
630:     const PetscInt        *idxs1;
631:     PetscInt              *idxs2;
632:     PetscInt               cStart, cEnd, cum;
633:     DM                     dm = p->stagedm;
634:     PetscSF                sf;

636:     PetscCheck(dm, PetscObjectComm((PetscObject)part), PETSC_ERR_PLIB, "Missing stage DM");
637:     PetscCall(DMGetPointSF(dm, &sf));
638:     PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
639:     PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, part->height, NULL, sf, &gid));
640:     /* filter overlapped local cells (if any) */
641:     PetscCall(ISGetIndices(gid, &idxs1));
642:     PetscCall(ISGetLocalSize(gid, &cum));
643:     PetscCall(PetscMalloc1(cum, &idxs2));
644:     for (i = cStart, cum = 0; i < cEnd; i++) {
645:       if (idxs1[i - cStart] < 0) continue;
646:       idxs2[cum++] = idxs1[i - cStart];
647:     }
648:     PetscCall(ISRestoreIndices(gid, &idxs1));

650:     PetscCheck(numVertices == cum, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT, numVertices, cum);
651:     PetscCall(ISDestroy(&gid));

653:     /*
654:        g2l from full numbering to local numbering
655:        l2g from local numbering to restricted numbering
656:     */
657:     PetscCall(ISCreateGeneral(pcomm, numVertices, idxs2, PETSC_OWN_POINTER, &gid));
658:     PetscCall(ISRenumber(gid, NULL, NULL, &rid));
659:     PetscCall(ISLocalToGlobalMappingCreateIS(gid, &g2l));
660:     PetscCall(ISLocalToGlobalMappingSetType(g2l, ISLOCALTOGLOBALMAPPINGHASH));
661:     PetscCall(ISLocalToGlobalMappingCreateIS(rid, &l2g));
662:     PetscCall(ISDestroy(&gid));
663:     PetscCall(ISDestroy(&rid));

665:     PetscCall(PetscMalloc2(numVertices + 1, &pstart, start[numVertices], &padjacency));
666:     pstart[0] = 0;
667:     for (i = 0; i < numVertices; i++) {
668:       PetscCall(ISGlobalToLocalMappingApply(g2l, IS_GTOLM_DROP, start[i + 1] - start[i], adjacency + start[i], &pstart[i + 1], padjacency + pstart[i]));
669:       PetscCall(ISLocalToGlobalMappingApply(l2g, pstart[i + 1], padjacency + pstart[i], padjacency + pstart[i]));
670:       pstart[i + 1] += pstart[i];
671:     }
672:     PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
673:     PetscCall(ISLocalToGlobalMappingDestroy(&g2l));

675:     freeadj = PETSC_TRUE;
676:   }

678:   /* Compute partitioning */
679:   pedgeCut = 0;
680:   PetscCall(PetscSectionCreate(pcomm, &ppartSection));
681:   if (ppart) {
682:     PetscMPIInt prank;

684:     PetscCheck(ppart->ops->partition, PetscObjectComm((PetscObject)ppart), PETSC_ERR_ARG_WRONGSTATE, "PetscPartitioner has no partitioning method on stage %" PetscInt_FMT, p->stage);
685:     PetscCall(PetscPartitionerPartition(ppart, pparts, numVertices, pstart, padjacency, vertSection, edgeSection, p->tpwgs[p->stage], ppartSection, &lpartition));
686:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ppart), &prank));
687:     if (!prank) pedgeCut = ppart->edgeCut; /* only the master rank will reduce */
688:   } else {                                 /* not participating */
689:     PetscCall(ISCreateStride(PETSC_COMM_SELF, numVertices, 0, 1, &lpartition));
690:     pparts = numVertices > 0 ? 1 : 0;
691:   }
692:   if (freeadj) PetscCall(PetscFree2(pstart, padjacency));

694:   /* Init final partition (output) */
695:   PetscCall(PetscSectionReset(partSection));
696:   PetscCall(PetscSectionSetChart(partSection, 0, nparts));

698:   /* We need to map the section back to the global comm numbering */
699:   for (i = 0; i < pparts; i++) {
700:     PetscInt    dof;
701:     PetscMPIInt mp, mpt;

703:     if (lgroup != MPI_GROUP_EMPTY) {
704:       PetscCall(PetscMPIIntCast(i, &mp));
705:       PetscCall(PetscSectionGetDof(ppartSection, i, &dof));
706:       PetscCallMPI(MPI_Group_translate_ranks(lgroup, 1, &mp, group, &mpt));
707:     } else {
708:       PetscCheck(pparts == 1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unexpected pparts %d", pparts);
709:       PetscCallMPI(MPI_Comm_rank(comm, &mpt));
710:       PetscCall(ISGetLocalSize(lpartition, &dof));
711:     }
712:     PetscCall(PetscSectionSetDof(partSection, mpt, dof));
713:   }
714:   PetscCall(PetscSectionSetUp(partSection));
715:   PetscCall(PetscSectionDestroy(&ppartSection));

717:   /* No need to translate the "partition" output, as it is in local cell numbering;
718:      we only change the comm of the index set */
719:   PetscCall(ISGetIndices(lpartition, &idxs));
720:   PetscCall(ISGetLocalSize(lpartition, &nid));
721:   PetscCall(ISCreateGeneral(comm, nid, idxs, PETSC_COPY_VALUES, partition));
722:   PetscCall(ISRestoreIndices(lpartition, &idxs));
723:   PetscCall(ISDestroy(&lpartition));

725:   PetscCall(PetscSectionViewFromOptions(partSection, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));
726:   PetscCall(ISViewFromOptions(*partition, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));

728:   /* Update edge-cut */
729:   p->edgeCut[p->stage] = pedgeCut;
730:   for (i = p->stage + 1; i < p->levels; i++) p->edgeCut[i] = 0;
731:   for (i = 0; i < p->stage; i++) pedgeCut += p->edgeCut[i];
732:   part->edgeCut = -1;
733:   PetscCallMPI(MPI_Reduce(&pedgeCut, &part->edgeCut, 1, MPIU_INT, MPI_SUM, 0, comm));

735:   PetscCallMPI(MPI_Group_free(&group));
736:   if (pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
737:   PetscCall(PetscLogEventEnd(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));
738:   PetscFunctionReturn(PETSC_SUCCESS);
739: }

741: static PetscErrorCode PetscPartitionerSetFromOptions_Multistage(PetscPartitioner part, PetscOptionItems PetscOptionsObject)
742: {
743:   PetscPartitioner_MS *p        = (PetscPartitioner_MS *)part->data;
744:   PetscEnum            strategy = (PetscEnum)PETSCPARTITIONER_MS_STRATEGY_NODE;
745:   PetscBool            set, roundrobin;
746:   PetscInt             options[3] = {PETSC_DETERMINE, PETSC_DETERMINE, PETSC_DETERMINE};

748:   PetscFunctionBegin;
749:   PetscOptionsHeadBegin(PetscOptionsObject, "PetscPartitioner Multistage Options");
750:   PetscCall(PetscOptionsEnum("-petscpartitioner_multistage_strategy", "Default partitioning strategy", "", PetscPartitionerMultistageStrategyList, strategy, &strategy, &set));
751:   if (set || !p->levels) {
752:     options[0] = (PetscInt)strategy;
753:     switch (options[0]) {
754:     case PETSCPARTITIONER_MS_STRATEGY_NODE:
755:       options[1] = PETSC_DETERMINE;
756:       roundrobin = PETSC_FALSE;
757:       PetscCall(PetscOptionsInt("-petscpartitioner_multistage_node_size", "Number of processes per node", "", options[1], &options[1], NULL));
758:       PetscCall(PetscOptionsBool("-petscpartitioner_multistage_node_interleaved", "Use round robin rank assignments", "", roundrobin, &roundrobin, NULL));
759:       options[2] = (PetscInt)roundrobin;
760:       break;
761:     case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
762:       options[1] = 2;
763:       PetscCall(PetscOptionsInt("-petscpartitioner_multistage_msection", "Number of splits per level", "", options[1], &options[1], NULL));
764:       break;
765:     default:
766:       break; /* LCOV_EXCL_LINE */
767:     }
768:   }
769:   PetscCall(PetscOptionsBool("-petscpartitioner_multistage_tpwgts", "Use target partition weights in stage partitioners", "", p->compute_tpwgs, &p->compute_tpwgs, NULL));
770:   PetscCall(PetscOptionsBool("-petscpartitioner_multistage_viewtpwgts", "View target partition weights", "", p->view_tpwgs, &p->view_tpwgs, NULL));
771:   PetscOptionsHeadEnd();

773:   if (options[0] != PETSC_DETERMINE) {
774:     MPI_Comm   comm = PetscObjectComm((PetscObject)part);
775:     PetscInt   nstages;
776:     MPI_Group *groups;

778:     PetscCall(PetscPartitionerMultistage_CreateStages(comm, options, &nstages, &groups));
779:     PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
780:     PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
781:   }

783:   {
784:     PetscInt nstages = p->levels, l;
785:     for (l = 0; l < nstages; l++) {
786:       if (p->ppart[l]) PetscCall(PetscPartitionerSetFromOptions(p->ppart[l]));
787:     }
788:   }
789:   PetscFunctionReturn(PETSC_SUCCESS);
790: }

792: /*MC
793:   PETSCPARTITIONERMULTISTAGE = "multistage" - A PetscPartitioner object using a multistage distribution strategy

795:   Level: intermediate

797:   Options Database Keys:
798: +  -petscpartitioner_multistage_strategy <strategy> - Either `PETSCPARTITIONER_MS_STRATEGY_NODE`, or `PETSCPARTITIONER_MS_STRATEGY_MSECTION`
799: .  -petscpartitioner_multistage_node_size <int> - Number of processes per computing node (or `PETSC_DECIDE`)
800: .  -petscpartitioner_multistage_node_interleaved <bool> - Assign ranks round-robin.
800: .  -petscpartitioner_multistage_node_interleaved <bool> - Assign ranks to nodes in a round-robin (interleaved) fashion
802: -  -petscpartitioner_multistage_tpwgts <bool> - Use target partition weights in stage partitioner

804:   Notes:
805:   The default multistage strategy uses `PETSCPARTITIONER_MS_STRATEGY_NODE` and automatically discovers node information using `MPI_Comm_split_type`.
806:   `PETSCPARTITIONER_MS_STRATEGY_MSECTION` is mainly intended for testing purposes.
807:   Options for the single-stage partitioners are prefixed by `-petscpartitioner_multistage_levels_`.
808:   For example, to use parmetis in all stages, use `-petscpartitioner_multistage_levels_petscpartitioner_type parmetis`.
809:   Finer-grained control is also possible: `-petscpartitioner_multistage_levels_0_petscpartitioner_type parmetis`, `-petscpartitioner_multistage_levels_1_petscpartitioner_type simple`.
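
  Example Usage:
  A minimal sketch of selecting this partitioner programmatically; `comm` and the `DMPLEX` `dm` are assumed
  to exist, and driving the successive stages is left to the distribution machinery of the calling code:
.vb
  PetscPartitioner part;

  PetscCall(PetscPartitionerCreate(comm, &part));
  PetscCall(PetscPartitionerSetType(part, PETSCPARTITIONERMULTISTAGE));
  PetscCall(PetscPartitionerSetFromOptions(part)); // picks up the -petscpartitioner_multistage_* options
  PetscCall(DMPlexSetPartitioner(dm, part));
  PetscCall(PetscPartitionerDestroy(&part));
.ve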

811: .seealso: `PetscPartitionerType`, `PetscPartitionerCreate()`, `PetscPartitionerSetType()`
812: M*/

814: PETSC_EXTERN PetscErrorCode PetscPartitionerCreate_Multistage(PetscPartitioner part)
815: {
816:   PetscPartitioner_MS *p;

818:   PetscFunctionBegin;
820:   PetscCall(PetscNew(&p));
821:   p->compute_tpwgs = PETSC_TRUE;

823:   part->data                = p;
824:   part->ops->view           = PetscPartitionerView_Multistage;
825:   part->ops->destroy        = PetscPartitionerDestroy_Multistage;
826:   part->ops->partition      = PetscPartitionerPartition_Multistage;
827:   part->ops->setfromoptions = PetscPartitionerSetFromOptions_Multistage;
828:   part->ops->setup          = PetscPartitionerSetUp_Multistage;

830:   PetscCall(PetscCitationsRegister(MSPartitionerCitation, &MSPartitionerCite));
831:   PetscFunctionReturn(PETSC_SUCCESS);
832: }