Actual source code: mspart.c
1: #include <petscsf.h>
2: #include <petscdmplex.h>
3: #include <petsc/private/dmimpl.h>
4: #include <petsc/private/dmpleximpl.h>
5: #include <petsc/private/partitionerimpl.h>
7: PetscBool MSPartitionerCite = PETSC_FALSE;
8: const char MSPartitionerCitation[] = "@article{PETScMSPart2021,\n"
9: " author = {Parsani, Matteo and Boukharfane, Radouan and Nolasco, Irving Reyna and Fern{\'a}ndez, David C Del Rey and Zampini, Stefano and Hadri, Bilel and Dalcin, Lisandro},\n"
10: " title = {High-order accurate entropy-stable discontinuous collocated Galerkin methods with the summation-by-parts property for compressible {CFD} frameworks: Scalable {SSDC} algorithms and flow solver},\n"
11: " journal = {Journal of Computational Physics},\n"
12: " volume = {424},\n"
13: " pages = {109844},\n"
14: " year = {2021}\n"
15: "}\n";
17: PetscLogEvent PetscPartitioner_MS_SetUp;
18: PetscLogEvent PetscPartitioner_MS_Stage[PETSCPARTITIONER_MS_NUMSTAGE];
20: typedef struct {
21: PetscInt levels;
22: MPI_Group *lgroup;
23: /* Need access to the DM in inner stages */
24: PetscInt stage;
25: DM stagedm;
26: /* Stage partitioners */
27: PetscPartitioner *ppart;
28: /* Diagnostic */
29: PetscInt *edgeCut;
30: /* Target partition weights */
31: PetscBool view_tpwgs;
32: PetscSection *tpwgs;
33: PetscBool compute_tpwgs;
34: } PetscPartitioner_MS;
36: const char *const PetscPartitionerMultistageStrategyList[] = {"NODE", "MSECTION", "PetscPartitionerMultistageStrategy", "PETSCPARTITIONER_MS_", NULL};
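/* User-defined MPI reduction: elementwise least common multiple of PetscInt arrays, used below to combine target partition weights across processes */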
38: static void PetscLCM_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
39: {
40: PetscInt count = *cnt;
41: PetscInt *xin = (PetscInt *)in, *xout = (PetscInt *)out;
43: PetscFunctionBegin;
44: if (*datatype != MPIU_INT) {
45: (void)(*PetscErrorPrintf)("Can only handle MPIU_INT");
46: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
47: }
48: for (PetscInt i = 0; i < count; i++) xout[i] = PetscLCM(xin[i], xout[i]);
49: PetscFunctionReturnVoid();
50: }
52: static PetscErrorCode PetscPartitionerView_Multistage(PetscPartitioner part, PetscViewer viewer)
53: {
54: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
55: PetscViewerFormat format;
56: PetscBool iascii;
58: PetscFunctionBegin;
59: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
60: PetscCall(PetscViewerGetFormat(viewer, &format));
61: if (iascii) {
62: MPI_Comm comm;
63: MPI_Group group;
65: PetscCall(PetscViewerASCIIPushTab(viewer));
66: if (!p->stagedm || p->stage == 0) PetscCall(PetscViewerASCIIPrintf(viewer, "Multistage graph partitioner: total stages %" PetscInt_FMT "\n", p->levels));
67: comm = PetscObjectComm((PetscObject)part);
68: PetscCallMPI(MPI_Comm_group(comm, &group));
69: for (PetscInt l = 0; l < p->levels; l++) {
70: PetscPartitioner ppart = p->ppart[l];
71: MPI_Comm pcomm = PetscObjectComm((PetscObject)ppart);
72: MPI_Group pgroup = MPI_GROUP_EMPTY;
74: PetscCall(PetscViewerASCIIPushTab(viewer));
75: if (l) {
76: if (pcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
77: } else {
78: pgroup = p->lgroup[0];
79: }
81: if (l) {
82: IS is;
83: PetscMPIInt gr, gem = 1;
84: PetscInt uniq;
86: PetscCallMPI(MPI_Group_size(group, &gr));
87: if (pgroup != MPI_GROUP_EMPTY) {
88: gem = 0;
89: PetscCallMPI(MPI_Group_translate_ranks(pgroup, 1, &gem, group, &gr));
90: }
91: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)part), 1, gr, 1, &is));
92: PetscCall(ISRenumber(is, NULL, &uniq, NULL));
93: PetscCall(ISDestroy(&is));
94: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gem, 1, MPI_INT, MPI_SUM, PetscObjectComm((PetscObject)part)));
95: if (gem) uniq--;
96: if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioners (%" PetscInt_FMT " unique groups, %d empty processes)\n", l, uniq, gem));
97: } else {
98: PetscMPIInt psize;
99: PetscCallMPI(MPI_Group_size(pgroup, &psize));
100: if (!p->stagedm || l == p->stage) PetscCall(PetscViewerASCIIPrintf(viewer, "Stage %" PetscInt_FMT " partitioner to %d processes\n", l, psize));
101: }
102: PetscCall(PetscViewerFlush(viewer));
103: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL && (!p->stagedm || l == p->stage)) {
104: PetscViewer pviewer;
106: if (pcomm == MPI_COMM_NULL) pcomm = PETSC_COMM_SELF;
107: PetscCall(PetscViewerGetSubViewer(viewer, pcomm, &pviewer));
108: if (ppart) {
109: PetscMPIInt size, *ranks, *granks;
110: char tstr[16], strranks[3072]; /* at most 12 chars (including spaces) per rank -> 256 ranks max */
112: PetscCallMPI(MPI_Group_size(pgroup, &size));
113: PetscCall(PetscMalloc2(size, &ranks, size, &granks));
114: for (PetscMPIInt i = 0; i < size; i++) ranks[i] = i;
115: PetscCallMPI(MPI_Group_translate_ranks(pgroup, size, ranks, group, granks));
116: if (size <= 256) {
117: PetscCall(PetscStrncpy(strranks, "", sizeof(strranks)));
118: for (PetscInt i = 0; i < size; i++) {
119: PetscCall(PetscSNPrintf(tstr, sizeof(tstr), " %d", granks[i]));
120: PetscCall(PetscStrlcat(strranks, tstr, sizeof(strranks)));
121: }
122: } else PetscCall(PetscStrncpy(strranks, " not showing > 256", sizeof(strranks))); /* LCOV_EXCL_LINE */
123: PetscCall(PetscFree2(ranks, granks));
124: if (!l) {
125: PetscCall(PetscViewerASCIIPrintf(pviewer, "Destination ranks:%s\n", strranks));
126: PetscCall(PetscPartitionerView(ppart, pviewer));
127: } else {
128: PetscCall(PetscPartitionerView(ppart, pviewer));
129: PetscCall(PetscViewerASCIIPrintf(pviewer, " Participating ranks:%s\n", strranks));
130: }
131: if (p->view_tpwgs) PetscCall(PetscSectionView(p->tpwgs[l], pviewer));
132: }
133: PetscCall(PetscViewerRestoreSubViewer(viewer, pcomm, &pviewer));
134: }
135: PetscCall(PetscViewerFlush(viewer));
137: if (l && pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
138: }
139: for (PetscInt l = 0; l < p->levels; l++) PetscCall(PetscViewerASCIIPopTab(viewer));
140: PetscCall(PetscViewerASCIIPopTab(viewer));
141: PetscCallMPI(MPI_Group_free(&group));
142: }
143: PetscFunctionReturn(PETSC_SUCCESS);
144: }
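/* Build the MPI groups defining the stages for the given strategy:
   NODE creates (at most) two stages, an internode stage with one master process per node (or per block of node_size ranks)
   followed by an intranode stage; MSECTION creates one stage per level of a recursive m-section of the communicator */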
146: static PetscErrorCode PetscPartitionerMultistage_CreateStages(MPI_Comm comm, const PetscInt *options, PetscInt *nlevels, MPI_Group *levels[])
147: {
148: MPI_Comm ncomm;
149: MPI_Group *lgroup;
150: MPI_Group ggroup, ngroup;
151: PetscMPIInt *ranks, *granks;
152: PetscMPIInt size, nsize, isize, rank, nrank, i, l, n, m;
153: PetscInt strategy = options ? options[0] : (PetscInt)PETSCPARTITIONER_MS_STRATEGY_NODE;
155: PetscFunctionBegin;
156: PetscCallMPI(MPI_Comm_size(comm, &size));
157: PetscCallMPI(MPI_Comm_rank(comm, &rank));
159: if (size == 1) {
160: PetscCall(PetscMalloc1(1, &lgroup));
161: PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
162: *nlevels = 1;
163: *levels = lgroup;
164: PetscFunctionReturn(PETSC_SUCCESS);
165: }
167: switch (strategy) {
168: case PETSCPARTITIONER_MS_STRATEGY_NODE:
169: /* create groups (in global rank ordering of comm) for the 2-level partitioner */
170: if (options && options[1] > 0) {
171: PetscMPIInt node_size = (PetscMPIInt)options[1];
172: if (node_size > 1) {
173: PetscMPIInt color;
174: if (options[2]) { /* interleaved */
175: PetscMPIInt ngroups = size / node_size;
177: if (size % node_size) ngroups += 1;
178: color = rank % ngroups;
179: } else {
180: color = rank / node_size;
181: }
182: PetscCallMPI(MPI_Comm_split(comm, color, rank, &ncomm));
183: } else {
184: PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
185: }
186: } else {
187: #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
188: PetscCallMPI(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, rank, MPI_INFO_NULL, &ncomm));
189: #else
190: /* if the user does not specify the node size and MPI_Comm_split_type is not available, default to the same comm */
191: PetscCallMPI(MPI_Comm_dup(comm, &ncomm));
192: #endif
193: }
195: PetscCallMPI(MPI_Comm_size(ncomm, &nsize));
196: if (nsize == size) { /* one node */
197: PetscCall(PetscMalloc1(1, &lgroup));
198: PetscCallMPI(MPI_Comm_group(ncomm, &lgroup[0]));
199: PetscCallMPI(MPI_Comm_free(&ncomm));
201: *nlevels = 1;
202: *levels = lgroup;
203: break;
204: }
206: PetscCall(PetscMalloc1(2, &lgroup));
208: /* intranode group (in terms of global rank) */
209: PetscCall(PetscMalloc2(size, &ranks, nsize, &granks));
210: for (i = 0; i < nsize; i++) ranks[i] = i;
211: PetscCallMPI(MPI_Comm_group(comm, &ggroup));
212: PetscCallMPI(MPI_Comm_group(ncomm, &ngroup));
213: PetscCallMPI(MPI_Group_translate_ranks(ngroup, nsize, ranks, ggroup, granks));
214: PetscCallMPI(MPI_Group_incl(ggroup, nsize, granks, &lgroup[1]));
216: /* internode group (master processes on the nodes only)
217: this group must be specified by all processes in the comm,
218: so we need to gather those master ranks */
219: PetscCallMPI(MPI_Group_rank(ngroup, &nrank));
220: granks[0] = !nrank ? rank : -1;
221: PetscCallMPI(MPI_Allgather(granks, 1, MPI_INT, ranks, 1, MPI_INT, comm));
222: for (i = 0, isize = 0; i < size; i++)
223: if (ranks[i] >= 0) ranks[isize++] = ranks[i];
224: PetscCallMPI(MPI_Group_incl(ggroup, isize, ranks, &lgroup[0]));
226: PetscCall(PetscFree2(ranks, granks));
227: PetscCallMPI(MPI_Group_free(&ggroup));
228: PetscCallMPI(MPI_Group_free(&ngroup));
229: PetscCallMPI(MPI_Comm_free(&ncomm));
231: *nlevels = 2;
232: *levels = lgroup;
233: break;
235: case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
236: /* recursive m-section (m=2 bisection) */
237: m = options ? (PetscMPIInt)options[1] : 2;
238: PetscCheck(m >= 2, comm, PETSC_ERR_SUP, "Invalid split %d, must be greater than one", m);
239: l = 0;
240: n = 1;
241: while (n <= size) {
242: l++;
243: n *= m;
244: }
245: l -= 1;
246: n /= m;
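/* now n = m^l is the largest power of m not exceeding size */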
248: PetscCheck(l != 0, comm, PETSC_ERR_SUP, "Invalid split %d with communicator size %d", m, size);
249: PetscCall(PetscMalloc1(l, &lgroup));
250: for (i = 1; i < l; i++) lgroup[i] = MPI_GROUP_EMPTY;
252: if (l > 1) {
253: PetscMPIInt rem = size - n;
254: PetscCall(PetscMalloc1((m + rem), &ranks));
255: for (i = 0; i < m; i++) ranks[i] = i * (n / m);
256: for (i = 0; i < rem; i++) ranks[i + m] = n + i;
257: PetscCallMPI(MPI_Comm_group(comm, &ggroup));
258: PetscCallMPI(MPI_Group_incl(ggroup, m + rem, ranks, &lgroup[0]));
259: if (rank < n) {
260: for (i = 1; i < l; i++) {
261: PetscMPIInt inc = (PetscMPIInt)PetscPowInt(m, l - i - 1);
262: if (rank % inc == 0) {
263: PetscMPIInt anch = (rank - rank % (inc * m)), r;
264: for (r = 0; r < m; r++) ranks[r] = anch + r * inc;
265: PetscCallMPI(MPI_Group_incl(ggroup, m, ranks, &lgroup[i]));
266: }
267: }
268: }
269: PetscCall(PetscFree(ranks));
270: PetscCallMPI(MPI_Group_free(&ggroup));
271: } else {
272: PetscCallMPI(MPI_Comm_group(comm, &lgroup[0]));
273: }
275: *nlevels = l;
276: *levels = lgroup;
277: break;
279: default:
280: SETERRQ(comm, PETSC_ERR_SUP, "Not implemented"); /* LCOV_EXCL_LINE */
281: }
282: PetscFunctionReturn(PETSC_SUCCESS);
283: }
285: static PetscErrorCode PetscPartitionerMultistage_DestroyStages(PetscInt nstages, MPI_Group *groups[])
286: {
287: PetscFunctionBegin;
288: for (PetscInt l = 0; l < nstages; l++) {
289: MPI_Group group = (*groups)[l];
290: if (group != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&group));
291: }
292: PetscCall(PetscFree(*groups));
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: static PetscErrorCode PetscPartitionerReset_Multistage(PetscPartitioner part)
297: {
298: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
299: PetscInt nstages = p->levels;
301: PetscFunctionBegin;
302: p->levels = 0;
303: PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &p->lgroup));
304: for (PetscInt l = 0; l < nstages; l++) {
305: PetscCall(PetscPartitionerDestroy(&p->ppart[l]));
306: PetscCall(PetscSectionDestroy(&p->tpwgs[l]));
307: }
308: PetscCall(PetscFree(p->ppart));
309: PetscCall(PetscFree(p->tpwgs));
310: PetscCall(PetscFree(p->edgeCut));
311: PetscFunctionReturn(PETSC_SUCCESS);
312: }
314: static PetscErrorCode PetscPartitionerDestroy_Multistage(PetscPartitioner part)
315: {
316: PetscFunctionBegin;
317: PetscCall(PetscPartitionerReset_Multistage(part));
318: PetscCall(PetscFree(part->data));
319: PetscFunctionReturn(PETSC_SUCCESS);
320: }
322: /*@
323: PetscPartitionerMultistageSetStages - Sets stages for the partitioning
325: Collective
327: Input Parameters:
328: + part - the `PetscPartitioner` object.
329: . levels - the number of stages.
330: - lgroup - array of `MPI_Group`s for each stage.
332: Level: advanced
334: Notes:
335: `MPI_Comm_create(comm, lgroup[l], &lcomm)` is used to compute the communicator for the stage partitioner at level `l`.
336: The groups must be specified in the process numbering of the partitioner communicator.
337: `lgroup[0]` must be collectively specified and it must represent a proper subset of the communicator associated with the original partitioner.
338: For each level, a rank can be listed in at most one group (but the same rank may be listed at different levels).
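
  Example Usage:
  A minimal sketch, assuming `comm` is the communicator the partitioner lives on; the grouping of consecutive rank pairs is
  illustrative only and mimics the NODE strategy with a node size of 2 (stage 0 holds one master rank per pair, stage 1 holds the pair itself):
.vb
  PetscPartitioner part;
  MPI_Group        wgroup, stages[2];
  PetscMPIInt      rank, size, i, pair[2], *masters;

  PetscCallMPI(MPI_Comm_group(comm, &wgroup));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  // stage 0: one master rank per pair, the same group collectively specified on every process
  PetscCall(PetscMalloc1((size + 1) / 2, &masters));
  for (i = 0; i < size; i += 2) masters[i / 2] = i;
  PetscCallMPI(MPI_Group_incl(wgroup, (size + 1) / 2, masters, &stages[0]));
  PetscCall(PetscFree(masters));
  // stage 1: only the pair this process belongs to
  pair[0] = rank - rank % 2;
  pair[1] = pair[0] + 1;
  PetscCallMPI(MPI_Group_incl(wgroup, pair[1] < size ? 2 : 1, pair, &stages[1]));

  PetscCall(PetscPartitionerCreate(comm, &part));
  PetscCall(PetscPartitionerSetType(part, PETSCPARTITIONERMULTISTAGE));
  PetscCall(PetscPartitionerMultistageSetStages(part, 2, stages));

  PetscCallMPI(MPI_Group_free(&stages[0]));
  PetscCallMPI(MPI_Group_free(&stages[1]));
  PetscCallMPI(MPI_Group_free(&wgroup));
.ve
  The groups are duplicated internally, so they can be freed right after the call.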
340: .seealso: `PetscPartitionerSetType()`, `PetscPartitionerDestroy()`, `PETSCPARTITIONERMULTISTAGE`
341: @*/
342: PetscErrorCode PetscPartitionerMultistageSetStages(PetscPartitioner part, PetscInt levels, MPI_Group lgroup[])
343: {
344: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
345: MPI_Comm comm;
346: MPI_Group wgroup;
347: PetscMPIInt **lparts = NULL;
348: PetscMPIInt rank, size;
349: PetscBool touched, isms;
351: PetscFunctionBegin;
354: PetscCheck(levels >= 0, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Number of levels must be non-negative");
355: if (levels) PetscAssertPointer(lgroup, 3);
356: PetscCall(PetscObjectTypeCompare((PetscObject)part, PETSCPARTITIONERMULTISTAGE, &isms));
357: if (!isms) PetscFunctionReturn(PETSC_SUCCESS);
358: PetscCall(PetscLogEventBegin(PetscPartitioner_MS_SetUp, part, 0, 0, 0));
360: PetscCall(PetscPartitionerReset_Multistage(part));
362: PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
363: PetscCallMPI(MPI_Comm_group(comm, &wgroup));
364: PetscCallMPI(MPI_Comm_rank(comm, &rank));
365: PetscCallMPI(MPI_Comm_size(comm, &size));
367: p->levels = levels;
368: PetscCall(PetscCalloc1(p->levels, &p->ppart));
369: PetscCall(PetscCalloc1(p->levels, &p->lgroup));
370: PetscCall(PetscCalloc1(p->levels, &p->tpwgs));
371: PetscCall(PetscCalloc1(p->levels, &p->edgeCut));
373: /* support for target partition weights */
374: touched = PETSC_FALSE;
375: if (p->compute_tpwgs) PetscCall(PetscMalloc1(p->levels, &lparts));
377: for (PetscInt l = 0; l < p->levels; l++) {
378: const char *prefix;
379: char aprefix[256];
380: MPI_Comm lcomm;
382: if (l) { /* let MPI complain/hang if the user did not specify the groups properly */
383: PetscCallMPI(MPI_Comm_create(comm, lgroup[l], &lcomm));
384: } else { /* in debug mode, check that the initial group is consistently (collectively) specified on comm */
385: #if defined(PETSC_USE_DEBUG)
386: MPI_Group group, igroup = lgroup[0];
387: PetscMPIInt *ranks, *granks;
388: PetscMPIInt b[2], b2[2], csize, gsize;
390: PetscCallMPI(MPI_Group_size(igroup, &gsize));
391: b[0] = -gsize;
392: b[1] = +gsize;
393: PetscCallMPI(MPIU_Allreduce(b, b2, 2, MPI_INT, MPI_MAX, comm));
394: PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
395: PetscCallMPI(MPI_Comm_group(comm, &group));
396: PetscCallMPI(MPI_Group_size(group, &csize));
397: PetscCall(PetscMalloc2(gsize, &ranks, (csize * gsize), &granks));
398: for (PetscMPIInt i = 0; i < gsize; i++) granks[i] = i;
399: PetscCallMPI(MPI_Group_translate_ranks(igroup, gsize, granks, group, ranks));
400: PetscCallMPI(MPI_Group_free(&group));
401: PetscCallMPI(MPI_Allgather(ranks, gsize, MPI_INT, granks, gsize, MPI_INT, comm));
402: for (PetscMPIInt i = 0; i < gsize; i++) {
403: PetscMPIInt chkr = granks[i];
404: for (PetscMPIInt j = 0; j < csize; j++) {
405: PetscMPIInt shft = j * gsize + i;
406: PetscCheck(chkr == granks[shft], comm, PETSC_ERR_ARG_WRONG, "Initial group must be collectively specified");
407: }
408: }
409: PetscCall(PetscFree2(ranks, granks));
410: #endif
411: lcomm = comm;
412: }
413: PetscCall(PetscObjectGetOptionsPrefix((PetscObject)part, &prefix));
414: if (lcomm != MPI_COMM_NULL) {
415: /* there is no MPI_Group_dup(); taking the union with the empty group duplicates the group */
416: PetscCallMPI(MPI_Group_union(lgroup[l], MPI_GROUP_EMPTY, &p->lgroup[l]));
417: PetscCall(PetscPartitionerCreate(lcomm, &p->ppart[l]));
418: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)p->ppart[l], prefix));
419: PetscCall(PetscSNPrintf(aprefix, sizeof(aprefix), "petscpartitioner_multistage_levels_%" PetscInt_FMT "_", l));
420: PetscCall(PetscObjectAppendOptionsPrefix((PetscObject)p->ppart[l], aprefix));
421: } else {
422: PetscCheck(l != 0, PetscObjectComm((PetscObject)part), PETSC_ERR_USER, "Invalid first group");
423: p->lgroup[l] = MPI_GROUP_EMPTY;
424: }
425: if (lcomm != comm && lcomm != MPI_COMM_NULL) PetscCallMPI(MPI_Comm_free(&lcomm));
427: /* compute the number of partitions per level and detect whether this process takes part in the partitioning (at any level) */
428: if (p->compute_tpwgs) {
429: PetscMPIInt gsize;
430: PetscCall(PetscMalloc1(size, &lparts[l]));
431: PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
432: if (!l) {
433: PetscMPIInt tr;
434: PetscCallMPI(MPI_Group_translate_ranks(wgroup, 1, &rank, p->lgroup[0], &tr));
435: if (tr == MPI_UNDEFINED) gsize = 0;
436: }
437: if (touched && !gsize) gsize = 1;
438: PetscCallMPI(MPI_Allgather(&gsize, 1, MPI_INT, lparts[l], 1, MPI_INT, comm));
439: if (lparts[l][rank]) touched = PETSC_TRUE;
440: }
441: }
443: /* determine weights (bottom-up) */
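/* Roughly: a process starts with weight 1 if it participates in any stage; walking the stages from the last to the first,
   the weights in each stage communicator are summed and split evenly among the processes shared with the previous stage,
   rescaled by a global least common multiple so that the target weights remain integral */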
444: if (p->compute_tpwgs) {
445: PetscMPIInt *tranks;
446: PetscInt *lwgts, wgt;
447: MPI_Op MPIU_LCM; /* XXX this is actually recreated at every setup */
449: PetscCall(PetscMalloc1((2 * size), &tranks));
451: /* we need to compute the least common multiple across processes */
452: PetscCallMPI(MPI_Op_create(PetscLCM_Local, 1, &MPIU_LCM));
454: /* final target has to have all ones as weights (if the process gets touched) */
455: wgt = touched ? 1 : 0;
456: PetscCall(PetscMalloc1(size, &lwgts));
457: PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));
459: /* now go up the hierarchy and populate the PetscSection describing the partition weights */
460: for (PetscInt l = p->levels - 1; l >= 0; l--) {
461: MPI_Comm pcomm;
462: MPI_Group igroup;
463: PetscMPIInt isize, isizer = 0;
464: PetscInt a, b, wgtr;
465: PetscMPIInt gsize;
466: PetscBool usep = PETSC_FALSE;
468: if (p->ppart[l]) {
469: PetscCall(PetscObjectGetComm((PetscObject)p->ppart[l], &pcomm));
470: } else {
471: pcomm = PETSC_COMM_SELF;
472: }
474: PetscCallMPI(MPI_Group_size(p->lgroup[l], &gsize));
475: if (gsize) {
476: usep = PETSC_TRUE;
477: for (PetscMPIInt i = 0; i < gsize; i++) tranks[i] = i;
478: PetscCallMPI(MPI_Group_translate_ranks(p->lgroup[l], gsize, tranks, wgroup, tranks + size));
479: } else gsize = lparts[l][rank];
480: PetscCall(PetscFree(lparts[l]));
481: PetscCall(PetscSectionCreate(pcomm, &p->tpwgs[l]));
482: PetscCall(PetscObjectSetName((PetscObject)p->tpwgs[l], "Target partition weights"));
483: PetscCall(PetscSectionSetChart(p->tpwgs[l], 0, gsize));
485: if (usep) {
486: PetscMPIInt *tr = tranks + size;
487: for (PetscMPIInt i = 0; i < gsize; i++) PetscCall(PetscSectionSetDof(p->tpwgs[l], i, lwgts[tr[i]]));
488: } else if (gsize) {
489: PetscCall(PetscSectionSetDof(p->tpwgs[l], 0, 1));
490: }
491: PetscCall(PetscSectionSetUp(p->tpwgs[l]));
492: if (!l) break;
494: /* determine number of processes shared by two consecutive levels */
495: PetscCallMPI(MPI_Group_intersection(p->lgroup[l], p->lgroup[l - 1], &igroup));
496: PetscCallMPI(MPI_Group_size(igroup, &isize));
497: PetscCallMPI(MPI_Group_free(&igroup));
498: if (!p->ppart[l] && touched) isize = 1; /* if this process gets touched, needs to propagate its data from one level to the other */
500: /* reduce the maximum size of igroup over the level partitioner comm */
501: PetscCallMPI(MPIU_Allreduce(&isize, &isizer, 1, MPI_INT, MPI_MAX, pcomm));
503: /* sum previously computed partition weights on the level comm */
504: wgt = lwgts[rank];
505: wgtr = wgt;
506: PetscCallMPI(MPIU_Allreduce(&wgt, &wgtr, 1, MPIU_INT, MPI_SUM, pcomm));
508: /* partition weights are given with integers; to properly compute these and be able to propagate them to the next level,
509: we need to compute the least common multiple of isizer across the global comm */
510: a = isizer ? isizer : 1;
511: b = a;
512: PetscCallMPI(MPIU_Allreduce(&a, &b, 1, MPIU_INT, MPIU_LCM, comm));
514: /* finally share this process weight with all the other processes */
515: wgt = isizer ? (b * wgtr) / isizer : 0;
516: PetscCallMPI(MPI_Allgather(&wgt, 1, MPIU_INT, lwgts, 1, MPIU_INT, comm));
517: }
518: PetscCall(PetscFree(lwgts));
519: PetscCall(PetscFree(tranks));
520: PetscCall(PetscFree(lparts));
521: PetscCallMPI(MPI_Op_free(&MPIU_LCM));
522: }
523: PetscCallMPI(MPI_Group_free(&wgroup));
524: PetscCall(PetscLogEventEnd(PetscPartitioner_MS_SetUp, part, 0, 0, 0));
525: PetscFunctionReturn(PETSC_SUCCESS);
526: }
528: PetscErrorCode PetscPartitionerMultistageGetStages_Multistage(PetscPartitioner part, PetscInt *levels, MPI_Group *lgroup[])
529: {
530: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
532: PetscFunctionBegin;
533: PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
535: if (levels) *levels = p->levels;
536: if (lgroup) *lgroup = p->lgroup;
537: PetscFunctionReturn(PETSC_SUCCESS);
538: }
540: PetscErrorCode PetscPartitionerMultistageSetStage_Multistage(PetscPartitioner part, PetscInt stage, PetscObject odm)
541: {
542: DM dm = (DM)odm;
543: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
545: PetscFunctionBegin;
546: PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
547: PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
548: PetscCheck(stage >= 0 && stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", stage, p->levels);
550: p->stage = stage;
551: PetscCall(PetscObjectReference((PetscObject)dm));
552: PetscCall(DMDestroy(&p->stagedm));
553: p->stagedm = dm;
554: PetscFunctionReturn(PETSC_SUCCESS);
555: }
557: PetscErrorCode PetscPartitionerMultistageGetStage_Multistage(PetscPartitioner part, PetscInt *stage, PetscObject *odm)
558: {
559: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
561: PetscFunctionBegin;
562: PetscCheckTypeName(part, PETSCPARTITIONERMULTISTAGE);
563: if (stage) *stage = p->stage;
564: if (odm) *odm = (PetscObject)p->stagedm;
565: PetscFunctionReturn(PETSC_SUCCESS);
566: }
568: static PetscErrorCode PetscPartitionerSetUp_Multistage(PetscPartitioner part)
569: {
570: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
571: MPI_Comm comm = PetscObjectComm((PetscObject)part);
572: PetscInt nstages;
573: MPI_Group *groups;
575: PetscFunctionBegin;
576: if (p->levels) PetscFunctionReturn(PETSC_SUCCESS);
577: PetscCall(PetscPartitionerMultistage_CreateStages(comm, NULL, &nstages, &groups));
578: PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
579: PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
580: PetscFunctionReturn(PETSC_SUCCESS);
581: }
583: /* targetSection argument unused, target partition weights are computed internally */
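/* Partition the graph for the current stage: restrict the adjacency to the stage communicator (when it differs from the
   partitioner communicator), run the stage partitioner there, and translate the resulting target ranks back to the
   numbering of the global communicator */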
584: static PetscErrorCode PetscPartitionerPartition_Multistage(PetscPartitioner part, PetscInt nparts, PetscInt numVertices, PetscInt start[], PetscInt adjacency[], PetscSection vertSection, PetscSection edgeSection, PETSC_UNUSED PetscSection targetSection, PetscSection partSection, IS *partition)
585: {
586: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
587: PetscPartitioner ppart;
588: PetscSection ppartSection = NULL;
589: IS lpartition;
590: const PetscInt *idxs;
591: MPI_Comm comm, pcomm;
592: MPI_Group group, lgroup, pgroup;
593: PetscInt *pstart, *padjacency;
594: PetscInt nid, i, pedgeCut;
595: PetscMPIInt sameComm, pparts;
596: PetscBool freeadj = PETSC_FALSE;
598: PetscFunctionBegin;
599: PetscCheck(p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ORDER, "Number of stages not set yet");
600: PetscCheck(p->stage >= 0 && p->stage < p->levels, PetscObjectComm((PetscObject)part), PETSC_ERR_ARG_OUTOFRANGE, "Invalid stage index %" PetscInt_FMT ", not in [0,%" PetscInt_FMT ")", p->stage, p->levels);
601: PetscCall(PetscLogEventBegin(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));
603: /* Group for current stage (size of the group defines number of "local" partitions) */
604: lgroup = p->lgroup[p->stage];
605: PetscCallMPI(MPI_Group_size(lgroup, &pparts));
607: /* Current stage partitioner */
608: ppart = p->ppart[p->stage];
609: if (ppart) {
610: PetscCall(PetscObjectGetComm((PetscObject)ppart, &pcomm));
611: PetscCallMPI(MPI_Comm_group(pcomm, &pgroup));
612: } else {
613: pcomm = PETSC_COMM_SELF;
614: pgroup = MPI_GROUP_EMPTY;
615: pparts = -1;
616: }
618: /* Global comm of partitioner */
619: PetscCall(PetscObjectGetComm((PetscObject)part, &comm));
620: PetscCallMPI(MPI_Comm_group(comm, &group));
622: /* Get adjacency */
623: PetscCallMPI(MPI_Group_compare(group, pgroup, &sameComm));
624: if (sameComm != MPI_UNEQUAL) {
625: pstart = start;
626: padjacency = adjacency;
627: } else { /* restrict to partitioner comm */
628: ISLocalToGlobalMapping l2g, g2l;
629: IS gid, rid;
630: const PetscInt *idxs1;
631: PetscInt *idxs2;
632: PetscInt cStart, cEnd, cum;
633: DM dm = p->stagedm;
634: PetscSF sf;
636: PetscCheck(dm, PetscObjectComm((PetscObject)part), PETSC_ERR_PLIB, "Missing stage DM");
637: PetscCall(DMGetPointSF(dm, &sf));
638: PetscCall(DMPlexGetHeightStratum(dm, part->height, &cStart, &cEnd));
639: PetscCall(DMPlexCreateNumbering_Plex(dm, cStart, cEnd, part->height, NULL, sf, &gid));
640: /* filter overlapped local cells (if any) */
641: PetscCall(ISGetIndices(gid, &idxs1));
642: PetscCall(ISGetLocalSize(gid, &cum));
643: PetscCall(PetscMalloc1(cum, &idxs2));
644: for (i = cStart, cum = 0; i < cEnd; i++) {
645: if (idxs1[i - cStart] < 0) continue;
646: idxs2[cum++] = idxs1[i - cStart];
647: }
648: PetscCall(ISRestoreIndices(gid, &idxs1));
650: PetscCheck(numVertices == cum, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected %" PetscInt_FMT " != %" PetscInt_FMT, numVertices, cum);
651: PetscCall(ISDestroy(&gid));
653: /*
654: g2l from full numbering to local numbering
655: l2g from local numbering to restricted numbering
656: */
657: PetscCall(ISCreateGeneral(pcomm, numVertices, idxs2, PETSC_OWN_POINTER, &gid));
658: PetscCall(ISRenumber(gid, NULL, NULL, &rid));
659: PetscCall(ISLocalToGlobalMappingCreateIS(gid, &g2l));
660: PetscCall(ISLocalToGlobalMappingSetType(g2l, ISLOCALTOGLOBALMAPPINGHASH));
661: PetscCall(ISLocalToGlobalMappingCreateIS(rid, &l2g));
662: PetscCall(ISDestroy(&gid));
663: PetscCall(ISDestroy(&rid));
665: PetscCall(PetscMalloc2(numVertices + 1, &pstart, start[numVertices], &padjacency));
666: pstart[0] = 0;
667: for (i = 0; i < numVertices; i++) {
668: PetscCall(ISGlobalToLocalMappingApply(g2l, IS_GTOLM_DROP, start[i + 1] - start[i], adjacency + start[i], &pstart[i + 1], padjacency + pstart[i]));
669: PetscCall(ISLocalToGlobalMappingApply(l2g, pstart[i + 1], padjacency + pstart[i], padjacency + pstart[i]));
670: pstart[i + 1] += pstart[i];
671: }
672: PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
673: PetscCall(ISLocalToGlobalMappingDestroy(&g2l));
675: freeadj = PETSC_TRUE;
676: }
678: /* Compute partitioning */
679: pedgeCut = 0;
680: PetscCall(PetscSectionCreate(pcomm, &ppartSection));
681: if (ppart) {
682: PetscMPIInt prank;
684: PetscCheck(ppart->ops->partition, PetscObjectComm((PetscObject)ppart), PETSC_ERR_ARG_WRONGSTATE, "PetscPartitioner has no partitioning method on stage %" PetscInt_FMT, p->stage);
685: PetscCall(PetscPartitionerPartition(ppart, pparts, numVertices, pstart, padjacency, vertSection, edgeSection, p->tpwgs[p->stage], ppartSection, &lpartition));
686: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ppart), &prank));
687: if (!prank) pedgeCut = ppart->edgeCut; /* only the master rank of the stage comm contributes to the final reduction */
688: } else { /* not participating */
689: PetscCall(ISCreateStride(PETSC_COMM_SELF, numVertices, 0, 1, &lpartition));
690: pparts = numVertices > 0 ? 1 : 0;
691: }
692: if (freeadj) PetscCall(PetscFree2(pstart, padjacency));
694: /* Init final partition (output) */
695: PetscCall(PetscSectionReset(partSection));
696: PetscCall(PetscSectionSetChart(partSection, 0, nparts));
698: /* We need to map the section back to the global comm numbering */
699: for (i = 0; i < pparts; i++) {
700: PetscInt dof;
701: PetscMPIInt mp, mpt;
703: if (lgroup != MPI_GROUP_EMPTY) {
704: PetscCall(PetscMPIIntCast(i, &mp));
705: PetscCall(PetscSectionGetDof(ppartSection, i, &dof));
706: PetscCallMPI(MPI_Group_translate_ranks(lgroup, 1, &mp, group, &mpt));
707: } else {
708: PetscCheck(pparts == 1, PETSC_COMM_SELF, PETSC_ERR_LIB, "Unexpected pparts %d", pparts);
709: PetscCallMPI(MPI_Comm_rank(comm, &mpt));
710: PetscCall(ISGetLocalSize(lpartition, &dof));
711: }
712: PetscCall(PetscSectionSetDof(partSection, mpt, dof));
713: }
714: PetscCall(PetscSectionSetUp(partSection));
715: PetscCall(PetscSectionDestroy(&ppartSection));
717: /* No need to translate the "partition" output, as it is in local cell numbering;
718: we only change the comm of the index set */
719: PetscCall(ISGetIndices(lpartition, &idxs));
720: PetscCall(ISGetLocalSize(lpartition, &nid));
721: PetscCall(ISCreateGeneral(comm, nid, idxs, PETSC_COPY_VALUES, partition));
722: PetscCall(ISRestoreIndices(lpartition, &idxs));
723: PetscCall(ISDestroy(&lpartition));
725: PetscCall(PetscSectionViewFromOptions(partSection, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));
726: PetscCall(ISViewFromOptions(*partition, (PetscObject)part, "-petscpartitioner_multistage_partition_view"));
728: /* Update edge-cut */
729: p->edgeCut[p->stage] = pedgeCut;
730: for (i = p->stage + 1; i < p->levels; i++) p->edgeCut[i] = 0;
731: for (i = 0; i < p->stage; i++) pedgeCut += p->edgeCut[i];
732: part->edgeCut = -1;
733: PetscCallMPI(MPI_Reduce(&pedgeCut, &part->edgeCut, 1, MPIU_INT, MPI_SUM, 0, comm));
735: PetscCallMPI(MPI_Group_free(&group));
736: if (pgroup != MPI_GROUP_EMPTY) PetscCallMPI(MPI_Group_free(&pgroup));
737: PetscCall(PetscLogEventEnd(PetscPartitioner_MS_Stage[PetscMin(p->stage, PETSCPARTITIONER_MS_MAXSTAGE)], part, 0, 0, 0));
738: PetscFunctionReturn(PETSC_SUCCESS);
739: }
741: static PetscErrorCode PetscPartitionerSetFromOptions_Multistage(PetscPartitioner part, PetscOptionItems PetscOptionsObject)
742: {
743: PetscPartitioner_MS *p = (PetscPartitioner_MS *)part->data;
744: PetscEnum strategy = (PetscEnum)PETSCPARTITIONER_MS_STRATEGY_NODE;
745: PetscBool set, roundrobin;
746: PetscInt options[3] = {PETSC_DETERMINE, PETSC_DETERMINE, PETSC_DETERMINE};
748: PetscFunctionBegin;
749: PetscOptionsHeadBegin(PetscOptionsObject, "PetscPartitioner Multistage Options");
750: PetscCall(PetscOptionsEnum("-petscpartitioner_multistage_strategy", "Default partitioning strategy", "", PetscPartitionerMultistageStrategyList, strategy, &strategy, &set));
751: if (set || !p->levels) {
752: options[0] = (PetscInt)strategy;
753: switch (options[0]) {
754: case PETSCPARTITIONER_MS_STRATEGY_NODE:
755: options[1] = PETSC_DETERMINE;
756: roundrobin = PETSC_FALSE;
757: PetscCall(PetscOptionsInt("-petscpartitioner_multistage_node_size", "Number of processes per node", "", options[1], &options[1], NULL));
758: PetscCall(PetscOptionsBool("-petscpartitioner_multistage_node_interleaved", "Use round robin rank assignments", "", roundrobin, &roundrobin, NULL));
759: options[2] = (PetscInt)roundrobin;
760: break;
761: case PETSCPARTITIONER_MS_STRATEGY_MSECTION:
762: options[1] = 2;
763: PetscCall(PetscOptionsInt("-petscpartitioner_multistage_msection", "Number of splits per level", "", options[1], &options[1], NULL));
764: break;
765: default:
766: break; /* LCOV_EXCL_LINE */
767: }
768: }
769: PetscCall(PetscOptionsBool("-petscpartitioner_multistage_tpwgts", "Use target partition weights in stage partitioners", "", p->compute_tpwgs, &p->compute_tpwgs, NULL));
770: PetscCall(PetscOptionsBool("-petscpartitioner_multistage_viewtpwgts", "View target partition weights", "", p->view_tpwgs, &p->view_tpwgs, NULL));
771: PetscOptionsHeadEnd();
773: if (options[0] != PETSC_DETERMINE) {
774: MPI_Comm comm = PetscObjectComm((PetscObject)part);
775: PetscInt nstages;
776: MPI_Group *groups;
778: PetscCall(PetscPartitionerMultistage_CreateStages(comm, options, &nstages, &groups));
779: PetscCall(PetscPartitionerMultistageSetStages(part, nstages, groups));
780: PetscCall(PetscPartitionerMultistage_DestroyStages(nstages, &groups));
781: }
783: {
784: PetscInt nstages = p->levels, l;
785: for (l = 0; l < nstages; l++) {
786: if (p->ppart[l]) PetscCall(PetscPartitionerSetFromOptions(p->ppart[l]));
787: }
788: }
789: PetscFunctionReturn(PETSC_SUCCESS);
790: }
792: /*MC
793: PETSCPARTITIONERMULTISTAGE = "multistage" - A PetscPartitioner object using a multistage distribution strategy
795: Level: intermediate
797: Options Database Keys:
798: + -petscpartitioner_multistage_strategy <strategy> - Either `PETSCPARTITIONER_MS_STRATEGY_NODE`, or `PETSCPARTITIONER_MS_STRATEGY_MSECTION`
799: . -petscpartitioner_multistage_node_size <int> - Number of processes per computing node (or `PETSC_DECIDE`)
800: . -petscpartitioner_multistage_node_interleaved <bool> - Assign ranks to node groups in a round-robin (interleaved) fashion
801: . -petscpartitioner_multistage_msection <int> - Number of splits per level in recursive m-section splits (use `2` for bisection)
802: - -petscpartitioner_multistage_tpwgts <bool> - Use target partition weights in stage partitioner
804: Notes:
805: The default multistage strategy is `PETSCPARTITIONER_MS_STRATEGY_NODE`, which automatically discovers node information using `MPI_Comm_split_type`.
806: `PETSCPARTITIONER_MS_STRATEGY_MSECTION` is intended mainly for testing purposes.
807: Options for the single-stage partitioners are prefixed by `-petscpartitioner_multistage_levels_`.
808: For example, to use parmetis in all stages, pass `-petscpartitioner_multistage_levels_petscpartitioner_type parmetis`.
809: Finer-grained control is also possible: `-petscpartitioner_multistage_levels_0_petscpartitioner_type parmetis`, `-petscpartitioner_multistage_levels_1_petscpartitioner_type simple`
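
  Example Usage:
  A minimal sketch, assuming `dm` is an already created `DMPLEX` mesh to be distributed with this partitioner:
.vb
  PetscPartitioner part;
  DM               dmDist = NULL;

  PetscCall(DMPlexGetPartitioner(dm, &part));
  PetscCall(PetscPartitionerSetType(part, PETSCPARTITIONERMULTISTAGE));
  PetscCall(PetscPartitionerSetFromOptions(part));
  PetscCall(DMPlexDistribute(dm, 0, NULL, &dmDist));
.ve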
811: .seealso: `PetscPartitionerType`, `PetscPartitionerCreate()`, `PetscPartitionerSetType()`
812: M*/
814: PETSC_EXTERN PetscErrorCode PetscPartitionerCreate_Multistage(PetscPartitioner part)
815: {
816: PetscPartitioner_MS *p;
818: PetscFunctionBegin;
820: PetscCall(PetscNew(&p));
821: p->compute_tpwgs = PETSC_TRUE;
823: part->data = p;
824: part->ops->view = PetscPartitionerView_Multistage;
825: part->ops->destroy = PetscPartitionerDestroy_Multistage;
826: part->ops->partition = PetscPartitionerPartition_Multistage;
827: part->ops->setfromoptions = PetscPartitionerSetFromOptions_Multistage;
828: part->ops->setup = PetscPartitionerSetUp_Multistage;
830: PetscCall(PetscCitationsRegister(MSPartitionerCitation, &MSPartitionerCite));
831: PetscFunctionReturn(PETSC_SUCCESS);
832: }