Actual source code: agg.c
1: /*
2: GAMG geometric-algebric multigrid PC - Mark Adams 2011
3: */
5: #include <../src/ksp/pc/impls/gamg/gamg.h>
6: #include <petscblaslapack.h>
7: #include <petscdm.h>
8: #include <petsc/private/kspimpl.h>
10: typedef struct {
11: PetscInt nsmooths; // number of smoothing steps to construct prolongation
12: PetscInt aggressive_coarsening_levels; // number of aggressive coarsening levels (square or MISk)
13: PetscInt aggressive_mis_k; // the k in MIS-k
14: PetscBool use_aggressive_square_graph;
15: PetscBool use_minimum_degree_ordering;
16: PetscBool use_low_mem_filter;
17: PetscBool graph_symmetrize;
18: MatCoarsen crs;
19: } PC_GAMG_AGG;
21: /*@
22: PCGAMGSetNSmooths - Set number of smoothing steps (1 is typical) used to construct the prolongation operator
24: Logically Collective
26: Input Parameters:
27: + pc - the preconditioner context
28: - n - the number of smooths, default is 1
30: Options Database Key:
31: . -pc_gamg_agg_nsmooths nsmooth - number of smooths
33: Level: intermediate
35: Note:
36: This is a different concept from the number smoothing steps used during the linear solution process which
37: can be set with `-mg_levels_ksp_max_it`
39: Developer Note:
40: This should be named `PCGAMGAGGSetNSmooths()`.
42: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCMG`, `PCGAMG`
43: @*/
44: PetscErrorCode PCGAMGSetNSmooths(PC pc, PetscInt n)
45: {
46: PetscFunctionBegin;
49: PetscTryMethod(pc, "PCGAMGSetNSmooths_C", (PC, PetscInt), (pc, n));
50: PetscFunctionReturn(PETSC_SUCCESS);
51: }
53: static PetscErrorCode PCGAMGSetNSmooths_AGG(PC pc, PetscInt n)
54: {
55: PC_MG *mg = (PC_MG *)pc->data;
56: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
57: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
59: PetscFunctionBegin;
60: pc_gamg_agg->nsmooths = n;
61: PetscFunctionReturn(PETSC_SUCCESS);
62: }
64: /*@
65: PCGAMGSetAggressiveLevels - Use aggressive coarsening on first n levels
67: Logically Collective
69: Input Parameters:
70: + pc - the preconditioner context
71: - n - 0, 1 or more, the default is 1
73: Options Database Key:
74: . -pc_gamg_aggressive_coarsening n - the number of coarsenings
76: Level: intermediate
78: Note:
79: By default, aggressive coarsening squares the matrix (computes $ A^T A$) before coarsening. Calling `PCGAMGSetAggressiveSquareGraph()` with a value of `PETSC_FALSE` changes the aggressive coarsening strategy to use MIS-k, see `PCGAMGMISkSetAggressive()`.
81: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGMISkSetAggressive()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGMISkSetMinDegreeOrdering()`, `PCGAMGSetLowMemoryFilter()`
82: @*/
83: PetscErrorCode PCGAMGSetAggressiveLevels(PC pc, PetscInt n)
84: {
85: PetscFunctionBegin;
88: PetscTryMethod(pc, "PCGAMGSetAggressiveLevels_C", (PC, PetscInt), (pc, n));
89: PetscFunctionReturn(PETSC_SUCCESS);
90: }
92: /*@
93: PCGAMGMISkSetAggressive - Number (k) distance in MIS coarsening (>2 is 'aggressive')
95: Logically Collective
97: Input Parameters:
98: + pc - the preconditioner context
99: - n - 1 or more (default = 2)
101: Options Database Key:
102: . -pc_gamg_aggressive_mis_k n - the distance to use
104: Level: intermediate
106: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGMISkSetMinDegreeOrdering()`, `PCGAMGSetLowMemoryFilter()`
107: @*/
108: PetscErrorCode PCGAMGMISkSetAggressive(PC pc, PetscInt n)
109: {
110: PetscFunctionBegin;
113: PetscTryMethod(pc, "PCGAMGMISkSetAggressive_C", (PC, PetscInt), (pc, n));
114: PetscFunctionReturn(PETSC_SUCCESS);
115: }
117: /*@
118: PCGAMGSetAggressiveSquareGraph - Use graph square, $A^T A$, for aggressive coarsening. Coarsening is slower than the alternative (MIS-2), which is faster and uses less memory
120: Logically Collective
122: Input Parameters:
123: + pc - the preconditioner context
124: - b - default true
126: Options Database Key:
127: . -pc_gamg_aggressive_square_graph (true|false) - whether to use the graph square to aggressively coarsen
129: Level: intermediate
131: Notes:
132: If `b` is `PETSC_FALSE` then MIS-k is used for aggressive coarsening, see `PCGAMGMISkSetAggressive()`
134: Squaring the matrix to perform the aggressive coarsening is slower and requires more memory than using MIS-k, but may result in a better preconditioner
135: that converges faster.
137: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `PCGAMGMISkSetAggressive()`, `PCGAMGMISkSetMinDegreeOrdering()`, `PCGAMGSetLowMemoryFilter()`
138: @*/
139: PetscErrorCode PCGAMGSetAggressiveSquareGraph(PC pc, PetscBool b)
140: {
141: PetscFunctionBegin;
144: PetscTryMethod(pc, "PCGAMGSetAggressiveSquareGraph_C", (PC, PetscBool), (pc, b));
145: PetscFunctionReturn(PETSC_SUCCESS);
146: }
148: /*@
149: PCGAMGMISkSetMinDegreeOrdering - Use minimum degree ordering in greedy MIS algorithm
151: Logically Collective
153: Input Parameters:
154: + pc - the preconditioner context
155: - b - default false
157: Options Database Key:
158: . -pc_gamg_mis_k_minimum_degree_ordering (true|false) - use the minimum degree ordering
160: Level: intermediate
162: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `PCGAMGMISkSetAggressive()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGSetLowMemoryFilter()`
163: @*/
164: PetscErrorCode PCGAMGMISkSetMinDegreeOrdering(PC pc, PetscBool b)
165: {
166: PetscFunctionBegin;
169: PetscTryMethod(pc, "PCGAMGMISkSetMinDegreeOrdering_C", (PC, PetscBool), (pc, b));
170: PetscFunctionReturn(PETSC_SUCCESS);
171: }
173: /*@
174: PCGAMGSetLowMemoryFilter - Use low memory graph/matrix filter
176: Logically Collective
178: Input Parameters:
179: + pc - the preconditioner context
180: - b - default false
182: Options Database Key:
183: . -pc_gamg_low_memory_threshold_filter (true|false) - use the low memory filter
185: Level: intermediate
187: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`,
188: `PCGAMGMISkSetAggressive()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGMISkSetMinDegreeOrdering()`
189: @*/
190: PetscErrorCode PCGAMGSetLowMemoryFilter(PC pc, PetscBool b)
191: {
192: PetscFunctionBegin;
195: PetscTryMethod(pc, "PCGAMGSetLowMemoryFilter_C", (PC, PetscBool), (pc, b));
196: PetscFunctionReturn(PETSC_SUCCESS);
197: }
199: /*@
200: PCGAMGSetGraphSymmetrize - Symmetrize graph used for coarsening. Defaults to true, but if matrix has symmetric attribute, then not needed since the graph is already known to be symmetric
202: Logically Collective
204: Input Parameters:
205: + pc - the preconditioner context
206: - b - default true
208: Options Database Key:
209: . -pc_gamg_graph_symmetrize (true|false) - symmetrize the graph
211: Level: intermediate
213: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), `PCGAMG`, `PCGAMGSetThreshold()`, `PCGAMGSetAggressiveLevels()`, `MatCreateGraph()`,
214: `PCGAMGMISkSetAggressive()`, `PCGAMGSetAggressiveSquareGraph()`, `PCGAMGMISkSetMinDegreeOrdering()`
215: @*/
216: PetscErrorCode PCGAMGSetGraphSymmetrize(PC pc, PetscBool b)
217: {
218: PetscFunctionBegin;
221: PetscTryMethod(pc, "PCGAMGSetGraphSymmetrize_C", (PC, PetscBool), (pc, b));
222: PetscFunctionReturn(PETSC_SUCCESS);
223: }
225: /*@
226: PCGAMGSetProlongatorFilter - Set threshold for filtering small entries from the prolongator (a kernel-preserving correction is applied afterward)
228: Logically Collective
230: Input Parameters:
231: + pc - the preconditioner context
232: - thr - threshold value; entries with absolute value below this are dropped (0 disables filtering)
234: Options Database Key:
235: . -pc_gamg_prolongator_filter thr - threshold for filtering small entries from prolongator (0=disabled, 0.0025=typical)
237: Level: intermediate
239: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCGAMG`, `PCGAMGGetProlongatorFilter()`, `PCGAMGSetLowMemoryFilter()`
240: @*/
241: PetscErrorCode PCGAMGSetProlongatorFilter(PC pc, PetscReal thr)
242: {
243: PetscFunctionBegin;
246: PetscTryMethod(pc, "PCGAMGSetProlongatorFilter_C", (PC, PetscReal), (pc, thr));
247: PetscFunctionReturn(PETSC_SUCCESS);
248: }
250: /*@
251: PCGAMGGetProlongatorFilter - Get threshold for filtering small entries from the prolongator
253: Not Collective
255: Input Parameter:
256: . pc - the preconditioner context
258: Output Parameter:
259: . thr - threshold value; entries with absolute value below this are dropped (0 disables filtering)
261: Level: intermediate
263: .seealso: [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCGAMG`, `PCGAMGSetProlongatorFilter()`, `PCGAMGSetLowMemoryFilter()`
264: @*/
265: PetscErrorCode PCGAMGGetProlongatorFilter(PC pc, PetscReal *thr)
266: {
267: PetscFunctionBegin;
269: PetscAssertPointer(thr, 2);
270: PetscUseMethod(pc, "PCGAMGGetProlongatorFilter_C", (PC, PetscReal *), (pc, thr));
271: PetscFunctionReturn(PETSC_SUCCESS);
272: }
274: static PetscErrorCode PCGAMGSetAggressiveLevels_AGG(PC pc, PetscInt n)
275: {
276: PC_MG *mg = (PC_MG *)pc->data;
277: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
278: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
280: PetscFunctionBegin;
281: pc_gamg_agg->aggressive_coarsening_levels = n;
282: PetscFunctionReturn(PETSC_SUCCESS);
283: }
285: static PetscErrorCode PCGAMGMISkSetAggressive_AGG(PC pc, PetscInt n)
286: {
287: PC_MG *mg = (PC_MG *)pc->data;
288: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
289: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
291: PetscFunctionBegin;
292: pc_gamg_agg->aggressive_mis_k = n;
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: static PetscErrorCode PCGAMGSetAggressiveSquareGraph_AGG(PC pc, PetscBool b)
297: {
298: PC_MG *mg = (PC_MG *)pc->data;
299: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
300: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
302: PetscFunctionBegin;
303: pc_gamg_agg->use_aggressive_square_graph = b;
304: PetscFunctionReturn(PETSC_SUCCESS);
305: }
307: static PetscErrorCode PCGAMGSetLowMemoryFilter_AGG(PC pc, PetscBool b)
308: {
309: PC_MG *mg = (PC_MG *)pc->data;
310: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
311: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
313: PetscFunctionBegin;
314: pc_gamg_agg->use_low_mem_filter = b;
315: PetscFunctionReturn(PETSC_SUCCESS);
316: }
318: static PetscErrorCode PCGAMGSetGraphSymmetrize_AGG(PC pc, PetscBool b)
319: {
320: PC_MG *mg = (PC_MG *)pc->data;
321: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
322: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
324: PetscFunctionBegin;
325: pc_gamg_agg->graph_symmetrize = b;
326: PetscFunctionReturn(PETSC_SUCCESS);
327: }
329: static PetscErrorCode PCGAMGMISkSetMinDegreeOrdering_AGG(PC pc, PetscBool b)
330: {
331: PC_MG *mg = (PC_MG *)pc->data;
332: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
333: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
335: PetscFunctionBegin;
336: pc_gamg_agg->use_minimum_degree_ordering = b;
337: PetscFunctionReturn(PETSC_SUCCESS);
338: }
340: static PetscErrorCode PCGAMGSetProlongatorFilter_AGG(PC pc, PetscReal thr)
341: {
342: PC_MG *mg = (PC_MG *)pc->data;
343: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
345: PetscFunctionBegin;
346: PetscCheck(thr >= 0.0, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_OUTOFRANGE, "Filter threshold %g must be non-negative", (double)thr);
347: pc_gamg->prolongator_filter = thr;
348: PetscFunctionReturn(PETSC_SUCCESS);
349: }
351: static PetscErrorCode PCGAMGGetProlongatorFilter_AGG(PC pc, PetscReal *thr)
352: {
353: PC_MG *mg = (PC_MG *)pc->data;
354: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
356: PetscFunctionBegin;
357: *thr = pc_gamg->prolongator_filter;
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
361: static PetscErrorCode PCSetFromOptions_GAMG_AGG(PC pc, PetscOptionItems PetscOptionsObject)
362: {
363: PC_MG *mg = (PC_MG *)pc->data;
364: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
365: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
366: PetscBool n_aggressive_flg, old_sq_provided = PETSC_FALSE, new_sq_provided = PETSC_FALSE, new_sqr_graph = pc_gamg_agg->use_aggressive_square_graph;
367: PetscInt nsq_graph_old = 0;
368: PetscReal thr = pc_gamg->prolongator_filter;
369: PetscBool flg;
371: PetscFunctionBegin;
372: PetscOptionsHeadBegin(PetscOptionsObject, "GAMG-AGG options");
373: PetscCall(PetscOptionsInt("-pc_gamg_agg_nsmooths", "number of smoothing steps to construct prolongation, usually 1", "PCGAMGSetNSmooths", pc_gamg_agg->nsmooths, &pc_gamg_agg->nsmooths, NULL));
374: // aggressive coarsening logic with deprecated -pc_gamg_square_graph
375: PetscCall(PetscOptionsInt("-pc_gamg_aggressive_coarsening", "Number of aggressive coarsening (MIS-2) levels from finest", "PCGAMGSetAggressiveLevels", pc_gamg_agg->aggressive_coarsening_levels, &pc_gamg_agg->aggressive_coarsening_levels, &n_aggressive_flg));
376: if (!n_aggressive_flg)
377: PetscCall(PetscOptionsInt("-pc_gamg_square_graph", "Number of aggressive coarsening (MIS-2) levels from finest (deprecated alias for -pc_gamg_aggressive_coarsening)", "PCGAMGSetAggressiveLevels", nsq_graph_old, &nsq_graph_old, &old_sq_provided));
378: PetscCall(PetscOptionsBool("-pc_gamg_aggressive_square_graph", "Use square graph $ (A^T A)$ for aggressive coarsening, if false, MIS-k (k=2) is used, see PCGAMGMISkSetAggressive()", "PCGAMGSetAggressiveSquareGraph", new_sqr_graph, &pc_gamg_agg->use_aggressive_square_graph, &new_sq_provided));
379: if (!new_sq_provided && old_sq_provided) {
380: pc_gamg_agg->aggressive_coarsening_levels = nsq_graph_old; // could be zero
381: pc_gamg_agg->use_aggressive_square_graph = PETSC_TRUE;
382: }
383: if (new_sq_provided && old_sq_provided)
384: PetscCall(PetscInfo(pc, "Warning: both -pc_gamg_square_graph and -pc_gamg_aggressive_coarsening are used. -pc_gamg_square_graph is deprecated, Number of aggressive levels is %" PetscInt_FMT "\n", pc_gamg_agg->aggressive_coarsening_levels));
385: PetscCall(PetscOptionsBool("-pc_gamg_mis_k_minimum_degree_ordering", "Use minimum degree ordering for greedy MIS", "PCGAMGMISkSetMinDegreeOrdering", pc_gamg_agg->use_minimum_degree_ordering, &pc_gamg_agg->use_minimum_degree_ordering, NULL));
386: PetscCall(PetscOptionsBool("-pc_gamg_low_memory_threshold_filter", "Use the (built-in) low memory graph/matrix filter", "PCGAMGSetLowMemoryFilter", pc_gamg_agg->use_low_mem_filter, &pc_gamg_agg->use_low_mem_filter, NULL));
387: PetscCall(PetscOptionsInt("-pc_gamg_aggressive_mis_k", "Number of levels of multigrid to use.", "PCGAMGMISkSetAggressive", pc_gamg_agg->aggressive_mis_k, &pc_gamg_agg->aggressive_mis_k, NULL));
388: PetscCall(PetscOptionsBool("-pc_gamg_graph_symmetrize", "Symmetrize graph for coarsening", "PCGAMGSetGraphSymmetrize", pc_gamg_agg->graph_symmetrize, &pc_gamg_agg->graph_symmetrize, NULL));
389: PetscCall(PetscOptionsReal("-pc_gamg_prolongator_filter", "Threshold for filtering small entries from prolongator (0=disabled)", "PCGAMGSetProlongatorFilter", thr, &thr, &flg));
390: if (flg) PetscCall(PCGAMGSetProlongatorFilter(pc, thr));
392: PetscOptionsHeadEnd();
393: PetscFunctionReturn(PETSC_SUCCESS);
394: }
396: static PetscErrorCode PCDestroy_GAMG_AGG(PC pc)
397: {
398: PC_MG *mg = (PC_MG *)pc->data;
399: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
400: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
402: PetscFunctionBegin;
403: PetscCall(MatCoarsenDestroy(&pc_gamg_agg->crs));
404: PetscCall(PetscFree(pc_gamg->subctx));
405: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetNSmooths_C", NULL));
406: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveLevels_C", NULL));
407: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetAggressive_C", NULL));
408: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetMinDegreeOrdering_C", NULL));
409: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetLowMemoryFilter_C", NULL));
410: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveSquareGraph_C", NULL));
411: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetGraphSymmetrize_C", NULL));
412: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetProlongatorFilter_C", NULL));
413: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGGetProlongatorFilter_C", NULL));
414: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", NULL));
415: PetscFunctionReturn(PETSC_SUCCESS);
416: }
418: /*
419: PCSetCoordinates_AGG
421: Collective
423: Input Parameter:
424: . pc - the preconditioner context
425: . ndm - dimension of data (used for dof/vertex for Stokes)
426: . a_nloc - number of vertices local
427: . coords - [a_nloc][ndm] - interleaved coordinate data: {x_0, y_0, z_0, x_1, y_1, ...}
428: */
430: static PetscErrorCode PCSetCoordinates_AGG(PC pc, PetscInt ndm, PetscInt a_nloc, PetscReal *coords)
431: {
432: PC_MG *mg = (PC_MG *)pc->data;
433: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
434: PetscInt arrsz, kk, ii, jj, nloc, ndatarows, ndf;
435: Mat mat = pc->pmat;
437: PetscFunctionBegin;
440: nloc = a_nloc;
442: /* SA: null space vectors */
443: PetscCall(MatGetBlockSize(mat, &ndf)); /* this does not work for Stokes */
444: if (coords && ndf == 1) pc_gamg->data_cell_cols = 1; /* scalar w/ coords and SA (not needed) */
445: else if (coords) {
446: PetscCheck(ndm <= ndf, PETSC_COMM_SELF, PETSC_ERR_PLIB, "degrees of motion %" PetscInt_FMT " > block size %" PetscInt_FMT, ndm, ndf);
447: pc_gamg->data_cell_cols = (ndm == 2 ? 3 : 6); /* displacement elasticity */
448: if (ndm != ndf) PetscCheck(pc_gamg->data_cell_cols == ndf, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Don't know how to create null space for ndm=%" PetscInt_FMT ", ndf=%" PetscInt_FMT ". Use MatSetNearNullSpace().", ndm, ndf);
449: } else pc_gamg->data_cell_cols = ndf; /* no data, force SA with constant null space vectors */
450: pc_gamg->data_cell_rows = ndatarows = ndf;
451: PetscCheck(pc_gamg->data_cell_cols > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "pc_gamg->data_cell_cols %" PetscInt_FMT " <= 0", pc_gamg->data_cell_cols);
452: arrsz = nloc * pc_gamg->data_cell_rows * pc_gamg->data_cell_cols;
454: if (!pc_gamg->data || (pc_gamg->data_sz != arrsz)) {
455: PetscCall(PetscFree(pc_gamg->data));
456: PetscCall(PetscMalloc1(arrsz + 1, &pc_gamg->data));
457: }
458: /* copy data in - column-oriented */
459: for (kk = 0; kk < nloc; kk++) {
460: const PetscInt M = nloc * pc_gamg->data_cell_rows; /* stride into data */
461: PetscReal *data = &pc_gamg->data[kk * ndatarows]; /* start of cell */
463: if (pc_gamg->data_cell_cols == 1) *data = 1.0;
464: else {
465: /* translational modes */
466: for (ii = 0; ii < ndatarows; ii++) {
467: for (jj = 0; jj < ndatarows; jj++) {
468: if (ii == jj) data[ii * M + jj] = 1.0;
469: else data[ii * M + jj] = 0.0;
470: }
471: }
473: /* rotational modes */
474: if (coords) {
475: if (ndm == 2) {
476: data += 2 * M;
477: data[0] = -coords[2 * kk + 1];
478: data[1] = coords[2 * kk];
479: } else {
480: data += 3 * M;
481: data[0] = 0.0;
482: data[M + 0] = coords[3 * kk + 2];
483: data[2 * M + 0] = -coords[3 * kk + 1];
484: data[1] = -coords[3 * kk + 2];
485: data[M + 1] = 0.0;
486: data[2 * M + 1] = coords[3 * kk];
487: data[2] = coords[3 * kk + 1];
488: data[M + 2] = -coords[3 * kk];
489: data[2 * M + 2] = 0.0;
490: }
491: }
492: }
493: }
494: pc_gamg->data_sz = arrsz;
495: PetscFunctionReturn(PETSC_SUCCESS);
496: }
498: /*
499: PCSetData_AGG - called if data is not set with PCSetCoordinates.
500: Looks in Mat for near null space.
501: Does not work for Stokes
503: Input Parameter:
504: . pc -
505: . a_A - matrix to get (near) null space out of.
506: */
507: static PetscErrorCode PCSetData_AGG(PC pc, Mat a_A)
508: {
509: PC_MG *mg = (PC_MG *)pc->data;
510: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
511: MatNullSpace mnull;
513: PetscFunctionBegin;
514: PetscCall(MatGetNearNullSpace(a_A, &mnull));
515: if (!mnull) {
516: DM dm;
518: PetscCall(PCGetDM(pc, &dm));
519: if (!dm) PetscCall(MatGetDM(a_A, &dm));
520: if (dm) {
521: PetscObject deformation;
522: PetscInt Nf;
524: PetscCall(DMGetNumFields(dm, &Nf));
525: if (Nf) {
526: PetscCall(DMGetField(dm, 0, NULL, &deformation));
527: if (deformation) {
528: PetscCall(PetscObjectQuery(deformation, "nearnullspace", (PetscObject *)&mnull));
529: if (!mnull) PetscCall(PetscObjectQuery(deformation, "nullspace", (PetscObject *)&mnull));
530: }
531: }
532: }
533: }
535: if (!mnull) {
536: PetscInt bs, NN, MM;
538: PetscCall(MatGetBlockSize(a_A, &bs));
539: PetscCall(MatGetLocalSize(a_A, &MM, &NN));
540: PetscCheck(MM % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MM %" PetscInt_FMT " must be divisible by bs %" PetscInt_FMT, MM, bs);
541: PetscCall(PCSetCoordinates_AGG(pc, bs, MM / bs, NULL));
542: } else {
543: PetscReal *nullvec;
544: PetscBool has_const;
545: PetscInt i, j, mlocal, nvec, bs;
546: const Vec *vecs;
547: const PetscScalar *v;
549: PetscCall(MatGetLocalSize(a_A, &mlocal, NULL));
550: PetscCall(MatNullSpaceGetVecs(mnull, &has_const, &nvec, &vecs));
551: for (i = 0; i < nvec; i++) {
552: PetscCall(VecGetLocalSize(vecs[i], &j));
553: PetscCheck(j == mlocal, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Attached null space vector size %" PetscInt_FMT " != matrix size %" PetscInt_FMT, j, mlocal);
554: }
555: pc_gamg->data_sz = (nvec + !!has_const) * mlocal;
556: PetscCall(PetscMalloc1((nvec + !!has_const) * mlocal, &nullvec));
557: if (has_const)
558: for (i = 0; i < mlocal; i++) nullvec[i] = 1.0;
559: for (i = 0; i < nvec; i++) {
560: PetscCall(VecGetArrayRead(vecs[i], &v));
561: for (j = 0; j < mlocal; j++) nullvec[(i + !!has_const) * mlocal + j] = PetscRealPart(v[j]);
562: PetscCall(VecRestoreArrayRead(vecs[i], &v));
563: }
564: pc_gamg->data = nullvec;
565: pc_gamg->data_cell_cols = (nvec + !!has_const);
566: PetscCall(MatGetBlockSize(a_A, &bs));
567: pc_gamg->data_cell_rows = bs;
568: }
569: PetscFunctionReturn(PETSC_SUCCESS);
570: }
572: /*
573: formProl0 - collect null space data for each aggregate, do QR, put R in coarse grid data and Q in P_0
575: Input Parameter:
576: . agg_llists - list of arrays with aggregates -- list from selected vertices of aggregate unselected vertices
577: . bs - row block size
578: . nSAvec - column bs of new P
579: . my0crs - global index of start of locals
580: . data_stride - bs*(nloc nodes + ghost nodes) [data_stride][nSAvec]
581: . data_in[data_stride*nSAvec] - local data on fine grid
582: . flid_fgid[data_stride/bs] - make local to global IDs, includes ghosts in 'locals_llist'
584: Output Parameter:
585: . a_data_out - in with fine grid data (w/ghosts), out with coarse grid data
586: . a_Prol - prolongation operator
587: */
588: static PetscErrorCode formProl0(PetscCoarsenData *agg_llists, PetscInt bs, PetscInt nSAvec, PetscInt my0crs, PetscInt data_stride, PetscReal data_in[], const PetscInt flid_fgid[], PetscReal **a_data_out, Mat a_Prol)
589: {
590: PetscInt Istart, my0, Iend, nloc, clid, flid = 0, aggID, kk, jj, ii, mm, nSelected, minsz, nghosts, out_data_stride;
591: MPI_Comm comm;
592: PetscReal *out_data;
593: PetscCDIntNd *pos;
594: PetscHMapI fgid_flid;
596: PetscFunctionBegin;
597: PetscCall(PetscObjectGetComm((PetscObject)a_Prol, &comm));
598: PetscCall(MatGetOwnershipRange(a_Prol, &Istart, &Iend));
599: nloc = (Iend - Istart) / bs;
600: my0 = Istart / bs;
601: PetscCheck((Iend - Istart) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Iend %" PetscInt_FMT " - Istart %" PetscInt_FMT " must be divisible by bs %" PetscInt_FMT, Iend, Istart, bs);
602: Iend /= bs;
603: nghosts = data_stride / bs - nloc;
605: PetscCall(PetscHMapICreateWithSize(2 * nghosts + 1, &fgid_flid));
607: for (kk = 0; kk < nghosts; kk++) PetscCall(PetscHMapISet(fgid_flid, flid_fgid[nloc + kk], nloc + kk));
609: /* count selected -- same as number of cols of P */
610: for (nSelected = mm = 0; mm < nloc; mm++) {
611: PetscBool ise;
613: PetscCall(PetscCDIsEmptyAt(agg_llists, mm, &ise));
614: if (!ise) nSelected++;
615: }
616: PetscCall(MatGetOwnershipRangeColumn(a_Prol, &ii, &jj));
617: PetscCheck((ii / nSAvec) == my0crs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "ii %" PetscInt_FMT " /nSAvec %" PetscInt_FMT " != my0crs %" PetscInt_FMT, ii, nSAvec, my0crs);
618: PetscCheck(nSelected == (jj - ii) / nSAvec, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nSelected %" PetscInt_FMT " != (jj %" PetscInt_FMT " - ii %" PetscInt_FMT ")/nSAvec %" PetscInt_FMT, nSelected, jj, ii, nSAvec);
620: /* aloc space for coarse point data (output) */
621: out_data_stride = nSelected * nSAvec;
623: PetscCall(PetscMalloc1(out_data_stride * nSAvec, &out_data));
624: for (ii = 0; ii < out_data_stride * nSAvec; ii++) out_data[ii] = PETSC_MAX_REAL;
625: *a_data_out = out_data; /* output - stride nSelected*nSAvec */
627: /* find points and set prolongation */
628: minsz = 100;
629: for (mm = clid = 0; mm < nloc; mm++) {
630: PetscCall(PetscCDCountAt(agg_llists, mm, &jj));
631: if (jj > 0) {
632: const PetscInt lid = mm, cgid = my0crs + clid;
633: PetscInt cids[100]; /* max bs */
634: PetscBLASInt asz, M, N, INFO;
635: PetscBLASInt Mdata, LDA, LWORK;
636: PetscScalar *qqc, *qqr, *TAU, *WORK;
637: PetscInt *fids;
638: PetscReal *data;
640: PetscCall(PetscBLASIntCast(jj, &asz));
641: PetscCall(PetscBLASIntCast(asz * bs, &M));
642: PetscCall(PetscBLASIntCast(nSAvec, &N));
643: PetscCall(PetscBLASIntCast(M + ((N - M > 0) ? N - M : 0), &Mdata));
644: PetscCall(PetscBLASIntCast(Mdata, &LDA));
645: PetscCall(PetscBLASIntCast(N * bs, &LWORK));
646: /* count agg */
647: if (asz < minsz) minsz = asz;
649: /* get block */
650: PetscCall(PetscMalloc5(Mdata * N, &qqc, M * N, &qqr, N, &TAU, LWORK, &WORK, M, &fids));
652: aggID = 0;
653: PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
654: while (pos) {
655: PetscInt gid1;
657: PetscCall(PetscCDIntNdGetID(pos, &gid1));
658: PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos));
660: if (gid1 >= my0 && gid1 < Iend) flid = gid1 - my0;
661: else {
662: PetscCall(PetscHMapIGet(fgid_flid, gid1, &flid));
663: PetscCheck(flid >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot find gid1 in table");
664: }
665: /* copy in B_i matrix - column-oriented */
666: data = &data_in[flid * bs];
667: for (ii = 0; ii < bs; ii++) {
668: for (jj = 0; jj < N; jj++) {
669: PetscReal d = data[jj * data_stride + ii];
671: qqc[jj * Mdata + aggID * bs + ii] = d;
672: }
673: }
674: /* set fine IDs */
675: for (kk = 0; kk < bs; kk++) fids[aggID * bs + kk] = flid_fgid[flid] * bs + kk;
676: aggID++;
677: }
679: /* pad with zeros */
680: for (ii = asz * bs; ii < Mdata; ii++) {
681: for (jj = 0; jj < N; jj++, kk++) qqc[jj * Mdata + ii] = .0;
682: }
684: /* QR */
685: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
686: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Mdata, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
687: PetscCall(PetscFPTrapPop());
688: PetscCheck(INFO == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xGEQRF error");
689: /* get R - column-oriented - output B_{i+1} */
690: {
691: PetscReal *data = &out_data[clid * nSAvec];
693: for (jj = 0; jj < nSAvec; jj++) {
694: for (ii = 0; ii < nSAvec; ii++) {
695: PetscCheck(data[jj * out_data_stride + ii] == PETSC_MAX_REAL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "data[jj*out_data_stride + ii] != %e", (double)PETSC_MAX_REAL);
696: if (ii <= jj) data[jj * out_data_stride + ii] = PetscRealPart(qqc[jj * Mdata + ii]);
697: else data[jj * out_data_stride + ii] = 0.;
698: }
699: }
700: }
702: /* get Q - row-oriented */
703: PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Mdata, &N, &N, qqc, &LDA, TAU, WORK, &LWORK, &INFO));
704: PetscCheck(INFO == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "xORGQR error arg %" PetscBLASInt_FMT, -INFO);
706: for (ii = 0; ii < M; ii++) {
707: for (jj = 0; jj < N; jj++) qqr[N * ii + jj] = qqc[jj * Mdata + ii];
708: }
710: /* add diagonal block of P0 */
711: for (kk = 0; kk < N; kk++) cids[kk] = N * cgid + kk; /* global col IDs in P0 */
712: PetscCall(MatSetValues(a_Prol, M, fids, N, cids, qqr, INSERT_VALUES));
713: PetscCall(PetscFree5(qqc, qqr, TAU, WORK, fids));
714: clid++;
715: } /* coarse agg */
716: } /* for all fine nodes */
717: PetscCall(MatAssemblyBegin(a_Prol, MAT_FINAL_ASSEMBLY));
718: PetscCall(MatAssemblyEnd(a_Prol, MAT_FINAL_ASSEMBLY));
719: PetscCall(PetscHMapIDestroy(&fgid_flid));
720: PetscFunctionReturn(PETSC_SUCCESS);
721: }
723: static PetscErrorCode PCView_GAMG_AGG(PC pc, PetscViewer viewer)
724: {
725: PC_MG *mg = (PC_MG *)pc->data;
726: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
727: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
729: PetscFunctionBegin;
730: PetscCall(PetscViewerASCIIPrintf(viewer, " AGG specific options\n"));
731: PetscCall(PetscViewerASCIIPrintf(viewer, " Number of levels of aggressive coarsening %" PetscInt_FMT "\n", pc_gamg_agg->aggressive_coarsening_levels));
732: if (pc_gamg_agg->aggressive_coarsening_levels > 0) {
733: PetscCall(PetscViewerASCIIPrintf(viewer, " %s aggressive coarsening\n", !pc_gamg_agg->use_aggressive_square_graph ? "MIS-k" : "Square graph"));
734: if (!pc_gamg_agg->use_aggressive_square_graph) PetscCall(PetscViewerASCIIPrintf(viewer, " MIS-%" PetscInt_FMT " coarsening on aggressive levels\n", pc_gamg_agg->aggressive_mis_k));
735: }
736: PetscCall(PetscViewerASCIIPushTab(viewer));
737: PetscCall(PetscViewerASCIIPushTab(viewer));
738: PetscCall(PetscViewerASCIIPushTab(viewer));
739: PetscCall(PetscViewerASCIIPushTab(viewer));
740: if (pc_gamg_agg->crs) PetscCall(MatCoarsenView(pc_gamg_agg->crs, viewer));
741: else PetscCall(PetscViewerASCIIPrintf(viewer, "Coarsening algorithm not yet selected\n"));
742: PetscCall(PetscViewerASCIIPopTab(viewer));
743: PetscCall(PetscViewerASCIIPopTab(viewer));
744: PetscCall(PetscViewerASCIIPopTab(viewer));
745: PetscCall(PetscViewerASCIIPopTab(viewer));
746: PetscCall(PetscViewerASCIIPrintf(viewer, " Number smoothing steps to construct prolongation %" PetscInt_FMT "\n", pc_gamg_agg->nsmooths));
747: PetscFunctionReturn(PETSC_SUCCESS);
748: }
750: static PetscErrorCode PCGAMGCreateGraph_AGG(PC pc, Mat Amat, Mat *a_Gmat)
751: {
752: PC_MG *mg = (PC_MG *)pc->data;
753: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
754: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
755: const PetscReal vfilter = pc_gamg->threshold[pc_gamg->current_level];
756: PetscBool ishem, ismis;
757: const char *prefix;
758: MatInfo info0, info1;
759: PetscInt bs;
761: PetscFunctionBegin;
762: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
763: /* Note: depending on the algorithm that will be used for computing the coarse grid points this should pass PETSC_TRUE or PETSC_FALSE as the first argument */
764: /* MATCOARSENHEM requires numerical weights for edges so ensure they are computed */
765: PetscCall(MatCoarsenDestroy(&pc_gamg_agg->crs));
766: PetscCall(MatCoarsenCreate(PetscObjectComm((PetscObject)pc), &pc_gamg_agg->crs));
767: PetscCall(PetscObjectGetOptionsPrefix((PetscObject)pc, &prefix));
768: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)pc_gamg_agg->crs, prefix));
769: PetscCall(PetscObjectAppendOptionsPrefix((PetscObject)pc_gamg_agg->crs, "pc_gamg_"));
770: PetscCall(MatCoarsenSetFromOptions(pc_gamg_agg->crs));
771: PetscCall(MatGetBlockSize(Amat, &bs));
772: // check for valid indices wrt bs
773: for (int ii = 0; ii < pc_gamg_agg->crs->strength_index_size; ii++) {
774: PetscCheck(pc_gamg_agg->crs->strength_index[ii] >= 0 && pc_gamg_agg->crs->strength_index[ii] < bs, PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Indices (%" PetscInt_FMT ") must be non-negative and < block size (%" PetscInt_FMT "), NB, can not use -mat_coarsen_strength_index with -mat_coarsen_strength_index",
775: pc_gamg_agg->crs->strength_index[ii], bs);
776: }
777: PetscCall(PetscObjectTypeCompare((PetscObject)pc_gamg_agg->crs, MATCOARSENHEM, &ishem));
778: if (ishem) {
779: if (pc_gamg_agg->aggressive_coarsening_levels) PetscCall(PetscInfo(pc, "HEM and aggressive coarsening ignored: HEM using %" PetscInt_FMT " iterations\n", pc_gamg_agg->crs->max_it));
780: pc_gamg_agg->aggressive_coarsening_levels = 0; // aggressive and HEM does not make sense
781: PetscCall(MatCoarsenSetMaximumIterations(pc_gamg_agg->crs, pc_gamg_agg->crs->max_it)); // for code coverage
782: PetscCall(MatCoarsenSetThreshold(pc_gamg_agg->crs, vfilter)); // for code coverage
783: } else {
784: PetscCall(PetscObjectTypeCompare((PetscObject)pc_gamg_agg->crs, MATCOARSENMIS, &ismis));
785: if (ismis && pc_gamg_agg->aggressive_coarsening_levels && !pc_gamg_agg->use_aggressive_square_graph) {
786: PetscCall(PetscInfo(pc, "MIS and aggressive coarsening and no square graph: force square graph\n"));
787: pc_gamg_agg->use_aggressive_square_graph = PETSC_TRUE;
788: }
789: }
790: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
791: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_GRAPH], 0, 0, 0, 0));
792: PetscCall(MatGetInfo(Amat, MAT_LOCAL, &info0)); /* global reduction */
794: if (ishem || pc_gamg_agg->use_low_mem_filter) {
795: PetscCall(MatCreateGraph(Amat, pc_gamg_agg->graph_symmetrize, (vfilter >= 0 || ishem) ? PETSC_TRUE : PETSC_FALSE, vfilter, pc_gamg_agg->crs->strength_index_size, pc_gamg_agg->crs->strength_index, a_Gmat));
796: } else {
797: // make scalar graph, symmetrize if not known to be symmetric, scale, but do not filter (expensive)
798: PetscCall(MatCreateGraph(Amat, pc_gamg_agg->graph_symmetrize, PETSC_TRUE, -1, pc_gamg_agg->crs->strength_index_size, pc_gamg_agg->crs->strength_index, a_Gmat));
799: if (vfilter >= 0) {
800: PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
801: Mat tGmat, Gmat = *a_Gmat;
802: MPI_Comm comm;
803: const PetscScalar *vals;
804: const PetscInt *idx;
805: PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
806: MatScalar *AA; // this is checked in graph
807: PetscBool isseqaij;
808: Mat a, b, c;
809: MatType jtype;
811: PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
812: PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
813: PetscCall(MatGetType(Gmat, &jtype));
814: PetscCall(MatCreate(comm, &tGmat));
815: PetscCall(MatSetType(tGmat, jtype));
817: /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
818: Also, if the matrix is symmetric, can we skip this
819: operation? It can be very expensive on large matrices. */
821: // global sizes
822: PetscCall(MatGetSize(Gmat, &MM, &NN));
823: PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
824: nloc = Iend - Istart;
825: PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
826: if (isseqaij) {
827: a = Gmat;
828: b = NULL;
829: } else {
830: Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
832: a = d->A;
833: b = d->B;
834: garray = d->garray;
835: }
836: /* Determine upper bound on non-zeros needed in new filtered matrix */
837: for (PetscInt row = 0; row < nloc; row++) {
838: PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
839: d_nnz[row] = ncols;
840: if (ncols > maxcols) maxcols = ncols;
841: PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
842: }
843: if (b) {
844: for (PetscInt row = 0; row < nloc; row++) {
845: PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
846: o_nnz[row] = ncols;
847: if (ncols > maxcols) maxcols = ncols;
848: PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
849: }
850: }
851: PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
852: PetscCall(MatSetBlockSizes(tGmat, 1, 1));
853: PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
854: PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
855: PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
856: PetscCall(PetscFree2(d_nnz, o_nnz));
857: PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
858: nnz0 = nnz1 = 0;
859: for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
860: for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
861: PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
862: for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
863: PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
864: if (PetscRealPart(sv) > vfilter) {
865: PetscInt cid = idx[jj] + Istart; //diag
867: nnz1++;
868: if (c != a) cid = garray[idx[jj]];
869: AA[ncol_row] = vals[jj];
870: AJ[ncol_row] = cid;
871: ncol_row++;
872: }
873: }
874: PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
875: PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
876: }
877: }
878: PetscCall(PetscFree2(AA, AJ));
879: PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
880: PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
881: PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
882: PetscCall(PetscInfo(pc, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %" PetscInt_FMT "\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, maxcols));
883: PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
884: PetscCall(MatDestroy(&Gmat));
885: *a_Gmat = tGmat;
886: }
887: }
889: PetscCall(MatGetInfo(*a_Gmat, MAT_LOCAL, &info1)); /* global reduction */
890: if (info0.nz_used > 0) PetscCall(PetscInfo(pc, "Filtering left %g %% edges in graph (%e %e)\n", 100.0 * info1.nz_used * (double)(bs * bs) / info0.nz_used, info0.nz_used, info1.nz_used));
891: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_GRAPH], 0, 0, 0, 0));
892: PetscFunctionReturn(PETSC_SUCCESS);
893: }
895: typedef PetscInt NState;
896: static const NState NOT_DONE = -2;
897: static const NState DELETED = -1;
898: static const NState REMOVED = -3;
899: #define IS_SELECTED(s) (s != DELETED && s != NOT_DONE && s != REMOVED)
901: /*
902: fixAggregatesWithSquare - greedy grab of with G1 (unsquared graph) -- AIJ specific -- change to fixAggregatesWithSquare -- TODD
903: - AGG-MG specific: clears singletons out of 'selected_2'
905: Input Parameter:
906: . Gmat_2 - global matrix of squared graph (data not defined)
907: . Gmat_1 - base graph to grab with base graph
908: Input/Output Parameter:
909: . aggs_2 - linked list of aggs with gids)
910: */
911: static PetscErrorCode fixAggregatesWithSquare(PC pc, Mat Gmat_2, Mat Gmat_1, PetscCoarsenData *aggs_2)
912: {
913: PetscBool isMPI;
914: Mat_SeqAIJ *matA_1, *matB_1 = NULL;
915: MPI_Comm comm;
916: PetscInt lid, *ii, *idx, ix, Iend, my0, kk, n, j;
917: Mat_MPIAIJ *mpimat_2 = NULL, *mpimat_1 = NULL;
918: const PetscInt nloc = Gmat_2->rmap->n;
919: PetscScalar *cpcol_1_state, *cpcol_2_state, *cpcol_2_par_orig, *lid_parent_gid;
920: PetscInt *lid_cprowID_1 = NULL;
921: NState *lid_state;
922: Vec ghost_par_orig2;
923: PetscMPIInt rank;
925: PetscFunctionBegin;
926: PetscCall(PetscObjectGetComm((PetscObject)Gmat_2, &comm));
927: PetscCallMPI(MPI_Comm_rank(comm, &rank));
928: PetscCall(MatGetOwnershipRange(Gmat_1, &my0, &Iend));
930: /* get submatrices */
931: PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name, MATMPIAIJ, &isMPI));
932: PetscCall(PetscInfo(pc, "isMPI = %s\n", isMPI ? "yes" : "no"));
933: PetscCall(PetscMalloc3(nloc, &lid_state, nloc, &lid_parent_gid, nloc, &lid_cprowID_1));
934: for (lid = 0; lid < nloc; lid++) lid_cprowID_1[lid] = -1;
935: if (isMPI) {
936: /* grab matrix objects */
937: mpimat_2 = (Mat_MPIAIJ *)Gmat_2->data;
938: mpimat_1 = (Mat_MPIAIJ *)Gmat_1->data;
939: matA_1 = (Mat_SeqAIJ *)mpimat_1->A->data;
940: matB_1 = (Mat_SeqAIJ *)mpimat_1->B->data;
942: /* force compressed row storage for B matrix in AuxMat */
943: PetscCall(MatCheckCompressedRow(mpimat_1->B, matB_1->nonzerorowcnt, &matB_1->compressedrow, matB_1->i, Gmat_1->rmap->n, -1.0));
944: for (ix = 0; ix < matB_1->compressedrow.nrows; ix++) {
945: PetscInt lid = matB_1->compressedrow.rindex[ix];
947: PetscCheck(lid <= nloc && lid >= -1, PETSC_COMM_SELF, PETSC_ERR_USER, "lid %" PetscInt_FMT " out of range. nloc = %" PetscInt_FMT, lid, nloc);
948: if (lid != -1) lid_cprowID_1[lid] = ix;
949: }
950: } else {
951: PetscBool isAIJ;
953: PetscCall(PetscStrbeginswith(((PetscObject)Gmat_1)->type_name, MATSEQAIJ, &isAIJ));
954: PetscCheck(isAIJ, PETSC_COMM_SELF, PETSC_ERR_USER, "Require AIJ matrix.");
955: matA_1 = (Mat_SeqAIJ *)Gmat_1->data;
956: }
957: if (nloc > 0) PetscCheck(!matB_1 || matB_1->compressedrow.use, PETSC_COMM_SELF, PETSC_ERR_PLIB, "matB_1 && !matB_1->compressedrow.use: PETSc bug???");
958: /* get state of locals and selected gid for deleted */
959: for (lid = 0; lid < nloc; lid++) {
960: lid_parent_gid[lid] = -1.0;
961: lid_state[lid] = DELETED;
962: }
964: /* set lid_state */
965: for (lid = 0; lid < nloc; lid++) {
966: PetscCDIntNd *pos;
968: PetscCall(PetscCDGetHeadPos(aggs_2, lid, &pos));
969: if (pos) {
970: PetscInt gid1;
972: PetscCall(PetscCDIntNdGetID(pos, &gid1));
973: PetscCheck(gid1 == lid + my0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "gid1 %" PetscInt_FMT " != lid %" PetscInt_FMT " + my0 %" PetscInt_FMT, gid1, lid, my0);
974: lid_state[lid] = gid1;
975: }
976: }
978: /* map local to selected local, DELETED means a ghost owns it */
979: for (lid = 0; lid < nloc; lid++) {
980: NState state = lid_state[lid];
982: if (IS_SELECTED(state)) {
983: PetscCDIntNd *pos;
985: PetscCall(PetscCDGetHeadPos(aggs_2, lid, &pos));
986: while (pos) {
987: PetscInt gid1;
989: PetscCall(PetscCDIntNdGetID(pos, &gid1));
990: PetscCall(PetscCDGetNextPos(aggs_2, lid, &pos));
991: if (gid1 >= my0 && gid1 < Iend) lid_parent_gid[gid1 - my0] = (PetscScalar)(lid + my0);
992: }
993: }
994: }
995: /* get 'cpcol_1/2_state' & cpcol_2_par_orig - uses mpimat_1/2->lvec for temp space */
996: if (isMPI) {
997: Vec tempVec;
999: /* get 'cpcol_1_state' */
1000: PetscCall(MatCreateVecs(Gmat_1, &tempVec, NULL));
1001: for (kk = 0, j = my0; kk < nloc; kk++, j++) {
1002: PetscScalar v = (PetscScalar)lid_state[kk];
1004: PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
1005: }
1006: PetscCall(VecAssemblyBegin(tempVec));
1007: PetscCall(VecAssemblyEnd(tempVec));
1008: PetscCall(VecScatterBegin(mpimat_1->Mvctx, tempVec, mpimat_1->lvec, INSERT_VALUES, SCATTER_FORWARD));
1009: PetscCall(VecScatterEnd(mpimat_1->Mvctx, tempVec, mpimat_1->lvec, INSERT_VALUES, SCATTER_FORWARD));
1010: PetscCall(VecGetArray(mpimat_1->lvec, &cpcol_1_state));
1011: /* get 'cpcol_2_state' */
1012: PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, mpimat_2->lvec, INSERT_VALUES, SCATTER_FORWARD));
1013: PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, mpimat_2->lvec, INSERT_VALUES, SCATTER_FORWARD));
1014: PetscCall(VecGetArray(mpimat_2->lvec, &cpcol_2_state));
1015: /* get 'cpcol_2_par_orig' */
1016: for (kk = 0, j = my0; kk < nloc; kk++, j++) {
1017: PetscScalar v = lid_parent_gid[kk];
1019: PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
1020: }
1021: PetscCall(VecAssemblyBegin(tempVec));
1022: PetscCall(VecAssemblyEnd(tempVec));
1023: PetscCall(VecDuplicate(mpimat_2->lvec, &ghost_par_orig2));
1024: PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, ghost_par_orig2, INSERT_VALUES, SCATTER_FORWARD));
1025: PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, ghost_par_orig2, INSERT_VALUES, SCATTER_FORWARD));
1026: PetscCall(VecGetArray(ghost_par_orig2, &cpcol_2_par_orig));
1028: PetscCall(VecDestroy(&tempVec));
1029: } /* ismpi */
1030: for (lid = 0; lid < nloc; lid++) {
1031: NState state = lid_state[lid];
1033: if (IS_SELECTED(state)) {
1034: /* steal locals */
1035: ii = matA_1->i;
1036: n = ii[lid + 1] - ii[lid];
1037: idx = matA_1->j + ii[lid];
1038: for (j = 0; j < n; j++) {
1039: PetscInt lidj = idx[j], sgid;
1040: NState statej = lid_state[lidj];
1042: if (statej == DELETED && (sgid = (PetscInt)PetscRealPart(lid_parent_gid[lidj])) != lid + my0) { /* steal local */
1043: lid_parent_gid[lidj] = (PetscScalar)(lid + my0); /* send this if sgid is not local */
1044: if (sgid >= my0 && sgid < Iend) { /* I'm stealing this local from a local sgid */
1045: PetscInt hav = 0, slid = sgid - my0, gidj = lidj + my0;
1046: PetscCDIntNd *pos, *last = NULL;
1048: /* looking for local from local so id_llist_2 works */
1049: PetscCall(PetscCDGetHeadPos(aggs_2, slid, &pos));
1050: while (pos) {
1051: PetscInt gid;
1053: PetscCall(PetscCDIntNdGetID(pos, &gid));
1054: if (gid == gidj) {
1055: PetscCheck(last, PETSC_COMM_SELF, PETSC_ERR_PLIB, "last cannot be null");
1056: PetscCall(PetscCDRemoveNextNode(aggs_2, slid, last));
1057: PetscCall(PetscCDAppendNode(aggs_2, lid, pos));
1058: hav = 1;
1059: break;
1060: } else last = pos;
1061: PetscCall(PetscCDGetNextPos(aggs_2, slid, &pos));
1062: }
1063: if (hav != 1) {
1064: PetscCheck(hav, PETSC_COMM_SELF, PETSC_ERR_PLIB, "failed to find adj in 'selected' lists - structurally unsymmetric matrix");
1065: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "found node %" PetscInt_FMT " times???", hav);
1066: }
1067: } else { /* I'm stealing this local, owned by a ghost */
1068: PetscCheck(sgid == -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Mat has an un-symmetric graph. Use '-%spc_gamg_sym_graph true' to symmetrize the graph or '-%spc_gamg_threshold -1' if the matrix is structurally symmetric.",
1069: ((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "", ((PetscObject)pc)->prefix ? ((PetscObject)pc)->prefix : "");
1070: PetscCall(PetscCDAppendID(aggs_2, lid, lidj + my0));
1071: }
1072: }
1073: } /* local neighbors */
1074: } else if (state == DELETED /* && lid_cprowID_1 */) {
1075: PetscInt sgidold = (PetscInt)PetscRealPart(lid_parent_gid[lid]);
1077: /* see if I have a selected ghost neighbor that will steal me */
1078: if ((ix = lid_cprowID_1[lid]) != -1) {
1079: ii = matB_1->compressedrow.i;
1080: n = ii[ix + 1] - ii[ix];
1081: idx = matB_1->j + ii[ix];
1082: for (j = 0; j < n; j++) {
1083: PetscInt cpid = idx[j];
1084: NState statej = (NState)PetscRealPart(cpcol_1_state[cpid]);
1086: if (IS_SELECTED(statej) && sgidold != statej) { /* ghost will steal this, remove from my list */
1087: lid_parent_gid[lid] = (PetscScalar)statej; /* send who selected */
1088: if (sgidold >= my0 && sgidold < Iend) { /* this was mine */
1089: PetscInt hav = 0, oldslidj = sgidold - my0;
1090: PetscCDIntNd *pos, *last = NULL;
1092: /* remove from 'oldslidj' list */
1093: PetscCall(PetscCDGetHeadPos(aggs_2, oldslidj, &pos));
1094: while (pos) {
1095: PetscInt gid;
1097: PetscCall(PetscCDIntNdGetID(pos, &gid));
1098: if (lid + my0 == gid) {
1099: /* id_llist_2[lastid] = id_llist_2[flid]; /\* remove lid from oldslidj list *\/ */
1100: PetscCheck(last, PETSC_COMM_SELF, PETSC_ERR_PLIB, "last cannot be null");
1101: PetscCall(PetscCDRemoveNextNode(aggs_2, oldslidj, last));
1102: /* ghost (PetscScalar)statej will add this later */
1103: hav = 1;
1104: break;
1105: } else last = pos;
1106: PetscCall(PetscCDGetNextPos(aggs_2, oldslidj, &pos));
1107: }
1108: if (hav != 1) {
1109: PetscCheck(hav, PETSC_COMM_SELF, PETSC_ERR_PLIB, "failed to find (hav=%" PetscInt_FMT ") adj in 'selected' lists - structurally unsymmetric matrix", hav);
1110: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "found node %" PetscInt_FMT " times???", hav);
1111: }
1112: } else {
1113: /* TODO: ghosts remove this later */
1114: }
1115: }
1116: }
1117: }
1118: } /* selected/deleted */
1119: } /* node loop */
1121: if (isMPI) {
1122: PetscScalar *cpcol_2_parent, *cpcol_2_gid;
1123: Vec tempVec, ghostgids2, ghostparents2;
1124: PetscInt cpid, nghost_2;
1125: PetscHMapI gid_cpid;
1127: PetscCall(VecGetSize(mpimat_2->lvec, &nghost_2));
1128: PetscCall(MatCreateVecs(Gmat_2, &tempVec, NULL));
1130: /* get 'cpcol_2_parent' */
1131: for (kk = 0, j = my0; kk < nloc; kk++, j++) PetscCall(VecSetValues(tempVec, 1, &j, &lid_parent_gid[kk], INSERT_VALUES));
1132: PetscCall(VecAssemblyBegin(tempVec));
1133: PetscCall(VecAssemblyEnd(tempVec));
1134: PetscCall(VecDuplicate(mpimat_2->lvec, &ghostparents2));
1135: PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, ghostparents2, INSERT_VALUES, SCATTER_FORWARD));
1136: PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, ghostparents2, INSERT_VALUES, SCATTER_FORWARD));
1137: PetscCall(VecGetArray(ghostparents2, &cpcol_2_parent));
1139: /* get 'cpcol_2_gid' */
1140: for (kk = 0, j = my0; kk < nloc; kk++, j++) {
1141: PetscScalar v = (PetscScalar)j;
1143: PetscCall(VecSetValues(tempVec, 1, &j, &v, INSERT_VALUES));
1144: }
1145: PetscCall(VecAssemblyBegin(tempVec));
1146: PetscCall(VecAssemblyEnd(tempVec));
1147: PetscCall(VecDuplicate(mpimat_2->lvec, &ghostgids2));
1148: PetscCall(VecScatterBegin(mpimat_2->Mvctx, tempVec, ghostgids2, INSERT_VALUES, SCATTER_FORWARD));
1149: PetscCall(VecScatterEnd(mpimat_2->Mvctx, tempVec, ghostgids2, INSERT_VALUES, SCATTER_FORWARD));
1150: PetscCall(VecGetArray(ghostgids2, &cpcol_2_gid));
1151: PetscCall(VecDestroy(&tempVec));
1153: /* look for deleted ghosts and add to table */
1154: PetscCall(PetscHMapICreateWithSize(2 * nghost_2 + 1, &gid_cpid));
1155: for (cpid = 0; cpid < nghost_2; cpid++) {
1156: NState state = (NState)PetscRealPart(cpcol_2_state[cpid]);
1158: if (state == DELETED) {
1159: PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
1160: PetscInt sgid_old = (PetscInt)PetscRealPart(cpcol_2_par_orig[cpid]);
1162: if (sgid_old == -1 && sgid_new != -1) {
1163: PetscInt gid = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
1165: PetscCall(PetscHMapISet(gid_cpid, gid, cpid));
1166: }
1167: }
1168: }
1170: /* look for deleted ghosts and see if they moved - remove it */
1171: for (lid = 0; lid < nloc; lid++) {
1172: NState state = lid_state[lid];
1174: if (IS_SELECTED(state)) {
1175: PetscCDIntNd *pos, *last = NULL;
1177: /* look for deleted ghosts and see if they moved */
1178: PetscCall(PetscCDGetHeadPos(aggs_2, lid, &pos));
1179: while (pos) {
1180: PetscInt gid;
1182: PetscCall(PetscCDIntNdGetID(pos, &gid));
1183: if (gid < my0 || gid >= Iend) {
1184: PetscCall(PetscHMapIGet(gid_cpid, gid, &cpid));
1185: if (cpid != -1) {
1186: /* a moved ghost - */
1187: /* id_llist_2[lastid] = id_llist_2[flid]; /\* remove 'flid' from list *\/ */
1188: PetscCall(PetscCDRemoveNextNode(aggs_2, lid, last));
1189: } else last = pos;
1190: } else last = pos;
1192: PetscCall(PetscCDGetNextPos(aggs_2, lid, &pos));
1193: } /* loop over list of deleted */
1194: } /* selected */
1195: }
1196: PetscCall(PetscHMapIDestroy(&gid_cpid));
1198: /* look at ghosts, see if they changed - and it */
1199: for (cpid = 0; cpid < nghost_2; cpid++) {
1200: PetscInt sgid_new = (PetscInt)PetscRealPart(cpcol_2_parent[cpid]);
1202: if (sgid_new >= my0 && sgid_new < Iend) { /* this is mine */
1203: PetscInt gid = (PetscInt)PetscRealPart(cpcol_2_gid[cpid]);
1204: PetscInt slid_new = sgid_new - my0, hav = 0;
1205: PetscCDIntNd *pos;
1207: /* search for this gid to see if I have it */
1208: PetscCall(PetscCDGetHeadPos(aggs_2, slid_new, &pos));
1209: while (pos) {
1210: PetscInt gidj;
1212: PetscCall(PetscCDIntNdGetID(pos, &gidj));
1213: PetscCall(PetscCDGetNextPos(aggs_2, slid_new, &pos));
1215: if (gidj == gid) {
1216: hav = 1;
1217: break;
1218: }
1219: }
1220: if (hav != 1) {
1221: /* insert 'flidj' into head of llist */
1222: PetscCall(PetscCDAppendID(aggs_2, slid_new, gid));
1223: }
1224: }
1225: }
1226: PetscCall(VecRestoreArray(mpimat_1->lvec, &cpcol_1_state));
1227: PetscCall(VecRestoreArray(mpimat_2->lvec, &cpcol_2_state));
1228: PetscCall(VecRestoreArray(ghostparents2, &cpcol_2_parent));
1229: PetscCall(VecRestoreArray(ghostgids2, &cpcol_2_gid));
1230: PetscCall(VecDestroy(&ghostgids2));
1231: PetscCall(VecDestroy(&ghostparents2));
1232: PetscCall(VecDestroy(&ghost_par_orig2));
1233: }
1234: PetscCall(PetscFree3(lid_state, lid_parent_gid, lid_cprowID_1));
1235: PetscFunctionReturn(PETSC_SUCCESS);
1236: }
1238: /*
1239: PCGAMGCoarsen_AGG - supports squaring the graph (deprecated) and new graph for
1240: communication of QR data used with HEM and MISk coarsening
1242: Input Parameter:
1243: . a_pc - this
1245: Input/Output Parameter:
1246: . a_Gmat1 - graph to coarsen (in), graph off processor edges for QR gather scatter (out)
1248: Output Parameter:
1249: . agg_lists - list of aggregates
1251: */
1252: static PetscErrorCode PCGAMGCoarsen_AGG(PC a_pc, Mat *a_Gmat1, PetscCoarsenData **agg_lists)
1253: {
1254: PC_MG *mg = (PC_MG *)a_pc->data;
1255: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
1256: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
1257: Mat Gmat2, Gmat1 = *a_Gmat1; /* aggressive graph */
1258: IS perm;
1259: PetscInt Istart, Iend, Ii, nloc, bs, nn;
1260: PetscInt *permute, *degree;
1261: PetscBool *bIndexSet;
1262: PetscReal hashfact;
1263: PetscInt iSwapIndex;
1264: PetscRandom random;
1265: MPI_Comm comm;
1267: PetscFunctionBegin;
1268: PetscCall(PetscObjectGetComm((PetscObject)Gmat1, &comm));
1269: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
1270: PetscCall(MatGetLocalSize(Gmat1, &nn, NULL));
1271: PetscCall(MatGetBlockSize(Gmat1, &bs));
1272: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "bs %" PetscInt_FMT " must be 1", bs);
1273: nloc = nn / bs;
1274: /* get MIS aggs - randomize */
1275: PetscCall(PetscMalloc2(nloc, &permute, nloc, °ree));
1276: PetscCall(PetscCalloc1(nloc, &bIndexSet));
1277: for (Ii = 0; Ii < nloc; Ii++) permute[Ii] = Ii;
1278: PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &random));
1279: PetscCall(MatGetOwnershipRange(Gmat1, &Istart, &Iend));
1280: for (Ii = 0; Ii < nloc; Ii++) {
1281: PetscInt nc;
1283: PetscCall(MatGetRow(Gmat1, Istart + Ii, &nc, NULL, NULL));
1284: degree[Ii] = nc;
1285: PetscCall(MatRestoreRow(Gmat1, Istart + Ii, &nc, NULL, NULL));
1286: }
1287: for (Ii = 0; Ii < nloc; Ii++) {
1288: PetscCall(PetscRandomGetValueReal(random, &hashfact));
1289: iSwapIndex = (PetscInt)(hashfact * nloc) % nloc;
1290: if (!bIndexSet[iSwapIndex] && iSwapIndex != Ii) {
1291: PetscInt iTemp = permute[iSwapIndex];
1293: permute[iSwapIndex] = permute[Ii];
1294: permute[Ii] = iTemp;
1295: iTemp = degree[iSwapIndex];
1296: degree[iSwapIndex] = degree[Ii];
1297: degree[Ii] = iTemp;
1298: bIndexSet[iSwapIndex] = PETSC_TRUE;
1299: }
1300: }
1301: // apply minimum degree ordering -- NEW
1302: if (pc_gamg_agg->use_minimum_degree_ordering) PetscCall(PetscSortIntWithArray(nloc, degree, permute));
1303: PetscCall(PetscFree(bIndexSet));
1304: PetscCall(PetscRandomDestroy(&random));
1305: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nloc, permute, PETSC_USE_POINTER, &perm));
1306: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_MIS], 0, 0, 0, 0));
1307: // square graph
1308: if (pc_gamg->current_level < pc_gamg_agg->aggressive_coarsening_levels && pc_gamg_agg->use_aggressive_square_graph) PetscCall(PCGAMGSquareGraph_GAMG(a_pc, Gmat1, &Gmat2));
1309: else Gmat2 = Gmat1;
1310: // switch to old MIS-1 for square graph
1311: if (pc_gamg->current_level < pc_gamg_agg->aggressive_coarsening_levels) {
1312: if (!pc_gamg_agg->use_aggressive_square_graph) PetscCall(MatCoarsenMISKSetDistance(pc_gamg_agg->crs, pc_gamg_agg->aggressive_mis_k)); // hardwire to MIS-2
1313: else PetscCall(MatCoarsenSetType(pc_gamg_agg->crs, MATCOARSENMIS)); // old MIS -- side effect
1314: } else if (pc_gamg_agg->use_aggressive_square_graph && pc_gamg_agg->aggressive_coarsening_levels > 0) { // we reset the MIS
1315: const char *prefix;
1317: PetscCall(PetscObjectGetOptionsPrefix((PetscObject)a_pc, &prefix));
1318: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)pc_gamg_agg->crs, prefix));
1319: PetscCall(MatCoarsenSetFromOptions(pc_gamg_agg->crs)); // get the default back on non-aggressive levels when square graph switched to old MIS
1320: }
1321: PetscCall(MatCoarsenSetAdjacency(pc_gamg_agg->crs, Gmat2));
1322: PetscCall(MatCoarsenSetStrictAggs(pc_gamg_agg->crs, PETSC_TRUE));
1323: PetscCall(MatCoarsenSetGreedyOrdering(pc_gamg_agg->crs, perm));
1324: PetscCall(MatCoarsenApply(pc_gamg_agg->crs));
1325: PetscCall(MatCoarsenGetData(pc_gamg_agg->crs, agg_lists)); /* output */
1327: PetscCall(ISDestroy(&perm));
1328: PetscCall(PetscFree2(permute, degree));
1329: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_MIS], 0, 0, 0, 0));
1331: if (Gmat2 != Gmat1) { // square graph, we need ghosts for selected
1332: PetscCoarsenData *llist = *agg_lists;
1334: PetscCall(fixAggregatesWithSquare(a_pc, Gmat2, Gmat1, *agg_lists));
1335: PetscCall(MatDestroy(&Gmat1));
1336: *a_Gmat1 = Gmat2; /* output */
1337: PetscCall(PetscCDSetMat(llist, *a_Gmat1)); /* Need a graph with ghosts here */
1338: }
1339: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_COARSEN], 0, 0, 0, 0));
1340: PetscFunctionReturn(PETSC_SUCCESS);
1341: }
1343: /*
1344: PCGAMGConstructProlongator_AGG
1346: Input Parameter:
1347: . pc - this
1348: . Amat - matrix on this fine level
1349: . Graph - used to get ghost data for nodes in
1350: . agg_lists - list of aggregates
1351: Output Parameter:
1352: . a_P_out - prolongation operator to the next level
1353: */
1354: static PetscErrorCode PCGAMGConstructProlongator_AGG(PC pc, Mat Amat, PetscCoarsenData *agg_lists, Mat *a_P_out)
1355: {
1356: PC_MG *mg = (PC_MG *)pc->data;
1357: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
1358: const PetscInt col_bs = pc_gamg->data_cell_cols;
1359: PetscInt Istart, Iend, nloc, ii, jj, kk, my0, nLocalSelected, bs;
1360: Mat Gmat, Prol;
1361: PetscMPIInt size;
1362: MPI_Comm comm;
1363: PetscReal *data_w_ghost;
1364: PetscInt myCrs0, nbnodes = 0, *flid_fgid;
1365: MatType mtype;
1367: PetscFunctionBegin;
1368: PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1369: PetscCheck(col_bs >= 1, comm, PETSC_ERR_PLIB, "Column bs cannot be less than 1");
1370: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROL], 0, 0, 0, 0));
1371: PetscCallMPI(MPI_Comm_size(comm, &size));
1372: PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
1373: PetscCall(MatGetBlockSize(Amat, &bs));
1374: nloc = (Iend - Istart) / bs;
1375: my0 = Istart / bs;
1376: PetscCheck((Iend - Istart) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "(Iend %" PetscInt_FMT " - Istart %" PetscInt_FMT ") not divisible by bs %" PetscInt_FMT, Iend, Istart, bs);
1377: PetscCall(PetscCDGetMat(agg_lists, &Gmat)); // get auxiliary matrix for ghost edges for size > 1
1379: /* get 'nLocalSelected' */
1380: for (ii = 0, nLocalSelected = 0; ii < nloc; ii++) {
1381: PetscBool ise;
1383: /* filter out singletons 0 or 1? */
1384: PetscCall(PetscCDIsEmptyAt(agg_lists, ii, &ise));
1385: if (!ise) nLocalSelected++;
1386: }
1388: /* create prolongator, create P matrix */
1389: PetscCall(MatGetType(Amat, &mtype));
1390: PetscCall(MatCreate(comm, &Prol));
1391: PetscCall(MatSetSizes(Prol, nloc * bs, nLocalSelected * col_bs, PETSC_DETERMINE, PETSC_DETERMINE));
1392: PetscCall(MatSetBlockSizes(Prol, bs, col_bs)); // should this be before MatSetSizes?
1393: PetscCall(MatSetType(Prol, mtype));
1394: #if PetscDefined(HAVE_DEVICE)
1395: PetscBool flg;
1396: PetscCall(MatBoundToCPU(Amat, &flg));
1397: PetscCall(MatBindToCPU(Prol, flg));
1398: if (flg) PetscCall(MatSetBindingPropagates(Prol, PETSC_TRUE));
1399: #endif
1400: PetscCall(MatSeqAIJSetPreallocation(Prol, col_bs, NULL));
1401: PetscCall(MatMPIAIJSetPreallocation(Prol, col_bs, NULL, col_bs, NULL));
1403: /* can get all points "removed" */
1404: PetscCall(MatGetSize(Prol, &kk, &ii));
1405: if (!ii) {
1406: PetscCall(PetscInfo(pc, "%s: No selected points on coarse grid\n", ((PetscObject)pc)->prefix));
1407: PetscCall(MatDestroy(&Prol));
1408: *a_P_out = NULL; /* out */
1409: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROL], 0, 0, 0, 0));
1410: PetscFunctionReturn(PETSC_SUCCESS);
1411: }
1412: PetscCall(PetscInfo(pc, "%s: New grid %" PetscInt_FMT " nodes\n", ((PetscObject)pc)->prefix, ii / col_bs));
1413: PetscCall(MatGetOwnershipRangeColumn(Prol, &myCrs0, &kk));
1415: PetscCheck((kk - myCrs0) % col_bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "(kk %" PetscInt_FMT " -myCrs0 %" PetscInt_FMT ") not divisible by col_bs %" PetscInt_FMT, kk, myCrs0, col_bs);
1416: myCrs0 = myCrs0 / col_bs;
1417: PetscCheck((kk / col_bs - myCrs0) == nLocalSelected, PETSC_COMM_SELF, PETSC_ERR_PLIB, "(kk %" PetscInt_FMT "/col_bs %" PetscInt_FMT " - myCrs0 %" PetscInt_FMT ") != nLocalSelected %" PetscInt_FMT ")", kk, col_bs, myCrs0, nLocalSelected);
1419: /* create global vector of data in 'data_w_ghost' */
1420: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROLA], 0, 0, 0, 0));
1421: if (size > 1) { /* get ghost null space data */
1422: PetscReal *tmp_gdata, *tmp_ldata, *tp2;
1424: PetscCall(PetscMalloc1(nloc, &tmp_ldata));
1425: for (jj = 0; jj < col_bs; jj++) {
1426: for (kk = 0; kk < bs; kk++) {
1427: PetscInt stride;
1428: const PetscReal *tp = PetscSafePointerPlusOffset(pc_gamg->data, jj * bs * nloc + kk);
1430: for (PetscInt ii = 0; ii < nloc; ii++, tp += bs) tmp_ldata[ii] = *tp;
1432: PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, tmp_ldata, &stride, &tmp_gdata));
1434: if (!jj && !kk) { /* now I know how many total nodes - allocate TODO: move below and do in one 'col_bs' call */
1435: PetscCall(PetscMalloc1(stride * bs * col_bs, &data_w_ghost));
1436: nbnodes = bs * stride;
1437: }
1438: tp2 = PetscSafePointerPlusOffset(data_w_ghost, jj * bs * stride + kk);
1439: for (PetscInt ii = 0; ii < stride; ii++, tp2 += bs) *tp2 = tmp_gdata[ii];
1440: PetscCall(PetscFree(tmp_gdata));
1441: }
1442: }
1443: PetscCall(PetscFree(tmp_ldata));
1444: } else {
1445: nbnodes = bs * nloc;
1446: data_w_ghost = pc_gamg->data;
1447: }
1449: /* get 'flid_fgid' TODO - move up to get 'stride' and do get null space data above in one step (jj loop) */
1450: if (size > 1) {
1451: PetscReal *fid_glid_loc, *fiddata;
1452: PetscInt stride;
1454: PetscCall(PetscMalloc1(nloc, &fid_glid_loc));
1455: for (kk = 0; kk < nloc; kk++) fid_glid_loc[kk] = (PetscReal)(my0 + kk);
1456: PetscCall(PCGAMGGetDataWithGhosts(Gmat, 1, fid_glid_loc, &stride, &fiddata));
1457: PetscCall(PetscMalloc1(stride, &flid_fgid)); /* copy real data to in */
1458: for (kk = 0; kk < stride; kk++) flid_fgid[kk] = (PetscInt)fiddata[kk];
1459: PetscCall(PetscFree(fiddata));
1461: PetscCheck(stride == nbnodes / bs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "stride %" PetscInt_FMT " != nbnodes %" PetscInt_FMT "/bs %" PetscInt_FMT, stride, nbnodes, bs);
1462: PetscCall(PetscFree(fid_glid_loc));
1463: } else {
1464: PetscCall(PetscMalloc1(nloc, &flid_fgid));
1465: for (kk = 0; kk < nloc; kk++) flid_fgid[kk] = my0 + kk;
1466: }
1467: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROLA], 0, 0, 0, 0));
1468: /* get P0 */
1469: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_PROLB], 0, 0, 0, 0));
1470: {
1471: PetscReal *data_out = NULL;
1473: PetscCall(formProl0(agg_lists, bs, col_bs, myCrs0, nbnodes, data_w_ghost, flid_fgid, &data_out, Prol));
1474: PetscCall(PetscFree(pc_gamg->data));
1476: pc_gamg->data = data_out;
1477: pc_gamg->data_cell_rows = col_bs;
1478: pc_gamg->data_sz = col_bs * col_bs * nLocalSelected;
1479: }
1480: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROLB], 0, 0, 0, 0));
1481: if (size > 1) PetscCall(PetscFree(data_w_ghost));
1482: PetscCall(PetscFree(flid_fgid));
1484: *a_P_out = Prol; /* out */
1485: PetscCall(MatViewFromOptions(Prol, NULL, "-pc_gamg_agg_view_initial_prolongation"));
1487: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_PROL], 0, 0, 0, 0));
1488: PetscFunctionReturn(PETSC_SUCCESS);
1489: }
1491: /*
1492: PCGAMGKernelPreservingFilter_AGG - filter the prolongator while preserving the near-null space constraint P*B_c = B
1494: Applies `MatFilter()` to drop small entries, then corrects each row so that
1495: P_filtered * B_c = B (the fine near-null space) is restored.
1497: For nSAvec == 1: rescale each row by B[i] / (P_filtered[i,:] * B_c[J_i]).
1498: For nSAvec > 1: solve a small nSAvec x nSAvec SPD system per row and add
1499: a rank-nSAvec correction to the row entries.
1500: */
1501: static PetscErrorCode PCGAMGKernelPreservingFilter_AGG(PC pc, Mat Prol, PetscReal threshold)
1502: {
1503: PC_MG *mg = (PC_MG *)pc->data;
1504: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
1505: const PetscInt nSAvec = pc_gamg->data_cell_rows; /* == data_cell_cols after formProl0 */
1506: PetscInt cStart, cEnd, rStart, rEnd;
1507: const PetscReal *Bc_data = pc_gamg->data;
1508: Vec *Bc_vecs, *B_vecs;
1509: PetscScalar *Bc_arr;
1511: PetscFunctionBegin;
1512: PetscCall(PetscInfo(pc, "Kernel-preserving filter of prolongator with threshold %g, nSAvec=%" PetscInt_FMT "\n", (double)threshold, nSAvec));
1514: PetscCall(MatGetOwnershipRange(Prol, &rStart, &rEnd));
1515: PetscCall(MatGetOwnershipRangeColumn(Prol, &cStart, &cEnd));
1517: /* Step 1: build coarse null-space vectors and compute B = P_original * B_c */
1518: PetscCall(PetscMalloc1(nSAvec, &Bc_vecs));
1519: PetscCall(PetscMalloc1(nSAvec, &B_vecs));
1521: {
1522: PetscInt nloc = cEnd - cStart;
1523: for (PetscInt k = 0; k < nSAvec; k++) {
1524: PetscCall(MatCreateVecs(Prol, &Bc_vecs[k], &B_vecs[k]));
1525: /* fill local entries: Bc_data layout is Bc_data[k * nloc + c] (stride == nloc) */
1526: PetscCall(VecGetArray(Bc_vecs[k], &Bc_arr));
1527: for (PetscInt c = 0; c < nloc; c++) Bc_arr[c] = (PetscScalar)Bc_data[k * nloc + c];
1528: PetscCall(VecRestoreArray(Bc_vecs[k], &Bc_arr));
1529: PetscCall(MatMult(Prol, Bc_vecs[k], B_vecs[k]));
1530: }
1531: }
1533: /* Step 2: apply the threshold filter */
1534: {
1535: PetscBool info_active = PETSC_FALSE;
1536: MatInfo info0, info1;
1537: PetscCall(PetscInfoEnabled(((PetscObject)pc)->classid, &info_active));
1538: if (info_active) PetscCall(MatGetInfo(Prol, MAT_GLOBAL_SUM, &info0));
1539: PetscCall(MatFilter(Prol, threshold, PETSC_TRUE, PETSC_TRUE));
1540: if (info_active) {
1541: PetscCall(MatGetInfo(Prol, MAT_GLOBAL_SUM, &info1));
1542: PetscCall(PetscInfo(pc, "Prolongator filter: nnz before=%g after=%g reduction=%g%%\n", info0.nz_used, info1.nz_used, (info0.nz_used > 0) ? 100.0 * (info0.nz_used - info1.nz_used) / info0.nz_used : 0.0));
1543: }
1544: }
1546: /* Step 3: correct rows to restore P_filtered * B_c = B */
1547: if (nSAvec == 1) {
1548: /*
1549: Scalar case: use `MatMult()` + element-wise scaling + `MatDiagonalScale()`.
1550: scale_i = B_i / (P_filtered * Bc)_i, then P_new = diag(scale) * P_filtered.
1551: Guard against zero denominators (empty rows after filter).
1552: No ghost column access needed.
1553: */
1554: Vec d_vec, scale_vec;
1555: PetscInt n_local;
1556: PetscScalar *s_arr;
1557: const PetscScalar *b_arr, *d_arr;
1559: PetscCall(MatCreateVecs(Prol, NULL, &d_vec));
1560: PetscCall(MatMult(Prol, Bc_vecs[0], d_vec));
1561: PetscCall(VecDuplicate(d_vec, &scale_vec));
1562: PetscCall(VecGetLocalSize(d_vec, &n_local));
1563: PetscCall(VecGetArrayRead(B_vecs[0], &b_arr));
1564: PetscCall(VecGetArrayRead(d_vec, &d_arr));
1565: PetscCall(VecGetArray(scale_vec, &s_arr));
1566: for (PetscInt i = 0; i < n_local; i++) s_arr[i] = (PetscAbsScalar(d_arr[i]) > 0.0) ? b_arr[i] / d_arr[i] : 1.0;
1567: PetscCall(VecRestoreArray(scale_vec, &s_arr));
1568: PetscCall(VecRestoreArrayRead(d_vec, &d_arr));
1569: PetscCall(VecRestoreArrayRead(B_vecs[0], &b_arr));
1570: PetscCall(MatDiagonalScale(Prol, scale_vec, NULL));
1571: PetscCall(VecDestroy(&d_vec));
1572: PetscCall(VecDestroy(&scale_vec));
1573: } else {
1574: /*
1575: Vector case (nSAvec > 1): per-row least-squares correction.
1576: Scatter Bc_data to include ghost column values using Prol's Mvctx,
1577: then build a hash map from global ghost column index to local ghost index
1578: so that `MatGetRow()` global column indices can be mapped to the ghosted array.
1579: */
1580: PetscInt nloc = cEnd - cStart;
1581: PetscInt ghost_stride;
1582: PetscReal *Bc_ghosted = NULL;
1583: const PetscReal *Bc_ghosted_ro;
1584: PetscMPIInt comm_size;
1585: PetscHMapI ghost_gid_to_lid; /* global ghost col index -> local ghost index (0-based) */
1586: PetscInt num_ghosts = 0;
1588: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)Prol), &comm_size));
1589: if (comm_size > 1) {
1590: Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)Prol->data;
1591: Vec tmp_vec;
1592: PetscScalar *data_arr;
1593: PetscInt nnodes;
1595: PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
1596: nnodes = nloc + num_ghosts;
1597: ghost_stride = nnodes;
1598: /*
1599: Scatter Bc_data to include ghost column values using Prol's Mvctx.
1600: Cannot use PCGAMGGetDataWithGhosts() because it assumes square matrix
1601: (uses MatGetOwnershipRange() for row indices, but Prol is rectangular).
1602: */
1603: PetscCall(MatCreateVecs(Prol, &tmp_vec, NULL));
1604: PetscCall(PetscMalloc1(nSAvec * nnodes, &Bc_ghosted));
1605: for (PetscInt dir = 0; dir < nSAvec; dir++) {
1606: PetscScalar *tmp_arr;
1607: PetscCall(VecGetArray(tmp_vec, &tmp_arr));
1608: for (PetscInt kk = 0; kk < nloc; kk++) {
1609: PetscReal val = Bc_data[dir * nloc + kk];
1610: Bc_ghosted[dir * nnodes + kk] = val;
1611: tmp_arr[kk] = (PetscScalar)val;
1612: }
1613: PetscCall(VecRestoreArray(tmp_vec, &tmp_arr));
1614: PetscCall(VecScatterBegin(mpimat->Mvctx, tmp_vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1615: PetscCall(VecScatterEnd(mpimat->Mvctx, tmp_vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1616: PetscCall(VecGetArray(mpimat->lvec, &data_arr));
1617: for (PetscInt g = 0; g < num_ghosts; g++) Bc_ghosted[dir * nnodes + nloc + g] = PetscRealPart(data_arr[g]);
1618: PetscCall(VecRestoreArray(mpimat->lvec, &data_arr));
1619: }
1620: PetscCall(VecDestroy(&tmp_vec));
1621: Bc_ghosted_ro = Bc_ghosted;
1622: /* build hash: global ghost col index -> local ghost index (0-based into ghost portion) */
1623: PetscCall(PetscHMapICreateWithSize(2 * num_ghosts + 1, &ghost_gid_to_lid));
1624: for (PetscInt g = 0; g < num_ghosts; g++) PetscCall(PetscHMapISet(ghost_gid_to_lid, mpimat->garray[g], g));
1625: } else {
1626: /* sequential: no ghosts, ghost_stride == nloc, use Bc_data directly (read-only) */
1627: ghost_stride = nloc;
1628: Bc_ghosted_ro = Bc_data;
1629: PetscCall(PetscHMapICreateWithSize(1, &ghost_gid_to_lid));
1630: }
1632: {
1633: PetscInt nrows = rEnd - rStart, max_ncols = 0;
1634: const PetscScalar **B_arrays;
1635: PetscScalar *work, *new_vals, *G, *rhs, *x, *bc_col;
1636: PetscInt *ghosted_idx, *col_buf;
1637: PetscBLASInt *ipiv;
1638: PetscBLASInt N_b;
1640: PetscCall(PetscMalloc1(nSAvec, &B_arrays));
1641: for (PetscInt k = 0; k < nSAvec; k++) PetscCall(VecGetArrayRead(B_vecs[k], &B_arrays[k]));
1642: /* work: nSAvec*nSAvec Gram + nSAvec rhs + nSAvec solution + nSAvec bc_col scratch */
1643: PetscCall(PetscMalloc1(nSAvec * nSAvec + 3 * nSAvec, &work));
1644: PetscCall(PetscMalloc1(nSAvec, &ipiv));
1645: PetscCall(PetscBLASIntCast(nSAvec, &N_b));
1646: G = work;
1647: rhs = work + nSAvec * nSAvec;
1648: x = rhs + nSAvec;
1649: bc_col = x + nSAvec;
1651: /* find max row width and total nnz for pre-allocation */
1652: {
1653: PetscInt total_nnz = 0;
1654: for (PetscInt row = 0; row < nrows; row++) {
1655: PetscInt ncols;
1656: PetscCall(MatGetRow(Prol, rStart + row, &ncols, NULL, NULL));
1657: if (ncols > max_ncols) max_ncols = ncols;
1658: total_nnz += ncols;
1659: PetscCall(MatRestoreRow(Prol, rStart + row, &ncols, NULL, NULL));
1660: }
1661: /* allocate flat CSR-like buffers to store all corrections before applying */
1662: PetscCall(PetscMalloc1(total_nnz, &new_vals));
1663: PetscCall(PetscMalloc1(total_nnz, &col_buf));
1664: }
1665: PetscCall(PetscMalloc1(max_ncols, &ghosted_idx));
1667: /* Pass 1: read rows, compute corrections, store in flat buffers */
1668: {
1669: PetscInt *row_offsets;
1670: PetscInt offset = 0, n_singular = 0, n_zero_rows = 0, n_corrected = 0, n_underdetermined = 0;
1671: PetscReal max_xnorm = 0.0;
1673: PetscCall(PetscMalloc1(nrows + 1, &row_offsets));
1674: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1676: for (PetscInt row = 0; row < nrows; row++) {
1677: PetscInt ncols;
1678: const PetscInt *cols;
1679: const PetscScalar *vals;
1680: PetscInt grow = rStart + row;
1681: PetscBLASInt NRHS = 1, LDA = N_b, LDB = N_b, INFO;
1683: row_offsets[row] = offset;
1684: PetscCall(MatGetRow(Prol, grow, &ncols, &cols, &vals));
1685: if (ncols == 0) {
1686: n_zero_rows++;
1687: PetscCall(MatRestoreRow(Prol, grow, &ncols, &cols, &vals));
1688: continue;
1689: }
1691: /* When ncols < nSAvec the Gram matrix G is rank-deficient by construction;
1692: skip correction for this row (keep filtered values as-is).
1693: Note: the near-null space constraint P*Bc = B is NOT enforced for these rows.
1694: This typically occurs at boundary or isolated nodes where few coarse neighbors
1695: remain after filtering; the impact on convergence is generally small. */
1696: if (ncols < nSAvec) {
1697: n_underdetermined++;
1698: for (PetscInt j = 0; j < ncols; j++) {
1699: col_buf[offset + j] = cols[j];
1700: new_vals[offset + j] = vals[j];
1701: }
1702: offset += ncols;
1703: PetscCall(MatRestoreRow(Prol, grow, &ncols, &cols, &vals));
1704: continue;
1705: }
1707: /* map global column indices to ghosted array indices and save cols */
1708: for (PetscInt j = 0; j < ncols; j++) {
1709: col_buf[offset + j] = cols[j];
1710: if (cols[j] >= cStart && cols[j] < cEnd) ghosted_idx[j] = cols[j] - cStart;
1711: else {
1712: PetscInt g = -1;
1713: PetscCall(PetscHMapIGet(ghost_gid_to_lid, cols[j], &g));
1714: PetscCheck(g >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Off-diagonal column %" PetscInt_FMT " not found in ghost map for prolongator filter", cols[j]);
1715: ghosted_idx[j] = nloc + g;
1716: }
1717: }
1719: for (PetscInt i = 0; i < nSAvec * nSAvec; i++) G[i] = 0.0;
1721: /* rhs[k] = B[row,k] - sum_j P[row,j] * Bc[ghosted_idx[j], k] */
1722: for (PetscInt k = 0; k < nSAvec; k++) {
1723: PetscScalar dot = 0.0;
1724: for (PetscInt j = 0; j < ncols; j++) dot += vals[j] * (PetscScalar)Bc_ghosted_ro[k * ghost_stride + ghosted_idx[j]];
1725: rhs[k] = B_arrays[k][row] - dot;
1726: }
1728: /* G[k1,k2] = sum_j Bc[j,k1] * Bc[j,k2] using pre-gathered bc_col */
1729: for (PetscInt j = 0; j < ncols; j++) {
1730: PetscInt gidx = ghosted_idx[j];
1731: for (PetscInt k = 0; k < nSAvec; k++) bc_col[k] = (PetscScalar)Bc_ghosted_ro[k * ghost_stride + gidx];
1732: for (PetscInt k1 = 0; k1 < nSAvec; k1++)
1733: for (PetscInt k2 = k1; k2 < nSAvec; k2++) G[k1 * nSAvec + k2] += bc_col[k1] * bc_col[k2];
1734: }
1735: /* fill lower triangle from upper (G is symmetric) */
1736: for (PetscInt k1 = 1; k1 < nSAvec; k1++)
1737: for (PetscInt k2 = 0; k2 < k1; k2++) G[k1 * nSAvec + k2] = G[k2 * nSAvec + k1];
1739: /* solve G * x = rhs */
1740: for (PetscInt i = 0; i < nSAvec; i++) x[i] = rhs[i];
1741: PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&N_b, &NRHS, G, &LDA, ipiv, x, &LDB, &INFO));
1742: if (INFO != 0) {
1743: /* G is singular despite ncols >= nSAvec (Bc columns linearly dependent);
1744: keep filtered values as-is (near-null space constraint not enforced for this row) */
1745: n_singular++;
1746: for (PetscInt j = 0; j < ncols; j++) new_vals[offset + j] = vals[j];
1747: offset += ncols;
1748: PetscCall(MatRestoreRow(Prol, grow, &ncols, &cols, &vals));
1749: continue;
1750: }
1752: /* track ||x||^2 */
1753: {
1754: PetscReal xnorm2 = 0.0;
1755: for (PetscInt k = 0; k < nSAvec; k++) xnorm2 += PetscSqr(PetscAbsScalar(x[k]));
1756: if (xnorm2 > max_xnorm) max_xnorm = xnorm2;
1757: }
1758: n_corrected++;
1760: /* new_vals[j] = vals[j] + sum_k Bc[ghosted_idx[j],k] * x[k] */
1761: for (PetscInt j = 0; j < ncols; j++) {
1762: PetscScalar delta = 0.0;
1763: PetscInt gidx = ghosted_idx[j];
1764: for (PetscInt k = 0; k < nSAvec; k++) delta += (PetscScalar)Bc_ghosted_ro[k * ghost_stride + gidx] * x[k];
1765: new_vals[offset + j] = vals[j] + delta;
1766: }
1767: offset += ncols;
1768: PetscCall(MatRestoreRow(Prol, grow, &ncols, &cols, &vals));
1769: }
1770: row_offsets[nrows] = offset;
1771: PetscCall(PetscFPTrapPop());
1772: PetscCall(PetscInfo(pc, "PCGAMGKernelPreservingFilter_AGG: nrows=%" PetscInt_FMT " corrected=%" PetscInt_FMT " zero_rows=%" PetscInt_FMT " underdetermined(ncols<nSAvec)=%" PetscInt_FMT " singular_G=%" PetscInt_FMT " max_xnorm2=%g\n", nrows, n_corrected, n_zero_rows, n_underdetermined, n_singular, (double)max_xnorm));
1774: /* Pass 2: apply all corrections at once */
1775: for (PetscInt row = 0; row < nrows; row++) {
1776: PetscInt grow = rStart + row;
1777: PetscInt nc = row_offsets[row + 1] - row_offsets[row];
1778: if (nc > 0) PetscCall(MatSetValues(Prol, 1, &grow, nc, col_buf + row_offsets[row], new_vals + row_offsets[row], INSERT_VALUES));
1779: }
1780: PetscCall(PetscFree(row_offsets));
1781: }
1783: for (PetscInt k = 0; k < nSAvec; k++) PetscCall(VecRestoreArrayRead(B_vecs[k], &B_arrays[k]));
1784: PetscCall(PetscFree(B_arrays));
1785: PetscCall(PetscFree(work));
1786: PetscCall(PetscFree(ipiv));
1787: PetscCall(PetscFree(ghosted_idx));
1788: PetscCall(PetscFree(new_vals));
1789: PetscCall(PetscFree(col_buf));
1790: }
1792: PetscCall(PetscHMapIDestroy(&ghost_gid_to_lid));
1793: if (comm_size > 1) PetscCall(PetscFree(Bc_ghosted));
1794: }
1796: PetscCall(MatAssemblyBegin(Prol, MAT_FINAL_ASSEMBLY));
1797: PetscCall(MatAssemblyEnd(Prol, MAT_FINAL_ASSEMBLY));
1799: for (PetscInt k = 0; k < nSAvec; k++) {
1800: PetscCall(VecDestroy(&Bc_vecs[k]));
1801: PetscCall(VecDestroy(&B_vecs[k]));
1802: }
1803: PetscCall(PetscFree(Bc_vecs));
1804: PetscCall(PetscFree(B_vecs));
1805: PetscFunctionReturn(PETSC_SUCCESS);
1806: }
1808: /*
1809: PCGAMGOptimizeProlongator_AGG - given the initial prolongator optimizes it by smoothed aggregation pc_gamg_agg->nsmooths times
1811: Input Parameter:
1812: . pc - this
1813: . Amat - matrix on this fine level
1814: In/Output Parameter:
1815: . a_P - prolongation operator to the next level
1816: */
1817: static PetscErrorCode PCGAMGOptimizeProlongator_AGG(PC pc, Mat Amat, Mat *a_P)
1818: {
1819: PC_MG *mg = (PC_MG *)pc->data;
1820: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
1821: PC_GAMG_AGG *pc_gamg_agg = (PC_GAMG_AGG *)pc_gamg->subctx;
1822: Mat Prol = *a_P;
1823: MPI_Comm comm;
1824: KSP eksp;
1825: Vec bb, xx;
1826: PC epc;
1827: PetscReal alpha, emax, emin;
1829: PetscFunctionBegin;
1830: PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1831: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_OPT], 0, 0, 0, 0));
1833: /* compute maximum singular value of operator to be used in smoother */
1834: if (0 < pc_gamg_agg->nsmooths) {
1835: /* get eigen estimates */
1836: if (pc_gamg->emax > 0) {
1837: emin = pc_gamg->emin;
1838: emax = pc_gamg->emax;
1839: } else {
1840: const char *prefix;
1842: PetscCall(MatCreateVecs(Amat, &bb, NULL));
1843: PetscCall(MatCreateVecs(Amat, &xx, NULL));
1844: PetscCall(KSPSetNoisy_Private(Amat, bb));
1846: PetscCall(KSPCreate(comm, &eksp));
1847: PetscCall(KSPSetNestLevel(eksp, pc->kspnestlevel));
1848: PetscCall(PCGetOptionsPrefix(pc, &prefix));
1849: PetscCall(KSPSetOptionsPrefix(eksp, prefix));
1850: PetscCall(KSPAppendOptionsPrefix(eksp, "pc_gamg_esteig_"));
1851: {
1852: PetscBool isset, sflg;
1854: PetscCall(MatIsSPDKnown(Amat, &isset, &sflg));
1855: if (isset && sflg) PetscCall(KSPSetType(eksp, KSPCG));
1856: }
1857: PetscCall(KSPSetErrorIfNotConverged(eksp, pc->erroriffailure));
1858: PetscCall(KSPSetNormType(eksp, KSP_NORM_NONE));
1860: PetscCall(KSPSetInitialGuessNonzero(eksp, PETSC_FALSE));
1861: PetscCall(KSPSetOperators(eksp, Amat, Amat));
1863: PetscCall(KSPGetPC(eksp, &epc));
1864: PetscCall(PCSetType(epc, PCJACOBI)); /* smoother in smoothed agg. */
1866: PetscCall(KSPSetTolerances(eksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 10)); // 10 is safer, but 5 is often fine, can override with -pc_gamg_esteig_ksp_max_it -mg_levels_ksp_chebyshev_esteig 0,0.25,0,1.2
1868: PetscCall(KSPSetFromOptions(eksp));
1869: PetscCall(KSPSetComputeSingularValues(eksp, PETSC_TRUE));
1870: PetscCall(KSPSolve(eksp, bb, xx));
1871: PetscCall(KSPCheckSolve(eksp, pc, xx));
1873: PetscCall(KSPComputeExtremeSingularValues(eksp, &emax, &emin));
1874: PetscCall(PetscInfo(pc, "%s: Smooth P0: max eigen=%e min=%e PC=%s\n", ((PetscObject)pc)->prefix, (double)emax, (double)emin, PCJACOBI));
1875: PetscCall(VecDestroy(&xx));
1876: PetscCall(VecDestroy(&bb));
1877: PetscCall(KSPDestroy(&eksp));
1878: }
1879: if (pc_gamg->use_sa_esteig) {
1880: mg->min_eigen_DinvA[pc_gamg->current_level] = emin;
1881: mg->max_eigen_DinvA[pc_gamg->current_level] = emax;
1882: PetscCall(PetscInfo(pc, "%s: Smooth P0: level %" PetscInt_FMT ", cache spectra %g %g\n", ((PetscObject)pc)->prefix, pc_gamg->current_level, (double)emin, (double)emax));
1883: } else {
1884: mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
1885: mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
1886: }
1887: } else {
1888: mg->min_eigen_DinvA[pc_gamg->current_level] = 0;
1889: mg->max_eigen_DinvA[pc_gamg->current_level] = 0;
1890: }
1892: /* smooth P0 */
1893: if (pc_gamg_agg->nsmooths > 0) {
1894: Vec diag;
1896: /* TODO: Set a PCFailedReason and exit the building of the AMG preconditioner */
1897: PetscCheck(emax != 0.0, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Computed maximum singular value as zero");
1899: PetscCall(MatCreateVecs(Amat, &diag, NULL));
1900: PetscCall(MatGetDiagonal(Amat, diag)); /* effectively PCJACOBI */
1901: PetscCall(VecReciprocal(diag));
1903: for (PetscInt jj = 0; jj < pc_gamg_agg->nsmooths; jj++) {
1904: Mat tMat;
1906: PetscCall(PetscLogEventBegin(petsc_gamg_setup_events[GAMG_OPTSM], 0, 0, 0, 0));
1907: /*
1908: Smooth aggregation on the prolongator
1910: P_{i} := (I - 1.4/emax D^{-1}A) P_i\{i-1}
1911: */
1912: PetscCall(PetscLogEventBegin(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2], 0, 0, 0, 0));
1913: PetscCall(MatMatMult(Amat, Prol, MAT_INITIAL_MATRIX, PETSC_CURRENT, &tMat));
1914: PetscCall(PetscLogEventEnd(petsc_gamg_setup_matmat_events[pc_gamg->current_level][2], 0, 0, 0, 0));
1915: PetscCall(MatProductClear(tMat));
1916: PetscCall(MatDiagonalScale(tMat, diag, NULL));
1918: /* TODO: Document the 1.4 and don't hardwire it in this routine */
1919: alpha = -1.4 / emax;
1920: PetscCall(MatAYPX(tMat, alpha, Prol, SUBSET_NONZERO_PATTERN));
1921: PetscCall(MatDestroy(&Prol));
1922: Prol = tMat;
1923: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_OPTSM], 0, 0, 0, 0));
1924: }
1925: PetscCall(VecDestroy(&diag));
1926: }
1927: if (pc_gamg->prolongator_filter > 0.0) PetscCall(PCGAMGKernelPreservingFilter_AGG(pc, Prol, pc_gamg->prolongator_filter));
1928: PetscCall(PetscLogEventEnd(petsc_gamg_setup_events[GAMG_OPT], 0, 0, 0, 0));
1929: PetscCall(MatViewFromOptions(Prol, NULL, "-pc_gamg_agg_view_prolongation"));
1930: *a_P = Prol;
1931: PetscFunctionReturn(PETSC_SUCCESS);
1932: }
1934: /*MC
1935: PCGAMGAGG - Smooth aggregation, {cite}`vanek1996algebraic`, {cite}`vanek2001convergence`, variant of PETSc's algebraic multigrid (`PCGAMG`) preconditioner
1937: Options Database Keys:
1938: + -pc_gamg_agg_nsmooths nsmooth - number of smoothing steps to use with smooth aggregation to construct prolongation
1939: . -pc_gamg_prolongator_filter thr - filter small entries from the prolongator, preserving the near-null space (0=disabled, 0.0025=typical)
1940: . -pc_gamg_aggressive_coarsening n - number of aggressive coarsening (MIS-2) levels from finest.
1941: . -pc_gamg_aggressive_square_graph (true|false) - Use square graph (A'A), alternative is MIS-k (k=2), for aggressive coarsening
1942: . -pc_gamg_mis_k_minimum_degree_ordering (true|false) - Use minimum degree ordering in greedy MIS algorithm
1943: . -pc_gamg_pc_gamg_asm_hem_aggs n - Number of HEM aggregation steps for ASM smoother
1944: - -pc_gamg_aggressive_mis_k n - Number (k) distance in MIS coarsening (>2 is 'aggressive')
1946: Level: intermediate
1948: Notes:
1949: To obtain good performance for `PCGAMG` for vector valued problems you must
1950: call `MatSetBlockSize()` to indicate the number of degrees of freedom per grid point.
1951: Call `MatSetNearNullSpace()` (or `PCSetCoordinates()` if solving the equations of elasticity) to indicate the near null space of the operator
1953: The many options for `PCMG` and `PCGAMG` such as controlling the smoothers on each level etc. also work for `PCGAMGAGG`
1955: .seealso: `PCGAMG`, [the Users Manual section on PCGAMG](sec_amg), [the Users Manual section on PCMG](sec_mg), [](ch_ksp), `PCCreate()`, `PCSetType()`,
1956: `MatSetBlockSize()`, `PCMGType`, `PCSetCoordinates()`, `MatSetNearNullSpace()`, `PCGAMGSetType()`,
1957: `PCGAMGAGG`, `PCGAMGGEO`, `PCGAMGCLASSICAL`, `PCGAMGSetProcEqLim()`, `PCGAMGSetCoarseEqLim()`, `PCGAMGSetRepartition()`, `PCGAMGRegister()`,
1958: `PCGAMGSetReuseInterpolation()`, `PCGAMGASMSetUseAggs()`, `PCGAMGSetParallelCoarseGridSolve()`, `PCGAMGSetNlevels()`, `PCGAMGSetThreshold()`,
1959: `PCGAMGGetType()`, `PCGAMGSetUseSAEstEig()`
1960: M*/
1961: PetscErrorCode PCCreateGAMG_AGG(PC pc)
1962: {
1963: PC_MG *mg = (PC_MG *)pc->data;
1964: PC_GAMG *pc_gamg = (PC_GAMG *)mg->innerctx;
1965: PC_GAMG_AGG *pc_gamg_agg;
1967: PetscFunctionBegin;
1968: /* create sub context for SA */
1969: PetscCall(PetscNew(&pc_gamg_agg));
1970: pc_gamg->subctx = pc_gamg_agg;
1972: pc_gamg->ops->setfromoptions = PCSetFromOptions_GAMG_AGG;
1973: pc_gamg->ops->destroy = PCDestroy_GAMG_AGG;
1974: /* reset does not do anything; setup not virtual */
1976: /* set internal function pointers */
1977: pc_gamg->ops->creategraph = PCGAMGCreateGraph_AGG;
1978: pc_gamg->ops->coarsen = PCGAMGCoarsen_AGG;
1979: pc_gamg->ops->prolongator = PCGAMGConstructProlongator_AGG;
1980: pc_gamg->ops->optprolongator = PCGAMGOptimizeProlongator_AGG;
1981: pc_gamg->ops->createdefaultdata = PCSetData_AGG;
1982: pc_gamg->ops->view = PCView_GAMG_AGG;
1984: pc_gamg_agg->nsmooths = 1;
1985: pc_gamg_agg->aggressive_coarsening_levels = 1;
1986: pc_gamg_agg->use_aggressive_square_graph = PETSC_TRUE;
1987: pc_gamg_agg->use_minimum_degree_ordering = PETSC_FALSE;
1988: pc_gamg_agg->use_low_mem_filter = PETSC_FALSE;
1989: pc_gamg_agg->aggressive_mis_k = 2;
1990: pc_gamg_agg->graph_symmetrize = PETSC_TRUE;
1992: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetNSmooths_C", PCGAMGSetNSmooths_AGG));
1993: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveLevels_C", PCGAMGSetAggressiveLevels_AGG));
1994: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetAggressiveSquareGraph_C", PCGAMGSetAggressiveSquareGraph_AGG));
1995: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetMinDegreeOrdering_C", PCGAMGMISkSetMinDegreeOrdering_AGG));
1996: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetLowMemoryFilter_C", PCGAMGSetLowMemoryFilter_AGG));
1997: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGMISkSetAggressive_C", PCGAMGMISkSetAggressive_AGG));
1998: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetGraphSymmetrize_C", PCGAMGSetGraphSymmetrize_AGG));
1999: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGSetProlongatorFilter_C", PCGAMGSetProlongatorFilter_AGG));
2000: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCGAMGGetProlongatorFilter_C", PCGAMGGetProlongatorFilter_AGG));
2001: PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSetCoordinates_C", PCSetCoordinates_AGG));
2002: PetscFunctionReturn(PETSC_SUCCESS);
2003: }