Actual source code: ex4.c
static char help[] = "Applies the 2023 preconditioner of Benzi and Faccio\n\n";

#include <petscmat.h>
#include <petscviewer.h>
#include <petscvec.h>
#include <petscis.h>
#include <petscksp.h>

/*
 * This example reproduces the preconditioner outlined in Benzi's paper
 * https://doi.org/10.1137/22M1505529. The problem considered is:
 *
 * (A + gamma UU^T)x = b
 *
 * whose structure arises from, for example, grad-div stabilization in the
 * Navier-Stokes momentum equation. In the code we will also refer to
 * gamma UU^T as J. The preconditioner developed by Benzi is:
 *
 * P_alpha = (A + alpha I)(alpha I + gamma UU^T)
 *
 * Another variant which may yield better convergence depending on the specific
 * problem is
 *
 * P_alpha = (A + alpha D) D^-1 (alpha D + gamma UU^T)
 *
 * where D = diag(A + gamma UU^T). This is the variant implemented
 * here. Application of the preconditioner involves (approximate) solution of
 * two systems, one with (A + alpha D), and another with (alpha D + gamma
 * UU^T). For small alpha (which generally yields the best overall
 * preconditioner), (alpha D + gamma UU^T) is ill-conditioned. To combat this we
 * solve (alpha D + gamma UU^T) using the Sherman-Morrison-Woodbury (SMW) matrix
 * identity, which effectively converts the grad-div structure to a much nicer
 * div-grad (laplacian) structure.
 *
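 * Explicitly, the SMW identity as used in SmwApply() below reads
 *
 * (alpha D + gamma UU^T)^-1 = (alpha D)^-1
 *     - gamma (alpha D)^-1 U (I + gamma U^T (alpha D)^-1 U)^-1 U^T (alpha D)^-1
 *
 * so only the diagonal matrix alpha D and the much smaller (pressure-sized)
 * matrix I + gamma U^T (alpha D)^-1 U ever need to be inverted.
 *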
 * The matrices used as input can be generated by running the MATLAB/Octave
 * program IFISS. The particular matrices checked into the datafiles repository
 * and used in testing of this example correspond to a leaky lid-driven cavity
 * with a stretched grid and Q2-Q1 finite elements. The matrices are taken from
 * the last iteration of a Picard solve with tolerance 1e-8 with a viscosity of
 * 0.1 and a 32x32 grid. We summarize below iteration counts from running this
 * preconditioner for different grids and viscosities with a KSP tolerance of 1e-6.
 *
 * viscosity   32x32   64x64   128x128
 * 0.1         28      36      43
 * 0.01        59      75      73
 * 0.002       136     161     167
 *
 * A reader of Benzi's paper will note that the performance shown above with
 * respect to decreasing viscosity is significantly worse than in the
 * paper. This is actually because of the choice of RHS. In Benzi's work, the
 * RHS was generated by multiplying the operator with a vector of 1s, whereas
 * here we generate the RHS using a random vector. The iteration counts from the
 * Benzi paper can be reproduced by changing the RHS generation in this example,
 * but we choose to use the more difficult RHS as the resulting performance may
 * more closely match what users experience in "physical" contexts.
 */
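
// Load the matrix named mat_name from the binary file given by the
// -f<mat_name> command-line option (e.g. -fA for the matrix named "A")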
PetscErrorCode CreateAndLoadMat(const char *mat_name, Mat *mat)
{
  PetscViewer viewer;
  char        file[PETSC_MAX_PATH_LEN];
  char        flag_name[10] = "-f";
  PetscBool   flg;

  PetscFunctionBeginUser;
  PetscCall(PetscOptionsGetString(NULL, NULL, strcat(flag_name, mat_name), file, sizeof(file), &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -f<mat_name> option");
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
  PetscCall(MatCreate(PETSC_COMM_WORLD, mat));
  PetscCall(MatSetType(*mat, MATAIJ));
  PetscCall(PetscObjectSetName((PetscObject)*mat, mat_name));
  PetscCall(MatSetFromOptions(*mat));
  PetscCall(MatLoad(*mat, viewer));
  PetscCall(PetscViewerDestroy(&viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
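
// Context for the shell PC that applies (alpha*D + gamma*U*U^T)^-1 via the SMW
// identity stated in the header comment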
typedef struct {
  Mat       U, UT, D, aD, aDinv, I_plus_gammaUTaDinvU;
  PC        smw_cholesky;
  PetscReal gamma, alpha;
  PetscBool setup_called;
} SmwPCCtx;
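
// One-time setup: build alpha*D and its inverse, U^T, the small SMW matrix
// I + gamma*U^T*(alpha*D)^-1*U, and a Cholesky PC for solving with the latter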
PetscErrorCode SmwSetup(PC pc)
{
  SmwPCCtx *ctx;
  Vec       aDVec;

  PetscFunctionBeginUser;
  PetscCall(PCShellGetContext(pc, &ctx));

  if (ctx->setup_called) PetscFunctionReturn(PETSC_SUCCESS);

  // Create aD
  PetscCall(MatDuplicate(ctx->D, MAT_COPY_VALUES, &ctx->aD));
  PetscCall(MatScale(ctx->aD, ctx->alpha));

  // Create aDinv
  PetscCall(MatDuplicate(ctx->aD, MAT_DO_NOT_COPY_VALUES, &ctx->aDinv));
  PetscCall(MatCreateVecs(ctx->aD, &aDVec, NULL));
  PetscCall(MatGetDiagonal(ctx->aD, aDVec));
  PetscCall(VecReciprocal(aDVec));
  PetscCall(MatDiagonalSet(ctx->aDinv, aDVec, INSERT_VALUES));

  // Create UT
  PetscCall(MatTranspose(ctx->U, MAT_INITIAL_MATRIX, &ctx->UT));

  // Create the small SMW matrix I + gamma*U^T*(alpha*D)^-1*U
  PetscCall(MatMatMatMult(ctx->UT, ctx->aDinv, ctx->U, MAT_INITIAL_MATRIX, PETSC_CURRENT, &ctx->I_plus_gammaUTaDinvU));
  PetscCall(MatScale(ctx->I_plus_gammaUTaDinvU, ctx->gamma));
  PetscCall(MatShift(ctx->I_plus_gammaUTaDinvU, 1.));

  PetscCall(PCCreate(PETSC_COMM_WORLD, &ctx->smw_cholesky));
  PetscCall(PCSetType(ctx->smw_cholesky, PCCHOLESKY));
  PetscCall(PCSetOperators(ctx->smw_cholesky, ctx->I_plus_gammaUTaDinvU, ctx->I_plus_gammaUTaDinvU));
  PetscCall(PCSetOptionsPrefix(ctx->smw_cholesky, "smw_"));
  PetscCall(PCSetFromOptions(ctx->smw_cholesky));
  PetscCall(PCSetUp(ctx->smw_cholesky));

  PetscCall(VecDestroy(&aDVec));

  ctx->setup_called = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
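
// Apply y = (alpha*D + gamma*U*U^T)^-1 x via SMW: a diagonal solve with
// alpha*D plus a Cholesky solve with the small matrix built in SmwSetup()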
PetscErrorCode SmwApply(PC pc, Vec x, Vec y)
{
  SmwPCCtx *ctx;
  Vec       vel0, pressure0, pressure1;

  PetscFunctionBeginUser;
  PetscCall(PCShellGetContext(pc, &ctx));

  PetscCall(MatCreateVecs(ctx->UT, &vel0, &pressure0));
  PetscCall(VecDuplicate(pressure0, &pressure1));

  // First term: -gamma (alpha D)^-1 U (I + gamma U^T (alpha D)^-1 U)^-1 U^T (alpha D)^-1 x
  PetscCall(MatMult(ctx->aDinv, x, vel0));
  PetscCall(MatMult(ctx->UT, vel0, pressure0));
  PetscCall(PCApply(ctx->smw_cholesky, pressure0, pressure1));
  PetscCall(MatMult(ctx->U, pressure1, vel0));
  PetscCall(MatMult(ctx->aDinv, vel0, y));
  PetscCall(VecScale(y, -ctx->gamma));

  // Second term: (alpha D)^-1 x, recomputed since vel0 was overwritten above
  PetscCall(MatMult(ctx->aDinv, x, vel0));

  PetscCall(VecAXPY(y, 1, vel0));

  PetscCall(VecDestroy(&vel0));
  PetscCall(VecDestroy(&pressure0));
  PetscCall(VecDestroy(&pressure1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
int main(int argc, char **args)
{
  Mat               A, B, Q, Acondensed, Bcondensed, BT, J, AplusJ, QInv, D, AplusD, JplusD, U;
  Mat               AplusJarray[2];
  Vec               bound, x, b, Qdiag, DVec;
  PetscBool         flg;
  PetscViewer       viewer;
  char              file[PETSC_MAX_PATH_LEN];
  PetscInt         *boundary_indices;
  PetscInt          boundary_indices_size, am, an, bm, bn, condensed_am, astart, aend, Dstart, Dend, num_local_bnd_dofs = 0;
  const PetscScalar zero = 0;
  IS                boundary_is, bulk_is;
  KSP               ksp;
  PC                pc, pcA, pcJ;
  PetscRandom       rctx;
  PetscReal        *boundary_indices_values;
  PetscReal         gamma = 100, alpha = .01;
  PetscMPIInt       rank;
  SmwPCCtx          ctx;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  PetscCall(CreateAndLoadMat("A", &A));
  PetscCall(CreateAndLoadMat("B", &B));
  PetscCall(CreateAndLoadMat("Q", &Q));

  PetscCall(PetscOptionsGetString(NULL, NULL, "-fbound", file, sizeof(file), &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -fbound option");
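
  // Read the boundary-dof list on rank 0, then broadcast its size and entries
  // so that every rank can pick out the boundary dofs it owns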
  if (rank == 0) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, file, FILE_MODE_READ, &viewer));
    PetscCall(VecCreate(PETSC_COMM_SELF, &bound));
    PetscCall(PetscObjectSetName((PetscObject)bound, "bound"));
    PetscCall(VecSetType(bound, VECSEQ));
    PetscCall(VecLoad(bound, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
    PetscCall(VecGetLocalSize(bound, &boundary_indices_size));
    PetscCall(VecGetArray(bound, &boundary_indices_values));
  }
  PetscCallMPI(MPI_Bcast(&boundary_indices_size, 1, MPIU_INT, 0, PETSC_COMM_WORLD));
  if (rank != 0) PetscCall(PetscMalloc1(boundary_indices_size, &boundary_indices_values));
  PetscCallMPI(MPI_Bcast(boundary_indices_values, (PetscMPIInt)boundary_indices_size, MPIU_SCALAR, 0, PETSC_COMM_WORLD));

  PetscCall(MatGetSize(A, &am, NULL));
  // The total number of dofs for a given velocity component
  const PetscInt nc = am / 2;
  PetscCall(MatGetOwnershipRange(A, &astart, &aend));

  PetscCall(PetscMalloc1(2 * boundary_indices_size, &boundary_indices));

  //
  // The dof index ordering appears to be all vx dofs and then all vy dofs.
  //

  // First do vx
  for (PetscInt i = 0; i < boundary_indices_size; ++i) {
    // MATLAB uses 1-based indexing
    const PetscInt bnd_dof = (PetscInt)boundary_indices_values[i] - 1;
    if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
  }

  // Now vy
  for (PetscInt i = 0; i < boundary_indices_size; ++i) {
    // MATLAB uses 1-based indexing
    const PetscInt bnd_dof = ((PetscInt)boundary_indices_values[i] - 1) + nc;
    if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
  }
  if (rank == 0) PetscCall(VecRestoreArray(bound, &boundary_indices_values));
  else PetscCall(PetscFree(boundary_indices_values));
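
  // boundary_is holds the locally owned boundary (Dirichlet) dofs; bulk_is is
  // its complement, the interior dofs kept in the condensed system below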
  PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, num_local_bnd_dofs, boundary_indices, PETSC_USE_POINTER, &boundary_is));
  PetscCall(ISComplement(boundary_is, astart, aend, &bulk_is));

  PetscCall(MatCreateSubMatrix(A, bulk_is, bulk_is, MAT_INITIAL_MATRIX, &Acondensed));
  // Can't pass NULL for the row index set, so transpose B in place and pass
  // NULL for the columns instead :-(
  PetscCall(MatTranspose(B, MAT_INPLACE_MATRIX, &B));
  PetscCall(MatCreateSubMatrix(B, bulk_is, NULL, MAT_INITIAL_MATRIX, &Bcondensed));
  PetscCall(MatGetLocalSize(Acondensed, &am, &an));
  PetscCall(MatGetLocalSize(Bcondensed, &bm, &bn));

  // Create QInv
  PetscCall(MatCreateVecs(Q, &Qdiag, NULL));
  PetscCall(MatGetDiagonal(Q, Qdiag));
  PetscCall(VecReciprocal(Qdiag));
  PetscCall(MatDuplicate(Q, MAT_DO_NOT_COPY_VALUES, &QInv));
  PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));

  // Create J
  PetscCall(MatTranspose(Bcondensed, MAT_INITIAL_MATRIX, &BT));
  PetscCall(MatMatMatMult(Bcondensed, QInv, BT, MAT_INITIAL_MATRIX, PETSC_CURRENT, &J));
  PetscCall(MatScale(J, gamma));

  // Create sum of A + J
  AplusJarray[0] = Acondensed;
  AplusJarray[1] = J;
  PetscCall(MatCreateComposite(PETSC_COMM_WORLD, 2, AplusJarray, &AplusJ));

  // Create decomposition matrices
  // We've already used Qdiag, which currently represents Q^-1, for its
  // necessary purposes. Let's convert it to represent Q^(-1/2)
  PetscCall(VecSqrtAbs(Qdiag));
  // We can similarly reuse QInv
  PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));
  // Create U
  PetscCall(MatMatMult(Bcondensed, QInv, MAT_INITIAL_MATRIX, PETSC_CURRENT, &U));

  // Create x and b
  PetscCall(MatCreateVecs(AplusJ, &x, &b));
  PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
  PetscCall(VecSetRandom(x, rctx));
  PetscCall(PetscRandomDestroy(&rctx));
  PetscCall(MatMult(AplusJ, x, b));

  // Compute the preconditioner operators: D = diag(A + J), then A + alpha*D
  // and J + alpha*D
  PetscCall(MatGetLocalSize(Acondensed, &condensed_am, NULL));
  PetscCall(MatCreate(PETSC_COMM_WORLD, &D));
  PetscCall(MatSetType(D, MATAIJ));
  PetscCall(MatSetSizes(D, condensed_am, condensed_am, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatGetOwnershipRange(D, &Dstart, &Dend));
  for (PetscInt i = Dstart; i < Dend; ++i) PetscCall(MatSetValues(D, 1, &i, 1, &i, &zero, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(D, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(D, MAT_FINAL_ASSEMBLY));
  PetscCall(MatCreateVecs(D, &DVec, NULL));
  PetscCall(MatGetDiagonal(AplusJ, DVec));
  PetscCall(MatDiagonalSet(D, DVec, INSERT_VALUES));
  PetscCall(MatDuplicate(Acondensed, MAT_COPY_VALUES, &AplusD));
  PetscCall(MatAXPY(AplusD, alpha, D, SUBSET_NONZERO_PATTERN));
  PetscCall(MatDuplicate(J, MAT_COPY_VALUES, &JplusD));
  PetscCall(MatAXPY(JplusD, alpha, D, SUBSET_NONZERO_PATTERN));

  // Initialize our SMW context
  ctx.U            = U;
  ctx.D            = D;
  ctx.gamma        = gamma;
  ctx.alpha        = alpha;
  ctx.setup_called = PETSC_FALSE;

  // Set up the solver and the composite preconditioner
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetType(ksp, KSPFGMRES));
  PetscCall(KSPSetOperators(ksp, AplusJ, AplusJ));
  PetscCall(KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED));
  PetscCall(KSPGMRESSetRestart(ksp, 300));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCCOMPOSITE));
  PetscCall(PCCompositeSetType(pc, PC_COMPOSITE_SPECIAL));
  PetscCall(PCCompositeAddPCType(pc, PCILU));
  PetscCall(PCCompositeAddPCType(pc, PCSHELL));
  PetscCall(PCCompositeGetPC(pc, 0, &pcA));
  PetscCall(PCCompositeGetPC(pc, 1, &pcJ));
  PetscCall(PCSetOperators(pcA, AplusD, AplusD));
  PetscCall(PCSetOperators(pcJ, JplusD, JplusD));
  PetscCall(PCShellSetContext(pcJ, &ctx));
  PetscCall(PCShellSetApply(pcJ, SmwApply));
  PetscCall(PCShellSetSetUp(pcJ, SmwSetup));
  PetscCall(PCCompositeSpecialSetAlphaMat(pc, D));
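
  // With PC_COMPOSITE_SPECIAL, the composite applies the first PC, multiplies
  // by the "alpha" matrix set above (here D), and then applies the second PC,
  // which is intended to realize P_alpha^-1 = (alpha D + J)^-1 D (A + alpha D)^-1
  // from the header comment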

  // Solve
  PetscCall(KSPSetFromOptions(ksp));
  PetscCall(KSPSolve(ksp, b, x));

  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&Q));
  PetscCall(MatDestroy(&Acondensed));
  PetscCall(MatDestroy(&Bcondensed));
  PetscCall(MatDestroy(&BT));
  PetscCall(MatDestroy(&J));
  PetscCall(MatDestroy(&AplusJ));
  PetscCall(MatDestroy(&QInv));
  PetscCall(MatDestroy(&D));
  PetscCall(MatDestroy(&AplusD));
  PetscCall(MatDestroy(&JplusD));
  PetscCall(MatDestroy(&U));
  if (rank == 0) PetscCall(VecDestroy(&bound));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(VecDestroy(&Qdiag));
  PetscCall(VecDestroy(&DVec));
  PetscCall(ISDestroy(&boundary_is));
  PetscCall(ISDestroy(&bulk_is));
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscFree(boundary_indices));
  PetscCall(MatDestroy(&ctx.UT));
  PetscCall(MatDestroy(&ctx.I_plus_gammaUTaDinvU));
  PetscCall(MatDestroy(&ctx.aD));
  PetscCall(MatDestroy(&ctx.aDinv));
  PetscCall(PCDestroy(&ctx.smw_cholesky));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   build:
     requires: !complex

   test:
     args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor
     requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double

   test:
     suffix: 2
     nsize: 2
     args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor
     requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double strumpack

TEST*/