Actual source code: ex4.c

static char help[] = "Applies the 2023 preconditioner of Benzi and Faccio\n\n";

#include <petscmat.h>
#include <petscviewer.h>
#include <petscvec.h>
#include <petscis.h>
#include <petscksp.h>

/*
 * This example reproduces the preconditioner outlined in the paper of Benzi
 * and Faccio, https://doi.org/10.1137/22M1505529. The problem considered is:
 *
 * (A + gamma UU^T)x = b
 *
 * whose structure arises from, for example, grad-div stabilization in the
 * Navier-Stokes momentum equation. In the code we will also refer to
 * gamma UU^T as J. The preconditioner developed by Benzi and Faccio is:
 *
 * P_alpha = (A + alpha I)(alpha I + gamma UU^T)
 *
 * Another variant, which may yield better convergence depending on the
 * specific problem, is
 *
 * P_alpha = (A + alpha D) D^-1 (alpha D + gamma UU^T)
 *
 * where D = diag(A + gamma UU^T). This is the variant implemented
 * here. Application of the preconditioner involves (approximate) solution of
 * two systems, one with (A + alpha D), and another with (alpha D + gamma
 * UU^T). For small alpha (which generally yields the best overall
 * preconditioner), (alpha D + gamma UU^T) is ill-conditioned. To combat this we
 * solve (alpha D + gamma UU^T) using the Sherman-Morrison-Woodbury (SMW)
 * matrix identity, which effectively converts the grad-div structure to a much
 * nicer div-grad (Laplacian) structure.
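 *
 * Concretely, the SMW identity applied here reads
 *
 * (alpha D + gamma UU^T)^-1
 *     = (alpha D)^-1 - gamma (alpha D)^-1 U (I + gamma U^T (alpha D)^-1 U)^-1 U^T (alpha D)^-1
 *
 * so applying the inverse requires only diagonal scalings, products with U and
 * U^T, and a solve with the much smaller capacitance matrix
 * I + gamma U^T (alpha D)^-1 U.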
 *
 * The matrices used as input can be generated by running the MATLAB/Octave
 * package IFISS. The particular matrices checked into the datafiles repository
 * and used in testing of this example correspond to a leaky lid-driven cavity
 * with a stretched grid and Q2-Q1 finite elements. The matrices are taken from
 * the last iteration of a Picard solve with a tolerance of 1e-8, a viscosity
 * of 0.1, and a 32x32 grid. We summarize below the iteration counts from
 * running this preconditioner for different grids and viscosities with a KSP
 * tolerance of 1e-6.
 *
 * viscosity  32x32  64x64  128x128
 * 0.1        28     36     43
 * 0.01       59     75     73
 * 0.002      136    161    167
 *
 * A reader of the paper will note that the performance shown above with
 * respect to decreasing viscosity is significantly worse than in the paper.
 * This is due to the choice of RHS. In the paper the RHS was generated by
 * multiplying the operator with a vector of ones, whereas here we generate the
 * RHS from a random vector. The iteration counts from the paper can be
 * reproduced by changing the RHS generation in this example, but we use the
 * more difficult RHS as the resulting performance may more closely match what
 * users experience in "physical" contexts.
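 *
 * An example invocation, using the data file locations from the tests below:
 *
 *   ./ex4 -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B \
 *         -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor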
 */

PetscErrorCode CreateAndLoadMat(const char *mat_name, Mat *mat)
{
  PetscViewer viewer;
  char        file[PETSC_MAX_PATH_LEN];
  char        flag_name[10] = "-f";
  PetscBool   flg;

  PetscFunctionBeginUser;
  // Build the option name, e.g. "-fA"; mat_name must fit in the small buffer
  PetscCall(PetscOptionsGetString(NULL, NULL, strcat(flag_name, mat_name), file, sizeof(file), &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -f%s option", mat_name);
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &viewer));
  PetscCall(MatCreate(PETSC_COMM_WORLD, mat));
  PetscCall(MatSetType(*mat, MATAIJ));
  PetscCall(PetscObjectSetName((PetscObject)*mat, mat_name));
  PetscCall(MatSetFromOptions(*mat));
  PetscCall(MatLoad(*mat, viewer));
  PetscCall(PetscViewerDestroy(&viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Context for the PCSHELL that applies (alpha*D + gamma*U*U^T)^-1 via SMW
typedef struct {
  Mat       U, UT, D, aD, aDinv, I_plus_gammaUTaDinvU;
  PC        smw_cholesky;
  PetscReal gamma, alpha;
  PetscBool setup_called;
} SmwPCCtx;

PetscErrorCode SmwSetup(PC pc)
{
  SmwPCCtx *ctx;
  Vec       aDVec;

  PetscFunctionBeginUser;
  PetscCall(PCShellGetContext(pc, &ctx));

  if (ctx->setup_called) PetscFunctionReturn(PETSC_SUCCESS);

  // Create aD = alpha * D
  PetscCall(MatDuplicate(ctx->D, MAT_COPY_VALUES, &ctx->aD));
  PetscCall(MatScale(ctx->aD, ctx->alpha));

  // Create aDinv = (alpha * D)^-1 by inverting the diagonal
  PetscCall(MatDuplicate(ctx->aD, MAT_DO_NOT_COPY_VALUES, &ctx->aDinv));
  PetscCall(MatCreateVecs(ctx->aD, &aDVec, NULL));
  PetscCall(MatGetDiagonal(ctx->aD, aDVec));
  PetscCall(VecReciprocal(aDVec));
  PetscCall(MatDiagonalSet(ctx->aDinv, aDVec, INSERT_VALUES));

  // Create UT
  PetscCall(MatTranspose(ctx->U, MAT_INITIAL_MATRIX, &ctx->UT));

  // Form the SMW capacitance matrix I + gamma * U^T * (alpha*D)^-1 * U
  PetscCall(MatMatMatMult(ctx->UT, ctx->aDinv, ctx->U, MAT_INITIAL_MATRIX, PETSC_CURRENT, &ctx->I_plus_gammaUTaDinvU));
  PetscCall(MatScale(ctx->I_plus_gammaUTaDinvU, ctx->gamma));
  PetscCall(MatShift(ctx->I_plus_gammaUTaDinvU, 1.));
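  // The capacitance matrix is pressure-sized and, per the discussion at the
  // top of the file, has div-grad (Laplacian) structure, so we factor it
  // directly with Cholesky; the factorization is configurable via the "smw_"
  // options prefix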

  PetscCall(PCCreate(PETSC_COMM_WORLD, &ctx->smw_cholesky));
  PetscCall(PCSetType(ctx->smw_cholesky, PCCHOLESKY));
  PetscCall(PCSetOperators(ctx->smw_cholesky, ctx->I_plus_gammaUTaDinvU, ctx->I_plus_gammaUTaDinvU));
  PetscCall(PCSetOptionsPrefix(ctx->smw_cholesky, "smw_"));
  PetscCall(PCSetFromOptions(ctx->smw_cholesky));
  PetscCall(PCSetUp(ctx->smw_cholesky));

  PetscCall(VecDestroy(&aDVec));

  ctx->setup_called = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode SmwApply(PC pc, Vec x, Vec y)
{
  SmwPCCtx *ctx;
  Vec       vel0, pressure0, pressure1;

  PetscFunctionBeginUser;
  PetscCall(PCShellGetContext(pc, &ctx));

  PetscCall(MatCreateVecs(ctx->UT, &vel0, &pressure0));
  PetscCall(VecDuplicate(pressure0, &pressure1));

  // Apply the SMW identity
  //   y = aDinv * x - gamma * aDinv * U * (I + gamma * U^T * aDinv * U)^-1 * U^T * aDinv * x
  // starting with the (subtracted) correction term
  PetscCall(MatMult(ctx->aDinv, x, vel0));
  PetscCall(MatMult(ctx->UT, vel0, pressure0));
  PetscCall(PCApply(ctx->smw_cholesky, pressure0, pressure1));
  PetscCall(MatMult(ctx->U, pressure1, vel0));
  PetscCall(MatMult(ctx->aDinv, vel0, y));
  PetscCall(VecScale(y, -ctx->gamma));

  // Now add the leading term aDinv * x
  PetscCall(MatMult(ctx->aDinv, x, vel0));
  PetscCall(VecAXPY(y, 1, vel0));

  PetscCall(VecDestroy(&vel0));
  PetscCall(VecDestroy(&pressure0));
  PetscCall(VecDestroy(&pressure1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

int main(int argc, char **args)
{
  Mat               A, B, Q, Acondensed, Bcondensed, BT, J, AplusJ, QInv, D, AplusD, JplusD, U;
  Mat               AplusJarray[2];
  Vec               bound, x, b, Qdiag, DVec;
  PetscBool         flg;
  PetscViewer       viewer;
  char              file[PETSC_MAX_PATH_LEN];
  PetscInt         *boundary_indices;
  PetscInt          boundary_indices_size, am, an, bm, bn, condensed_am, astart, aend, Dstart, Dend, num_local_bnd_dofs = 0;
  const PetscScalar zero = 0;
  IS                boundary_is, bulk_is;
  KSP               ksp;
  PC                pc, pcA, pcJ;
  PetscRandom       rctx;
  PetscScalar      *boundary_indices_values;
  PetscReal         gamma = 100, alpha = .01;
  PetscMPIInt       rank;
  SmwPCCtx          ctx;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  PetscCall(CreateAndLoadMat("A", &A));
  PetscCall(CreateAndLoadMat("B", &B));
  PetscCall(CreateAndLoadMat("Q", &Q));

  PetscCall(PetscOptionsGetString(NULL, NULL, "-fbound", file, sizeof(file), &flg));
  PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER, "Must indicate file with the -fbound option");

  if (rank == 0) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, file, FILE_MODE_READ, &viewer));
    PetscCall(VecCreate(PETSC_COMM_SELF, &bound));
    PetscCall(PetscObjectSetName((PetscObject)bound, "bound"));
    PetscCall(VecSetType(bound, VECSEQ));
    PetscCall(VecLoad(bound, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
    PetscCall(VecGetLocalSize(bound, &boundary_indices_size));
    PetscCall(VecGetArray(bound, &boundary_indices_values));
  }
  PetscCallMPI(MPI_Bcast(&boundary_indices_size, 1, MPIU_INT, 0, PETSC_COMM_WORLD));
  if (rank != 0) PetscCall(PetscMalloc1(boundary_indices_size, &boundary_indices_values));
  PetscCallMPI(MPI_Bcast(boundary_indices_values, (PetscMPIInt)boundary_indices_size, MPIU_SCALAR, 0, PETSC_COMM_WORLD));
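  // Every rank now holds the full list of boundary node indices, which are
  // 1-based since they come from MATLAB/IFISS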

  PetscCall(MatGetSize(A, &am, NULL));
  // The number of dofs for each velocity component
  const PetscInt nc = am / 2;
  PetscCall(MatGetOwnershipRange(A, &astart, &aend));

  PetscCall(PetscMalloc1(2 * boundary_indices_size, &boundary_indices));

  //
  // The dof index ordering appears to be all vx dofs followed by all vy dofs.
  //

  // First do vx
  for (PetscInt i = 0; i < boundary_indices_size; ++i) {
    // MATLAB uses 1-based indexing
    const PetscInt bnd_dof = (PetscInt)boundary_indices_values[i] - 1;
    if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
  }

  // Now vy
  for (PetscInt i = 0; i < boundary_indices_size; ++i) {
    // MATLAB uses 1-based indexing
    const PetscInt bnd_dof = ((PetscInt)boundary_indices_values[i] - 1) + nc;
    if ((bnd_dof >= astart) && (bnd_dof < aend)) boundary_indices[num_local_bnd_dofs++] = bnd_dof;
  }
  if (rank == 0) PetscCall(VecRestoreArray(bound, &boundary_indices_values));
  else PetscCall(PetscFree(boundary_indices_values));

  // PETSC_USE_POINTER: boundary_indices must remain valid until boundary_is is destroyed
  PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, num_local_bnd_dofs, boundary_indices, PETSC_USE_POINTER, &boundary_is));
  PetscCall(ISComplement(boundary_is, astart, aend, &bulk_is));
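  // bulk_is now holds the locally owned dofs away from the boundary;
  // condensing A and B onto these rows removes the boundary dofs from the
  // system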

  PetscCall(MatCreateSubMatrix(A, bulk_is, bulk_is, MAT_INITIAL_MATRIX, &Acondensed));
  // Can't pass NULL for the row index set, so transpose B in place and
  // extract its velocity rows instead :-(
  PetscCall(MatTranspose(B, MAT_INPLACE_MATRIX, &B));
  PetscCall(MatCreateSubMatrix(B, bulk_is, NULL, MAT_INITIAL_MATRIX, &Bcondensed));
  PetscCall(MatGetLocalSize(Acondensed, &am, &an));
  PetscCall(MatGetLocalSize(Bcondensed, &bm, &bn));

  // Create QInv = diag(Q)^-1
  PetscCall(MatCreateVecs(Q, &Qdiag, NULL));
  PetscCall(MatGetDiagonal(Q, Qdiag));
  PetscCall(VecReciprocal(Qdiag));
  PetscCall(MatDuplicate(Q, MAT_DO_NOT_COPY_VALUES, &QInv));
  PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));

  // Create J = gamma * B * diag(Q)^-1 * B^T
  PetscCall(MatTranspose(Bcondensed, MAT_INITIAL_MATRIX, &BT));
  PetscCall(MatMatMatMult(Bcondensed, QInv, BT, MAT_INITIAL_MATRIX, PETSC_CURRENT, &J));
  PetscCall(MatScale(J, gamma));

  // Create the sum A + J as a composite matrix
  AplusJarray[0] = Acondensed;
  AplusJarray[1] = J;
  PetscCall(MatCreateComposite(PETSC_COMM_WORLD, 2, AplusJarray, &AplusJ));

  // Create decomposition matrices
  // We've already used Qdiag, which currently represents diag(Q)^-1, for its
  // necessary purposes. Convert it to represent diag(Q)^(-1/2)
  PetscCall(VecSqrtAbs(Qdiag));
  // We can similarly reuse QInv
  PetscCall(MatDiagonalSet(QInv, Qdiag, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(QInv, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(QInv, MAT_FINAL_ASSEMBLY));
  // Create U
  PetscCall(MatMatMult(Bcondensed, QInv, MAT_INITIAL_MATRIX, PETSC_CURRENT, &U));
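  // With U = B * diag(Q)^(-1/2) we recover gamma * U * U^T = gamma * B * diag(Q)^-1 * B^T = J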

  // Create x and b
  PetscCall(MatCreateVecs(AplusJ, &x, &b));
  PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
  PetscCall(VecSetRandom(x, rctx));
  PetscCall(PetscRandomDestroy(&rctx));
  PetscCall(MatMult(AplusJ, x, b));
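  // As noted in the header, a RHS built from a random exact solution makes the
  // solve harder than the vector-of-ones RHS used in the paper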

  // Compute preconditioner operators
  PetscCall(MatGetLocalSize(Acondensed, &condensed_am, NULL));
  PetscCall(MatCreate(PETSC_COMM_WORLD, &D));
  PetscCall(MatSetType(D, MATAIJ));
  PetscCall(MatSetSizes(D, condensed_am, condensed_am, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatGetOwnershipRange(D, &Dstart, &Dend));
  // Insert zeros to establish the diagonal nonzero structure of D
  for (PetscInt i = Dstart; i < Dend; ++i) PetscCall(MatSetValues(D, 1, &i, 1, &i, &zero, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(D, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(D, MAT_FINAL_ASSEMBLY));
  // D = diag(A + J)
  PetscCall(MatCreateVecs(D, &DVec, NULL));
  PetscCall(MatGetDiagonal(AplusJ, DVec));
  PetscCall(MatDiagonalSet(D, DVec, INSERT_VALUES));
  PetscCall(MatDuplicate(Acondensed, MAT_COPY_VALUES, &AplusD));
  PetscCall(MatAXPY(AplusD, alpha, D, SUBSET_NONZERO_PATTERN));
  PetscCall(MatDuplicate(J, MAT_COPY_VALUES, &JplusD));
  PetscCall(MatAXPY(JplusD, alpha, D, SUBSET_NONZERO_PATTERN));
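  // AplusD = A + alpha*D and JplusD = gamma*U*U^T + alpha*D are the two
  // factors of P_alpha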

  // Initialize our SMW context
  ctx.U            = U;
  ctx.D            = D;
  ctx.gamma        = gamma;
  ctx.alpha        = alpha;
  ctx.setup_called = PETSC_FALSE;

  // Set up the solver and the two-factor composite preconditioner
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetType(ksp, KSPFGMRES));
  PetscCall(KSPSetOperators(ksp, AplusJ, AplusJ));
  PetscCall(KSPSetNormType(ksp, KSP_NORM_UNPRECONDITIONED));
  PetscCall(KSPGMRESSetRestart(ksp, 300));
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCCOMPOSITE));
  PetscCall(PCCompositeSetType(pc, PC_COMPOSITE_SPECIAL));
  PetscCall(PCCompositeAddPCType(pc, PCILU));
  PetscCall(PCCompositeAddPCType(pc, PCSHELL));
  PetscCall(PCCompositeGetPC(pc, 0, &pcA));
  PetscCall(PCCompositeGetPC(pc, 1, &pcJ));
  PetscCall(PCSetOperators(pcA, AplusD, AplusD));
  PetscCall(PCSetOperators(pcJ, JplusD, JplusD));
  PetscCall(PCShellSetContext(pcJ, &ctx));
  PetscCall(PCShellSetApply(pcJ, SmwApply));
  PetscCall(PCShellSetSetUp(pcJ, SmwSetup));
  PetscCall(PCCompositeSpecialSetAlphaMat(pc, D));
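  // With PC_COMPOSITE_SPECIAL and D as the middle ("alpha") matrix, applying
  // the composite computes y = pcJ(D * pcA(x)), i.e. P_alpha^-1 with
  // P_alpha = (A + alpha*D) D^-1 (alpha*D + J)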

  // Solve
  PetscCall(KSPSetFromOptions(ksp));
  PetscCall(KSPSolve(ksp, b, x));

  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&Q));
  PetscCall(MatDestroy(&Acondensed));
  PetscCall(MatDestroy(&Bcondensed));
  PetscCall(MatDestroy(&BT));
  PetscCall(MatDestroy(&J));
  PetscCall(MatDestroy(&AplusJ));
  PetscCall(MatDestroy(&QInv));
  PetscCall(MatDestroy(&D));
  PetscCall(MatDestroy(&AplusD));
  PetscCall(MatDestroy(&JplusD));
  PetscCall(MatDestroy(&U));
  if (rank == 0) PetscCall(VecDestroy(&bound));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(VecDestroy(&Qdiag));
  PetscCall(VecDestroy(&DVec));
  PetscCall(ISDestroy(&boundary_is));
  PetscCall(ISDestroy(&bulk_is));
  PetscCall(KSPDestroy(&ksp));
  PetscCall(PetscFree(boundary_indices));
  PetscCall(MatDestroy(&ctx.UT));
  PetscCall(MatDestroy(&ctx.I_plus_gammaUTaDinvU));
  PetscCall(MatDestroy(&ctx.aD));
  PetscCall(MatDestroy(&ctx.aDinv));
  PetscCall(PCDestroy(&ctx.smw_cholesky));

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   build:
      requires: !complex

   test:
      args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor
      requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double

   test:
      suffix: 2
      nsize: 2
      args: -fA ${DATAFILESPATH}/matrices/ifiss/A -fB ${DATAFILESPATH}/matrices/ifiss/B -fQ ${DATAFILESPATH}/matrices/ifiss/Q -fbound ${DATAFILESPATH}/is/ifiss/bound -ksp_monitor -smw_pc_type lu -smw_pc_factor_mat_solver_type strumpack
      requires: datafilespath defined(PETSC_USE_64BIT_INDICES) !complex double strumpack

TEST*/