Actual source code: bcgsl.c

  1: /*
  2:    Implementation of BiCGstab(L) the paper by D.R. Fokkema,
  3:    "Enhanced implementation of BiCGStab(L) for solving linear systems
  4:    of equations". This uses tricky delayed updating ideas to prevent
  5:    round-off buildup.
  6: */
  7: #include <petsc/private/kspimpl.h>
  8: #include <../src/ksp/ksp/impls/bcgsl/bcgslimpl.h>
  9: #include <petscblaslapack.h>

 11: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
 12: {
 13:   KSP_BCGSL   *bcgsl = (KSP_BCGSL *)ksp->data;
 14:   PetscScalar  alpha, beta, omega, sigma;
 15:   PetscScalar  rho0, rho1;
 16:   PetscReal    kappa0, kappaA, kappa1;
 17:   PetscReal    ghat;
 18:   PetscReal    zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
 19:   PetscBool    bUpdateX;
 20:   PetscInt     maxit;
 21:   PetscInt     h, i, j, k, vi, ell;
 22:   PetscBLASInt ldMZ, bierr;
 23:   PetscScalar  utb;
 24:   PetscReal    max_s, pinv_tol;

 26:   PetscFunctionBegin;
 27:   /* set up temporary vectors */
 28:   vi        = 0;
 29:   ell       = bcgsl->ell;
 30:   bcgsl->vB = ksp->work[vi];
 31:   vi++;
 32:   bcgsl->vRt = ksp->work[vi];
 33:   vi++;
 34:   bcgsl->vTm = ksp->work[vi];
 35:   vi++;
 36:   bcgsl->vvR = ksp->work + vi;
 37:   vi += ell + 1;
 38:   bcgsl->vvU = ksp->work + vi;
 39:   vi += ell + 1;
 40:   bcgsl->vXr = ksp->work[vi];
 41:   vi++;
 42:   PetscCall(PetscBLASIntCast(ell + 1, &ldMZ));

 44:   /* Prime the iterative solver */
 45:   PetscCall(KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs));
 46:   PetscCall(VecNorm(VVR[0], NORM_2, &zeta0));
 47:   KSPCheckNorm(ksp, zeta0);
 48:   rnmax_computed = zeta0;
 49:   rnmax_true     = zeta0;

 51:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
 52:   ksp->its = 0;
 53:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta0;
 54:   else ksp->rnorm = 0.0;
 55:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
 56:   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
 57:   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
 58:   PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP));
 59:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

 61:   PetscCall(VecSet(VVU[0], 0.0));
 62:   alpha = 0.;
 63:   rho0 = omega = 1;

 65:   if (bcgsl->delta > 0.0) {
 66:     PetscCall(VecCopy(VX, VXR));
 67:     PetscCall(VecSet(VX, 0.0));
 68:     PetscCall(VecCopy(VVR[0], VB));
 69:   } else {
 70:     PetscCall(VecCopy(ksp->vec_rhs, VB));
 71:   }

 73:   /* Life goes on */
 74:   PetscCall(VecCopy(VVR[0], VRT));
 75:   zeta = zeta0;

 77:   PetscCall(KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit));

 79:   for (k = 0; k < maxit; k += bcgsl->ell) {
 80:     ksp->its = k;
 81:     if (k > 0) {
 82:       if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
 83:       else ksp->rnorm = 0.0;

 85:       PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
 86:       PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));

 88:       PetscCall((*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP));
 89:       if (ksp->reason < 0) PetscFunctionReturn(PETSC_SUCCESS);
 90:       if (ksp->reason) {
 91:         if (bcgsl->delta > 0.0) PetscCall(VecAXPY(VX, 1.0, VXR));
 92:         PetscFunctionReturn(PETSC_SUCCESS);
 93:       }
 94:     }

 96:     /* BiCG part */
 97:     rho0 = -omega * rho0;
 98:     nrm0 = zeta;
 99:     for (j = 0; j < bcgsl->ell; j++) {
100:       /* rho1 <- r_j' * r_tilde */
101:       PetscCall(VecDot(VVR[j], VRT, &rho1));
102:       KSPCheckDot(ksp, rho1);
103:       if (rho1 == 0.0) {
104:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
105:         PetscFunctionReturn(PETSC_SUCCESS);
106:       }
107:       beta = alpha * (rho1 / rho0);
108:       rho0 = rho1;
109:       for (i = 0; i <= j; i++) {
110:         /* u_i <- r_i - beta*u_i */
111:         PetscCall(VecAYPX(VVU[i], -beta, VVR[i]));
112:       }
113:       /* u_{j+1} <- inv(K)*A*u_j */
114:       PetscCall(KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j + 1], VTM));

116:       PetscCall(VecDot(VVU[j + 1], VRT, &sigma));
117:       KSPCheckDot(ksp, sigma);
118:       if (sigma == 0.0) {
119:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
120:         PetscFunctionReturn(PETSC_SUCCESS);
121:       }
122:       alpha = rho1 / sigma;

124:       /* x <- x + alpha*u_0 */
125:       PetscCall(VecAXPY(VX, alpha, VVU[0]));

127:       for (i = 0; i <= j; i++) {
128:         /* r_i <- r_i - alpha*u_{i+1} */
129:         PetscCall(VecAXPY(VVR[i], -alpha, VVU[i + 1]));
130:       }

132:       /* r_{j+1} <- inv(K)*A*r_j */
133:       PetscCall(KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j + 1], VTM));

135:       PetscCall(VecNorm(VVR[0], NORM_2, &nrm0));
136:       KSPCheckNorm(ksp, nrm0);
137:       if (bcgsl->delta > 0.0) {
138:         if (rnmax_computed < nrm0) rnmax_computed = nrm0;
139:         if (rnmax_true < nrm0) rnmax_true = nrm0;
140:       }

142:       /* NEW: check for early exit */
143:       PetscCall((*ksp->converged)(ksp, k + j, nrm0, &ksp->reason, ksp->cnvP));
144:       if (ksp->reason) {
145:         PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
146:         ksp->its   = k + j;
147:         ksp->rnorm = nrm0;

149:         PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
150:         if (ksp->reason < 0) PetscFunctionReturn(PETSC_SUCCESS);
151:       }
152:     }

154:     /* Polynomial part */
155:     for (i = 0; i <= bcgsl->ell; ++i) PetscCall(VecMDot(VVR[i], i + 1, VVR, &MZa[i * ldMZ]));
156:     /* Symmetrize MZa */
157:     for (i = 0; i <= bcgsl->ell; ++i) {
158:       for (j = i + 1; j <= bcgsl->ell; ++j) MZa[i * ldMZ + j] = MZa[j * ldMZ + i] = PetscConj(MZa[j * ldMZ + i]);
159:     }
160:     /* Copy MZa to MZb */
161:     PetscCall(PetscArraycpy(MZb, MZa, ldMZ * ldMZ));

163:     if (!bcgsl->bConvex || bcgsl->ell == 1) {
164:       PetscBLASInt ione = 1, bell;
165:       PetscCall(PetscBLASIntCast(bcgsl->ell, &bell));

167:       AY0c[0] = -1;
168:       if (bcgsl->pinv) {
169: #if defined(PETSC_USE_COMPLEX)
170:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "A", &bell, &bell, &MZa[1 + ldMZ], &ldMZ, bcgsl->s, bcgsl->u, &bell, bcgsl->v, &bell, bcgsl->work, &bcgsl->lwork, bcgsl->realwork, &bierr));
171: #else
172:         PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "A", &bell, &bell, &MZa[1 + ldMZ], &ldMZ, bcgsl->s, bcgsl->u, &bell, bcgsl->v, &bell, bcgsl->work, &bcgsl->lwork, &bierr));
173: #endif
174:         if (bierr != 0) {
175:           ksp->reason = KSP_DIVERGED_BREAKDOWN;
176:           PetscFunctionReturn(PETSC_SUCCESS);
177:         }
178:         /* Apply pseudo-inverse */
179:         max_s = bcgsl->s[0];
180:         for (i = 1; i < bell; i++) {
181:           if (bcgsl->s[i] > max_s) max_s = bcgsl->s[i];
182:         }
183:         /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
184:         pinv_tol = bell * max_s * PETSC_MACHINE_EPSILON;
185:         PetscCall(PetscArrayzero(&AY0c[1], bell));
186:         for (i = 0; i < bell; i++) {
187:           if (bcgsl->s[i] >= pinv_tol) {
188:             utb = 0.;
189:             for (j = 0; j < bell; j++) utb += MZb[1 + j] * bcgsl->u[i * bell + j];

191:             for (j = 0; j < bell; j++) AY0c[1 + j] += utb / bcgsl->s[i] * bcgsl->v[j * bell + i];
192:           }
193:         }
194:       } else {
195:         PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("Lower", &bell, &MZa[1 + ldMZ], &ldMZ, &bierr));
196:         if (bierr != 0) {
197:           ksp->reason = KSP_DIVERGED_BREAKDOWN;
198:           PetscFunctionReturn(PETSC_SUCCESS);
199:         }
200:         PetscCall(PetscArraycpy(&AY0c[1], &MZb[1], bcgsl->ell));
201:         PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("Lower", &bell, &ione, &MZa[1 + ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
202:       }
203:     } else {
204:       PetscBLASInt ione = 1;
205:       PetscScalar  aone = 1.0, azero = 0.0;
206:       PetscBLASInt neqs;
207:       PetscCall(PetscBLASIntCast(bcgsl->ell - 1, &neqs));

209:       PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("Lower", &neqs, &MZa[1 + ldMZ], &ldMZ, &bierr));
210:       if (bierr != 0) {
211:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
212:         PetscFunctionReturn(PETSC_SUCCESS);
213:       }
214:       PetscCall(PetscArraycpy(&AY0c[1], &MZb[1], bcgsl->ell - 1));
215:       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1 + ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
216:       AY0c[0]          = -1;
217:       AY0c[bcgsl->ell] = 0.;

219:       PetscCall(PetscArraycpy(&AYlc[1], &MZb[1 + ldMZ * (bcgsl->ell)], bcgsl->ell - 1));
220:       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1 + ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

222:       AYlc[0]          = 0.;
223:       AYlc[bcgsl->ell] = -1;

225:       PetscCallBLAS("BLASgemv", BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

227:       kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

229:       /* round-off can cause negative kappa's */
230:       if (kappa0 < 0) kappa0 = -kappa0;
231:       kappa0 = PetscSqrtReal(kappa0);

233:       kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

235:       PetscCallBLAS("BLASgemv", BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

237:       kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

239:       if (kappa1 < 0) kappa1 = -kappa1;
240:       kappa1 = PetscSqrtReal(kappa1);

242:       if (kappa0 != 0.0 && kappa1 != 0.0) {
243:         if (kappaA < 0.7 * kappa0 * kappa1) {
244:           ghat = (kappaA < 0.0) ? -0.7 * kappa0 / kappa1 : 0.7 * kappa0 / kappa1;
245:         } else {
246:           ghat = kappaA / (kappa1 * kappa1);
247:         }
248:         for (i = 0; i <= bcgsl->ell; i++) AY0c[i] = AY0c[i] - ghat * AYlc[i];
249:       }
250:     }

252:     omega = AY0c[bcgsl->ell];
253:     for (h = bcgsl->ell; h > 0 && omega == 0.0; h--) omega = AY0c[h];
254:     if (omega == 0.0) {
255:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
256:       PetscFunctionReturn(PETSC_SUCCESS);
257:     }

259:     PetscCall(VecMAXPY(VX, bcgsl->ell, AY0c + 1, VVR));
260:     for (i = 1; i <= bcgsl->ell; i++) AY0c[i] *= -1.0;
261:     PetscCall(VecMAXPY(VVU[0], bcgsl->ell, AY0c + 1, VVU + 1));
262:     PetscCall(VecMAXPY(VVR[0], bcgsl->ell, AY0c + 1, VVR + 1));
263:     for (i = 1; i <= bcgsl->ell; i++) AY0c[i] *= -1.0;
264:     PetscCall(VecNorm(VVR[0], NORM_2, &zeta));
265:     KSPCheckNorm(ksp, zeta);

267:     /* Accurate Update */
268:     if (bcgsl->delta > 0.0) {
269:       if (rnmax_computed < zeta) rnmax_computed = zeta;
270:       if (rnmax_true < zeta) rnmax_true = zeta;

272:       bUpdateX = (PetscBool)(zeta < bcgsl->delta * zeta0 && zeta0 <= rnmax_computed);
273:       if ((zeta < bcgsl->delta * rnmax_true && zeta0 <= rnmax_true) || bUpdateX) {
274:         /* r0 <- b-inv(K)*A*X */
275:         PetscCall(KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM));
276:         PetscCall(VecAYPX(VVR[0], -1.0, VB));
277:         rnmax_true = zeta;

279:         if (bUpdateX) {
280:           PetscCall(VecAXPY(VXR, 1.0, VX));
281:           PetscCall(VecSet(VX, 0.0));
282:           PetscCall(VecCopy(VVR[0], VB));
283:           rnmax_computed = zeta;
284:         }
285:       }
286:     }
287:   }
288:   if (bcgsl->delta > 0.0) PetscCall(VecAXPY(VX, 1.0, VXR));

290:   ksp->its = k;
291:   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = zeta;
292:   else ksp->rnorm = 0.0;
293:   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
294:   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
295:   PetscCall((*ksp->converged)(ksp, k, ksp->rnorm, &ksp->reason, ksp->cnvP));
296:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
297:   PetscFunctionReturn(PETSC_SUCCESS);
298: }

300: /*@
301:   KSPBCGSLSetXRes - Sets the parameter governing when
302:   exact residuals will be used instead of computed residuals for `KSPCBGSL`.

304:   Logically Collective

306:   Input Parameters:
307: + ksp   - iterative context of type `KSPBCGSL`
308: - delta - computed residuals are used alone when delta is not positive

310:   Options Database Key:
311: . -ksp_bcgsl_xres delta - Threshold used to decide when to refresh computed residuals

313:   Level: intermediate

315: .seealso: [](ch_ksp), `KSPBCGSLSetEll()`, `KSPBCGSLSetPol()`, `KSP`, `KSPCBGSL`, `KSPBCGSLSetUsePseudoinverse()`
316: @*/
317: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
318: {
319:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;

321:   PetscFunctionBegin;
323:   if (ksp->setupstage) {
324:     if ((delta <= 0 && bcgsl->delta > 0) || (delta > 0 && bcgsl->delta <= 0)) {
325:       PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
326:       PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
327:       PetscCall(PetscFree4(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v));
328:       ksp->setupstage = KSP_SETUP_NEW;
329:     }
330:   }
331:   bcgsl->delta = delta;
332:   PetscFunctionReturn(PETSC_SUCCESS);
333: }

335: /*@
336:   KSPBCGSLSetUsePseudoinverse - Use pseudoinverse (via SVD) to solve polynomial part of the update in `KSPCBGSL` solver

338:   Logically Collective

340:   Input Parameters:
341: + ksp      - iterative context of type `KSPCBGSL`
342: - use_pinv - set to `PETSC_TRUE` when using pseudoinverse

344:   Options Database Key:
345: . -ksp_bcgsl_pinv <true,false> - use pseudoinverse

347:   Level: intermediate

349: .seealso: [](ch_ksp), `KSPBCGSLSetEll()`, `KSP`, `KSPCBGSL`, `KSPBCGSLSetPol()`, `KSPBCGSLSetXRes()`
350: @*/
351: PetscErrorCode KSPBCGSLSetUsePseudoinverse(KSP ksp, PetscBool use_pinv)
352: {
353:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;

355:   PetscFunctionBegin;
356:   bcgsl->pinv = use_pinv;
357:   PetscFunctionReturn(PETSC_SUCCESS);
358: }

360: /*@
361:   KSPBCGSLSetPol - Sets the type of polynomial part that will
362:   be used in the `KSPCBGSL` `KSPSolve()`

364:   Logically Collective

366:   Input Parameters:
367: + ksp   - iterative context of type `KSPCBGSL`
368: - uMROR - set to `PETSC_TRUE` when the polynomial is a convex combination of an MR and an OR step.

370:   Options Database Keys:
371: + -ksp_bcgsl_cxpoly - use enhanced polynomial
372: - -ksp_bcgsl_mrpoly - use standard polynomial

374:   Level: intermediate

376: .seealso: [](ch_ksp), `KSP`, `KSPBCGSL`, `KSPCreate()`, `KSPSetType()`, `KSPCBGSL`, `KSPBCGSLSetUsePseudoinverse()`, `KSPBCGSLSetEll()`, `KSPBCGSLSetXRes()`
377: @*/
378: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscBool uMROR)
379: {
380:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;

382:   PetscFunctionBegin;

385:   if (!ksp->setupstage) bcgsl->bConvex = uMROR;
386:   else if (bcgsl->bConvex != uMROR) {
387:     /* free the data structures,
388:        then create them again
389:      */
390:     PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
391:     PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
392:     PetscCall(PetscFree4(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v));

394:     bcgsl->bConvex  = uMROR;
395:     ksp->setupstage = KSP_SETUP_NEW;
396:   }
397:   PetscFunctionReturn(PETSC_SUCCESS);
398: }

400: /*@
401:   KSPBCGSLSetEll - Sets the number of search directions to use in the `KSPCBGSL` solver

403:   Logically Collective

405:   Input Parameters:
406: + ksp - iterative context of type `KSPCBGSL`
407: - ell - number of search directions

409:   Options Database Key:
410: . -ksp_bcgsl_ell ell - Number of Krylov search directions

412:   Level: intermediate

414:   Notes:
415:   For large `ell` it is common for the polynomial update problem to become singular (due to happy breakdown for smallish
416:   test problems, but also for larger problems). Consequently, by default, the system is solved by pseudoinverse, which
417:   allows the iteration to complete successfully. See `KSPBCGSLSetUsePseudoinverse()` to switch to a conventional solve.

419: .seealso: [](ch_ksp), `KSPBCGSLSetUsePseudoinverse()`, `KSP`, `KSPBCGSL`, `KSPBCGSLSetPol()`, `KSPBCGSLSetXRes()`
420: @*/
421: PetscErrorCode KSPBCGSLSetEll(KSP ksp, PetscInt ell)
422: {
423:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;

425:   PetscFunctionBegin;
426:   PetscCheck(ell >= 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");

429:   if (!ksp->setupstage) bcgsl->ell = ell;
430:   else if (bcgsl->ell != ell) {
431:     /* free the data structures, then create them again */
432:     PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
433:     PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
434:     PetscCall(PetscFree4(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v));

436:     bcgsl->ell      = ell;
437:     ksp->setupstage = KSP_SETUP_NEW;
438:   }
439:   PetscFunctionReturn(PETSC_SUCCESS);
440: }

442: static PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
443: {
444:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
445:   PetscBool  isascii;

447:   PetscFunctionBegin;
448:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));

450:   if (isascii) {
451:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Ell = %" PetscInt_FMT "\n", bcgsl->ell));
452:     PetscCall(PetscViewerASCIIPrintf(viewer, "  Delta = %g\n", (double)bcgsl->delta));
453:   }
454:   PetscFunctionReturn(PETSC_SUCCESS);
455: }

457: static PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp, PetscOptionItems *PetscOptionsObject)
458: {
459:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
460:   PetscInt   this_ell;
461:   PetscReal  delta;
462:   PetscBool  flga = PETSC_FALSE, flg;

464:   PetscFunctionBegin;
465:   PetscOptionsHeadBegin(PetscOptionsObject, "KSPBCGSL Options");

467:   /* Set number of search directions */
468:   PetscCall(PetscOptionsInt("-ksp_bcgsl_ell", "Number of Krylov search directions", "KSPBCGSLSetEll", bcgsl->ell, &this_ell, &flg));
469:   if (flg) PetscCall(KSPBCGSLSetEll(ksp, this_ell));

471:   /* Set polynomial type */
472:   PetscCall(PetscOptionsBool("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga, &flga, NULL));
473:   if (flga) {
474:     PetscCall(KSPBCGSLSetPol(ksp, PETSC_TRUE));
475:   } else {
476:     flg = PETSC_FALSE;
477:     PetscCall(PetscOptionsBool("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg, &flg, NULL));
478:     PetscCall(KSPBCGSLSetPol(ksp, PETSC_FALSE));
479:   }

481:   /* Will computed residual be refreshed? */
482:   PetscCall(PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg));
483:   if (flg) PetscCall(KSPBCGSLSetXRes(ksp, delta));

485:   /* Use pseudoinverse? */
486:   flg = bcgsl->pinv;
487:   PetscCall(PetscOptionsBool("-ksp_bcgsl_pinv", "Polynomial correction via pseudoinverse", "KSPBCGSLSetUsePseudoinverse", flg, &flg, NULL));
488:   PetscCall(KSPBCGSLSetUsePseudoinverse(ksp, flg));
489:   PetscOptionsHeadEnd();
490:   PetscFunctionReturn(PETSC_SUCCESS);
491: }

493: static PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
494: {
495:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
496:   PetscInt   ell = bcgsl->ell, ldMZ = ell + 1;

498:   PetscFunctionBegin;
499:   PetscCall(KSPSetWorkVecs(ksp, 6 + 2 * ell));
500:   PetscCall(PetscMalloc5(ldMZ, &AY0c, ldMZ, &AYlc, ldMZ, &AYtc, ldMZ * ldMZ, &MZa, ldMZ * ldMZ, &MZb));
501:   PetscCall(PetscBLASIntCast(5 * ell, &bcgsl->lwork));
502:   PetscCall(PetscMalloc5(bcgsl->lwork, &bcgsl->work, ell, &bcgsl->s, ell * ell, &bcgsl->u, ell * ell, &bcgsl->v, 5 * ell, &bcgsl->realwork));
503:   PetscFunctionReturn(PETSC_SUCCESS);
504: }

506: static PetscErrorCode KSPReset_BCGSL(KSP ksp)
507: {
508:   KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;

510:   PetscFunctionBegin;
511:   PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));
512:   PetscCall(PetscFree5(AY0c, AYlc, AYtc, MZa, MZb));
513:   PetscCall(PetscFree5(bcgsl->work, bcgsl->s, bcgsl->u, bcgsl->v, bcgsl->realwork));
514:   PetscFunctionReturn(PETSC_SUCCESS);
515: }

517: static PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
518: {
519:   PetscFunctionBegin;
520:   PetscCall(KSPReset_BCGSL(ksp));
521:   PetscCall(KSPDestroyDefault(ksp));
522:   PetscFunctionReturn(PETSC_SUCCESS);
523: }

525: /*MC
526:     KSPBCGSL - Implements a slight variant of the Enhanced BiCGStab(L) algorithm in {cite}`fokkema1996enhanced`
527:                 and {cite}`sleijpen1994bicgstab`, see also {cite}`sleijpen1995overview`. The variation
528:                 concerns cases when either kappa0**2 or kappa1**2 is
529:                 negative due to round-off. Kappa0 has also been pulled
530:                 out of the denominator in the formula for ghat.

532:    Options Database Keys:
533: +  -ksp_bcgsl_ell <ell>         - Number of Krylov search directions, defaults to 2, cf. `KSPBCGSLSetEll()`
534: .  -ksp_bcgsl_cxpol             - Use a convex function of the MinRes and OR polynomials after the BiCG step instead of default MinRes, cf. `KSPBCGSLSetPol()`
535: .  -ksp_bcgsl_mrpoly            - Use the default MinRes polynomial after the BiCG step, cf. `KSPBCGSLSetPol()`
536: .  -ksp_bcgsl_xres <res>        - Threshold used to decide when to refresh computed residuals, cf. `KSPBCGSLSetXRes()`
537: -  -ksp_bcgsl_pinv <true/false> - (de)activate use of pseudoinverse, cf. `KSPBCGSLSetUsePseudoinverse()`

539:    Level: intermediate

541:    Contributed by:
542:    Joel M. Malard, email jm.malard@pnl.gov

544:    Note:
545:    The "sub-iterations" of this solver are not reported by `-ksp_monitor` or recorded in `KSPSetResidualHistory()` since the solution is not directly computed for
546:    these sub-iterations.

548:    Developer Notes:
549:    This has not been completely cleaned up into PETSc style.

551:    All the BLAS and LAPACK calls in the source should be removed and replaced with loops and the macros for block solvers converted from LINPACK.

553: .seealso: [](ch_ksp), `KSPFBCGS`, `KSPFBCGSR`, `KSPBCGS`, `KSPPIPEBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPBCGS`, `KSPSetPCSide()`,
554:           `KSPBCGSLSetEll()`, `KSPBCGSLSetXRes()`, `KSPBCGSLSetUsePseudoinverse()`, `KSPBCGSLSetPol()`
555: M*/
556: PETSC_EXTERN PetscErrorCode KSPCreate_BCGSL(KSP ksp)
557: {
558:   KSP_BCGSL *bcgsl;

560:   PetscFunctionBegin;
561:   /* allocate BiCGStab(L) context */
562:   PetscCall(PetscNew(&bcgsl));
563:   ksp->data = (void *)bcgsl;

565:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
566:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
567:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));

569:   ksp->ops->setup          = KSPSetUp_BCGSL;
570:   ksp->ops->solve          = KSPSolve_BCGSL;
571:   ksp->ops->reset          = KSPReset_BCGSL;
572:   ksp->ops->destroy        = KSPDestroy_BCGSL;
573:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
574:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
575:   ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
576:   ksp->ops->view           = KSPView_BCGSL;

578:   /* Let the user redefine the number of directions vectors */
579:   bcgsl->ell = 2;

581:   /*Choose between a single MR step or an averaged MR/OR */
582:   bcgsl->bConvex = PETSC_FALSE;

584:   bcgsl->pinv = PETSC_TRUE; /* Use the reliable method by default */

586:   /* Set the threshold for when exact residuals will be used */
587:   bcgsl->delta = 0.0;
588:   PetscFunctionReturn(PETSC_SUCCESS);
589: }