Actual source code: ibcgs.c
  1: #include <petsc/private/kspimpl.h>
  2: #include <petsc/private/vecimpl.h>

  4: static PetscErrorCode KSPSetUp_IBCGS(KSP ksp)
  5: {
  6:   PetscBool diagonalscale;

  8:   PetscFunctionBegin;
  9:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
 10:   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
 11:   PetscCall(KSPSetWorkVecs(ksp, 9));
 12:   PetscFunctionReturn(PETSC_SUCCESS);
 13: }

 15: /*
 16:     The code below "cheats" from PETSc style
 17:        1) VecRestoreArray() is called immediately after VecGetArray() and the array values are still accessed; the reason for the immediate
 18:           restore is that Vec operations are done on some of the vectors during the solve and if we did not restore immediately it would
 19:           generate two VecGetArray() (the second one inside the Vec operation) calls without a restore between them.
 20:        2) The vector operations on done directly on the arrays instead of with VecXXXX() calls

 22:        For clarity in the code we name single VECTORS with two names, for example, Rn_1 and R, but they actually always
 23:      the exact same memory. We do this with macro defines so that compiler won't think they are
 24:      two different variables.

 26: */
 27: #define Xn_1 Xn
 28: #define xn_1 xn
 29: #define Rn_1 Rn
 30: #define rn_1 rn
 31: #define Un_1 Un
 32: #define un_1 un
 33: #define Vn_1 Vn
 34: #define vn_1 vn
 35: #define Qn_1 Qn
 36: #define qn_1 qn
 37: #define Zn_1 Zn
 38: #define zn_1 zn
 39: static PetscErrorCode KSPSolve_IBCGS(KSP ksp)
 40: {
 41:   PetscInt  i, N;
 42:   PetscReal rnorm = 0.0, rnormin = 0.0;
 43: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
 44:   /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem
 45:      on the same number of processes  with different runs) we support computing the inner products using Intel's 80 bit arithmetic
 46:      rather than just 64-bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
 47:      and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE.

 49:      Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double
 50:      precision into a 16 byte space (the rest of the space is ignored)  */
 51:   long double insums[7], outsums[7];
 52: #else
 53:   PetscScalar insums[7], outsums[7];
 54: #endif
 55:   PetscScalar                       sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin, tmp1, tmp2;
 56:   PetscScalar                       taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
 57:   const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
 58:   PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
 59:   /* the rest do not have to keep n_1 values */
 60:   PetscScalar                       kappan, thetan, etan, gamman, betan, deltan;
 61:   const PetscScalar *PETSC_RESTRICT tn;
 62:   PetscScalar *PETSC_RESTRICT       sn;
 63:   Vec                               R0, Rn, Xn, F0, Vn, Zn, Qn, Tn, Sn, B, Un;
 64:   Mat                               A;

 66:   PetscFunctionBegin;
 67:   PetscCheck(ksp->vec_rhs->petscnative, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Only coded for PETSc vectors");

 69: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
 70:   /* since 80 bit long doubls do not fill the upper bits, we fill them initially so that
 71:      valgrind won't detect MPI_Allreduce() with uninitialized data */
 72:   PetscCall(PetscMemzero(insums, sizeof(insums)));
 73:   PetscCall(PetscMemzero(insums, sizeof(insums)));
 74: #endif

 76:   PetscCall(PCGetOperators(ksp->pc, &A, NULL));
 77:   PetscCall(VecGetLocalSize(ksp->vec_sol, &N));
 78:   Xn = ksp->vec_sol;
 79:   PetscCall(VecGetArray(Xn_1, (PetscScalar **)&xn_1));
 80:   PetscCall(VecRestoreArray(Xn_1, NULL));
 81:   B = ksp->vec_rhs;
 82:   PetscCall(VecGetArrayRead(B, (const PetscScalar **)&b));
 83:   PetscCall(VecRestoreArrayRead(B, NULL));
 84:   R0 = ksp->work[0];
 85:   PetscCall(VecGetArrayRead(R0, (const PetscScalar **)&r0));
 86:   PetscCall(VecRestoreArrayRead(R0, NULL));
 87:   Rn = ksp->work[1];
 88:   PetscCall(VecGetArray(Rn_1, (PetscScalar **)&rn_1));
 89:   PetscCall(VecRestoreArray(Rn_1, NULL));
 90:   Un = ksp->work[2];
 91:   PetscCall(VecGetArrayRead(Un_1, (const PetscScalar **)&un_1));
 92:   PetscCall(VecRestoreArrayRead(Un_1, NULL));
 93:   F0 = ksp->work[3];
 94:   PetscCall(VecGetArrayRead(F0, (const PetscScalar **)&f0));
 95:   PetscCall(VecRestoreArrayRead(F0, NULL));
 96:   Vn = ksp->work[4];
 97:   PetscCall(VecGetArray(Vn_1, (PetscScalar **)&vn_1));
 98:   PetscCall(VecRestoreArray(Vn_1, NULL));
 99:   Zn = ksp->work[5];
100:   PetscCall(VecGetArray(Zn_1, (PetscScalar **)&zn_1));
101:   PetscCall(VecRestoreArray(Zn_1, NULL));
102:   Qn = ksp->work[6];
103:   PetscCall(VecGetArrayRead(Qn_1, (const PetscScalar **)&qn_1));
104:   PetscCall(VecRestoreArrayRead(Qn_1, NULL));
105:   Tn = ksp->work[7];
106:   PetscCall(VecGetArrayRead(Tn, (const PetscScalar **)&tn));
107:   PetscCall(VecRestoreArrayRead(Tn, NULL));
108:   Sn = ksp->work[8];
109:   PetscCall(VecGetArrayRead(Sn, (const PetscScalar **)&sn));
110:   PetscCall(VecRestoreArrayRead(Sn, NULL));

112:   /* r0 = rn_1 = b - A*xn_1; */
113:   /* PetscCall(KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn));
114:      PetscCall(VecAYPX(Rn_1,-1.0,B)); */
115:   PetscCall(KSPInitialResidual(ksp, Xn_1, Tn, Sn, Rn_1, B));
116:   if (ksp->normtype != KSP_NORM_NONE) {
117:     PetscCall(VecNorm(Rn_1, NORM_2, &rnorm));
118:     KSPCheckNorm(ksp, rnorm);
119:   }
120:   PetscCall(KSPMonitor(ksp, 0, rnorm));
121:   PetscCall((*ksp->converged)(ksp, 0, rnorm, &ksp->reason, ksp->cnvP));
122:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

124:   PetscCall(VecCopy(Rn_1, R0));

126:   /* un_1 = A*rn_1; */
127:   PetscCall(KSP_PCApplyBAorAB(ksp, Rn_1, Un_1, Tn));

129:   /* f0   = A'*rn_1; */
130:   if (ksp->pc_side == PC_RIGHT) { /* B' A' */
131:     PetscCall(KSP_MatMultTranspose(ksp, A, R0, Tn));
132:     PetscCall(KSP_PCApplyTranspose(ksp, Tn, F0));
133:   } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
134:     PetscCall(KSP_PCApplyTranspose(ksp, R0, Tn));
135:     PetscCall(KSP_MatMultTranspose(ksp, A, Tn, F0));
136:   }

138:   /*qn_1 = vn_1 = zn_1 = 0.0; */
139:   PetscCall(VecSet(Qn_1, 0.0));
140:   PetscCall(VecSet(Vn_1, 0.0));
141:   PetscCall(VecSet(Zn_1, 0.0));

143:   sigman_2 = pin_1 = taun_1 = 0.0;

145:   /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */
146:   PetscCall(VecDot(R0, R0, &phin_1));
147:   KSPCheckDot(ksp, phin_1);

149:   /* sigman_1 = rn_1'un_1  */
150:   PetscCall(VecDot(R0, Un_1, &sigman_1));

152:   alphan_1 = omegan_1 = 1.0;

154:   for (ksp->its = 1; ksp->its < ksp->max_it + 1; ksp->its++) {
155:     rhon = phin_1 - omegan_1 * sigman_2 + omegan_1 * alphan_1 * pin_1;
156:     if (ksp->its == 1) deltan = rhon;
157:     else deltan = rhon / taun_1;
158:     betan = deltan / omegan_1;
159:     taun  = sigman_1 + betan * taun_1 - deltan * pin_1;
160:     if (taun == 0.0) {
161:       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to taun is zero, iteration %" PetscInt_FMT, ksp->its);
162:       ksp->reason = KSP_DIVERGED_NANORINF;
163:       PetscFunctionReturn(PETSC_SUCCESS);
164:     }
165:     alphan = rhon / taun;
166:     PetscCall(PetscLogFlops(15.0));

168:     /*
169:         zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1
170:         vn = un_1 + betan*vn_1 - deltan*qn_1
171:         sn = rn_1 - alphan*vn

173:        The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
174:     */
175:     PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
176:     tmp1 = (alphan / alphan_1) * betan;
177:     tmp2 = alphan * deltan;
178:     for (i = 0; i < N; i++) {
179:       zn[i] = alphan * rn_1[i] + tmp1 * zn_1[i] - tmp2 * vn_1[i];
180:       vn[i] = un_1[i] + betan * vn_1[i] - deltan * qn_1[i];
181:       sn[i] = rn_1[i] - alphan * vn[i];
182:     }
183:     PetscCall(PetscLogFlops(3.0 + 11.0 * N));
184:     PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));

186:     /*
187:         qn = A*vn
188:     */
189:     PetscCall(KSP_PCApplyBAorAB(ksp, Vn, Qn, Tn));

191:     /*
192:         tn = un_1 - alphan*qn
193:     */
194:     PetscCall(VecWAXPY(Tn, -alphan, Qn, Un_1));

196:     /*
197:         phin = r0'sn
198:         pin  = r0'qn
199:         gamman = f0'sn
200:         etan   = f0'tn
201:         thetan = sn'tn
202:         kappan = tn'tn
203:     */
204:     PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
205:     phin = pin = gamman = etan = thetan = kappan = 0.0;
206:     for (i = 0; i < N; i++) {
207:       phin += r0[i] * sn[i];
208:       pin += r0[i] * qn[i];
209:       gamman += f0[i] * sn[i];
210:       etan += f0[i] * tn[i];
211:       thetan += sn[i] * tn[i];
212:       kappan += tn[i] * tn[i];
213:     }
214:     PetscCall(PetscLogFlops(12.0 * N));
215:     PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));

217:     insums[0] = phin;
218:     insums[1] = pin;
219:     insums[2] = gamman;
220:     insums[3] = etan;
221:     insums[4] = thetan;
222:     insums[5] = kappan;
223:     insums[6] = rnormin;

225:     PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
226: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
227:     if (ksp->lagnorm && ksp->its > 1) {
228:       PetscCallMPI(MPIU_Allreduce(insums, outsums, 7, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
229:     } else {
230:       PetscCallMPI(MPIU_Allreduce(insums, outsums, 6, MPI_LONG_DOUBLE, MPI_SUM, PetscObjectComm((PetscObject)ksp)));
231:     }
232: #else
233:     if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) {
234:       PetscCallMPI(MPIU_Allreduce(insums, outsums, 7, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
235:     } else {
236:       PetscCallMPI(MPIU_Allreduce(insums, outsums, 6, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
237:     }
238: #endif
239:     PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
240:     phin   = outsums[0];
241:     pin    = outsums[1];
242:     gamman = outsums[2];
243:     etan   = outsums[3];
244:     thetan = outsums[4];
245:     kappan = outsums[5];
246:     if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));

248:     if (kappan == 0.0) {
249:       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to kappan is zero, iteration %" PetscInt_FMT, ksp->its);
250:       ksp->reason = KSP_DIVERGED_NANORINF;
251:       PetscFunctionReturn(PETSC_SUCCESS);
252:     }
253:     if (thetan == 0.0) {
254:       PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "KSPSolve has not converged due to thetan is zero, iteration %" PetscInt_FMT, ksp->its);
255:       ksp->reason = KSP_DIVERGED_NANORINF;
256:       PetscFunctionReturn(PETSC_SUCCESS);
257:     }
258:     omegan = thetan / kappan;
259:     sigman = gamman - omegan * etan;

261:     /*
262:         rn = sn - omegan*tn
263:         xn = xn_1 + zn + omegan*sn
264:     */
265:     PetscCall(PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0));
266:     rnormin = 0.0;
267:     for (i = 0; i < N; i++) {
268:       rn[i] = sn[i] - omegan * tn[i];
269:       rnormin += PetscRealPart(PetscConj(rn[i]) * rn[i]);
270:       xn[i] += zn[i] + omegan * sn[i];
271:     }
272:     PetscCall(PetscObjectStateIncrease((PetscObject)Xn));
273:     PetscCall(PetscLogFlops(7.0 * N));
274:     PetscCall(PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0));

276:     if (!ksp->lagnorm && ksp->chknorm < ksp->its && ksp->normtype != KSP_NORM_NONE) {
277:       PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
278:       PetscCallMPI(MPIU_Allreduce(&rnormin, &rnorm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)ksp)));
279:       PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
280:       rnorm = PetscSqrtReal(rnorm);
281:     }

283:     /* Test for convergence */
284:     PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
285:     PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
286:     if (ksp->reason) {
287:       PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
288:       PetscFunctionReturn(PETSC_SUCCESS);
289:     }

291:     /* un = A*rn */
292:     PetscCall(KSP_PCApplyBAorAB(ksp, Rn, Un, Tn));

294:     /* Update n-1 locations with n locations */
295:     sigman_2 = sigman_1;
296:     sigman_1 = sigman;
297:     pin_1    = pin;
298:     phin_1   = phin;
299:     alphan_1 = alphan;
300:     taun_1   = taun;
301:     omegan_1 = omegan;
302:   }
303:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
304:   PetscCall(KSPUnwindPreconditioner(ksp, Xn, Tn));
305:   PetscFunctionReturn(PETSC_SUCCESS);
306: }

308: /*MC
309:    KSPIBCGS - Implements the IBiCGStab (Improved Stabilized version of BiConjugate Gradient) method {cite}`yang:brent:2002`
310:    in an alternative form to have only a single global reduction operation instead of the usual 3 (or 4)

312:    Level: beginner

314:    Notes:
315:    Supports left and right preconditioning

317:    See `KSPBCGSL` for additional stabilization

319:    Unlike the Bi-CG-stab algorithm, this requires one multiplication be the transpose of the operator
320:    before the iteration starts.

322:    The paper has two errors in the algorithm presented, they are fixed in the code in `KSPSolve_IBCGS()`

324:    For maximum reduction in the number of global reduction operations, this solver should be used with
325:    `KSPSetLagNorm()`.

327:    This is not supported for complex numbers.

329: .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPBCGSL`, `KSPIBCGS`, `KSPSetLagNorm()`
330: M*/

332: PETSC_EXTERN PetscErrorCode KSPCreate_IBCGS(KSP ksp)
333: {
334:   PetscFunctionBegin;
335:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
336:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
337:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));

339:   ksp->ops->setup          = KSPSetUp_IBCGS;
340:   ksp->ops->solve          = KSPSolve_IBCGS;
341:   ksp->ops->destroy        = KSPDestroyDefault;
342:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
343:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
344:   ksp->ops->setfromoptions = NULL;
345:   ksp->ops->view           = NULL;
346: #if defined(PETSC_USE_COMPLEX)
347:   SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "This is not supported for complex numbers");
348: #else
349:   PetscFunctionReturn(PETSC_SUCCESS);
350: #endif
351: }