Actual source code: fbcgsr.c
2: /*
3: This file implements FBiCGStab-R.
4: FBiCGStab-R is a mathematically equivalent variant of FBiCGStab. Differences are:
5: (1) There are fewer MPI_Allreduce calls.
6: (2) The convergence occasionally is much faster than that of FBiCGStab.
7: */
8: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
9: #include <petsc/private/vecimpl.h>
11: static PetscErrorCode KSPSetUp_FBCGSR(KSP ksp)
12: {
13: KSPSetWorkVecs(ksp, 8);
14: return 0;
15: }
17: static PetscErrorCode KSPSolve_FBCGSR(KSP ksp)
18: {
19: PetscInt i, j, N;
20: PetscScalar tau, sigma, alpha, omega, beta;
21: PetscReal rho;
22: PetscScalar xi1, xi2, xi3, xi4;
23: Vec X, B, P, P2, RP, R, V, S, T, S2;
24: PetscScalar *PETSC_RESTRICT rp, *PETSC_RESTRICT r, *PETSC_RESTRICT p;
25: PetscScalar *PETSC_RESTRICT v, *PETSC_RESTRICT s, *PETSC_RESTRICT t, *PETSC_RESTRICT s2;
26: PetscScalar insums[4], outsums[4];
27: KSP_BCGS *bcgs = (KSP_BCGS *)ksp->data;
28: PC pc;
29: Mat mat;
32: VecGetLocalSize(ksp->vec_sol, &N);
34: X = ksp->vec_sol;
35: B = ksp->vec_rhs;
36: P2 = ksp->work[0];
38: /* The followings are involved in modified inner product calculations and vector updates */
39: RP = ksp->work[1];
40: VecGetArray(RP, (PetscScalar **)&rp);
41: VecRestoreArray(RP, NULL);
42: R = ksp->work[2];
43: VecGetArray(R, (PetscScalar **)&r);
44: VecRestoreArray(R, NULL);
45: P = ksp->work[3];
46: VecGetArray(P, (PetscScalar **)&p);
47: VecRestoreArray(P, NULL);
48: V = ksp->work[4];
49: VecGetArray(V, (PetscScalar **)&v);
50: VecRestoreArray(V, NULL);
51: S = ksp->work[5];
52: VecGetArray(S, (PetscScalar **)&s);
53: VecRestoreArray(S, NULL);
54: T = ksp->work[6];
55: VecGetArray(T, (PetscScalar **)&t);
56: VecRestoreArray(T, NULL);
57: S2 = ksp->work[7];
58: VecGetArray(S2, (PetscScalar **)&s2);
59: VecRestoreArray(S2, NULL);
61: /* Only supports right preconditioning */
63: if (!ksp->guess_zero) {
64: if (!bcgs->guess) VecDuplicate(X, &bcgs->guess);
65: VecCopy(X, bcgs->guess);
66: } else {
67: VecSet(X, 0.0);
68: }
70: /* Compute initial residual */
71: KSPGetPC(ksp, &pc);
72: PCSetUp(pc);
73: PCGetOperators(pc, &mat, NULL);
74: if (!ksp->guess_zero) {
75: KSP_MatMult(ksp, mat, X, P2); /* P2 is used as temporary storage */
76: VecCopy(B, R);
77: VecAXPY(R, -1.0, P2);
78: } else {
79: VecCopy(B, R);
80: }
82: /* Test for nothing to do */
83: VecNorm(R, NORM_2, &rho);
84: PetscObjectSAWsTakeAccess((PetscObject)ksp);
85: ksp->its = 0;
86: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
87: else ksp->rnorm = 0;
88: PetscObjectSAWsGrantAccess((PetscObject)ksp);
89: KSPLogResidualHistory(ksp, ksp->rnorm);
90: KSPMonitor(ksp, 0, ksp->rnorm);
91: (*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP);
92: if (ksp->reason) return 0;
94: /* Initialize iterates */
95: VecCopy(R, RP); /* rp <- r */
96: VecCopy(R, P); /* p <- r */
98: /* Big loop */
99: for (i = 0; i < ksp->max_it; i++) {
100: /* matmult and pc */
101: KSP_PCApply(ksp, P, P2); /* p2 <- K p */
102: KSP_MatMult(ksp, mat, P2, V); /* v <- A p2 */
104: /* inner prodcuts */
105: if (i == 0) {
106: tau = rho * rho;
107: VecDot(V, RP, &sigma); /* sigma <- (v,rp) */
108: } else {
109: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
110: tau = sigma = 0.0;
111: for (j = 0; j < N; j++) {
112: tau += r[j] * rp[j]; /* tau <- (r,rp) */
113: sigma += v[j] * rp[j]; /* sigma <- (v,rp) */
114: }
115: PetscLogFlops(4.0 * N);
116: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
117: insums[0] = tau;
118: insums[1] = sigma;
119: PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
120: MPIU_Allreduce(insums, outsums, 2, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp));
121: PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
122: tau = outsums[0];
123: sigma = outsums[1];
124: }
126: /* scalar update */
127: alpha = tau / sigma;
129: /* vector update */
130: VecWAXPY(S, -alpha, V, R); /* s <- r - alpha v */
132: /* matmult and pc */
133: KSP_PCApply(ksp, S, S2); /* s2 <- K s */
134: KSP_MatMult(ksp, mat, S2, T); /* t <- A s2 */
136: /* inner prodcuts */
137: PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0);
138: xi1 = xi2 = xi3 = xi4 = 0.0;
139: for (j = 0; j < N; j++) {
140: xi1 += s[j] * s[j]; /* xi1 <- (s,s) */
141: xi2 += t[j] * s[j]; /* xi2 <- (t,s) */
142: xi3 += t[j] * t[j]; /* xi3 <- (t,t) */
143: xi4 += t[j] * rp[j]; /* xi4 <- (t,rp) */
144: }
145: PetscLogFlops(8.0 * N);
146: PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0);
148: insums[0] = xi1;
149: insums[1] = xi2;
150: insums[2] = xi3;
151: insums[3] = xi4;
153: PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0);
154: MPIU_Allreduce(insums, outsums, 4, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)ksp));
155: PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0);
156: xi1 = outsums[0];
157: xi2 = outsums[1];
158: xi3 = outsums[2];
159: xi4 = outsums[3];
161: /* test denominator */
162: if ((xi3 == 0.0) || (sigma == 0.0)) {
164: ksp->reason = KSP_DIVERGED_BREAKDOWN;
165: PetscInfo(ksp, "KSPSolve has failed due to zero inner product\n");
166: break;
167: }
169: /* scalar updates */
170: omega = xi2 / xi3;
171: beta = -xi4 / sigma;
172: rho = PetscSqrtReal(PetscAbsScalar(xi1 - omega * xi2)); /* residual norm */
174: /* vector updates */
175: VecAXPBYPCZ(X, alpha, omega, 1.0, P2, S2); /* x <- alpha * p2 + omega * s2 + x */
177: /* convergence test */
178: PetscObjectSAWsTakeAccess((PetscObject)ksp);
179: ksp->its++;
180: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = rho;
181: else ksp->rnorm = 0;
182: PetscObjectSAWsGrantAccess((PetscObject)ksp);
183: KSPLogResidualHistory(ksp, ksp->rnorm);
184: KSPMonitor(ksp, i + 1, ksp->rnorm);
185: (*ksp->converged)(ksp, i + 1, ksp->rnorm, &ksp->reason, ksp->cnvP);
186: if (ksp->reason) break;
188: /* vector updates */
189: PetscLogEventBegin(VEC_Ops, 0, 0, 0, 0);
190: for (j = 0; j < N; j++) {
191: r[j] = s[j] - omega * t[j]; /* r <- s - omega t */
192: p[j] = r[j] + beta * (p[j] - omega * v[j]); /* p <- r + beta * (p - omega v) */
193: }
194: PetscLogFlops(6.0 * N);
195: PetscLogEventEnd(VEC_Ops, 0, 0, 0, 0);
196: }
198: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
199: return 0;
200: }
202: /*MC
203: KSPFBCGSR - Implements a mathematically equivalent variant of flexible bi-CG-stab, `KSPFBCGS`. [](sec_flexibleksp)
205: Level: beginner
207: Notes:
208: This implementation requires fewer `MPI_Allreduce()` calls than `KSPFBCGS` and may converge faster
210: See `KSPPIPEBCGS` for a pipelined version of the algorithm
212: Flexible BiCGStab, unlike most Krylov methods, allows the preconditioner to be nonlinear, that is the action of the preconditioner to a vector need not be linear
213: in the vector entries.
215: Only supports right preconditioning
217: .seealso: [](chapter_ksp), [](sec_flexibleksp), `KSPFBCGSR`, `KSPPIPEBCGS`, `KSPBCGSL`, `KSPBCGS`, `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPBICG`, `KSPFBCGSL`, `KSPSetPCSide()`
218: M*/
219: PETSC_EXTERN PetscErrorCode KSPCreate_FBCGSR(KSP ksp)
220: {
221: KSP_BCGS *bcgs;
223: PetscNew(&bcgs);
225: ksp->data = bcgs;
226: ksp->ops->setup = KSPSetUp_FBCGSR;
227: ksp->ops->solve = KSPSolve_FBCGSR;
228: ksp->ops->destroy = KSPDestroy_BCGS;
229: ksp->ops->reset = KSPReset_BCGS;
230: ksp->ops->buildsolution = KSPBuildSolution_BCGS;
231: ksp->ops->buildresidual = KSPBuildResidualDefault;
232: ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
233: ksp->pc_side = PC_RIGHT; /* set default PC side */
235: KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3);
236: KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2);
237: KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1);
238: return 0;
239: }