Actual source code: pipefcg.c


  2: #include <../src/ksp/ksp/impls/fcg/pipefcg/pipefcgimpl.h>

  4: static PetscBool  cited      = PETSC_FALSE;
  5: static const char citation[] = "@article{SSM2016,\n"
  6:                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
  7:                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
  8:                                "  journal = {SIAM Journal on Scientific Computing},\n"
  9:                                "  volume = {38},\n"
 10:                                "  number = {5},\n"
 11:                                "  pages = {C441-C470},\n"
 12:                                "  year = {2016},\n"
 13:                                "  doi = {10.1137/15M1049130},\n"
 14:                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
 15:                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
 16:                                "}\n";

 18: #define KSPPIPEFCG_DEFAULT_MMAX       15
 19: #define KSPPIPEFCG_DEFAULT_NPREALLOC  5
 20: #define KSPPIPEFCG_DEFAULT_VECB       5
 21: #define KSPPIPEFCG_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY

 23: static PetscErrorCode KSPAllocateVectors_PIPEFCG(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
 24: {
 25:   PetscInt     i;
 26:   KSP_PIPEFCG *pipefcg;
 27:   PetscInt     nnewvecs, nvecsprev;

 29:   PetscFunctionBegin;
 30:   pipefcg = (KSP_PIPEFCG *)ksp->data;

 32:   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
 33:   if (pipefcg->nvecs < PetscMin(pipefcg->mmax + 1, nvecsneeded)) {
 34:     nvecsprev = pipefcg->nvecs;
 35:     nnewvecs  = PetscMin(PetscMax(nvecsneeded - pipefcg->nvecs, chunksize), pipefcg->mmax + 1 - pipefcg->nvecs);
 36:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pQvecs[pipefcg->nchunks], 0, NULL));
 37:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pZETAvecs[pipefcg->nchunks], 0, NULL));
 38:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pPvecs[pipefcg->nchunks], 0, NULL));
 39:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipefcg->pSvecs[pipefcg->nchunks], 0, NULL));
 40:     pipefcg->nvecs += nnewvecs;
 41:     for (i = 0; i < nnewvecs; ++i) {
 42:       pipefcg->Qvecs[nvecsprev + i]    = pipefcg->pQvecs[pipefcg->nchunks][i];
 43:       pipefcg->ZETAvecs[nvecsprev + i] = pipefcg->pZETAvecs[pipefcg->nchunks][i];
 44:       pipefcg->Pvecs[nvecsprev + i]    = pipefcg->pPvecs[pipefcg->nchunks][i];
 45:       pipefcg->Svecs[nvecsprev + i]    = pipefcg->pSvecs[pipefcg->nchunks][i];
 46:     }
 47:     pipefcg->chunksizes[pipefcg->nchunks] = nnewvecs;
 48:     ++pipefcg->nchunks;
 49:   }
 50:   PetscFunctionReturn(PETSC_SUCCESS);
 51: }

 53: static PetscErrorCode KSPSetUp_PIPEFCG(KSP ksp)
 54: {
 55:   KSP_PIPEFCG   *pipefcg;
 56:   const PetscInt nworkstd = 5;

 58:   PetscFunctionBegin;
 59:   pipefcg = (KSP_PIPEFCG *)ksp->data;

 61:   /* Allocate "standard" work vectors (not including the basis and transformed basis vectors) */
 62:   PetscCall(KSPSetWorkVecs(ksp, nworkstd));

 64:   /* Allocated space for pointers to additional work vectors
 65:    note that mmax is the number of previous directions, so we add 1 for the current direction,
 66:    and an extra 1 for the prealloc (which might be empty) */
 67:   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &(pipefcg->Pvecs), pipefcg->mmax + 1, &(pipefcg->pPvecs), pipefcg->mmax + 1, &(pipefcg->Svecs), pipefcg->mmax + 1, &(pipefcg->pSvecs)));
 68:   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &(pipefcg->Qvecs), pipefcg->mmax + 1, &(pipefcg->pQvecs), pipefcg->mmax + 1, &(pipefcg->ZETAvecs), pipefcg->mmax + 1, &(pipefcg->pZETAvecs)));
 69:   PetscCall(PetscMalloc4(pipefcg->mmax + 1, &(pipefcg->Pold), pipefcg->mmax + 1, &(pipefcg->Sold), pipefcg->mmax + 1, &(pipefcg->Qold), pipefcg->mmax + 1, &(pipefcg->ZETAold)));
 70:   PetscCall(PetscMalloc1(pipefcg->mmax + 1, &(pipefcg->chunksizes)));
 71:   PetscCall(PetscMalloc3(pipefcg->mmax + 2, &(pipefcg->dots), pipefcg->mmax + 1, &(pipefcg->etas), pipefcg->mmax + 2, &(pipefcg->redux)));

 73:   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
 74:   if (pipefcg->nprealloc > pipefcg->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipefcg->nprealloc, pipefcg->mmax + 1));

 76:   /* Preallocate additional work vectors */
 77:   PetscCall(KSPAllocateVectors_PIPEFCG(ksp, pipefcg->nprealloc, pipefcg->nprealloc));
 78:   PetscFunctionReturn(PETSC_SUCCESS);
 79: }

 81: static PetscErrorCode KSPSolve_PIPEFCG_cycle(KSP ksp)
 82: {
 83:   PetscInt     i, j, k, idx, kdx, mi;
 84:   KSP_PIPEFCG *pipefcg;
 85:   PetscScalar  alpha = 0.0, gamma, *betas, *dots;
 86:   PetscReal    dp    = 0.0, delta, *eta, *etas;
 87:   Vec          B, R, Z, X, Qcurr, W, ZETAcurr, M, N, Pcurr, Scurr, *redux;
 88:   Mat          Amat, Pmat;

 90:   PetscFunctionBegin;
 91:   /* We have not checked these routines for use with complex numbers. The inner products
 92:      are likely not defined correctly for that case */
 93:   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");

 95: #define VecXDot(x, y, a)          (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x, y, a) : VecTDot(x, y, a))
 96: #define VecXDotBegin(x, y, a)     (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotBegin(x, y, a) : VecTDotBegin(x, y, a))
 97: #define VecXDotEnd(x, y, a)       (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotEnd(x, y, a) : VecTDotEnd(x, y, a))
 98: #define VecMXDot(x, n, y, a)      (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDot(x, n, y, a) : VecMTDot(x, n, y, a))
 99: #define VecMXDotBegin(x, n, y, a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotBegin(x, n, y, a) : VecMTDotBegin(x, n, y, a))
100: #define VecMXDotEnd(x, n, y, a)   (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotEnd(x, n, y, a) : VecMTDotEnd(x, n, y, a))

102:   pipefcg = (KSP_PIPEFCG *)ksp->data;
103:   X       = ksp->vec_sol;
104:   B       = ksp->vec_rhs;
105:   R       = ksp->work[0];
106:   Z       = ksp->work[1];
107:   W       = ksp->work[2];
108:   M       = ksp->work[3];
109:   N       = ksp->work[4];

111:   redux = pipefcg->redux;
112:   dots  = pipefcg->dots;
113:   etas  = pipefcg->etas;
114:   betas = dots; /* dots takes the result of all dot products of which the betas are a subset */

116:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

118:   /* Compute cycle initial residual */
119:   PetscCall(KSP_MatMult(ksp, Amat, X, R));
120:   PetscCall(VecAYPX(R, -1.0, B));    /* r <- b - Ax */
121:   PetscCall(KSP_PCApply(ksp, R, Z)); /* z <- Br     */

123:   Pcurr    = pipefcg->Pvecs[0];
124:   Scurr    = pipefcg->Svecs[0];
125:   Qcurr    = pipefcg->Qvecs[0];
126:   ZETAcurr = pipefcg->ZETAvecs[0];
127:   PetscCall(VecCopy(Z, Pcurr));
128:   PetscCall(KSP_MatMult(ksp, Amat, Pcurr, Scurr)); /* S = Ap     */
129:   PetscCall(VecCopy(Scurr, W));                    /* w = s = Az */

131:   /* Initial state of pipelining intermediates */
132:   redux[0] = R;
133:   redux[1] = W;
134:   PetscCall(VecMXDotBegin(Z, 2, redux, dots));
135:   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z))); /* perform asynchronous reduction */
136:   PetscCall(KSP_PCApply(ksp, W, M));                                        /* m = B(w) */
137:   PetscCall(KSP_MatMult(ksp, Amat, M, N));                                  /* n = Am   */
138:   PetscCall(VecCopy(M, Qcurr));                                             /* q = m    */
139:   PetscCall(VecCopy(N, ZETAcurr));                                          /* zeta = n */
140:   PetscCall(VecMXDotEnd(Z, 2, redux, dots));
141:   gamma   = dots[0];
142:   delta   = PetscRealPart(dots[1]);
143:   etas[0] = delta;
144:   alpha   = gamma / delta;

146:   i = 0;
147:   do {
148:     ksp->its++;

150:     /* Update X, R, Z, W */
151:     PetscCall(VecAXPY(X, +alpha, Pcurr));    /* x <- x + alpha * pi    */
152:     PetscCall(VecAXPY(R, -alpha, Scurr));    /* r <- r - alpha * si    */
153:     PetscCall(VecAXPY(Z, -alpha, Qcurr));    /* z <- z - alpha * qi    */
154:     PetscCall(VecAXPY(W, -alpha, ZETAcurr)); /* w <- w - alpha * zetai */

156:     /* Compute norm for convergence check */
157:     switch (ksp->normtype) {
158:     case KSP_NORM_PRECONDITIONED:
159:       PetscCall(VecNorm(Z, NORM_2, &dp)); /* dp <- sqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
160:       break;
161:     case KSP_NORM_UNPRECONDITIONED:
162:       PetscCall(VecNorm(R, NORM_2, &dp)); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
163:       break;
164:     case KSP_NORM_NATURAL:
165:       dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
166:       break;
167:     case KSP_NORM_NONE:
168:       dp = 0.0;
169:       break;
170:     default:
171:       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
172:     }

174:     /* Check for convergence */
175:     ksp->rnorm = dp;
176:     PetscCall(KSPLogResidualHistory(ksp, dp));
177:     PetscCall(KSPMonitor(ksp, ksp->its, dp));
178:     PetscCall((*ksp->converged)(ksp, ksp->its, dp, &ksp->reason, ksp->cnvP));
179:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

181:     /* Computations of current iteration done */
182:     ++i;

184:     /* If needbe, allocate a new chunk of vectors in P and C */
185:     PetscCall(KSPAllocateVectors_PIPEFCG(ksp, i + 1, pipefcg->vecb));

187:     /* Note that we wrap around and start clobbering old vectors */
188:     idx      = i % (pipefcg->mmax + 1);
189:     Pcurr    = pipefcg->Pvecs[idx];
190:     Scurr    = pipefcg->Svecs[idx];
191:     Qcurr    = pipefcg->Qvecs[idx];
192:     ZETAcurr = pipefcg->ZETAvecs[idx];
193:     eta      = pipefcg->etas + idx;

195:     /* number of old directions to orthogonalize against */
196:     switch (pipefcg->truncstrat) {
197:     case KSP_FCD_TRUNC_TYPE_STANDARD:
198:       mi = pipefcg->mmax;
199:       break;
200:     case KSP_FCD_TRUNC_TYPE_NOTAY:
201:       mi = ((i - 1) % pipefcg->mmax) + 1;
202:       break;
203:     default:
204:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
205:     }

207:     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
208:     PetscCall(VecCopy(Z, Pcurr));
209:     for (k = PetscMax(0, i - mi), j = 0; k < i; ++j, ++k) {
210:       kdx                 = k % (pipefcg->mmax + 1);
211:       pipefcg->Pold[j]    = pipefcg->Pvecs[kdx];
212:       pipefcg->Sold[j]    = pipefcg->Svecs[kdx];
213:       pipefcg->Qold[j]    = pipefcg->Qvecs[kdx];
214:       pipefcg->ZETAold[j] = pipefcg->ZETAvecs[kdx];
215:       redux[j]            = pipefcg->Svecs[kdx];
216:     }
217:     redux[j]     = R; /* If the above loop is not executed redux contains only R => all beta_k = 0, only gamma, delta != 0 */
218:     redux[j + 1] = W;

220:     PetscCall(VecMXDotBegin(Z, j + 2, redux, betas));                         /* Start split reductions for beta_k = (z,s_k), gamma = (z,r), delta = (z,w) */
221:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z))); /* perform asynchronous reduction */
222:     PetscCall(VecWAXPY(N, -1.0, R, W));                                       /* m = u + B(w-r): (a) ntmp = w-r              */
223:     PetscCall(KSP_PCApply(ksp, N, M));                                        /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
224:     PetscCall(VecAXPY(M, 1.0, Z));                                            /* m = u + B(w-r): (c) m = z + mtmp            */
225:     PetscCall(KSP_MatMult(ksp, Amat, M, N));                                  /* n = Am                                      */
226:     PetscCall(VecMXDotEnd(Z, j + 2, redux, betas));                           /* Finish split reductions */
227:     gamma = betas[j];
228:     delta = PetscRealPart(betas[j + 1]);

230:     *eta = 0.;
231:     for (k = PetscMax(0, i - mi), j = 0; k < i; ++j, ++k) {
232:       kdx = k % (pipefcg->mmax + 1);
233:       betas[j] /= -etas[kdx]; /* betak  /= etak */
234:       *eta -= ((PetscReal)(PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]))) * etas[kdx];
235:       /* etaitmp = -betaik^2 * etak */
236:     }
237:     *eta += delta; /* etai    = delta -betaik^2 * etak */
238:     if (*eta < 0.) {
239:       pipefcg->norm_breakdown = PETSC_TRUE;
240:       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
241:       break;
242:     } else {
243:       alpha = gamma / (*eta); /* alpha = gamma/etai */
244:     }

246:     /* project out stored search directions using classical G-S */
247:     PetscCall(VecCopy(Z, Pcurr));
248:     PetscCall(VecCopy(W, Scurr));
249:     PetscCall(VecCopy(M, Qcurr));
250:     PetscCall(VecCopy(N, ZETAcurr));
251:     PetscCall(VecMAXPY(Pcurr, j, betas, pipefcg->Pold));       /* pi    <- ui - sum_k beta_k p_k    */
252:     PetscCall(VecMAXPY(Scurr, j, betas, pipefcg->Sold));       /* si    <- wi - sum_k beta_k s_k    */
253:     PetscCall(VecMAXPY(Qcurr, j, betas, pipefcg->Qold));       /* qi    <- m  - sum_k beta_k q_k    */
254:     PetscCall(VecMAXPY(ZETAcurr, j, betas, pipefcg->ZETAold)); /* zetai <- n  - sum_k beta_k zeta_k */

256:   } while (ksp->its < ksp->max_it);
257:   if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
258:   PetscFunctionReturn(PETSC_SUCCESS);
259: }

261: static PetscErrorCode KSPSolve_PIPEFCG(KSP ksp)
262: {
263:   KSP_PIPEFCG *pipefcg;
264:   PetscScalar  gamma;
265:   PetscReal    dp = 0.0;
266:   Vec          B, R, Z, X;
267:   Mat          Amat, Pmat;

269: #define VecXDot(x, y, a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDot(x, y, a) : VecTDot(x, y, a))

271:   PetscFunctionBegin;
272:   PetscCall(PetscCitationsRegister(citation, &cited));

274:   pipefcg = (KSP_PIPEFCG *)ksp->data;
275:   X       = ksp->vec_sol;
276:   B       = ksp->vec_rhs;
277:   R       = ksp->work[0];
278:   Z       = ksp->work[1];

280:   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));

282:   /* Compute initial residual needed for convergence check*/
283:   ksp->its = 0;
284:   if (!ksp->guess_zero) {
285:     PetscCall(KSP_MatMult(ksp, Amat, X, R));
286:     PetscCall(VecAYPX(R, -1.0, B)); /* r <- b - Ax                             */
287:   } else {
288:     PetscCall(VecCopy(B, R)); /* r <- b (x is 0)                         */
289:   }
290:   switch (ksp->normtype) {
291:   case KSP_NORM_PRECONDITIONED:
292:     PetscCall(KSP_PCApply(ksp, R, Z));  /* z <- Br                                 */
293:     PetscCall(VecNorm(Z, NORM_2, &dp)); /* dp <- dqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
294:     break;
295:   case KSP_NORM_UNPRECONDITIONED:
296:     PetscCall(VecNorm(R, NORM_2, &dp)); /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
297:     break;
298:   case KSP_NORM_NATURAL:
299:     PetscCall(KSP_PCApply(ksp, R, Z)); /* z <- Br                                 */
300:     PetscCall(VecXDot(Z, R, &gamma));
301:     dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
302:     break;
303:   case KSP_NORM_NONE:
304:     dp = 0.0;
305:     break;
306:   default:
307:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
308:   }

310:   /* Initial Convergence Check */
311:   PetscCall(KSPLogResidualHistory(ksp, dp));
312:   PetscCall(KSPMonitor(ksp, 0, dp));
313:   ksp->rnorm = dp;
314:   PetscCall((*ksp->converged)(ksp, 0, dp, &ksp->reason, ksp->cnvP));
315:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

317:   do {
318:     /* A cycle is broken only if a norm breakdown occurs. If not the entire solve happens in a single cycle.
319:        This is coded this way to allow both truncation and truncation-restart strategy
320:        (see KSPFCDGetNumOldDirections()) */
321:     PetscCall(KSPSolve_PIPEFCG_cycle(ksp));
322:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
323:     if (pipefcg->norm_breakdown) {
324:       pipefcg->n_restarts++;
325:       pipefcg->norm_breakdown = PETSC_FALSE;
326:     }
327:   } while (ksp->its < ksp->max_it);

329:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
330:   PetscFunctionReturn(PETSC_SUCCESS);
331: }

333: static PetscErrorCode KSPDestroy_PIPEFCG(KSP ksp)
334: {
335:   PetscInt     i;
336:   KSP_PIPEFCG *pipefcg;

338:   PetscFunctionBegin;
339:   pipefcg = (KSP_PIPEFCG *)ksp->data;

341:   /* Destroy "standard" work vecs */
342:   PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work));

344:   /* Destroy vectors of old directions and the arrays that manage pointers to them */
345:   if (pipefcg->nvecs) {
346:     for (i = 0; i < pipefcg->nchunks; ++i) {
347:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pPvecs[i]));
348:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pSvecs[i]));
349:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pQvecs[i]));
350:       PetscCall(VecDestroyVecs(pipefcg->chunksizes[i], &pipefcg->pZETAvecs[i]));
351:     }
352:   }
353:   PetscCall(PetscFree4(pipefcg->Pvecs, pipefcg->Svecs, pipefcg->pPvecs, pipefcg->pSvecs));
354:   PetscCall(PetscFree4(pipefcg->Qvecs, pipefcg->ZETAvecs, pipefcg->pQvecs, pipefcg->pZETAvecs));
355:   PetscCall(PetscFree4(pipefcg->Pold, pipefcg->Sold, pipefcg->Qold, pipefcg->ZETAold));
356:   PetscCall(PetscFree(pipefcg->chunksizes));
357:   PetscCall(PetscFree3(pipefcg->dots, pipefcg->etas, pipefcg->redux));
358:   PetscCall(KSPDestroyDefault(ksp));
359:   PetscFunctionReturn(PETSC_SUCCESS);
360: }

362: static PetscErrorCode KSPView_PIPEFCG(KSP ksp, PetscViewer viewer)
363: {
364:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
365:   PetscBool    iascii, isstring;
366:   const char  *truncstr;

368:   PetscFunctionBegin;
369:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
370:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));

372:   if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
373:     truncstr = "Using standard truncation strategy";
374:   } else if (pipefcg->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
375:     truncstr = "Using Notay's truncation strategy";
376:   } else {
377:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
378:   }

380:   if (iascii) {
381:     PetscCall(PetscViewerASCIIPrintf(viewer, "  max previous directions = %" PetscInt_FMT "\n", pipefcg->mmax));
382:     PetscCall(PetscViewerASCIIPrintf(viewer, "  preallocated %" PetscInt_FMT " directions\n", PetscMin(pipefcg->nprealloc, pipefcg->mmax + 1)));
383:     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", truncstr));
384:     PetscCall(PetscViewerASCIIPrintf(viewer, "  restarts performed = %" PetscInt_FMT " \n", pipefcg->n_restarts));
385:   } else if (isstring) {
386:     PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipefcg->mmax, pipefcg->nprealloc, truncstr));
387:   }
388:   PetscFunctionReturn(PETSC_SUCCESS);
389: }

391: /*@
392:   KSPPIPEFCGSetMmax - set the maximum number of previous directions `KSPPIPEFCG` will store for orthogonalization

394:   Logically Collective

396:   Input Parameters:
397: +  ksp - the Krylov space context
398: -  mmax - the maximum number of previous directions to orthogonalize against

400:   Options Database Key:
401: . -ksp_pipefcg_mmax <N> - maximum number of previous directions

403:   Level: intermediate

405:   Note:
406:    mmax + 1 directions are stored (mmax previous ones along with the current one)
407:   and whether all are used in each iteration also depends on the truncation strategy, see `KSPPIPEFCGSetTruncationType()`

409: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGSetNprealloc()`
410: @*/
411: PetscErrorCode KSPPIPEFCGSetMmax(KSP ksp, PetscInt mmax)
412: {
413:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

415:   PetscFunctionBegin;
418:   pipefcg->mmax = mmax;
419:   PetscFunctionReturn(PETSC_SUCCESS);
420: }

422: /*@
423:   KSPPIPEFCGGetMmax - get the maximum number of previous directions `KSPPIPEFCG` will store

425:    Not Collective

427:    Input Parameter:
428: .  ksp - the Krylov space context

430:    Output Parameter:
431: .  mmax - the maximum number of previous directions allowed for orthogonalization

433:   Options Database Key:
434: . -ksp_pipefcg_mmax <N> - maximum number of previous directions

436:    Level: intermediate

438: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType()`, `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetMmax()`
439: @*/
440: PetscErrorCode KSPPIPEFCGGetMmax(KSP ksp, PetscInt *mmax)
441: {
442:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

444:   PetscFunctionBegin;
446:   *mmax = pipefcg->mmax;
447:   PetscFunctionReturn(PETSC_SUCCESS);
448: }

450: /*@
451:   KSPPIPEFCGSetNprealloc - set the number of directions to preallocate with `KSPPIPEFCG`

453:   Logically Collective

455:   Input Parameters:
456: +  ksp - the Krylov space context
457: -  nprealloc - the number of vectors to preallocate

459:   Options Database Key:
460: . -ksp_pipefcg_nprealloc <N> - the number of vectors to preallocate

462:   Level: advanced

464: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`
465: @*/
466: PetscErrorCode KSPPIPEFCGSetNprealloc(KSP ksp, PetscInt nprealloc)
467: {
468:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

470:   PetscFunctionBegin;
473:   pipefcg->nprealloc = nprealloc;
474:   PetscFunctionReturn(PETSC_SUCCESS);
475: }

477: /*@
478:   KSPPIPEFCGGetNprealloc - get the number of directions to preallocate by `KSPPIPEFCG`

480:    Not Collective

482:    Input Parameter:
483: .  ksp - the Krylov space context

485:    Output Parameter:
486: .  nprealloc - the number of directions preallocated

488:    Level: advanced

490: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType()`, `KSPPIPEFCGSetNprealloc()`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`
491: @*/
492: PetscErrorCode KSPPIPEFCGGetNprealloc(KSP ksp, PetscInt *nprealloc)
493: {
494:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

496:   PetscFunctionBegin;
498:   *nprealloc = pipefcg->nprealloc;
499:   PetscFunctionReturn(PETSC_SUCCESS);
500: }

502: /*@
503:   KSPPIPEFCGSetTruncationType - specify how many of its stored previous directions `KSPPIPEFCG` uses during orthoganalization

505:   Logically Collective

507:   Input Parameters:
508: +  ksp - the Krylov space context
509: -  truncstrat - the choice of strategy
510: .vb
511:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
512:   KSP_FCD_TRUNC_TYPE_NOTAY uses max(1,mod(i,mmax)) stored directions at iteration i=0,1,..
513: .ve

515:   Options Database Key:
516: .  -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against

518:   Level: intermediate

520: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGGetTruncationType`, `KSPFCDTruncationType`
521: @*/
522: PetscErrorCode KSPPIPEFCGSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
523: {
524:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

526:   PetscFunctionBegin;
529:   pipefcg->truncstrat = truncstrat;
530:   PetscFunctionReturn(PETSC_SUCCESS);
531: }

533: /*@
534:   KSPPIPEFCGGetTruncationType - get the truncation strategy employed by `KSPPIPEFCG`

536:    Not Collective

538:    Input Parameter:
539: .  ksp - the Krylov space context

541:    Output Parameter:
542: .  truncstrat - the strategy type

544:   Options Database Key:
545: . -ksp_pipefcg_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against

547:    Level: intermediate

549: .seealso: [](ch_ksp), `KSPPIPEFCG`, `KSPPIPEFCGSetTruncationType`, `KSPFCDTruncationType`
550: @*/
551: PetscErrorCode KSPPIPEFCGGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
552: {
553:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;

555:   PetscFunctionBegin;
557:   *truncstrat = pipefcg->truncstrat;
558:   PetscFunctionReturn(PETSC_SUCCESS);
559: }

561: static PetscErrorCode KSPSetFromOptions_PIPEFCG(KSP ksp, PetscOptionItems *PetscOptionsObject)
562: {
563:   KSP_PIPEFCG *pipefcg = (KSP_PIPEFCG *)ksp->data;
564:   PetscInt     mmax, nprealloc;
565:   PetscBool    flg;

567:   PetscFunctionBegin;
568:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEFCG options");
569:   PetscCall(PetscOptionsInt("-ksp_pipefcg_mmax", "Number of search directions to storue", "KSPPIPEFCGSetMmax", pipefcg->mmax, &mmax, &flg));
570:   if (flg) PetscCall(KSPPIPEFCGSetMmax(ksp, mmax));
571:   PetscCall(PetscOptionsInt("-ksp_pipefcg_nprealloc", "Number of directions to preallocate", "KSPPIPEFCGSetNprealloc", pipefcg->nprealloc, &nprealloc, &flg));
572:   if (flg) PetscCall(KSPPIPEFCGSetNprealloc(ksp, nprealloc));
573:   PetscCall(PetscOptionsEnum("-ksp_pipefcg_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipefcg->truncstrat, (PetscEnum *)&pipefcg->truncstrat, NULL));
574:   PetscOptionsHeadEnd();
575:   PetscFunctionReturn(PETSC_SUCCESS);
576: }

578: /*MC

580:   KSPPIPEFCG - Implements a Pipelined, Flexible Conjugate Gradient method. [](sec_pipelineksp). [](sec_flexibleksp)

582:   Options Database Keys:
583: +   -ksp_pipefcg_mmax <N> - The number of previous search directions to store
584: .   -ksp_pipefcg_nprealloc <N> - The number of previous search directions to preallocate
585: -   -ksp_pipefcg_truncation_type <standard,notay> - which stored search directions to orthogonalize against

587:   Notes:
588:    Supports left preconditioning only.

590:    The natural "norm" for this method is (u,Au), where u is the preconditioned residual. As with standard `KSPCG`, this norm is available at no additional computational cost.
591:    Choosing preconditioned or unpreconditioned norms involve an extra blocking global reduction, thus removing any benefit from pipelining.

593:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
594:    See [](doc_faq_pipelined)

596:   Contributed by:
597:   Patrick Sanan and Sascha M. Schnepp

599:   Reference:
600: . * - P. Sanan, S.M. Schnepp, and D.A. May, "Pipelined, Flexible Krylov Subspace Methods", SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
601:     DOI: 10.1137/15M1049130

603:   Level: intermediate

605: .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPFCG`, `KSPPIPECG`, `KSPPIPECR`, `KSPGCR`, `KSPPIPEGCR`, `KSPFGMRES`, `KSPCG`, `KSPPIPEFCGSetMmax()`, `KSPPIPEFCGGetMmax()`, `KSPPIPEFCGSetNprealloc()`,
606:           `KSPPIPEFCGGetNprealloc()`, `KSPPIPEFCGSetTruncationType()`, `KSPPIPEFCGGetTruncationType()`
607: M*/
608: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFCG(KSP ksp)
609: {
610:   KSP_PIPEFCG *pipefcg;

612:   PetscFunctionBegin;
613:   PetscCall(PetscNew(&pipefcg));
614: #if !defined(PETSC_USE_COMPLEX)
615:   pipefcg->type = KSP_CG_SYMMETRIC;
616: #else
617:   pipefcg->type = KSP_CG_HERMITIAN;
618: #endif
619:   pipefcg->mmax       = KSPPIPEFCG_DEFAULT_MMAX;
620:   pipefcg->nprealloc  = KSPPIPEFCG_DEFAULT_NPREALLOC;
621:   pipefcg->nvecs      = 0;
622:   pipefcg->vecb       = KSPPIPEFCG_DEFAULT_VECB;
623:   pipefcg->nchunks    = 0;
624:   pipefcg->truncstrat = KSPPIPEFCG_DEFAULT_TRUNCSTRAT;
625:   pipefcg->n_restarts = 0;

627:   ksp->data = (void *)pipefcg;

629:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 2));
630:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 1));
631:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
632:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

634:   ksp->ops->setup          = KSPSetUp_PIPEFCG;
635:   ksp->ops->solve          = KSPSolve_PIPEFCG;
636:   ksp->ops->destroy        = KSPDestroy_PIPEFCG;
637:   ksp->ops->view           = KSPView_PIPEFCG;
638:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFCG;
639:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
640:   ksp->ops->buildresidual  = KSPBuildResidualDefault;
641:   PetscFunctionReturn(PETSC_SUCCESS);
642: }