Actual source code: pipegcr.c

  1: #include <petscsys.h>
  2: #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h>

  4: static PetscBool  cited      = PETSC_FALSE;
  5: static const char citation[] = "@article{SSM2016,\n"
  6:                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
  7:                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
  8:                                "  journal = {SIAM Journal on Scientific Computing},\n"
  9:                                "  volume = {38},\n"
 10:                                "  number = {5},\n"
 11:                                "  pages = {C441-C470},\n"
 12:                                "  year = {2016},\n"
 13:                                "  doi = {10.1137/15M1049130},\n"
 14:                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
 15:                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
 16:                                "}\n";

 18: #define KSPPIPEGCR_DEFAULT_MMAX       15
 19: #define KSPPIPEGCR_DEFAULT_NPREALLOC  5
 20: #define KSPPIPEGCR_DEFAULT_VECB       5
 21: #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
 22: #define KSPPIPEGCR_DEFAULT_UNROLL_W   PETSC_TRUE

 24: #include <petscksp.h>

 26: static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
 27: {
 28:   KSP_PIPEGCR *pipegcr;
 29:   PetscInt     nnewvecs, nvecsprev;

 31:   PetscFunctionBegin;
 32:   pipegcr = (KSP_PIPEGCR *)ksp->data;

 34:   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
 35:   if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
 36:     nvecsprev = pipegcr->nvecs;
 37:     nnewvecs  = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
 38:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL));
 39:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL));
 40:     PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL));
 41:     if (pipegcr->unroll_w) PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL));
 42:     pipegcr->nvecs += nnewvecs;
 43:     for (PetscInt i = 0; i < nnewvecs; i++) {
 44:       pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
 45:       pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
 46:       pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
 47:       if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
 48:     }
 49:     pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
 50:     pipegcr->nchunks++;
 51:   }
 52:   PetscFunctionReturn(PETSC_SUCCESS);
 53: }

 55: static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
 56: {
 57:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
 58:   Mat          A, B;
 59:   Vec          x, r, b, z, w, m, n, p, s, q, t, *redux;
 60:   PetscInt     i, j, k, idx, kdx, mi;
 61:   PetscScalar  alpha = 0.0, gamma, *betas, *dots;
 62:   PetscReal    rnorm = 0.0, delta, *eta, *etas;

 64:   PetscFunctionBegin;
 65:   /* !!PS We have not checked these routines for use with complex numbers. The inner products
 66:      are likely not defined correctly for that case */
 67:   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");

 69:   PetscCall(KSPGetOperators(ksp, &A, &B));
 70:   x = ksp->vec_sol;
 71:   b = ksp->vec_rhs;
 72:   r = ksp->work[0];
 73:   z = ksp->work[1];
 74:   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */
 75:   m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r))    (pipelining intermediate) */
 76:   n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
 77:   p = pipegcr->pvecs[0];
 78:   s = pipegcr->svecs[0];
 79:   q = pipegcr->qvecs[0];
 80:   t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;

 82:   redux = pipegcr->redux;
 83:   dots  = pipegcr->dots;
 84:   etas  = pipegcr->etas;
 85:   betas = dots; /* dots takes the result of all dot products of which the betas are a subset */

 87:   /* cycle initial residual */
 88:   PetscCall(KSP_MatMult(ksp, A, x, r));
 89:   PetscCall(VecAYPX(r, -1.0, b));       /* r <- b - Ax */
 90:   PetscCall(KSP_PCApply(ksp, r, z));    /* z <- B(r)   */
 91:   PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az     */

 93:   /* initialization of other variables and pipelining intermediates */
 94:   PetscCall(VecCopy(z, p));
 95:   PetscCall(KSP_MatMult(ksp, A, p, s));

 97:   /* overlap initial computation of delta, gamma */
 98:   redux[0] = w;
 99:   redux[1] = r;
100:   PetscCall(VecMDotBegin(w, 2, redux, dots));                               /* Start split reductions for gamma = (w,r), delta = (w,w) */
101:   PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s))); /* perform asynchronous reduction */
102:   PetscCall(KSP_PCApply(ksp, s, q));                                        /* q = B(s) */
103:   if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, q, t));              /* t = Aq   */
104:   PetscCall(VecMDotEnd(w, 2, redux, dots));                                 /* Finish split reduction */
105:   delta   = PetscRealPart(dots[0]);
106:   etas[0] = delta;
107:   gamma   = dots[1];
108:   alpha   = gamma / delta;

110:   i = 0;
111:   do {
112:     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
113:     ksp->its++;
114:     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));

116:     /* update solution, residuals, .. */
117:     PetscCall(VecAXPY(x, +alpha, p));
118:     PetscCall(VecAXPY(r, -alpha, s));
119:     PetscCall(VecAXPY(z, -alpha, q));
120:     if (pipegcr->unroll_w) {
121:       PetscCall(VecAXPY(w, -alpha, t));
122:     } else {
123:       PetscCall(KSP_MatMult(ksp, A, z, w));
124:     }

126:     /* Computations of current iteration done */
127:     i++;

129:     if (pipegcr->modifypc) PetscCall((*pipegcr->modifypc)(ksp, ksp->its, 0 /* unused argument */, ksp->rnorm, pipegcr->modifypc_ctx));

131:     /* If needbe, allocate a new chunk of vectors */
132:     PetscCall(KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb));

134:     /* Note that we wrap around and start clobbering old vectors */
135:     idx = i % (pipegcr->mmax + 1);
136:     p   = pipegcr->pvecs[idx];
137:     s   = pipegcr->svecs[idx];
138:     q   = pipegcr->qvecs[idx];
139:     if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
140:     eta = pipegcr->etas + idx;

142:     /* number of old directions to orthogonalize against */
143:     switch (pipegcr->truncstrat) {
144:     case KSP_FCD_TRUNC_TYPE_STANDARD:
145:       mi = pipegcr->mmax;
146:       break;
147:     case KSP_FCD_TRUNC_TYPE_NOTAY:
148:       mi = ((i - 1) % pipegcr->mmax) + 1;
149:       break;
150:     default:
151:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
152:     }

154:     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
155:     for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
156:       kdx              = k % (pipegcr->mmax + 1);
157:       pipegcr->pold[j] = pipegcr->pvecs[kdx];
158:       pipegcr->sold[j] = pipegcr->svecs[kdx];
159:       pipegcr->qold[j] = pipegcr->qvecs[kdx];
160:       if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
161:       redux[j] = pipegcr->svecs[kdx];
162:     }
163:     /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
164:     redux[j]     = r;
165:     redux[j + 1] = w;

167:     /* Dot products */
168:     /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
169:     PetscCall(VecMDotBegin(w, j + 2, redux, dots));
170:     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w)));

172:     /* B(w-r) + u stabilization */
173:     PetscCall(VecWAXPY(n, -1.0, r, w));                          /* m = u + B(w-r): (a) ntmp = w-r              */
174:     PetscCall(KSP_PCApply(ksp, n, m));                           /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
175:     PetscCall(VecAXPY(m, 1.0, z));                               /* m = u + B(w-r): (c) m = z + mtmp            */
176:     if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, m, n)); /* n = Am                                      */

178:     /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
179:     PetscCall(VecMDotEnd(w, j + 2, redux, dots));
180:     gamma = dots[j];
181:     delta = PetscRealPart(dots[j + 1]);

183:     /* compute new residual norm.
184:        this cannot be done before this point so that the natural norm
185:        is available for free and the communication involved is overlapped */
186:     switch (ksp->normtype) {
187:     case KSP_NORM_PRECONDITIONED:
188:       PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
189:       break;
190:     case KSP_NORM_UNPRECONDITIONED:
191:       PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
192:       break;
193:     case KSP_NORM_NATURAL:
194:       rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w)  */
195:       break;
196:     case KSP_NORM_NONE:
197:       rnorm = 0.0;
198:       break;
199:     default:
200:       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
201:     }

203:     /* Check for convergence */
204:     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
205:     ksp->rnorm = rnorm;
206:     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
207:     PetscCall(KSPLogResidualHistory(ksp, rnorm));
208:     PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
209:     PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
210:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

212:     /* compute new eta and scale beta */
213:     *eta = 0.;
214:     for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
215:       kdx = k % (pipegcr->mmax + 1);
216:       betas[j] /= -etas[kdx]; /* betak  /= etak */
217:       *eta -= PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]) * etas[kdx];
218:       /* etaitmp = -betaik^2 * etak */
219:     }
220:     *eta += delta; /* etai    = delta -betaik^2 * etak */

222:     /* check breakdown of eta = (s,s) */
223:     if (*eta < 0.) {
224:       pipegcr->norm_breakdown = PETSC_TRUE;
225:       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
226:       break;
227:     } else {
228:       alpha = gamma / (*eta); /* alpha = gamma/etai */
229:     }

231:     /* project out stored search directions using classical G-S */
232:     PetscCall(VecCopy(z, p));
233:     PetscCall(VecCopy(w, s));
234:     PetscCall(VecCopy(m, q));
235:     if (pipegcr->unroll_w) {
236:       PetscCall(VecCopy(n, t));
237:       PetscCall(VecMAXPY(t, j, betas, pipegcr->told)); /* ti <- n  - sum_k beta_k t_k */
238:     }
239:     PetscCall(VecMAXPY(p, j, betas, pipegcr->pold)); /* pi <- ui - sum_k beta_k p_k */
240:     PetscCall(VecMAXPY(s, j, betas, pipegcr->sold)); /* si <- wi - sum_k beta_k s_k */
241:     PetscCall(VecMAXPY(q, j, betas, pipegcr->qold)); /* qi <- m  - sum_k beta_k q_k */

243:   } while (ksp->its < ksp->max_it);
244:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
245:   PetscFunctionReturn(PETSC_SUCCESS);
246: }

248: static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
249: {
250:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
251:   Mat          A, B;
252:   Vec          x, b, r, z, w;
253:   PetscScalar  gamma;
254:   PetscReal    rnorm = 0.0;
255:   PetscBool    issym;

257:   PetscFunctionBegin;
258:   PetscCall(PetscCitationsRegister(citation, &cited));

260:   PetscCall(KSPGetOperators(ksp, &A, &B));
261:   x = ksp->vec_sol;
262:   b = ksp->vec_rhs;
263:   r = ksp->work[0];
264:   z = ksp->work[1];
265:   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */

267:   /* compute initial residual */
268:   if (!ksp->guess_zero) {
269:     PetscCall(KSP_MatMult(ksp, A, x, r));
270:     PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax       */
271:   } else {
272:     PetscCall(VecCopy(b, r)); /* r <- b            */
273:   }

275:   /* initial residual norm */
276:   PetscCall(KSP_PCApply(ksp, r, z));    /* z <- B(r)         */
277:   PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az           */
278:   PetscCall(VecDot(r, w, &gamma));      /* gamma = (r,w)     */

280:   switch (ksp->normtype) {
281:   case KSP_NORM_PRECONDITIONED:
282:     PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
283:     break;
284:   case KSP_NORM_UNPRECONDITIONED:
285:     PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
286:     break;
287:   case KSP_NORM_NATURAL:
288:     rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w)  */
289:     break;
290:   case KSP_NORM_NONE:
291:     rnorm = 0.0;
292:     break;
293:   default:
294:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
295:   }

297:   /* Is A symmetric? */
298:   PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, ""));
299:   if (!issym) PetscCall(PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?\n"));

301:   /* logging */
302:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
303:   ksp->its    = 0;
304:   ksp->rnorm0 = rnorm;
305:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
306:   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm0));
307:   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm0));
308:   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP));
309:   if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);

311:   do {
312:     PetscCall(KSPSolve_PIPEGCR_cycle(ksp));
313:     if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
314:     if (pipegcr->norm_breakdown) {
315:       pipegcr->n_restarts++;
316:       pipegcr->norm_breakdown = PETSC_FALSE;
317:     }
318:   } while (ksp->its < ksp->max_it);

320:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
321:   PetscFunctionReturn(PETSC_SUCCESS);
322: }

324: static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
325: {
326:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
327:   PetscBool    isascii, isstring;
328:   const char  *truncstr;

330:   PetscFunctionBegin;
331:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
332:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));

334:   if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
335:     truncstr = "Using standard truncation strategy";
336:   } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
337:     truncstr = "Using Notay's truncation strategy";
338:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");

340:   if (isascii) {
341:     PetscCall(PetscViewerASCIIPrintf(viewer, "  max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax));
342:     PetscCall(PetscViewerASCIIPrintf(viewer, "  preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1)));
343:     PetscCall(PetscViewerASCIIPrintf(viewer, "  %s\n", truncstr));
344:     PetscCall(PetscViewerASCIIPrintf(viewer, "  w unrolling = %s \n", PetscBools[pipegcr->unroll_w]));
345:     PetscCall(PetscViewerASCIIPrintf(viewer, "  restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts));
346:   } else if (isstring) {
347:     PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr));
348:   }
349:   PetscFunctionReturn(PETSC_SUCCESS);
350: }

352: static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
353: {
354:   KSP_PIPEGCR   *pipegcr = (KSP_PIPEGCR *)ksp->data;
355:   Mat            A;
356:   PetscBool      diagonalscale;
357:   const PetscInt nworkstd = 5;

359:   PetscFunctionBegin;
360:   PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
361:   PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);

363:   PetscCall(KSPGetOperators(ksp, &A, NULL));

365:   /* Allocate "standard" work vectors */
366:   PetscCall(KSPSetWorkVecs(ksp, nworkstd));

368:   /* Allocated space for pointers to additional work vectors
369:     note that mmax is the number of previous directions, so we add 1 for the current direction */
370:   PetscCall(PetscMalloc6(pipegcr->mmax + 1, &pipegcr->pvecs, pipegcr->mmax + 1, &pipegcr->ppvecs, pipegcr->mmax + 1, &pipegcr->svecs, pipegcr->mmax + 1, &pipegcr->psvecs, pipegcr->mmax + 1, &pipegcr->qvecs, pipegcr->mmax + 1, &pipegcr->pqvecs));
371:   if (pipegcr->unroll_w) PetscCall(PetscMalloc3(pipegcr->mmax + 1, &pipegcr->tvecs, pipegcr->mmax + 1, &pipegcr->ptvecs, pipegcr->mmax + 2, &pipegcr->told));
372:   PetscCall(PetscMalloc4(pipegcr->mmax + 2, &pipegcr->pold, pipegcr->mmax + 2, &pipegcr->sold, pipegcr->mmax + 2, &pipegcr->qold, pipegcr->mmax + 2, &pipegcr->chunksizes));
373:   PetscCall(PetscMalloc3(pipegcr->mmax + 2, &pipegcr->dots, pipegcr->mmax + 1, &pipegcr->etas, pipegcr->mmax + 2, &pipegcr->redux));
374:   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
375:   if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1));

377:   /* Preallocate additional work vectors */
378:   PetscCall(KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc));
379:   PetscFunctionReturn(PETSC_SUCCESS);
380: }

382: static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
383: {
384:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

386:   PetscFunctionBegin;
387:   if (pipegcr->modifypc_destroy) PetscCall((*pipegcr->modifypc_destroy)(&pipegcr->modifypc_ctx));
388:   PetscFunctionReturn(PETSC_SUCCESS);
389: }

391: static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
392: {
393:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

395:   PetscFunctionBegin;
396:   PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work)); /* Destroy "standard" work vecs */

398:   /* Destroy vectors for old directions and the arrays that manage pointers to them */
399:   if (pipegcr->nvecs) {
400:     for (PetscInt i = 0; i < pipegcr->nchunks; i++) {
401:       PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]));
402:       PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]));
403:       PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]));
404:       if (pipegcr->unroll_w) PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]));
405:     }
406:   }

408:   PetscCall(PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs));
409:   PetscCall(PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes));
410:   PetscCall(PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux));
411:   if (pipegcr->unroll_w) PetscCall(PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told));

413:   PetscCall(KSPReset_PIPEGCR(ksp));
414:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", NULL));
415:   PetscCall(KSPDestroyDefault(ksp));
416:   PetscFunctionReturn(PETSC_SUCCESS);
417: }

419: /*@
420:   KSPPIPEGCRSetUnrollW - Set to `PETSC_TRUE` to use `KSPPIPEGCR` with unrolling of the w vector

422:   Logically Collective

424:   Input Parameters:
425: + ksp      - the Krylov space context
426: - unroll_w - use unrolling

428:   Level: intermediate

430:   Options Database Key:
431: . -ksp_pipegcr_unroll_w (true|false) - use unrolling

433: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
434: @*/
435: PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
436: {
437:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

439:   PetscFunctionBegin;
442:   pipegcr->unroll_w = unroll_w;
443:   PetscFunctionReturn(PETSC_SUCCESS);
444: }

446: /*@
447:   KSPPIPEGCRGetUnrollW - Get information on `KSPPIPEGCR` if it uses unrolling the w vector

449:   Logically Collective

451:   Input Parameter:
452: . ksp - the Krylov space context

454:   Output Parameter:
455: . unroll_w - `KSPPIPEGCR` uses unrolling (bool)

457:   Level: intermediate

459: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
460: @*/
461: PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
462: {
463:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

465:   PetscFunctionBegin;
467:   *unroll_w = pipegcr->unroll_w;
468:   PetscFunctionReturn(PETSC_SUCCESS);
469: }

471: /*@
472:   KSPPIPEGCRSetMmax - set the maximum number of previous directions `KSPPIPEGCR` will store for orthogonalization

474:   Logically Collective

476:   Input Parameters:
477: + ksp  - the Krylov space context
478: - mmax - the maximum number of previous directions to orthogonalize against

480:   Options Database Key:
481: . -ksp_pipegcr_mmax mmax - maximum number of previous directions

483:   Level: intermediate

485:   Note:
486:   `mmax` + 1 directions are stored (`mmax` previous ones along with a current one)
487:   and whether all are used in each iteration also depends on the truncation strategy
488:   (see `KSPPIPEGCRSetTruncationType`)

490: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
491: @*/
492: PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
493: {
494:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

496:   PetscFunctionBegin;
499:   pipegcr->mmax = mmax;
500:   PetscFunctionReturn(PETSC_SUCCESS);
501: }

503: /*@
504:   KSPPIPEGCRGetMmax - get the maximum number of previous directions `KSPPIPEGCR` will store

506:   Not Collective

508:   Input Parameter:
509: . ksp - the Krylov space context

511:   Output Parameter:
512: . mmax - the maximum number of previous directions allowed for orthogonalization

514:   Level: intermediate

516: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
517: @*/
518: PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
519: {
520:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

522:   PetscFunctionBegin;
524:   *mmax = pipegcr->mmax;
525:   PetscFunctionReturn(PETSC_SUCCESS);
526: }

528: /*@
529:   KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with `KSPPIPEGCR`

531:   Logically Collective

533:   Input Parameters:
534: + ksp       - the Krylov space context
535: - nprealloc - the number of vectors to preallocate

537:   Level: advanced

539:   Options Database Key:
540: . -ksp_pipegcr_nprealloc N - number of vectors to preallocate

542: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
543: @*/
544: PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
545: {
546:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

548:   PetscFunctionBegin;
551:   pipegcr->nprealloc = nprealloc;
552:   PetscFunctionReturn(PETSC_SUCCESS);
553: }

555: /*@
556:   KSPPIPEGCRGetNprealloc - get the number of directions preallocate by `KSPPIPEGCR`

558:   Not Collective

560:   Input Parameter:
561: . ksp - the Krylov space context

563:   Output Parameter:
564: . nprealloc - the number of directions preallocated

566:   Level: advanced

568: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
569: @*/
570: PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
571: {
572:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

574:   PetscFunctionBegin;
576:   *nprealloc = pipegcr->nprealloc;
577:   PetscFunctionReturn(PETSC_SUCCESS);
578: }

580: /*@
581:   KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions `KSPPIPEGCR` uses during orthogonalization

583:   Logically Collective

585:   Input Parameters:
586: + ksp        - the Krylov space context
587: - truncstrat - the choice of strategy
588: .vb
589:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
590:   KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i = 0, 1, ..
591: .ve

593:   Options Database Key:
594: . -ksp_pipegcr_truncation_type (standard|notay) - which stored basis vectors to orthogonalize against

596:   Level: intermediate

598: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPFCDTruncationType`, `KSPPIPEGCRGetTruncationType()`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
599: @*/
600: PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
601: {
602:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

604:   PetscFunctionBegin;
607:   pipegcr->truncstrat = truncstrat;
608:   PetscFunctionReturn(PETSC_SUCCESS);
609: }

611: /*@
612:   KSPPIPEGCRGetTruncationType - get the truncation strategy employed by `KSPPIPEGCR`

614:   Not Collective

616:   Input Parameter:
617: . ksp - the Krylov space context

619:   Output Parameter:
620: . truncstrat - the strategy type
621: .vb
622:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
623:   KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i =0, 1, ..
624: .ve

626:   Level: intermediate

628: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
629: @*/
630: PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
631: {
632:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

634:   PetscFunctionBegin;
636:   *truncstrat = pipegcr->truncstrat;
637:   PetscFunctionReturn(PETSC_SUCCESS);
638: }

640: static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems PetscOptionsObject)
641: {
642:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
643:   PetscInt     mmax, nprealloc;
644:   PetscBool    flg;

646:   PetscFunctionBegin;
647:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
648:   PetscCall(PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg));
649:   if (flg) PetscCall(KSPPIPEGCRSetMmax(ksp, mmax));
650:   PetscCall(PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg));
651:   if (flg) PetscCall(KSPPIPEGCRSetNprealloc(ksp, nprealloc));
652:   PetscCall(PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL));
653:   PetscCall(PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL));
654:   PetscOptionsHeadEnd();
655:   PetscFunctionReturn(PETSC_SUCCESS);
656: }

658: static PetscErrorCode KSPFlexibleSetModifyPC_PIPEGCR(KSP ksp, KSPFlexibleModifyPCFn *function, PetscCtx ctx, PetscCtxDestroyFn *destroy)
659: {
660:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

662:   PetscFunctionBegin;
664:   pipegcr->modifypc         = function;
665:   pipegcr->modifypc_destroy = destroy;
666:   pipegcr->modifypc_ctx     = ctx;
667:   PetscFunctionReturn(PETSC_SUCCESS);
668: }

670: /*MC
671:   KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method {cite}`sananschneppmay2016`. [](sec_flexibleksp). [](sec_pipelineksp)

673:   Options Database Keys:
674: +   -ksp_pipegcr_mmax N                           - the max number of Krylov directions to orthogonalize against
675: .   -ksp_pipegcr_unroll_w (true|false)            - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: `PETSC_TRUE`)
676: .   -ksp_pipegcr_nprealloc N                      - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
677: -   -ksp_pipegcr_truncation_type (standard|notay) - which previous search directions to orthogonalize against

679:   Level: intermediate

681:   Notes:
682:   Pipelined version of `KSPGCR` that overlaps communication (global reductions) with computation (preconditioner application and matrix-vector products) to reduce the impact of communication latency on parallel performance.

684:   The `KSPPIPEGCR` Krylov method supports non-symmetric matrices and permits the use of a preconditioner
685:   which may vary from one iteration to the next. Users can define a method to vary the
686:   preconditioner between iterates via `KSPFlexibleSetModifyPC()`.
687:   Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
688:   solution is given by the current estimate for `x` which was obtained by the last restart
689:   iterations of the `KSPPIPEGCR` algorithm.
690:   The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as `KSPGCR`.

692:   Only supports left preconditioning.

694:   The natural "norm" for this method is $(u,Au)$, where $u$ is the preconditioned residual. This norm is available at no additional computational cost, as with standard
695:   `KSPCG`.  Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.

697:   MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
698:   See [](doc_faq_pipelined)

700:   Contributed by:
701:   Sascha M. Schnepp and Patrick Sanan

703: .seealso: [](ch_ksp), [](sec_flexibleksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
704:           `KSPFlexibleSetModifyPC()`, `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`,
705:           `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRGetMmax()`, `KSPGCR`, `KSPPIPEGCRGetUnrollW()`
706: M*/
707: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
708: {
709:   KSP_PIPEGCR *pipegcr;

711:   PetscFunctionBegin;
712:   PetscCall(PetscNew(&pipegcr));
713:   pipegcr->mmax       = KSPPIPEGCR_DEFAULT_MMAX;
714:   pipegcr->nprealloc  = KSPPIPEGCR_DEFAULT_NPREALLOC;
715:   pipegcr->nvecs      = 0;
716:   pipegcr->vecb       = KSPPIPEGCR_DEFAULT_VECB;
717:   pipegcr->nchunks    = 0;
718:   pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
719:   pipegcr->n_restarts = 0;
720:   pipegcr->unroll_w   = KSPPIPEGCR_DEFAULT_UNROLL_W;

722:   ksp->data = (void *)pipegcr;

724:   /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
725:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
726:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1));
727:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
728:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));

730:   ksp->ops->setup          = KSPSetUp_PIPEGCR;
731:   ksp->ops->solve          = KSPSolve_PIPEGCR;
732:   ksp->ops->reset          = KSPReset_PIPEGCR;
733:   ksp->ops->destroy        = KSPDestroy_PIPEGCR;
734:   ksp->ops->view           = KSPView_PIPEGCR;
735:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
736:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
737:   ksp->ops->buildresidual  = KSPBuildResidualDefault;

739:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", KSPFlexibleSetModifyPC_PIPEGCR));
740:   PetscFunctionReturn(PETSC_SUCCESS);
741: }