Actual source code: pipegcr.c


  2: #include "petscsys.h"
  3: #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h>

  5: static PetscBool  cited      = PETSC_FALSE;
  6: static const char citation[] = "@article{SSM2016,\n"
  7:                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
  8:                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
  9:                                "  journal = {SIAM Journal on Scientific Computing},\n"
 10:                                "  volume = {38},\n"
 11:                                "  number = {5},\n"
 12:                                "  pages = {C441-C470},\n"
 13:                                "  year = {2016},\n"
 14:                                "  doi = {10.1137/15M1049130},\n"
 15:                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
 16:                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
 17:                                "}\n";

 19: #define KSPPIPEGCR_DEFAULT_MMAX       15
 20: #define KSPPIPEGCR_DEFAULT_NPREALLOC  5
 21: #define KSPPIPEGCR_DEFAULT_VECB       5
 22: #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
 23: #define KSPPIPEGCR_DEFAULT_UNROLL_W   PETSC_TRUE

 25: #include <petscksp.h>

 27: static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
 28: {
 29:   PetscInt     i;
 30:   KSP_PIPEGCR *pipegcr;
 31:   PetscInt     nnewvecs, nvecsprev;

 33:   pipegcr = (KSP_PIPEGCR *)ksp->data;

 35:   /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
 36:   if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
 37:     nvecsprev = pipegcr->nvecs;
 38:     nnewvecs  = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
 39:     KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL);
 40:     KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL);
 41:     KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL);
 42:     if (pipegcr->unroll_w) { KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL); }
 43:     pipegcr->nvecs += nnewvecs;
 44:     for (i = 0; i < nnewvecs; i++) {
 45:       pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
 46:       pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
 47:       pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
 48:       if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
 49:     }
 50:     pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
 51:     pipegcr->nchunks++;
 52:   }
 53:   return 0;
 54: }

 56: static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
 57: {
 58:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
 59:   Mat          A, B;
 60:   Vec          x, r, b, z, w, m, n, p, s, q, t, *redux;
 61:   PetscInt     i, j, k, idx, kdx, mi;
 62:   PetscScalar  alpha = 0.0, gamma, *betas, *dots;
 63:   PetscReal    rnorm = 0.0, delta, *eta, *etas;

 65:   /* !!PS We have not checked these routines for use with complex numbers. The inner products
 66:      are likely not defined correctly for that case */

 69:   KSPGetOperators(ksp, &A, &B);
 70:   x = ksp->vec_sol;
 71:   b = ksp->vec_rhs;
 72:   r = ksp->work[0];
 73:   z = ksp->work[1];
 74:   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */
 75:   m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r))    (pipelining intermediate) */
 76:   n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
 77:   p = pipegcr->pvecs[0];
 78:   s = pipegcr->svecs[0];
 79:   q = pipegcr->qvecs[0];
 80:   t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;

 82:   redux = pipegcr->redux;
 83:   dots  = pipegcr->dots;
 84:   etas  = pipegcr->etas;
 85:   betas = dots; /* dots takes the result of all dot products of which the betas are a subset */

 87:   /* cycle initial residual */
 88:   KSP_MatMult(ksp, A, x, r);
 89:   VecAYPX(r, -1.0, b);       /* r <- b - Ax */
 90:   KSP_PCApply(ksp, r, z);    /* z <- B(r)   */
 91:   KSP_MatMult(ksp, A, z, w); /* w <- Az     */

 93:   /* initialization of other variables and pipelining intermediates */
 94:   VecCopy(z, p);
 95:   KSP_MatMult(ksp, A, p, s);

 97:   /* overlap initial computation of delta, gamma */
 98:   redux[0] = w;
 99:   redux[1] = r;
100:   VecMDotBegin(w, 2, redux, dots);                               /* Start split reductions for gamma = (w,r), delta = (w,w) */
101:   PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s)); /* perform asynchronous reduction */
102:   KSP_PCApply(ksp, s, q);                                        /* q = B(s) */
103:   if (pipegcr->unroll_w) { KSP_MatMult(ksp, A, q, t); /* t = Aq   */ }
104:   VecMDotEnd(w, 2, redux, dots); /* Finish split reduction */
105:   delta   = PetscRealPart(dots[0]);
106:   etas[0] = delta;
107:   gamma   = dots[1];
108:   alpha   = gamma / delta;

110:   i = 0;
111:   do {
112:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
113:     ksp->its++;
114:     PetscObjectSAWsGrantAccess((PetscObject)ksp);

116:     /* update solution, residuals, .. */
117:     VecAXPY(x, +alpha, p);
118:     VecAXPY(r, -alpha, s);
119:     VecAXPY(z, -alpha, q);
120:     if (pipegcr->unroll_w) {
121:       VecAXPY(w, -alpha, t);
122:     } else {
123:       KSP_MatMult(ksp, A, z, w);
124:     }

126:     /* Computations of current iteration done */
127:     i++;

129:     if (pipegcr->modifypc) (*pipegcr->modifypc)(ksp, ksp->its, ksp->rnorm, pipegcr->modifypc_ctx);

131:     /* If needbe, allocate a new chunk of vectors */
132:     KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb);

134:     /* Note that we wrap around and start clobbering old vectors */
135:     idx = i % (pipegcr->mmax + 1);
136:     p   = pipegcr->pvecs[idx];
137:     s   = pipegcr->svecs[idx];
138:     q   = pipegcr->qvecs[idx];
139:     if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
140:     eta = pipegcr->etas + idx;

142:     /* number of old directions to orthogonalize against */
143:     switch (pipegcr->truncstrat) {
144:     case KSP_FCD_TRUNC_TYPE_STANDARD:
145:       mi = pipegcr->mmax;
146:       break;
147:     case KSP_FCD_TRUNC_TYPE_NOTAY:
148:       mi = ((i - 1) % pipegcr->mmax) + 1;
149:       break;
150:     default:
151:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
152:     }

154:     /* Pick old p,s,q,zeta in a way suitable for VecMDot */
155:     for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
156:       kdx              = k % (pipegcr->mmax + 1);
157:       pipegcr->pold[j] = pipegcr->pvecs[kdx];
158:       pipegcr->sold[j] = pipegcr->svecs[kdx];
159:       pipegcr->qold[j] = pipegcr->qvecs[kdx];
160:       if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
161:       redux[j] = pipegcr->svecs[kdx];
162:     }
163:     /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
164:     redux[j]     = r;
165:     redux[j + 1] = w;

167:     /* Dot products */
168:     /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
169:     VecMDotBegin(w, j + 2, redux, dots);
170:     PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w));

172:     /* B(w-r) + u stabilization */
173:     VecWAXPY(n, -1.0, r, w); /* m = u + B(w-r): (a) ntmp = w-r              */
174:     KSP_PCApply(ksp, n, m);  /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
175:     VecAXPY(m, 1.0, z);      /* m = u + B(w-r): (c) m = z + mtmp            */
176:     if (pipegcr->unroll_w) { KSP_MatMult(ksp, A, m, n); /* n = Am                                      */ }

178:     /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
179:     VecMDotEnd(w, j + 2, redux, dots);
180:     gamma = dots[j];
181:     delta = PetscRealPart(dots[j + 1]);

183:     /* compute new residual norm.
184:        this cannot be done before this point so that the natural norm
185:        is available for free and the communication involved is overlapped */
186:     switch (ksp->normtype) {
187:     case KSP_NORM_PRECONDITIONED:
188:       VecNorm(z, NORM_2, &rnorm); /* ||r|| <- sqrt(z'*z) */
189:       break;
190:     case KSP_NORM_UNPRECONDITIONED:
191:       VecNorm(r, NORM_2, &rnorm); /* ||r|| <- sqrt(r'*r) */
192:       break;
193:     case KSP_NORM_NATURAL:
194:       rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w)  */
195:       break;
196:     case KSP_NORM_NONE:
197:       rnorm = 0.0;
198:       break;
199:     default:
200:       SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
201:     }

203:     /* Check for convergence */
204:     PetscObjectSAWsTakeAccess((PetscObject)ksp);
205:     ksp->rnorm = rnorm;
206:     PetscObjectSAWsGrantAccess((PetscObject)ksp);
207:     KSPLogResidualHistory(ksp, rnorm);
208:     KSPMonitor(ksp, ksp->its, rnorm);
209:     (*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP);
210:     if (ksp->reason) return 0;

212:     /* compute new eta and scale beta */
213:     *eta = 0.;
214:     for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
215:       kdx = k % (pipegcr->mmax + 1);
216:       betas[j] /= -etas[kdx]; /* betak  /= etak */
217:       *eta -= ((PetscReal)(PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]))) * etas[kdx];
218:       /* etaitmp = -betaik^2 * etak */
219:     }
220:     *eta += delta; /* etai    = delta -betaik^2 * etak */

222:     /* check breakdown of eta = (s,s) */
223:     if (*eta < 0.) {
224:       pipegcr->norm_breakdown = PETSC_TRUE;
225:       PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its);
226:       break;
227:     } else {
228:       alpha = gamma / (*eta); /* alpha = gamma/etai */
229:     }

231:     /* project out stored search directions using classical G-S */
232:     VecCopy(z, p);
233:     VecCopy(w, s);
234:     VecCopy(m, q);
235:     if (pipegcr->unroll_w) {
236:       VecCopy(n, t);
237:       VecMAXPY(t, j, betas, pipegcr->told); /* ti <- n  - sum_k beta_k t_k */
238:     }
239:     VecMAXPY(p, j, betas, pipegcr->pold); /* pi <- ui - sum_k beta_k p_k */
240:     VecMAXPY(s, j, betas, pipegcr->sold); /* si <- wi - sum_k beta_k s_k */
241:     VecMAXPY(q, j, betas, pipegcr->qold); /* qi <- m  - sum_k beta_k q_k */

243:   } while (ksp->its < ksp->max_it);
244:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
245:   return 0;
246: }

248: static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
249: {
250:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
251:   Mat          A, B;
252:   Vec          x, b, r, z, w;
253:   PetscScalar  gamma;
254:   PetscReal    rnorm = 0.0;
255:   PetscBool    issym;

257:   PetscCitationsRegister(citation, &cited);

259:   KSPGetOperators(ksp, &A, &B);
260:   x = ksp->vec_sol;
261:   b = ksp->vec_rhs;
262:   r = ksp->work[0];
263:   z = ksp->work[1];
264:   w = ksp->work[2]; /* w = Az = AB(r)                 (pipelining intermediate) */

266:   /* compute initial residual */
267:   if (!ksp->guess_zero) {
268:     KSP_MatMult(ksp, A, x, r);
269:     VecAYPX(r, -1.0, b); /* r <- b - Ax       */
270:   } else {
271:     VecCopy(b, r); /* r <- b            */
272:   }

274:   /* initial residual norm */
275:   KSP_PCApply(ksp, r, z);    /* z <- B(r)         */
276:   KSP_MatMult(ksp, A, z, w); /* w <- Az           */
277:   VecDot(r, w, &gamma);      /* gamma = (r,w)     */

279:   switch (ksp->normtype) {
280:   case KSP_NORM_PRECONDITIONED:
281:     VecNorm(z, NORM_2, &rnorm); /* ||r|| <- sqrt(z'*z) */
282:     break;
283:   case KSP_NORM_UNPRECONDITIONED:
284:     VecNorm(r, NORM_2, &rnorm); /* ||r|| <- sqrt(r'*r) */
285:     break;
286:   case KSP_NORM_NATURAL:
287:     rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w)  */
288:     break;
289:   case KSP_NORM_NONE:
290:     rnorm = 0.0;
291:     break;
292:   default:
293:     SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
294:   }

296:   /* Is A symmetric? */
297:   PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, "");
298:   if (!issym) PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?");

300:   /* logging */
301:   PetscObjectSAWsTakeAccess((PetscObject)ksp);
302:   ksp->its    = 0;
303:   ksp->rnorm0 = rnorm;
304:   PetscObjectSAWsGrantAccess((PetscObject)ksp);
305:   KSPLogResidualHistory(ksp, ksp->rnorm0);
306:   KSPMonitor(ksp, ksp->its, ksp->rnorm0);
307:   (*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP);
308:   if (ksp->reason) return 0;

310:   do {
311:     KSPSolve_PIPEGCR_cycle(ksp);
312:     if (ksp->reason) return 0;
313:     if (pipegcr->norm_breakdown) {
314:       pipegcr->n_restarts++;
315:       pipegcr->norm_breakdown = PETSC_FALSE;
316:     }
317:   } while (ksp->its < ksp->max_it);

319:   if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
320:   return 0;
321: }

323: static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
324: {
325:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
326:   PetscBool    isascii, isstring;
327:   const char  *truncstr;

329:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);
330:   PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring);

332:   if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
333:     truncstr = "Using standard truncation strategy";
334:   } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
335:     truncstr = "Using Notay's truncation strategy";
336:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");

338:   if (isascii) {
339:     PetscViewerASCIIPrintf(viewer, "  max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax);
340:     PetscViewerASCIIPrintf(viewer, "  preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1));
341:     PetscViewerASCIIPrintf(viewer, "  %s\n", truncstr);
342:     PetscViewerASCIIPrintf(viewer, "  w unrolling = %s \n", PetscBools[pipegcr->unroll_w]);
343:     PetscViewerASCIIPrintf(viewer, "  restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts);
344:   } else if (isstring) {
345:     PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr);
346:   }
347:   return 0;
348: }

350: static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
351: {
352:   KSP_PIPEGCR   *pipegcr = (KSP_PIPEGCR *)ksp->data;
353:   Mat            A;
354:   PetscBool      diagonalscale;
355:   const PetscInt nworkstd = 5;

357:   PCGetDiagonalScale(ksp->pc, &diagonalscale);

360:   KSPGetOperators(ksp, &A, NULL);

362:   /* Allocate "standard" work vectors */
363:   KSPSetWorkVecs(ksp, nworkstd);

365:   /* Allocated space for pointers to additional work vectors
366:     note that mmax is the number of previous directions, so we add 1 for the current direction */
367:   PetscMalloc6(pipegcr->mmax + 1, &(pipegcr->pvecs), pipegcr->mmax + 1, &(pipegcr->ppvecs), pipegcr->mmax + 1, &(pipegcr->svecs), pipegcr->mmax + 1, &(pipegcr->psvecs), pipegcr->mmax + 1, &(pipegcr->qvecs), pipegcr->mmax + 1, &(pipegcr->pqvecs));
368:   if (pipegcr->unroll_w) PetscMalloc3(pipegcr->mmax + 1, &(pipegcr->tvecs), pipegcr->mmax + 1, &(pipegcr->ptvecs), pipegcr->mmax + 2, &(pipegcr->told));
369:   PetscMalloc4(pipegcr->mmax + 2, &(pipegcr->pold), pipegcr->mmax + 2, &(pipegcr->sold), pipegcr->mmax + 2, &(pipegcr->qold), pipegcr->mmax + 2, &(pipegcr->chunksizes));
370:   PetscMalloc3(pipegcr->mmax + 2, &(pipegcr->dots), pipegcr->mmax + 1, &(pipegcr->etas), pipegcr->mmax + 2, &(pipegcr->redux));
371:   /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
372:   if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1);

374:   /* Preallocate additional work vectors */
375:   KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc);
376:   return 0;
377: }

379: static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
380: {
381:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

383:   if (pipegcr->modifypc_destroy) (*pipegcr->modifypc_destroy)(pipegcr->modifypc_ctx);
384:   return 0;
385: }

387: static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
388: {
389:   PetscInt     i;
390:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

392:   VecDestroyVecs(ksp->nwork, &ksp->work); /* Destroy "standard" work vecs */

394:   /* Destroy vectors for old directions and the arrays that manage pointers to them */
395:   if (pipegcr->nvecs) {
396:     for (i = 0; i < pipegcr->nchunks; i++) {
397:       VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]);
398:       VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]);
399:       VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]);
400:       if (pipegcr->unroll_w) VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]);
401:     }
402:   }

404:   PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs);
405:   PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes);
406:   PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux);
407:   if (pipegcr->unroll_w) PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told);

409:   KSPReset_PIPEGCR(ksp);
410:   PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", NULL);
411:   KSPDestroyDefault(ksp);
412:   return 0;
413: }

415: /*@
416:   KSPPIPEGCRSetUnrollW - Set to `PETSC_TRUE` to use `KSPPIPEGCR` with unrolling of the w vector

418:   Logically Collective

420:   Input Parameters:
421: +  ksp - the Krylov space context
422: -  unroll_w - use unrolling

424:   Level: intermediate

426:   Options Database Key:
427: . -ksp_pipegcr_unroll_w <bool> -  use unrolling

429: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
430: @*/
431: PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
432: {
433:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

437:   pipegcr->unroll_w = unroll_w;
438:   return 0;
439: }

441: /*@
442:   KSPPIPEGCRGetUnrollW - Get information on `KSPPIPEGCR` if it uses unrolling the w vector

444:   Logically Collective

446:    Input Parameter:
447: .  ksp - the Krylov space context

449:    Output Parameter:
450: .  unroll_w - `KSPPIPEGCR` uses unrolling (bool)

452:   Level: intermediate

454: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
455: @*/
456: PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
457: {
458:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

461:   *unroll_w = pipegcr->unroll_w;
462:   return 0;
463: }

465: /*@
466:   KSPPIPEGCRSetMmax - set the maximum number of previous directions `KSPPIPEGCR` will store for orthogonalization

468:   Logically Collective

470:   Input Parameters:
471: +  ksp - the Krylov space context
472: -  mmax - the maximum number of previous directions to orthogonalize against

474:   Options Database Key:
475: . -ksp_pipegcr_mmax <N> - maximum number of previous directions

477:   Level: intermediate

479:   Note:
480:    mmax + 1 directions are stored (mmax previous ones along with a current one)
481:   and whether all are used in each iteration also depends on the truncation strategy
482:   (see `KSPPIPEGCRSetTruncationType`)

484: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
485: @*/
486: PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
487: {
488:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

492:   pipegcr->mmax = mmax;
493:   return 0;
494: }

496: /*@
497:   KSPPIPEGCRGetMmax - get the maximum number of previous directions `KSPPIPEGCR` will store

499:    Not Collective

501:    Input Parameter:
502: .  ksp - the Krylov space context

504:    Output Parameter:
505: .  mmax - the maximum number of previous directions allowed for orthogonalization

507:    Level: intermediate

509: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
510: @*/

512: PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
513: {
514:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

517:   *mmax = pipegcr->mmax;
518:   return 0;
519: }

521: /*@
522:   KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with `KSPPIPEGCR`

524:   Logically Collective

526:   Input Parameters:
527: +  ksp - the Krylov space context
528: -  nprealloc - the number of vectors to preallocate

530:   Level: advanced

532:   Options Database Key:
533: . -ksp_pipegcr_nprealloc <N> - number of vectors to preallocate

535: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
536: @*/
537: PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
538: {
539:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

543:   pipegcr->nprealloc = nprealloc;
544:   return 0;
545: }

547: /*@
548:   KSPPIPEGCRGetNprealloc - get the number of directions preallocate by `KSPPIPEGCR`

550:    Not Collective

552:    Input Parameter:
553: .  ksp - the Krylov space context

555:    Output Parameter:
556: .  nprealloc - the number of directions preallocated

558:    Level: advanced

560: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
561: @*/
562: PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
563: {
564:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

567:   *nprealloc = pipegcr->nprealloc;
568:   return 0;
569: }

571: /*@
572:   KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions `KSPPIPEGCR` uses during orthogonalization

574:   Logically Collective

576:   Input Parameters:
577: +  ksp - the Krylov space context
578: -  truncstrat - the choice of strategy
579: .vb
580:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
581:   KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
582: .ve

584:   Options Database Key:
585: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against

587:   Level: intermediate

589: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`
590: @*/
591: PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
592: {
593:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

597:   pipegcr->truncstrat = truncstrat;
598:   return 0;
599: }

601: /*@
602:   KSPPIPEGCRGetTruncationType - get the truncation strategy employed by `KSPPIPEGCR`

604:   Not Collective

606:    Input Parameter:
607: .  ksp - the Krylov space context

609:    Output Parameter:
610: .  truncstrat - the strategy type
611: .vb
612:   KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
613:   KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
614: .ve

616:   Options Database Key:
617: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against

619:    Level: intermediate

621: .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`
622: @*/
623: PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
624: {
625:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

628:   *truncstrat = pipegcr->truncstrat;
629:   return 0;
630: }

632: static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems *PetscOptionsObject)
633: {
634:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
635:   PetscInt     mmax, nprealloc;
636:   PetscBool    flg;

638:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
639:   PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg);
640:   if (flg) KSPPIPEGCRSetMmax(ksp, mmax);
641:   PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg);
642:   if (flg) KSPPIPEGCRSetNprealloc(ksp, nprealloc);
643:   PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL);
644:   PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL);
645:   PetscOptionsHeadEnd();
646:   return 0;
647: }

650: typedef PetscErrorCode (*KSPPIPEGCRModifyPCFunction)(KSP, PetscInt, PetscReal, void *);
651: typedef PetscErrorCode (*KSPPIPEGCRDestroyFunction)(void *);

653: static PetscErrorCode KSPPIPEGCRSetModifyPC_PIPEGCR(KSP ksp, KSPPIPEGCRModifyPCFunction function, void *data, KSPPIPEGCRDestroyFunction destroy)
654: {
655:   KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;

658:   pipegcr->modifypc         = function;
659:   pipegcr->modifypc_destroy = destroy;
660:   pipegcr->modifypc_ctx     = data;
661:   return 0;
662: }

664: /*@C
665:  KSPPIPEGCRSetModifyPC - Sets the routine used by `KSPPIPEGCR` to modify the preconditioner at each iteration

667:  Logically Collective

669:  Input Parameters:
670:  +  ksp      - iterative context obtained from KSPCreate()
671:  .  function - user defined function to modify the preconditioner
672:  .  ctx      - user provided context for the modify preconditioner function
673:  -  destroy  - the function to use to destroy the user provided application context.

675:  Calling Sequence of function:
676:   PetscErrorCode function (KSP ksp, PetscInt n, PetscReal rnorm, void *ctx)
677: + ksp   - iterative context
678: . n     - the total number of PIPEGCR iterations that have occurred
679: . rnorm - 2-norm residual value
680: - ctx   - the user provided application context

682:  Level: intermediate

684:  Notes:
685:  The default modifypc routine is `KSPPIPEGCRModifyPCNoChange()`

687:  .seealso: [](chapter_ksp), `KSPPIPEGCR`, `KSPPIPEGCRModifyPCNoChange()`
688:  @*/
689: PetscErrorCode KSPPIPEGCRSetModifyPC(KSP ksp, PetscErrorCode (*function)(KSP, PetscInt, PetscReal, void *), void *data, PetscErrorCode (*destroy)(void *))
690: {
691:   PetscUseMethod(ksp, "KSPPIPEGCRSetModifyPC_C", (KSP, PetscErrorCode(*)(KSP, PetscInt, PetscReal, void *), void *data, PetscErrorCode (*)(void *)), (ksp, function, data, destroy));
692:   return 0;
693: }

695: /*MC
696:      KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method. [](sec_flexibleksp). [](sec_pipelineksp)

698:   Options Database Keys:
699: +   -ksp_pipegcr_mmax <N>  - the max number of Krylov directions to orthogonalize against
700: .   -ksp_pipegcr_unroll_w - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: `PETSC_TRUE`)
701: .   -ksp_pipegcr_nprealloc <N> - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
702: -   -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against

704:   Level: intermediate

706:   Notes:
707:     The `KSPPIPEGCR` Krylov method supports non-symmetric matrices and permits the use of a preconditioner
708:     which may vary from one iteration to the next. Users can can define a method to vary the
709:     preconditioner between iterates via `KSPPIPEGCRSetModifyPC()`.
710:     Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
711:     solution is given by the current estimate for x which was obtained by the last restart
712:     iterations of the PIPEGCR algorithm.
713:     The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as GCR.

715:     Only supports left preconditioning.

717:     The natural "norm" for this method is (u,Au), where u is the preconditioned residual. This norm is available at no additional computational cost, as with standard CG.
718:     Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.

720:    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
721:    See [](doc_faq_pipelined)

723:   Contributed by:
724:   Sascha M. Schnepp and Patrick Sanan

726:   Reference:
727:     P. Sanan, S.M. Schnepp, and D.A. May,
728:     "Pipelined, Flexible Krylov Subspace Methods,"
729:     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
730:     DOI: 10.1137/15M1049130

732: .seealso: [](chapter_ksp), [](sec_flexibleksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
733:           `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`
734: M*/
735: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
736: {
737:   KSP_PIPEGCR *pipegcr;

739:   PetscNew(&pipegcr);
740:   pipegcr->mmax       = KSPPIPEGCR_DEFAULT_MMAX;
741:   pipegcr->nprealloc  = KSPPIPEGCR_DEFAULT_NPREALLOC;
742:   pipegcr->nvecs      = 0;
743:   pipegcr->vecb       = KSPPIPEGCR_DEFAULT_VECB;
744:   pipegcr->nchunks    = 0;
745:   pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
746:   pipegcr->n_restarts = 0;
747:   pipegcr->unroll_w   = KSPPIPEGCR_DEFAULT_UNROLL_W;

749:   ksp->data = (void *)pipegcr;

751:   /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
752:   KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2);
753:   KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1);
754:   KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1);
755:   KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1);

757:   ksp->ops->setup          = KSPSetUp_PIPEGCR;
758:   ksp->ops->solve          = KSPSolve_PIPEGCR;
759:   ksp->ops->reset          = KSPReset_PIPEGCR;
760:   ksp->ops->destroy        = KSPDestroy_PIPEGCR;
761:   ksp->ops->view           = KSPView_PIPEGCR;
762:   ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
763:   ksp->ops->buildsolution  = KSPBuildSolutionDefault;
764:   ksp->ops->buildresidual  = KSPBuildResidualDefault;

766:   PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", KSPPIPEGCRSetModifyPC_PIPEGCR);
767:   return 0;
768: }