Actual source code: pipegcr.c
1: #include <petscsys.h>
2: #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h>
4: static PetscBool cited = PETSC_FALSE;
5: static const char citation[] = "@article{SSM2016,\n"
6: " author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
7: " title = {Pipelined, Flexible Krylov Subspace Methods},\n"
8: " journal = {SIAM Journal on Scientific Computing},\n"
9: " volume = {38},\n"
10: " number = {5},\n"
11: " pages = {C441-C470},\n"
12: " year = {2016},\n"
13: " doi = {10.1137/15M1049130},\n"
14: " URL = {http://dx.doi.org/10.1137/15M1049130},\n"
15: " eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
16: "}\n";
18: #define KSPPIPEGCR_DEFAULT_MMAX 15
19: #define KSPPIPEGCR_DEFAULT_NPREALLOC 5
20: #define KSPPIPEGCR_DEFAULT_VECB 5
21: #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
22: #define KSPPIPEGCR_DEFAULT_UNROLL_W PETSC_TRUE
24: #include <petscksp.h>
26: static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
27: {
28: PetscInt i;
29: KSP_PIPEGCR *pipegcr;
30: PetscInt nnewvecs, nvecsprev;
32: PetscFunctionBegin;
33: pipegcr = (KSP_PIPEGCR *)ksp->data;
35: /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
36: if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
37: nvecsprev = pipegcr->nvecs;
38: nnewvecs = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
39: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL));
40: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL));
41: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL));
42: if (pipegcr->unroll_w) { PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL)); }
43: pipegcr->nvecs += nnewvecs;
44: for (i = 0; i < nnewvecs; i++) {
45: pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
46: pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
47: pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
48: if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
49: }
50: pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
51: pipegcr->nchunks++;
52: }
53: PetscFunctionReturn(PETSC_SUCCESS);
54: }
56: static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
57: {
58: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
59: Mat A, B;
60: Vec x, r, b, z, w, m, n, p, s, q, t, *redux;
61: PetscInt i, j, k, idx, kdx, mi;
62: PetscScalar alpha = 0.0, gamma, *betas, *dots;
63: PetscReal rnorm = 0.0, delta, *eta, *etas;
65: PetscFunctionBegin;
66: /* !!PS We have not checked these routines for use with complex numbers. The inner products
67: are likely not defined correctly for that case */
68: PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
70: PetscCall(KSPGetOperators(ksp, &A, &B));
71: x = ksp->vec_sol;
72: b = ksp->vec_rhs;
73: r = ksp->work[0];
74: z = ksp->work[1];
75: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
76: m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r)) (pipelining intermediate) */
77: n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
78: p = pipegcr->pvecs[0];
79: s = pipegcr->svecs[0];
80: q = pipegcr->qvecs[0];
81: t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;
83: redux = pipegcr->redux;
84: dots = pipegcr->dots;
85: etas = pipegcr->etas;
86: betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
88: /* cycle initial residual */
89: PetscCall(KSP_MatMult(ksp, A, x, r));
90: PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
91: PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
92: PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
94: /* initialization of other variables and pipelining intermediates */
95: PetscCall(VecCopy(z, p));
96: PetscCall(KSP_MatMult(ksp, A, p, s));
98: /* overlap initial computation of delta, gamma */
99: redux[0] = w;
100: redux[1] = r;
101: PetscCall(VecMDotBegin(w, 2, redux, dots)); /* Start split reductions for gamma = (w,r), delta = (w,w) */
102: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s))); /* perform asynchronous reduction */
103: PetscCall(KSP_PCApply(ksp, s, q)); /* q = B(s) */
104: if (pipegcr->unroll_w) { PetscCall(KSP_MatMult(ksp, A, q, t)); /* t = Aq */ }
105: PetscCall(VecMDotEnd(w, 2, redux, dots)); /* Finish split reduction */
106: delta = PetscRealPart(dots[0]);
107: etas[0] = delta;
108: gamma = dots[1];
109: alpha = gamma / delta;
111: i = 0;
112: do {
113: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
114: ksp->its++;
115: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
117: /* update solution, residuals, .. */
118: PetscCall(VecAXPY(x, +alpha, p));
119: PetscCall(VecAXPY(r, -alpha, s));
120: PetscCall(VecAXPY(z, -alpha, q));
121: if (pipegcr->unroll_w) {
122: PetscCall(VecAXPY(w, -alpha, t));
123: } else {
124: PetscCall(KSP_MatMult(ksp, A, z, w));
125: }
127: /* Computations of current iteration done */
128: i++;
130: if (pipegcr->modifypc) PetscCall((*pipegcr->modifypc)(ksp, ksp->its, ksp->rnorm, pipegcr->modifypc_ctx));
132: /* If needbe, allocate a new chunk of vectors */
133: PetscCall(KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb));
135: /* Note that we wrap around and start clobbering old vectors */
136: idx = i % (pipegcr->mmax + 1);
137: p = pipegcr->pvecs[idx];
138: s = pipegcr->svecs[idx];
139: q = pipegcr->qvecs[idx];
140: if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
141: eta = pipegcr->etas + idx;
143: /* number of old directions to orthogonalize against */
144: switch (pipegcr->truncstrat) {
145: case KSP_FCD_TRUNC_TYPE_STANDARD:
146: mi = pipegcr->mmax;
147: break;
148: case KSP_FCD_TRUNC_TYPE_NOTAY:
149: mi = ((i - 1) % pipegcr->mmax) + 1;
150: break;
151: default:
152: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
153: }
155: /* Pick old p,s,q,zeta in a way suitable for VecMDot */
156: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
157: kdx = k % (pipegcr->mmax + 1);
158: pipegcr->pold[j] = pipegcr->pvecs[kdx];
159: pipegcr->sold[j] = pipegcr->svecs[kdx];
160: pipegcr->qold[j] = pipegcr->qvecs[kdx];
161: if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
162: redux[j] = pipegcr->svecs[kdx];
163: }
164: /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
165: redux[j] = r;
166: redux[j + 1] = w;
168: /* Dot products */
169: /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
170: PetscCall(VecMDotBegin(w, j + 2, redux, dots));
171: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w)));
173: /* B(w-r) + u stabilization */
174: PetscCall(VecWAXPY(n, -1.0, r, w)); /* m = u + B(w-r): (a) ntmp = w-r */
175: PetscCall(KSP_PCApply(ksp, n, m)); /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
176: PetscCall(VecAXPY(m, 1.0, z)); /* m = u + B(w-r): (c) m = z + mtmp */
177: if (pipegcr->unroll_w) { PetscCall(KSP_MatMult(ksp, A, m, n)); /* n = Am */ }
179: /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
180: PetscCall(VecMDotEnd(w, j + 2, redux, dots));
181: gamma = dots[j];
182: delta = PetscRealPart(dots[j + 1]);
184: /* compute new residual norm.
185: this cannot be done before this point so that the natural norm
186: is available for free and the communication involved is overlapped */
187: switch (ksp->normtype) {
188: case KSP_NORM_PRECONDITIONED:
189: PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
190: break;
191: case KSP_NORM_UNPRECONDITIONED:
192: PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
193: break;
194: case KSP_NORM_NATURAL:
195: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
196: break;
197: case KSP_NORM_NONE:
198: rnorm = 0.0;
199: break;
200: default:
201: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
202: }
204: /* Check for convergence */
205: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
206: ksp->rnorm = rnorm;
207: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
208: PetscCall(KSPLogResidualHistory(ksp, rnorm));
209: PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
210: PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
211: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
213: /* compute new eta and scale beta */
214: *eta = 0.;
215: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
216: kdx = k % (pipegcr->mmax + 1);
217: betas[j] /= -etas[kdx]; /* betak /= etak */
218: *eta -= ((PetscReal)(PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]))) * etas[kdx];
219: /* etaitmp = -betaik^2 * etak */
220: }
221: *eta += delta; /* etai = delta -betaik^2 * etak */
223: /* check breakdown of eta = (s,s) */
224: if (*eta < 0.) {
225: pipegcr->norm_breakdown = PETSC_TRUE;
226: PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
227: break;
228: } else {
229: alpha = gamma / (*eta); /* alpha = gamma/etai */
230: }
232: /* project out stored search directions using classical G-S */
233: PetscCall(VecCopy(z, p));
234: PetscCall(VecCopy(w, s));
235: PetscCall(VecCopy(m, q));
236: if (pipegcr->unroll_w) {
237: PetscCall(VecCopy(n, t));
238: PetscCall(VecMAXPY(t, j, betas, pipegcr->told)); /* ti <- n - sum_k beta_k t_k */
239: }
240: PetscCall(VecMAXPY(p, j, betas, pipegcr->pold)); /* pi <- ui - sum_k beta_k p_k */
241: PetscCall(VecMAXPY(s, j, betas, pipegcr->sold)); /* si <- wi - sum_k beta_k s_k */
242: PetscCall(VecMAXPY(q, j, betas, pipegcr->qold)); /* qi <- m - sum_k beta_k q_k */
244: } while (ksp->its < ksp->max_it);
245: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
246: PetscFunctionReturn(PETSC_SUCCESS);
247: }
249: static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
250: {
251: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
252: Mat A, B;
253: Vec x, b, r, z, w;
254: PetscScalar gamma;
255: PetscReal rnorm = 0.0;
256: PetscBool issym;
258: PetscFunctionBegin;
259: PetscCall(PetscCitationsRegister(citation, &cited));
261: PetscCall(KSPGetOperators(ksp, &A, &B));
262: x = ksp->vec_sol;
263: b = ksp->vec_rhs;
264: r = ksp->work[0];
265: z = ksp->work[1];
266: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
268: /* compute initial residual */
269: if (!ksp->guess_zero) {
270: PetscCall(KSP_MatMult(ksp, A, x, r));
271: PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
272: } else {
273: PetscCall(VecCopy(b, r)); /* r <- b */
274: }
276: /* initial residual norm */
277: PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
278: PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
279: PetscCall(VecDot(r, w, &gamma)); /* gamma = (r,w) */
281: switch (ksp->normtype) {
282: case KSP_NORM_PRECONDITIONED:
283: PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
284: break;
285: case KSP_NORM_UNPRECONDITIONED:
286: PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
287: break;
288: case KSP_NORM_NATURAL:
289: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
290: break;
291: case KSP_NORM_NONE:
292: rnorm = 0.0;
293: break;
294: default:
295: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
296: }
298: /* Is A symmetric? */
299: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, ""));
300: if (!issym) PetscCall(PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?\n"));
302: /* logging */
303: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
304: ksp->its = 0;
305: ksp->rnorm0 = rnorm;
306: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
307: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm0));
308: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm0));
309: PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP));
310: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
312: do {
313: PetscCall(KSPSolve_PIPEGCR_cycle(ksp));
314: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
315: if (pipegcr->norm_breakdown) {
316: pipegcr->n_restarts++;
317: pipegcr->norm_breakdown = PETSC_FALSE;
318: }
319: } while (ksp->its < ksp->max_it);
321: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
322: PetscFunctionReturn(PETSC_SUCCESS);
323: }
325: static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
326: {
327: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
328: PetscBool isascii, isstring;
329: const char *truncstr;
331: PetscFunctionBegin;
332: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
333: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
335: if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
336: truncstr = "Using standard truncation strategy";
337: } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
338: truncstr = "Using Notay's truncation strategy";
339: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
341: if (isascii) {
342: PetscCall(PetscViewerASCIIPrintf(viewer, " max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax));
343: PetscCall(PetscViewerASCIIPrintf(viewer, " preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1)));
344: PetscCall(PetscViewerASCIIPrintf(viewer, " %s\n", truncstr));
345: PetscCall(PetscViewerASCIIPrintf(viewer, " w unrolling = %s \n", PetscBools[pipegcr->unroll_w]));
346: PetscCall(PetscViewerASCIIPrintf(viewer, " restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts));
347: } else if (isstring) {
348: PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr));
349: }
350: PetscFunctionReturn(PETSC_SUCCESS);
351: }
353: static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
354: {
355: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
356: Mat A;
357: PetscBool diagonalscale;
358: const PetscInt nworkstd = 5;
360: PetscFunctionBegin;
361: PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
362: PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
364: PetscCall(KSPGetOperators(ksp, &A, NULL));
366: /* Allocate "standard" work vectors */
367: PetscCall(KSPSetWorkVecs(ksp, nworkstd));
369: /* Allocated space for pointers to additional work vectors
370: note that mmax is the number of previous directions, so we add 1 for the current direction */
371: PetscCall(PetscMalloc6(pipegcr->mmax + 1, &pipegcr->pvecs, pipegcr->mmax + 1, &pipegcr->ppvecs, pipegcr->mmax + 1, &pipegcr->svecs, pipegcr->mmax + 1, &pipegcr->psvecs, pipegcr->mmax + 1, &pipegcr->qvecs, pipegcr->mmax + 1, &pipegcr->pqvecs));
372: if (pipegcr->unroll_w) PetscCall(PetscMalloc3(pipegcr->mmax + 1, &pipegcr->tvecs, pipegcr->mmax + 1, &pipegcr->ptvecs, pipegcr->mmax + 2, &pipegcr->told));
373: PetscCall(PetscMalloc4(pipegcr->mmax + 2, &pipegcr->pold, pipegcr->mmax + 2, &pipegcr->sold, pipegcr->mmax + 2, &pipegcr->qold, pipegcr->mmax + 2, &pipegcr->chunksizes));
374: PetscCall(PetscMalloc3(pipegcr->mmax + 2, &pipegcr->dots, pipegcr->mmax + 1, &pipegcr->etas, pipegcr->mmax + 2, &pipegcr->redux));
375: /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
376: if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1));
378: /* Preallocate additional work vectors */
379: PetscCall(KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc));
380: PetscFunctionReturn(PETSC_SUCCESS);
381: }
383: static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
384: {
385: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
387: PetscFunctionBegin;
388: if (pipegcr->modifypc_destroy) PetscCall((*pipegcr->modifypc_destroy)(pipegcr->modifypc_ctx));
389: PetscFunctionReturn(PETSC_SUCCESS);
390: }
392: static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
393: {
394: PetscInt i;
395: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
397: PetscFunctionBegin;
398: PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work)); /* Destroy "standard" work vecs */
400: /* Destroy vectors for old directions and the arrays that manage pointers to them */
401: if (pipegcr->nvecs) {
402: for (i = 0; i < pipegcr->nchunks; i++) {
403: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]));
404: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]));
405: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]));
406: if (pipegcr->unroll_w) PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]));
407: }
408: }
410: PetscCall(PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs));
411: PetscCall(PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes));
412: PetscCall(PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux));
413: if (pipegcr->unroll_w) PetscCall(PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told));
415: PetscCall(KSPReset_PIPEGCR(ksp));
416: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", NULL));
417: PetscCall(KSPDestroyDefault(ksp));
418: PetscFunctionReturn(PETSC_SUCCESS);
419: }
421: /*@
422: KSPPIPEGCRSetUnrollW - Set to `PETSC_TRUE` to use `KSPPIPEGCR` with unrolling of the w vector
424: Logically Collective
426: Input Parameters:
427: + ksp - the Krylov space context
428: - unroll_w - use unrolling
430: Level: intermediate
432: Options Database Key:
433: . -ksp_pipegcr_unroll_w <bool> - use unrolling
435: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
436: @*/
437: PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
438: {
439: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
441: PetscFunctionBegin;
444: pipegcr->unroll_w = unroll_w;
445: PetscFunctionReturn(PETSC_SUCCESS);
446: }
448: /*@
449: KSPPIPEGCRGetUnrollW - Get information on `KSPPIPEGCR` if it uses unrolling the w vector
451: Logically Collective
453: Input Parameter:
454: . ksp - the Krylov space context
456: Output Parameter:
457: . unroll_w - `KSPPIPEGCR` uses unrolling (bool)
459: Level: intermediate
461: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
462: @*/
463: PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
464: {
465: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
467: PetscFunctionBegin;
469: *unroll_w = pipegcr->unroll_w;
470: PetscFunctionReturn(PETSC_SUCCESS);
471: }
473: /*@
474: KSPPIPEGCRSetMmax - set the maximum number of previous directions `KSPPIPEGCR` will store for orthogonalization
476: Logically Collective
478: Input Parameters:
479: + ksp - the Krylov space context
480: - mmax - the maximum number of previous directions to orthogonalize against
482: Options Database Key:
483: . -ksp_pipegcr_mmax <mmax> - maximum number of previous directions
485: Level: intermediate
487: Note:
488: `mmax` + 1 directions are stored (`mmax` previous ones along with a current one)
489: and whether all are used in each iteration also depends on the truncation strategy
490: (see `KSPPIPEGCRSetTruncationType`)
492: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
493: @*/
494: PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
495: {
496: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
498: PetscFunctionBegin;
501: pipegcr->mmax = mmax;
502: PetscFunctionReturn(PETSC_SUCCESS);
503: }
505: /*@
506: KSPPIPEGCRGetMmax - get the maximum number of previous directions `KSPPIPEGCR` will store
508: Not Collective
510: Input Parameter:
511: . ksp - the Krylov space context
513: Output Parameter:
514: . mmax - the maximum number of previous directions allowed for orthogonalization
516: Level: intermediate
518: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
519: @*/
520: PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
521: {
522: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
524: PetscFunctionBegin;
526: *mmax = pipegcr->mmax;
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: /*@
531: KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with `KSPPIPEGCR`
533: Logically Collective
535: Input Parameters:
536: + ksp - the Krylov space context
537: - nprealloc - the number of vectors to preallocate
539: Level: advanced
541: Options Database Key:
542: . -ksp_pipegcr_nprealloc <N> - number of vectors to preallocate
544: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
545: @*/
546: PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
547: {
548: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
550: PetscFunctionBegin;
553: pipegcr->nprealloc = nprealloc;
554: PetscFunctionReturn(PETSC_SUCCESS);
555: }
557: /*@
558: KSPPIPEGCRGetNprealloc - get the number of directions preallocate by `KSPPIPEGCR`
560: Not Collective
562: Input Parameter:
563: . ksp - the Krylov space context
565: Output Parameter:
566: . nprealloc - the number of directions preallocated
568: Level: advanced
570: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
571: @*/
572: PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
573: {
574: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
576: PetscFunctionBegin;
578: *nprealloc = pipegcr->nprealloc;
579: PetscFunctionReturn(PETSC_SUCCESS);
580: }
582: /*@
583: KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions `KSPPIPEGCR` uses during orthogonalization
585: Logically Collective
587: Input Parameters:
588: + ksp - the Krylov space context
589: - truncstrat - the choice of strategy
590: .vb
591: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
592: KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
593: .ve
595: Options Database Key:
596: . -ksp_pipegcr_truncation_type <standard,notay> - which stored basis vectors to orthogonalize against
598: Level: intermediate
600: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
601: @*/
602: PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
603: {
604: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
606: PetscFunctionBegin;
609: pipegcr->truncstrat = truncstrat;
610: PetscFunctionReturn(PETSC_SUCCESS);
611: }
613: /*@
614: KSPPIPEGCRGetTruncationType - get the truncation strategy employed by `KSPPIPEGCR`
616: Not Collective
618: Input Parameter:
619: . ksp - the Krylov space context
621: Output Parameter:
622: . truncstrat - the strategy type
623: .vb
624: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to mmax) stored directions
625: KSP_FCD_TRUNC_TYPE_NOTAY uses the last max(1,mod(i,mmax)) directions at iteration i=0,1,..
626: .ve
628: Level: intermediate
630: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType`, `KSPPIPEGCRTruncationType`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
631: @*/
632: PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
633: {
634: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
636: PetscFunctionBegin;
638: *truncstrat = pipegcr->truncstrat;
639: PetscFunctionReturn(PETSC_SUCCESS);
640: }
642: static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems *PetscOptionsObject)
643: {
644: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
645: PetscInt mmax, nprealloc;
646: PetscBool flg;
648: PetscFunctionBegin;
649: PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
650: PetscCall(PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg));
651: if (flg) PetscCall(KSPPIPEGCRSetMmax(ksp, mmax));
652: PetscCall(PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg));
653: if (flg) PetscCall(KSPPIPEGCRSetNprealloc(ksp, nprealloc));
654: PetscCall(PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL));
655: PetscCall(PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL));
656: PetscOptionsHeadEnd();
657: PetscFunctionReturn(PETSC_SUCCESS);
658: }
661: typedef PetscErrorCode (*KSPPIPEGCRModifyPCFunction)(KSP, PetscInt, PetscReal, void *);
662: typedef PetscErrorCode (*KSPPIPEGCRDestroyFunction)(void *);
664: static PetscErrorCode KSPPIPEGCRSetModifyPC_PIPEGCR(KSP ksp, KSPPIPEGCRModifyPCFunction function, void *data, KSPPIPEGCRDestroyFunction destroy)
665: {
666: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
668: PetscFunctionBegin;
670: pipegcr->modifypc = function;
671: pipegcr->modifypc_destroy = destroy;
672: pipegcr->modifypc_ctx = data;
673: PetscFunctionReturn(PETSC_SUCCESS);
674: }
676: /*@C
677: KSPPIPEGCRSetModifyPC - Sets the routine used by `KSPPIPEGCR` to modify the preconditioner at each iteration
679: Logically Collective
681: Input Parameters:
682: + ksp - iterative context obtained from `KSPCreate()`
683: . function - user defined function to modify the preconditioner
684: . ctx - user provided context for the modify preconditioner function
685: - destroy - the function to use to destroy the user provided application context.
687: Calling sequence of `function`:
688: + ksp - iterative context
689: . n - the total number of `KSPPIPEGCR` iterations that have occurred
690: . rnorm - 2-norm residual value
691: - ctx - the user provided application context
693: Calling sequence of `destroy`:
694: . ctx - the user provided application context
696: Level: intermediate
698: Note:
699: The default modifypc routine is `KSPPIPEGCRModifyPCNoChange()`
701: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRModifyPCNoChange()`
702: @*/
703: PetscErrorCode KSPPIPEGCRSetModifyPC(KSP ksp, PetscErrorCode (*function)(KSP ksp, PetscInt n, PetscReal rnorm, void *ctx), void *ctx, PetscErrorCode (*destroy)(void *ctx))
704: {
705: PetscFunctionBegin;
706: PetscUseMethod(ksp, "KSPPIPEGCRSetModifyPC_C", (KSP, PetscErrorCode (*)(KSP, PetscInt, PetscReal, void *), void *ctx, PetscErrorCode (*)(void *)), (ksp, function, ctx, destroy));
707: PetscFunctionReturn(PETSC_SUCCESS);
708: }
710: /*MC
711: KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method {cite}`sananschneppmay2016`. [](sec_flexibleksp). [](sec_pipelineksp)
713: Options Database Keys:
714: + -ksp_pipegcr_mmax <N> - the max number of Krylov directions to orthogonalize against
715: . -ksp_pipegcr_unroll_w - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: `PETSC_TRUE`)
716: . -ksp_pipegcr_nprealloc <N> - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
717: - -ksp_pipegcr_truncation_type <standard,notay> - which previous search directions to orthogonalize against
719: Level: intermediate
721: Notes:
722: Compare to `KSPGCR`
724: The `KSPPIPEGCR` Krylov method supports non-symmetric matrices and permits the use of a preconditioner
725: which may vary from one iteration to the next. Users can define a method to vary the
726: preconditioner between iterates via `KSPPIPEGCRSetModifyPC()`.
727: Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
728: solution is given by the current estimate for x which was obtained by the last restart
729: iterations of the `KSPPIPEGCR` algorithm.
730: The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as `KSPGCR`.
732: Only supports left preconditioning.
734: The natural "norm" for this method is $(u,Au)$, where $u$ is the preconditioned residual. This norm is available at no additional computational cost, as with standard
735: `KSPCG`. Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.
737: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
738: See [](doc_faq_pipelined)
740: Contributed by:
741: Sascha M. Schnepp and Patrick Sanan
743: .seealso: [](ch_ksp), [](sec_flexibleksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
744: `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`,
745: `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRGetMmax()`, `KSPGCR`, `KSPPIPEGCRGetUnrollW()
746: M*/
747: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
748: {
749: KSP_PIPEGCR *pipegcr;
751: PetscFunctionBegin;
752: PetscCall(PetscNew(&pipegcr));
753: pipegcr->mmax = KSPPIPEGCR_DEFAULT_MMAX;
754: pipegcr->nprealloc = KSPPIPEGCR_DEFAULT_NPREALLOC;
755: pipegcr->nvecs = 0;
756: pipegcr->vecb = KSPPIPEGCR_DEFAULT_VECB;
757: pipegcr->nchunks = 0;
758: pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
759: pipegcr->n_restarts = 0;
760: pipegcr->unroll_w = KSPPIPEGCR_DEFAULT_UNROLL_W;
762: ksp->data = (void *)pipegcr;
764: /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
765: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
766: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1));
767: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
768: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
770: ksp->ops->setup = KSPSetUp_PIPEGCR;
771: ksp->ops->solve = KSPSolve_PIPEGCR;
772: ksp->ops->reset = KSPReset_PIPEGCR;
773: ksp->ops->destroy = KSPDestroy_PIPEGCR;
774: ksp->ops->view = KSPView_PIPEGCR;
775: ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
776: ksp->ops->buildsolution = KSPBuildSolutionDefault;
777: ksp->ops->buildresidual = KSPBuildResidualDefault;
779: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPPIPEGCRSetModifyPC_C", KSPPIPEGCRSetModifyPC_PIPEGCR));
780: PetscFunctionReturn(PETSC_SUCCESS);
781: }