Actual source code: pipegcr.c
1: #include <petscsys.h>
2: #include <../src/ksp/ksp/impls/gcr/pipegcr/pipegcrimpl.h>
4: static PetscBool cited = PETSC_FALSE;
5: static const char citation[] = "@article{SSM2016,\n"
6: " author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
7: " title = {Pipelined, Flexible Krylov Subspace Methods},\n"
8: " journal = {SIAM Journal on Scientific Computing},\n"
9: " volume = {38},\n"
10: " number = {5},\n"
11: " pages = {C441-C470},\n"
12: " year = {2016},\n"
13: " doi = {10.1137/15M1049130},\n"
14: " URL = {http://dx.doi.org/10.1137/15M1049130},\n"
15: " eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
16: "}\n";
18: #define KSPPIPEGCR_DEFAULT_MMAX 15
19: #define KSPPIPEGCR_DEFAULT_NPREALLOC 5
20: #define KSPPIPEGCR_DEFAULT_VECB 5
21: #define KSPPIPEGCR_DEFAULT_TRUNCSTRAT KSP_FCD_TRUNC_TYPE_NOTAY
22: #define KSPPIPEGCR_DEFAULT_UNROLL_W PETSC_TRUE
24: #include <petscksp.h>
26: static PetscErrorCode KSPAllocateVectors_PIPEGCR(KSP ksp, PetscInt nvecsneeded, PetscInt chunksize)
27: {
28: KSP_PIPEGCR *pipegcr;
29: PetscInt nnewvecs, nvecsprev;
31: PetscFunctionBegin;
32: pipegcr = (KSP_PIPEGCR *)ksp->data;
34: /* Allocate enough new vectors to add chunksize new vectors, reach nvecsneedtotal, or to reach mmax+1, whichever is smallest */
35: if (pipegcr->nvecs < PetscMin(pipegcr->mmax + 1, nvecsneeded)) {
36: nvecsprev = pipegcr->nvecs;
37: nnewvecs = PetscMin(PetscMax(nvecsneeded - pipegcr->nvecs, chunksize), pipegcr->mmax + 1 - pipegcr->nvecs);
38: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ppvecs[pipegcr->nchunks], 0, NULL));
39: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->psvecs[pipegcr->nchunks], 0, NULL));
40: PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->pqvecs[pipegcr->nchunks], 0, NULL));
41: if (pipegcr->unroll_w) PetscCall(KSPCreateVecs(ksp, nnewvecs, &pipegcr->ptvecs[pipegcr->nchunks], 0, NULL));
42: pipegcr->nvecs += nnewvecs;
43: for (PetscInt i = 0; i < nnewvecs; i++) {
44: pipegcr->qvecs[nvecsprev + i] = pipegcr->pqvecs[pipegcr->nchunks][i];
45: pipegcr->pvecs[nvecsprev + i] = pipegcr->ppvecs[pipegcr->nchunks][i];
46: pipegcr->svecs[nvecsprev + i] = pipegcr->psvecs[pipegcr->nchunks][i];
47: if (pipegcr->unroll_w) pipegcr->tvecs[nvecsprev + i] = pipegcr->ptvecs[pipegcr->nchunks][i];
48: }
49: pipegcr->chunksizes[pipegcr->nchunks] = nnewvecs;
50: pipegcr->nchunks++;
51: }
52: PetscFunctionReturn(PETSC_SUCCESS);
53: }
55: static PetscErrorCode KSPSolve_PIPEGCR_cycle(KSP ksp)
56: {
57: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
58: Mat A, B;
59: Vec x, r, b, z, w, m, n, p, s, q, t, *redux;
60: PetscInt i, j, k, idx, kdx, mi;
61: PetscScalar alpha = 0.0, gamma, *betas, *dots;
62: PetscReal rnorm = 0.0, delta, *eta, *etas;
64: PetscFunctionBegin;
65: /* !!PS We have not checked these routines for use with complex numbers. The inner products
66: are likely not defined correctly for that case */
67: PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
69: PetscCall(KSPGetOperators(ksp, &A, &B));
70: x = ksp->vec_sol;
71: b = ksp->vec_rhs;
72: r = ksp->work[0];
73: z = ksp->work[1];
74: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
75: m = ksp->work[3]; /* m = B(w) = B(Az) = B(AB(r)) (pipelining intermediate) */
76: n = ksp->work[4]; /* n = AB(w) = AB(Az) = AB(AB(r)) (pipelining intermediate) */
77: p = pipegcr->pvecs[0];
78: s = pipegcr->svecs[0];
79: q = pipegcr->qvecs[0];
80: t = pipegcr->unroll_w ? pipegcr->tvecs[0] : NULL;
82: redux = pipegcr->redux;
83: dots = pipegcr->dots;
84: etas = pipegcr->etas;
85: betas = dots; /* dots takes the result of all dot products of which the betas are a subset */
87: /* cycle initial residual */
88: PetscCall(KSP_MatMult(ksp, A, x, r));
89: PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
90: PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
91: PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
93: /* initialization of other variables and pipelining intermediates */
94: PetscCall(VecCopy(z, p));
95: PetscCall(KSP_MatMult(ksp, A, p, s));
97: /* overlap initial computation of delta, gamma */
98: redux[0] = w;
99: redux[1] = r;
100: PetscCall(VecMDotBegin(w, 2, redux, dots)); /* Start split reductions for gamma = (w,r), delta = (w,w) */
101: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)s))); /* perform asynchronous reduction */
102: PetscCall(KSP_PCApply(ksp, s, q)); /* q = B(s) */
103: if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, q, t)); /* t = Aq */
104: PetscCall(VecMDotEnd(w, 2, redux, dots)); /* Finish split reduction */
105: delta = PetscRealPart(dots[0]);
106: etas[0] = delta;
107: gamma = dots[1];
108: alpha = gamma / delta;
110: i = 0;
111: do {
112: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
113: ksp->its++;
114: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
116: /* update solution, residuals, .. */
117: PetscCall(VecAXPY(x, +alpha, p));
118: PetscCall(VecAXPY(r, -alpha, s));
119: PetscCall(VecAXPY(z, -alpha, q));
120: if (pipegcr->unroll_w) {
121: PetscCall(VecAXPY(w, -alpha, t));
122: } else {
123: PetscCall(KSP_MatMult(ksp, A, z, w));
124: }
126: /* Computations of current iteration done */
127: i++;
129: if (pipegcr->modifypc) PetscCall((*pipegcr->modifypc)(ksp, ksp->its, 0 /* unused argument */, ksp->rnorm, pipegcr->modifypc_ctx));
131: /* If needbe, allocate a new chunk of vectors */
132: PetscCall(KSPAllocateVectors_PIPEGCR(ksp, i + 1, pipegcr->vecb));
134: /* Note that we wrap around and start clobbering old vectors */
135: idx = i % (pipegcr->mmax + 1);
136: p = pipegcr->pvecs[idx];
137: s = pipegcr->svecs[idx];
138: q = pipegcr->qvecs[idx];
139: if (pipegcr->unroll_w) t = pipegcr->tvecs[idx];
140: eta = pipegcr->etas + idx;
142: /* number of old directions to orthogonalize against */
143: switch (pipegcr->truncstrat) {
144: case KSP_FCD_TRUNC_TYPE_STANDARD:
145: mi = pipegcr->mmax;
146: break;
147: case KSP_FCD_TRUNC_TYPE_NOTAY:
148: mi = ((i - 1) % pipegcr->mmax) + 1;
149: break;
150: default:
151: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unrecognized Truncation Strategy");
152: }
154: /* Pick old p,s,q,zeta in a way suitable for VecMDot */
155: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
156: kdx = k % (pipegcr->mmax + 1);
157: pipegcr->pold[j] = pipegcr->pvecs[kdx];
158: pipegcr->sold[j] = pipegcr->svecs[kdx];
159: pipegcr->qold[j] = pipegcr->qvecs[kdx];
160: if (pipegcr->unroll_w) pipegcr->told[j] = pipegcr->tvecs[kdx];
161: redux[j] = pipegcr->svecs[kdx];
162: }
163: /* If the above loop is not run redux contains only r and w => all beta_k = 0, only gamma, delta != 0 */
164: redux[j] = r;
165: redux[j + 1] = w;
167: /* Dot products */
168: /* Start split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
169: PetscCall(VecMDotBegin(w, j + 2, redux, dots));
170: PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)w)));
172: /* B(w-r) + u stabilization */
173: PetscCall(VecWAXPY(n, -1.0, r, w)); /* m = u + B(w-r): (a) ntmp = w-r */
174: PetscCall(KSP_PCApply(ksp, n, m)); /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
175: PetscCall(VecAXPY(m, 1.0, z)); /* m = u + B(w-r): (c) m = z + mtmp */
176: if (pipegcr->unroll_w) PetscCall(KSP_MatMult(ksp, A, m, n)); /* n = Am */
178: /* Finish split reductions for beta_k = (w,s_k), gamma = (w,r), delta = (w,w) */
179: PetscCall(VecMDotEnd(w, j + 2, redux, dots));
180: gamma = dots[j];
181: delta = PetscRealPart(dots[j + 1]);
183: /* compute new residual norm.
184: this cannot be done before this point so that the natural norm
185: is available for free and the communication involved is overlapped */
186: switch (ksp->normtype) {
187: case KSP_NORM_PRECONDITIONED:
188: PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
189: break;
190: case KSP_NORM_UNPRECONDITIONED:
191: PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
192: break;
193: case KSP_NORM_NATURAL:
194: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
195: break;
196: case KSP_NORM_NONE:
197: rnorm = 0.0;
198: break;
199: default:
200: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
201: }
203: /* Check for convergence */
204: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
205: ksp->rnorm = rnorm;
206: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
207: PetscCall(KSPLogResidualHistory(ksp, rnorm));
208: PetscCall(KSPMonitor(ksp, ksp->its, rnorm));
209: PetscCall((*ksp->converged)(ksp, ksp->its, rnorm, &ksp->reason, ksp->cnvP));
210: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
212: /* compute new eta and scale beta */
213: *eta = 0.;
214: for (k = PetscMax(0, i - mi), j = 0; k < i; j++, k++) {
215: kdx = k % (pipegcr->mmax + 1);
216: betas[j] /= -etas[kdx]; /* betak /= etak */
217: *eta -= PetscAbsScalar(betas[j]) * PetscAbsScalar(betas[j]) * etas[kdx];
218: /* etaitmp = -betaik^2 * etak */
219: }
220: *eta += delta; /* etai = delta -betaik^2 * etak */
222: /* check breakdown of eta = (s,s) */
223: if (*eta < 0.) {
224: pipegcr->norm_breakdown = PETSC_TRUE;
225: PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT "\n", ksp->its));
226: break;
227: } else {
228: alpha = gamma / (*eta); /* alpha = gamma/etai */
229: }
231: /* project out stored search directions using classical G-S */
232: PetscCall(VecCopy(z, p));
233: PetscCall(VecCopy(w, s));
234: PetscCall(VecCopy(m, q));
235: if (pipegcr->unroll_w) {
236: PetscCall(VecCopy(n, t));
237: PetscCall(VecMAXPY(t, j, betas, pipegcr->told)); /* ti <- n - sum_k beta_k t_k */
238: }
239: PetscCall(VecMAXPY(p, j, betas, pipegcr->pold)); /* pi <- ui - sum_k beta_k p_k */
240: PetscCall(VecMAXPY(s, j, betas, pipegcr->sold)); /* si <- wi - sum_k beta_k s_k */
241: PetscCall(VecMAXPY(q, j, betas, pipegcr->qold)); /* qi <- m - sum_k beta_k q_k */
243: } while (ksp->its < ksp->max_it);
244: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
245: PetscFunctionReturn(PETSC_SUCCESS);
246: }
248: static PetscErrorCode KSPSolve_PIPEGCR(KSP ksp)
249: {
250: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
251: Mat A, B;
252: Vec x, b, r, z, w;
253: PetscScalar gamma;
254: PetscReal rnorm = 0.0;
255: PetscBool issym;
257: PetscFunctionBegin;
258: PetscCall(PetscCitationsRegister(citation, &cited));
260: PetscCall(KSPGetOperators(ksp, &A, &B));
261: x = ksp->vec_sol;
262: b = ksp->vec_rhs;
263: r = ksp->work[0];
264: z = ksp->work[1];
265: w = ksp->work[2]; /* w = Az = AB(r) (pipelining intermediate) */
267: /* compute initial residual */
268: if (!ksp->guess_zero) {
269: PetscCall(KSP_MatMult(ksp, A, x, r));
270: PetscCall(VecAYPX(r, -1.0, b)); /* r <- b - Ax */
271: } else {
272: PetscCall(VecCopy(b, r)); /* r <- b */
273: }
275: /* initial residual norm */
276: PetscCall(KSP_PCApply(ksp, r, z)); /* z <- B(r) */
277: PetscCall(KSP_MatMult(ksp, A, z, w)); /* w <- Az */
278: PetscCall(VecDot(r, w, &gamma)); /* gamma = (r,w) */
280: switch (ksp->normtype) {
281: case KSP_NORM_PRECONDITIONED:
282: PetscCall(VecNorm(z, NORM_2, &rnorm)); /* ||r|| <- sqrt(z'*z) */
283: break;
284: case KSP_NORM_UNPRECONDITIONED:
285: PetscCall(VecNorm(r, NORM_2, &rnorm)); /* ||r|| <- sqrt(r'*r) */
286: break;
287: case KSP_NORM_NATURAL:
288: rnorm = PetscSqrtReal(PetscAbsScalar(gamma)); /* ||r|| <- sqrt(r,w) */
289: break;
290: case KSP_NORM_NONE:
291: rnorm = 0.0;
292: break;
293: default:
294: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "%s", KSPNormTypes[ksp->normtype]);
295: }
297: /* Is A symmetric? */
298: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &issym, MATSBAIJ, MATSEQSBAIJ, MATMPISBAIJ, ""));
299: if (!issym) PetscCall(PetscInfo(A, "Matrix type is not any of MATSBAIJ,MATSEQSBAIJ,MATMPISBAIJ. Is matrix A symmetric (as required by CR methods)?\n"));
301: /* logging */
302: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
303: ksp->its = 0;
304: ksp->rnorm0 = rnorm;
305: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
306: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm0));
307: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm0));
308: PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm0, &ksp->reason, ksp->cnvP));
309: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
311: do {
312: PetscCall(KSPSolve_PIPEGCR_cycle(ksp));
313: if (ksp->reason) PetscFunctionReturn(PETSC_SUCCESS);
314: if (pipegcr->norm_breakdown) {
315: pipegcr->n_restarts++;
316: pipegcr->norm_breakdown = PETSC_FALSE;
317: }
318: } while (ksp->its < ksp->max_it);
320: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
321: PetscFunctionReturn(PETSC_SUCCESS);
322: }
324: static PetscErrorCode KSPView_PIPEGCR(KSP ksp, PetscViewer viewer)
325: {
326: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
327: PetscBool isascii, isstring;
328: const char *truncstr;
330: PetscFunctionBegin;
331: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
332: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
334: if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_STANDARD) {
335: truncstr = "Using standard truncation strategy";
336: } else if (pipegcr->truncstrat == KSP_FCD_TRUNC_TYPE_NOTAY) {
337: truncstr = "Using Notay's truncation strategy";
338: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Undefined FCD truncation strategy");
340: if (isascii) {
341: PetscCall(PetscViewerASCIIPrintf(viewer, " max previous directions = %" PetscInt_FMT "\n", pipegcr->mmax));
342: PetscCall(PetscViewerASCIIPrintf(viewer, " preallocated %" PetscInt_FMT " directions\n", PetscMin(pipegcr->nprealloc, pipegcr->mmax + 1)));
343: PetscCall(PetscViewerASCIIPrintf(viewer, " %s\n", truncstr));
344: PetscCall(PetscViewerASCIIPrintf(viewer, " w unrolling = %s \n", PetscBools[pipegcr->unroll_w]));
345: PetscCall(PetscViewerASCIIPrintf(viewer, " restarts performed = %" PetscInt_FMT " \n", pipegcr->n_restarts));
346: } else if (isstring) {
347: PetscCall(PetscViewerStringSPrintf(viewer, "max previous directions = %" PetscInt_FMT ", preallocated %" PetscInt_FMT " directions, %s truncation strategy", pipegcr->mmax, pipegcr->nprealloc, truncstr));
348: }
349: PetscFunctionReturn(PETSC_SUCCESS);
350: }
352: static PetscErrorCode KSPSetUp_PIPEGCR(KSP ksp)
353: {
354: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
355: Mat A;
356: PetscBool diagonalscale;
357: const PetscInt nworkstd = 5;
359: PetscFunctionBegin;
360: PetscCall(PCGetDiagonalScale(ksp->pc, &diagonalscale));
361: PetscCheck(!diagonalscale, PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name);
363: PetscCall(KSPGetOperators(ksp, &A, NULL));
365: /* Allocate "standard" work vectors */
366: PetscCall(KSPSetWorkVecs(ksp, nworkstd));
368: /* Allocated space for pointers to additional work vectors
369: note that mmax is the number of previous directions, so we add 1 for the current direction */
370: PetscCall(PetscMalloc6(pipegcr->mmax + 1, &pipegcr->pvecs, pipegcr->mmax + 1, &pipegcr->ppvecs, pipegcr->mmax + 1, &pipegcr->svecs, pipegcr->mmax + 1, &pipegcr->psvecs, pipegcr->mmax + 1, &pipegcr->qvecs, pipegcr->mmax + 1, &pipegcr->pqvecs));
371: if (pipegcr->unroll_w) PetscCall(PetscMalloc3(pipegcr->mmax + 1, &pipegcr->tvecs, pipegcr->mmax + 1, &pipegcr->ptvecs, pipegcr->mmax + 2, &pipegcr->told));
372: PetscCall(PetscMalloc4(pipegcr->mmax + 2, &pipegcr->pold, pipegcr->mmax + 2, &pipegcr->sold, pipegcr->mmax + 2, &pipegcr->qold, pipegcr->mmax + 2, &pipegcr->chunksizes));
373: PetscCall(PetscMalloc3(pipegcr->mmax + 2, &pipegcr->dots, pipegcr->mmax + 1, &pipegcr->etas, pipegcr->mmax + 2, &pipegcr->redux));
374: /* If the requested number of preallocated vectors is greater than mmax reduce nprealloc */
375: if (pipegcr->nprealloc > pipegcr->mmax + 1) PetscCall(PetscInfo(NULL, "Requested nprealloc=%" PetscInt_FMT " is greater than m_max+1=%" PetscInt_FMT ". Resetting nprealloc = m_max+1.\n", pipegcr->nprealloc, pipegcr->mmax + 1));
377: /* Preallocate additional work vectors */
378: PetscCall(KSPAllocateVectors_PIPEGCR(ksp, pipegcr->nprealloc, pipegcr->nprealloc));
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: static PetscErrorCode KSPReset_PIPEGCR(KSP ksp)
383: {
384: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
386: PetscFunctionBegin;
387: if (pipegcr->modifypc_destroy) PetscCall((*pipegcr->modifypc_destroy)(&pipegcr->modifypc_ctx));
388: PetscFunctionReturn(PETSC_SUCCESS);
389: }
391: static PetscErrorCode KSPDestroy_PIPEGCR(KSP ksp)
392: {
393: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
395: PetscFunctionBegin;
396: PetscCall(VecDestroyVecs(ksp->nwork, &ksp->work)); /* Destroy "standard" work vecs */
398: /* Destroy vectors for old directions and the arrays that manage pointers to them */
399: if (pipegcr->nvecs) {
400: for (PetscInt i = 0; i < pipegcr->nchunks; i++) {
401: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ppvecs[i]));
402: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->psvecs[i]));
403: PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->pqvecs[i]));
404: if (pipegcr->unroll_w) PetscCall(VecDestroyVecs(pipegcr->chunksizes[i], &pipegcr->ptvecs[i]));
405: }
406: }
408: PetscCall(PetscFree6(pipegcr->pvecs, pipegcr->ppvecs, pipegcr->svecs, pipegcr->psvecs, pipegcr->qvecs, pipegcr->pqvecs));
409: PetscCall(PetscFree4(pipegcr->pold, pipegcr->sold, pipegcr->qold, pipegcr->chunksizes));
410: PetscCall(PetscFree3(pipegcr->dots, pipegcr->etas, pipegcr->redux));
411: if (pipegcr->unroll_w) PetscCall(PetscFree3(pipegcr->tvecs, pipegcr->ptvecs, pipegcr->told));
413: PetscCall(KSPReset_PIPEGCR(ksp));
414: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", NULL));
415: PetscCall(KSPDestroyDefault(ksp));
416: PetscFunctionReturn(PETSC_SUCCESS);
417: }
419: /*@
420: KSPPIPEGCRSetUnrollW - Set to `PETSC_TRUE` to use `KSPPIPEGCR` with unrolling of the w vector
422: Logically Collective
424: Input Parameters:
425: + ksp - the Krylov space context
426: - unroll_w - use unrolling
428: Level: intermediate
430: Options Database Key:
431: . -ksp_pipegcr_unroll_w (true|false) - use unrolling
433: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRGetUnrollW()`
434: @*/
435: PetscErrorCode KSPPIPEGCRSetUnrollW(KSP ksp, PetscBool unroll_w)
436: {
437: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
439: PetscFunctionBegin;
442: pipegcr->unroll_w = unroll_w;
443: PetscFunctionReturn(PETSC_SUCCESS);
444: }
446: /*@
447: KSPPIPEGCRGetUnrollW - Get information on `KSPPIPEGCR` if it uses unrolling the w vector
449: Logically Collective
451: Input Parameter:
452: . ksp - the Krylov space context
454: Output Parameter:
455: . unroll_w - `KSPPIPEGCR` uses unrolling (bool)
457: Level: intermediate
459: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetUnrollW()`
460: @*/
461: PetscErrorCode KSPPIPEGCRGetUnrollW(KSP ksp, PetscBool *unroll_w)
462: {
463: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
465: PetscFunctionBegin;
467: *unroll_w = pipegcr->unroll_w;
468: PetscFunctionReturn(PETSC_SUCCESS);
469: }
471: /*@
472: KSPPIPEGCRSetMmax - set the maximum number of previous directions `KSPPIPEGCR` will store for orthogonalization
474: Logically Collective
476: Input Parameters:
477: + ksp - the Krylov space context
478: - mmax - the maximum number of previous directions to orthogonalize against
480: Options Database Key:
481: . -ksp_pipegcr_mmax mmax - maximum number of previous directions
483: Level: intermediate
485: Note:
486: `mmax` + 1 directions are stored (`mmax` previous ones along with a current one)
487: and whether all are used in each iteration also depends on the truncation strategy
488: (see `KSPPIPEGCRSetTruncationType`)
490: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
491: @*/
492: PetscErrorCode KSPPIPEGCRSetMmax(KSP ksp, PetscInt mmax)
493: {
494: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
496: PetscFunctionBegin;
499: pipegcr->mmax = mmax;
500: PetscFunctionReturn(PETSC_SUCCESS);
501: }
503: /*@
504: KSPPIPEGCRGetMmax - get the maximum number of previous directions `KSPPIPEGCR` will store
506: Not Collective
508: Input Parameter:
509: . ksp - the Krylov space context
511: Output Parameter:
512: . mmax - the maximum number of previous directions allowed for orthogonalization
514: Level: intermediate
516: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRSetMmax()`
517: @*/
518: PetscErrorCode KSPPIPEGCRGetMmax(KSP ksp, PetscInt *mmax)
519: {
520: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
522: PetscFunctionBegin;
524: *mmax = pipegcr->mmax;
525: PetscFunctionReturn(PETSC_SUCCESS);
526: }
528: /*@
529: KSPPIPEGCRSetNprealloc - set the number of directions to preallocate with `KSPPIPEGCR`
531: Logically Collective
533: Input Parameters:
534: + ksp - the Krylov space context
535: - nprealloc - the number of vectors to preallocate
537: Level: advanced
539: Options Database Key:
540: . -ksp_pipegcr_nprealloc N - number of vectors to preallocate
542: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`
543: @*/
544: PetscErrorCode KSPPIPEGCRSetNprealloc(KSP ksp, PetscInt nprealloc)
545: {
546: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
548: PetscFunctionBegin;
551: pipegcr->nprealloc = nprealloc;
552: PetscFunctionReturn(PETSC_SUCCESS);
553: }
555: /*@
556: KSPPIPEGCRGetNprealloc - get the number of directions preallocate by `KSPPIPEGCR`
558: Not Collective
560: Input Parameter:
561: . ksp - the Krylov space context
563: Output Parameter:
564: . nprealloc - the number of directions preallocated
566: Level: advanced
568: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRSetNprealloc()`
569: @*/
570: PetscErrorCode KSPPIPEGCRGetNprealloc(KSP ksp, PetscInt *nprealloc)
571: {
572: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
574: PetscFunctionBegin;
576: *nprealloc = pipegcr->nprealloc;
577: PetscFunctionReturn(PETSC_SUCCESS);
578: }
580: /*@
581: KSPPIPEGCRSetTruncationType - specify how many of its stored previous directions `KSPPIPEGCR` uses during orthogonalization
583: Logically Collective
585: Input Parameters:
586: + ksp - the Krylov space context
587: - truncstrat - the choice of strategy
588: .vb
589: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
590: KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i = 0, 1, ..
591: .ve
593: Options Database Key:
594: . -ksp_pipegcr_truncation_type (standard|notay) - which stored basis vectors to orthogonalize against
596: Level: intermediate
598: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPFCDTruncationType`, `KSPPIPEGCRGetTruncationType()`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
599: @*/
600: PetscErrorCode KSPPIPEGCRSetTruncationType(KSP ksp, KSPFCDTruncationType truncstrat)
601: {
602: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
604: PetscFunctionBegin;
607: pipegcr->truncstrat = truncstrat;
608: PetscFunctionReturn(PETSC_SUCCESS);
609: }
611: /*@
612: KSPPIPEGCRGetTruncationType - get the truncation strategy employed by `KSPPIPEGCR`
614: Not Collective
616: Input Parameter:
617: . ksp - the Krylov space context
619: Output Parameter:
620: . truncstrat - the strategy type
621: .vb
622: KSP_FCD_TRUNC_TYPE_STANDARD uses all (up to `mmax`) stored directions
623: KSP_FCD_TRUNC_TYPE_NOTAY uses the last `max(1,mod(i,mmax))` directions at iteration i =0, 1, ..
624: .ve
626: Level: intermediate
628: .seealso: [](ch_ksp), `KSPPIPEGCR`, `KSPPIPEGCRSetTruncationType()`, `KSPFCDTruncationType`, `KSP_FCD_TRUNC_TYPE_STANDARD`, `KSP_FCD_TRUNC_TYPE_NOTAY`
629: @*/
630: PetscErrorCode KSPPIPEGCRGetTruncationType(KSP ksp, KSPFCDTruncationType *truncstrat)
631: {
632: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
634: PetscFunctionBegin;
636: *truncstrat = pipegcr->truncstrat;
637: PetscFunctionReturn(PETSC_SUCCESS);
638: }
640: static PetscErrorCode KSPSetFromOptions_PIPEGCR(KSP ksp, PetscOptionItems PetscOptionsObject)
641: {
642: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
643: PetscInt mmax, nprealloc;
644: PetscBool flg;
646: PetscFunctionBegin;
647: PetscOptionsHeadBegin(PetscOptionsObject, "KSP PIPEGCR options");
648: PetscCall(PetscOptionsInt("-ksp_pipegcr_mmax", "Number of search directions to storue", "KSPPIPEGCRSetMmax", pipegcr->mmax, &mmax, &flg));
649: if (flg) PetscCall(KSPPIPEGCRSetMmax(ksp, mmax));
650: PetscCall(PetscOptionsInt("-ksp_pipegcr_nprealloc", "Number of directions to preallocate", "KSPPIPEGCRSetNprealloc", pipegcr->nprealloc, &nprealloc, &flg));
651: if (flg) PetscCall(KSPPIPEGCRSetNprealloc(ksp, nprealloc));
652: PetscCall(PetscOptionsEnum("-ksp_pipegcr_truncation_type", "Truncation approach for directions", "KSPFCGSetTruncationType", KSPFCDTruncationTypes, (PetscEnum)pipegcr->truncstrat, (PetscEnum *)&pipegcr->truncstrat, NULL));
653: PetscCall(PetscOptionsBool("-ksp_pipegcr_unroll_w", "Use unrolling of w", "KSPPIPEGCRSetUnrollW", pipegcr->unroll_w, &pipegcr->unroll_w, NULL));
654: PetscOptionsHeadEnd();
655: PetscFunctionReturn(PETSC_SUCCESS);
656: }
658: static PetscErrorCode KSPFlexibleSetModifyPC_PIPEGCR(KSP ksp, KSPFlexibleModifyPCFn *function, PetscCtx ctx, PetscCtxDestroyFn *destroy)
659: {
660: KSP_PIPEGCR *pipegcr = (KSP_PIPEGCR *)ksp->data;
662: PetscFunctionBegin;
664: pipegcr->modifypc = function;
665: pipegcr->modifypc_destroy = destroy;
666: pipegcr->modifypc_ctx = ctx;
667: PetscFunctionReturn(PETSC_SUCCESS);
668: }
670: /*MC
671: KSPPIPEGCR - Implements a Pipelined Generalized Conjugate Residual method {cite}`sananschneppmay2016`. [](sec_flexibleksp). [](sec_pipelineksp)
673: Options Database Keys:
674: + -ksp_pipegcr_mmax N - the max number of Krylov directions to orthogonalize against
675: . -ksp_pipegcr_unroll_w (true|false) - unroll w at the storage cost of a maximum of (mmax+1) extra vectors with the benefit of better pipelining (default: `PETSC_TRUE`)
676: . -ksp_pipegcr_nprealloc N - the number of vectors to preallocated for storing Krylov directions. Once exhausted new directions are allocated blockwise (default: 5)
677: - -ksp_pipegcr_truncation_type (standard|notay) - which previous search directions to orthogonalize against
679: Level: intermediate
681: Notes:
682: Pipelined version of `KSPGCR` that overlaps communication (global reductions) with computation (preconditioner application and matrix-vector products) to reduce the impact of communication latency on parallel performance.
684: The `KSPPIPEGCR` Krylov method supports non-symmetric matrices and permits the use of a preconditioner
685: which may vary from one iteration to the next. Users can define a method to vary the
686: preconditioner between iterates via `KSPFlexibleSetModifyPC()`.
687: Restarts are solves with x0 not equal to zero. When a restart occurs, the initial starting
688: solution is given by the current estimate for `x` which was obtained by the last restart
689: iterations of the `KSPPIPEGCR` algorithm.
690: The method implemented requires at most the storage of 4 x mmax + 5 vectors, roughly twice as much as `KSPGCR`.
692: Only supports left preconditioning.
694: The natural "norm" for this method is $(u,Au)$, where $u$ is the preconditioned residual. This norm is available at no additional computational cost, as with standard
695: `KSPCG`. Choosing preconditioned or unpreconditioned norm types involves a blocking reduction which prevents any benefit from pipelining.
697: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
698: See [](doc_faq_pipelined)
700: Contributed by:
701: Sascha M. Schnepp and Patrick Sanan
703: .seealso: [](ch_ksp), [](sec_flexibleksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`,
704: `KSPFlexibleSetModifyPC()`, `KSPPIPEFGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPIPEFCG`, `KSPPIPEGCRSetTruncationType()`, `KSPPIPEGCRSetNprealloc()`, `KSPPIPEGCRSetUnrollW()`, `KSPPIPEGCRSetMmax()`,
705: `KSPPIPEGCRGetTruncationType()`, `KSPPIPEGCRGetNprealloc()`, `KSPPIPEGCRGetMmax()`, `KSPGCR`, `KSPPIPEGCRGetUnrollW()`
706: M*/
707: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEGCR(KSP ksp)
708: {
709: KSP_PIPEGCR *pipegcr;
711: PetscFunctionBegin;
712: PetscCall(PetscNew(&pipegcr));
713: pipegcr->mmax = KSPPIPEGCR_DEFAULT_MMAX;
714: pipegcr->nprealloc = KSPPIPEGCR_DEFAULT_NPREALLOC;
715: pipegcr->nvecs = 0;
716: pipegcr->vecb = KSPPIPEGCR_DEFAULT_VECB;
717: pipegcr->nchunks = 0;
718: pipegcr->truncstrat = KSPPIPEGCR_DEFAULT_TRUNCSTRAT;
719: pipegcr->n_restarts = 0;
720: pipegcr->unroll_w = KSPPIPEGCR_DEFAULT_UNROLL_W;
722: ksp->data = (void *)pipegcr;
724: /* natural norm is for free, precond+unprecond norm require non-overlapped reduction */
725: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NATURAL, PC_LEFT, 2));
726: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 1));
727: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_LEFT, 1));
728: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_LEFT, 1));
730: ksp->ops->setup = KSPSetUp_PIPEGCR;
731: ksp->ops->solve = KSPSolve_PIPEGCR;
732: ksp->ops->reset = KSPReset_PIPEGCR;
733: ksp->ops->destroy = KSPDestroy_PIPEGCR;
734: ksp->ops->view = KSPView_PIPEGCR;
735: ksp->ops->setfromoptions = KSPSetFromOptions_PIPEGCR;
736: ksp->ops->buildsolution = KSPBuildSolutionDefault;
737: ksp->ops->buildresidual = KSPBuildResidualDefault;
739: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPFlexibleSetModifyPC_C", KSPFlexibleSetModifyPC_PIPEGCR));
740: PetscFunctionReturn(PETSC_SUCCESS);
741: }