Actual source code: dgmres.c
1: /*
2: Implements deflated GMRES.
3: */
5: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h>
7: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
9: static PetscErrorCode KSPDGMRESGetNewVectors(KSP, PetscInt);
10: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP, PetscInt, PetscBool, PetscReal *);
11: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
13: static PetscErrorCode KSPDGMRESSetEigen(KSP ksp, PetscInt nb_eig)
14: {
15: PetscFunctionBegin;
16: PetscTryMethod(ksp, "KSPDGMRESSetEigen_C", (KSP, PetscInt), (ksp, nb_eig));
17: PetscFunctionReturn(PETSC_SUCCESS);
18: }
19: static PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp, PetscInt max_neig)
20: {
21: PetscFunctionBegin;
22: PetscTryMethod(ksp, "KSPDGMRESSetMaxEigen_C", (KSP, PetscInt), (ksp, max_neig));
23: PetscFunctionReturn(PETSC_SUCCESS);
24: }
25: static PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp, PetscInt *neig)
26: {
27: PetscFunctionBegin;
28: PetscUseMethod(ksp, "KSPDGMRESComputeSchurForm_C", (KSP, PetscInt *), (ksp, neig));
29: PetscFunctionReturn(PETSC_SUCCESS);
30: }
31: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp, PetscInt *curneigh)
32: {
33: PetscFunctionBegin;
34: PetscUseMethod(ksp, "KSPDGMRESComputeDeflationData_C", (KSP, PetscInt *), (ksp, curneigh));
35: PetscFunctionReturn(PETSC_SUCCESS);
36: }
37: static PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
38: {
39: PetscFunctionBegin;
40: PetscUseMethod(ksp, "KSPDGMRESApplyDeflation_C", (KSP, Vec, Vec), (ksp, x, y));
41: PetscFunctionReturn(PETSC_SUCCESS);
42: }
44: static PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
45: {
46: PetscFunctionBegin;
47: PetscUseMethod(ksp, "KSPDGMRESImproveEig_C", (KSP, PetscInt), (ksp, neig));
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
51: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
52: {
53: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
54: PetscInt neig = dgmres->neig + EIG_OFFSET;
55: PetscInt max_k = dgmres->max_k + 1;
57: PetscFunctionBegin;
58: PetscCall(KSPSetUp_GMRES(ksp));
59: if (!dgmres->neig) PetscFunctionReturn(PETSC_SUCCESS);
61: /* Allocate workspace for the Schur vectors*/
62: PetscCall(PetscMalloc1(neig * max_k, &SR));
63: dgmres->wr = NULL;
64: dgmres->wi = NULL;
65: dgmres->perm = NULL;
66: dgmres->modul = NULL;
67: dgmres->Q = NULL;
68: dgmres->Z = NULL;
70: UU = NULL;
71: XX = NULL;
72: MX = NULL;
73: AUU = NULL;
74: XMX = NULL;
75: XMU = NULL;
76: UMX = NULL;
77: AUAU = NULL;
78: TT = NULL;
79: TTF = NULL;
80: INVP = NULL;
81: X1 = NULL;
82: X2 = NULL;
83: MU = NULL;
84: PetscFunctionReturn(PETSC_SUCCESS);
85: }
87: /*
88: Run GMRES, possibly with restart. Return residual history if requested.
89: input parameters:
91: . gmres - structure containing parameters and work areas
93: output parameters:
94: . nres - residuals (from preconditioned system) at each step.
95: If restarting, consider passing nres+it. If null,
96: ignored
97: . itcount - number of iterations used. nres[0] to nres[itcount]
98: are defined. If null, ignored.
100: Notes:
101: On entry, the value in vector VEC_VV(0) should be the initial residual
102: (this allows shortcuts where the initial preconditioned residual is 0).
103: */
104: static PetscErrorCode KSPDGMRESCycle(PetscInt *itcount, KSP ksp)
105: {
106: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
107: PetscReal res_norm, res, hapbnd, tt;
108: PetscInt it = 0;
109: PetscInt max_k = dgmres->max_k;
110: PetscBool hapend = PETSC_FALSE;
111: PetscReal res_old;
112: PetscInt test = 0;
114: PetscFunctionBegin;
115: PetscCall(VecNormalize(VEC_VV(0), &res_norm));
116: KSPCheckNorm(ksp, res_norm);
117: res = res_norm;
118: *GRS(0) = res_norm;
120: /* check for the convergence */
121: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
122: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
123: else ksp->rnorm = 0.0;
124: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
125: dgmres->it = (it - 1);
126: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
127: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
128: if (!res) {
129: if (itcount) *itcount = 0;
130: ksp->reason = KSP_CONVERGED_ATOL;
131: PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
132: PetscFunctionReturn(PETSC_SUCCESS);
133: }
134: /* record the residual norm to test if deflation is needed */
135: res_old = res;
137: PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
138: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
139: if (it) {
140: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
141: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
142: }
143: dgmres->it = (it - 1);
144: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) PetscCall(KSPDGMRESGetNewVectors(ksp, it + 1));
145: if (dgmres->r > 0) {
146: if (ksp->pc_side == PC_LEFT) {
147: /* Apply the first preconditioner */
148: PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_TEMP, VEC_TEMP_MATOP));
149: /* Then apply Deflation as a preconditioner */
150: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1 + it)));
151: } else if (ksp->pc_side == PC_RIGHT) {
152: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP));
153: PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1 + it), VEC_TEMP_MATOP));
154: }
155: } else {
156: PetscCall(KSP_PCApplyBAorAB(ksp, VEC_VV(it), VEC_VV(1 + it), VEC_TEMP_MATOP));
157: }
158: dgmres->matvecs += 1;
159: /* update Hessenberg matrix and do Gram-Schmidt */
160: PetscCall((*dgmres->orthog)(ksp, it));
162: /* vv(i+1) . vv(i+1) */
163: PetscCall(VecNormalize(VEC_VV(it + 1), &tt));
164: /* save the magnitude */
165: *HH(it + 1, it) = tt;
166: *HES(it + 1, it) = tt;
168: /* check for the happy breakdown */
169: hapbnd = PetscAbsScalar(tt / *GRS(it));
170: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
171: if (tt < hapbnd) {
172: PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %g tt = %g\n", (double)hapbnd, (double)tt));
173: hapend = PETSC_TRUE;
174: }
175: PetscCall(KSPDGMRESUpdateHessenberg(ksp, it, hapend, &res));
177: it++;
178: dgmres->it = (it - 1); /* For converged */
179: ksp->its++;
180: if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
181: else ksp->rnorm = 0.0;
182: if (ksp->reason) break;
184: PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
186: /* Catch error in happy breakdown and signal convergence and break from loop */
187: if (hapend) {
188: if (!ksp->reason) {
189: PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res);
190: ksp->reason = KSP_DIVERGED_BREAKDOWN;
191: break;
192: }
193: }
194: }
196: if (itcount) *itcount = it;
198: /*
199: Down here we have to solve for the "best" coefficients of the Krylov
200: columns, add the solution values together, and possibly unwind the
201: preconditioning from the solution
202: */
203: /* Form the solution (or the solution so far) */
204: PetscCall(KSPDGMRESBuildSoln(GRS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 1));
206: /* Monitor if we know that we will not return for a restart */
207: if (ksp->reason == KSP_CONVERGED_ITERATING && ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
208: if (it && ksp->reason) {
209: PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
210: PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
211: }
213: /* Compute data for the deflation to be used during the next restart */
214: if (!ksp->reason && ksp->its < ksp->max_it) {
215: test = max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old);
216: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
217: if ((test > dgmres->smv * (ksp->max_it - ksp->its)) || dgmres->force) PetscCall(KSPDGMRESComputeDeflationData(ksp, NULL));
218: }
219: PetscFunctionReturn(PETSC_SUCCESS);
220: }
222: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
223: {
224: PetscInt i, its = 0, itcount;
225: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
226: PetscBool guess_zero = ksp->guess_zero;
228: PetscFunctionBegin;
229: PetscCheck(!ksp->calc_sings || dgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
231: PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
232: ksp->its = 0;
233: dgmres->matvecs = 0;
234: PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
236: itcount = 0;
237: while (!ksp->reason) {
238: PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
239: if (ksp->pc_side == PC_LEFT) {
240: dgmres->matvecs += 1;
241: if (dgmres->r > 0) {
242: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP));
243: PetscCall(VecCopy(VEC_TEMP, VEC_VV(0)));
244: }
245: }
247: PetscCall(KSPDGMRESCycle(&its, ksp));
248: itcount += its;
249: if (itcount >= ksp->max_it) {
250: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
251: break;
252: }
253: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
254: }
255: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
257: for (i = 0; i < dgmres->r; i++) PetscCall(VecViewFromOptions(UU[i], (PetscObject)ksp, "-ksp_dgmres_view_deflation_vecs"));
258: PetscFunctionReturn(PETSC_SUCCESS);
259: }
261: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
262: {
263: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
264: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
265: PetscInt max_neig = dgmres->max_neig;
267: PetscFunctionBegin;
268: if (dgmres->r) {
269: PetscCall(VecDestroyVecs(max_neig, &UU));
270: PetscCall(VecDestroyVecs(max_neig, &MU));
271: if (XX) {
272: PetscCall(VecDestroyVecs(neig1, &XX));
273: PetscCall(VecDestroyVecs(neig1, &MX));
274: }
275: PetscCall(PetscFree(TT));
276: PetscCall(PetscFree(TTF));
277: PetscCall(PetscFree(INVP));
278: PetscCall(PetscFree(XMX));
279: PetscCall(PetscFree(UMX));
280: PetscCall(PetscFree(XMU));
281: PetscCall(PetscFree(X1));
282: PetscCall(PetscFree(X2));
283: PetscCall(PetscFree(dgmres->work));
284: PetscCall(PetscFree(dgmres->iwork));
285: PetscCall(PetscFree(dgmres->wr));
286: PetscCall(PetscFree(dgmres->wi));
287: PetscCall(PetscFree(dgmres->modul));
288: PetscCall(PetscFree(dgmres->Q));
289: PetscCall(PetscFree(ORTH));
290: PetscCall(PetscFree(AUAU));
291: PetscCall(PetscFree(AUU));
292: PetscCall(PetscFree(SR2));
293: }
294: PetscCall(PetscFree(SR));
295: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", NULL));
296: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", NULL));
297: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", NULL));
298: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", NULL));
299: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", NULL));
300: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", NULL));
301: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", NULL));
302: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", NULL));
303: PetscCall(KSPDestroy_GMRES(ksp));
304: PetscFunctionReturn(PETSC_SUCCESS);
305: }
307: /*
308: KSPDGMRESBuildSoln - create the solution from the starting vector and the
309: current iterates.
311: Input parameters:
312: nrs - work area of size it + 1.
313: vs - index of initial guess
314: vdest - index of result. Note that vs may == vdest (replace
315: guess with the solution).
317: This is an internal routine that knows about the GMRES internals.
318: */
319: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs, Vec vs, Vec vdest, KSP ksp, PetscInt it)
320: {
321: PetscScalar tt;
322: PetscInt ii, k, j;
323: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
325: /* Solve for solution vector that minimizes the residual */
327: PetscFunctionBegin;
328: /* If it is < 0, no gmres steps have been performed */
329: if (it < 0) {
330: PetscCall(VecCopy(vs, vdest)); /* VecCopy() is smart, exists immediately if vguess == vdest */
331: PetscFunctionReturn(PETSC_SUCCESS);
332: }
333: PetscCheck(*HH(it, it) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %" PetscInt_FMT " GRS(it) = %g", it, (double)PetscAbsScalar(*GRS(it)));
334: if (*HH(it, it) != 0.0) nrs[it] = *GRS(it) / *HH(it, it);
335: else nrs[it] = 0.0;
337: for (ii = 1; ii <= it; ii++) {
338: k = it - ii;
339: tt = *GRS(k);
340: for (j = k + 1; j <= it; j++) tt = tt - *HH(k, j) * nrs[j];
341: PetscCheck(*HH(k, k) != 0.0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED, "Likely your matrix is singular. HH(k,k) is identically zero; it = %" PetscInt_FMT " k = %" PetscInt_FMT, it, k);
342: nrs[k] = tt / *HH(k, k);
343: }
345: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
346: PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &VEC_VV(0)));
348: /* Apply deflation */
349: if (ksp->pc_side == PC_RIGHT && dgmres->r > 0) {
350: PetscCall(KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP));
351: PetscCall(VecCopy(VEC_TEMP_MATOP, VEC_TEMP));
352: }
353: PetscCall(KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP));
355: /* add solution to previous solution */
356: if (vdest != vs) PetscCall(VecCopy(vs, vdest));
357: PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
358: PetscFunctionReturn(PETSC_SUCCESS);
359: }
361: /*
362: Do the scalar work for the orthogonalization. Return new residual norm.
363: */
364: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool hapend, PetscReal *res)
365: {
366: PetscScalar *hh, *cc, *ss, tt;
367: PetscInt j;
368: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
370: PetscFunctionBegin;
371: hh = HH(0, it);
372: cc = CC(0);
373: ss = SS(0);
375: /* Apply all the previously computed plane rotations to the new column
376: of the Hessenberg matrix */
377: for (j = 1; j <= it; j++) {
378: tt = *hh;
379: *hh = PetscConj(*cc) * tt + *ss * *(hh + 1);
380: hh++;
381: *hh = *cc++ * *hh - (*ss++ * tt);
382: }
384: /*
385: compute the new plane rotation, and apply it to:
386: 1) the right-hand side of the Hessenberg system
387: 2) the new column of the Hessenberg matrix
388: thus obtaining the updated value of the residual
389: */
390: if (!hapend) {
391: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh + 1)) * *(hh + 1));
392: if (tt == 0.0) {
393: ksp->reason = KSP_DIVERGED_NULL;
394: PetscFunctionReturn(PETSC_SUCCESS);
395: }
396: *cc = *hh / tt;
397: *ss = *(hh + 1) / tt;
398: *GRS(it + 1) = -(*ss * *GRS(it));
399: *GRS(it) = PetscConj(*cc) * *GRS(it);
400: *hh = PetscConj(*cc) * *hh + *ss * *(hh + 1);
401: *res = PetscAbsScalar(*GRS(it + 1));
402: } else {
403: /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
404: another rotation matrix (so RH doesn't change). The new residual is
405: always the new sine term times the residual from last time (GRS(it)),
406: but now the new sine rotation would be zero...so the residual should
407: be zero...so we will multiply "zero" by the last residual. This might
408: not be exactly what we want to do here -could just return "zero". */
409: *res = 0.0;
410: }
411: PetscFunctionReturn(PETSC_SUCCESS);
412: }
414: /*
415: Allocates more work vectors, starting from VEC_VV(it).
416: */
417: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp, PetscInt it)
418: {
419: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
420: PetscInt nwork = dgmres->nwork_alloc, k, nalloc;
422: PetscFunctionBegin;
423: nalloc = PetscMin(ksp->max_it, dgmres->delta_allocate);
424: /* Adjust the number to allocate to make sure that we don't exceed the
425: number of available slots */
426: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
427: if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
429: dgmres->vv_allocated += nalloc;
431: PetscCall(KSPCreateVecs(ksp, nalloc, &dgmres->user_work[nwork], 0, NULL));
433: dgmres->mwork_alloc[nwork] = nalloc;
434: for (k = 0; k < nalloc; k++) dgmres->vecs[it + VEC_OFFSET + k] = dgmres->user_work[nwork][k];
435: dgmres->nwork_alloc++;
436: PetscFunctionReturn(PETSC_SUCCESS);
437: }
439: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp, Vec ptr, Vec *result)
440: {
441: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
443: PetscFunctionBegin;
444: if (!ptr) {
445: if (!dgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &dgmres->sol_temp));
446: ptr = dgmres->sol_temp;
447: }
448: if (!dgmres->nrs) {
449: /* allocate the work area */
450: PetscCall(PetscMalloc1(dgmres->max_k, &dgmres->nrs));
451: }
452: PetscCall(KSPDGMRESBuildSoln(dgmres->nrs, ksp->vec_sol, ptr, ksp, dgmres->it));
453: if (result) *result = ptr;
454: PetscFunctionReturn(PETSC_SUCCESS);
455: }
457: static PetscErrorCode KSPView_DGMRES(KSP ksp, PetscViewer viewer)
458: {
459: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
460: PetscBool iascii, isharmonic;
462: PetscFunctionBegin;
463: PetscCall(KSPView_GMRES(ksp, viewer));
464: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
465: if (iascii) {
466: PetscCall(PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: %s\n", PetscBools[dgmres->force]));
467: PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic));
468: if (isharmonic) {
469: PetscCall(PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %" PetscInt_FMT " using Harmonic Ritz values \n", dgmres->neig));
470: } else {
471: PetscCall(PetscViewerASCIIPrintf(viewer, " Frequency of extracted eigenvalues = %" PetscInt_FMT " using Ritz values \n", dgmres->neig));
472: }
473: PetscCall(PetscViewerASCIIPrintf(viewer, " Total number of extracted eigenvalues = %" PetscInt_FMT "\n", dgmres->r));
474: PetscCall(PetscViewerASCIIPrintf(viewer, " Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", dgmres->max_neig));
475: PetscCall(PetscViewerASCIIPrintf(viewer, " relaxation parameter for the adaptive strategy(smv) = %g\n", (double)dgmres->smv));
476: PetscCall(PetscViewerASCIIPrintf(viewer, " Number of matvecs : %" PetscInt_FMT "\n", dgmres->matvecs));
477: }
478: PetscFunctionReturn(PETSC_SUCCESS);
479: }
481: PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp, PetscInt neig)
482: {
483: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
485: PetscFunctionBegin;
486: PetscCheck(neig >= 0 && neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of neig must be positive and less than the restart value ");
487: dgmres->neig = neig;
488: PetscFunctionReturn(PETSC_SUCCESS);
489: }
491: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp, PetscInt max_neig)
492: {
493: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
495: PetscFunctionBegin;
496: PetscCheck(max_neig >= 0 && max_neig <= dgmres->max_k, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The value of max_neig must be positive and less than the restart value ");
497: dgmres->max_neig = max_neig;
498: PetscFunctionReturn(PETSC_SUCCESS);
499: }
501: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp, PetscReal ratio)
502: {
503: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
505: PetscFunctionBegin;
506: PetscCheck(ratio > 0, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE, "The relaxation parameter value must be positive");
507: dgmres->smv = ratio;
508: PetscFunctionReturn(PETSC_SUCCESS);
509: }
511: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp, PetscBool force)
512: {
513: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
515: PetscFunctionBegin;
516: dgmres->force = force;
517: PetscFunctionReturn(PETSC_SUCCESS);
518: }
520: PetscErrorCode KSPSetFromOptions_DGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
521: {
522: PetscInt neig;
523: PetscInt max_neig;
524: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
525: PetscBool flg;
527: PetscFunctionBegin;
528: PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
529: PetscOptionsHeadBegin(PetscOptionsObject, "KSP DGMRES Options");
530: PetscCall(PetscOptionsInt("-ksp_dgmres_eigen", "Number of smallest eigenvalues to extract at each restart", "KSPDGMRESSetEigen", dgmres->neig, &neig, &flg));
531: if (flg) PetscCall(KSPDGMRESSetEigen(ksp, neig));
532: PetscCall(PetscOptionsInt("-ksp_dgmres_max_eigen", "Maximum Number of smallest eigenvalues to extract ", "KSPDGMRESSetMaxEigen", dgmres->max_neig, &max_neig, &flg));
533: if (flg) PetscCall(KSPDGMRESSetMaxEigen(ksp, max_neig));
534: PetscCall(PetscOptionsReal("-ksp_dgmres_ratio", "Relaxation parameter for the smaller number of matrix-vectors product allowed", "KSPDGMRESSetRatio", dgmres->smv, &dgmres->smv, NULL));
535: PetscCall(PetscOptionsBool("-ksp_dgmres_improve", "Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)", NULL, dgmres->improve, &dgmres->improve, NULL));
536: PetscCall(PetscOptionsBool("-ksp_dgmres_force", "Sets DGMRES always at restart active, i.e do not use the adaptive strategy", "KSPDGMRESForce", dgmres->force, &dgmres->force, NULL));
537: PetscOptionsHeadEnd();
538: PetscFunctionReturn(PETSC_SUCCESS);
539: }
541: PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
542: {
543: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
544: PetscInt i, j, k;
545: PetscBLASInt nr, bmax;
546: PetscInt r = dgmres->r;
547: PetscInt neig; /* number of eigenvalues to extract at each restart */
548: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
549: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
550: PetscInt N = dgmres->max_k + 1;
551: PetscInt n = dgmres->it + 1;
552: PetscReal alpha;
554: PetscFunctionBegin;
555: PetscCall(PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
556: if (dgmres->neig == 0 || (max_neig < (r + neig1) && !dgmres->improve)) {
557: PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
558: PetscFunctionReturn(PETSC_SUCCESS);
559: }
561: PetscCall(KSPDGMRESComputeSchurForm(ksp, &neig));
562: /* Form the extended Schur vectors X=VV*Sr */
563: if (!XX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &XX));
564: for (j = 0; j < neig; j++) PetscCall(VecMAXPBY(XX[j], n, &SR[j * N], 0, &VEC_VV(0)));
566: /* Orthogonalize X against U */
567: if (!ORTH) PetscCall(PetscMalloc1(max_neig, &ORTH));
568: if (r > 0) {
569: /* modified Gram-Schmidt */
570: for (j = 0; j < neig; j++) {
571: for (i = 0; i < r; i++) {
572: /* First, compute U'*X[j] */
573: PetscCall(VecDot(XX[j], UU[i], &alpha));
574: /* Then, compute X(j)=X(j)-U*U'*X(j) */
575: PetscCall(VecAXPY(XX[j], -alpha, UU[i]));
576: }
577: }
578: }
579: /* Compute MX = M^{-1}*A*X */
580: if (!MX) PetscCall(VecDuplicateVecs(VEC_VV(0), neig1, &MX));
581: for (j = 0; j < neig; j++) PetscCall(KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP));
582: dgmres->matvecs += neig;
584: if ((r + neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- expensive to do this */
585: PetscCall(KSPDGMRESImproveEig(ksp, neig));
586: PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
587: PetscFunctionReturn(PETSC_SUCCESS); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
588: }
590: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
591: if (!XMX) PetscCall(PetscMalloc1(neig1 * neig1, &XMX));
592: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, XX, &XMX[j * neig1]));
594: if (r > 0) {
595: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
596: if (!UMX) PetscCall(PetscMalloc1(max_neig * neig1, &UMX));
597: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, UU, &UMX[j * max_neig]));
598: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
599: if (!XMU) PetscCall(PetscMalloc1(max_neig * neig1, &XMU));
600: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, XX, &XMU[j * neig1]));
601: }
603: /* Form the new matrix T = [T UMX; XMU XMX]; */
604: if (!TT) PetscCall(PetscMalloc1(max_neig * max_neig, &TT));
605: if (r > 0) {
606: /* Add XMU to T */
607: for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&TT[max_neig * j + r], &XMU[neig1 * j], neig));
608: /* Add [UMX; XMX] to T */
609: for (j = 0; j < neig; j++) {
610: k = r + j;
611: PetscCall(PetscArraycpy(&TT[max_neig * k], &UMX[max_neig * j], r));
612: PetscCall(PetscArraycpy(&TT[max_neig * k + r], &XMX[neig1 * j], neig));
613: }
614: } else { /* Add XMX to T */
615: for (j = 0; j < neig; j++) PetscCall(PetscArraycpy(&TT[max_neig * j], &XMX[neig1 * j], neig));
616: }
618: dgmres->r += neig;
619: r = dgmres->r;
620: PetscCall(PetscBLASIntCast(r, &nr));
621: /*LU Factorize T with Lapack xgetrf routine */
623: PetscCall(PetscBLASIntCast(max_neig, &bmax));
624: if (!TTF) PetscCall(PetscMalloc1(bmax * bmax, &TTF));
625: PetscCall(PetscArraycpy(TTF, TT, bmax * r));
626: if (!INVP) PetscCall(PetscMalloc1(bmax, &INVP));
627: {
628: PetscBLASInt info;
629: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
630: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%" PetscBLASInt_FMT, info);
631: }
633: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
634: if (!UU) {
635: PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &UU));
636: PetscCall(VecDuplicateVecs(VEC_VV(0), max_neig, &MU));
637: }
638: for (j = 0; j < neig; j++) {
639: PetscCall(VecCopy(XX[j], UU[r - neig + j]));
640: PetscCall(VecCopy(MX[j], MU[r - neig + j]));
641: }
642: PetscCall(PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0, 0, 0));
643: PetscFunctionReturn(PETSC_SUCCESS);
644: }
646: PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
647: {
648: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
649: PetscInt N = dgmres->max_k + 1, n = dgmres->it + 1;
650: PetscBLASInt bn;
651: PetscReal *A;
652: PetscBLASInt ihi;
653: PetscBLASInt ldA = 0; /* leading dimension of A */
654: PetscBLASInt ldQ; /* leading dimension of Q */
655: PetscReal *Q; /* orthogonal matrix of (left) Schur vectors */
656: PetscReal *work; /* working vector */
657: PetscBLASInt lwork; /* size of the working vector */
658: PetscInt *perm; /* Permutation vector to sort eigenvalues */
659: PetscInt i, j;
660: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
661: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modulus of the eigenvalues of A */
662: PetscBLASInt *select;
663: PetscBLASInt *iwork;
664: PetscBLASInt liwork;
665: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
666: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
667: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
668: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
670: PetscFunctionBegin;
671: PetscCall(PetscBLASIntCast(n, &bn));
672: PetscCall(PetscBLASIntCast(N, &ldA));
673: ihi = ldQ = bn;
674: PetscCall(PetscBLASIntCast(5 * N, &lwork));
676: #if defined(PETSC_USE_COMPLEX)
677: SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_SUP, "No support for complex numbers.");
678: #endif
680: PetscCall(PetscMalloc1(ldA * ldA, &A));
681: PetscCall(PetscMalloc1(ldQ * n, &Q));
682: PetscCall(PetscMalloc1(lwork, &work));
683: if (!dgmres->wr) {
684: PetscCall(PetscMalloc1(n, &dgmres->wr));
685: PetscCall(PetscMalloc1(n, &dgmres->wi));
686: }
687: wr = dgmres->wr;
688: wi = dgmres->wi;
689: PetscCall(PetscMalloc1(n, &modul));
690: PetscCall(PetscMalloc1(n, &perm));
691: /* copy the Hessenberg matrix to work space */
692: PetscCall(PetscArraycpy(A, dgmres->hes_origin, ldA * ldA));
693: PetscCall(PetscOptionsHasName(((PetscObject)ksp)->options, ((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag));
694: if (flag) {
695: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
696: /* Transpose the Hessenberg matrix */
697: PetscCall(PetscMalloc1(bn * bn, &Ht));
698: for (i = 0; i < bn; i++) {
699: for (j = 0; j < bn; j++) Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
700: }
702: /* Solve the system H^T*t = h_{m+1,m}e_m */
703: PetscCall(PetscCalloc1(bn, &t));
704: t[bn - 1] = dgmres->hes_origin[(bn - 1) * ldA + bn]; /* Pick the last element H(m+1,m) */
705: PetscCall(PetscMalloc1(bn, &ipiv));
706: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
707: {
708: PetscBLASInt info;
709: PetscBLASInt nrhs = 1;
710: PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
711: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error while calling the Lapack routine DGESV");
712: }
713: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
714: for (i = 0; i < bn; i++) A[(bn - 1) * bn + i] += t[i];
715: PetscCall(PetscFree(t));
716: PetscCall(PetscFree(Ht));
717: }
718: /* Compute eigenvalues with the Schur form */
719: {
720: PetscBLASInt info = 0;
721: PetscBLASInt ilo = 1;
722: PetscCallBLAS("LAPACKhseqr", LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
723: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XHSEQR %" PetscBLASInt_FMT, info);
724: }
725: PetscCall(PetscFree(work));
727: /* sort the eigenvalues */
728: for (i = 0; i < n; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
729: for (i = 0; i < n; i++) perm[i] = i;
731: PetscCall(PetscSortRealWithPermutation(n, modul, perm));
732: /* save the complex modulus of the largest eigenvalue in magnitude */
733: if (dgmres->lambdaN < modul[perm[n - 1]]) dgmres->lambdaN = modul[perm[n - 1]];
734: /* count the number of extracted eigenvalues (with complex conjugates) */
735: NbrEig = 0;
736: while (NbrEig < dgmres->neig) {
737: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
738: else NbrEig += 1;
739: }
740: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
742: PetscCall(PetscCalloc1(n, &select));
744: if (!dgmres->GreatestEig) {
745: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
746: } else {
747: for (j = 0; j < NbrEig; j++) select[perm[n - j - 1]] = 1;
748: }
749: /* call Lapack dtrsen */
750: lwork = PetscMax(1, 4 * NbrEig * (bn - NbrEig));
751: liwork = PetscMax(1, 2 * NbrEig * (bn - NbrEig));
752: PetscCall(PetscMalloc1(lwork, &work));
753: PetscCall(PetscMalloc1(liwork, &iwork));
754: {
755: PetscBLASInt info = 0;
756: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
757: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
758: PetscCallBLAS("LAPACKtrsen", LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
759: PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ILL-CONDITIONED PROBLEM");
760: }
761: PetscCall(PetscFree(select));
763: /* Extract the Schur vectors */
764: for (j = 0; j < NbrEig; j++) PetscCall(PetscArraycpy(&SR[j * N], &Q[j * ldQ], n));
765: *neig = NbrEig;
766: PetscCall(PetscFree(A));
767: PetscCall(PetscFree(work));
768: PetscCall(PetscFree(perm));
769: PetscCall(PetscFree(work));
770: PetscCall(PetscFree(iwork));
771: PetscCall(PetscFree(modul));
772: PetscCall(PetscFree(Q));
773: PetscFunctionReturn(PETSC_SUCCESS);
774: }
776: PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
777: {
778: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
779: PetscInt i, r = dgmres->r;
780: PetscReal alpha = 1.0;
781: PetscInt max_neig = dgmres->max_neig;
782: PetscBLASInt br, bmax;
783: PetscReal lambda = dgmres->lambdaN;
785: PetscFunctionBegin;
786: PetscCall(PetscBLASIntCast(r, &br));
787: PetscCall(PetscBLASIntCast(max_neig, &bmax));
788: PetscCall(PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
789: if (!r) {
790: PetscCall(VecCopy(x, y));
791: PetscFunctionReturn(PETSC_SUCCESS);
792: }
793: /* Compute U'*x */
794: if (!X1) {
795: PetscCall(PetscMalloc1(bmax, &X1));
796: PetscCall(PetscMalloc1(bmax, &X2));
797: }
798: PetscCall(VecMDot(x, r, UU, X1));
800: /* Solve T*X1=X2 for X1*/
801: PetscCall(PetscArraycpy(X2, X1, br));
802: {
803: PetscBLASInt info;
804: PetscBLASInt nrhs = 1;
805: PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
806: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRS %" PetscBLASInt_FMT, info);
807: }
808: /* Iterative refinement -- is it really necessary ?? */
809: if (!WORK) {
810: PetscCall(PetscMalloc1(3 * bmax, &WORK));
811: PetscCall(PetscMalloc1(bmax, &IWORK));
812: }
813: {
814: PetscBLASInt info;
815: PetscReal berr, ferr;
816: PetscBLASInt nrhs = 1;
817: PetscCallBLAS("LAPACKgerfs", LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax, X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
818: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGERFS %" PetscBLASInt_FMT, info);
819: }
821: for (i = 0; i < r; i++) X2[i] = X1[i] / lambda - X2[i];
823: /* Compute X2=U*X2 */
824: PetscCall(VecMAXPBY(y, r, X2, 0, UU));
825: PetscCall(VecAXPY(y, alpha, x));
827: PetscCall(PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0));
828: PetscFunctionReturn(PETSC_SUCCESS);
829: }
831: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
832: {
833: KSP_DGMRES *dgmres = (KSP_DGMRES *)ksp->data;
834: PetscInt j, r_old, r = dgmres->r;
835: PetscBLASInt i = 0;
836: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
837: PetscInt bmax = dgmres->max_neig;
838: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
839: PetscInt aug1 = bmax + neig1; /* maximum size of the augmented invariant basis */
840: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
841: PetscBLASInt N; /* size of AUAU */
842: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
843: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
844: PetscReal *work; /* working vector */
845: PetscBLASInt lwork; /* size of the working vector */
846: PetscInt *perm; /* Permutation vector to sort eigenvalues */
847: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modulus of the eigenvalues of A*/
848: PetscBLASInt NbrEig = 0, nr, bm;
849: PetscBLASInt *select;
850: PetscBLASInt liwork, *iwork;
852: PetscFunctionBegin;
853: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
854: if (!AUU) {
855: PetscCall(PetscMalloc1(aug1 * aug1, &AUU));
856: PetscCall(PetscMalloc1(aug1 * aug1, &AUAU));
857: }
858: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
859: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
860: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
861: for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], r, MU, &AUU[j * aug1]));
862: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
863: for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], r, MU, &AUU[(r + j) * aug1]));
864: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
865: for (j = 0; j < r; j++) PetscCall(VecMDot(UU[j], neig, MX, &AUU[j * aug1 + r]));
866: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
867: for (j = 0; j < neig; j++) PetscCall(VecMDot(XX[j], neig, MX, &AUU[(r + j) * aug1 + r]));
869: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
870: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
871: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, MU, &AUAU[j * aug1]));
872: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
873: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], r, MU, &AUAU[(r + j) * aug1]));
874: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
875: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], neig, MX, &AUAU[j * aug1 + r]));
876: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
877: for (j = 0; j < neig; j++) PetscCall(VecMDot(MX[j], neig, MX, &AUAU[(r + j) * aug1 + r]));
879: /* Computation of the eigenvectors */
880: PetscCall(PetscBLASIntCast(aug1, &ldA));
881: PetscCall(PetscBLASIntCast(aug, &N));
882: lwork = 8 * N + 20; /* sizeof the working space */
883: PetscCall(PetscMalloc1(N, &wr));
884: PetscCall(PetscMalloc1(N, &wi));
885: PetscCall(PetscMalloc1(N, &beta));
886: PetscCall(PetscMalloc1(N, &modul));
887: PetscCall(PetscMalloc1(N, &perm));
888: PetscCall(PetscMalloc1(N * N, &Q));
889: PetscCall(PetscMalloc1(N * N, &Z));
890: PetscCall(PetscMalloc1(lwork, &work));
891: {
892: PetscBLASInt info = 0;
893: PetscCallBLAS("LAPACKgges", LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
894: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGGES %" PetscBLASInt_FMT, info);
895: }
896: for (i = 0; i < N; i++) {
897: if (beta[i] != 0.0) {
898: wr[i] /= beta[i];
899: wi[i] /= beta[i];
900: }
901: }
902: /* sort the eigenvalues */
903: for (i = 0; i < N; i++) modul[i] = PetscSqrtReal(wr[i] * wr[i] + wi[i] * wi[i]);
904: for (i = 0; i < N; i++) perm[i] = i;
905: PetscCall(PetscSortRealWithPermutation(N, modul, perm));
906: /* Save the norm of the largest eigenvalue */
907: if (dgmres->lambdaN < modul[perm[N - 1]]) dgmres->lambdaN = modul[perm[N - 1]];
908: /* Allocate space to extract the first r schur vectors */
909: if (!SR2) PetscCall(PetscMalloc1(aug1 * bmax, &SR2));
910: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
911: while (NbrEig < bmax) {
912: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
913: else NbrEig += 2;
914: }
915: if (NbrEig > bmax) PetscCall(PetscBLASIntCast(bmax - 1, &NbrEig));
916: r_old = r; /* previous size of r */
917: dgmres->r = r = NbrEig;
919: /* Select the eigenvalues to reorder */
920: PetscCall(PetscCalloc1(N, &select));
921: if (!dgmres->GreatestEig) {
922: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
923: } else {
924: for (j = 0; j < NbrEig; j++) select[perm[N - j - 1]] = 1;
925: }
926: /* Reorder and extract the new <r> schur vectors */
927: lwork = PetscMax(4 * N + 16, 2 * NbrEig * (N - NbrEig));
928: liwork = PetscMax(N + 6, 2 * NbrEig * (N - NbrEig));
929: PetscCall(PetscFree(work));
930: PetscCall(PetscMalloc1(lwork, &work));
931: PetscCall(PetscMalloc1(liwork, &iwork));
932: {
933: PetscBLASInt info = 0;
934: PetscReal Dif[2];
935: PetscBLASInt ijob = 2;
936: PetscBLASInt wantQ = 1, wantZ = 1;
937: PetscCallBLAS("LAPACKtgsen", LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &Dif[0], work, &lwork, iwork, &liwork, &info));
938: PetscCheck(info != 1, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Unable to reorder the eigenvalues with the LAPACK routine: ill-conditioned problem.");
939: }
940: PetscCall(PetscFree(select));
942: for (j = 0; j < r; j++) PetscCall(PetscArraycpy(&SR2[j * aug1], &Z[j * N], N));
944: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
945: -- save it temporarily in MU */
946: for (j = 0; j < r; j++) {
947: PetscCall(VecMAXPBY(MU[j], r_old, &SR2[j * aug1], 0, UU));
948: PetscCall(VecMAXPY(MU[j], neig, &SR2[j * aug1 + r_old], XX));
949: }
950: /* Form T = U'*MU*U */
951: for (j = 0; j < r; j++) {
952: PetscCall(VecCopy(MU[j], UU[j]));
953: PetscCall(KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP));
954: }
955: dgmres->matvecs += r;
956: for (j = 0; j < r; j++) PetscCall(VecMDot(MU[j], r, UU, &TT[j * bmax]));
957: /* Factorize T */
958: PetscCall(PetscArraycpy(TTF, TT, bmax * r));
959: PetscCall(PetscBLASIntCast(r, &nr));
960: PetscCall(PetscBLASIntCast(bmax, &bm));
961: {
962: PetscBLASInt info;
963: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
964: PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGETRF INFO=%" PetscBLASInt_FMT, info);
965: }
966: /* Free Memory */
967: PetscCall(PetscFree(wr));
968: PetscCall(PetscFree(wi));
969: PetscCall(PetscFree(beta));
970: PetscCall(PetscFree(modul));
971: PetscCall(PetscFree(perm));
972: PetscCall(PetscFree(Q));
973: PetscCall(PetscFree(Z));
974: PetscCall(PetscFree(work));
975: PetscCall(PetscFree(iwork));
976: PetscFunctionReturn(PETSC_SUCCESS);
977: }
979: /*MC
980: KSPDGMRES - Implements the deflated GMRES as defined in {cite}`erhel1996restarted` and {cite}`wakam2013memory`
982: Options Database Keys:
983: GMRES Options (inherited):
984: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
985: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
986: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially
987: (otherwise groups of vectors are allocated as needed)
988: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against
989: the Krylov space (fast) (the default)
990: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
991: . -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
992: stability of the classical Gram-Schmidt orthogonalization.
993: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
995: DGMRES Options Database Keys:
996: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
997: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative process
998: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
999: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1000: parsed by `PetscOptionsCreateViewer()`. If neig > 1, viewerspec should
1001: end with ":append". No vectors will be viewed if the adaptive
1002: strategy chooses not to deflate, so -ksp_dgmres_force should also
1003: be given.
1004: The deflation vectors span a subspace that may be a good
1005: approximation of the subspace of smallest eigenvectors of the
1006: preconditioned operator, so this option can aid in understanding
1007: the performance of a preconditioner.
1009: Level: beginner
1011: Notes:
1012: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported
1014: In this implementation, the adaptive strategy allows switching to deflated GMRES when the stagnation occurs.
1016: Contributed by:
1017: Desire NUENTSA WAKAM, INRIA
1019: .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPFGMRES`, `KSPLGMRES`,
1020: `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
1021: `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
1022: `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
1023: M*/
1025: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1026: {
1027: KSP_DGMRES *dgmres;
1029: PetscFunctionBegin;
1030: PetscCall(PetscNew(&dgmres));
1031: ksp->data = (void *)dgmres;
1033: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
1034: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
1035: PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
1037: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1038: ksp->ops->setup = KSPSetUp_DGMRES;
1039: ksp->ops->solve = KSPSolve_DGMRES;
1040: ksp->ops->destroy = KSPDestroy_DGMRES;
1041: ksp->ops->view = KSPView_DGMRES;
1042: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1043: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1044: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1046: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
1047: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
1048: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
1049: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
1050: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
1051: /* -- New functions defined in DGMRES -- */
1052: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES));
1053: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C", KSPDGMRESSetMaxEigen_DGMRES));
1054: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C", KSPDGMRESSetRatio_DGMRES));
1055: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C", KSPDGMRESForce_DGMRES));
1056: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES));
1057: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES));
1058: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES));
1059: PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES));
1061: PetscCall(PetscLogEventRegister("DGMRESCompDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData));
1062: PetscCall(PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation));
1064: dgmres->haptol = 1.0e-30;
1065: dgmres->q_preallocate = 0;
1066: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1067: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1068: dgmres->nrs = NULL;
1069: dgmres->sol_temp = NULL;
1070: dgmres->max_k = GMRES_DEFAULT_MAXK;
1071: dgmres->Rsvd = NULL;
1072: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1073: dgmres->orthogwork = NULL;
1075: /* Default values for the deflation */
1076: dgmres->r = 0;
1077: dgmres->neig = DGMRES_DEFAULT_EIG;
1078: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG - 1;
1079: dgmres->lambdaN = 0.0;
1080: dgmres->smv = SMV;
1081: dgmres->matvecs = 0;
1082: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1083: dgmres->HasSchur = PETSC_FALSE;
1084: PetscFunctionReturn(PETSC_SUCCESS);
1085: }