Actual source code: agmres.c

  1: #define PETSCKSP_DLL

  3: #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>

  5: static PetscErrorCode KSPAGMRESBuildSoln(KSP, PetscInt);
  6: static PetscErrorCode KSPAGMRESBuildBasis(KSP);
  7: static PetscErrorCode KSPAGMRESBuildHessenberg(KSP);
  8: extern PetscErrorCode KSPSetUp_DGMRES(KSP);
  9: extern PetscErrorCode KSPBuildSolution_DGMRES(KSP, Vec, Vec *);
 10: extern PetscErrorCode KSPSolve_DGMRES(KSP);
 11: extern PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP, PetscInt *);
 12: extern PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP, PetscInt *);
 13: extern PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP, Vec, Vec);
 14: extern PetscErrorCode KSPDestroy_DGMRES(KSP);
 15: extern PetscErrorCode KSPSetFromOptions_DGMRES(KSP, PetscOptionItems *);
 16: extern PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP, PetscInt);

 18: PetscLogEvent KSP_AGMRESComputeDeflationData, KSP_AGMRESBuildBasis, KSP_AGMRESComputeShifts, KSP_AGMRESRoddec;

 20: /*
 21:    This function allocates  data for the Newton basis GMRES implementation.
 22:    Note that most data are allocated in KSPSetUp_DGMRES and KSPSetUp_GMRES, including the space for the basis vectors,
 23:    the various Hessenberg matrices and the Givens rotations coefficients
 24: */
 25: static PetscErrorCode KSPSetUp_AGMRES(KSP ksp)
 26: {
 27:   PetscInt       hes;
 28:   PetscInt       nloc;
 29:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
 30:   PetscInt       neig   = agmres->neig;
 31:   const PetscInt max_k  = agmres->max_k;
 32:   PetscInt       N      = MAXKSPSIZE;
 33:   PetscInt       lwork  = PetscMax(8 * N + 16, 4 * neig * (N - neig));

 35:   PetscFunctionBegin;
 36:   N = MAXKSPSIZE;
 37:   /* Preallocate space during the call to KSPSetup_GMRES for the Krylov basis */
 38:   agmres->q_preallocate = PETSC_TRUE; /* No allocation on the fly */
 39:   /* Preallocate space to compute later the eigenvalues in GMRES */
 40:   ksp->calc_sings = PETSC_TRUE;
 41:   agmres->max_k   = N; /* Set the augmented size to be allocated in KSPSetup_GMRES */
 42:   PetscCall(KSPSetUp_DGMRES(ksp));
 43:   agmres->max_k = max_k;
 44:   hes           = (N + 1) * (N + 1);

 46:   /* Data for the Newton basis GMRES */
 47:   PetscCall(PetscCalloc4(max_k, &agmres->Rshift, max_k, &agmres->Ishift, hes, &agmres->Rloc, (N + 1) * 4, &agmres->wbufptr));
 48:   PetscCall(PetscMalloc3(N + 1, &agmres->tau, lwork, &agmres->work, N + 1, &agmres->nrs));
 49:   PetscCall(PetscCalloc4(N + 1, &agmres->Scale, N + 1, &agmres->sgn, N + 1, &agmres->tloc, N + 1, &agmres->temp));

 51:   /* Allocate space for the vectors in the orthogonalized basis*/
 52:   PetscCall(VecGetLocalSize(agmres->vecs[0], &nloc));
 53:   PetscCall(PetscMalloc1(nloc * (N + 1), &agmres->Qloc));

 55:   /* Init the ring of processors for the roddec orthogonalization */
 56:   PetscCall(KSPAGMRESRoddecInitNeighboor(ksp));

 58:   if (agmres->neig < 1) PetscFunctionReturn(PETSC_SUCCESS);

 60:   /* Allocate space for the deflation */
 61:   PetscCall(PetscMalloc1(N, &agmres->select));
 62:   PetscCall(VecDuplicateVecs(VEC_V(0), N, &agmres->TmpU));
 63:   PetscCall(PetscMalloc2(N * N, &agmres->MatEigL, N * N, &agmres->MatEigR));
 64:   /*  PetscCall(PetscMalloc6(N*N, &agmres->Q, N*N, &agmres->Z, N, &agmres->wr, N, &agmres->wi, N, &agmres->beta, N, &agmres->modul)); */
 65:   PetscCall(PetscMalloc3(N * N, &agmres->Q, N * N, &agmres->Z, N, &agmres->beta));
 66:   PetscCall(PetscMalloc2(N + 1, &agmres->perm, 2 * neig * N, &agmres->iwork));
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: /*
 71:     Returns the current solution from the private data structure of AGMRES back to ptr.
 72: */
 73: static PetscErrorCode KSPBuildSolution_AGMRES(KSP ksp, Vec ptr, Vec *result)
 74: {
 75:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;

 77:   PetscFunctionBegin;
 78:   if (!ptr) {
 79:     if (!agmres->sol_temp) {
 80:       PetscCall(VecDuplicate(ksp->vec_sol, &agmres->sol_temp));
 81:       PetscCall(VecCopy(ksp->vec_sol, agmres->sol_temp));
 82:     }
 83:     ptr = agmres->sol_temp;
 84:   } else {
 85:     PetscCall(VecCopy(ksp->vec_sol, ptr));
 86:   }
 87:   if (result) *result = ptr;
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }

 91: /* Computes the shift values (Ritz values) needed to generate stable basis vectors
 92:    One cycle of DGMRES is performed to find the eigenvalues. The same data structures are used since AGMRES extends DGMRES
 93:    Note that when the basis is  to be augmented, then this function computes the harmonic Ritz vectors from this first cycle.
 94:    Input :
 95:     - The operators (matrix, preconditioners and right-hand side) are  normally required.
 96:     - max_k : the size of the (non augmented) basis.
 97:     - neig: The number of eigenvectors to augment, if deflation is needed
 98:    Output :
 99:     - The shifts as complex pair of arrays in wr and wi (size max_k).
100:     - The harmonic Ritz vectors (agmres->U) if deflation is needed.
101: */
102: static PetscErrorCode KSPComputeShifts_DGMRES(KSP ksp)
103: {
104:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
105:   PetscInt       max_k  = agmres->max_k; /* size of the (non augmented) Krylov subspace */
106:   PetscInt       Neig   = 0;
107:   const PetscInt max_it = ksp->max_it;
108:   PetscBool      flg;

110:   /* Perform one cycle of dgmres to find the eigenvalues and compute the first approximations of the eigenvectors */

112:   PetscFunctionBegin;
113:   PetscCall(PetscLogEventBegin(KSP_AGMRESComputeShifts, ksp, 0, 0, 0));
114:   /* Send the size of the augmented basis to DGMRES */
115:   ksp->max_it             = max_k; /* set this to have DGMRES performing only one cycle */
116:   ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
117:   PetscCall(KSPSolve_DGMRES(ksp));
118:   ksp->guess_zero = PETSC_FALSE;
119:   if (ksp->reason == KSP_CONVERGED_RTOL) {
120:     PetscCall(PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0, 0, 0));
121:     PetscFunctionReturn(PETSC_SUCCESS);
122:   } else ksp->reason = KSP_CONVERGED_ITERATING;

124:   if ((agmres->r == 0) && (agmres->neig > 0)) { /* Compute the eigenvalues for the shifts and the eigenvectors (to augment the Newton basis) */
125:     agmres->HasSchur = PETSC_FALSE;
126:     PetscCall(KSPDGMRESComputeDeflationData_DGMRES(ksp, &Neig));
127:     Neig = max_k;
128:   } else { /* From DGMRES, compute only the eigenvalues needed as Shifts for the Newton Basis */
129:     PetscCall(KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig));
130:   }

132:   /* It may happen that the Ritz values from one cycle of GMRES are not accurate enough to provide a good stability. In this case, another cycle of GMRES is performed.  The two sets of values thus generated are sorted and the most accurate are kept as shifts */
133:   PetscCall(PetscOptionsHasName(NULL, NULL, "-ksp_agmres_ImproveShifts", &flg));
134:   if (!flg) {
135:     PetscCall(KSPAGMRESLejaOrdering(agmres->wr, agmres->wi, agmres->Rshift, agmres->Ishift, max_k));
136:   } else { /* Perform another cycle of DGMRES to find another set of eigenvalues */
137:     PetscInt     i;
138:     PetscScalar *wr, *wi, *Rshift, *Ishift;
139:     PetscCall(PetscMalloc4(2 * max_k, &wr, 2 * max_k, &wi, 2 * max_k, &Rshift, 2 * max_k, &Ishift));
140:     for (i = 0; i < max_k; i++) {
141:       wr[i] = agmres->wr[i];
142:       wi[i] = agmres->wi[i];
143:     }

145:     PetscCall(KSPSolve_DGMRES(ksp));

147:     ksp->guess_zero = PETSC_FALSE;
148:     if (ksp->reason == KSP_CONVERGED_RTOL) PetscFunctionReturn(PETSC_SUCCESS);
149:     else ksp->reason = KSP_CONVERGED_ITERATING;
150:     if (agmres->neig > 0) { /* Compute the eigenvalues for the shifts) and the eigenvectors (to augment the Newton basis */
151:       agmres->HasSchur = PETSC_FALSE;

153:       PetscCall(KSPDGMRESComputeDeflationData_DGMRES(ksp, &Neig));
154:       Neig = max_k;
155:     } else { /* From DGMRES, compute only the eigenvalues needed as Shifts for the Newton Basis */
156:       PetscCall(KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig));
157:     }
158:     for (i = 0; i < max_k; i++) {
159:       wr[max_k + i] = agmres->wr[i];
160:       wi[max_k + i] = agmres->wi[i];
161:     }
162:     PetscCall(KSPAGMRESLejaOrdering(wr, wi, Rshift, Ishift, 2 * max_k));
163:     for (i = 0; i < max_k; i++) {
164:       agmres->Rshift[i] = Rshift[i];
165:       agmres->Ishift[i] = Ishift[i];
166:     }
167:     PetscCall(PetscFree(Rshift));
168:     PetscCall(PetscFree(wr));
169:     PetscCall(PetscFree(Ishift));
170:     PetscCall(PetscFree(wi));
171:   }
172:   agmres->HasShifts = PETSC_TRUE;
173:   ksp->max_it       = max_it;
174:   PetscCall(PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0, 0, 0));
175:   PetscFunctionReturn(PETSC_SUCCESS);
176: }

178: /*
179:    Generate the basis vectors from the Newton polynomials with shifts and scaling factors
180:    The scaling factors are computed to obtain unit vectors. Note that this step can be avoided with the preprocessing option KSP_AGMRES_NONORM.
181:    Inputs :
182:     - Operators (Matrix and preconditioners and the first basis vector in VEC_V(0)
183:     - Shifts values in agmres->Rshift and agmres->Ishift.
184:    Output :
185:     - agmres->vecs or VEC_V : basis vectors
186:     - agmres->Scale : Scaling factors (equal to 1 if no scaling is done)
187: */
188: static PetscErrorCode KSPAGMRESBuildBasis(KSP ksp)
189: {
190:   KSP_AGMRES    *agmres  = (KSP_AGMRES *)ksp->data;
191:   PetscReal     *Rshift  = agmres->Rshift;
192:   PetscReal     *Ishift  = agmres->Ishift;
193:   PetscReal     *Scale   = agmres->Scale;
194:   const PetscInt max_k   = agmres->max_k;
195:   PetscInt       KspSize = KSPSIZE; /* if max_k == KspSizen then the basis should not be augmented */
196:   PetscInt       j       = 1;

198:   PetscFunctionBegin;
199:   PetscCall(PetscLogEventBegin(KSP_AGMRESBuildBasis, ksp, 0, 0, 0));
200:   Scale[0] = 1.0;
201:   while (j <= max_k) {
202:     if (Ishift[j - 1] == 0) {
203:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
204:         /* Apply the precond-matrix operators */
205:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_TMP, VEC_TMP_MATOP));
206:         /* Then apply deflation as a preconditioner */
207:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j)));
208:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
209:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j - 1), VEC_TMP));
210:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP));
211:       } else {
212:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_V(j), VEC_TMP_MATOP));
213:       }
214:       PetscCall(VecAXPY(VEC_V(j), -Rshift[j - 1], VEC_V(j - 1)));
215: #if defined(KSP_AGMRES_NONORM)
216:       Scale[j] = 1.0;
217: #else
218:       PetscCall(VecScale(VEC_V(j), Scale[j - 1])); /* This step can be postponed until all vectors are built */
219:       PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
220:       Scale[j] = 1.0 / Scale[j];
221: #endif

223:       agmres->matvecs += 1;
224:       j++;
225:     } else {
226:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
227:         /* Apply the precond-matrix operators */
228:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_TMP, VEC_TMP_MATOP));
229:         /* Then apply deflation as a preconditioner */
230:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j)));
231:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
232:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j - 1), VEC_TMP));
233:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP));
234:       } else {
235:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_V(j), VEC_TMP_MATOP));
236:       }
237:       PetscCall(VecAXPY(VEC_V(j), -Rshift[j - 1], VEC_V(j - 1)));
238: #if defined(KSP_AGMRES_NONORM)
239:       Scale[j] = 1.0;
240: #else
241:       PetscCall(VecScale(VEC_V(j), Scale[j - 1]));
242:       PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
243:       Scale[j] = 1.0 / Scale[j];
244: #endif
245:       agmres->matvecs += 1;
246:       j++;
247:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
248:         /* Apply the precond-matrix operators */
249:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_TMP, VEC_TMP_MATOP));
250:         /* Then apply deflation as a preconditioner */
251:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j)));
252:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
253:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j - 1), VEC_TMP));
254:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP));
255:       } else {
256:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_V(j), VEC_TMP_MATOP));
257:       }
258:       PetscCall(VecAXPY(VEC_V(j), -Rshift[j - 2], VEC_V(j - 1)));
259:       PetscCall(VecAXPY(VEC_V(j), Scale[j - 2] * Ishift[j - 2] * Ishift[j - 2], VEC_V(j - 2)));
260: #if defined(KSP_AGMRES_NONORM)
261:       Scale[j] = 1.0;
262: #else
263:       PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
264:       Scale[j] = 1.0 / Scale[j];
265: #endif
266:       agmres->matvecs += 1;
267:       j++;
268:     }
269:   }
270:   /* Augment the subspace with the eigenvectors*/
271:   while (j <= KspSize) {
272:     PetscCall(KSP_PCApplyBAorAB(ksp, agmres->U[j - max_k - 1], VEC_V(j), VEC_TMP_MATOP));
273: #if defined(KSP_AGMRES_NONORM)
274:     Scale[j] = 1.0;
275: #else
276:     PetscCall(VecScale(VEC_V(j), Scale[j - 1]));
277:     PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
278:     Scale[j] = 1.0 / Scale[j];
279: #endif
280:     agmres->matvecs += 1;
281:     j++;
282:   }
283:   PetscCall(PetscLogEventEnd(KSP_AGMRESBuildBasis, ksp, 0, 0, 0));
284:   PetscFunctionReturn(PETSC_SUCCESS);
285: }

287: /*
288:   Form the Hessenberg matrix for the Arnoldi-like relation.
289:    Inputs :
290:    - Shifts values in agmres->Rshift and agmres->Ishift
291:    - RLoc : Triangular matrix from the RODDEC orthogonalization
292:    Outputs :
293:    - H = agmres->hh_origin : The Hessenberg matrix.

295:    NOTE: Note that the computed Hessenberg matrix is not mathematically equivalent to that in the real Arnoldi process (in KSP GMRES). If it is needed, it can be explicitly  formed as H <-- H * RLoc^-1.
296:  */
297: static PetscErrorCode KSPAGMRESBuildHessenberg(KSP ksp)
298: {
299:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
300:   PetscScalar   *Rshift = agmres->Rshift;
301:   PetscScalar   *Ishift = agmres->Ishift;
302:   PetscScalar   *Scale  = agmres->Scale;
303:   PetscInt       i = 0, j = 0;
304:   const PetscInt max_k   = agmres->max_k;
305:   PetscInt       KspSize = KSPSIZE;
306:   PetscInt       N       = MAXKSPSIZE + 1;

308:   PetscFunctionBegin;
309:   PetscCall(PetscArrayzero(agmres->hh_origin, (N + 1) * N));
310:   while (j < max_k) {
311:     /* Real shifts */
312:     if (Ishift[j] == 0) {
313:       for (i = 0; i <= j; i++) *H(i, j) = *RLOC(i, j + 1) / Scale[j] + (Rshift[j] * *RLOC(i, j));
314:       *H(j + 1, j) = *RLOC(j + 1, j + 1) / Scale[j];
315:       j++;
316:     } else if (Ishift[j] > 0) {
317:       for (i = 0; i <= j; i++) *H(i, j) = *RLOC(i, j + 1) / Scale[j] + Rshift[j] * *RLOC(i, j);
318:       *H(j + 1, j) = *RLOC(j + 1, j + 1) / Scale[j];
319:       j++;
320:       for (i = 0; i <= j; i++) *H(i, j) = (*RLOC(i, j + 1) + Rshift[j - 1] * *RLOC(i, j) - Scale[j - 1] * Ishift[j - 1] * Ishift[j - 1] * *RLOC(i, j - 1));
321:       *H(j, j)     = (*RLOC(j, j + 1) + Rshift[j - 1] * *RLOC(j, j));
322:       *H(j + 1, j) = *RLOC(j + 1, j + 1);
323:       j++;
324:     } else SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "BAD ORDERING OF THE SHIFTS VALUES IN THE NEWTON BASIS");
325:   }
326:   for (j = max_k; j < KspSize; j++) { /* take into account the norm of the augmented vectors */
327:     for (i = 0; i <= j + 1; i++) *H(i, j) = *RLOC(i, j + 1) / Scale[j];
328:   }
329:   PetscFunctionReturn(PETSC_SUCCESS);
330: }

332: /*
333:   Form the new approximate solution from the least-square problem
334: */
335: static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp, PetscInt it)
336: {
337:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
338:   const PetscInt max_k  = agmres->max_k; /* Size of the non-augmented Krylov basis */
339:   PetscInt       i, j;
340:   PetscInt       r = agmres->r; /* current number of augmented eigenvectors */
341:   PetscBLASInt   KspSize;
342:   PetscBLASInt   lC;
343:   PetscBLASInt   N;
344:   PetscBLASInt   ldH = (PetscBLASInt)(it + 1);
345:   PetscBLASInt   lwork;
346:   PetscBLASInt   info, nrhs = 1;

348:   PetscFunctionBegin;
349:   PetscCall(PetscBLASIntCast(KSPSIZE, &KspSize));
350:   PetscCall(PetscBLASIntCast(4 * (KspSize + 1), &lwork));
351:   PetscCall(PetscBLASIntCast(KspSize + 1, &lC));
352:   PetscCall(PetscBLASIntCast(MAXKSPSIZE + 1, &N));
353:   PetscCall(PetscBLASIntCast(N + 1, &ldH));
354:   /* Save a copy of the Hessenberg matrix */
355:   for (j = 0; j < N - 1; j++) {
356:     for (i = 0; i < N; i++) *HS(i, j) = *H(i, j);
357:   }
358:   /* QR factorize the Hessenberg matrix */
359:   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info));
360:   PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGEQRF INFO=%" PetscBLASInt_FMT, info);
361:   /* Update the right-hand side of the least square problem */
362:   PetscCall(PetscArrayzero(agmres->nrs, N));

364:   agmres->nrs[0] = ksp->rnorm;
365:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info));
366:   PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XORMQR INFO=%" PetscBLASInt_FMT, info);
367:   ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]);
368:   /* solve the least-square problem */
369:   PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info));
370:   PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XTRTRS INFO=%" PetscBLASInt_FMT, info);
371:   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */
372:   PetscCall(VecMAXPBY(VEC_TMP, max_k, agmres->nrs, 0, &VEC_V(0)));
373:   if (!agmres->DeflPrecond) PetscCall(VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U));

375:   if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
376:     PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP));
377:     PetscCall(VecCopy(VEC_TMP_MATOP, VEC_TMP));
378:   }
379:   PetscCall(KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP));
380:   /* add the solution to the previous one */
381:   PetscCall(VecAXPY(ksp->vec_sol, 1.0, VEC_TMP));
382:   PetscFunctionReturn(PETSC_SUCCESS);
383: }

385: /*
386:    Run  one cycle of the Newton-basis gmres, possibly augmented with eigenvectors.

388:    Return residual history if requested.
389:    Input :
390:    - The vector VEC_V(0) is the initia residual
391:    Output :
392:     - the solution vector is in agmres->vec_sol
393:    - itcount : number of inner iterations
394:     - res : the new residual norm
395:  .
396:  NOTE: Unlike GMRES where the residual norm is available at each (inner) iteration,  here it is available at the end of the cycle.
397: */
398: static PetscErrorCode KSPAGMRESCycle(PetscInt *itcount, KSP ksp)
399: {
400:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
401:   PetscReal   res;
402:   PetscInt    KspSize = KSPSIZE;

404:   PetscFunctionBegin;
405:   /* check for the convergence */
406:   res = ksp->rnorm; /* Norm of the initial residual vector */
407:   if (!res) {
408:     if (itcount) *itcount = 0;
409:     ksp->reason = KSP_CONVERGED_ATOL;
410:     PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
411:     PetscFunctionReturn(PETSC_SUCCESS);
412:   }
413:   PetscCall((*ksp->converged)(ksp, ksp->its, res, &ksp->reason, ksp->cnvP));
414:   /* Build the Krylov basis with Newton polynomials */
415:   PetscCall(KSPAGMRESBuildBasis(ksp));
416:   /* QR Factorize the basis with RODDEC */
417:   PetscCall(KSPAGMRESRoddec(ksp, KspSize + 1));

419:   /* Recover a (partial) Hessenberg matrix for the Arnoldi-like relation */
420:   PetscCall(KSPAGMRESBuildHessenberg(ksp));
421:   /* Solve the least square problem and unwind the preconditioner */
422:   PetscCall(KSPAGMRESBuildSoln(ksp, KspSize));

424:   res = ksp->rnorm;
425:   ksp->its += KspSize;
426:   agmres->it = KspSize - 1;
427:   /*  Test for the convergence */
428:   PetscCall((*ksp->converged)(ksp, ksp->its, res, &ksp->reason, ksp->cnvP));
429:   PetscCall(KSPLogResidualHistory(ksp, res));
430:   PetscCall(KSPMonitor(ksp, ksp->its, res));

432:   *itcount = KspSize;
433:   PetscFunctionReturn(PETSC_SUCCESS);
434: }

436: static PetscErrorCode KSPSolve_AGMRES(KSP ksp)
437: {
438:   PetscInt    its;
439:   KSP_AGMRES *agmres     = (KSP_AGMRES *)ksp->data;
440:   PetscBool   guess_zero = ksp->guess_zero;
441:   PetscReal   res_old, res;
442:   PetscInt    test;

444:   PetscFunctionBegin;
445:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
446:   ksp->its = 0;
447:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));

449:   if (!agmres->HasShifts) { /* Compute Shifts for the Newton basis */
450:     PetscCall(KSPComputeShifts_DGMRES(ksp));
451:   }
452:   /* NOTE: At this step, the initial guess is not equal to zero since one cycle of the classical GMRES is performed to compute the shifts */
453:   PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP));
454:   while (!ksp->reason) {
455:     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TMP, VEC_TMP_MATOP, VEC_V(0), ksp->vec_rhs));
456:     if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
457:       PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(0), VEC_TMP));
458:       PetscCall(VecCopy(VEC_TMP, VEC_V(0)));

460:       agmres->matvecs += 1;
461:     }
462:     PetscCall(VecNormalize(VEC_V(0), &ksp->rnorm));
463:     KSPCheckNorm(ksp, ksp->rnorm);
464:     res_old = ksp->rnorm; /* Record the residual norm to test if deflation is needed */

466:     ksp->ops->buildsolution = KSPBuildSolution_AGMRES;

468:     PetscCall(KSPAGMRESCycle(&its, ksp));
469:     if (ksp->its >= ksp->max_it) {
470:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
471:       break;
472:     }
473:     /* compute the eigenvectors to augment the subspace : use an adaptive strategy */
474:     res = ksp->rnorm;
475:     if (!ksp->reason && agmres->neig > 0) {
476:       test = agmres->max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old); /* estimate the remaining number of steps */
477:       if ((test > agmres->smv * (ksp->max_it - ksp->its)) || agmres->force) {
478:         /* Augment the number of eigenvalues to deflate if the convergence is too slow */
479:         if (!agmres->force && ((test > agmres->bgv * (ksp->max_it - ksp->its)) && ((agmres->r + 1) < agmres->max_neig))) agmres->neig += 1;
480:         PetscCall(KSPDGMRESComputeDeflationData_DGMRES(ksp, &agmres->neig));
481:       }
482:     }
483:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
484:   }
485:   ksp->guess_zero = guess_zero; /* restore if user has provided nonzero initial guess */
486:   PetscFunctionReturn(PETSC_SUCCESS);
487: }

489: static PetscErrorCode KSPDestroy_AGMRES(KSP ksp)
490: {
491:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;

493:   PetscFunctionBegin;
494:   PetscCall(PetscFree(agmres->hh_origin));

496:   PetscCall(PetscFree(agmres->Qloc));
497:   PetscCall(PetscFree4(agmres->Rshift, agmres->Ishift, agmres->Rloc, agmres->wbufptr));
498:   PetscCall(PetscFree3(agmres->tau, agmres->work, agmres->nrs));
499:   PetscCall(PetscFree4(agmres->Scale, agmres->sgn, agmres->tloc, agmres->temp));

501:   PetscCall(PetscFree(agmres->select));
502:   PetscCall(PetscFree(agmres->wr));
503:   PetscCall(PetscFree(agmres->wi));
504:   if (agmres->neig) {
505:     PetscCall(VecDestroyVecs(MAXKSPSIZE, &agmres->TmpU));
506:     PetscCall(PetscFree(agmres->perm));
507:     PetscCall(PetscFree(agmres->MatEigL));
508:     PetscCall(PetscFree(agmres->MatEigR));
509:     PetscCall(PetscFree(agmres->Q));
510:     PetscCall(PetscFree(agmres->Z));
511:     PetscCall(PetscFree(agmres->beta));
512:   }
513:   PetscCall(KSPDestroy_DGMRES(ksp));
514:   PetscFunctionReturn(PETSC_SUCCESS);
515: }

517: static PetscErrorCode KSPView_AGMRES(KSP ksp, PetscViewer viewer)
518: {
519:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
520:   const char *cstr   = "RODDEC ORTHOGONOLIZATION";
521:   PetscBool   iascii, isstring;
522: #if defined(KSP_AGMRES_NONORM)
523:   const char *Nstr = "SCALING FACTORS : NO";
524: #else
525:   const char *Nstr = "SCALING FACTORS : YES";
526: #endif

528:   PetscFunctionBegin;
529:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
530:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));

532:   if (iascii) {
533:     PetscCall(PetscViewerASCIIPrintf(viewer, " restart=%" PetscInt_FMT " using %s\n", agmres->max_k, cstr));
534:     PetscCall(PetscViewerASCIIPrintf(viewer, " %s\n", Nstr));
535:     PetscCall(PetscViewerASCIIPrintf(viewer, " Number of matvecs : %" PetscInt_FMT "\n", agmres->matvecs));
536:     PetscCall(PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: %s\n", PetscBools[agmres->force]));
537:     if (agmres->DeflPrecond) {
538:       PetscCall(PetscViewerASCIIPrintf(viewer, " STRATEGY OF DEFLATION: PRECONDITIONER \n"));
539:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Frequency of extracted eigenvalues = %" PetscInt_FMT "\n", agmres->neig));
540:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Total number of extracted eigenvalues = %" PetscInt_FMT "\n", agmres->r));
541:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", agmres->max_neig));
542:     } else {
543:       PetscCall(PetscViewerASCIIPrintf(viewer, " STRATEGY OF DEFLATION: AUGMENT\n"));
544:       PetscCall(PetscViewerASCIIPrintf(viewer, " augmented vectors  %" PetscInt_FMT " at frequency %" PetscInt_FMT " with %sRitz vectors\n", agmres->r, agmres->neig, agmres->ritz ? "" : "Harmonic "));
545:     }
546:     PetscCall(PetscViewerASCIIPrintf(viewer, " Minimum relaxation parameter for the adaptive strategy(smv)  = %g\n", (double)agmres->smv));
547:     PetscCall(PetscViewerASCIIPrintf(viewer, " Maximum relaxation parameter for the adaptive strategy(bgv)  = %g\n", (double)agmres->bgv));
548:   } else if (isstring) {
549:     PetscCall(PetscViewerStringSPrintf(viewer, "%s restart %" PetscInt_FMT, cstr, agmres->max_k));
550:   }
551:   PetscFunctionReturn(PETSC_SUCCESS);
552: }

554: static PetscErrorCode KSPSetFromOptions_AGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
555: {
556:   PetscInt    neig;
557:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
558:   PetscBool   flg;

560:   PetscFunctionBegin;
561:   PetscCall(KSPSetFromOptions_DGMRES(ksp, PetscOptionsObject)); /* Set common options from DGMRES and GMRES */
562:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP AGMRES Options");
563:   PetscCall(PetscOptionsInt("-ksp_agmres_eigen", "Number of eigenvalues to deflate", "KSPDGMRESSetEigen", agmres->neig, &neig, &flg));
564:   if (flg) {
565:     PetscCall(KSPDGMRESSetEigen_DGMRES(ksp, neig));
566:     agmres->r = 0;
567:   } else agmres->neig = 0;
568:   PetscCall(PetscOptionsInt("-ksp_agmres_maxeigen", "Maximum number of eigenvalues to deflate", "KSPDGMRESSetMaxEigen", agmres->max_neig, &neig, &flg));
569:   if (flg) agmres->max_neig = neig + EIG_OFFSET;
570:   else agmres->max_neig = agmres->neig + EIG_OFFSET;
571:   PetscCall(PetscOptionsBool("-ksp_agmres_DeflPrecond", "Determine if the deflation should be applied as a preconditioner -- similar to KSP DGMRES", "KSPGMRESDeflPrecond", agmres->DeflPrecond, &agmres->DeflPrecond, NULL));
572:   PetscCall(PetscOptionsBool("-ksp_agmres_ritz", "Compute the Ritz vectors instead of the Harmonic Ritz vectors ", "KSPGMRESHarmonic", agmres->ritz, &agmres->ritz, &flg));
573:   PetscCall(PetscOptionsReal("-ksp_agmres_MinRatio", "Relaxation parameter in the adaptive strategy; smallest multiple of the remaining number of steps allowed", "KSPGMRESSetMinRatio", agmres->smv, &agmres->smv, NULL));
574:   PetscCall(PetscOptionsReal("-ksp_agmres_MaxRatio", "Relaxation parameter in the adaptive strategy; Largest multiple of the remaining number of steps allowed", "KSPGMRESSetMaxRatio", agmres->bgv, &agmres->bgv, &flg));
575:   PetscOptionsHeadEnd();
576:   PetscFunctionReturn(PETSC_SUCCESS);
577: }

579: /*MC
580:   KSPAGMRES - Newton basis GMRES implementation with adaptive augmented eigenvectors

582:   The techniques used are best described in {cite}`wakam2011parallelism`. The contribution of this work is that it combines many of the previous work to reduce the amount
583:   of MPI messages and improve the robustness of the global approach by using deflation techniques. Please see {cite}`wakam2011parallelism` for numerical experiments and {cite}`wakam2013memory`
584:   for a description of these problems. There are  many ongoing work that aim at avoiding (or minimizing) the communication in Krylov subspace methods.
585:   This code can be used as an experimental framework to combine several techniques in the particular case of GMRES.
586:   For instance, the computation of the shifts can be improved with techniques described in {cite}`philippe2012generation`. The orthogonalization technique can be replaced by TSQR {cite}`demmel2012communication`.
587:   The generation of the basis can be done using s-steps approaches{cite}`mohiyuddin2009minimizing`. See also {cite}`sidje1997alternatives` and {cite}`bai1994newton`.

589:   Options Database Keys:
590: +   -ksp_gmres_restart <restart>    -  the number of Krylov directions
591: .   -ksp_gmres_krylov_monitor       - plot the Krylov space generated
592: .   -ksp_agmres_eigen <neig>        - Number of eigenvalues to deflate (Number of vectors to augment)
593: .   -ksp_agmres_maxeigen <max_neig> - Maximum number of eigenvalues to deflate
594: .   -ksp_agmres_MinRatio <1>        - Relaxation parameter in the adaptive strategy; smallest multiple of the remaining number of steps allowed
595: .   -ksp_agmres_MaxRatio <1>        - Relaxation parameter in the adaptive strategy; Largest multiple of the remaining number of steps allowed
596: .   -ksp_agmres_DeflPrecond         - Apply deflation as a preconditioner, this is similar to `KSPDGMRES` but it rather builds a Newton basis.
597: -   -ksp_dgmres_force <0, 1>        - Force the deflation at each restart.

599:   Level: intermediate

601:   Note:
602:   Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported

604:   Developer Note:
605:   This object is subclassed off of `KSPDGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code

607:   Contributed by:
608:   Desire NUENTSA WAKAM, INRIA <desire.nuentsa_wakam@inria.fr> with inputs from Guy Atenekeng <atenekeng@yahoo.com> and R.B. Sidje <roger.b.sidje@ua.edu>

610: .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPDGMRES`, `KSPPGMRES`,
611:            `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
612:            `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
613:            `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
614:  M*/

616: PETSC_EXTERN PetscErrorCode KSPCreate_AGMRES(KSP ksp)
617: {
618:   KSP_AGMRES *agmres;

620:   PetscFunctionBegin;
621:   PetscCall(PetscNew(&agmres));
622:   ksp->data = (void *)agmres;

624:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
625:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
626:   ksp->ops->buildsolution                = KSPBuildSolution_AGMRES;
627:   ksp->ops->setup                        = KSPSetUp_AGMRES;
628:   ksp->ops->solve                        = KSPSolve_AGMRES;
629:   ksp->ops->destroy                      = KSPDestroy_AGMRES;
630:   ksp->ops->view                         = KSPView_AGMRES;
631:   ksp->ops->setfromoptions               = KSPSetFromOptions_AGMRES;
632:   ksp->guess_zero                        = PETSC_TRUE;
633:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
634:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

636:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
637:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
638:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
639:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
640:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
641:   /* -- New functions defined in DGMRES -- */
642:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES));
643:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES));
644:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES));
645:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES));

647:   PetscCall(PetscLogEventRegister("AGMRESCompDefl", KSP_CLASSID, &KSP_AGMRESComputeDeflationData));
648:   PetscCall(PetscLogEventRegister("AGMRESBuildBasis", KSP_CLASSID, &KSP_AGMRESBuildBasis));
649:   PetscCall(PetscLogEventRegister("AGMRESCompShifts", KSP_CLASSID, &KSP_AGMRESComputeShifts));
650:   PetscCall(PetscLogEventRegister("AGMRESOrthog", KSP_CLASSID, &KSP_AGMRESRoddec));

652:   agmres->haptol         = 1.0e-30;
653:   agmres->q_preallocate  = 0;
654:   agmres->delta_allocate = AGMRES_DELTA_DIRECTIONS;
655:   agmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
656:   agmres->nrs            = NULL;
657:   agmres->sol_temp       = NULL;
658:   agmres->max_k          = AGMRES_DEFAULT_MAXK;
659:   agmres->Rsvd           = NULL;
660:   agmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
661:   agmres->orthogwork     = NULL;

663:   /* Default values for the deflation */
664:   agmres->r           = 0;
665:   agmres->neig        = 0;
666:   agmres->max_neig    = 0;
667:   agmres->lambdaN     = 0.0;
668:   agmres->smv         = SMV;
669:   agmres->bgv         = 1;
670:   agmres->force       = PETSC_FALSE;
671:   agmres->matvecs     = 0;
672:   agmres->improve     = PETSC_FALSE;
673:   agmres->HasShifts   = PETSC_FALSE;
674:   agmres->r           = 0;
675:   agmres->HasSchur    = PETSC_FALSE;
676:   agmres->DeflPrecond = PETSC_FALSE;
677:   PetscCall(PetscObjectGetNewTag((PetscObject)ksp, &agmres->tag));
678:   PetscFunctionReturn(PETSC_SUCCESS);
679: }