Actual source code: agmres.c

  1: #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>

  3: static PetscErrorCode KSPAGMRESBuildSoln(KSP, PetscInt);
  4: static PetscErrorCode KSPAGMRESBuildBasis(KSP);
  5: static PetscErrorCode KSPAGMRESBuildHessenberg(KSP);
  6: extern PetscErrorCode KSPSetUp_DGMRES(KSP);
  7: extern PetscErrorCode KSPBuildSolution_DGMRES(KSP, Vec, Vec *);
  8: extern PetscErrorCode KSPSolve_DGMRES(KSP);
  9: extern PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP, PetscInt *);
 10: extern PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP, PetscInt *);
 11: extern PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP, Vec, Vec);
 12: extern PetscErrorCode KSPDestroy_DGMRES(KSP);
 13: extern PetscErrorCode KSPSetFromOptions_DGMRES(KSP, PetscOptionItems);
 14: extern PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP, PetscInt);

 16: PetscLogEvent KSP_AGMRESComputeDeflationData, KSP_AGMRESBuildBasis, KSP_AGMRESComputeShifts, KSP_AGMRESRoddec;

 18: /*
 19:    This function allocates  data for the Newton basis GMRES implementation.
 20:    Note that most data are allocated in KSPSetUp_DGMRES and KSPSetUp_GMRES, including the space for the basis vectors,
 21:    the various Hessenberg matrices and the Givens rotations coefficients
 22: */
 23: static PetscErrorCode KSPSetUp_AGMRES(KSP ksp)
 24: {
 25:   PetscInt       hes;
 26:   PetscInt       nloc;
 27:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
 28:   PetscInt       neig   = agmres->neig;
 29:   const PetscInt max_k  = agmres->max_k;
 30:   PetscInt       N      = MAXKSPSIZE;
 31:   PetscInt       lwork  = PetscMax(8 * N + 16, 4 * neig * (N - neig));

 33:   PetscFunctionBegin;
 34:   N = MAXKSPSIZE;
 35:   /* Preallocate space during the call to KSPSetup_GMRES for the Krylov basis */
 36:   agmres->q_preallocate = PETSC_TRUE; /* No allocation on the fly */
 37:   /* Preallocate space to compute later the eigenvalues in GMRES */
 38:   ksp->calc_sings = PETSC_TRUE;
 39:   agmres->max_k   = N; /* Set the augmented size to be allocated in KSPSetup_GMRES */
 40:   PetscCall(KSPSetUp_DGMRES(ksp));
 41:   agmres->max_k = max_k;
 42:   hes           = (N + 1) * (N + 1);

 44:   /* Data for the Newton basis GMRES */
 45:   PetscCall(PetscCalloc4(max_k, &agmres->Rshift, max_k, &agmres->Ishift, hes, &agmres->Rloc, (N + 1) * 4, &agmres->wbufptr));
 46:   PetscCall(PetscMalloc3(N + 1, &agmres->tau, lwork, &agmres->work, N + 1, &agmres->nrs));
 47:   PetscCall(PetscCalloc4(N + 1, &agmres->Scale, N + 1, &agmres->sgn, N + 1, &agmres->tloc, N + 1, &agmres->temp));

 49:   /* Allocate space for the vectors in the orthogonalized basis*/
 50:   PetscCall(VecGetLocalSize(agmres->vecs[0], &nloc));
 51:   PetscCall(PetscMalloc1(nloc * (N + 1), &agmres->Qloc));

 53:   /* Init the ring of processors for the roddec orthogonalization */
 54:   PetscCall(KSPAGMRESRoddecInitNeighboor(ksp));

 56:   if (agmres->neig < 1) PetscFunctionReturn(PETSC_SUCCESS);

 58:   /* Allocate space for the deflation */
 59:   PetscCall(PetscMalloc1(N, &agmres->select));
 60:   PetscCall(VecDuplicateVecs(VEC_V(0), N, &agmres->TmpU));
 61:   PetscCall(PetscMalloc2(N * N, &agmres->MatEigL, N * N, &agmres->MatEigR));
 62:   /*  PetscCall(PetscMalloc6(N*N, &agmres->Q, N*N, &agmres->Z, N, &agmres->wr, N, &agmres->wi, N, &agmres->beta, N, &agmres->modul)); */
 63:   PetscCall(PetscMalloc3(N * N, &agmres->Q, N * N, &agmres->Z, N, &agmres->beta));
 64:   PetscCall(PetscMalloc2(N + 1, &agmres->perm, 2 * neig * N, &agmres->iwork));
 65:   PetscFunctionReturn(PETSC_SUCCESS);
 66: }

 68: /*
 69:     Returns the current solution from the private data structure of AGMRES back to ptr.
 70: */
 71: static PetscErrorCode KSPBuildSolution_AGMRES(KSP ksp, Vec ptr, Vec *result)
 72: {
 73:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;

 75:   PetscFunctionBegin;
 76:   if (!ptr) {
 77:     if (!agmres->sol_temp) {
 78:       PetscCall(VecDuplicate(ksp->vec_sol, &agmres->sol_temp));
 79:       PetscCall(VecCopy(ksp->vec_sol, agmres->sol_temp));
 80:     }
 81:     ptr = agmres->sol_temp;
 82:   } else {
 83:     PetscCall(VecCopy(ksp->vec_sol, ptr));
 84:   }
 85:   if (result) *result = ptr;
 86:   PetscFunctionReturn(PETSC_SUCCESS);
 87: }

 89: /* Computes the shift values (Ritz values) needed to generate stable basis vectors
 90:    One cycle of DGMRES is performed to find the eigenvalues. The same data structures are used since AGMRES extends DGMRES
 91:    Note that when the basis is  to be augmented, then this function computes the harmonic Ritz vectors from this first cycle.
 92:    Input :
 93:     - The operators (matrix, preconditioners and right-hand side) are  normally required.
 94:     - max_k : the size of the (non augmented) basis.
 95:     - neig: The number of eigenvectors to augment, if deflation is needed
 96:    Output :
 97:     - The shifts as complex pair of arrays in wr and wi (size max_k).
 98:     - The harmonic Ritz vectors (agmres->U) if deflation is needed.
 99: */
100: static PetscErrorCode KSPComputeShifts_DGMRES(KSP ksp)
101: {
102:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
103:   PetscInt       max_k  = agmres->max_k; /* size of the (non augmented) Krylov subspace */
104:   PetscInt       Neig   = 0;
105:   const PetscInt max_it = ksp->max_it;
106:   PetscBool      flg;

108:   /* Perform one cycle of dgmres to find the eigenvalues and compute the first approximations of the eigenvectors */

110:   PetscFunctionBegin;
111:   PetscCall(PetscLogEventBegin(KSP_AGMRESComputeShifts, ksp, 0, 0, 0));
112:   /* Send the size of the augmented basis to DGMRES */
113:   ksp->max_it             = max_k; /* set this to have DGMRES performing only one cycle */
114:   ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
115:   PetscCall(KSPSolve_DGMRES(ksp));
116:   ksp->guess_zero = PETSC_FALSE;
117:   if (ksp->reason == KSP_CONVERGED_RTOL) {
118:     PetscCall(PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0, 0, 0));
119:     PetscFunctionReturn(PETSC_SUCCESS);
120:   } else ksp->reason = KSP_CONVERGED_ITERATING;

122:   if ((agmres->r == 0) && (agmres->neig > 0)) { /* Compute the eigenvalues for the shifts and the eigenvectors (to augment the Newton basis) */
123:     agmres->HasSchur = PETSC_FALSE;
124:     PetscCall(KSPDGMRESComputeDeflationData_DGMRES(ksp, &Neig));
125:     Neig = max_k;
126:   } else { /* From DGMRES, compute only the eigenvalues needed as Shifts for the Newton Basis */
127:     PetscCall(KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig));
128:   }

130:   /* It may happen that the Ritz values from one cycle of GMRES are not accurate enough to provide a good stability. In this case, another cycle of GMRES is performed.  The two sets of values thus generated are sorted and the most accurate are kept as shifts */
131:   PetscCall(PetscOptionsHasName(NULL, NULL, "-ksp_agmres_ImproveShifts", &flg));
132:   if (!flg) {
133:     PetscCall(KSPAGMRESLejaOrdering(agmres->wr, agmres->wi, agmres->Rshift, agmres->Ishift, max_k));
134:   } else { /* Perform another cycle of DGMRES to find another set of eigenvalues */
135:     PetscInt     i;
136:     PetscScalar *wr, *wi, *Rshift, *Ishift;
137:     PetscCall(PetscMalloc4(2 * max_k, &wr, 2 * max_k, &wi, 2 * max_k, &Rshift, 2 * max_k, &Ishift));
138:     for (i = 0; i < max_k; i++) {
139:       wr[i] = agmres->wr[i];
140:       wi[i] = agmres->wi[i];
141:     }

143:     PetscCall(KSPSolve_DGMRES(ksp));

145:     ksp->guess_zero = PETSC_FALSE;
146:     if (ksp->reason == KSP_CONVERGED_RTOL) PetscFunctionReturn(PETSC_SUCCESS);
147:     else ksp->reason = KSP_CONVERGED_ITERATING;
148:     if (agmres->neig > 0) { /* Compute the eigenvalues for the shifts) and the eigenvectors (to augment the Newton basis */
149:       agmres->HasSchur = PETSC_FALSE;

151:       PetscCall(KSPDGMRESComputeDeflationData_DGMRES(ksp, &Neig));
152:       Neig = max_k;
153:     } else { /* From DGMRES, compute only the eigenvalues needed as Shifts for the Newton Basis */
154:       PetscCall(KSPDGMRESComputeSchurForm_DGMRES(ksp, &Neig));
155:     }
156:     for (i = 0; i < max_k; i++) {
157:       wr[max_k + i] = agmres->wr[i];
158:       wi[max_k + i] = agmres->wi[i];
159:     }
160:     PetscCall(KSPAGMRESLejaOrdering(wr, wi, Rshift, Ishift, 2 * max_k));
161:     for (i = 0; i < max_k; i++) {
162:       agmres->Rshift[i] = Rshift[i];
163:       agmres->Ishift[i] = Ishift[i];
164:     }
165:     PetscCall(PetscFree(Rshift));
166:     PetscCall(PetscFree(wr));
167:     PetscCall(PetscFree(Ishift));
168:     PetscCall(PetscFree(wi));
169:   }
170:   agmres->HasShifts = PETSC_TRUE;
171:   ksp->max_it       = max_it;
172:   PetscCall(PetscLogEventEnd(KSP_AGMRESComputeShifts, ksp, 0, 0, 0));
173:   PetscFunctionReturn(PETSC_SUCCESS);
174: }

176: /*
177:    Generate the basis vectors from the Newton polynomials with shifts and scaling factors
178:    The scaling factors are computed to obtain unit vectors. Note that this step can be avoided with the preprocessing option KSP_AGMRES_NONORM.
179:    Inputs :
180:     - Operators (Matrix and preconditioners and the first basis vector in VEC_V(0)
181:     - Shifts values in agmres->Rshift and agmres->Ishift.
182:    Output :
183:     - agmres->vecs or VEC_V : basis vectors
184:     - agmres->Scale : Scaling factors (equal to 1 if no scaling is done)
185: */
186: static PetscErrorCode KSPAGMRESBuildBasis(KSP ksp)
187: {
188:   KSP_AGMRES    *agmres  = (KSP_AGMRES *)ksp->data;
189:   PetscReal     *Rshift  = agmres->Rshift;
190:   PetscReal     *Ishift  = agmres->Ishift;
191:   PetscReal     *Scale   = agmres->Scale;
192:   const PetscInt max_k   = agmres->max_k;
193:   PetscInt       KspSize = KSPSIZE; /* if max_k == KspSizen then the basis should not be augmented */
194:   PetscInt       j       = 1;

196:   PetscFunctionBegin;
197:   PetscCall(PetscLogEventBegin(KSP_AGMRESBuildBasis, ksp, 0, 0, 0));
198:   Scale[0] = 1.0;
199:   while (j <= max_k) {
200:     if (Ishift[j - 1] == 0) {
201:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
202:         /* Apply the precond-matrix operators */
203:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_TMP, VEC_TMP_MATOP));
204:         /* Then apply deflation as a preconditioner */
205:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j)));
206:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
207:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j - 1), VEC_TMP));
208:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP));
209:       } else {
210:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_V(j), VEC_TMP_MATOP));
211:       }
212:       PetscCall(VecAXPY(VEC_V(j), -Rshift[j - 1], VEC_V(j - 1)));
213: #if defined(KSP_AGMRES_NONORM)
214:       Scale[j] = 1.0;
215: #else
216:       PetscCall(VecScale(VEC_V(j), Scale[j - 1])); /* This step can be postponed until all vectors are built */
217:       PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
218:       Scale[j] = 1.0 / Scale[j];
219: #endif

221:       agmres->matvecs += 1;
222:       j++;
223:     } else {
224:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
225:         /* Apply the precond-matrix operators */
226:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_TMP, VEC_TMP_MATOP));
227:         /* Then apply deflation as a preconditioner */
228:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j)));
229:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
230:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j - 1), VEC_TMP));
231:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP));
232:       } else {
233:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_V(j), VEC_TMP_MATOP));
234:       }
235:       PetscCall(VecAXPY(VEC_V(j), -Rshift[j - 1], VEC_V(j - 1)));
236: #if defined(KSP_AGMRES_NONORM)
237:       Scale[j] = 1.0;
238: #else
239:       PetscCall(VecScale(VEC_V(j), Scale[j - 1]));
240:       PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
241:       Scale[j] = 1.0 / Scale[j];
242: #endif
243:       agmres->matvecs += 1;
244:       j++;
245:       if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
246:         /* Apply the precond-matrix operators */
247:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_TMP, VEC_TMP_MATOP));
248:         /* Then apply deflation as a preconditioner */
249:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_V(j)));
250:       } else if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
251:         PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(j - 1), VEC_TMP));
252:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_TMP, VEC_V(j), VEC_TMP_MATOP));
253:       } else {
254:         PetscCall(KSP_PCApplyBAorAB(ksp, VEC_V(j - 1), VEC_V(j), VEC_TMP_MATOP));
255:       }
256:       PetscCall(VecAXPY(VEC_V(j), -Rshift[j - 2], VEC_V(j - 1)));
257:       PetscCall(VecAXPY(VEC_V(j), Scale[j - 2] * Ishift[j - 2] * Ishift[j - 2], VEC_V(j - 2)));
258: #if defined(KSP_AGMRES_NONORM)
259:       Scale[j] = 1.0;
260: #else
261:       PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
262:       Scale[j] = 1.0 / Scale[j];
263: #endif
264:       agmres->matvecs += 1;
265:       j++;
266:     }
267:   }
268:   /* Augment the subspace with the eigenvectors*/
269:   while (j <= KspSize) {
270:     PetscCall(KSP_PCApplyBAorAB(ksp, agmres->U[j - max_k - 1], VEC_V(j), VEC_TMP_MATOP));
271: #if defined(KSP_AGMRES_NONORM)
272:     Scale[j] = 1.0;
273: #else
274:     PetscCall(VecScale(VEC_V(j), Scale[j - 1]));
275:     PetscCall(VecNorm(VEC_V(j), NORM_2, &Scale[j]));
276:     Scale[j] = 1.0 / Scale[j];
277: #endif
278:     agmres->matvecs += 1;
279:     j++;
280:   }
281:   PetscCall(PetscLogEventEnd(KSP_AGMRESBuildBasis, ksp, 0, 0, 0));
282:   PetscFunctionReturn(PETSC_SUCCESS);
283: }

285: /*
286:   Form the Hessenberg matrix for the Arnoldi-like relation.
287:    Inputs :
288:    - Shifts values in agmres->Rshift and agmres->Ishift
289:    - RLoc : Triangular matrix from the RODDEC orthogonalization
290:    Outputs :
291:    - H = agmres->hh_origin : The Hessenberg matrix.

293:    NOTE: Note that the computed Hessenberg matrix is not mathematically equivalent to that in the real Arnoldi process (in KSP GMRES). If it is needed, it can be explicitly  formed as H <-- H * RLoc^-1.
294:  */
295: static PetscErrorCode KSPAGMRESBuildHessenberg(KSP ksp)
296: {
297:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
298:   PetscScalar   *Rshift = agmres->Rshift;
299:   PetscScalar   *Ishift = agmres->Ishift;
300:   PetscScalar   *Scale  = agmres->Scale;
301:   PetscInt       i = 0, j = 0;
302:   const PetscInt max_k   = agmres->max_k;
303:   PetscInt       KspSize = KSPSIZE;
304:   PetscInt       N       = MAXKSPSIZE + 1;

306:   PetscFunctionBegin;
307:   PetscCall(PetscArrayzero(agmres->hh_origin, (N + 1) * N));
308:   while (j < max_k) {
309:     /* Real shifts */
310:     if (Ishift[j] == 0) {
311:       for (i = 0; i <= j; i++) *H(i, j) = *RLOC(i, j + 1) / Scale[j] + (Rshift[j] * *RLOC(i, j));
312:       *H(j + 1, j) = *RLOC(j + 1, j + 1) / Scale[j];
313:       j++;
314:     } else if (Ishift[j] > 0) {
315:       for (i = 0; i <= j; i++) *H(i, j) = *RLOC(i, j + 1) / Scale[j] + Rshift[j] * *RLOC(i, j);
316:       *H(j + 1, j) = *RLOC(j + 1, j + 1) / Scale[j];
317:       j++;
318:       for (i = 0; i <= j; i++) *H(i, j) = (*RLOC(i, j + 1) + Rshift[j - 1] * *RLOC(i, j) - Scale[j - 1] * Ishift[j - 1] * Ishift[j - 1] * *RLOC(i, j - 1));
319:       *H(j, j)     = (*RLOC(j, j + 1) + Rshift[j - 1] * *RLOC(j, j));
320:       *H(j + 1, j) = *RLOC(j + 1, j + 1);
321:       j++;
322:     } else SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "BAD ORDERING OF THE SHIFTS VALUES IN THE NEWTON BASIS");
323:   }
324:   for (j = max_k; j < KspSize; j++) { /* take into account the norm of the augmented vectors */
325:     for (i = 0; i <= j + 1; i++) *H(i, j) = *RLOC(i, j + 1) / Scale[j];
326:   }
327:   PetscFunctionReturn(PETSC_SUCCESS);
328: }

330: /*
331:   Form the new approximate solution from the least-square problem
332: */
333: static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp, PetscInt it)
334: {
335:   KSP_AGMRES    *agmres = (KSP_AGMRES *)ksp->data;
336:   const PetscInt max_k  = agmres->max_k; /* Size of the non-augmented Krylov basis */
337:   PetscInt       i, j;
338:   PetscInt       r = agmres->r; /* current number of augmented eigenvectors */
339:   PetscBLASInt   KspSize;
340:   PetscBLASInt   lC;
341:   PetscBLASInt   N;
342:   PetscBLASInt   ldH;
343:   PetscBLASInt   lwork;
344:   PetscBLASInt   info, nrhs = 1;

346:   PetscFunctionBegin;
347:   PetscCall(PetscBLASIntCast(it + 1, &ldH));
348:   PetscCall(PetscBLASIntCast(KSPSIZE, &KspSize));
349:   PetscCall(PetscBLASIntCast(4 * (KspSize + 1), &lwork));
350:   PetscCall(PetscBLASIntCast(KspSize + 1, &lC));
351:   PetscCall(PetscBLASIntCast(MAXKSPSIZE + 1, &N));
352:   PetscCall(PetscBLASIntCast(N + 1, &ldH));
353:   /* Save a copy of the Hessenberg matrix */
354:   for (j = 0; j < N - 1; j++) {
355:     for (i = 0; i < N; i++) *HS(i, j) = *H(i, j);
356:   }
357:   /* QR factorize the Hessenberg matrix */
358:   PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info));
359:   PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XGEQRF INFO=%" PetscBLASInt_FMT, info);
360:   /* Update the right-hand side of the least square problem */
361:   PetscCall(PetscArrayzero(agmres->nrs, N));

363:   agmres->nrs[0] = ksp->rnorm;
364:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info));
365:   PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XORMQR INFO=%" PetscBLASInt_FMT, info);
366:   ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]);
367:   /* solve the least-square problem */
368:   PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info));
369:   PetscCheck(!info, PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "Error in LAPACK routine XTRTRS INFO=%" PetscBLASInt_FMT, info);
370:   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */
371:   PetscCall(VecMAXPBY(VEC_TMP, max_k, agmres->nrs, 0, &VEC_V(0)));
372:   if (!agmres->DeflPrecond) PetscCall(VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U));

374:   if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
375:     PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP));
376:     PetscCall(VecCopy(VEC_TMP_MATOP, VEC_TMP));
377:   }
378:   PetscCall(KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP));
379:   /* add the solution to the previous one */
380:   PetscCall(VecAXPY(ksp->vec_sol, 1.0, VEC_TMP));
381:   PetscFunctionReturn(PETSC_SUCCESS);
382: }

384: /*
385:    Run  one cycle of the Newton-basis gmres, possibly augmented with eigenvectors.

387:    Return residual history if requested.
388:    Input :
389:    - The vector VEC_V(0) is the initia residual
390:    Output :
391:     - the solution vector is in agmres->vec_sol
392:    - itcount : number of inner iterations
393:     - res : the new residual norm
394:  .
395:  NOTE: Unlike GMRES where the residual norm is available at each (inner) iteration,  here it is available at the end of the cycle.
396: */
397: static PetscErrorCode KSPAGMRESCycle(PetscInt *itcount, KSP ksp)
398: {
399:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
400:   PetscReal   res;
401:   PetscInt    KspSize = KSPSIZE;

403:   PetscFunctionBegin;
404:   /* check for the convergence */
405:   res = ksp->rnorm; /* Norm of the initial residual vector */
406:   if (!res) {
407:     if (itcount) *itcount = 0;
408:     ksp->reason = KSP_CONVERGED_ATOL;
409:     PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
410:     PetscFunctionReturn(PETSC_SUCCESS);
411:   }
412:   PetscCall((*ksp->converged)(ksp, ksp->its, res, &ksp->reason, ksp->cnvP));
413:   /* Build the Krylov basis with Newton polynomials */
414:   PetscCall(KSPAGMRESBuildBasis(ksp));
415:   /* QR Factorize the basis with RODDEC */
416:   PetscCall(KSPAGMRESRoddec(ksp, KspSize + 1));

418:   /* Recover a (partial) Hessenberg matrix for the Arnoldi-like relation */
419:   PetscCall(KSPAGMRESBuildHessenberg(ksp));
420:   /* Solve the least square problem and unwind the preconditioner */
421:   PetscCall(KSPAGMRESBuildSoln(ksp, KspSize));

423:   res = ksp->rnorm;
424:   ksp->its += KspSize;
425:   agmres->it = KspSize - 1;
426:   /*  Test for the convergence */
427:   PetscCall((*ksp->converged)(ksp, ksp->its, res, &ksp->reason, ksp->cnvP));
428:   PetscCall(KSPLogResidualHistory(ksp, res));
429:   PetscCall(KSPMonitor(ksp, ksp->its, res));

431:   *itcount = KspSize;
432:   PetscFunctionReturn(PETSC_SUCCESS);
433: }

435: static PetscErrorCode KSPSolve_AGMRES(KSP ksp)
436: {
437:   PetscInt    its;
438:   KSP_AGMRES *agmres     = (KSP_AGMRES *)ksp->data;
439:   PetscBool   guess_zero = ksp->guess_zero;
440:   PetscReal   res_old, res;
441:   PetscInt    test;

443:   PetscFunctionBegin;
444:   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
445:   ksp->its = 0;
446:   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));

448:   if (!agmres->HasShifts) { /* Compute Shifts for the Newton basis */
449:     PetscCall(KSPComputeShifts_DGMRES(ksp));
450:   }
451:   /* NOTE: At this step, the initial guess is not equal to zero since one cycle of the classical GMRES is performed to compute the shifts */
452:   PetscCall((*ksp->converged)(ksp, 0, ksp->rnorm, &ksp->reason, ksp->cnvP));
453:   while (!ksp->reason) {
454:     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TMP, VEC_TMP_MATOP, VEC_V(0), ksp->vec_rhs));
455:     if ((ksp->pc_side == PC_LEFT) && agmres->r && agmres->DeflPrecond) {
456:       PetscCall(KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_V(0), VEC_TMP));
457:       PetscCall(VecCopy(VEC_TMP, VEC_V(0)));

459:       agmres->matvecs += 1;
460:     }
461:     PetscCall(VecNormalize(VEC_V(0), &ksp->rnorm));
462:     KSPCheckNorm(ksp, ksp->rnorm);
463:     res_old = ksp->rnorm; /* Record the residual norm to test if deflation is needed */

465:     ksp->ops->buildsolution = KSPBuildSolution_AGMRES;

467:     PetscCall(KSPAGMRESCycle(&its, ksp));
468:     if (ksp->its >= ksp->max_it) {
469:       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
470:       break;
471:     }
472:     /* compute the eigenvectors to augment the subspace : use an adaptive strategy */
473:     res = ksp->rnorm;
474:     if (!ksp->reason && agmres->neig > 0) {
475:       test = agmres->max_k * PetscLogReal(ksp->rtol / res) / PetscLogReal(res / res_old); /* estimate the remaining number of steps */
476:       if ((test > agmres->smv * (ksp->max_it - ksp->its)) || agmres->force) {
477:         /* Augment the number of eigenvalues to deflate if the convergence is too slow */
478:         if (!agmres->force && ((test > agmres->bgv * (ksp->max_it - ksp->its)) && ((agmres->r + 1) < agmres->max_neig))) agmres->neig += 1;
479:         PetscCall(KSPDGMRESComputeDeflationData_DGMRES(ksp, &agmres->neig));
480:       }
481:     }
482:     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
483:   }
484:   ksp->guess_zero = guess_zero; /* restore if user has provided nonzero initial guess */
485:   PetscFunctionReturn(PETSC_SUCCESS);
486: }

488: static PetscErrorCode KSPDestroy_AGMRES(KSP ksp)
489: {
490:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;

492:   PetscFunctionBegin;
493:   PetscCall(PetscFree(agmres->hh_origin));

495:   PetscCall(PetscFree(agmres->Qloc));
496:   PetscCall(PetscFree4(agmres->Rshift, agmres->Ishift, agmres->Rloc, agmres->wbufptr));
497:   PetscCall(PetscFree3(agmres->tau, agmres->work, agmres->nrs));
498:   PetscCall(PetscFree4(agmres->Scale, agmres->sgn, agmres->tloc, agmres->temp));

500:   PetscCall(PetscFree(agmres->select));
501:   PetscCall(PetscFree(agmres->wr));
502:   PetscCall(PetscFree(agmres->wi));
503:   if (agmres->neig) {
504:     PetscCall(VecDestroyVecs(MAXKSPSIZE, &agmres->TmpU));
505:     PetscCall(PetscFree(agmres->perm));
506:     PetscCall(PetscFree(agmres->MatEigL));
507:     PetscCall(PetscFree(agmres->MatEigR));
508:     PetscCall(PetscFree(agmres->Q));
509:     PetscCall(PetscFree(agmres->Z));
510:     PetscCall(PetscFree(agmres->beta));
511:   }
512:   PetscCall(KSPDestroy_DGMRES(ksp));
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: static PetscErrorCode KSPView_AGMRES(KSP ksp, PetscViewer viewer)
517: {
518:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
519:   const char *cstr   = "RODDEC ORTHOGONOLIZATION";
520:   PetscBool   isascii, isstring;
521: #if defined(KSP_AGMRES_NONORM)
522:   const char *Nstr = "SCALING FACTORS : NO";
523: #else
524:   const char *Nstr = "SCALING FACTORS : YES";
525: #endif

527:   PetscFunctionBegin;
528:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
529:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));

531:   if (isascii) {
532:     PetscCall(PetscViewerASCIIPrintf(viewer, " restart=%" PetscInt_FMT " using %s\n", agmres->max_k, cstr));
533:     PetscCall(PetscViewerASCIIPrintf(viewer, " %s\n", Nstr));
534:     PetscCall(PetscViewerASCIIPrintf(viewer, " Number of matvecs : %" PetscInt_FMT "\n", agmres->matvecs));
535:     PetscCall(PetscViewerASCIIPrintf(viewer, " Adaptive strategy is used: %s\n", PetscBools[agmres->force]));
536:     if (agmres->DeflPrecond) {
537:       PetscCall(PetscViewerASCIIPrintf(viewer, " STRATEGY OF DEFLATION: PRECONDITIONER \n"));
538:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Frequency of extracted eigenvalues = %" PetscInt_FMT "\n", agmres->neig));
539:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Total number of extracted eigenvalues = %" PetscInt_FMT "\n", agmres->r));
540:       PetscCall(PetscViewerASCIIPrintf(viewer, "  Maximum number of eigenvalues set to be extracted = %" PetscInt_FMT "\n", agmres->max_neig));
541:     } else {
542:       PetscCall(PetscViewerASCIIPrintf(viewer, " STRATEGY OF DEFLATION: AUGMENT\n"));
543:       PetscCall(PetscViewerASCIIPrintf(viewer, " augmented vectors  %" PetscInt_FMT " at frequency %" PetscInt_FMT " with %sRitz vectors\n", agmres->r, agmres->neig, agmres->ritz ? "" : "Harmonic "));
544:     }
545:     PetscCall(PetscViewerASCIIPrintf(viewer, " Minimum relaxation parameter for the adaptive strategy(smv)  = %g\n", (double)agmres->smv));
546:     PetscCall(PetscViewerASCIIPrintf(viewer, " Maximum relaxation parameter for the adaptive strategy(bgv)  = %g\n", (double)agmres->bgv));
547:   } else if (isstring) {
548:     PetscCall(PetscViewerStringSPrintf(viewer, "%s restart %" PetscInt_FMT, cstr, agmres->max_k));
549:   }
550:   PetscFunctionReturn(PETSC_SUCCESS);
551: }

553: static PetscErrorCode KSPSetFromOptions_AGMRES(KSP ksp, PetscOptionItems PetscOptionsObject)
554: {
555:   PetscInt    neig;
556:   KSP_AGMRES *agmres = (KSP_AGMRES *)ksp->data;
557:   PetscBool   flg;

559:   PetscFunctionBegin;
560:   PetscCall(KSPSetFromOptions_DGMRES(ksp, PetscOptionsObject)); /* Set common options from DGMRES and GMRES */
561:   PetscOptionsHeadBegin(PetscOptionsObject, "KSP AGMRES Options");
562:   PetscCall(PetscOptionsInt("-ksp_agmres_eigen", "Number of eigenvalues to deflate", "KSPDGMRESSetEigen", agmres->neig, &neig, &flg));
563:   if (flg) {
564:     PetscCall(KSPDGMRESSetEigen_DGMRES(ksp, neig));
565:     agmres->r = 0;
566:   } else agmres->neig = 0;
567:   PetscCall(PetscOptionsInt("-ksp_agmres_maxeigen", "Maximum number of eigenvalues to deflate", "KSPDGMRESSetMaxEigen", agmres->max_neig, &neig, &flg));
568:   if (flg) agmres->max_neig = neig + EIG_OFFSET;
569:   else agmres->max_neig = agmres->neig + EIG_OFFSET;
570:   PetscCall(PetscOptionsBool("-ksp_agmres_DeflPrecond", "Determine if the deflation should be applied as a preconditioner -- similar to KSP DGMRES", "KSPGMRESDeflPrecond", agmres->DeflPrecond, &agmres->DeflPrecond, NULL));
571:   PetscCall(PetscOptionsBool("-ksp_agmres_ritz", "Compute the Ritz vectors instead of the Harmonic Ritz vectors ", "KSPGMRESHarmonic", agmres->ritz, &agmres->ritz, &flg));
572:   PetscCall(PetscOptionsReal("-ksp_agmres_MinRatio", "Relaxation parameter in the adaptive strategy; smallest multiple of the remaining number of steps allowed", "KSPGMRESSetMinRatio", agmres->smv, &agmres->smv, NULL));
573:   PetscCall(PetscOptionsReal("-ksp_agmres_MaxRatio", "Relaxation parameter in the adaptive strategy; Largest multiple of the remaining number of steps allowed", "KSPGMRESSetMaxRatio", agmres->bgv, &agmres->bgv, &flg));
574:   PetscOptionsHeadEnd();
575:   PetscFunctionReturn(PETSC_SUCCESS);
576: }

578: /*MC
579:   KSPAGMRES - a Newton basis GMRES implementation with adaptive augmented eigenvectors.

581:   Options Database Keys:
582: +   -ksp_gmres_restart restart     - the number of Krylov directions
583: .   -ksp_gmres_krylov_monitor      - plot the Krylov space generated
584: .   -ksp_agmres_eigen neig         - Number of eigenvalues to deflate (Number of vectors to augment)
585: .   -ksp_agmres_maxeigen max_neig  - Maximum number of eigenvalues to deflate
586: .   -ksp_agmres_MinRatio minr      - Relaxation parameter in the adaptive strategy; smallest multiple of the remaining number of steps allowed
587: .   -ksp_agmres_MaxRatio maxr      - Relaxation parameter in the adaptive strategy; Largest multiple of the remaining number of steps allowed
588: .   -ksp_agmres_DeflPrecond        - Apply deflation as a preconditioner, this is similar to `KSPDGMRES` but it rather builds a Newton basis.
589: -   -ksp_dgmres_force (true|false) - Force the deflation at each restart.

591:   Level: intermediate

593:   Notes:
594:   Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not supported

596:   This code can be used as an experimental framework to combine several techniques in the particular case of GMRES.
597:   For instance, the computation of the shifts can be improved with techniques described in {cite}`philippe2012generation`. The orthogonalization technique can be replaced by TSQR {cite}`demmel2012communication`.

599:   The techniques used are best described in {cite}`wakam2011parallelism`. The contribution of this work is that it combines many of the previous work to reduce the amount
600:   of MPI messages and improve the robustness of the global approach by using deflation techniques. Please see {cite}`wakam2011parallelism` for numerical experiments and {cite}`wakam2013memory`
601:   for a description of these problems.
602:   The generation of the basis can be done using s-steps approaches{cite}`mohiyuddin2009minimizing`. See also {cite}`sidje1997alternatives` and {cite}`bai1994newton`.

604:   Developer Note:
605:   This object is subclassed off of `KSPDGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code

607:   Contributed by:
608:   Desire NUENTSA WAKAM, INRIA <desire.nuentsa_wakam@inria.fr> with inputs from Guy Atenekeng <atenekeng@yahoo.com> and R.B. Sidje <roger.b.sidje@ua.edu>

610: .seealso: [](ch_ksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPDGMRES`, `KSPPGMRES`,
611:            `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
612:            `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
613:            `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`, `KSPSetPCSide()`
614:  M*/

616: PETSC_EXTERN PetscErrorCode KSPCreate_AGMRES(KSP ksp)
617: {
618:   KSP_AGMRES *agmres;

620:   PetscFunctionBegin;
621:   PetscCall(PetscNew(&agmres));
622:   ksp->data = (void *)agmres;

624:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
625:   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
626:   ksp->ops->buildsolution                = KSPBuildSolution_AGMRES;
627:   ksp->ops->setup                        = KSPSetUp_AGMRES;
628:   ksp->ops->solve                        = KSPSolve_AGMRES;
629:   ksp->ops->destroy                      = KSPDestroy_AGMRES;
630:   ksp->ops->view                         = KSPView_AGMRES;
631:   ksp->ops->setfromoptions               = KSPSetFromOptions_AGMRES;
632:   ksp->guess_zero                        = PETSC_TRUE;
633:   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
634:   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;

636:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
637:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
638:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
639:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES));
640:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
641:   /* -- New functions defined in DGMRES -- */
642:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C", KSPDGMRESSetEigen_DGMRES));
643:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C", KSPDGMRESComputeSchurForm_DGMRES));
644:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C", KSPDGMRESComputeDeflationData_DGMRES));
645:   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C", KSPDGMRESApplyDeflation_DGMRES));

647:   PetscCall(PetscLogEventRegister("AGMRESCompDefl", KSP_CLASSID, &KSP_AGMRESComputeDeflationData));
648:   PetscCall(PetscLogEventRegister("AGMRESBuildBasis", KSP_CLASSID, &KSP_AGMRESBuildBasis));
649:   PetscCall(PetscLogEventRegister("AGMRESCompShifts", KSP_CLASSID, &KSP_AGMRESComputeShifts));
650:   PetscCall(PetscLogEventRegister("AGMRESOrthog", KSP_CLASSID, &KSP_AGMRESRoddec));

652:   agmres->haptol         = 1.0e-30;
653:   agmres->q_preallocate  = 0;
654:   agmres->delta_allocate = AGMRES_DELTA_DIRECTIONS;
655:   agmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
656:   agmres->nrs            = NULL;
657:   agmres->sol_temp       = NULL;
658:   agmres->max_k          = AGMRES_DEFAULT_MAXK;
659:   agmres->Rsvd           = NULL;
660:   agmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
661:   agmres->orthogwork     = NULL;

663:   /* Default values for the deflation */
664:   agmres->r           = 0;
665:   agmres->neig        = 0;
666:   agmres->max_neig    = 0;
667:   agmres->lambdaN     = 0.0;
668:   agmres->smv         = SMV;
669:   agmres->bgv         = 1;
670:   agmres->force       = PETSC_FALSE;
671:   agmres->matvecs     = 0;
672:   agmres->improve     = PETSC_FALSE;
673:   agmres->HasShifts   = PETSC_FALSE;
674:   agmres->r           = 0;
675:   agmres->HasSchur    = PETSC_FALSE;
676:   agmres->DeflPrecond = PETSC_FALSE;
677:   PetscCall(PetscObjectGetNewTag((PetscObject)ksp, &agmres->tag));
678:   PetscFunctionReturn(PETSC_SUCCESS);
679: }