/* bncg.c - PETSc TAO bound-constrained nonlinear conjugate gradient (BNCG) implementation */
1: #include <petsctaolinesearch.h>
2: #include <../src/tao/bound/impls/bncg/bncg.h>
3: #include <petscksp.h>
/* Human-readable names for the TaoBNCGType enum, indexed by TAO_BNCG_* value.
   Per the PetscEnum registration convention, the list of option names is followed
   by the enum type name, the option prefix, and a NULL terminator. */
const char *const TaoBNCGTypes[] = {"gd", "pcgd", "hs", "fr", "prp", "prp_plus", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "TAOBNCGType", "TAO_BNCG_", NULL};
/* Active-set estimation strategies selectable via -tao_bncg_as_type */
#define CG_AS_NONE      0 /* no estimation beyond projecting the gradient */
#define CG_AS_BERTSEKAS 1 /* Bertsekas-style estimate from a trial gradient step */
#define CG_AS_SIZE      2 /* number of valid strategies */

/* Option names for the strategies above, in CG_AS_* order. Sized to exactly
   CG_AS_SIZE (was an arbitrary, misleading [64]); PetscOptionsEList only ever
   reads CG_AS_SIZE entries. */
static const char *CG_AS_TYPE[CG_AS_SIZE] = {"none", "bertsekas"};
/*
  TaoBNCGEstimateActiveSet - Refreshes the active/inactive index sets for the
  bound constraints using the requested estimation strategy.

  Input Parameters:
+ tao    - the Tao solver context (data must be a TAO_BNCG)
- asType - one of CG_AS_NONE or CG_AS_BERTSEKAS (any other value is a no-op)

  Side effects: replaces cg->inactive_old with a copy of the previous inactive
  set, and rebuilds cg->active_idx / cg->inactive_idx (plus, for Bertsekas,
  cg->active_lower/upper/fixed and possibly a tightened cg->as_tol).
*/
PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  /* Snapshot the previous inactive set first so callers can detect newly released variables */
  PetscCall(ISDestroy(&cg->inactive_old));
  if (cg->inactive_idx) {
    PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old));
    PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old));
  }
  switch (asType) {
  case CG_AS_NONE:
    /* Inactive set = entries strictly inside the bounds (or on a bound but with
       a gradient pointing into the feasible region); active set is its complement */
    PetscCall(ISDestroy(&cg->inactive_idx));
    PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx));
    PetscCall(ISDestroy(&cg->active_idx));
    PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx));
    break;
  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    PetscCall(VecCopy(cg->unprojected_gradient, cg->W));
    PetscCall(VecScale(cg->W, -1.0)); /* W = -g is the trial descent direction */
    PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
42: PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
43: {
44: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
46: PetscFunctionBegin;
47: switch (asType) {
48: case CG_AS_NONE:
49: if (cg->active_idx) PetscCall(VecISSet(step, cg->active_idx, 0.0));
50: break;
51: case CG_AS_BERTSEKAS:
52: PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step));
53: break;
54: default:
55: break;
56: }
57: PetscFunctionReturn(PETSC_SUCCESS);
58: }
/*
  TaoSolve_BNCG - Driver for the bound-constrained nonlinear CG solve: projects
  the starting point, checks convergence via the Fischer-Burmeister residual,
  then iterates TaoBNCGConductIteration() until a termination reason is set.
*/
static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal step = 1.0, gnorm, gnorm2, resnorm;
  PetscInt nDiff;

  PetscFunctionBegin;
  /* Make sure the bounds are current and the line search knows about them */
  PetscCall(TaoComputeVariableBounds(tao));
  PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU));

  /* Project the initial point onto the feasible region; nDiff counts changed entries */
  PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution));

  /* Recompute f and g unless the point is unchanged and we are recycling a previous solve's state */
  if (nDiff > 0 || !tao->recycle) PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient));
  PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm));
  PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");

  /* Estimate the active set and compute the projected gradient */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));

  /* Project the gradient and calculate the norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Initialize counters */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  cg->resets = -1; /* -1 so that the initial TaoBNCGResetUpdate() below does not count as a reset */
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  /* Fischer-Burmeister residual is the bound-constrained optimality measure */
  PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
  PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
  PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
  PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
  PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
  PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  /* Calculate initial direction. */
  if (!tao->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) { /* exits via the convergence check below once a termination reason is set */
    /* Call general purpose update function */
    if (tao->ops->update) {
      PetscUseTypeMethod(tao, update, tao->niter, tao->user_update);
      /* The user update may have moved the iterate, so refresh the objective value */
      PetscCall(TaoComputeObjective(tao, tao->solution, &cg->f));
    }
    PetscCall(TaoBNCGConductIteration(tao, gnorm));
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  }
}
121: static PetscErrorCode TaoSetUp_BNCG(Tao tao)
122: {
123: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
125: PetscFunctionBegin;
126: if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
127: if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
128: if (!cg->W) PetscCall(VecDuplicate(tao->solution, &cg->W));
129: if (!cg->work) PetscCall(VecDuplicate(tao->solution, &cg->work));
130: if (!cg->sk) PetscCall(VecDuplicate(tao->solution, &cg->sk));
131: if (!cg->yk) PetscCall(VecDuplicate(tao->gradient, &cg->yk));
132: if (!cg->X_old) PetscCall(VecDuplicate(tao->solution, &cg->X_old));
133: if (!cg->G_old) PetscCall(VecDuplicate(tao->gradient, &cg->G_old));
134: if (cg->diag_scaling) {
135: PetscCall(VecDuplicate(tao->solution, &cg->d_work));
136: PetscCall(VecDuplicate(tao->solution, &cg->y_work));
137: PetscCall(VecDuplicate(tao->solution, &cg->g_work));
138: }
139: if (!cg->unprojected_gradient) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient));
140: if (!cg->unprojected_gradient_old) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old));
141: PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk));
142: if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc));
143: PetscFunctionReturn(PETSC_SUCCESS);
144: }
146: static PetscErrorCode TaoDestroy_BNCG(Tao tao)
147: {
148: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
150: PetscFunctionBegin;
151: if (tao->setupcalled) {
152: PetscCall(VecDestroy(&cg->W));
153: PetscCall(VecDestroy(&cg->work));
154: PetscCall(VecDestroy(&cg->X_old));
155: PetscCall(VecDestroy(&cg->G_old));
156: PetscCall(VecDestroy(&cg->unprojected_gradient));
157: PetscCall(VecDestroy(&cg->unprojected_gradient_old));
158: PetscCall(VecDestroy(&cg->g_work));
159: PetscCall(VecDestroy(&cg->d_work));
160: PetscCall(VecDestroy(&cg->y_work));
161: PetscCall(VecDestroy(&cg->sk));
162: PetscCall(VecDestroy(&cg->yk));
163: }
164: PetscCall(ISDestroy(&cg->active_lower));
165: PetscCall(ISDestroy(&cg->active_upper));
166: PetscCall(ISDestroy(&cg->active_fixed));
167: PetscCall(ISDestroy(&cg->active_idx));
168: PetscCall(ISDestroy(&cg->inactive_idx));
169: PetscCall(ISDestroy(&cg->inactive_old));
170: PetscCall(ISDestroy(&cg->new_inactives));
171: PetscCall(MatDestroy(&cg->B));
172: if (cg->pc) PetscCall(MatDestroy(&cg->pc));
173: PetscCall(PetscFree(tao->data));
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
/*
  TaoSetFromOptions_BNCG - Reads all -tao_bncg_* command-line options into the
  TAO_BNCG context, applies type-dependent defaults, and forwards the options
  prefix to the embedded LMVM scaling matrix.
*/
static PetscErrorCode TaoSetFromOptions_BNCG(Tao tao, PetscOptionItems PetscOptionsObject)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "Nonlinear Conjugate Gradient method for unconstrained optimization");
  PetscCall(PetscOptionsEnum("-tao_bncg_type", "CG update formula", "TaoBNCGTypes", TaoBNCGTypes, (PetscEnum)cg->cg_type, (PetscEnum *)&cg->cg_type, NULL));
  if (cg->cg_type != TAO_BNCG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
  if (TAO_BNCG_GD == cg->cg_type) {
    /* Plain gradient descent is implemented as PCGD with scaling disabled */
    cg->cg_type = TAO_BNCG_PCGD;
    /* Set scaling equal to none or, at best, scalar scaling. */
    cg->unscaled_restart = PETSC_TRUE;
    cg->diag_scaling = PETSC_FALSE;
  }
  PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) Parameter in the KD method", "", cg->xi, &cg->xi, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) Enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL));
  if (cg->no_scaling) {
    /* -tao_bncg_no_scaling overrides any scaling options chosen above */
    cg->diag_scaling = PETSC_FALSE;
    cg->alpha = -1.0;
  }
  if (cg->alpha == -1.0 && cg->cg_type == TAO_BNCG_KD && !cg->diag_scaling) { /* Some more default options that appear to be good. */
    cg->neg_xi = PETSC_TRUE;
  }
  PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) Use negative xi when it might be a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL));
  PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL));

  PetscOptionsHeadEnd();
  /* Forward this solver's prefix to the LMVM matrix so its options live under -tao_bncg_ */
  PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix));
  PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_"));
  PetscCall(MatSetFromOptions(cg->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
229: static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
230: {
231: PetscBool isascii;
232: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
234: PetscFunctionBegin;
235: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
236: if (isascii) {
237: PetscCall(PetscViewerASCIIPushTab(viewer));
238: PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", TaoBNCGTypes[cg->cg_type]));
239: PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates));
240: PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets));
241: PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps));
242: PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error));
243: PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails));
244: if (cg->diag_scaling) {
245: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
246: if (isascii) {
247: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
248: PetscCall(MatView(cg->B, viewer));
249: PetscCall(PetscViewerPopFormat(viewer));
250: }
251: }
252: PetscCall(PetscViewerASCIIPopTab(viewer));
253: }
254: PetscFunctionReturn(PETSC_SUCCESS);
255: }
257: PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
258: {
259: PetscReal a, b, c, sig1, sig2;
261: PetscFunctionBegin;
262: *scale = 0.0;
263: if (1.0 == alpha) *scale = yts / yty;
264: else if (0.0 == alpha) *scale = sts / yts;
265: else if (-1.0 == alpha) *scale = 1.0;
266: else {
267: a = yty;
268: b = yts;
269: c = sts;
270: a *= alpha;
271: b *= -(2.0 * alpha - 1.0);
272: c *= alpha - 1.0;
273: sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
274: sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
275: /* accept the positive root as the scalar */
276: if (sig1 > 0.0) *scale = sig1;
277: else if (sig2 > 0.0) *scale = sig2;
278: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
279: }
280: PetscFunctionReturn(PETSC_SUCCESS);
281: }
283: /*MC
284: TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.
286: Options Database Keys:
287: + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent `TaoSolve()` calls (currently disabled)
288: . -tao_bncg_eta r - restart tolerance
289: . -tao_bncg_type taocg_type - cg formula
290: . -tao_bncg_as_type (none|bertsekas) - active set estimation method
291: . -tao_bncg_as_tol r - tolerance used in Bertsekas active-set estimation
292: . -tao_bncg_as_step r - trial step length used in Bertsekas active-set estimation
293: . -tao_bncg_eps r - cutoff used for determining whether or not we restart based on steplength each iteration,
294: as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
295: . -tao_bncg_theta r - convex combination parameter for the Broyden method
296: . -tao_bncg_hz_eta r - cutoff tolerance for the beta term in the `hz`, `dk` methods
297: . -tao_bncg_dk_eta r - cutoff tolerance for the beta term in the `hz`, `dk` methods
298: . -tao_bncg_xi r - Multiplicative constant of the gamma term in the `kd` method
299: . -tao_bncg_hz_theta r - Multiplicative constant of the theta term for the `hz` method
300: . -tao_bncg_bfgs_scale r - Scaling parameter of the BFGS contribution to the scalar Broyden method
301: . -tao_bncg_dfp_scale r - Scaling parameter of the dfp contribution to the scalar Broyden method
302: . -tao_bncg_diag_scaling b - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
303: . -tao_bncg_dynamic_restart b - use dynamic restart strategy in the `hz`, `dk`, `kd` methods
304: . -tao_bncg_unscaled_restart b - whether or not to scale the gradient when doing gradient descent restarts
305: . -tao_bncg_zeta r - Scaling parameter in the `kd` method
306: . -tao_bncg_delta_min r - Minimum bound for rescaling during restarted gradient descent steps
307: . -tao_bncg_delta_max r - Maximum bound for rescaling during restarted gradient descent steps
308: . -tao_bncg_min_quad i - Number of quadratic-like steps in a row necessary to do a dynamic restart
309: . -tao_bncg_min_restart_num i - This number, x, makes sure there is a gradient descent step every $x*n$ iterations, where `n` is the dimension of the problem
310: . -tao_bncg_spaced_restart (true|false) - whether or not to do gradient descent steps every x*n iterations
311: . -tao_bncg_no_scaling b - If true, eliminates all scaling, including defaults.
312: - -tao_bncg_neg_xi b - Whether or not to use negative xi in the `kd` method under certain conditions
314: Note:
315: CG formulas are:
316: + `gd` - Gradient Descent
317: . `fr` - Fletcher-Reeves
. `prp` - Polak-Ribiere-Polyak
. `prp_plus` - Polak-Ribiere-Plus
. `hs` - Hestenes-Stiefel
321: . `dy` - Dai-Yuan
322: . `ssml_bfgs` - Self-Scaling Memoryless BFGS
323: . `ssml_dfp` - Self-Scaling Memoryless DFP
324: . `ssml_brdn` - Self-Scaling Memoryless Broyden
325: . `hz` - Hager-Zhang (CG_DESCENT 5.3)
326: . `dk` - Dai-Kou (2013)
327: - `kd` - Kou-Dai (2015)
329: Level: beginner
331: The various algorithmic factors can only be supplied via the options database
333: .seealso: `Tao`, `TAONTR`, `TAONTL`, `TAONM`, `TAOCG`, `TaoType`, `TaoCreate()`
334: M*/
/*
  TaoCreate_BNCG - Constructor registered for the TAOBNCG type: installs the
  ops table, creates the More-Thuente line search and the LMVM diagonal-Broyden
  scaling matrix, and fills in all algorithmic defaults. Ordering is
  significant: TaoParametersInitialize() must precede the parameter defaults,
  and PetscNew() must precede any use of cg->B.
*/
PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG *cg;
  const char *morethuente_type = TAOLINESEARCHMT;

  PetscFunctionBegin;
  tao->ops->setup = TaoSetUp_BNCG;
  tao->ops->solve = TaoSolve_BNCG;
  tao->ops->view = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy = TaoDestroy_BNCG;
  tao->uses_gradient = PETSC_TRUE;

  /* Override default settings (unless already changed) */
  PetscCall(TaoParametersInitialize(tao));
  PetscObjectParameterSetDefault(tao, max_it, 2000);
  PetscObjectParameterSetDefault(tao, max_funcs, 4000);

  /* Note: nondefault values should be used for nonlinear conjugate gradient */
  /* method. In particular, gtol should be less than 0.5; the value used in */
  /* Nocedal and Wright is 0.10. We use the default values for the */
  /* linesearch because it seems to work better. */
  PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1));
  PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type));
  PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao));

  /* Allocate the private context and the LMVM matrix used for diagonal scaling */
  PetscCall(PetscNew(&cg));
  tao->data = (void *)cg;
  PetscCall(KSPInitializePackage());
  PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B));
  PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1));
  PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN));

  cg->pc = NULL; /* optional user preconditioner; installed as LMVM J0 in setup if set */

  /* Algorithmic defaults; all are adjustable via -tao_bncg_* options */
  cg->dk_eta = 0.5;
  cg->hz_eta = 0.4;
  cg->dynamic_restart = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling = PETSC_FALSE;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;
  cg->theta = 1.0;
  cg->hz_theta = 1.0;
  cg->dfp_scale = 1.0;
  cg->bfgs_scale = 1.0;
  cg->zeta = 0.1;
  cg->min_quad = 6;
  cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/
  cg->xi = 1.0;
  cg->neg_xi = PETSC_TRUE;
  cg->spaced_restart = PETSC_FALSE;
  cg->tol_quad = 1e-8;
  cg->as_step = 0.001;
  cg->as_tol = 0.001;
  cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0 / 3.0); /* Just a little tighter*/
  cg->as_type = CG_AS_BERTSEKAS;
  cg->cg_type = TAO_BNCG_SSML_BFGS;
  cg->alpha = 1.0;
  cg->diag_scaling = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
400: PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
401: {
402: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
403: PetscReal scaling;
405: PetscFunctionBegin;
406: ++cg->resets;
407: scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
408: scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
409: if (cg->unscaled_restart) {
410: scaling = 1.0;
411: ++cg->pure_gd_steps;
412: }
413: PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient));
414: /* Also want to reset our diagonal scaling with each restart */
415: if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE));
416: PetscFunctionReturn(PETSC_SUCCESS);
417: }
/*
  TaoBNCGCheckDynamicRestart - Detects sustained quadratic-like behavior of the
  objective along the search direction (via a quadratic interpolation test) and
  requests a restart after cg->min_quad consecutive quadratic-like iterations.

  NOTE(review): cg->min_quad is a PetscInt, so cg->min_quad / 10 is integer
  division (0 for the default min_quad = 6), making the early-exit test
  effectively "cg->f < 0" — confirm whether 10.0 was intended.
  NOTE(review): on the path where quadinterp is close to 1 but iter_quad has
  not yet reached min_quad, *dynrestart is left unmodified rather than set to
  PETSC_FALSE — confirm the caller relies on that.
*/
PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal quadinterp;

  PetscFunctionBegin;
  if (cg->f < cg->min_quad / 10) {
    *dynrestart = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } /* just skip this since this strategy doesn't work well for functions near zero */
  /* quadinterp == 1 exactly when f behaved like a quadratic over the last step */
  quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old));
  if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
  else {
    /* Non-quadratic behavior: reset the streak and cancel any pending restart */
    cg->iter_quad = 0;
    *dynrestart = PETSC_FALSE;
  }
  if (cg->iter_quad >= cg->min_quad) {
    /* Enough consecutive quadratic-like iterations: request a restart */
    cg->iter_quad = 0;
    *dynrestart = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
442: PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
443: {
444: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
445: PetscReal gamma = 1.0, tau_k, beta;
446: PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd;
447: PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
448: PetscInt dim;
449: PetscBool cg_restart = PETSC_FALSE;
451: PetscFunctionBegin;
452: /* Local curvature check to see if we need to restart */
453: if (tao->niter >= 1 || tao->recycle) {
454: PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
455: PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
456: ynorm2 = ynorm * ynorm;
457: PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
458: if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) {
459: cg_restart = PETSC_TRUE;
460: ++cg->skipped_updates;
461: }
462: if (cg->spaced_restart) {
463: PetscCall(VecGetSize(tao->gradient, &dim));
464: if (tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE;
465: }
466: }
467: /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */
468: if (cg->spaced_restart) {
469: PetscCall(VecGetSize(tao->gradient, &dim));
470: if (0 == tao->niter % (6 * dim)) cg_restart = PETSC_TRUE;
471: }
472: /* Compute the diagonal scaling vector if applicable */
473: if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient));
475: /* A note on diagonal scaling (to be added to paper):
476: For the FR, PR, PRP, and DY methods, the diagonally scaled versions
477: must be derived as a preconditioned CG method rather than as
478: a Hessian initialization like in the Broyden methods. */
480: /* In that case, one writes the objective function as
481: f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
482: Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
483: HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
484: same under preconditioning. Note that A is diagonal, such that A^T = A. */
486: /* This yields questions like what the dot product d_k^T y_k
487: should look like. HZ mistakenly treats that as the same under
488: preconditioning, but that is not necessarily true. */
490: /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
491: we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
492: yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
493: NOT the same if our matrix used to construct the preconditioner is updated between iterations.
494: This same issue is found when considering dot products of the form g_{k+1}^T y_k. */
496: /* Compute CG step direction */
497: if (cg_restart) {
498: PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
499: } else if (pcgd_fallback) {
500: /* Just like preconditioned CG */
501: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
502: PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
503: } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
504: switch (cg->cg_type) {
505: case TAO_BNCG_PCGD:
506: if (!cg->diag_scaling) {
507: if (!cg->no_scaling) {
508: cg->sts = step * step * dnorm * dnorm;
509: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
510: } else {
511: tau_k = 1.0;
512: ++cg->pure_gd_steps;
513: }
514: PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient));
515: } else {
516: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
517: PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
518: }
519: break;
521: case TAO_BNCG_HS:
522: /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
523: if (!cg->diag_scaling) {
524: cg->sts = step * step * dnorm * dnorm;
525: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
526: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
527: beta = tau_k * gkp1_yk / dk_yk;
528: PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
529: } else {
530: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
531: PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
532: beta = gkp1_yk / dk_yk;
533: PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
534: }
535: break;
537: case TAO_BNCG_FR:
538: PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
539: PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
540: PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
541: ynorm2 = ynorm * ynorm;
542: PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
543: if (!cg->diag_scaling) {
544: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha));
545: beta = tau_k * gnorm2 / gnorm2_old;
546: PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
547: } else {
548: PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */
549: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
550: PetscCall(VecDot(tao->gradient, cg->g_work, &tmp));
551: beta = tmp / gnorm2_old;
552: PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
553: }
554: break;
556: case TAO_BNCG_PRP:
557: snorm = step * dnorm;
558: if (!cg->diag_scaling) {
559: PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
560: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
561: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
562: beta = tau_k * gkp1_yk / gnorm2_old;
563: PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
564: } else {
565: PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old));
566: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
567: PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
568: beta = gkp1_yk / gnorm2_old;
569: PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
570: }
571: break;
573: case TAO_BNCG_PRP_PLUS:
574: PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
575: PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
576: ynorm2 = ynorm * ynorm;
577: if (!cg->diag_scaling) {
578: PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
579: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
580: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
581: beta = tau_k * gkp1_yk / gnorm2_old;
582: beta = PetscMax(beta, 0.0);
583: PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
584: } else {
585: PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */
586: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
587: PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
588: beta = gkp1_yk / gnorm2_old;
589: beta = PetscMax(beta, 0.0);
590: PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
591: }
592: break;
594: case TAO_BNCG_DY:
595: /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
596: SIAM Journal on optimization 10, no. 1 (1999): 177-182. */
597: if (!cg->diag_scaling) {
598: PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd));
599: PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
600: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha));
601: beta = tau_k * gnorm2 / (gd - gd_old);
602: PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
603: } else {
604: PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work));
605: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
606: PetscCall(VecDot(cg->g_work, tao->gradient, >Dg));
607: PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old));
608: PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk));
609: dk_yk = dk_yk - gd_old;
610: beta = gtDg / dk_yk;
611: PetscCall(VecScale(cg->d_work, beta));
612: PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work));
613: }
614: break;
616: case TAO_BNCG_HZ:
617: /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
618: ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
619: PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
620: PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
621: PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
622: snorm = dnorm * step;
623: cg->yts = step * dk_yk;
624: if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
625: if (cg->dynamic_restart) {
626: PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
627: } else {
628: if (!cg->diag_scaling) {
629: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
630: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
631: /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
632: tmp = gd / dk_yk;
633: beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk));
634: /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
635: beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
636: /* d <- -t*g + beta*t*d */
637: PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
638: } else {
639: /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
640: cg->yty = ynorm2;
641: cg->sts = snorm * snorm;
642: /* Apply the diagonal scaling to all my vectors */
643: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
644: PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
645: PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
646: /* Construct the constant ytDgkp1 */
647: PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
648: /* Construct the constant for scaling Dkyk in the update */
649: tmp = gd / dk_yk;
650: PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
651: tau_k = -tau_k * gd / (dk_yk * dk_yk);
652: /* beta is the constant which adds the dk contribution */
653: beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */
654: /* From HZ2013, modified to account for diagonal scaling*/
655: PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
656: PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
657: beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
658: /* Do the update */
659: PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
660: }
661: }
662: break;
664: case TAO_BNCG_DK:
665: /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
666: SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
667: PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
668: PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
669: PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
670: snorm = step * dnorm;
671: cg->yts = dk_yk * step;
672: if (!cg->diag_scaling) {
673: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
674: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
675: /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
676: tmp = gd / dk_yk;
677: beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk;
678: beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
679: /* d <- -t*g + beta*t*d */
680: PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk));
681: } else {
682: /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
683: cg->yty = ynorm2;
684: cg->sts = snorm * snorm;
685: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
686: PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
687: PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
688: /* Construct the constant ytDgkp1 */
689: PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
690: PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
691: tau_k = tau_k * gd / (dk_yk * dk_yk);
692: tmp = gd / dk_yk;
693: /* beta is the constant which adds the dk contribution */
694: beta = gkp1_yk / dk_yk - step * tmp - tau_k;
695: /* Update this for the last term in beta */
696: PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk));
697: beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */
698: PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
699: PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
700: beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
701: /* Do the update */
702: PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
703: }
704: break;
706: case TAO_BNCG_KD:
707: /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
708: Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
709: PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
710: PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
711: PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
712: snorm = step * dnorm;
713: cg->yts = dk_yk * step;
714: if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
715: if (cg->dynamic_restart) {
716: PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
717: } else {
718: if (!cg->diag_scaling) {
719: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
720: PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
721: beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
722: if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) /* 0.1 is KD's zeta parameter */
723: {
724: beta = cg->zeta * tau_k * gd / (dnorm * dnorm);
725: gamma = 0.0;
726: } else {
727: if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk;
728: /* This seems to be very effective when there's no tau_k scaling.
729: This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
730: else gamma = cg->xi * gd / dk_yk;
731: }
732: /* d <- -t*g + beta*t*d + t*tmp*yk */
733: PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk));
734: } else {
735: /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
736: cg->yty = ynorm2;
737: cg->sts = snorm * snorm;
738: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
739: PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
740: /* Construct the constant ytDgkp1 */
741: PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk));
742: /* Construct the constant for scaling Dkyk in the update */
743: gamma = gd / dk_yk;
744: /* tau_k = -ytDy/(ytd)^2 * gd */
745: PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
746: tau_k = tau_k * gd / (dk_yk * dk_yk);
747: /* beta is the constant which adds the d_k contribution */
748: beta = gkp1D_yk / dk_yk - step * gamma - tau_k;
749: /* Here is the requisite check */
750: PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp));
751: if (cg->neg_xi) {
752: /* modified KD implementation */
753: if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk;
754: else gamma = cg->xi * gd / dk_yk;
755: if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
756: beta = cg->zeta * tmp / (dnorm * dnorm);
757: gamma = 0.0;
758: }
759: } else { /* original KD 2015 implementation */
760: if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
761: beta = cg->zeta * tmp / (dnorm * dnorm);
762: gamma = 0.0;
763: } else gamma = cg->xi * gd / dk_yk;
764: }
765: /* Do the update in two steps */
766: PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
767: PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work));
768: }
769: }
770: break;
772: case TAO_BNCG_SSML_BFGS:
773: /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
774: Discussion Papers 269 (1977). */
775: PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
776: PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
777: snorm = step * dnorm;
778: cg->yts = dk_yk * step;
779: cg->yty = ynorm2;
780: cg->sts = snorm * snorm;
781: if (!cg->diag_scaling) {
782: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
783: PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
784: tmp = gd / dk_yk;
785: beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp;
786: /* d <- -t*g + beta*t*d + t*tmp*yk */
787: PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk));
788: } else {
789: /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
790: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
791: PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
792: /* compute scalar gamma */
793: PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
794: PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
795: gamma = gd / dk_yk;
796: /* Compute scalar beta */
797: beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
798: /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
799: PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
800: }
801: break;
803: case TAO_BNCG_SSML_DFP:
804: PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
805: PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
806: snorm = step * dnorm;
807: cg->yts = dk_yk * step;
808: cg->yty = ynorm2;
809: cg->sts = snorm * snorm;
810: if (!cg->diag_scaling) {
811: /* Instead of a regular convex combination, we will solve a quadratic formula. */
812: PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
813: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
814: tau_k = cg->dfp_scale * tau_k;
815: tmp = tau_k * gkp1_yk / cg->yty;
816: beta = -step * gd / dk_yk;
817: /* d <- -t*g + beta*d + tmp*yk */
818: PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
819: } else {
820: /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
821: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
822: PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
823: /* compute scalar gamma */
824: PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
825: PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
826: gamma = (gkp1_yk / tmp);
827: /* Compute scalar beta */
828: beta = -step * gd / dk_yk;
829: /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
830: PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
831: }
832: break;
834: case TAO_BNCG_SSML_BRDN:
835: PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
836: PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
837: snorm = step * dnorm;
838: cg->yts = step * dk_yk;
839: cg->yty = ynorm2;
840: cg->sts = snorm * snorm;
841: if (!cg->diag_scaling) {
842: /* Instead of a regular convex combination, we will solve a quadratic formula. */
843: PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale));
844: PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale));
845: PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
846: tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp;
847: /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
848: it should reproduce the tau_dfp scaling. Same with dfp_scale. */
849: tmp = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty;
850: beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
851: /* d <- -t*g + beta*d + tmp*yk */
852: PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
853: } else {
854: /* We have diagonal scaling enabled */
855: PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
856: PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
857: /* compute scalar gamma */
858: PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
859: PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
860: gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp);
861: /* Compute scalar beta */
862: beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
863: /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
864: PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
865: }
866: break;
868: default:
869: break;
870: }
871: }
872: PetscFunctionReturn(PETSC_SUCCESS);
873: }
875: PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
876: {
877: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
878: TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
879: PetscReal step = 1.0, gnorm2, gd, dnorm = 0.0;
880: PetscReal gnorm2_old, f_old, resnorm, gnorm_old;
881: PetscBool pcgd_fallback = PETSC_FALSE;
883: PetscFunctionBegin;
884: /* We are now going to perform a line search along the direction. */
885: /* Store solution and gradient info before it changes */
886: PetscCall(VecCopy(tao->solution, cg->X_old));
887: PetscCall(VecCopy(tao->gradient, cg->G_old));
888: PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old));
890: gnorm_old = gnorm;
891: gnorm2_old = gnorm_old * gnorm_old;
892: f_old = cg->f;
893: /* Perform bounded line search. If we are recycling a solution from a previous */
894: /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
895: if (!(tao->recycle && 0 == tao->niter)) {
896: /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
897: PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
898: PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
899: PetscCall(TaoAddLineSearchCounts(tao));
901: /* Check linesearch failure */
902: if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
903: ++cg->ls_fails;
904: if (cg->cg_type == TAO_BNCG_GD) {
905: /* Nothing left to do but fail out of the optimization */
906: step = 0.0;
907: tao->reason = TAO_DIVERGED_LS_FAILURE;
908: } else {
909: /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
910: PetscCall(VecCopy(cg->X_old, tao->solution));
911: PetscCall(VecCopy(cg->G_old, tao->gradient));
912: PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient));
913: gnorm = gnorm_old;
914: gnorm2 = gnorm2_old;
915: cg->f = f_old;
917: /* Fall back on preconditioned CG (so long as you're not already using it) */
918: if (cg->cg_type != TAO_BNCG_PCGD && cg->diag_scaling) {
919: pcgd_fallback = PETSC_TRUE;
920: PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
922: PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
923: PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
925: PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
926: PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
927: PetscCall(TaoAddLineSearchCounts(tao));
929: pcgd_fallback = PETSC_FALSE;
930: if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
931: /* Going to perform a regular gradient descent step. */
932: ++cg->ls_fails;
933: step = 0.0;
934: }
935: }
936: /* Fall back on the scaled gradient step */
937: if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
938: ++cg->ls_fails;
939: PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
940: PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
941: PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
942: PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
943: PetscCall(TaoAddLineSearchCounts(tao));
944: }
946: if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
947: /* Nothing left to do but fail out of the optimization */
948: ++cg->ls_fails;
949: step = 0.0;
950: tao->reason = TAO_DIVERGED_LS_FAILURE;
951: } else {
952: /* One of the fallbacks worked. Set them both back equal to false. */
953: pcgd_fallback = PETSC_FALSE;
954: }
955: }
956: }
957: /* Convergence test for line search failure */
958: if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
960: /* Standard convergence test */
961: PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
962: PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
963: PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
964: PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
965: PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
966: PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
967: if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
968: }
969: /* Assert we have an updated step and we need at least one more iteration. */
970: /* Calculate the next direction */
971: /* Estimate the active set at the new solution */
972: PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));
973: /* Compute the projected gradient and its norm */
974: PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
975: if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
976: PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
977: gnorm2 = gnorm * gnorm;
979: /* Calculate some quantities used in the StepDirectionUpdate. */
980: PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
981: /* Update the step direction. */
982: PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
983: ++tao->niter;
984: PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
986: if (cg->cg_type != TAO_BNCG_GD) {
987: /* Figure out which previously active variables became inactive this iteration */
988: PetscCall(ISDestroy(&cg->new_inactives));
989: if (cg->inactive_idx && cg->inactive_old) PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives));
990: /* Selectively reset the CG step those freshly inactive variables */
991: if (cg->new_inactives) {
992: PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
993: PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
994: PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step));
995: PetscCall(VecScale(cg->inactive_step, -1.0));
996: PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
997: PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
998: }
999: /* Verify that this is a descent direction */
1000: PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
1001: PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
1002: if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) {
1003: /* Not a descent direction, so we reset back to projected gradient descent */
1004: PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
1005: PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
1006: ++cg->descent_error;
1007: } else {
1008: }
1009: }
1010: PetscFunctionReturn(PETSC_SUCCESS);
1011: }
1013: PETSC_INTERN PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
1014: {
1015: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
1016: PetscBool same;
1018: PetscFunctionBegin;
1019: PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
1020: if (same) {
1021: PetscCall(PetscObjectReference((PetscObject)H0));
1022: cg->pc = H0;
1023: }
1024: PetscFunctionReturn(PETSC_SUCCESS);
1025: }
1027: /*@
1028: TaoBNCGGetType - Return the type for the `TAOBNCG` solver
1030: Input Parameter:
1031: . tao - the `Tao` solver context
1033: Output Parameter:
1034: . type - `TAOBNCG` type
1036: Level: advanced
1038: .seealso: `Tao`, `TAOBNCG`, `TaoBNCGSetType()`, `TaoBNCGType`
1039: @*/
1040: PetscErrorCode TaoBNCGGetType(Tao tao, TaoBNCGType *type)
1041: {
1042: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
1043: PetscBool same;
1045: PetscFunctionBegin;
1046: PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
1047: PetscCheck(same, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_INCOMP, "TAO solver is not BNCG type");
1048: *type = cg->cg_type;
1049: PetscFunctionReturn(PETSC_SUCCESS);
1050: }
1052: /*@
1053: TaoBNCGSetType - Set the type for the `TAOBNCG` solver
1055: Input Parameters:
1056: + tao - the `Tao` solver context
1057: - type - `TAOBNCG` type
1059: Level: advanced
1061: .seealso: `Tao`, `TAOBNCG`, `TaoBNCGGetType()`, `TaoBNCGType`
1062: @*/
1063: PetscErrorCode TaoBNCGSetType(Tao tao, TaoBNCGType type)
1064: {
1065: TAO_BNCG *cg = (TAO_BNCG *)tao->data;
1066: PetscBool same;
1068: PetscFunctionBegin;
1069: PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
1070: if (same) cg->cg_type = type;
1071: PetscFunctionReturn(PETSC_SUCCESS);
1072: }