Actual source code: bfgs.c
1: #include <../src/ksp/ksp/utils/lmvm/symbrdn/symbrdn.h>
2: #include <../src/ksp/ksp/utils/lmvm/diagbrdn/diagbrdn.h>
3: #include <petsc/private/vecimpl.h>
4: #include <petscdevice.h>
6: /*
7: Limited-memory Broyden-Fletcher-Goldfarb-Shano method for approximating both
8: the forward product and inverse application of a Jacobian.
9: */
11: /*
12: The solution method (approximate inverse Jacobian application) is adapted
13: from Algorithm 7.4 on page 178 of Nocedal and Wright "Numerical Optimization"
14: 2nd edition (https://doi.org/10.1007/978-0-387-40065-5). The initial inverse
15: Jacobian application falls back onto the gamma scaling recommended in equation
16: (7.20) if the user has not provided any estimation of the initial Jacobian or
17: its inverse.
19: work <- F
21: for i = k,k-1,k-2,...,0
22: rho[i] = 1 / (Y[i]^T S[i])
23: alpha[i] = rho[i] * (S[i]^T work)
24: Fwork <- work - (alpha[i] * Y[i])
25: end
27: dX <- J0^{-1} * work
29: for i = 0,1,2,...,k
30: beta = rho[i] * (Y[i]^T dX)
31: dX <- dX + ((alpha[i] - beta) * S[i])
32: end
33: */
34: PetscErrorCode MatSolve_LMVMBFGS(Mat B, Vec F, Vec dX)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
37: Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
38: PetscInt i;
39: PetscReal *alpha, beta;
40: PetscScalar stf, ytx;
42: PetscFunctionBegin;
43: VecCheckSameSize(F, 2, dX, 3);
44: VecCheckMatCompatible(B, dX, 3, F, 2);
46: /* Copy the function into the work vector for the first loop */
47: PetscCall(VecCopy(F, lbfgs->work));
49: /* Start the first loop */
50: PetscCall(PetscMalloc1(lmvm->k + 1, &alpha));
51: for (i = lmvm->k; i >= 0; --i) {
52: PetscCall(VecDot(lmvm->S[i], lbfgs->work, &stf));
53: alpha[i] = PetscRealPart(stf) / lbfgs->yts[i];
54: PetscCall(VecAXPY(lbfgs->work, -alpha[i], lmvm->Y[i]));
55: }
57: /* Invert the initial Jacobian onto the work vector (or apply scaling) */
58: PetscCall(MatSymBrdnApplyJ0Inv(B, lbfgs->work, dX));
60: /* Start the second loop */
61: for (i = 0; i <= lmvm->k; ++i) {
62: // dot product performed on default blocking stream, last write to lbfgs->work completes before dot product starts
63: PetscCall(VecDot(lmvm->Y[i], dX, &ytx));
64: beta = PetscRealPart(ytx) / lbfgs->yts[i];
65: PetscCall(VecAXPY(dX, alpha[i] - beta, lmvm->S[i]));
66: }
67: PetscCall(PetscFree(alpha));
68: PetscFunctionReturn(PETSC_SUCCESS);
69: }
71: /*
72: The forward product for the approximate Jacobian is the matrix-free
73: implementation of Equation (6.19) in Nocedal and Wright "Numerical
74: Optimization" 2nd Edition, pg 140.
76: This forward product has the same structure as the inverse Jacobian
77: application in the DFP formulation, except with S and Y exchanging
78: roles.
80: Note: P[i] = (B_i)*S[i] terms are computed ahead of time whenever
81: the matrix is updated with a new (S[i], Y[i]) pair. This allows
82: repeated calls of MatMult inside KSP solvers without unnecessarily
83: recomputing P[i] terms in expensive nested-loops.
85: Z <- J0 * X
87: for i = 0,1,2,...,k
88: P[i] <- J0 * S[i]
89: for j = 0,1,2,...,(i-1)
90: gamma = (Y[j]^T S[i]) / (Y[j]^T S[j])
91: zeta = (S[j]^ P[i]) / (S[j]^T P[j])
92: P[i] <- P[i] - (zeta * P[j]) + (gamma * Y[j])
93: end
94: gamma = (Y[i]^T X) / (Y[i]^T S[i])
95: zeta = (S[i]^T Z) / (S[i]^T P[i])
96: Z <- Z - (zeta * P[i]) + (gamma * Y[i])
97: end
98: */
99: PetscErrorCode MatMult_LMVMBFGS(Mat B, Vec X, Vec Z)
100: {
101: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
102: Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
103: PetscInt i, j;
104: PetscScalar sjtpi, yjtsi, ytx, stz, stp;
106: PetscFunctionBegin;
107: VecCheckSameSize(X, 2, Z, 3);
108: VecCheckMatCompatible(B, X, 2, Z, 3);
110: if (lbfgs->needP) {
111: /* Pre-compute (P[i] = B_i * S[i]) */
112: for (i = 0; i <= lmvm->k; ++i) {
113: PetscCall(MatSymBrdnApplyJ0Fwd(B, lmvm->S[i], lbfgs->P[i]));
114: /* Compute the necessary dot products */
115: PetscCall(VecMDot(lmvm->S[i], i, lmvm->Y, lbfgs->workscalar));
116: for (j = 0; j < i; ++j) {
117: yjtsi = lbfgs->workscalar[j];
118: PetscCall(VecDot(lmvm->S[j], lbfgs->P[i], &sjtpi));
119: /* Compute the pure BFGS component of the forward product */
120: PetscCall(VecAXPBYPCZ(lbfgs->P[i], -PetscRealPart(sjtpi) / lbfgs->stp[j], PetscRealPart(yjtsi) / lbfgs->yts[j], 1.0, lbfgs->P[j], lmvm->Y[j]));
121: }
122: PetscCall(VecDot(lmvm->S[i], lbfgs->P[i], &stp));
123: lbfgs->stp[i] = PetscRealPart(stp);
124: }
125: lbfgs->needP = PETSC_FALSE;
126: }
128: /* Start the outer loop (i) for the recursive formula */
129: PetscCall(MatSymBrdnApplyJ0Fwd(B, X, Z));
130: /* Get all the dot products we need */
131: PetscCall(VecMDot(X, lmvm->k + 1, lmvm->Y, lbfgs->workscalar));
132: for (i = 0; i <= lmvm->k; ++i) {
133: ytx = lbfgs->workscalar[i];
134: PetscCall(VecDot(lmvm->S[i], Z, &stz));
135: /* Update Z_{i+1} = B_{i+1} * X */
136: PetscCall(VecAXPBYPCZ(Z, -PetscRealPart(stz) / lbfgs->stp[i], PetscRealPart(ytx) / lbfgs->yts[i], 1.0, lbfgs->P[i], lmvm->Y[i]));
137: }
138: PetscFunctionReturn(PETSC_SUCCESS);
139: }
141: static PetscErrorCode MatUpdate_LMVMBFGS(Mat B, Vec X, Vec F)
142: {
143: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
144: Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
145: Mat_LMVM *dbase;
146: Mat_DiagBrdn *diagctx;
147: PetscInt old_k, i;
148: PetscReal curvtol, ytytmp;
149: PetscScalar curvature, ststmp;
151: PetscFunctionBegin;
152: if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
153: if (lmvm->prev_set) {
154: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
155: PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
156: PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
158: /* Test if the updates can be accepted */
159: PetscCall(VecDotNorm2(lmvm->Xprev, lmvm->Fprev, &curvature, &ytytmp));
160: if (ytytmp < lmvm->eps) curvtol = 0.0;
161: else curvtol = lmvm->eps * ytytmp;
163: if (PetscRealPart(curvature) > curvtol) {
164: /* Update is good, accept it */
165: lbfgs->watchdog = 0;
166: lbfgs->needP = PETSC_TRUE;
167: old_k = lmvm->k;
168: PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
169: /* If we hit the memory limit, shift the yts, yty and sts arrays */
170: if (old_k == lmvm->k) {
171: for (i = 0; i <= lmvm->k - 1; ++i) {
172: lbfgs->yts[i] = lbfgs->yts[i + 1];
173: lbfgs->yty[i] = lbfgs->yty[i + 1];
174: lbfgs->sts[i] = lbfgs->sts[i + 1];
175: }
176: }
177: /* Update history of useful scalars */
178: lbfgs->yts[lmvm->k] = PetscRealPart(curvature);
179: lbfgs->yty[lmvm->k] = ytytmp;
180: /* Compute the scalar scale if necessary */
181: if (lbfgs->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_SCALAR) {
182: PetscCall(VecDot(lmvm->S[lmvm->k], lmvm->S[lmvm->k], &ststmp));
183: lbfgs->sts[lmvm->k] = PetscRealPart(ststmp);
184: PetscCall(MatSymBrdnComputeJ0Scalar(B));
185: }
186: } else {
187: /* Update is bad, skip it */
188: ++lmvm->nrejects;
189: ++lbfgs->watchdog;
190: }
191: } else {
192: switch (lbfgs->scale_type) {
193: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
194: dbase = (Mat_LMVM *)lbfgs->D->data;
195: diagctx = (Mat_DiagBrdn *)dbase->ctx;
196: PetscCall(VecSet(diagctx->invD, lbfgs->delta));
197: break;
198: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
199: lbfgs->sigma = lbfgs->delta;
200: break;
201: case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
202: lbfgs->sigma = 1.0;
203: break;
204: default:
205: break;
206: }
207: }
209: /* Update the scaling */
210: if (lbfgs->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) PetscCall(MatLMVMUpdate(lbfgs->D, X, F));
212: if (lbfgs->watchdog > lbfgs->max_seq_rejects) {
213: PetscCall(MatLMVMReset(B, PETSC_FALSE));
214: if (lbfgs->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) PetscCall(MatLMVMReset(lbfgs->D, PETSC_FALSE));
215: }
217: /* Save the solution and function to be used in the next update */
218: PetscCall(VecCopy(X, lmvm->Xprev));
219: PetscCall(VecCopy(F, lmvm->Fprev));
220: lmvm->prev_set = PETSC_TRUE;
221: PetscFunctionReturn(PETSC_SUCCESS);
222: }
224: static PetscErrorCode MatCopy_LMVMBFGS(Mat B, Mat M, MatStructure str)
225: {
226: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
227: Mat_SymBrdn *bctx = (Mat_SymBrdn *)bdata->ctx;
228: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
229: Mat_SymBrdn *mctx = (Mat_SymBrdn *)mdata->ctx;
230: PetscInt i;
232: PetscFunctionBegin;
233: mctx->needP = bctx->needP;
234: for (i = 0; i <= bdata->k; ++i) {
235: mctx->stp[i] = bctx->stp[i];
236: mctx->yts[i] = bctx->yts[i];
237: PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
238: }
239: mctx->scale_type = bctx->scale_type;
240: mctx->alpha = bctx->alpha;
241: mctx->beta = bctx->beta;
242: mctx->rho = bctx->rho;
243: mctx->delta = bctx->delta;
244: mctx->sigma_hist = bctx->sigma_hist;
245: mctx->watchdog = bctx->watchdog;
246: mctx->max_seq_rejects = bctx->max_seq_rejects;
247: switch (bctx->scale_type) {
248: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
249: mctx->sigma = bctx->sigma;
250: break;
251: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
252: PetscCall(MatCopy(bctx->D, mctx->D, SAME_NONZERO_PATTERN));
253: break;
254: case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
255: mctx->sigma = 1.0;
256: break;
257: default:
258: break;
259: }
260: PetscFunctionReturn(PETSC_SUCCESS);
261: }
263: static PetscErrorCode MatReset_LMVMBFGS(Mat B, PetscBool destructive)
264: {
265: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
266: Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
267: Mat_LMVM *dbase;
268: Mat_DiagBrdn *dctx;
270: PetscFunctionBegin;
271: lbfgs->watchdog = 0;
272: lbfgs->needP = PETSC_TRUE;
273: if (lbfgs->allocated) {
274: if (destructive) {
275: PetscCall(VecDestroy(&lbfgs->work));
276: PetscCall(PetscFree5(lbfgs->stp, lbfgs->yts, lbfgs->yty, lbfgs->sts, lbfgs->workscalar));
277: PetscCall(VecDestroyVecs(lmvm->m, &lbfgs->P));
278: switch (lbfgs->scale_type) {
279: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
280: PetscCall(MatLMVMReset(lbfgs->D, PETSC_TRUE));
281: break;
282: default:
283: break;
284: }
285: lbfgs->allocated = PETSC_FALSE;
286: } else {
287: switch (lbfgs->scale_type) {
288: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
289: lbfgs->sigma = lbfgs->delta;
290: break;
291: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
292: PetscCall(MatLMVMReset(lbfgs->D, PETSC_FALSE));
293: dbase = (Mat_LMVM *)lbfgs->D->data;
294: dctx = (Mat_DiagBrdn *)dbase->ctx;
295: PetscCall(VecSet(dctx->invD, lbfgs->delta));
296: break;
297: case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
298: lbfgs->sigma = 1.0;
299: break;
300: default:
301: break;
302: }
303: }
304: }
305: PetscCall(MatReset_LMVM(B, destructive));
306: PetscFunctionReturn(PETSC_SUCCESS);
307: }
309: static PetscErrorCode MatAllocate_LMVMBFGS(Mat B, Vec X, Vec F)
310: {
311: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
312: Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
314: PetscFunctionBegin;
315: PetscCall(MatAllocate_LMVM(B, X, F));
316: if (!lbfgs->allocated) {
317: PetscCall(VecDuplicate(X, &lbfgs->work));
318: PetscCall(PetscMalloc5(lmvm->m, &lbfgs->stp, lmvm->m, &lbfgs->yts, lmvm->m, &lbfgs->yty, lmvm->m, &lbfgs->sts, lmvm->m, &lbfgs->workscalar));
319: if (lmvm->m > 0) PetscCall(VecDuplicateVecs(X, lmvm->m, &lbfgs->P));
320: switch (lbfgs->scale_type) {
321: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
322: PetscCall(MatLMVMAllocate(lbfgs->D, X, F));
323: break;
324: default:
325: break;
326: }
327: lbfgs->allocated = PETSC_TRUE;
328: }
329: PetscFunctionReturn(PETSC_SUCCESS);
330: }
332: static PetscErrorCode MatDestroy_LMVMBFGS(Mat B)
333: {
334: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
335: Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
337: PetscFunctionBegin;
338: if (lbfgs->allocated) {
339: PetscCall(VecDestroy(&lbfgs->work));
340: PetscCall(PetscFree5(lbfgs->stp, lbfgs->yts, lbfgs->yty, lbfgs->sts, lbfgs->workscalar));
341: PetscCall(VecDestroyVecs(lmvm->m, &lbfgs->P));
342: lbfgs->allocated = PETSC_FALSE;
343: }
344: PetscCall(MatDestroy(&lbfgs->D));
345: PetscCall(PetscFree(lmvm->ctx));
346: PetscCall(MatDestroy_LMVM(B));
347: PetscFunctionReturn(PETSC_SUCCESS);
348: }
350: static PetscErrorCode MatSetUp_LMVMBFGS(Mat B)
351: {
352: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
353: Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
354: PetscInt n, N;
356: PetscFunctionBegin;
357: PetscCall(MatSetUp_LMVM(B));
358: lbfgs->max_seq_rejects = lmvm->m / 2;
359: if (!lbfgs->allocated) {
360: PetscCall(VecDuplicate(lmvm->Xprev, &lbfgs->work));
361: PetscCall(PetscMalloc5(lmvm->m, &lbfgs->stp, lmvm->m, &lbfgs->yts, lmvm->m, &lbfgs->yty, lmvm->m, &lbfgs->sts, lmvm->m, &lbfgs->workscalar));
362: if (lmvm->m > 0) PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbfgs->P));
363: switch (lbfgs->scale_type) {
364: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
365: PetscCall(MatGetLocalSize(B, &n, &n));
366: PetscCall(MatGetSize(B, &N, &N));
367: PetscCall(MatSetSizes(lbfgs->D, n, n, N, N));
368: PetscCall(MatSetUp(lbfgs->D));
369: break;
370: default:
371: break;
372: }
373: lbfgs->allocated = PETSC_TRUE;
374: }
375: PetscFunctionReturn(PETSC_SUCCESS);
376: }
378: static PetscErrorCode MatSetFromOptions_LMVMBFGS(Mat B, PetscOptionItems *PetscOptionsObject)
379: {
380: PetscFunctionBegin;
381: PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
382: PetscOptionsHeadBegin(PetscOptionsObject, "L-BFGS method for approximating SPD Jacobian actions (MATLMVMBFGS)");
383: PetscCall(MatSetFromOptions_LMVMSymBrdn_Private(B, PetscOptionsObject));
384: PetscOptionsHeadEnd();
385: PetscFunctionReturn(PETSC_SUCCESS);
386: }
388: PetscErrorCode MatCreate_LMVMBFGS(Mat B)
389: {
390: Mat_LMVM *lmvm;
391: Mat_SymBrdn *lbfgs;
393: PetscFunctionBegin;
394: PetscCall(MatCreate_LMVMSymBrdn(B));
395: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMBFGS));
396: B->ops->setup = MatSetUp_LMVMBFGS;
397: B->ops->destroy = MatDestroy_LMVMBFGS;
398: B->ops->setfromoptions = MatSetFromOptions_LMVMBFGS;
400: lmvm = (Mat_LMVM *)B->data;
401: lmvm->ops->allocate = MatAllocate_LMVMBFGS;
402: lmvm->ops->reset = MatReset_LMVMBFGS;
403: lmvm->ops->update = MatUpdate_LMVMBFGS;
404: lmvm->ops->mult = MatMult_LMVMBFGS;
405: lmvm->ops->solve = MatSolve_LMVMBFGS;
406: lmvm->ops->copy = MatCopy_LMVMBFGS;
408: lbfgs = (Mat_SymBrdn *)lmvm->ctx;
409: lbfgs->needQ = PETSC_FALSE;
410: lbfgs->phi = 0.0;
411: PetscFunctionReturn(PETSC_SUCCESS);
412: }
414: /*@
415: MatCreateLMVMBFGS - Creates a limited-memory Broyden-Fletcher-Goldfarb-Shano (BFGS)
416: matrix used for approximating Jacobians. L-BFGS is symmetric positive-definite by
417: construction, and is commonly used to approximate Hessians in optimization
418: problems.
420: To use the L-BFGS matrix with other vector types, the matrix must be
421: created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
422: This ensures that the internal storage and work vectors are duplicated from the
423: correct type of vector.
425: Collective
427: Input Parameters:
428: + comm - MPI communicator
429: . n - number of local rows for storage vectors
430: - N - global size of the storage vectors
432: Output Parameter:
433: . B - the matrix
435: Options Database Keys:
436: + -mat_lmvm_scale_type - (developer) type of scaling applied to J0 (none, scalar, diagonal)
437: . -mat_lmvm_theta - (developer) convex ratio between BFGS and DFP components of the diagonal J0 scaling
438: . -mat_lmvm_rho - (developer) update limiter for the J0 scaling
439: . -mat_lmvm_alpha - (developer) coefficient factor for the quadratic subproblem in J0 scaling
440: . -mat_lmvm_beta - (developer) exponential factor for the diagonal J0 scaling
441: - -mat_lmvm_sigma_hist - (developer) number of past updates to use in J0 scaling
443: Level: intermediate
445: Note:
446: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
447: paradigm instead of this routine directly.
449: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMBFGS`, `MatCreateLMVMDFP()`, `MatCreateLMVMSR1()`,
450: `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
451: @*/
452: PetscErrorCode MatCreateLMVMBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
453: {
454: PetscFunctionBegin;
455: PetscCall(KSPInitializePackage());
456: PetscCall(MatCreate(comm, B));
457: PetscCall(MatSetSizes(*B, n, n, N, N));
458: PetscCall(MatSetType(*B, MATLMVMBFGS));
459: PetscCall(MatSetUp(*B));
460: PetscFunctionReturn(PETSC_SUCCESS);
461: }