Actual source code: bfgs.c
  1: #include <../src/ksp/ksp/utils/lmvm/symbrdn/symbrdn.h>
  2: #include <../src/ksp/ksp/utils/lmvm/diagbrdn/diagbrdn.h>
  3: #include <petsc/private/vecimpl.h>
  4: #include <petscdevice.h>

  6: /*
  7:   Limited-memory Broyden-Fletcher-Goldfarb-Shano method for approximating both
  8:   the forward product and inverse application of a Jacobian.
  9: */

 11: /*
 12:   The solution method (approximate inverse Jacobian application) is adapted
 13:    from Algorithm 7.4 on page 178 of Nocedal and Wright "Numerical Optimization"
 14:    2nd edition (https://doi.org/10.1007/978-0-387-40065-5). The initial inverse
 15:    Jacobian application falls back onto the gamma scaling recommended in equation
 16:    (7.20) if the user has not provided any estimation of the initial Jacobian or
 17:    its inverse.

 19:    work <- F

 21:    for i = k,k-1,k-2,...,0
 22:      rho[i] = 1 / (Y[i]^T S[i])
 23:      alpha[i] = rho[i] * (S[i]^T work)
 24:      Fwork <- work - (alpha[i] * Y[i])
 25:    end

 27:    dX <- J0^{-1} * work

 29:    for i = 0,1,2,...,k
 30:      beta = rho[i] * (Y[i]^T dX)
 31:      dX <- dX + ((alpha[i] - beta) * S[i])
 32:    end
 33: */
 34: PetscErrorCode MatSolve_LMVMBFGS(Mat B, Vec F, Vec dX)
 35: {
 36:   Mat_LMVM    *lmvm  = (Mat_LMVM *)B->data;
 37:   Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
 38:   PetscInt     i;
 39:   PetscReal   *alpha, beta;
 40:   PetscScalar  stf, ytx;

 42:   PetscFunctionBegin;
 43:   VecCheckSameSize(F, 2, dX, 3);
 44:   VecCheckMatCompatible(B, dX, 3, F, 2);

 46:   /* Copy the function into the work vector for the first loop */
 47:   PetscCall(VecCopy(F, lbfgs->work));

 49:   /* Start the first loop */
 50:   PetscCall(PetscMalloc1(lmvm->k + 1, &alpha));
 51:   for (i = lmvm->k; i >= 0; --i) {
 52:     PetscCall(VecDot(lmvm->S[i], lbfgs->work, &stf));
 53:     alpha[i] = PetscRealPart(stf) / lbfgs->yts[i];
 54:     PetscCall(VecAXPY(lbfgs->work, -alpha[i], lmvm->Y[i]));
 55:   }

 57:   /* Invert the initial Jacobian onto the work vector (or apply scaling) */
 58:   PetscCall(MatSymBrdnApplyJ0Inv(B, lbfgs->work, dX));

 60:   /* Start the second loop */
 61:   for (i = 0; i <= lmvm->k; ++i) {
 62:     // dot product performed on default blocking stream, last write to lbfgs->work completes before dot product starts
 63:     PetscCall(VecDot(lmvm->Y[i], dX, &ytx));
 64:     beta = PetscRealPart(ytx) / lbfgs->yts[i];
 65:     PetscCall(VecAXPY(dX, alpha[i] - beta, lmvm->S[i]));
 66:   }
 67:   PetscCall(PetscFree(alpha));
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71: /*
 72:   The forward product for the approximate Jacobian is the matrix-free
 73:   implementation of Equation (6.19) in Nocedal and Wright "Numerical
 74:   Optimization" 2nd Edition, pg 140.

 76:   This forward product has the same structure as the inverse Jacobian
 77:   application in the DFP formulation, except with S and Y exchanging
 78:   roles.

 80:   Note: P[i] = (B_i)*S[i] terms are computed ahead of time whenever
 81:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 82:   repeated calls of MatMult inside KSP solvers without unnecessarily
 83:   recomputing P[i] terms in expensive nested-loops.

 85:   Z <- J0 * X

 87:   for i = 0,1,2,...,k
 88:     P[i] <- J0 * S[i]
 89:     for j = 0,1,2,...,(i-1)
 90:       gamma = (Y[j]^T S[i]) / (Y[j]^T S[j])
 91:       zeta = (S[j]^ P[i]) / (S[j]^T P[j])
 92:       P[i] <- P[i] - (zeta * P[j]) + (gamma * Y[j])
 93:     end
 94:     gamma = (Y[i]^T X) / (Y[i]^T S[i])
 95:     zeta = (S[i]^T Z) / (S[i]^T P[i])
 96:     Z <- Z - (zeta * P[i]) + (gamma * Y[i])
 97:   end
 98: */
 99: PetscErrorCode MatMult_LMVMBFGS(Mat B, Vec X, Vec Z)
100: {
101:   Mat_LMVM    *lmvm  = (Mat_LMVM *)B->data;
102:   Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
103:   PetscInt     i, j;
104:   PetscScalar  sjtpi, yjtsi, ytx, stz, stp;

106:   PetscFunctionBegin;
107:   VecCheckSameSize(X, 2, Z, 3);
108:   VecCheckMatCompatible(B, X, 2, Z, 3);

110:   if (lbfgs->needP) {
111:     /* Pre-compute (P[i] = B_i * S[i]) */
112:     for (i = 0; i <= lmvm->k; ++i) {
113:       PetscCall(MatSymBrdnApplyJ0Fwd(B, lmvm->S[i], lbfgs->P[i]));
114:       /* Compute the necessary dot products */
115:       PetscCall(VecMDot(lmvm->S[i], i, lmvm->Y, lbfgs->workscalar));
116:       for (j = 0; j < i; ++j) {
117:         yjtsi = lbfgs->workscalar[j];
118:         PetscCall(VecDot(lmvm->S[j], lbfgs->P[i], &sjtpi));
119:         /* Compute the pure BFGS component of the forward product */
120:         PetscCall(VecAXPBYPCZ(lbfgs->P[i], -PetscRealPart(sjtpi) / lbfgs->stp[j], PetscRealPart(yjtsi) / lbfgs->yts[j], 1.0, lbfgs->P[j], lmvm->Y[j]));
121:       }
122:       PetscCall(VecDot(lmvm->S[i], lbfgs->P[i], &stp));
123:       lbfgs->stp[i] = PetscRealPart(stp);
124:     }
125:     lbfgs->needP = PETSC_FALSE;
126:   }

128:   /* Start the outer loop (i) for the recursive formula */
129:   PetscCall(MatSymBrdnApplyJ0Fwd(B, X, Z));
130:   /* Get all the dot products we need */
131:   PetscCall(VecMDot(X, lmvm->k + 1, lmvm->Y, lbfgs->workscalar));
132:   for (i = 0; i <= lmvm->k; ++i) {
133:     ytx = lbfgs->workscalar[i];
134:     PetscCall(VecDot(lmvm->S[i], Z, &stz));
135:     /* Update Z_{i+1} = B_{i+1} * X */
136:     PetscCall(VecAXPBYPCZ(Z, -PetscRealPart(stz) / lbfgs->stp[i], PetscRealPart(ytx) / lbfgs->yts[i], 1.0, lbfgs->P[i], lmvm->Y[i]));
137:   }
138:   PetscFunctionReturn(PETSC_SUCCESS);
139: }

141: static PetscErrorCode MatUpdate_LMVMBFGS(Mat B, Vec X, Vec F)
142: {
143:   Mat_LMVM     *lmvm  = (Mat_LMVM *)B->data;
144:   Mat_SymBrdn  *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
145:   Mat_LMVM     *dbase;
146:   Mat_DiagBrdn *diagctx;
147:   PetscInt      old_k, i;
148:   PetscReal     curvtol, ytytmp;
149:   PetscScalar   curvature, ststmp;

151:   PetscFunctionBegin;
152:   if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
153:   if (lmvm->prev_set) {
154:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
155:     PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
156:     PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));

158:     /* Test if the updates can be accepted */
159:     PetscCall(VecDotNorm2(lmvm->Xprev, lmvm->Fprev, &curvature, &ytytmp));
160:     if (ytytmp < lmvm->eps) curvtol = 0.0;
161:     else curvtol = lmvm->eps * ytytmp;

163:     if (PetscRealPart(curvature) > curvtol) {
164:       /* Update is good, accept it */
165:       lbfgs->watchdog = 0;
166:       lbfgs->needP    = PETSC_TRUE;
167:       old_k           = lmvm->k;
168:       PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
169:       /* If we hit the memory limit, shift the yts, yty and sts arrays */
170:       if (old_k == lmvm->k) {
171:         for (i = 0; i <= lmvm->k - 1; ++i) {
172:           lbfgs->yts[i] = lbfgs->yts[i + 1];
173:           lbfgs->yty[i] = lbfgs->yty[i + 1];
174:           lbfgs->sts[i] = lbfgs->sts[i + 1];
175:         }
176:       }
177:       /* Update history of useful scalars */
178:       lbfgs->yts[lmvm->k] = PetscRealPart(curvature);
179:       lbfgs->yty[lmvm->k] = ytytmp;
180:       /* Compute the scalar scale if necessary */
181:       if (lbfgs->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_SCALAR) {
182:         PetscCall(VecDot(lmvm->S[lmvm->k], lmvm->S[lmvm->k], &ststmp));
183:         lbfgs->sts[lmvm->k] = PetscRealPart(ststmp);
184:         PetscCall(MatSymBrdnComputeJ0Scalar(B));
185:       }
186:     } else {
187:       /* Update is bad, skip it */
188:       ++lmvm->nrejects;
189:       ++lbfgs->watchdog;
190:     }
191:   } else {
192:     switch (lbfgs->scale_type) {
193:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
194:       dbase   = (Mat_LMVM *)lbfgs->D->data;
195:       diagctx = (Mat_DiagBrdn *)dbase->ctx;
196:       PetscCall(VecSet(diagctx->invD, lbfgs->delta));
197:       break;
198:     case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
199:       lbfgs->sigma = lbfgs->delta;
200:       break;
201:     case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
202:       lbfgs->sigma = 1.0;
203:       break;
204:     default:
205:       break;
206:     }
207:   }

209:   /* Update the scaling */
210:   if (lbfgs->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) PetscCall(MatLMVMUpdate(lbfgs->D, X, F));

212:   if (lbfgs->watchdog > lbfgs->max_seq_rejects) {
213:     PetscCall(MatLMVMReset(B, PETSC_FALSE));
214:     if (lbfgs->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) PetscCall(MatLMVMReset(lbfgs->D, PETSC_FALSE));
215:   }

217:   /* Save the solution and function to be used in the next update */
218:   PetscCall(VecCopy(X, lmvm->Xprev));
219:   PetscCall(VecCopy(F, lmvm->Fprev));
220:   lmvm->prev_set = PETSC_TRUE;
221:   PetscFunctionReturn(PETSC_SUCCESS);
222: }

224: static PetscErrorCode MatCopy_LMVMBFGS(Mat B, Mat M, MatStructure str)
225: {
226:   Mat_LMVM    *bdata = (Mat_LMVM *)B->data;
227:   Mat_SymBrdn *bctx  = (Mat_SymBrdn *)bdata->ctx;
228:   Mat_LMVM    *mdata = (Mat_LMVM *)M->data;
229:   Mat_SymBrdn *mctx  = (Mat_SymBrdn *)mdata->ctx;
230:   PetscInt     i;

232:   PetscFunctionBegin;
233:   mctx->needP = bctx->needP;
234:   for (i = 0; i <= bdata->k; ++i) {
235:     mctx->stp[i] = bctx->stp[i];
236:     mctx->yts[i] = bctx->yts[i];
237:     PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
238:   }
239:   mctx->scale_type      = bctx->scale_type;
240:   mctx->alpha           = bctx->alpha;
241:   mctx->beta            = bctx->beta;
242:   mctx->rho             = bctx->rho;
243:   mctx->delta           = bctx->delta;
244:   mctx->sigma_hist      = bctx->sigma_hist;
245:   mctx->watchdog        = bctx->watchdog;
246:   mctx->max_seq_rejects = bctx->max_seq_rejects;
247:   switch (bctx->scale_type) {
248:   case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
249:     mctx->sigma = bctx->sigma;
250:     break;
251:   case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
252:     PetscCall(MatCopy(bctx->D, mctx->D, SAME_NONZERO_PATTERN));
253:     break;
254:   case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
255:     mctx->sigma = 1.0;
256:     break;
257:   default:
258:     break;
259:   }
260:   PetscFunctionReturn(PETSC_SUCCESS);
261: }

263: static PetscErrorCode MatReset_LMVMBFGS(Mat B, PetscBool destructive)
264: {
265:   Mat_LMVM     *lmvm  = (Mat_LMVM *)B->data;
266:   Mat_SymBrdn  *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
267:   Mat_LMVM     *dbase;
268:   Mat_DiagBrdn *dctx;

270:   PetscFunctionBegin;
271:   lbfgs->watchdog = 0;
272:   lbfgs->needP    = PETSC_TRUE;
273:   if (lbfgs->allocated) {
274:     if (destructive) {
275:       PetscCall(VecDestroy(&lbfgs->work));
276:       PetscCall(PetscFree5(lbfgs->stp, lbfgs->yts, lbfgs->yty, lbfgs->sts, lbfgs->workscalar));
277:       PetscCall(VecDestroyVecs(lmvm->m, &lbfgs->P));
278:       switch (lbfgs->scale_type) {
279:       case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
280:         PetscCall(MatLMVMReset(lbfgs->D, PETSC_TRUE));
281:         break;
282:       default:
283:         break;
284:       }
285:       lbfgs->allocated = PETSC_FALSE;
286:     } else {
287:       switch (lbfgs->scale_type) {
288:       case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
289:         lbfgs->sigma = lbfgs->delta;
290:         break;
291:       case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
292:         PetscCall(MatLMVMReset(lbfgs->D, PETSC_FALSE));
293:         dbase = (Mat_LMVM *)lbfgs->D->data;
294:         dctx  = (Mat_DiagBrdn *)dbase->ctx;
295:         PetscCall(VecSet(dctx->invD, lbfgs->delta));
296:         break;
297:       case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
298:         lbfgs->sigma = 1.0;
299:         break;
300:       default:
301:         break;
302:       }
303:     }
304:   }
305:   PetscCall(MatReset_LMVM(B, destructive));
306:   PetscFunctionReturn(PETSC_SUCCESS);
307: }

309: static PetscErrorCode MatAllocate_LMVMBFGS(Mat B, Vec X, Vec F)
310: {
311:   Mat_LMVM    *lmvm  = (Mat_LMVM *)B->data;
312:   Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;

314:   PetscFunctionBegin;
315:   PetscCall(MatAllocate_LMVM(B, X, F));
316:   if (!lbfgs->allocated) {
317:     PetscCall(VecDuplicate(X, &lbfgs->work));
318:     PetscCall(PetscMalloc5(lmvm->m, &lbfgs->stp, lmvm->m, &lbfgs->yts, lmvm->m, &lbfgs->yty, lmvm->m, &lbfgs->sts, lmvm->m, &lbfgs->workscalar));
319:     if (lmvm->m > 0) PetscCall(VecDuplicateVecs(X, lmvm->m, &lbfgs->P));
320:     switch (lbfgs->scale_type) {
321:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
322:       PetscCall(MatLMVMAllocate(lbfgs->D, X, F));
323:       break;
324:     default:
325:       break;
326:     }
327:     lbfgs->allocated = PETSC_TRUE;
328:   }
329:   PetscFunctionReturn(PETSC_SUCCESS);
330: }

332: static PetscErrorCode MatDestroy_LMVMBFGS(Mat B)
333: {
334:   Mat_LMVM    *lmvm  = (Mat_LMVM *)B->data;
335:   Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;

337:   PetscFunctionBegin;
338:   if (lbfgs->allocated) {
339:     PetscCall(VecDestroy(&lbfgs->work));
340:     PetscCall(PetscFree5(lbfgs->stp, lbfgs->yts, lbfgs->yty, lbfgs->sts, lbfgs->workscalar));
341:     PetscCall(VecDestroyVecs(lmvm->m, &lbfgs->P));
342:     lbfgs->allocated = PETSC_FALSE;
343:   }
344:   PetscCall(MatDestroy(&lbfgs->D));
345:   PetscCall(PetscFree(lmvm->ctx));
346:   PetscCall(MatDestroy_LMVM(B));
347:   PetscFunctionReturn(PETSC_SUCCESS);
348: }

350: static PetscErrorCode MatSetUp_LMVMBFGS(Mat B)
351: {
352:   Mat_LMVM    *lmvm  = (Mat_LMVM *)B->data;
353:   Mat_SymBrdn *lbfgs = (Mat_SymBrdn *)lmvm->ctx;
354:   PetscInt     n, N;

356:   PetscFunctionBegin;
357:   PetscCall(MatSetUp_LMVM(B));
358:   lbfgs->max_seq_rejects = lmvm->m / 2;
359:   if (!lbfgs->allocated) {
360:     PetscCall(VecDuplicate(lmvm->Xprev, &lbfgs->work));
361:     PetscCall(PetscMalloc5(lmvm->m, &lbfgs->stp, lmvm->m, &lbfgs->yts, lmvm->m, &lbfgs->yty, lmvm->m, &lbfgs->sts, lmvm->m, &lbfgs->workscalar));
362:     if (lmvm->m > 0) PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbfgs->P));
363:     switch (lbfgs->scale_type) {
364:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
365:       PetscCall(MatGetLocalSize(B, &n, &n));
366:       PetscCall(MatGetSize(B, &N, &N));
367:       PetscCall(MatSetSizes(lbfgs->D, n, n, N, N));
368:       PetscCall(MatSetUp(lbfgs->D));
369:       break;
370:     default:
371:       break;
372:     }
373:     lbfgs->allocated = PETSC_TRUE;
374:   }
375:   PetscFunctionReturn(PETSC_SUCCESS);
376: }

378: static PetscErrorCode MatSetFromOptions_LMVMBFGS(Mat B, PetscOptionItems *PetscOptionsObject)
379: {
380:   PetscFunctionBegin;
381:   PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
382:   PetscOptionsHeadBegin(PetscOptionsObject, "L-BFGS method for approximating SPD Jacobian actions (MATLMVMBFGS)");
383:   PetscCall(MatSetFromOptions_LMVMSymBrdn_Private(B, PetscOptionsObject));
384:   PetscOptionsHeadEnd();
385:   PetscFunctionReturn(PETSC_SUCCESS);
386: }

388: PetscErrorCode MatCreate_LMVMBFGS(Mat B)
389: {
390:   Mat_LMVM    *lmvm;
391:   Mat_SymBrdn *lbfgs;

393:   PetscFunctionBegin;
394:   PetscCall(MatCreate_LMVMSymBrdn(B));
395:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMBFGS));
396:   B->ops->setup          = MatSetUp_LMVMBFGS;
397:   B->ops->destroy        = MatDestroy_LMVMBFGS;
398:   B->ops->setfromoptions = MatSetFromOptions_LMVMBFGS;

400:   lmvm                = (Mat_LMVM *)B->data;
401:   lmvm->ops->allocate = MatAllocate_LMVMBFGS;
402:   lmvm->ops->reset    = MatReset_LMVMBFGS;
403:   lmvm->ops->update   = MatUpdate_LMVMBFGS;
404:   lmvm->ops->mult     = MatMult_LMVMBFGS;
405:   lmvm->ops->solve    = MatSolve_LMVMBFGS;
406:   lmvm->ops->copy     = MatCopy_LMVMBFGS;

408:   lbfgs        = (Mat_SymBrdn *)lmvm->ctx;
409:   lbfgs->needQ = PETSC_FALSE;
410:   lbfgs->phi   = 0.0;
411:   PetscFunctionReturn(PETSC_SUCCESS);
412: }

414: /*@
415:   MatCreateLMVMBFGS - Creates a limited-memory Broyden-Fletcher-Goldfarb-Shano (BFGS)
416:   matrix used for approximating Jacobians. L-BFGS is symmetric positive-definite by
417:   construction, and is commonly used to approximate Hessians in optimization
418:   problems.

420:   To use the L-BFGS matrix with other vector types, the matrix must be
421:   created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
422:   This ensures that the internal storage and work vectors are duplicated from the
423:   correct type of vector.

425:   Collective

427:   Input Parameters:
428: + comm - MPI communicator
429: . n    - number of local rows for storage vectors
430: - N    - global size of the storage vectors

432:   Output Parameter:
433: . B - the matrix

435:   Options Database Keys:
436: + -mat_lmvm_scale_type - (developer) type of scaling applied to J0 (none, scalar, diagonal)
437: . -mat_lmvm_theta      - (developer) convex ratio between BFGS and DFP components of the diagonal J0 scaling
438: . -mat_lmvm_rho        - (developer) update limiter for the J0 scaling
439: . -mat_lmvm_alpha      - (developer) coefficient factor for the quadratic subproblem in J0 scaling
440: . -mat_lmvm_beta       - (developer) exponential factor for the diagonal J0 scaling
441: - -mat_lmvm_sigma_hist - (developer) number of past updates to use in J0 scaling

443:   Level: intermediate

445:   Note:
446:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
447:   paradigm instead of this routine directly.

449: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMBFGS`, `MatCreateLMVMDFP()`, `MatCreateLMVMSR1()`,
450:           `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
451: @*/
452: PetscErrorCode MatCreateLMVMBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
453: {
454:   PetscFunctionBegin;
455:   PetscCall(KSPInitializePackage());
456:   PetscCall(MatCreate(comm, B));
457:   PetscCall(MatSetSizes(*B, n, n, N, N));
458:   PetscCall(MatSetType(*B, MATLMVMBFGS));
459:   PetscCall(MatSetUp(*B));
460:   PetscFunctionReturn(PETSC_SUCCESS);
461: }