Actual source code: denseqn.c

  1: #include <../src/ksp/ksp/utils/lmvm/dense/denseqn.h>
  2: #include <../src/ksp/ksp/utils/lmvm/diagbrdn/diagbrdn.h>
  3: #include <petscblaslapack.h>
  4: #include <petscmat.h>
  5: #include <petscsys.h>
  6: #include <petscsystypes.h>
  7: #include <petscis.h>
  8: #include <petscoptions.h>
  9: #include <petscdevice.h>
 10: #include <petsc/private/deviceimpl.h>

 12: static PetscErrorCode MatMult_LMVMDQN(Mat, Vec, Vec);
 13: static PetscErrorCode MatMult_LMVMDBFGS(Mat, Vec, Vec);
 14: static PetscErrorCode MatMult_LMVMDDFP(Mat, Vec, Vec);
 15: static PetscErrorCode MatSolve_LMVMDQN(Mat, Vec, Vec);
 16: static PetscErrorCode MatSolve_LMVMDBFGS(Mat, Vec, Vec);
 17: static PetscErrorCode MatSolve_LMVMDDFP(Mat, Vec, Vec);

 19: static inline PetscInt recycle_index(PetscInt m, PetscInt idx)
 20: {
 21:   return idx % m;
 22: }

 24: static inline PetscInt history_index(PetscInt m, PetscInt num_updates, PetscInt idx)
 25: {
 26:   return (idx - num_updates) + PetscMin(m, num_updates);
 27: }

 29: static inline PetscInt oldest_update(PetscInt m, PetscInt idx)
 30: {
 31:   return PetscMax(0, idx - m);
 32: }

 34: static PetscErrorCode MatView_LMVMDQN(Mat B, PetscViewer pv)
 35: {
 36:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
 37:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;

 39:   PetscBool isascii;

 41:   PetscFunctionBegin;
 42:   PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &isascii));
 43:   PetscCall(MatView_LMVM(B, pv));
 44:   if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatView(ldfp->diag_qn, pv)); }
 45:   if (isascii) { PetscCall(PetscViewerASCIIPrintf(pv, "Counts: S x : %" PetscInt_FMT ", S^T x : %" PetscInt_FMT ", Y x : %" PetscInt_FMT ",  Y^T x: %" PetscInt_FMT "\n", ldfp->S_count, ldfp->St_count, ldfp->Y_count, ldfp->Yt_count)); }
 46:   PetscFunctionReturn(PETSC_SUCCESS);
 47: }

 49: static PetscErrorCode MatReset_LMVMDQN(Mat, PetscBool);
 50: static PetscErrorCode MatAllocate_LMVMDQN(Mat B, Vec X, Vec F)
 51: {
 52:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
 53:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
 54:   PetscBool is_dbfgs, is_ddfp, is_dqn, same, allocate = PETSC_FALSE;
 55:   VecType   vec_type;
 56:   PetscInt  m, n, M, N;
 57:   MPI_Comm  comm = PetscObjectComm((PetscObject)B);

 59:   PetscFunctionBegin;
 60:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
 61:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
 62:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));

 64:   if (lmvm->allocated) {
 65:     PetscCall(VecGetType(X, &vec_type));
 66:     PetscCall(PetscObjectTypeCompare((PetscObject)lmvm->Xprev, vec_type, &same));
 67:     if (!same) {
 68:       /* Given X vector has a different type than allocated X-type data structures.
 69:          We need to destroy all of this and duplicate again out of the given vector. */
 70:       allocate = PETSC_TRUE;
 71:       PetscCall(MatReset_LMVMDQN(B, PETSC_TRUE));
 72:     } else {
 73:       VecCheckMatCompatible(B, X, 2, F, 3);
 74:     }
 75:   } else {
 76:     allocate = PETSC_TRUE;
 77:   }
 78:   if (allocate) {
 79:     PetscCall(VecGetLocalSize(X, &n));
 80:     PetscCall(VecGetSize(X, &N));
 81:     PetscCall(VecGetLocalSize(F, &m));
 82:     PetscCall(VecGetSize(F, &M));
 83:     PetscCheck(N == M, comm, PETSC_ERR_ARG_SIZ, "Incorrect problem sizes! dim(X) not equal to dim(F)");
 84:     PetscCall(MatSetSizes(B, m, n, M, N));
 85:     PetscCall(PetscLayoutSetUp(B->rmap));
 86:     PetscCall(PetscLayoutSetUp(B->cmap));
 87:     PetscCall(VecDuplicate(X, &lmvm->Xprev));
 88:     PetscCall(VecDuplicate(F, &lmvm->Fprev));
 89:     if (lmvm->m > 0) {
 90:       PetscMPIInt rank;
 91:       PetscInt    m, M;

 93:       PetscCallMPI(MPI_Comm_rank(comm, &rank));
 94:       M = lmvm->m;
 95:       m = (rank == 0) ? M : 0;

 97:       /* For DBFGS: Create data needed for MatSolve() eagerly; data needed for MatMult() will be created on demand
 98:        * For DDFP : Create data needed for MatMult() eagerly; data needed for MatSolve() will be created on demand
 99:        * For DQN  : Create all data eagerly */
100:       PetscCall(VecGetType(X, &vec_type));
101:       PetscCall(MatCreateDenseFromVecType(comm, vec_type, n, m, N, M, -1, NULL, &lqn->Sfull));
102:       PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->Yfull));
103:       if (is_dqn) {
104:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
105:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
106:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
107:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
108:       } else if (is_ddfp) {
109:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
110:         PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->HY));
111:         PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->diag_vec, &lqn->rwork1));
112:         PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->rwork2, &lqn->rwork3));
113:       } else if (is_dbfgs) {
114:         PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
115:         PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->BS));
116:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
117:         PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
118:       } else {
119:         SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatAllocate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
120:       }
121:       /* initialize StY_triu and YtS_triu to identity, if they exist, so it is invertible */
122:       if (lqn->StY_triu) {
123:         PetscCall(MatZeroEntries(lqn->StY_triu));
124:         PetscCall(MatShift(lqn->StY_triu, 1.0));
125:       }
126:       if (lqn->YtS_triu) {
127:         PetscCall(MatZeroEntries(lqn->YtS_triu));
128:         PetscCall(MatShift(lqn->YtS_triu, 1.0));
129:       }
130:       PetscCall(VecDuplicate(lqn->rwork2, &lqn->cyclic_work_vec));
131:       PetscCall(VecZeroEntries(lqn->rwork1));
132:       PetscCall(VecZeroEntries(lqn->rwork2));
133:       PetscCall(VecZeroEntries(lqn->rwork3));
134:       PetscCall(VecZeroEntries(lqn->diag_vec));
135:     }
136:     PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work));
137:     if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatLMVMAllocate(lqn->diag_qn, X, F)); }
138:     lmvm->allocated = PETSC_TRUE;
139:     B->preallocated = PETSC_TRUE;
140:     B->assembled    = PETSC_TRUE;
141:   }
142:   PetscFunctionReturn(PETSC_SUCCESS);
143: }

145: static PetscErrorCode MatSetUp_LMVMDQN(Mat B)
146: {
147:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;

149:   PetscInt    m, n, M, N;
150:   PetscMPIInt size;
151:   MPI_Comm    comm = PetscObjectComm((PetscObject)B);
152:   Vec         Xtmp, Ftmp;

154:   PetscFunctionBegin;
155:   PetscCall(MatGetSize(B, &M, &N));
156:   PetscCheck(M != 0 && N != 0, comm, PETSC_ERR_ORDER, "MatSetSizes() must be called before MatSetUp()");
157:   if (!lmvm->allocated) {
158:     PetscCallMPI(MPI_Comm_size(comm, &size));
159:     if (size == 1) {
160:       PetscCall(VecCreateSeq(comm, N, &Xtmp));
161:       PetscCall(VecCreateSeq(comm, M, &Ftmp));
162:     } else {
163:       PetscCall(MatGetLocalSize(B, &m, &n));
164:       PetscCall(VecCreateMPI(comm, n, N, &Xtmp));
165:       PetscCall(VecCreateMPI(comm, m, M, &Ftmp));
166:     }
167:     PetscCall(MatAllocate_LMVMDQN(B, Xtmp, Ftmp));
168:     PetscCall(VecDestroy(&Xtmp));
169:     PetscCall(VecDestroy(&Ftmp));
170:   }
171:   PetscFunctionReturn(PETSC_SUCCESS);
172: }

174: static PetscErrorCode MatSetFromOptions_LMVMDQN_Private(Mat B, PetscOptionItems *PetscOptionsObject)
175: {
176:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
177:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;
178:   PetscBool is_dbfgs, is_ddfp, is_dqn;

180:   PetscFunctionBegin;
181:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
182:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
183:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
184:   if (is_dqn) {
185:     PetscCall(PetscOptionsEnum("-mat_lqn_type", "Implementation options for L-QN", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
186:     PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
187:   } else if (is_dbfgs) {
188:     PetscCall(PetscOptionsEnum("-mat_lbfgs_type", "Implementation options for L-BFGS", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
189:     PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
190:   } else if (is_ddfp) {
191:     PetscCall(PetscOptionsEnum("-mat_ldfp_type", "Implementation options for L-DFP", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
192:     PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
193:   } else {
194:     SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatSetFromOptions_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
195:   }
196:   PetscFunctionReturn(PETSC_SUCCESS);
197: }

199: static PetscErrorCode MatSetFromOptions_LMVMDQN(Mat B, PetscOptionItems *PetscOptionsObject)
200: {
201:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
202:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

204:   PetscFunctionBegin;
205:   PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
206:   PetscOptionsBegin(PetscObjectComm((PetscObject)B), ((PetscObject)B)->prefix, "Dense QN method (MATLMVMDQN,MATLMVMDBFGS,MATLMVMDDFP)", NULL);
207:   PetscCall(MatSetFromOptions_LMVMDQN_Private(B, PetscOptionsObject));
208:   PetscOptionsEnd();
209:   lqn->allocated = PETSC_FALSE;
210:   if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
211:     const char *prefix;

213:     PetscCall(MatGetOptionsPrefix(B, &prefix));
214:     PetscCall(MatSetOptionsPrefix(lqn->diag_qn, prefix));
215:     PetscCall(MatAppendOptionsPrefix(lqn->diag_qn, "J0_"));
216:     PetscCall(MatSetFromOptions(lqn->diag_qn));
217:   }
218:   PetscFunctionReturn(PETSC_SUCCESS);
219: }

221: static PetscErrorCode MatLMVMDQNResetDestructive(Mat B)
222: {
223:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
224:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

226:   PetscFunctionBegin;
227:   PetscCall(MatDestroy(&lqn->Sfull));
228:   PetscCall(MatDestroy(&lqn->Yfull));
229:   PetscCall(MatDestroy(&lqn->HY));
230:   PetscCall(MatDestroy(&lqn->BS));
231:   PetscCall(MatDestroy(&lqn->StY_triu));
232:   PetscCall(MatDestroy(&lqn->YtS_triu));
233:   PetscCall(VecDestroy(&lqn->StFprev));
234:   PetscCall(VecDestroy(&lqn->Fprev_ref));
235:   lqn->Fprev_state = 0;
236:   PetscCall(MatDestroy(&lqn->YtS_triu_strict));
237:   PetscCall(MatDestroy(&lqn->StY_triu_strict));
238:   PetscCall(MatDestroy(&lqn->StBS));
239:   PetscCall(MatDestroy(&lqn->YtHY));
240:   PetscCall(MatDestroy(&lqn->J));
241:   PetscCall(MatDestroy(&lqn->temp_mat));
242:   PetscCall(VecDestroy(&lqn->diag_vec));
243:   PetscCall(VecDestroy(&lqn->diag_vec_recycle_order));
244:   PetscCall(VecDestroy(&lqn->inv_diag_vec));
245:   PetscCall(VecDestroy(&lqn->column_work));
246:   PetscCall(VecDestroy(&lqn->rwork1));
247:   PetscCall(VecDestroy(&lqn->rwork2));
248:   PetscCall(VecDestroy(&lqn->rwork3));
249:   PetscCall(VecDestroy(&lqn->rwork2_local));
250:   PetscCall(VecDestroy(&lqn->rwork3_local));
251:   PetscCall(VecDestroy(&lqn->cyclic_work_vec));
252:   lqn->allocated = PETSC_FALSE;
253:   PetscFunctionReturn(PETSC_SUCCESS);
254: }

256: static PetscErrorCode MatDestroy_LMVMDQN(Mat B)
257: {
258:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
259:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

261:   PetscFunctionBegin;
262:   PetscCall(MatLMVMDQNResetDestructive(B));
263:   PetscCall(MatDestroy(&lqn->diag_qn));
264:   PetscCall(PetscFree(lmvm->ctx));
265:   PetscCall(MatDestroy_LMVM(B));
266:   PetscFunctionReturn(PETSC_SUCCESS);
267: }

269: static PetscErrorCode MatReset_LMVMDQN(Mat B, PetscBool destructive)
270: {
271:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
272:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

274:   PetscFunctionBegin;
275:   lqn->watchdog = 0;
276:   if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
277:     Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
278:     Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
279:     if (!diagctx->allocated) PetscCall(MatLMVMAllocate(lqn->diag_qn, lmvm->Xprev, lmvm->Fprev));
280:     PetscCall(MatLMVMReset(lqn->diag_qn, destructive));
281:   }
282:   if (lqn->Sfull) PetscCall(MatZeroEntries(lqn->Sfull));
283:   if (lqn->Yfull) PetscCall(MatZeroEntries(lqn->Yfull));
284:   if (lqn->BS) PetscCall(MatZeroEntries(lqn->BS));
285:   if (lqn->HY) PetscCall(MatZeroEntries(lqn->HY));
286:   if (lqn->StY_triu) { /* Set to identity by default so it is invertible */
287:     PetscCall(MatZeroEntries(lqn->StY_triu));
288:     PetscCall(MatShift(lqn->StY_triu, 1.0));
289:   }
290:   if (lqn->YtS_triu) {
291:     PetscCall(MatZeroEntries(lqn->YtS_triu));
292:     PetscCall(MatShift(lqn->YtS_triu, 1.0));
293:   }
294:   if (lqn->YtS_triu_strict) PetscCall(MatZeroEntries(lqn->YtS_triu_strict));
295:   if (lqn->StY_triu_strict) PetscCall(MatZeroEntries(lqn->StY_triu_strict));
296:   if (lqn->StBS) {
297:     PetscCall(MatZeroEntries(lqn->StBS));
298:     PetscCall(MatShift(lqn->StBS, 1.0));
299:   }
300:   if (lqn->YtHY) {
301:     PetscCall(MatZeroEntries(lqn->YtHY));
302:     PetscCall(MatShift(lqn->YtHY, 1.0));
303:   }
304:   if (lqn->Fprev_ref) PetscCall(VecDestroy(&lqn->Fprev_ref));
305:   lqn->Fprev_state = 0;
306:   if (lqn->StFprev) PetscCall(VecZeroEntries(lqn->StFprev));
307:   if (destructive) { PetscCall(MatLMVMDQNResetDestructive(B)); }
308:   lqn->num_updates      = 0;
309:   lqn->num_mult_updates = 0;
310:   PetscCall(MatReset_LMVM(B, destructive));
311:   PetscFunctionReturn(PETSC_SUCCESS);
312: }

314: static PetscErrorCode MatUpdate_LMVMDQN(Mat B, Vec X, Vec F)
315: {
316:   Mat_LMVM     *lmvm    = (Mat_LMVM *)B->data;
317:   Mat_DQN      *lqn     = (Mat_DQN *)lmvm->ctx;
318:   Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
319:   Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;

321:   PetscBool          is_ddfp, is_dbfgs, is_dqn;
322:   PetscDeviceContext dctx;

324:   PetscFunctionBegin;
325:   if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
326:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
327:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
328:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
329:   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
330:   if (lmvm->prev_set) {
331:     Vec         FX[2];
332:     PetscScalar dotFX[2];
333:     PetscScalar stFprev;
334:     PetscScalar curvature, yTy;
335:     PetscReal   curvtol;
336:     Vec         workvec1;

338:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
339:     PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
340:     /* Test if the updates can be accepted */
341:     FX[0] = lmvm->Fprev; /* dotFX[0] = s^T Fprev */
342:     FX[1] = F;           /* dotFX[1] = s^T F     */
343:     PetscCall(VecMDot(lmvm->Xprev, 2, FX, dotFX));
344:     PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
345:     PetscCall(VecDot(lmvm->Fprev, lmvm->Fprev, &yTy));
346:     stFprev   = PetscConj(dotFX[0]);
347:     curvature = PetscConj(dotFX[1] - dotFX[0]); /* s^T y */
348:     if (PetscRealPart(yTy) < lmvm->eps) {
349:       curvtol = 0.0;
350:     } else {
351:       curvtol = lmvm->eps * PetscRealPart(yTy);
352:     }
353:     if (PetscRealPart(curvature) > curvtol) {
354:       PetscInt m     = lmvm->m;
355:       PetscInt k     = lqn->num_updates;
356:       PetscInt h_new = k + 1 - oldest_update(m, k + 1);
357:       PetscInt idx   = recycle_index(m, k);

359:       /* Update is good, accept it */
360:       lmvm->nupdates++;
361:       lqn->num_updates++;
362:       lqn->watchdog = 0;

364:       if (lmvm->k != m - 1) {
365:         lmvm->k++;
366:       } else if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
367:         if (is_dqn) {
368:           PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
369:           PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
370:         } else if (is_dbfgs) {
371:           PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
372:         } else if (is_ddfp) {
373:           PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
374:         } else {
375:           SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
376:         }
377:       }

379:       /* First update the S^T matrix */
380:       PetscCall(MatDenseGetColumnVecWrite(lqn->Sfull, idx, &workvec1));
381:       PetscCall(VecCopy(lmvm->Xprev, workvec1));
382:       PetscCall(MatDenseRestoreColumnVecWrite(lqn->Sfull, idx, &workvec1));

384:       /* Now repeat update for the Y^T matrix */
385:       PetscCall(MatDenseGetColumnVecWrite(lqn->Yfull, idx, &workvec1));
386:       PetscCall(VecCopy(lmvm->Fprev, workvec1));
387:       PetscCall(MatDenseRestoreColumnVecWrite(lqn->Yfull, idx, &workvec1));

389:       if (is_dqn || is_dbfgs) { /* implement the scheme of Byrd, Nocedal, and Schnabel to save a MatMultTranspose call in the common case the       *
390:          * H_k is immediately applied to F after begin updated.   The S^T y computation can be split up as S^T (F - F_prev) */
391:         PetscInt     local_n;
392:         PetscScalar *StFprev;
393:         PetscMemType memtype;
394:         PetscInt     StYidx;

396:         StYidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
397:         if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
398:         PetscCall(VecGetLocalSize(lqn->StFprev, &local_n));
399:         PetscCall(VecGetArrayAndMemType(lqn->StFprev, &StFprev, &memtype));
400:         if (local_n) {
401:           if (PetscMemTypeHost(memtype)) {
402:             StFprev[idx] = stFprev;
403:           } else {
404:             PetscCall(PetscDeviceRegisterMemory(&stFprev, PETSC_MEMTYPE_HOST, 1 * sizeof(stFprev)));
405:             PetscCall(PetscDeviceRegisterMemory(StFprev, memtype, local_n * sizeof(*StFprev)));
406:             PetscCall(PetscDeviceArrayCopy(dctx, &StFprev[idx], &stFprev, 1));
407:           }
408:         }
409:         PetscCall(VecRestoreArrayAndMemType(lqn->StFprev, &StFprev));

411:         {
412:           Vec this_sy_col;
413:           /* Now StFprev is updated for the new S vector.  Write -StFprev into the appropriate row */
414:           PetscCall(MatDenseGetColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
415:           PetscCall(VecAXPBY(this_sy_col, -1.0, 0.0, lqn->StFprev));

417:           /* Now compute the new StFprev */
418:           PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h_new));
419:           lqn->St_count++;

421:           /* Now add StFprev: this_sy_col == S^T (F - Fprev) == S^T y */
422:           PetscCall(VecAXPY(this_sy_col, 1.0, lqn->StFprev));

424:           if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_sy_col, lqn->num_updates, lqn->cyclic_work_vec));
425:           PetscCall(MatDenseRestoreColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
426:         }
427:       }

429:       if (is_ddfp || is_dqn) {
430:         PetscInt YtSidx;

432:         YtSidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;

434:         {
435:           Vec this_ys_col;

437:           PetscCall(MatDenseGetColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
438:           PetscCall(MatMultHermitianTransposeColumnRange(lqn->Yfull, lmvm->Xprev, this_ys_col, 0, h_new));
439:           lqn->Yt_count++;

441:           if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_ys_col, lqn->num_updates, lqn->cyclic_work_vec));
442:           PetscCall(MatDenseRestoreColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
443:         }
444:       }

446:       if (is_dbfgs || is_dqn) {
447:         PetscCall(MatGetDiagonal(lqn->StY_triu, lqn->diag_vec));
448:       } else if (is_ddfp) {
449:         PetscCall(MatGetDiagonal(lqn->YtS_triu, lqn->diag_vec));
450:       } else {
451:         SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
452:       }

454:       if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
455:         if (!lqn->diag_vec_recycle_order) PetscCall(VecDuplicate(lqn->diag_vec, &lqn->diag_vec_recycle_order));
456:         PetscCall(VecCopy(lqn->diag_vec, lqn->diag_vec_recycle_order));
457:         PetscCall(VecHistoryOrderToRecycleOrder(B, lqn->diag_vec_recycle_order, lqn->num_updates, lqn->cyclic_work_vec));
458:       } else {
459:         if (!lqn->diag_vec_recycle_order) {
460:           PetscCall(PetscObjectReference((PetscObject)lqn->diag_vec));
461:           lqn->diag_vec_recycle_order = lqn->diag_vec;
462:         }
463:       }

465:       if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_SCALAR) {
466:         PetscScalar sTy = curvature;

468:         diagctx->sigma = PetscRealPart(sTy) / PetscRealPart(yTy);
469:       } else if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
470:         PetscScalar sTy = curvature;
471:         PetscScalar sTDs, yTDy;

473:         if (!diagctx->invD) {
474:           PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->invD));
475:           PetscCall(VecSet(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTy)));
476:         }
477:         if (!diagctx->U) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->U));
478:         if (!diagctx->V) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->V));
479:         if (!diagctx->W) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->W));

481:         /* diagonal Broyden */
482:         PetscCall(VecReciprocal(diagctx->invD));
483:         PetscCall(VecPointwiseMult(diagctx->V, diagctx->invD, lmvm->Xprev));
484:         PetscCall(VecPointwiseMult(diagctx->U, lmvm->Fprev, lmvm->Fprev));
485:         if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->U));
486:         PetscCall(VecAXPY(diagctx->invD, 1.0 / sTy, diagctx->U));
487:         PetscCall(VecDot(diagctx->V, lmvm->Xprev, &sTDs));
488:         if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->V));
489:         PetscCall(VecPointwiseMult(diagctx->V, diagctx->V, diagctx->V));
490:         PetscCall(VecAXPY(diagctx->invD, -1.0 / PetscMax(PetscRealPart(sTDs), diagctx->tol), diagctx->V));
491:         PetscCall(VecReciprocal(diagctx->invD));
492:         PetscCall(VecAbs(diagctx->invD));
493:         PetscCall(VecDot(diagctx->U, diagctx->invD, &yTDy));
494:         PetscCall(VecScale(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTDy)));
495:       }
496:     } else {
497:       /* Update is bad, skip it */
498:       ++lmvm->nrejects;
499:       ++lqn->watchdog;
500:       PetscInt m = lmvm->m;
501:       PetscInt k = lqn->num_updates;
502:       PetscInt h = k - oldest_update(m, k);

504:       /* we still have to maintain StFprev */
505:       if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
506:       PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h));
507:       lqn->St_count++;
508:     }
509:   } else {
510:     switch (lqn->scale_type) {
511:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
512:       PetscCall(VecSet(diagctx->invD, diagctx->delta));
513:       break;
514:     case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
515:       diagctx->sigma = diagctx->delta;
516:       break;
517:     default:
518:       diagctx->sigma = 1.0;
519:       break;
520:     }
521:   }

523:   if (lqn->watchdog > lqn->max_seq_rejects) PetscCall(MatLMVMReset(B, PETSC_FALSE));

525:   /* Save the solution and function to be used in the next update */
526:   PetscCall(VecCopy(X, lmvm->Xprev));
527:   PetscCall(VecCopy(F, lmvm->Fprev));
528:   PetscCall(PetscObjectReference((PetscObject)F));
529:   PetscCall(VecDestroy(&lqn->Fprev_ref));
530:   lqn->Fprev_ref = F;
531:   PetscCall(PetscObjectStateGet((PetscObject)F, &lqn->Fprev_state));
532:   lmvm->prev_set = PETSC_TRUE;
533:   PetscFunctionReturn(PETSC_SUCCESS);
534: }

536: static PetscErrorCode MatDestroyThenCopy(Mat src, Mat *dst)
537: {
538:   PetscFunctionBegin;
539:   PetscCall(MatDestroy(dst));
540:   if (src) { PetscCall(MatDuplicate(src, MAT_COPY_VALUES, dst)); }
541:   PetscFunctionReturn(PETSC_SUCCESS);
542: }

544: static PetscErrorCode VecDestroyThenCopy(Vec src, Vec *dst)
545: {
546:   PetscFunctionBegin;
547:   PetscCall(VecDestroy(dst));
548:   if (src) {
549:     PetscCall(VecDuplicate(src, dst));
550:     PetscCall(VecCopy(src, *dst));
551:   }
552:   PetscFunctionReturn(PETSC_SUCCESS);
553: }

555: static PetscErrorCode MatCopy_LMVMDQN(Mat B, Mat M, MatStructure str)
556: {
557:   Mat_LMVM *bdata = (Mat_LMVM *)B->data;
558:   Mat_DQN  *blqn  = (Mat_DQN *)bdata->ctx;
559:   Mat_LMVM *mdata = (Mat_LMVM *)M->data;
560:   Mat_DQN  *mlqn  = (Mat_DQN *)mdata->ctx;

562:   PetscFunctionBegin;
563:   mlqn->num_updates      = blqn->num_updates;
564:   mlqn->num_mult_updates = blqn->num_mult_updates;
565:   PetscCall(MatDestroyThenCopy(blqn->Sfull, &mlqn->Sfull));
566:   PetscCall(MatDestroyThenCopy(blqn->Yfull, &mlqn->Yfull));
567:   PetscCall(MatDestroyThenCopy(blqn->HY, &mlqn->BS));
568:   PetscCall(VecDestroyThenCopy(blqn->StFprev, &mlqn->StFprev));
569:   PetscCall(MatDestroyThenCopy(blqn->StY_triu, &mlqn->StY_triu));
570:   PetscCall(MatDestroyThenCopy(blqn->StY_triu_strict, &mlqn->StY_triu_strict));
571:   PetscCall(MatDestroyThenCopy(blqn->YtS_triu, &mlqn->YtS_triu));
572:   PetscCall(MatDestroyThenCopy(blqn->YtS_triu_strict, &mlqn->YtS_triu_strict));
573:   PetscCall(MatDestroyThenCopy(blqn->YtHY, &mlqn->YtHY));
574:   PetscCall(MatDestroyThenCopy(blqn->StBS, &mlqn->StBS));
575:   PetscCall(MatDestroyThenCopy(blqn->J, &mlqn->J));
576:   PetscCall(VecDestroyThenCopy(blqn->diag_vec, &mlqn->diag_vec));
577:   PetscCall(VecDestroyThenCopy(blqn->diag_vec_recycle_order, &mlqn->diag_vec_recycle_order));
578:   PetscCall(VecDestroyThenCopy(blqn->inv_diag_vec, &mlqn->inv_diag_vec));
579:   mlqn->dense_type      = blqn->dense_type;
580:   mlqn->strategy        = blqn->strategy;
581:   mlqn->scale_type      = blqn->scale_type;
582:   mlqn->S_count         = 0;
583:   mlqn->St_count        = 0;
584:   mlqn->Y_count         = 0;
585:   mlqn->Yt_count        = 0;
586:   mlqn->watchdog        = blqn->watchdog;
587:   mlqn->max_seq_rejects = blqn->max_seq_rejects;
588:   mlqn->allocated       = blqn->allocated;
589:   PetscCall(PetscObjectReference((PetscObject)blqn->Fprev_ref));
590:   PetscCall(VecDestroy(&mlqn->Fprev_ref));
591:   mlqn->Fprev_ref   = blqn->Fprev_ref;
592:   mlqn->Fprev_state = blqn->Fprev_state;
593:   if (!(bdata->J0 || bdata->user_pc || bdata->user_ksp || bdata->user_scale)) { PetscCall(MatCopy(blqn->diag_qn, mlqn->diag_qn, SAME_NONZERO_PATTERN)); }
594:   PetscFunctionReturn(PETSC_SUCCESS);
595: }

597: static PetscErrorCode MatMult_LMVMDQN(Mat B, Vec X, Vec Z)
598: {
599:   PetscFunctionBegin;
600:   PetscCall(MatMult_LMVMDDFP(B, X, Z));
601:   PetscFunctionReturn(PETSC_SUCCESS);
602: }

604: static PetscErrorCode MatSolve_LMVMDQN(Mat H, Vec F, Vec dX)
605: {
606:   PetscFunctionBegin;
607:   PetscCall(MatSolve_LMVMDBFGS(H, F, dX));
608:   PetscFunctionReturn(PETSC_SUCCESS);
609: }

611: /*
612:   This dense representation uses Davidon-Fletcher-Powell (DFP) for MatMult,
613:   and Broyden-Fletcher-Goldfarb-Shanno (BFGS) for MatSolve. This implementation
614:   results in avoiding costly Cholesky factorization, at the cost of duality cap.
615:   Please refer to MatLMVMDDFP and MatLMVMDBFGS for more information.
616: */
617: PetscErrorCode MatCreate_LMVMDQN(Mat B)
618: {
619:   Mat_LMVM *lmvm;
620:   Mat_DQN  *ldfp;

622:   PetscFunctionBegin;
623:   PetscCall(MatCreate_LMVM(B));
624:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDQN));
625:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
626:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
627:   B->ops->view           = MatView_LMVMDQN;
628:   B->ops->setup          = MatSetUp_LMVMDQN;
629:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
630:   B->ops->destroy        = MatDestroy_LMVMDQN;

632:   lmvm                = (Mat_LMVM *)B->data;
633:   lmvm->square        = PETSC_TRUE;
634:   lmvm->ops->allocate = MatAllocate_LMVMDQN;
635:   lmvm->ops->reset    = MatReset_LMVMDQN;
636:   lmvm->ops->update   = MatUpdate_LMVMDQN;
637:   lmvm->ops->mult     = MatMult_LMVMDQN;
638:   lmvm->ops->solve    = MatSolve_LMVMDQN;
639:   lmvm->ops->copy     = MatCopy_LMVMDQN;

641:   PetscCall(PetscNew(&ldfp));
642:   lmvm->ctx             = (void *)ldfp;
643:   ldfp->allocated       = PETSC_FALSE;
644:   ldfp->watchdog        = 0;
645:   ldfp->max_seq_rejects = lmvm->m / 2;
646:   ldfp->strategy        = MAT_LMVM_DENSE_INPLACE;
647:   ldfp->scale_type      = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;

649:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ldfp->diag_qn));
650:   PetscCall(MatSetType(ldfp->diag_qn, MATLMVMDIAGBROYDEN));
651:   PetscCall(MatSetOptionsPrefix(ldfp->diag_qn, "J0_"));
652:   PetscFunctionReturn(PETSC_SUCCESS);
653: }

655: /*@
656:   MatCreateLMVMDQN - Creates a dense representation of the limited-memory
657:   Quasi-Newton approximation to a Hessian.

659:   Collective

661:   Input Parameters:
662: + comm - MPI communicator
663: . n    - number of local rows for storage vectors
664: - N    - global size of the storage vectors

666:   Output Parameter:
667: . B - the matrix

669:   Level: advanced

671:   Note:
672:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
673:   paradigm instead of this routine directly.

675: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatCreateLMVMDDFP()`, `MatCreateLMVMDBFGS()`
676: @*/
677: PetscErrorCode MatCreateLMVMDQN(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
678: {
679:   PetscFunctionBegin;
680:   PetscCall(KSPInitializePackage());
681:   PetscCall(MatCreate(comm, B));
682:   PetscCall(MatSetSizes(*B, n, n, N, N));
683:   PetscCall(MatSetType(*B, MATLMVMDQN));
684:   PetscCall(MatSetUp(*B));
685:   PetscFunctionReturn(PETSC_SUCCESS);
686: }

688: static PetscErrorCode MatDQNApplyJ0Fwd(Mat B, Vec X, Vec Z)
689: {
690:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
691:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

693:   PetscFunctionBegin;
694:   if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
695:     lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
696:     PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
697:   } else {
698:     Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
699:     Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;

701:     switch (lqn->scale_type) {
702:     case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
703:       PetscCall(VecAXPBY(Z, 1.0 / diagctx->sigma, 0.0, X));
704:       break;
705:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
706:       PetscCall(VecPointwiseDivide(Z, X, diagctx->invD));
707:       break;
708:     case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
709:     default:
710:       PetscCall(VecCopy(X, Z));
711:       break;
712:     }
713:   }
714:   PetscFunctionReturn(PETSC_SUCCESS);
715: }

717: static PetscErrorCode MatDQNApplyJ0Inv(Mat B, Vec F, Vec dX)
718: {
719:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
720:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

722:   PetscFunctionBegin;
723:   if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
724:     lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
725:     PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
726:   } else {
727:     Mat_LMVM     *dbase   = (Mat_LMVM *)lqn->diag_qn->data;
728:     Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;

730:     switch (lqn->scale_type) {
731:     case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
732:       PetscCall(VecAXPBY(dX, diagctx->sigma, 0.0, F));
733:       break;
734:     case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
735:       PetscCall(VecPointwiseMult(dX, F, diagctx->invD));
736:       break;
737:     case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
738:     default:
739:       PetscCall(VecCopy(F, dX));
740:       break;
741:     }
742:   }
743:   PetscFunctionReturn(PETSC_SUCCESS);
744: }

746: /* This is not Bunch-Kaufman LDLT: here L is strictly lower triangular part of STY */
747: static PetscErrorCode MatGetLDLT(Mat B, Mat result)
748: {
749:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
750:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
751:   PetscInt  m_local;

753:   PetscFunctionBegin;
754:   if (!lbfgs->temp_mat) PetscCall(MatDuplicate(lbfgs->YtS_triu_strict, MAT_SHARE_NONZERO_PATTERN, &lbfgs->temp_mat));
755:   PetscCall(MatCopy(lbfgs->YtS_triu_strict, lbfgs->temp_mat, SAME_NONZERO_PATTERN));
756:   PetscCall(MatDiagonalScale(lbfgs->temp_mat, lbfgs->inv_diag_vec, NULL));
757:   PetscCall(MatGetLocalSize(result, &m_local, NULL));
758:   // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
759:   PetscCall(MatConjugate(lbfgs->temp_mat));
760:   if (m_local) {
761:     Mat temp_local, YtS_local, result_local;
762:     PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
763:     PetscCall(MatDenseGetLocalMatrix(lbfgs->temp_mat, &temp_local));
764:     PetscCall(MatDenseGetLocalMatrix(result, &result_local));
765:     PetscCall(MatTransposeMatMult(YtS_local, temp_local, MAT_REUSE_MATRIX, PETSC_DEFAULT, &result_local));
766:   }
767:   PetscCall(MatConjugate(result));
768:   PetscFunctionReturn(PETSC_SUCCESS);
769: }

771: static PetscErrorCode MatLMVMDBFGSUpdateMultData(Mat B)
772: {
773:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
774:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
775:   PetscInt  m     = lmvm->m, m_local;
776:   PetscInt  k     = lbfgs->num_updates;
777:   PetscInt  h     = k - oldest_update(m, k);
778:   PetscInt  j_0;
779:   PetscInt  prev_oldest;
780:   Mat       J_local;

782:   PetscFunctionBegin;
783:   if (!lbfgs->YtS_triu_strict) {
784:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->YtS_triu_strict));
785:     PetscCall(MatDestroy(&lbfgs->StBS));
786:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->StBS));
787:     PetscCall(MatDestroy(&lbfgs->J));
788:     PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->J));
789:     PetscCall(MatDestroy(&lbfgs->BS));
790:     PetscCall(MatDuplicate(lbfgs->Yfull, MAT_SHARE_NONZERO_PATTERN, &lbfgs->BS));
791:     PetscCall(MatShift(lbfgs->StBS, 1.0));
792:     lbfgs->num_mult_updates = oldest_update(m, k);
793:   }
794:   if (lbfgs->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);

796:   /* B_0 may have been updated, we must recompute B_0 S and S^T B_0 S */
797:   for (PetscInt j = oldest_update(m, k); j < k; j++) {
798:     Vec      s_j;
799:     Vec      Bs_j;
800:     Vec      StBs_j;
801:     PetscInt S_idx    = recycle_index(m, j);
802:     PetscInt StBS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);

804:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
805:     PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
806:     PetscCall(MatDQNApplyJ0Fwd(B, s_j, Bs_j));
807:     PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
808:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
809:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Bs_j, StBs_j, 0, h));
810:     lbfgs->St_count++;
811:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, StBs_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
812:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
813:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
814:   }
815:   prev_oldest = oldest_update(m, lbfgs->num_mult_updates);
816:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
817:     /* move the YtS entries that have been computed and need to be kept back up */
818:     PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);

820:     PetscCall(MatMove_LR3(B, lbfgs->YtS_triu_strict, m_keep));
821:   }
822:   PetscCall(MatGetLocalSize(lbfgs->YtS_triu_strict, &m_local, NULL));
823:   j_0 = PetscMax(lbfgs->num_mult_updates, oldest_update(m, k));
824:   for (PetscInt j = j_0; j < k; j++) {
825:     PetscInt S_idx   = recycle_index(m, j);
826:     PetscInt YtS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
827:     Vec      s_j, Yts_j;

829:     PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
830:     PetscCall(MatDenseGetColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
831:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, s_j, Yts_j, 0, h));
832:     lbfgs->Yt_count++;
833:     if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Yts_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
834:     PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
835:     PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
836:     /* zero the corresponding row */
837:     if (m_local > 0) {
838:       Mat YtS_local, YtS_row;

840:       PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
841:       PetscCall(MatDenseGetSubMatrix(YtS_local, YtS_idx, YtS_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &YtS_row));
842:       PetscCall(MatZeroEntries(YtS_row));
843:       PetscCall(MatDenseRestoreSubMatrix(YtS_local, &YtS_row));
844:     }
845:   }
846:   if (!lbfgs->inv_diag_vec) PetscCall(VecDuplicate(lbfgs->diag_vec, &lbfgs->inv_diag_vec));
847:   PetscCall(VecCopy(lbfgs->diag_vec, lbfgs->inv_diag_vec));
848:   PetscCall(VecReciprocal(lbfgs->inv_diag_vec));
849:   PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
850:   PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
851:   PetscCall(MatGetLDLT(B, lbfgs->J));
852:   PetscCall(MatAXPY(lbfgs->J, 1.0, lbfgs->StBS, SAME_NONZERO_PATTERN));
853:   if (m_local) {
854:     PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
855:     PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
856:   }
857:   lbfgs->num_mult_updates = lbfgs->num_updates;
858:   PetscFunctionReturn(PETSC_SUCCESS);
859: }

861: /* Solves for
862:  * [ I | -S R^{-T} ] [  I  | 0 ] [ H_0 | 0 ] [ I | Y ] [      I      ]
863:  *                   [-----+---] [-----+---] [---+---] [-------------]
864:  *                   [ Y^T | I ] [  0  | D ] [ 0 | I ] [ -R^{-1} S^T ]  */

866: static PetscErrorCode MatSolve_LMVMDBFGS(Mat H, Vec F, Vec dX)
867: {
868:   Mat_LMVM        *lmvm   = (Mat_LMVM *)H->data;
869:   Mat_DQN         *lbfgs  = (Mat_DQN *)lmvm->ctx;
870:   Vec              rwork1 = lbfgs->rwork1;
871:   PetscInt         m      = lmvm->m;
872:   PetscInt         k      = lbfgs->num_updates;
873:   PetscInt         h      = k - oldest_update(m, k);
874:   PetscObjectState Fstate;

876:   PetscFunctionBegin;
877:   VecCheckSameSize(F, 2, dX, 3);
878:   VecCheckMatCompatible(H, dX, 3, F, 2);

880:   /* Block Version */
881:   if (!lbfgs->num_updates) {
882:     PetscCall(MatDQNApplyJ0Inv(H, F, dX));
883:     PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
884:   }

886:   PetscCall(PetscObjectStateGet((PetscObject)F, &Fstate));
887:   if (F == lbfgs->Fprev_ref && Fstate == lbfgs->Fprev_state) {
888:     PetscCall(VecCopy(lbfgs->StFprev, rwork1));
889:   } else {
890:     PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, F, rwork1, 0, h));
891:     lbfgs->St_count++;
892:   }

894:   /* Reordering rwork1, as STY is in history order, while S is in recycled order */
895:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
896:   PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_FALSE, lbfgs->num_updates, lbfgs->strategy));
897:   PetscCall(VecScale(rwork1, -1.0));
898:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));

900:   PetscCall(VecCopy(F, lbfgs->column_work));
901:   PetscCall(MatMultAddColumnRange(lbfgs->Yfull, rwork1, lbfgs->column_work, lbfgs->column_work, 0, h));
902:   lbfgs->Y_count++;

904:   PetscCall(VecPointwiseMult(rwork1, lbfgs->diag_vec_recycle_order, rwork1));
905:   PetscCall(MatDQNApplyJ0Inv(H, lbfgs->column_work, dX));

907:   PetscCall(MatMultHermitianTransposeAddColumnRange(lbfgs->Yfull, dX, rwork1, rwork1, 0, h));
908:   lbfgs->Yt_count++;

910:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
911:   PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_TRUE, lbfgs->num_updates, lbfgs->strategy));
912:   PetscCall(VecScale(rwork1, -1.0));
913:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));

915:   PetscCall(MatMultAddColumnRange(lbfgs->Sfull, rwork1, dX, dX, 0, h));
916:   lbfgs->S_count++;
917:   PetscFunctionReturn(PETSC_SUCCESS);
918: }

920: /* Solves for
921:    B_0 - [ Y | B_0 S] [ -D  |    L^T    ]^-1 [   Y^T   ]
922:                       [-----+-----------]    [---------]
923:                       [  L  | S^T B_0 S ]    [ S^T B_0 ]

925:    Above is equivalent to

927:    B_0 - [ Y | B_0 S] [[     I     | 0 ][ -D  | 0 ][ I | -D^{-1} L^T ]]^-1 [   Y^T   ]
928:                       [[-----------+---][-----+---][---+-------------]]    [---------]
929:                       [[ -L D^{-1} | I ][  0  | J ][ 0 |       I     ]]    [ S^T B_0 ]

931:    where J = S^T B_0 S + L D^{-1} L^T

933:    becomes

935:    B_0 - [ Y | B_0 S] [ I | D^{-1} L^T ][ -D^{-1}  |   0    ][    I     | 0 ] [   Y^T   ]
936:                       [---+------------][----------+--------][----------+---] [---------]
937:                       [ 0 |     I      ][     0    | J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]

939:                       =

941:    B_0 + [ Y | B_0 S] [ D^{-1} | 0 ][ I | L^T ][ I |    0    ][     I    | 0 ] [   Y^T   ]
942:                       [--------+---][---+-----][---+---------][----------+---] [---------]
943:                       [ 0      | I ][ 0 |  I  ][ 0 | -J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]

945:                       (Note that YtS_triu_strict is L^T)
946:    Byrd, Nocedal, Schnabel 1994

948:    Alternative approach: considering the fact that DFP is dual to BFGS, use MatMult of DPF:
949:    (See ddfp.c's MatMult_LMVMDDFP)

951: */

953: static PetscErrorCode MatMult_LMVMDBFGS(Mat B, Vec X, Vec Z)
954: {
955:   Mat_LMVM *lmvm  = (Mat_LMVM *)B->data;
956:   Mat_DQN  *lbfgs = (Mat_DQN *)lmvm->ctx;
957:   Mat       J_local;
958:   PetscInt  m_local;
959:   PetscInt  m = lmvm->m;
960:   PetscInt  k = lbfgs->num_updates;
961:   PetscInt  h = k - oldest_update(m, k);

963:   PetscFunctionBegin;
964:   VecCheckSameSize(X, 2, Z, 3);
965:   VecCheckMatCompatible(B, X, 2, Z, 3);

967:   /* Cholesky Version */
968:   /* Start with the B0 term */
969:   PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
970:   if (!lbfgs->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }

972:   PetscCall(MatLMVMDBFGSUpdateMultData(B));
973:   PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, X, lbfgs->rwork1, 0, h));
974:   lbfgs->Yt_count++;
975:   PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Z, lbfgs->rwork2, 0, h));
976:   lbfgs->St_count++;
977:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
978:     PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
979:     PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork2, lbfgs->num_updates, lbfgs->cyclic_work_vec));
980:   }

982:   PetscCall(VecPointwiseMult(lbfgs->rwork3, lbfgs->rwork1, lbfgs->inv_diag_vec));
983:   PetscCall(MatMultTransposeAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork2, lbfgs->rwork2));

985:   if (!lbfgs->rwork2_local) PetscCall(VecCreateLocalVector(lbfgs->rwork2, &lbfgs->rwork2_local));
986:   if (!lbfgs->rwork3_local) PetscCall(VecCreateLocalVector(lbfgs->rwork3, &lbfgs->rwork3_local));
987:   PetscCall(VecGetLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
988:   PetscCall(VecGetLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
989:   PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
990:   PetscCall(VecGetSize(lbfgs->rwork2_local, &m_local));
991:   if (m_local) {
992:     Mat J_local;

994:     PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
995:     PetscCall(MatSolve(J_local, lbfgs->rwork2_local, lbfgs->rwork3_local));
996:   }
997:   PetscCall(VecRestoreLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
998:   PetscCall(VecRestoreLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
999:   PetscCall(VecScale(lbfgs->rwork3, -1.0));

1001:   PetscCall(MatMultAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork1, lbfgs->rwork1));
1002:   PetscCall(VecPointwiseMult(lbfgs->rwork1, lbfgs->rwork1, lbfgs->inv_diag_vec));

1004:   if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
1005:     PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1006:     PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork3, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1007:   }

1009:   PetscCall(MatMultAddColumnRange(lbfgs->Yfull, lbfgs->rwork1, Z, Z, 0, h));
1010:   lbfgs->Y_count++;
1011:   PetscCall(MatMultAddColumnRange(lbfgs->BS, lbfgs->rwork3, Z, Z, 0, h));
1012:   lbfgs->S_count++;
1013:   PetscFunctionReturn(PETSC_SUCCESS);
1014: }

1016: /*
1017:   This dense representation reduces the L-BFGS update to a series of
1018:   matrix-vector products with dense matrices in lieu of the conventional matrix-free
1019:   two-loop algorithm.
1020: */
1021: PetscErrorCode MatCreate_LMVMDBFGS(Mat B)
1022: {
1023:   Mat_LMVM *lmvm;
1024:   Mat_DQN  *lbfgs;

1026:   PetscFunctionBegin;
1027:   PetscCall(MatCreate_LMVM(B));
1028:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDBFGS));
1029:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1030:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1031:   B->ops->view           = MatView_LMVMDQN;
1032:   B->ops->setup          = MatSetUp_LMVMDQN;
1033:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1034:   B->ops->destroy        = MatDestroy_LMVMDQN;

1036:   lmvm                = (Mat_LMVM *)B->data;
1037:   lmvm->square        = PETSC_TRUE;
1038:   lmvm->ops->allocate = MatAllocate_LMVMDQN;
1039:   lmvm->ops->reset    = MatReset_LMVMDQN;
1040:   lmvm->ops->update   = MatUpdate_LMVMDQN;
1041:   lmvm->ops->mult     = MatMult_LMVMDBFGS;
1042:   lmvm->ops->solve    = MatSolve_LMVMDBFGS;
1043:   lmvm->ops->copy     = MatCopy_LMVMDQN;

1045:   PetscCall(PetscNew(&lbfgs));
1046:   lmvm->ctx              = (void *)lbfgs;
1047:   lbfgs->allocated       = PETSC_FALSE;
1048:   lbfgs->watchdog        = 0;
1049:   lbfgs->max_seq_rejects = lmvm->m / 2;
1050:   lbfgs->strategy        = MAT_LMVM_DENSE_INPLACE;
1051:   lbfgs->scale_type      = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;

1053:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &lbfgs->diag_qn));
1054:   PetscCall(MatSetType(lbfgs->diag_qn, MATLMVMDIAGBROYDEN));
1055:   PetscCall(MatSetOptionsPrefix(lbfgs->diag_qn, "J0_"));
1056:   PetscFunctionReturn(PETSC_SUCCESS);
1057: }

1059: /*@
1060:   MatCreateLMVMDBFGS - Creates a dense representation of the limited-memory
1061:   Broyden-Fletcher-Goldfarb-Shanno (BFGS) approximation to a Hessian.

1063:   Collective

1065:   Input Parameters:
1066: + comm - MPI communicator
1067: . n    - number of local rows for storage vectors
1068: - N    - global size of the storage vectors

1070:   Output Parameter:
1071: . B - the matrix

1073:   Level: advanced

1075:   Note:
1076:   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1077:   paradigm instead of this routine directly.

1079: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MatCreateLMVMBFGS()`
1080: @*/
1081: PetscErrorCode MatCreateLMVMDBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1082: {
1083:   PetscFunctionBegin;
1084:   PetscCall(KSPInitializePackage());
1085:   PetscCall(MatCreate(comm, B));
1086:   PetscCall(MatSetSizes(*B, n, n, N, N));
1087:   PetscCall(MatSetType(*B, MATLMVMDBFGS));
1088:   PetscCall(MatSetUp(*B));
1089:   PetscFunctionReturn(PETSC_SUCCESS);
1090: }

1092: /* here R is strictly upper triangular part of STY */
1093: static PetscErrorCode MatGetRTDR(Mat B, Mat result)
1094: {
1095:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1096:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1097:   PetscInt  m_local;

1099:   PetscFunctionBegin;
1100:   if (!ldfp->temp_mat) PetscCall(MatDuplicate(ldfp->StY_triu_strict, MAT_SHARE_NONZERO_PATTERN, &ldfp->temp_mat));
1101:   PetscCall(MatCopy(ldfp->StY_triu_strict, ldfp->temp_mat, SAME_NONZERO_PATTERN));
1102:   PetscCall(MatDiagonalScale(ldfp->temp_mat, ldfp->inv_diag_vec, NULL));
1103:   PetscCall(MatGetLocalSize(result, &m_local, NULL));
1104:   // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
1105:   PetscCall(MatConjugate(ldfp->temp_mat));
1106:   if (m_local) {
1107:     Mat temp_local, StY_local, result_local;
1108:     PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1109:     PetscCall(MatDenseGetLocalMatrix(ldfp->temp_mat, &temp_local));
1110:     PetscCall(MatDenseGetLocalMatrix(result, &result_local));
1111:     PetscCall(MatTransposeMatMult(StY_local, temp_local, MAT_REUSE_MATRIX, PETSC_DEFAULT, &result_local));
1112:   }
1113:   PetscCall(MatConjugate(result));
1114:   PetscFunctionReturn(PETSC_SUCCESS);
1115: }

1117: static PetscErrorCode MatLMVMDDFPUpdateSolveData(Mat B)
1118: {
1119:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1120:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1121:   PetscInt  m    = lmvm->m, m_local;
1122:   PetscInt  k    = ldfp->num_updates;
1123:   PetscInt  h    = k - oldest_update(m, k);
1124:   PetscInt  j_0;
1125:   PetscInt  prev_oldest;
1126:   Mat       J_local;

1128:   PetscFunctionBegin;
1129:   if (!ldfp->StY_triu_strict) {
1130:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->StY_triu_strict));
1131:     PetscCall(MatDestroy(&ldfp->YtHY));
1132:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->YtHY));
1133:     PetscCall(MatDestroy(&ldfp->J));
1134:     PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->J));
1135:     PetscCall(MatDestroy(&ldfp->HY));
1136:     PetscCall(MatDuplicate(ldfp->Yfull, MAT_SHARE_NONZERO_PATTERN, &ldfp->HY));
1137:     PetscCall(MatShift(ldfp->YtHY, 1.0));
1138:     ldfp->num_mult_updates = oldest_update(m, k);
1139:   }
1140:   if (ldfp->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);

1142:   /* H_0 may have been updated, we must recompute H_0 Y and Y^T H_0 Y */
1143:   for (PetscInt j = oldest_update(m, k); j < k; j++) {
1144:     Vec      y_j;
1145:     Vec      Hy_j;
1146:     Vec      YtHy_j;
1147:     PetscInt Y_idx    = recycle_index(m, j);
1148:     PetscInt YtHY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);

1150:     PetscCall(MatDenseGetColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1151:     PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1152:     PetscCall(MatDQNApplyJ0Inv(B, y_j, Hy_j));
1153:     PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1154:     PetscCall(MatDenseGetColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1155:     PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, Hy_j, YtHy_j, 0, h));
1156:     ldfp->Yt_count++;
1157:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, YtHy_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1158:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1159:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1160:   }
1161:   prev_oldest = oldest_update(m, ldfp->num_mult_updates);
1162:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
1163:     /* move the YtS entries that have been computed and need to be kept back up */
1164:     PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);

1166:     PetscCall(MatMove_LR3(B, ldfp->StY_triu_strict, m_keep));
1167:   }
1168:   PetscCall(MatGetLocalSize(ldfp->StY_triu_strict, &m_local, NULL));
1169:   j_0 = PetscMax(ldfp->num_mult_updates, oldest_update(m, k));
1170:   for (PetscInt j = j_0; j < k; j++) {
1171:     PetscInt Y_idx   = recycle_index(m, j);
1172:     PetscInt StY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1173:     Vec      y_j, Sty_j;

1175:     PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1176:     PetscCall(MatDenseGetColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1177:     PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, y_j, Sty_j, 0, h));
1178:     ldfp->St_count++;
1179:     if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Sty_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1180:     PetscCall(MatDenseRestoreColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1181:     PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1182:     /* zero the corresponding row */
1183:     if (m_local > 0) {
1184:       Mat StY_local, StY_row;

1186:       PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1187:       PetscCall(MatDenseGetSubMatrix(StY_local, StY_idx, StY_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &StY_row));
1188:       PetscCall(MatZeroEntries(StY_row));
1189:       PetscCall(MatDenseRestoreSubMatrix(StY_local, &StY_row));
1190:     }
1191:   }
1192:   if (!ldfp->inv_diag_vec) PetscCall(VecDuplicate(ldfp->diag_vec, &ldfp->inv_diag_vec));
1193:   PetscCall(VecCopy(ldfp->diag_vec, ldfp->inv_diag_vec));
1194:   PetscCall(VecReciprocal(ldfp->inv_diag_vec));
1195:   PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1196:   PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
1197:   PetscCall(MatGetRTDR(B, ldfp->J));
1198:   PetscCall(MatAXPY(ldfp->J, 1.0, ldfp->YtHY, SAME_NONZERO_PATTERN));
1199:   if (m_local) {
1200:     PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
1201:     PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
1202:   }
1203:   ldfp->num_mult_updates = ldfp->num_updates;
1204:   PetscFunctionReturn(PETSC_SUCCESS);
1205: }

1207: /* Solves for

1209:    H_0 - [ S | H_0 Y] [ -D  |    R.T    ]^-1 [   S^T   ]
1210:                       [-----+-----------]    [---------]
1211:                       [  R  | Y^T H_0 Y ]    [ Y^T H_0 ]

1213:    Above is equivalent to

1215:    H_0 - [ S | H_0 Y] [[     I     | 0 ][ -D | 0 ][ I | -D^{-1} R^T ]]^-1 [   S^T   ]
1216:                       [[-----------+---][----+---][---+-------------]]    [---------]
1217:                       [[ -R D^{-1} | I ][  0 | J ][ 0 |      I      ]]    [ Y^T H_0 ]

1219:    where J = Y^T H_0 Y + R D^{-1} R.T

1221:    becomes

1223:    H_0 - [ S | H_0 Y] [ I | D^{-1} R^T ][ -D^{-1}  |   0    ][     I    | 0 ] [   S^T   ]
1224:                       [---+------------][----------+--------][----------+---] [---------]
1225:                       [ 0 |      I     ][     0    | J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]

1227:                       =

1229:    H_0 + [ S | H_0 Y] [ D^{-1} | 0 ][ I | R^T ][ I |    0    ][     I    | 0 ] [   S^T   ]
1230:                       [--------+---][---+-----][---+---------][----------+---] [---------]
1231:                       [ 0      | I ][ 0 |  I  ][ 0 | -J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]

1233:                       (Note that StY_triu_strict is R)
1234:    Byrd, Nocedal, Schnabel 1994

1236: */
1237: static PetscErrorCode MatSolve_LMVMDDFP(Mat H, Vec F, Vec dX)
1238: {
1239:   Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
1240:   Mat_DQN  *ldfp = (Mat_DQN *)lmvm->ctx;
1241:   PetscInt  m    = lmvm->m;
1242:   PetscInt  k    = ldfp->num_updates;
1243:   PetscInt  h    = k - oldest_update(m, k);
1244:   PetscInt  m_local;
1245:   Mat       J_local;

1247:   PetscFunctionBegin;
1248:   VecCheckSameSize(F, 2, dX, 3);
1249:   VecCheckMatCompatible(H, dX, 3, F, 2);

1251:   /* Cholesky Version */
1252:   /* Start with the B0 term */
1253:   PetscCall(MatDQNApplyJ0Inv(H, F, dX));
1254:   if (!ldfp->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }

1256:   PetscCall(MatLMVMDDFPUpdateSolveData(H));

1258:   PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, F, ldfp->rwork1, 0, h));
1259:   ldfp->St_count++;
1260:   PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, dX, ldfp->rwork2, 0, h));
1261:   ldfp->Yt_count++;
1262:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1263:     PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1264:     PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork2, ldfp->num_updates, ldfp->cyclic_work_vec));
1265:   }

1267:   PetscCall(VecPointwiseMult(ldfp->rwork3, ldfp->rwork1, ldfp->inv_diag_vec));
1268:   PetscCall(MatMultTransposeAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork2, ldfp->rwork2));

1270:   if (!ldfp->rwork2_local) PetscCall(VecCreateLocalVector(ldfp->rwork2, &ldfp->rwork2_local));
1271:   if (!ldfp->rwork3_local) PetscCall(VecCreateLocalVector(ldfp->rwork3, &ldfp->rwork3_local));
1272:   PetscCall(VecGetLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1273:   PetscCall(VecGetLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1274:   PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1275:   PetscCall(VecGetSize(ldfp->rwork2_local, &m_local));
1276:   if (m_local) {
1277:     Mat J_local;

1279:     PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1280:     PetscCall(MatSolve(J_local, ldfp->rwork2_local, ldfp->rwork3_local));
1281:   }
1282:   PetscCall(VecRestoreLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1283:   PetscCall(VecRestoreLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1284:   PetscCall(VecScale(ldfp->rwork3, -1.0));

1286:   PetscCall(MatMultAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork1, ldfp->rwork1));
1287:   PetscCall(VecPointwiseMult(ldfp->rwork1, ldfp->rwork1, ldfp->inv_diag_vec));

1289:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1290:     PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1291:     PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork3, ldfp->num_updates, ldfp->cyclic_work_vec));
1292:   }

1294:   PetscCall(MatMultAddColumnRange(ldfp->Sfull, ldfp->rwork1, dX, dX, 0, h));
1295:   ldfp->S_count++;
1296:   PetscCall(MatMultAddColumnRange(ldfp->HY, ldfp->rwork3, dX, dX, 0, h));
1297:   ldfp->Y_count++;
1298:   PetscFunctionReturn(PETSC_SUCCESS);
1299: }

1301: /* Solves for
1302:    (Theorem 1, Erway, Jain, and Marcia, 2013)

1304:    B_0 - [ Y | B_0 S] [ -R^{-T} (D + S^T B_0 S) R^{-1} | R^{-T} ] [   Y^T   ]
1305:                       ---------------------------------+--------] [---------]
1306:                       [             R^{-1}             |   0    ] [ S^T B_0 ]

1308:    (Note: R above is right triangular part of YTS)
1309:    which becomes,

1311:    [ I | -Y L^{-T} ] [  I  | 0 ] [ B_0 | 0 ] [ I | S ] [      I      ]
1312:                      [-----+---] [-----+---] [---+---] [-------------]
1313:                      [ S^T | I ] [  0  | D ] [ 0 | I ] [ -L^{-1} Y^T ]

1315:    (Note: L above is right triangular part of STY)

1317: */
1318: static PetscErrorCode MatMult_LMVMDDFP(Mat B, Vec X, Vec Z)
1319: {
1320:   Mat_LMVM        *lmvm   = (Mat_LMVM *)B->data;
1321:   Mat_DQN         *ldfp   = (Mat_DQN *)lmvm->ctx;
1322:   Vec              rwork1 = ldfp->rwork1;
1323:   PetscInt         m      = lmvm->m;
1324:   PetscInt         k      = ldfp->num_updates;
1325:   PetscInt         h      = k - oldest_update(m, k);
1326:   PetscObjectState Xstate;

1328:   PetscFunctionBegin;
1329:   VecCheckSameSize(X, 2, Z, 3);
1330:   VecCheckMatCompatible(B, X, 2, Z, 3);

1332:   /* DFP Version. Erway, Jain, Marcia, 2013, Theorem 1 */
1333:   /* Block Version */
1334:   if (!ldfp->num_updates) {
1335:     PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1336:     PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1337:   }

1339:   PetscCall(PetscObjectStateGet((PetscObject)X, &Xstate));
1340:   PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, X, rwork1, 0, h));

1342:   /* Reordering rwork1, as STY is in history order, while Y is in recycled order */
1343:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1344:   PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_FALSE, ldfp->num_updates, ldfp->strategy));
1345:   PetscCall(VecScale(rwork1, -1.0));
1346:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));

1348:   PetscCall(VecCopy(X, ldfp->column_work));
1349:   PetscCall(MatMultAddColumnRange(ldfp->Sfull, rwork1, ldfp->column_work, ldfp->column_work, 0, h));
1350:   ldfp->S_count++;

1352:   PetscCall(VecPointwiseMult(rwork1, ldfp->diag_vec_recycle_order, rwork1));
1353:   PetscCall(MatDQNApplyJ0Fwd(B, ldfp->column_work, Z));

1355:   PetscCall(MatMultHermitianTransposeAddColumnRange(ldfp->Sfull, Z, rwork1, rwork1, 0, h));
1356:   ldfp->St_count++;

1358:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1359:   PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_TRUE, ldfp->num_updates, ldfp->strategy));
1360:   PetscCall(VecScale(rwork1, -1.0));
1361:   if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));

1363:   PetscCall(MatMultAddColumnRange(ldfp->Yfull, rwork1, Z, Z, 0, h));
1364:   ldfp->Y_count++;
1365:   PetscFunctionReturn(PETSC_SUCCESS);
1366: }

1368: /*
1369:    This dense representation reduces the L-DFP update to a series of
1370:    matrix-vector products with dense matrices in lieu of the conventional
1371:    matrix-free two-loop algorithm.
1372: */
1373: PetscErrorCode MatCreate_LMVMDDFP(Mat B)
1374: {
1375:   Mat_LMVM *lmvm;
1376:   Mat_DQN  *ldfp;

1378:   PetscFunctionBegin;
1379:   PetscCall(MatCreate_LMVM(B));
1380:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDDFP));
1381:   PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1382:   PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1383:   B->ops->view           = MatView_LMVMDQN;
1384:   B->ops->setup          = MatSetUp_LMVMDQN;
1385:   B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1386:   B->ops->destroy        = MatDestroy_LMVMDQN;

1388:   lmvm                = (Mat_LMVM *)B->data;
1389:   lmvm->square        = PETSC_TRUE;
1390:   lmvm->ops->allocate = MatAllocate_LMVMDQN;
1391:   lmvm->ops->reset    = MatReset_LMVMDQN;
1392:   lmvm->ops->update   = MatUpdate_LMVMDQN;
1393:   lmvm->ops->mult     = MatMult_LMVMDDFP;
1394:   lmvm->ops->solve    = MatSolve_LMVMDDFP;
1395:   lmvm->ops->copy     = MatCopy_LMVMDQN;

1397:   PetscCall(PetscNew(&ldfp));
1398:   lmvm->ctx             = (void *)ldfp;
1399:   ldfp->allocated       = PETSC_FALSE;
1400:   ldfp->watchdog        = 0;
1401:   ldfp->max_seq_rejects = lmvm->m / 2;
1402:   ldfp->strategy        = MAT_LMVM_DENSE_INPLACE;
1403:   ldfp->scale_type      = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;

1405:   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ldfp->diag_qn));
1406:   PetscCall(MatSetType(ldfp->diag_qn, MATLMVMDIAGBROYDEN));
1407:   PetscCall(MatSetOptionsPrefix(ldfp->diag_qn, "J0_"));
1408:   PetscFunctionReturn(PETSC_SUCCESS);
1409: }

1411: /*@
1412:   MatCreateLMVMDDFP - Creates a dense representation of the limited-memory
1413:   Davidon-Fletcher-Powell (DFP) approximation to a Hessian.

1415:   Collective

1417:   Input Parameters:
1418: + comm - MPI communicator
1419: . n    - number of local rows for storage vectors
1420: - N    - global size of the storage vectors

1422:   Output Parameter:
1423: . B - the matrix

1425:   Level: advanced

1427:   Note:
1428:   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1429:   paradigm instead of this routine directly.

1431: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDDFP`, `MatCreateLMVMDFP()`
1432: @*/
1433: PetscErrorCode MatCreateLMVMDDFP(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1434: {
1435:   PetscFunctionBegin;
1436:   PetscCall(KSPInitializePackage());
1437:   PetscCall(MatCreate(comm, B));
1438:   PetscCall(MatSetSizes(*B, n, n, N, N));
1439:   PetscCall(MatSetType(*B, MATLMVMDDFP));
1440:   PetscCall(MatSetUp(*B));
1441:   PetscFunctionReturn(PETSC_SUCCESS);
1442: }

1444: /*@
1445:   MatLMVMDenseSetType - Sets the memory storage type for dense `MATLMVM`

1447:   Input Parameters:
1448: + B    - the `MATLMVM` matrix
1449: - type - scale type, see `MatLMVMDenseSetType`

1451:   Options Database Keys:
1452: + -mat_lqn_type   <reorder,inplace> - set the strategy
1453: . -mat_lbfgs_type <reorder,inplace> - set the strategy
1454: - -mat_ldfp_type  <reorder,inplace> - set the strategy

1456:   Level: intermediate

1458:   MatLMVMDenseTypes\:
1459: +   `MAT_LMVM_DENSE_REORDER` - reorders memory to minimize kernel launch
1460: -   `MAT_LMVM_DENSE_INPLACE` - launches kernel inplace to minimize memory movement

1462: .seealso: [](ch_ksp), `MATLMVMDQN`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatLMVMDenseType`
1463: @*/
1464: PetscErrorCode MatLMVMDenseSetType(Mat B, MatLMVMDenseType type)
1465: {
1466:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1467:   Mat_DQN  *lqn  = (Mat_DQN *)lmvm->ctx;

1469:   PetscFunctionBegin;
1471:   lqn->strategy = type;
1472:   PetscFunctionReturn(PETSC_SUCCESS);
1473: }