Actual source code: denseqn.c
1: #include <../src/ksp/ksp/utils/lmvm/dense/denseqn.h>
2: #include <../src/ksp/ksp/utils/lmvm/diagbrdn/diagbrdn.h>
3: #include <petscblaslapack.h>
4: #include <petscmat.h>
5: #include <petscsys.h>
6: #include <petscsystypes.h>
7: #include <petscis.h>
8: #include <petscoptions.h>
9: #include <petscdevice.h>
10: #include <petsc/private/deviceimpl.h>
12: static PetscErrorCode MatMult_LMVMDQN(Mat, Vec, Vec);
13: static PetscErrorCode MatMult_LMVMDBFGS(Mat, Vec, Vec);
14: static PetscErrorCode MatMult_LMVMDDFP(Mat, Vec, Vec);
15: static PetscErrorCode MatSolve_LMVMDQN(Mat, Vec, Vec);
16: static PetscErrorCode MatSolve_LMVMDBFGS(Mat, Vec, Vec);
17: static PetscErrorCode MatSolve_LMVMDDFP(Mat, Vec, Vec);
19: static inline PetscInt recycle_index(PetscInt m, PetscInt idx)
20: {
21: return idx % m;
22: }
24: static inline PetscInt history_index(PetscInt m, PetscInt num_updates, PetscInt idx)
25: {
26: return (idx - num_updates) + PetscMin(m, num_updates);
27: }
29: static inline PetscInt oldest_update(PetscInt m, PetscInt idx)
30: {
31: return PetscMax(0, idx - m);
32: }
34: static PetscErrorCode MatView_LMVMDQN(Mat B, PetscViewer pv)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
37: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
39: PetscBool isascii;
41: PetscFunctionBegin;
42: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &isascii));
43: PetscCall(MatView_LMVM(B, pv));
44: if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatView(ldfp->diag_qn, pv)); }
45: if (isascii) { PetscCall(PetscViewerASCIIPrintf(pv, "Counts: S x : %" PetscInt_FMT ", S^T x : %" PetscInt_FMT ", Y x : %" PetscInt_FMT ", Y^T x: %" PetscInt_FMT "\n", ldfp->S_count, ldfp->St_count, ldfp->Y_count, ldfp->Yt_count)); }
46: PetscFunctionReturn(PETSC_SUCCESS);
47: }
49: static PetscErrorCode MatReset_LMVMDQN(Mat, PetscBool);
50: static PetscErrorCode MatAllocate_LMVMDQN(Mat B, Vec X, Vec F)
51: {
52: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
53: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
54: PetscBool is_dbfgs, is_ddfp, is_dqn, same, allocate = PETSC_FALSE;
55: VecType vec_type;
56: PetscInt m, n, M, N;
57: MPI_Comm comm = PetscObjectComm((PetscObject)B);
59: PetscFunctionBegin;
60: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
61: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
62: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
64: if (lmvm->allocated) {
65: PetscCall(VecGetType(X, &vec_type));
66: PetscCall(PetscObjectTypeCompare((PetscObject)lmvm->Xprev, vec_type, &same));
67: if (!same) {
68: /* Given X vector has a different type than allocated X-type data structures.
69: We need to destroy all of this and duplicate again out of the given vector. */
70: allocate = PETSC_TRUE;
71: PetscCall(MatReset_LMVMDQN(B, PETSC_TRUE));
72: } else {
73: VecCheckMatCompatible(B, X, 2, F, 3);
74: }
75: } else {
76: allocate = PETSC_TRUE;
77: }
78: if (allocate) {
79: PetscCall(VecGetLocalSize(X, &n));
80: PetscCall(VecGetSize(X, &N));
81: PetscCall(VecGetLocalSize(F, &m));
82: PetscCall(VecGetSize(F, &M));
83: PetscCheck(N == M, comm, PETSC_ERR_ARG_SIZ, "Incorrect problem sizes! dim(X) not equal to dim(F)");
84: PetscCall(MatSetSizes(B, m, n, M, N));
85: PetscCall(PetscLayoutSetUp(B->rmap));
86: PetscCall(PetscLayoutSetUp(B->cmap));
87: PetscCall(VecDuplicate(X, &lmvm->Xprev));
88: PetscCall(VecDuplicate(F, &lmvm->Fprev));
89: if (lmvm->m > 0) {
90: PetscMPIInt rank;
91: PetscInt m, M;
93: PetscCallMPI(MPI_Comm_rank(comm, &rank));
94: M = lmvm->m;
95: m = (rank == 0) ? M : 0;
97: /* For DBFGS: Create data needed for MatSolve() eagerly; data needed for MatMult() will be created on demand
98: * For DDFP : Create data needed for MatMult() eagerly; data needed for MatSolve() will be created on demand
99: * For DQN : Create all data eagerly */
100: PetscCall(VecGetType(X, &vec_type));
101: PetscCall(MatCreateDenseFromVecType(comm, vec_type, n, m, N, M, -1, NULL, &lqn->Sfull));
102: PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->Yfull));
103: if (is_dqn) {
104: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
105: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
106: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
107: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
108: } else if (is_ddfp) {
109: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
110: PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->HY));
111: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->diag_vec, &lqn->rwork1));
112: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->rwork2, &lqn->rwork3));
113: } else if (is_dbfgs) {
114: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
115: PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->BS));
116: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
117: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
118: } else {
119: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatAllocate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
120: }
121: /* initialize StY_triu and YtS_triu to identity, if they exist, so it is invertible */
122: if (lqn->StY_triu) {
123: PetscCall(MatZeroEntries(lqn->StY_triu));
124: PetscCall(MatShift(lqn->StY_triu, 1.0));
125: }
126: if (lqn->YtS_triu) {
127: PetscCall(MatZeroEntries(lqn->YtS_triu));
128: PetscCall(MatShift(lqn->YtS_triu, 1.0));
129: }
130: if (lqn->use_recursive && (is_dbfgs || is_ddfp)) {
131: PetscCall(VecDuplicateVecs(X, lmvm->m, &lqn->PQ));
132: PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work2));
133: PetscCall(PetscMalloc1(lmvm->m, &lqn->yts));
134: if (is_dbfgs) {
135: PetscCall(PetscMalloc1(lmvm->m, &lqn->stp));
136: } else if (is_ddfp) {
137: PetscCall(PetscMalloc1(lmvm->m, &lqn->ytq));
138: }
139: }
140: PetscCall(VecDuplicate(lqn->rwork2, &lqn->cyclic_work_vec));
141: PetscCall(VecZeroEntries(lqn->rwork1));
142: PetscCall(VecZeroEntries(lqn->rwork2));
143: PetscCall(VecZeroEntries(lqn->rwork3));
144: PetscCall(VecZeroEntries(lqn->diag_vec));
145: }
146: PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work));
147: if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatLMVMAllocate(lqn->diag_qn, X, F)); }
148: lmvm->allocated = PETSC_TRUE;
149: B->preallocated = PETSC_TRUE;
150: B->assembled = PETSC_TRUE;
151: }
152: PetscFunctionReturn(PETSC_SUCCESS);
153: }
155: static PetscErrorCode MatSetUp_LMVMDQN(Mat B)
156: {
157: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
158: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
160: PetscInt M, N;
161: MPI_Comm comm = PetscObjectComm((PetscObject)B);
162: Vec Xtmp, Ftmp;
164: PetscFunctionBegin;
165: PetscCall(MatGetSize(B, &M, &N));
166: PetscCheck(M != 0 && N != 0, comm, PETSC_ERR_ORDER, "MatSetSizes() must be called before MatSetUp()");
167: if (!lmvm->allocated) {
168: PetscCall(PetscLayoutSetUp(B->rmap));
169: PetscCall(PetscLayoutSetUp(B->cmap));
170: PetscCall(MatCreateVecs(B, &Xtmp, &Ftmp));
171: if (lmvm->m > 0) PetscCall(PetscMalloc1(lmvm->m, &lqn->workscalar));
172: PetscCall(MatAllocate_LMVMDQN(B, Xtmp, Ftmp));
173: PetscCall(VecDestroy(&Xtmp));
174: PetscCall(VecDestroy(&Ftmp));
175: }
176: PetscFunctionReturn(PETSC_SUCCESS);
177: }
179: static PetscErrorCode MatSetFromOptions_LMVMDQN_Private(Mat B, PetscOptionItems *PetscOptionsObject)
180: {
181: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
182: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
183: PetscBool is_dbfgs, is_ddfp, is_dqn;
185: PetscFunctionBegin;
186: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
187: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
188: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
189: if (is_dqn) {
190: PetscCall(PetscOptionsEnum("-mat_lqn_type", "Implementation options for L-QN", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
191: PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
192: } else if (is_dbfgs) {
193: PetscCall(PetscOptionsBool("-mat_lbfgs_recursive", "Use recursive formulation for MatMult_LMVMDBFGS, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
194: PetscCall(PetscOptionsEnum("-mat_lbfgs_type", "Implementation options for L-BFGS", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
195: PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
196: } else if (is_ddfp) {
197: PetscCall(PetscOptionsBool("-mat_ldfp_recursive", "Use recursive formulation for MatSolve_LMVMDDFP, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
198: PetscCall(PetscOptionsEnum("-mat_ldfp_type", "Implementation options for L-DFP", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
199: PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
200: } else {
201: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatSetFromOptions_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
202: }
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: static PetscErrorCode MatSetFromOptions_LMVMDQN(Mat B, PetscOptionItems *PetscOptionsObject)
207: {
208: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
209: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
211: PetscFunctionBegin;
212: PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
213: PetscOptionsBegin(PetscObjectComm((PetscObject)B), ((PetscObject)B)->prefix, "Dense QN method (MATLMVMDQN,MATLMVMDBFGS,MATLMVMDDFP)", NULL);
214: PetscCall(MatSetFromOptions_LMVMDQN_Private(B, PetscOptionsObject));
215: PetscOptionsEnd();
216: lqn->allocated = PETSC_FALSE;
217: if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
218: const char *prefix;
220: PetscCall(MatGetOptionsPrefix(B, &prefix));
221: PetscCall(MatSetOptionsPrefix(lqn->diag_qn, prefix));
222: PetscCall(MatAppendOptionsPrefix(lqn->diag_qn, "J0_"));
223: PetscCall(MatSetFromOptions(lqn->diag_qn));
224: }
225: PetscFunctionReturn(PETSC_SUCCESS);
226: }
228: static PetscErrorCode MatLMVMDQNResetDestructive(Mat B)
229: {
230: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
231: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
233: PetscFunctionBegin;
234: PetscCall(MatDestroy(&lqn->Sfull));
235: PetscCall(MatDestroy(&lqn->Yfull));
236: PetscCall(MatDestroy(&lqn->HY));
237: PetscCall(MatDestroy(&lqn->BS));
238: PetscCall(MatDestroy(&lqn->StY_triu));
239: PetscCall(MatDestroy(&lqn->YtS_triu));
240: PetscCall(VecDestroy(&lqn->StFprev));
241: PetscCall(VecDestroy(&lqn->Fprev_ref));
242: lqn->Fprev_state = 0;
243: PetscCall(MatDestroy(&lqn->YtS_triu_strict));
244: PetscCall(MatDestroy(&lqn->StY_triu_strict));
245: PetscCall(MatDestroy(&lqn->StBS));
246: PetscCall(MatDestroy(&lqn->YtHY));
247: PetscCall(MatDestroy(&lqn->J));
248: PetscCall(MatDestroy(&lqn->temp_mat));
249: PetscCall(VecDestroy(&lqn->diag_vec));
250: PetscCall(VecDestroy(&lqn->diag_vec_recycle_order));
251: PetscCall(VecDestroy(&lqn->inv_diag_vec));
252: PetscCall(VecDestroy(&lqn->column_work));
253: PetscCall(VecDestroy(&lqn->column_work2));
254: PetscCall(VecDestroy(&lqn->rwork1));
255: PetscCall(VecDestroy(&lqn->rwork2));
256: PetscCall(VecDestroy(&lqn->rwork3));
257: PetscCall(VecDestroy(&lqn->rwork2_local));
258: PetscCall(VecDestroy(&lqn->rwork3_local));
259: PetscCall(VecDestroy(&lqn->cyclic_work_vec));
260: PetscCall(VecDestroyVecs(lmvm->m, &lqn->PQ));
261: PetscCall(PetscFree(lqn->stp));
262: PetscCall(PetscFree(lqn->yts));
263: PetscCall(PetscFree(lqn->ytq));
264: lqn->allocated = PETSC_FALSE;
265: PetscFunctionReturn(PETSC_SUCCESS);
266: }
268: static PetscErrorCode MatDestroy_LMVMDQN(Mat B)
269: {
270: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
271: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
273: PetscFunctionBegin;
274: PetscCall(MatLMVMDQNResetDestructive(B));
275: PetscCall(PetscFree(lqn->workscalar));
276: PetscCall(MatDestroy(&lqn->diag_qn));
277: PetscCall(PetscFree(lmvm->ctx));
278: PetscCall(MatDestroy_LMVM(B));
279: PetscFunctionReturn(PETSC_SUCCESS);
280: }
282: static PetscErrorCode MatReset_LMVMDQN(Mat B, PetscBool destructive)
283: {
284: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
285: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
287: PetscFunctionBegin;
288: lqn->watchdog = 0;
289: lqn->needPQ = PETSC_TRUE;
290: if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
291: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
292: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
293: if (!diagctx->allocated) PetscCall(MatLMVMAllocate(lqn->diag_qn, lmvm->Xprev, lmvm->Fprev));
294: PetscCall(MatLMVMReset(lqn->diag_qn, destructive));
295: }
296: if (lqn->Sfull) PetscCall(MatZeroEntries(lqn->Sfull));
297: if (lqn->Yfull) PetscCall(MatZeroEntries(lqn->Yfull));
298: if (lqn->BS) PetscCall(MatZeroEntries(lqn->BS));
299: if (lqn->HY) PetscCall(MatZeroEntries(lqn->HY));
300: if (lqn->StY_triu) { /* Set to identity by default so it is invertible */
301: PetscCall(MatZeroEntries(lqn->StY_triu));
302: PetscCall(MatShift(lqn->StY_triu, 1.0));
303: }
304: if (lqn->YtS_triu) {
305: PetscCall(MatZeroEntries(lqn->YtS_triu));
306: PetscCall(MatShift(lqn->YtS_triu, 1.0));
307: }
308: if (lqn->YtS_triu_strict) PetscCall(MatZeroEntries(lqn->YtS_triu_strict));
309: if (lqn->StY_triu_strict) PetscCall(MatZeroEntries(lqn->StY_triu_strict));
310: if (lqn->StBS) {
311: PetscCall(MatZeroEntries(lqn->StBS));
312: PetscCall(MatShift(lqn->StBS, 1.0));
313: }
314: if (lqn->YtHY) {
315: PetscCall(MatZeroEntries(lqn->YtHY));
316: PetscCall(MatShift(lqn->YtHY, 1.0));
317: }
318: if (lqn->Fprev_ref) PetscCall(VecDestroy(&lqn->Fprev_ref));
319: lqn->Fprev_state = 0;
320: if (lqn->StFprev) PetscCall(VecZeroEntries(lqn->StFprev));
321: if (destructive) { PetscCall(MatLMVMDQNResetDestructive(B)); }
322: lqn->num_updates = 0;
323: lqn->num_mult_updates = 0;
324: PetscCall(MatReset_LMVM(B, destructive));
325: PetscFunctionReturn(PETSC_SUCCESS);
326: }
328: static PetscErrorCode MatUpdate_LMVMDQN(Mat B, Vec X, Vec F)
329: {
330: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
331: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
332: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
333: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
335: PetscBool is_ddfp, is_dbfgs, is_dqn;
336: PetscDeviceContext dctx;
338: PetscFunctionBegin;
339: if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
340: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
341: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
342: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
343: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
344: if (lmvm->prev_set) {
345: Vec FX[2];
346: PetscScalar dotFX[2];
347: PetscScalar stFprev;
348: PetscScalar curvature, yTy;
349: PetscReal curvtol;
350: Vec workvec1;
352: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
353: PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
354: /* Test if the updates can be accepted */
355: FX[0] = lmvm->Fprev; /* dotFX[0] = s^T Fprev */
356: FX[1] = F; /* dotFX[1] = s^T F */
357: PetscCall(VecMDot(lmvm->Xprev, 2, FX, dotFX));
358: PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
359: PetscCall(VecDot(lmvm->Fprev, lmvm->Fprev, &yTy));
360: stFprev = PetscConj(dotFX[0]);
361: curvature = PetscConj(dotFX[1] - dotFX[0]); /* s^T y */
362: if (PetscRealPart(yTy) < lmvm->eps) {
363: curvtol = 0.0;
364: } else {
365: curvtol = lmvm->eps * PetscRealPart(yTy);
366: }
367: if (PetscRealPart(curvature) > curvtol) {
368: PetscInt m = lmvm->m;
369: PetscInt k = lqn->num_updates;
370: PetscInt h_new = k + 1 - oldest_update(m, k + 1);
371: PetscInt idx = recycle_index(m, k);
372: PetscInt i, old_k;
374: /* Update is good, accept it */
375: lmvm->nupdates++;
376: lqn->num_updates++;
377: lqn->watchdog = 0;
378: lqn->needPQ = PETSC_TRUE;
379: old_k = lmvm->k;
381: if (lmvm->k != m - 1) {
382: lmvm->k++;
383: } else if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
384: if (is_dqn) {
385: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
386: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
387: } else if (is_dbfgs) {
388: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
389: } else if (is_ddfp) {
390: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
391: } else {
392: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
393: }
394: }
396: if (lqn->use_recursive && (is_dbfgs || is_ddfp)) {
397: if (old_k == lmvm->k) {
398: for (i = 0; i <= lmvm->k - 1; ++i) {
399: lqn->yts[i] = lqn->yts[i + 1];
400: if (is_dbfgs) {
401: lqn->stp[i] = lqn->stp[i + 1];
402: } else if (is_ddfp) {
403: lqn->ytq[i] = lqn->ytq[i + 1];
404: }
405: }
406: }
407: lqn->yts[lmvm->k] = PetscRealPart(curvature);
408: }
410: /* First update the S^T matrix */
411: PetscCall(MatDenseGetColumnVecWrite(lqn->Sfull, idx, &workvec1));
412: PetscCall(VecCopy(lmvm->Xprev, workvec1));
413: PetscCall(MatDenseRestoreColumnVecWrite(lqn->Sfull, idx, &workvec1));
415: /* Now repeat update for the Y^T matrix */
416: PetscCall(MatDenseGetColumnVecWrite(lqn->Yfull, idx, &workvec1));
417: PetscCall(VecCopy(lmvm->Fprev, workvec1));
418: PetscCall(MatDenseRestoreColumnVecWrite(lqn->Yfull, idx, &workvec1));
420: if (is_dqn || is_dbfgs) { /* implement the scheme of Byrd, Nocedal, and Schnabel to save a MatMultTranspose call in the common case the *
421: * H_k is immediately applied to F after begin updated. The S^T y computation can be split up as S^T (F - F_prev) */
422: PetscInt local_n;
423: PetscScalar *StFprev;
424: PetscMemType memtype;
425: PetscInt StYidx;
427: StYidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
428: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
429: PetscCall(VecGetLocalSize(lqn->StFprev, &local_n));
430: PetscCall(VecGetArrayAndMemType(lqn->StFprev, &StFprev, &memtype));
431: if (local_n) {
432: if (PetscMemTypeHost(memtype)) {
433: StFprev[idx] = stFprev;
434: } else {
435: PetscCall(PetscDeviceRegisterMemory(&stFprev, PETSC_MEMTYPE_HOST, 1 * sizeof(stFprev)));
436: PetscCall(PetscDeviceRegisterMemory(StFprev, memtype, local_n * sizeof(*StFprev)));
437: PetscCall(PetscDeviceArrayCopy(dctx, &StFprev[idx], &stFprev, 1));
438: }
439: }
440: PetscCall(VecRestoreArrayAndMemType(lqn->StFprev, &StFprev));
442: {
443: Vec this_sy_col;
444: /* Now StFprev is updated for the new S vector. Write -StFprev into the appropriate row */
445: PetscCall(MatDenseGetColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
446: PetscCall(VecAXPBY(this_sy_col, -1.0, 0.0, lqn->StFprev));
448: /* Now compute the new StFprev */
449: PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h_new));
450: lqn->St_count++;
452: /* Now add StFprev: this_sy_col == S^T (F - Fprev) == S^T y */
453: PetscCall(VecAXPY(this_sy_col, 1.0, lqn->StFprev));
455: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_sy_col, lqn->num_updates, lqn->cyclic_work_vec));
456: PetscCall(MatDenseRestoreColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
457: }
458: }
460: if (is_ddfp || is_dqn) {
461: PetscInt YtSidx;
463: YtSidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
465: {
466: Vec this_ys_col;
468: PetscCall(MatDenseGetColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
469: PetscCall(MatMultHermitianTransposeColumnRange(lqn->Yfull, lmvm->Xprev, this_ys_col, 0, h_new));
470: lqn->Yt_count++;
472: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_ys_col, lqn->num_updates, lqn->cyclic_work_vec));
473: PetscCall(MatDenseRestoreColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
474: }
475: }
477: if (is_dbfgs || is_dqn) {
478: PetscCall(MatGetDiagonal(lqn->StY_triu, lqn->diag_vec));
479: } else if (is_ddfp) {
480: PetscCall(MatGetDiagonal(lqn->YtS_triu, lqn->diag_vec));
481: } else {
482: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
483: }
485: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
486: if (!lqn->diag_vec_recycle_order) PetscCall(VecDuplicate(lqn->diag_vec, &lqn->diag_vec_recycle_order));
487: PetscCall(VecCopy(lqn->diag_vec, lqn->diag_vec_recycle_order));
488: PetscCall(VecHistoryOrderToRecycleOrder(B, lqn->diag_vec_recycle_order, lqn->num_updates, lqn->cyclic_work_vec));
489: } else {
490: if (!lqn->diag_vec_recycle_order) {
491: PetscCall(PetscObjectReference((PetscObject)lqn->diag_vec));
492: lqn->diag_vec_recycle_order = lqn->diag_vec;
493: }
494: }
496: if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_SCALAR) {
497: PetscScalar sTy = curvature;
499: diagctx->sigma = PetscRealPart(sTy) / PetscRealPart(yTy);
500: } else if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
501: PetscScalar sTy = curvature;
502: PetscScalar sTDs, yTDy;
504: if (!diagctx->invD) {
505: PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->invD));
506: PetscCall(VecSet(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTy)));
507: }
508: if (!diagctx->U) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->U));
509: if (!diagctx->V) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->V));
510: if (!diagctx->W) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->W));
512: /* diagonal Broyden */
513: PetscCall(VecReciprocal(diagctx->invD));
514: PetscCall(VecPointwiseMult(diagctx->V, diagctx->invD, lmvm->Xprev));
515: PetscCall(VecPointwiseMult(diagctx->U, lmvm->Fprev, lmvm->Fprev));
516: if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->U));
517: PetscCall(VecAXPY(diagctx->invD, 1.0 / sTy, diagctx->U));
518: PetscCall(VecDot(diagctx->V, lmvm->Xprev, &sTDs));
519: if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->V));
520: PetscCall(VecPointwiseMult(diagctx->V, diagctx->V, diagctx->V));
521: PetscCall(VecAXPY(diagctx->invD, -1.0 / PetscMax(PetscRealPart(sTDs), diagctx->tol), diagctx->V));
522: PetscCall(VecReciprocal(diagctx->invD));
523: PetscCall(VecAbs(diagctx->invD));
524: PetscCall(VecDot(diagctx->U, diagctx->invD, &yTDy));
525: PetscCall(VecScale(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTDy)));
526: }
527: } else {
528: /* Update is bad, skip it */
529: ++lmvm->nrejects;
530: ++lqn->watchdog;
531: PetscInt m = lmvm->m;
532: PetscInt k = lqn->num_updates;
533: PetscInt h = k - oldest_update(m, k);
535: /* we still have to maintain StFprev */
536: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
537: PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h));
538: lqn->St_count++;
539: }
540: } else {
541: switch (lqn->scale_type) {
542: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
543: PetscCall(VecSet(diagctx->invD, diagctx->delta));
544: break;
545: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
546: diagctx->sigma = diagctx->delta;
547: break;
548: default:
549: diagctx->sigma = 1.0;
550: break;
551: }
552: }
554: if (lqn->watchdog > lqn->max_seq_rejects) PetscCall(MatLMVMReset(B, PETSC_FALSE));
556: /* Save the solution and function to be used in the next update */
557: PetscCall(VecCopy(X, lmvm->Xprev));
558: PetscCall(VecCopy(F, lmvm->Fprev));
559: PetscCall(PetscObjectReference((PetscObject)F));
560: PetscCall(VecDestroy(&lqn->Fprev_ref));
561: lqn->Fprev_ref = F;
562: PetscCall(PetscObjectStateGet((PetscObject)F, &lqn->Fprev_state));
563: lmvm->prev_set = PETSC_TRUE;
564: PetscFunctionReturn(PETSC_SUCCESS);
565: }
567: static PetscErrorCode MatDestroyThenCopy(Mat src, Mat *dst)
568: {
569: PetscFunctionBegin;
570: PetscCall(MatDestroy(dst));
571: if (src) { PetscCall(MatDuplicate(src, MAT_COPY_VALUES, dst)); }
572: PetscFunctionReturn(PETSC_SUCCESS);
573: }
575: static PetscErrorCode VecDestroyThenCopy(Vec src, Vec *dst)
576: {
577: PetscFunctionBegin;
578: PetscCall(VecDestroy(dst));
579: if (src) {
580: PetscCall(VecDuplicate(src, dst));
581: PetscCall(VecCopy(src, *dst));
582: }
583: PetscFunctionReturn(PETSC_SUCCESS);
584: }
586: static PetscErrorCode MatCopy_LMVMDQN(Mat B, Mat M, MatStructure str)
587: {
588: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
589: Mat_DQN *blqn = (Mat_DQN *)bdata->ctx;
590: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
591: Mat_DQN *mlqn = (Mat_DQN *)mdata->ctx;
592: PetscInt i;
593: PetscBool is_dbfgs, is_ddfp, is_dqn;
595: PetscFunctionBegin;
596: mlqn->num_updates = blqn->num_updates;
597: mlqn->num_mult_updates = blqn->num_mult_updates;
598: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
599: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
600: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
601: PetscCall(MatDestroyThenCopy(blqn->Sfull, &mlqn->Sfull));
602: PetscCall(MatDestroyThenCopy(blqn->Yfull, &mlqn->Yfull));
603: PetscCall(MatDestroyThenCopy(blqn->HY, &mlqn->BS));
604: PetscCall(VecDestroyThenCopy(blqn->StFprev, &mlqn->StFprev));
605: PetscCall(MatDestroyThenCopy(blqn->StY_triu, &mlqn->StY_triu));
606: PetscCall(MatDestroyThenCopy(blqn->StY_triu_strict, &mlqn->StY_triu_strict));
607: PetscCall(MatDestroyThenCopy(blqn->YtS_triu, &mlqn->YtS_triu));
608: PetscCall(MatDestroyThenCopy(blqn->YtS_triu_strict, &mlqn->YtS_triu_strict));
609: PetscCall(MatDestroyThenCopy(blqn->YtHY, &mlqn->YtHY));
610: PetscCall(MatDestroyThenCopy(blqn->StBS, &mlqn->StBS));
611: PetscCall(MatDestroyThenCopy(blqn->J, &mlqn->J));
612: PetscCall(VecDestroyThenCopy(blqn->diag_vec, &mlqn->diag_vec));
613: PetscCall(VecDestroyThenCopy(blqn->diag_vec_recycle_order, &mlqn->diag_vec_recycle_order));
614: PetscCall(VecDestroyThenCopy(blqn->inv_diag_vec, &mlqn->inv_diag_vec));
615: if (blqn->use_recursive && (is_dbfgs || is_ddfp)) {
616: for (i = 0; i <= bdata->k; i++) {
617: PetscCall(VecDestroyThenCopy(blqn->PQ[i], &mlqn->PQ[i]));
618: mlqn->yts[i] = blqn->yts[i];
619: if (is_dbfgs) {
620: mlqn->stp[i] = blqn->stp[i];
621: } else if (is_ddfp) {
622: mlqn->ytq[i] = blqn->ytq[i];
623: }
624: }
625: }
626: mlqn->dense_type = blqn->dense_type;
627: mlqn->strategy = blqn->strategy;
628: mlqn->scale_type = blqn->scale_type;
629: mlqn->S_count = 0;
630: mlqn->St_count = 0;
631: mlqn->Y_count = 0;
632: mlqn->Yt_count = 0;
633: mlqn->watchdog = blqn->watchdog;
634: mlqn->max_seq_rejects = blqn->max_seq_rejects;
635: mlqn->allocated = blqn->allocated;
636: mlqn->use_recursive = blqn->use_recursive;
637: mlqn->needPQ = blqn->needPQ;
638: PetscCall(PetscObjectReference((PetscObject)blqn->Fprev_ref));
639: PetscCall(VecDestroy(&mlqn->Fprev_ref));
640: mlqn->Fprev_ref = blqn->Fprev_ref;
641: mlqn->Fprev_state = blqn->Fprev_state;
642: if (!(bdata->J0 || bdata->user_pc || bdata->user_ksp || bdata->user_scale)) { PetscCall(MatCopy(blqn->diag_qn, mlqn->diag_qn, SAME_NONZERO_PATTERN)); }
643: PetscFunctionReturn(PETSC_SUCCESS);
644: }
646: static PetscErrorCode MatMult_LMVMDQN(Mat B, Vec X, Vec Z)
647: {
648: PetscFunctionBegin;
649: PetscCall(MatMult_LMVMDDFP(B, X, Z));
650: PetscFunctionReturn(PETSC_SUCCESS);
651: }
653: static PetscErrorCode MatSolve_LMVMDQN(Mat H, Vec F, Vec dX)
654: {
655: PetscFunctionBegin;
656: PetscCall(MatSolve_LMVMDBFGS(H, F, dX));
657: PetscFunctionReturn(PETSC_SUCCESS);
658: }
660: /*
661: This dense representation uses Davidon-Fletcher-Powell (DFP) for MatMult,
662: and Broyden-Fletcher-Goldfarb-Shanno (BFGS) for MatSolve. This implementation
663: results in avoiding costly Cholesky factorization, at the cost of duality cap.
664: Please refer to MatLMVMDDFP and MatLMVMDBFGS for more information.
665: */
666: PetscErrorCode MatCreate_LMVMDQN(Mat B)
667: {
668: Mat_LMVM *lmvm;
669: Mat_DQN *lqn;
671: PetscFunctionBegin;
672: PetscCall(MatCreate_LMVM(B));
673: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDQN));
674: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
675: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
676: B->ops->view = MatView_LMVMDQN;
677: B->ops->setup = MatSetUp_LMVMDQN;
678: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
679: B->ops->destroy = MatDestroy_LMVMDQN;
681: lmvm = (Mat_LMVM *)B->data;
682: lmvm->square = PETSC_TRUE;
683: lmvm->ops->allocate = MatAllocate_LMVMDQN;
684: lmvm->ops->reset = MatReset_LMVMDQN;
685: lmvm->ops->update = MatUpdate_LMVMDQN;
686: lmvm->ops->mult = MatMult_LMVMDQN;
687: lmvm->ops->solve = MatSolve_LMVMDQN;
688: lmvm->ops->copy = MatCopy_LMVMDQN;
690: PetscCall(PetscNew(&lqn));
691: lmvm->ctx = (void *)lqn;
692: lqn->allocated = PETSC_FALSE;
693: lqn->use_recursive = PETSC_FALSE;
694: lqn->needPQ = PETSC_FALSE;
695: lqn->watchdog = 0;
696: lqn->max_seq_rejects = lmvm->m / 2;
697: lqn->strategy = MAT_LMVM_DENSE_INPLACE;
698: lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
700: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &lqn->diag_qn));
701: PetscCall(MatSetType(lqn->diag_qn, MATLMVMDIAGBROYDEN));
702: PetscCall(MatSetOptionsPrefix(lqn->diag_qn, "J0_"));
703: PetscFunctionReturn(PETSC_SUCCESS);
704: }
706: /*@
707: MatCreateLMVMDQN - Creates a dense representation of the limited-memory
708: Quasi-Newton approximation to a Hessian.
710: Collective
712: Input Parameters:
713: + comm - MPI communicator
714: . n - number of local rows for storage vectors
715: - N - global size of the storage vectors
717: Output Parameter:
718: . B - the matrix
720: Level: advanced
722: Note:
723: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
724: paradigm instead of this routine directly.
726: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatCreateLMVMDDFP()`, `MatCreateLMVMDBFGS()`
727: @*/
728: PetscErrorCode MatCreateLMVMDQN(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
729: {
730: PetscFunctionBegin;
731: PetscCall(KSPInitializePackage());
732: PetscCall(MatCreate(comm, B));
733: PetscCall(MatSetSizes(*B, n, n, N, N));
734: PetscCall(MatSetType(*B, MATLMVMDQN));
735: PetscCall(MatSetUp(*B));
736: PetscFunctionReturn(PETSC_SUCCESS);
737: }
739: static PetscErrorCode MatDQNApplyJ0Fwd(Mat B, Vec X, Vec Z)
740: {
741: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
742: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
744: PetscFunctionBegin;
745: if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
746: lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
747: PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
748: } else {
749: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
750: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
752: switch (lqn->scale_type) {
753: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
754: PetscCall(VecAXPBY(Z, 1.0 / diagctx->sigma, 0.0, X));
755: break;
756: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
757: PetscCall(VecPointwiseDivide(Z, X, diagctx->invD));
758: break;
759: case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
760: default:
761: PetscCall(VecCopy(X, Z));
762: break;
763: }
764: }
765: PetscFunctionReturn(PETSC_SUCCESS);
766: }
768: static PetscErrorCode MatDQNApplyJ0Inv(Mat B, Vec F, Vec dX)
769: {
770: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
771: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
773: PetscFunctionBegin;
774: if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
775: lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
776: PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
777: } else {
778: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
779: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
781: switch (lqn->scale_type) {
782: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
783: PetscCall(VecAXPBY(dX, diagctx->sigma, 0.0, F));
784: break;
785: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
786: PetscCall(VecPointwiseMult(dX, F, diagctx->invD));
787: break;
788: case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
789: default:
790: PetscCall(VecCopy(F, dX));
791: break;
792: }
793: }
794: PetscFunctionReturn(PETSC_SUCCESS);
795: }
797: /* This is not Bunch-Kaufman LDLT: here L is strictly lower triangular part of STY */
798: static PetscErrorCode MatGetLDLT(Mat B, Mat result)
799: {
800: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
801: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
802: PetscInt m_local;
804: PetscFunctionBegin;
805: if (!lbfgs->temp_mat) PetscCall(MatDuplicate(lbfgs->YtS_triu_strict, MAT_SHARE_NONZERO_PATTERN, &lbfgs->temp_mat));
806: PetscCall(MatCopy(lbfgs->YtS_triu_strict, lbfgs->temp_mat, SAME_NONZERO_PATTERN));
807: PetscCall(MatDiagonalScale(lbfgs->temp_mat, lbfgs->inv_diag_vec, NULL));
808: PetscCall(MatGetLocalSize(result, &m_local, NULL));
809: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
810: PetscCall(MatConjugate(lbfgs->temp_mat));
811: if (m_local) {
812: Mat temp_local, YtS_local, result_local;
813: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
814: PetscCall(MatDenseGetLocalMatrix(lbfgs->temp_mat, &temp_local));
815: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
816: PetscCall(MatTransposeMatMult(YtS_local, temp_local, MAT_REUSE_MATRIX, PETSC_DEFAULT, &result_local));
817: }
818: PetscCall(MatConjugate(result));
819: PetscFunctionReturn(PETSC_SUCCESS);
820: }
822: static PetscErrorCode MatLMVMDBFGSUpdateMultData(Mat B)
823: {
824: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
825: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
826: PetscInt m = lmvm->m, m_local;
827: PetscInt k = lbfgs->num_updates;
828: PetscInt h = k - oldest_update(m, k);
829: PetscInt j_0;
830: PetscInt prev_oldest;
831: Mat J_local;
833: PetscFunctionBegin;
834: if (!lbfgs->YtS_triu_strict) {
835: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->YtS_triu_strict));
836: PetscCall(MatDestroy(&lbfgs->StBS));
837: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->StBS));
838: PetscCall(MatDestroy(&lbfgs->J));
839: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->J));
840: PetscCall(MatDestroy(&lbfgs->BS));
841: PetscCall(MatDuplicate(lbfgs->Yfull, MAT_SHARE_NONZERO_PATTERN, &lbfgs->BS));
842: PetscCall(MatShift(lbfgs->StBS, 1.0));
843: lbfgs->num_mult_updates = oldest_update(m, k);
844: }
845: if (lbfgs->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
847: /* B_0 may have been updated, we must recompute B_0 S and S^T B_0 S */
848: for (PetscInt j = oldest_update(m, k); j < k; j++) {
849: Vec s_j;
850: Vec Bs_j;
851: Vec StBs_j;
852: PetscInt S_idx = recycle_index(m, j);
853: PetscInt StBS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
855: PetscCall(MatDenseGetColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
856: PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
857: PetscCall(MatDQNApplyJ0Fwd(B, s_j, Bs_j));
858: PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
859: PetscCall(MatDenseGetColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
860: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Bs_j, StBs_j, 0, h));
861: lbfgs->St_count++;
862: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, StBs_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
863: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
864: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
865: }
866: prev_oldest = oldest_update(m, lbfgs->num_mult_updates);
867: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
868: /* move the YtS entries that have been computed and need to be kept back up */
869: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
871: PetscCall(MatMove_LR3(B, lbfgs->YtS_triu_strict, m_keep));
872: }
873: PetscCall(MatGetLocalSize(lbfgs->YtS_triu_strict, &m_local, NULL));
874: j_0 = PetscMax(lbfgs->num_mult_updates, oldest_update(m, k));
875: for (PetscInt j = j_0; j < k; j++) {
876: PetscInt S_idx = recycle_index(m, j);
877: PetscInt YtS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
878: Vec s_j, Yts_j;
880: PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
881: PetscCall(MatDenseGetColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
882: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, s_j, Yts_j, 0, h));
883: lbfgs->Yt_count++;
884: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Yts_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
885: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
886: PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
887: /* zero the corresponding row */
888: if (m_local > 0) {
889: Mat YtS_local, YtS_row;
891: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
892: PetscCall(MatDenseGetSubMatrix(YtS_local, YtS_idx, YtS_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &YtS_row));
893: PetscCall(MatZeroEntries(YtS_row));
894: PetscCall(MatDenseRestoreSubMatrix(YtS_local, &YtS_row));
895: }
896: }
897: if (!lbfgs->inv_diag_vec) PetscCall(VecDuplicate(lbfgs->diag_vec, &lbfgs->inv_diag_vec));
898: PetscCall(VecCopy(lbfgs->diag_vec, lbfgs->inv_diag_vec));
899: PetscCall(VecReciprocal(lbfgs->inv_diag_vec));
900: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
901: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
902: PetscCall(MatGetLDLT(B, lbfgs->J));
903: PetscCall(MatAXPY(lbfgs->J, 1.0, lbfgs->StBS, SAME_NONZERO_PATTERN));
904: if (m_local) {
905: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
906: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
907: }
908: lbfgs->num_mult_updates = lbfgs->num_updates;
909: PetscFunctionReturn(PETSC_SUCCESS);
910: }
912: /* Solves for
913: * [ I | -S R^{-T} ] [ I | 0 ] [ H_0 | 0 ] [ I | Y ] [ I ]
914: * [-----+---] [-----+---] [---+---] [-------------]
915: * [ Y^T | I ] [ 0 | D ] [ 0 | I ] [ -R^{-1} S^T ] */
917: static PetscErrorCode MatSolve_LMVMDBFGS(Mat H, Vec F, Vec dX)
918: {
919: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
920: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
921: Vec rwork1 = lbfgs->rwork1;
922: PetscInt m = lmvm->m;
923: PetscInt k = lbfgs->num_updates;
924: PetscInt h = k - oldest_update(m, k);
925: PetscObjectState Fstate;
927: PetscFunctionBegin;
928: VecCheckSameSize(F, 2, dX, 3);
929: VecCheckMatCompatible(H, dX, 3, F, 2);
931: /* Block Version */
932: if (!lbfgs->num_updates) {
933: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
934: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
935: }
937: PetscCall(PetscObjectStateGet((PetscObject)F, &Fstate));
938: if (F == lbfgs->Fprev_ref && Fstate == lbfgs->Fprev_state) {
939: PetscCall(VecCopy(lbfgs->StFprev, rwork1));
940: } else {
941: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, F, rwork1, 0, h));
942: lbfgs->St_count++;
943: }
945: /* Reordering rwork1, as STY is in history order, while S is in recycled order */
946: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
947: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_FALSE, lbfgs->num_updates, lbfgs->strategy));
948: PetscCall(VecScale(rwork1, -1.0));
949: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
951: PetscCall(VecCopy(F, lbfgs->column_work));
952: PetscCall(MatMultAddColumnRange(lbfgs->Yfull, rwork1, lbfgs->column_work, lbfgs->column_work, 0, h));
953: lbfgs->Y_count++;
955: PetscCall(VecPointwiseMult(rwork1, lbfgs->diag_vec_recycle_order, rwork1));
956: PetscCall(MatDQNApplyJ0Inv(H, lbfgs->column_work, dX));
958: PetscCall(MatMultHermitianTransposeAddColumnRange(lbfgs->Yfull, dX, rwork1, rwork1, 0, h));
959: lbfgs->Yt_count++;
961: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
962: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_TRUE, lbfgs->num_updates, lbfgs->strategy));
963: PetscCall(VecScale(rwork1, -1.0));
964: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
966: PetscCall(MatMultAddColumnRange(lbfgs->Sfull, rwork1, dX, dX, 0, h));
967: lbfgs->S_count++;
968: PetscFunctionReturn(PETSC_SUCCESS);
969: }
971: /* Solves for
972: B_0 - [ Y | B_0 S] [ -D | L^T ]^-1 [ Y^T ]
973: [-----+-----------] [---------]
974: [ L | S^T B_0 S ] [ S^T B_0 ]
976: Above is equivalent to
978: B_0 - [ Y | B_0 S] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} L^T ]]^-1 [ Y^T ]
979: [[-----------+---][-----+---][---+-------------]] [---------]
980: [[ -L D^{-1} | I ][ 0 | J ][ 0 | I ]] [ S^T B_0 ]
982: where J = S^T B_0 S + L D^{-1} L^T
984: becomes
986: B_0 - [ Y | B_0 S] [ I | D^{-1} L^T ][ -D^{-1} | 0 ][ I | 0 ] [ Y^T ]
987: [---+------------][----------+--------][----------+---] [---------]
988: [ 0 | I ][ 0 | J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
990: =
992: B_0 + [ Y | B_0 S] [ D^{-1} | 0 ][ I | L^T ][ I | 0 ][ I | 0 ] [ Y^T ]
993: [--------+---][---+-----][---+---------][----------+---] [---------]
994: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
996: (Note that YtS_triu_strict is L^T)
997: Byrd, Nocedal, Schnabel 1994
999: Alternative approach: considering the fact that DFP is dual to BFGS, use MatMult of DPF:
1000: (See ddfp.c's MatMult_LMVMDDFP)
1002: */
1003: static PetscErrorCode MatMult_LMVMDBFGS(Mat B, Vec X, Vec Z)
1004: {
1005: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1006: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
1007: Mat J_local;
1008: PetscInt idx, i, j, m_local, local_n;
1009: PetscInt m = lmvm->m;
1010: PetscInt k = lbfgs->num_updates;
1011: PetscInt h = k - oldest_update(m, k);
1013: PetscFunctionBegin;
1014: VecCheckSameSize(X, 2, Z, 3);
1015: VecCheckMatCompatible(B, X, 2, Z, 3);
1017: /* Cholesky Version */
1018: /* Start with the B0 term */
1019: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1020: if (!lbfgs->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }
1022: if (lbfgs->use_recursive) {
1023: PetscDeviceContext dctx;
1024: PetscMemType memtype;
1025: PetscScalar stz, ytx, stp, sjtpi, yjtsi, *workscalar;
1026: PetscInt oldest = oldest_update(m, k);
1028: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
1029: /* Recursive formulation to avoid Cholesky. Not a dense formulation */
1030: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, X, lbfgs->rwork1, 0, h));
1031: lbfgs->Yt_count++;
1033: PetscCall(VecGetLocalSize(lbfgs->rwork1, &local_n));
1035: if (lbfgs->needPQ) {
1036: PetscInt oldest = oldest_update(m, k);
1037: for (i = 0; i <= lmvm->k; ++i) {
1038: idx = recycle_index(m, i + oldest);
1039: /* column_work = S[idx] */
1040: PetscCall(MatGetColumnVector(lbfgs->Sfull, lbfgs->column_work, idx));
1041: PetscCall(MatDQNApplyJ0Fwd(B, lbfgs->column_work, lbfgs->PQ[idx]));
1042: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, lbfgs->column_work, lbfgs->rwork3, 0, h));
1043: PetscCall(VecGetArrayAndMemType(lbfgs->rwork3, &workscalar, &memtype));
1044: for (j = 0; j < i; ++j) {
1045: PetscInt idx_j = recycle_index(m, j + oldest);
1046: /* Copy yjtsi in device-aware manner */
1047: if (local_n) {
1048: if (PetscMemTypeHost(memtype)) {
1049: yjtsi = workscalar[idx_j];
1050: } else {
1051: PetscCall(PetscDeviceRegisterMemory(&yjtsi, PETSC_MEMTYPE_HOST, sizeof(yjtsi)));
1052: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1053: PetscCall(PetscDeviceArrayCopy(dctx, &yjtsi, &workscalar[idx_j], 1));
1054: }
1055: }
1056: PetscCallMPI(MPI_Bcast(&yjtsi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
1057: /* column_work2 = S[j] */
1058: PetscCall(MatGetColumnVector(lbfgs->Sfull, lbfgs->column_work2, idx_j));
1059: PetscCall(VecDot(lbfgs->column_work2, lbfgs->PQ[idx], &sjtpi));
1060: /* column_work2 = Y[j] */
1061: PetscCall(MatGetColumnVector(lbfgs->Yfull, lbfgs->column_work2, idx_j));
1062: /* Compute the pure BFGS component of the forward product */
1063: PetscCall(VecAXPBYPCZ(lbfgs->PQ[idx], -PetscRealPart(sjtpi) / lbfgs->stp[idx_j], PetscRealPart(yjtsi) / lbfgs->yts[j], 1.0, lbfgs->PQ[idx_j], lbfgs->column_work2));
1064: }
1065: PetscCall(VecDot(lbfgs->column_work, lbfgs->PQ[idx], &stp));
1066: lbfgs->stp[idx] = PetscRealPart(stp);
1067: }
1068: lbfgs->needPQ = PETSC_FALSE;
1069: }
1071: PetscCall(VecGetArrayAndMemType(lbfgs->rwork1, &workscalar, &memtype));
1072: for (i = 0; i <= lmvm->k; ++i) {
1073: idx = recycle_index(m, i + oldest);
1074: /* Copy stz[i], ytx[i] in device-aware manner */
1075: if (local_n) {
1076: if (PetscMemTypeHost(memtype)) {
1077: ytx = workscalar[idx];
1078: } else {
1079: PetscCall(PetscDeviceRegisterMemory(&ytx, PETSC_MEMTYPE_HOST, 1 * sizeof(ytx)));
1080: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1081: PetscCall(PetscDeviceArrayCopy(dctx, &ytx, &workscalar[idx], 1));
1082: }
1083: }
1084: PetscCallMPI(MPI_Bcast(&ytx, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
1085: /* column_work : S[i], column_work2 : Y[i] */
1086: PetscCall(MatGetColumnVector(lbfgs->Sfull, lbfgs->column_work, idx));
1087: PetscCall(MatGetColumnVector(lbfgs->Yfull, lbfgs->column_work2, idx));
1088: PetscCall(VecDot(lbfgs->column_work, Z, &stz));
1089: PetscCall(VecAXPBYPCZ(Z, -PetscRealPart(stz) / lbfgs->stp[idx], PetscRealPart(ytx) / lbfgs->yts[i], 1.0, lbfgs->PQ[idx], lbfgs->column_work2));
1090: }
1091: PetscCall(VecRestoreArrayAndMemType(lbfgs->rwork1, &workscalar));
1092: } else {
1093: PetscCall(MatLMVMDBFGSUpdateMultData(B));
1094: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, X, lbfgs->rwork1, 0, h));
1095: lbfgs->Yt_count++;
1096: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Z, lbfgs->rwork2, 0, h));
1097: lbfgs->St_count++;
1098: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
1099: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1100: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork2, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1101: }
1103: PetscCall(VecPointwiseMult(lbfgs->rwork3, lbfgs->rwork1, lbfgs->inv_diag_vec));
1104: PetscCall(MatMultTransposeAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork2, lbfgs->rwork2));
1106: if (!lbfgs->rwork2_local) PetscCall(VecCreateLocalVector(lbfgs->rwork2, &lbfgs->rwork2_local));
1107: if (!lbfgs->rwork3_local) PetscCall(VecCreateLocalVector(lbfgs->rwork3, &lbfgs->rwork3_local));
1108: PetscCall(VecGetLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
1109: PetscCall(VecGetLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
1110: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
1111: PetscCall(VecGetSize(lbfgs->rwork2_local, &m_local));
1112: if (m_local) {
1113: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
1114: PetscCall(MatSolve(J_local, lbfgs->rwork2_local, lbfgs->rwork3_local));
1115: }
1116: PetscCall(VecRestoreLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
1117: PetscCall(VecRestoreLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
1118: PetscCall(VecScale(lbfgs->rwork3, -1.0));
1120: PetscCall(MatMultAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork1, lbfgs->rwork1));
1121: PetscCall(VecPointwiseMult(lbfgs->rwork1, lbfgs->rwork1, lbfgs->inv_diag_vec));
1123: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
1124: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1125: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork3, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1126: }
1128: PetscCall(MatMultAddColumnRange(lbfgs->Yfull, lbfgs->rwork1, Z, Z, 0, h));
1129: lbfgs->Y_count++;
1130: PetscCall(MatMultAddColumnRange(lbfgs->BS, lbfgs->rwork3, Z, Z, 0, h));
1131: lbfgs->S_count++;
1132: }
1133: PetscFunctionReturn(PETSC_SUCCESS);
1134: }
1136: /*
1137: This dense representation reduces the L-BFGS update to a series of
1138: matrix-vector products with dense matrices in lieu of the conventional matrix-free
1139: two-loop algorithm.
1140: */
1141: PetscErrorCode MatCreate_LMVMDBFGS(Mat B)
1142: {
1143: Mat_LMVM *lmvm;
1144: Mat_DQN *lbfgs;
1146: PetscFunctionBegin;
1147: PetscCall(MatCreate_LMVM(B));
1148: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDBFGS));
1149: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1150: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1151: B->ops->view = MatView_LMVMDQN;
1152: B->ops->setup = MatSetUp_LMVMDQN;
1153: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1154: B->ops->destroy = MatDestroy_LMVMDQN;
1156: lmvm = (Mat_LMVM *)B->data;
1157: lmvm->square = PETSC_TRUE;
1158: lmvm->ops->allocate = MatAllocate_LMVMDQN;
1159: lmvm->ops->reset = MatReset_LMVMDQN;
1160: lmvm->ops->update = MatUpdate_LMVMDQN;
1161: lmvm->ops->mult = MatMult_LMVMDBFGS;
1162: lmvm->ops->solve = MatSolve_LMVMDBFGS;
1163: lmvm->ops->copy = MatCopy_LMVMDQN;
1165: PetscCall(PetscNew(&lbfgs));
1166: lmvm->ctx = (void *)lbfgs;
1167: lbfgs->allocated = PETSC_FALSE;
1168: lbfgs->use_recursive = PETSC_TRUE;
1169: lbfgs->needPQ = PETSC_TRUE;
1170: lbfgs->watchdog = 0;
1171: lbfgs->max_seq_rejects = lmvm->m / 2;
1172: lbfgs->strategy = MAT_LMVM_DENSE_INPLACE;
1173: lbfgs->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
1175: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &lbfgs->diag_qn));
1176: PetscCall(MatSetType(lbfgs->diag_qn, MATLMVMDIAGBROYDEN));
1177: PetscCall(MatSetOptionsPrefix(lbfgs->diag_qn, "J0_"));
1178: PetscFunctionReturn(PETSC_SUCCESS);
1179: }
1181: /*@
1182: MatCreateLMVMDBFGS - Creates a dense representation of the limited-memory
1183: Broyden-Fletcher-Goldfarb-Shanno (BFGS) approximation to a Hessian.
1185: Collective
1187: Input Parameters:
1188: + comm - MPI communicator
1189: . n - number of local rows for storage vectors
1190: - N - global size of the storage vectors
1192: Output Parameter:
1193: . B - the matrix
1195: Level: advanced
1197: Note:
1198: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1199: paradigm instead of this routine directly.
1201: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MatCreateLMVMBFGS()`
1202: @*/
1203: PetscErrorCode MatCreateLMVMDBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1204: {
1205: PetscFunctionBegin;
1206: PetscCall(KSPInitializePackage());
1207: PetscCall(MatCreate(comm, B));
1208: PetscCall(MatSetSizes(*B, n, n, N, N));
1209: PetscCall(MatSetType(*B, MATLMVMDBFGS));
1210: PetscCall(MatSetUp(*B));
1211: PetscFunctionReturn(PETSC_SUCCESS);
1212: }
1214: /* here R is strictly upper triangular part of STY */
1215: static PetscErrorCode MatGetRTDR(Mat B, Mat result)
1216: {
1217: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1218: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1219: PetscInt m_local;
1221: PetscFunctionBegin;
1222: if (!ldfp->temp_mat) PetscCall(MatDuplicate(ldfp->StY_triu_strict, MAT_SHARE_NONZERO_PATTERN, &ldfp->temp_mat));
1223: PetscCall(MatCopy(ldfp->StY_triu_strict, ldfp->temp_mat, SAME_NONZERO_PATTERN));
1224: PetscCall(MatDiagonalScale(ldfp->temp_mat, ldfp->inv_diag_vec, NULL));
1225: PetscCall(MatGetLocalSize(result, &m_local, NULL));
1226: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
1227: PetscCall(MatConjugate(ldfp->temp_mat));
1228: if (m_local) {
1229: Mat temp_local, StY_local, result_local;
1230: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1231: PetscCall(MatDenseGetLocalMatrix(ldfp->temp_mat, &temp_local));
1232: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
1233: PetscCall(MatTransposeMatMult(StY_local, temp_local, MAT_REUSE_MATRIX, PETSC_DEFAULT, &result_local));
1234: }
1235: PetscCall(MatConjugate(result));
1236: PetscFunctionReturn(PETSC_SUCCESS);
1237: }
1239: static PetscErrorCode MatLMVMDDFPUpdateSolveData(Mat B)
1240: {
1241: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1242: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1243: PetscInt m = lmvm->m, m_local;
1244: PetscInt k = ldfp->num_updates;
1245: PetscInt h = k - oldest_update(m, k);
1246: PetscInt j_0;
1247: PetscInt prev_oldest;
1248: Mat J_local;
1250: PetscFunctionBegin;
1251: if (!ldfp->StY_triu_strict) {
1252: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->StY_triu_strict));
1253: PetscCall(MatDestroy(&ldfp->YtHY));
1254: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->YtHY));
1255: PetscCall(MatDestroy(&ldfp->J));
1256: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->J));
1257: PetscCall(MatDestroy(&ldfp->HY));
1258: PetscCall(MatDuplicate(ldfp->Yfull, MAT_SHARE_NONZERO_PATTERN, &ldfp->HY));
1259: PetscCall(MatShift(ldfp->YtHY, 1.0));
1260: ldfp->num_mult_updates = oldest_update(m, k);
1261: }
1262: if (ldfp->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
1264: /* H_0 may have been updated, we must recompute H_0 Y and Y^T H_0 Y */
1265: for (PetscInt j = oldest_update(m, k); j < k; j++) {
1266: Vec y_j;
1267: Vec Hy_j;
1268: Vec YtHy_j;
1269: PetscInt Y_idx = recycle_index(m, j);
1270: PetscInt YtHY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1272: PetscCall(MatDenseGetColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1273: PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1274: PetscCall(MatDQNApplyJ0Inv(B, y_j, Hy_j));
1275: PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1276: PetscCall(MatDenseGetColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1277: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, Hy_j, YtHy_j, 0, h));
1278: ldfp->Yt_count++;
1279: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, YtHy_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1280: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1281: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1282: }
1283: prev_oldest = oldest_update(m, ldfp->num_mult_updates);
1284: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
1285: /* move the YtS entries that have been computed and need to be kept back up */
1286: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
1288: PetscCall(MatMove_LR3(B, ldfp->StY_triu_strict, m_keep));
1289: }
1290: PetscCall(MatGetLocalSize(ldfp->StY_triu_strict, &m_local, NULL));
1291: j_0 = PetscMax(ldfp->num_mult_updates, oldest_update(m, k));
1292: for (PetscInt j = j_0; j < k; j++) {
1293: PetscInt Y_idx = recycle_index(m, j);
1294: PetscInt StY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1295: Vec y_j, Sty_j;
1297: PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1298: PetscCall(MatDenseGetColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1299: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, y_j, Sty_j, 0, h));
1300: ldfp->St_count++;
1301: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Sty_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1302: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1303: PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1304: /* zero the corresponding row */
1305: if (m_local > 0) {
1306: Mat StY_local, StY_row;
1308: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1309: PetscCall(MatDenseGetSubMatrix(StY_local, StY_idx, StY_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &StY_row));
1310: PetscCall(MatZeroEntries(StY_row));
1311: PetscCall(MatDenseRestoreSubMatrix(StY_local, &StY_row));
1312: }
1313: }
1314: if (!ldfp->inv_diag_vec) PetscCall(VecDuplicate(ldfp->diag_vec, &ldfp->inv_diag_vec));
1315: PetscCall(VecCopy(ldfp->diag_vec, ldfp->inv_diag_vec));
1316: PetscCall(VecReciprocal(ldfp->inv_diag_vec));
1317: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1318: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
1319: PetscCall(MatGetRTDR(B, ldfp->J));
1320: PetscCall(MatAXPY(ldfp->J, 1.0, ldfp->YtHY, SAME_NONZERO_PATTERN));
1321: if (m_local) {
1322: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
1323: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
1324: }
1325: ldfp->num_mult_updates = ldfp->num_updates;
1326: PetscFunctionReturn(PETSC_SUCCESS);
1327: }
1329: /* Solves for
1331: H_0 - [ S | H_0 Y] [ -D | R.T ]^-1 [ S^T ]
1332: [-----+-----------] [---------]
1333: [ R | Y^T H_0 Y ] [ Y^T H_0 ]
1335: Above is equivalent to
1337: H_0 - [ S | H_0 Y] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} R^T ]]^-1 [ S^T ]
1338: [[-----------+---][----+---][---+-------------]] [---------]
1339: [[ -R D^{-1} | I ][ 0 | J ][ 0 | I ]] [ Y^T H_0 ]
1341: where J = Y^T H_0 Y + R D^{-1} R.T
1343: becomes
1345: H_0 - [ S | H_0 Y] [ I | D^{-1} R^T ][ -D^{-1} | 0 ][ I | 0 ] [ S^T ]
1346: [---+------------][----------+--------][----------+---] [---------]
1347: [ 0 | I ][ 0 | J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1349: =
1351: H_0 + [ S | H_0 Y] [ D^{-1} | 0 ][ I | R^T ][ I | 0 ][ I | 0 ] [ S^T ]
1352: [--------+---][---+-----][---+---------][----------+---] [---------]
1353: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1355: (Note that StY_triu_strict is R)
1356: Byrd, Nocedal, Schnabel 1994
1358: */
1359: static PetscErrorCode MatSolve_LMVMDDFP(Mat H, Vec F, Vec dX)
1360: {
1361: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
1362: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1363: PetscInt m = lmvm->m;
1364: PetscInt k = ldfp->num_updates;
1365: PetscInt h = k - oldest_update(m, k);
1366: PetscInt idx, i, j, local_n;
1367: PetscInt m_local;
1368: Mat J_local;
1370: PetscFunctionBegin;
1371: VecCheckSameSize(F, 2, dX, 3);
1372: VecCheckMatCompatible(H, dX, 3, F, 2);
1374: /* Cholesky Version */
1375: /* Start with the B0 term */
1376: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
1377: if (!ldfp->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }
1379: if (ldfp->use_recursive) {
1380: PetscDeviceContext dctx;
1381: PetscMemType memtype;
1382: PetscScalar stf, ytx, ytq, yjtqi, sjtyi, *workscalar;
1384: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
1385: /* Recursive formulation to avoid Cholesky. Not a dense formulation */
1386: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, F, ldfp->rwork1, 0, h));
1387: ldfp->Yt_count++;
1389: PetscCall(VecGetLocalSize(ldfp->rwork1, &local_n));
1391: PetscInt oldest = oldest_update(m, k);
1393: if (ldfp->needPQ) {
1394: PetscInt oldest = oldest_update(m, k);
1395: for (i = 0; i <= lmvm->k; ++i) {
1396: idx = recycle_index(m, i + oldest);
1397: /* column_work = S[idx] */
1398: PetscCall(MatGetColumnVector(ldfp->Yfull, ldfp->column_work, idx));
1399: PetscCall(MatDQNApplyJ0Inv(H, ldfp->column_work, ldfp->PQ[idx]));
1400: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, ldfp->column_work, ldfp->rwork3, 0, h));
1401: PetscCall(VecGetArrayAndMemType(ldfp->rwork3, &workscalar, &memtype));
1402: for (j = 0; j < i; ++j) {
1403: PetscInt idx_j = recycle_index(m, j + oldest);
1404: /* Copy sjtyi in device-aware manner */
1405: if (local_n) {
1406: if (PetscMemTypeHost(memtype)) {
1407: sjtyi = workscalar[idx_j];
1408: } else {
1409: PetscCall(PetscDeviceRegisterMemory(&sjtyi, PETSC_MEMTYPE_HOST, 1 * sizeof(sjtyi)));
1410: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1411: PetscCall(PetscDeviceArrayCopy(dctx, &sjtyi, &workscalar[idx_j], 1));
1412: }
1413: }
1414: PetscCallMPI(MPI_Bcast(&sjtyi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1415: /* column_work2 = Y[j] */
1416: PetscCall(MatGetColumnVector(ldfp->Yfull, ldfp->column_work2, idx_j));
1417: PetscCall(VecDot(ldfp->column_work2, ldfp->PQ[idx], &yjtqi));
1418: /* column_work2 = Y[j] */
1419: PetscCall(MatGetColumnVector(ldfp->Sfull, ldfp->column_work2, idx_j));
1420: /* Compute the pure BFGS component of the forward product */
1421: PetscCall(VecAXPBYPCZ(ldfp->PQ[idx], -PetscRealPart(yjtqi) / ldfp->ytq[idx_j], PetscRealPart(sjtyi) / ldfp->yts[j], 1.0, ldfp->PQ[idx_j], ldfp->column_work2));
1422: }
1423: PetscCall(VecDot(ldfp->column_work, ldfp->PQ[idx], &ytq));
1424: ldfp->ytq[idx] = PetscRealPart(ytq);
1425: }
1426: ldfp->needPQ = PETSC_FALSE;
1427: }
1429: PetscCall(VecGetArrayAndMemType(ldfp->rwork1, &workscalar, &memtype));
1430: for (i = 0; i <= lmvm->k; ++i) {
1431: idx = recycle_index(m, i + oldest);
1432: /* Copy stz[i], ytx[i] in device-aware manner */
1433: if (local_n) {
1434: if (PetscMemTypeHost(memtype)) {
1435: stf = workscalar[idx];
1436: } else {
1437: PetscCall(PetscDeviceRegisterMemory(&stf, PETSC_MEMTYPE_HOST, sizeof(stf)));
1438: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1439: PetscCall(PetscDeviceArrayCopy(dctx, &stf, &workscalar[idx], 1));
1440: }
1441: }
1442: PetscCallMPI(MPI_Bcast(&stf, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1443: /* column_work : S[i], column_work2 : Y[i] */
1444: PetscCall(MatGetColumnVector(ldfp->Sfull, ldfp->column_work, idx));
1445: PetscCall(MatGetColumnVector(ldfp->Yfull, ldfp->column_work2, idx));
1446: PetscCall(VecDot(ldfp->column_work2, dX, &ytx));
1447: PetscCall(VecAXPBYPCZ(dX, -PetscRealPart(ytx) / ldfp->ytq[idx], PetscRealPart(stf) / ldfp->yts[i], 1.0, ldfp->PQ[idx], ldfp->column_work));
1448: }
1449: PetscCall(VecRestoreArrayAndMemType(ldfp->rwork1, &workscalar));
1450: } else {
1451: PetscCall(MatLMVMDDFPUpdateSolveData(H));
1452: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, F, ldfp->rwork1, 0, h));
1453: ldfp->St_count++;
1454: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, dX, ldfp->rwork2, 0, h));
1455: ldfp->Yt_count++;
1456: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1457: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1458: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork2, ldfp->num_updates, ldfp->cyclic_work_vec));
1459: }
1461: PetscCall(VecPointwiseMult(ldfp->rwork3, ldfp->rwork1, ldfp->inv_diag_vec));
1462: PetscCall(MatMultTransposeAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork2, ldfp->rwork2));
1464: if (!ldfp->rwork2_local) PetscCall(VecCreateLocalVector(ldfp->rwork2, &ldfp->rwork2_local));
1465: if (!ldfp->rwork3_local) PetscCall(VecCreateLocalVector(ldfp->rwork3, &ldfp->rwork3_local));
1466: PetscCall(VecGetLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1467: PetscCall(VecGetLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1468: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1469: PetscCall(VecGetSize(ldfp->rwork2_local, &m_local));
1470: if (m_local) {
1471: Mat J_local;
1473: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1474: PetscCall(MatSolve(J_local, ldfp->rwork2_local, ldfp->rwork3_local));
1475: }
1476: PetscCall(VecRestoreLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1477: PetscCall(VecRestoreLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1478: PetscCall(VecScale(ldfp->rwork3, -1.0));
1480: PetscCall(MatMultAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork1, ldfp->rwork1));
1481: PetscCall(VecPointwiseMult(ldfp->rwork1, ldfp->rwork1, ldfp->inv_diag_vec));
1483: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1484: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1485: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork3, ldfp->num_updates, ldfp->cyclic_work_vec));
1486: }
1488: PetscCall(MatMultAddColumnRange(ldfp->Sfull, ldfp->rwork1, dX, dX, 0, h));
1489: ldfp->S_count++;
1490: PetscCall(MatMultAddColumnRange(ldfp->HY, ldfp->rwork3, dX, dX, 0, h));
1491: ldfp->Y_count++;
1492: }
1493: PetscFunctionReturn(PETSC_SUCCESS);
1494: }
1496: /* Solves for
1497: (Theorem 1, Erway, Jain, and Marcia, 2013)
1499: B_0 - [ Y | B_0 S] [ -R^{-T} (D + S^T B_0 S) R^{-1} | R^{-T} ] [ Y^T ]
1500: ---------------------------------+--------] [---------]
1501: [ R^{-1} | 0 ] [ S^T B_0 ]
1503: (Note: R above is right triangular part of YTS)
1504: which becomes,
1506: [ I | -Y L^{-T} ] [ I | 0 ] [ B_0 | 0 ] [ I | S ] [ I ]
1507: [-----+---] [-----+---] [---+---] [-------------]
1508: [ S^T | I ] [ 0 | D ] [ 0 | I ] [ -L^{-1} Y^T ]
1510: (Note: L above is right triangular part of STY)
1512: */
1513: static PetscErrorCode MatMult_LMVMDDFP(Mat B, Vec X, Vec Z)
1514: {
1515: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1516: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1517: Vec rwork1 = ldfp->rwork1;
1518: PetscInt m = lmvm->m;
1519: PetscInt k = ldfp->num_updates;
1520: PetscInt h = k - oldest_update(m, k);
1521: PetscObjectState Xstate;
1523: PetscFunctionBegin;
1524: VecCheckSameSize(X, 2, Z, 3);
1525: VecCheckMatCompatible(B, X, 2, Z, 3);
1527: /* DFP Version. Erway, Jain, Marcia, 2013, Theorem 1 */
1528: /* Block Version */
1529: if (!ldfp->num_updates) {
1530: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1531: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1532: }
1534: PetscCall(PetscObjectStateGet((PetscObject)X, &Xstate));
1535: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, X, rwork1, 0, h));
1537: /* Reordering rwork1, as STY is in history order, while Y is in recycled order */
1538: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1539: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_FALSE, ldfp->num_updates, ldfp->strategy));
1540: PetscCall(VecScale(rwork1, -1.0));
1541: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1543: PetscCall(VecCopy(X, ldfp->column_work));
1544: PetscCall(MatMultAddColumnRange(ldfp->Sfull, rwork1, ldfp->column_work, ldfp->column_work, 0, h));
1545: ldfp->S_count++;
1547: PetscCall(VecPointwiseMult(rwork1, ldfp->diag_vec_recycle_order, rwork1));
1548: PetscCall(MatDQNApplyJ0Fwd(B, ldfp->column_work, Z));
1550: PetscCall(MatMultHermitianTransposeAddColumnRange(ldfp->Sfull, Z, rwork1, rwork1, 0, h));
1551: ldfp->St_count++;
1553: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1554: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_TRUE, ldfp->num_updates, ldfp->strategy));
1555: PetscCall(VecScale(rwork1, -1.0));
1556: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1558: PetscCall(MatMultAddColumnRange(ldfp->Yfull, rwork1, Z, Z, 0, h));
1559: ldfp->Y_count++;
1560: PetscFunctionReturn(PETSC_SUCCESS);
1561: }
1563: /*
1564: This dense representation reduces the L-DFP update to a series of
1565: matrix-vector products with dense matrices in lieu of the conventional
1566: matrix-free two-loop algorithm.
1567: */
1568: PetscErrorCode MatCreate_LMVMDDFP(Mat B)
1569: {
1570: Mat_LMVM *lmvm;
1571: Mat_DQN *ldfp;
1573: PetscFunctionBegin;
1574: PetscCall(MatCreate_LMVM(B));
1575: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDDFP));
1576: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1577: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1578: B->ops->view = MatView_LMVMDQN;
1579: B->ops->setup = MatSetUp_LMVMDQN;
1580: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1581: B->ops->destroy = MatDestroy_LMVMDQN;
1583: lmvm = (Mat_LMVM *)B->data;
1584: lmvm->square = PETSC_TRUE;
1585: lmvm->ops->allocate = MatAllocate_LMVMDQN;
1586: lmvm->ops->reset = MatReset_LMVMDQN;
1587: lmvm->ops->update = MatUpdate_LMVMDQN;
1588: lmvm->ops->mult = MatMult_LMVMDDFP;
1589: lmvm->ops->solve = MatSolve_LMVMDDFP;
1590: lmvm->ops->copy = MatCopy_LMVMDQN;
1592: PetscCall(PetscNew(&ldfp));
1593: lmvm->ctx = (void *)ldfp;
1594: ldfp->allocated = PETSC_FALSE;
1595: ldfp->watchdog = 0;
1596: ldfp->max_seq_rejects = lmvm->m / 2;
1597: ldfp->strategy = MAT_LMVM_DENSE_INPLACE;
1598: ldfp->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
1599: ldfp->use_recursive = PETSC_TRUE;
1600: ldfp->needPQ = PETSC_TRUE;
1602: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ldfp->diag_qn));
1603: PetscCall(MatSetType(ldfp->diag_qn, MATLMVMDIAGBROYDEN));
1604: PetscCall(MatSetOptionsPrefix(ldfp->diag_qn, "J0_"));
1605: PetscFunctionReturn(PETSC_SUCCESS);
1606: }
1608: /*@
1609: MatCreateLMVMDDFP - Creates a dense representation of the limited-memory
1610: Davidon-Fletcher-Powell (DFP) approximation to a Hessian.
1612: Collective
1614: Input Parameters:
1615: + comm - MPI communicator
1616: . n - number of local rows for storage vectors
1617: - N - global size of the storage vectors
1619: Output Parameter:
1620: . B - the matrix
1622: Level: advanced
1624: Note:
1625: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1626: paradigm instead of this routine directly.
1628: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDDFP`, `MatCreateLMVMDFP()`
1629: @*/
1630: PetscErrorCode MatCreateLMVMDDFP(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1631: {
1632: PetscFunctionBegin;
1633: PetscCall(KSPInitializePackage());
1634: PetscCall(MatCreate(comm, B));
1635: PetscCall(MatSetSizes(*B, n, n, N, N));
1636: PetscCall(MatSetType(*B, MATLMVMDDFP));
1637: PetscCall(MatSetUp(*B));
1638: PetscFunctionReturn(PETSC_SUCCESS);
1639: }
1641: /*@
1642: MatLMVMDenseSetType - Sets the memory storage type for dense `MATLMVM`
1644: Input Parameters:
1645: + B - the `MATLMVM` matrix
1646: - type - scale type, see `MatLMVMDenseSetType`
1648: Options Database Keys:
1649: + -mat_lqn_type <reorder,inplace> - set the strategy
1650: . -mat_lbfgs_type <reorder,inplace> - set the strategy
1651: - -mat_ldfp_type <reorder,inplace> - set the strategy
1653: Level: intermediate
1655: MatLMVMDenseTypes\:
1656: + `MAT_LMVM_DENSE_REORDER` - reorders memory to minimize kernel launch
1657: - `MAT_LMVM_DENSE_INPLACE` - launches kernel inplace to minimize memory movement
1659: .seealso: [](ch_ksp), `MATLMVMDQN`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatLMVMDenseType`
1660: @*/
1661: PetscErrorCode MatLMVMDenseSetType(Mat B, MatLMVMDenseType type)
1662: {
1663: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1664: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
1666: PetscFunctionBegin;
1668: lqn->strategy = type;
1669: PetscFunctionReturn(PETSC_SUCCESS);
1670: }