Actual source code: denseqn.c
1: #include <../src/ksp/ksp/utils/lmvm/dense/denseqn.h>
2: #include <../src/ksp/ksp/utils/lmvm/diagbrdn/diagbrdn.h>
3: #include <petscblaslapack.h>
4: #include <petscmat.h>
5: #include <petscsys.h>
6: #include <petscsystypes.h>
7: #include <petscis.h>
8: #include <petscoptions.h>
9: #include <petscdevice.h>
10: #include <petsc/private/deviceimpl.h>
12: static PetscErrorCode MatMult_LMVMDQN(Mat, Vec, Vec);
13: static PetscErrorCode MatMult_LMVMDBFGS(Mat, Vec, Vec);
14: static PetscErrorCode MatMult_LMVMDDFP(Mat, Vec, Vec);
15: static PetscErrorCode MatSolve_LMVMDQN(Mat, Vec, Vec);
16: static PetscErrorCode MatSolve_LMVMDBFGS(Mat, Vec, Vec);
17: static PetscErrorCode MatSolve_LMVMDDFP(Mat, Vec, Vec);
19: static inline PetscInt recycle_index(PetscInt m, PetscInt idx)
20: {
21: return idx % m;
22: }
24: static inline PetscInt history_index(PetscInt m, PetscInt num_updates, PetscInt idx)
25: {
26: return (idx - num_updates) + PetscMin(m, num_updates);
27: }
29: static inline PetscInt oldest_update(PetscInt m, PetscInt idx)
30: {
31: return PetscMax(0, idx - m);
32: }
34: static PetscErrorCode MatView_LMVMDQN(Mat B, PetscViewer pv)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
37: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
39: PetscBool isascii;
41: PetscFunctionBegin;
42: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &isascii));
43: PetscCall(MatView_LMVM(B, pv));
44: if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatView(ldfp->diag_qn, pv)); }
45: if (isascii) { PetscCall(PetscViewerASCIIPrintf(pv, "Counts: S x : %" PetscInt_FMT ", S^T x : %" PetscInt_FMT ", Y x : %" PetscInt_FMT ", Y^T x: %" PetscInt_FMT "\n", ldfp->S_count, ldfp->St_count, ldfp->Y_count, ldfp->Yt_count)); }
46: PetscFunctionReturn(PETSC_SUCCESS);
47: }
49: static PetscErrorCode MatReset_LMVMDQN(Mat, PetscBool);
50: static PetscErrorCode MatAllocate_LMVMDQN(Mat B, Vec X, Vec F)
51: {
52: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
53: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
54: PetscBool is_dbfgs, is_ddfp, is_dqn, same, allocate = PETSC_FALSE;
55: VecType vec_type;
56: PetscInt m, n, M, N;
57: MPI_Comm comm = PetscObjectComm((PetscObject)B);
59: PetscFunctionBegin;
60: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
61: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
62: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
64: if (lmvm->allocated) {
65: PetscCall(VecGetType(X, &vec_type));
66: PetscCall(PetscObjectTypeCompare((PetscObject)lmvm->Xprev, vec_type, &same));
67: if (!same) {
68: /* Given X vector has a different type than allocated X-type data structures.
69: We need to destroy all of this and duplicate again out of the given vector. */
70: allocate = PETSC_TRUE;
71: PetscCall(MatReset_LMVMDQN(B, PETSC_TRUE));
72: } else {
73: VecCheckMatCompatible(B, X, 2, F, 3);
74: }
75: } else {
76: allocate = PETSC_TRUE;
77: }
78: if (allocate) {
79: PetscCall(VecGetLocalSize(X, &n));
80: PetscCall(VecGetSize(X, &N));
81: PetscCall(VecGetLocalSize(F, &m));
82: PetscCall(VecGetSize(F, &M));
83: PetscCheck(N == M, comm, PETSC_ERR_ARG_SIZ, "Incorrect problem sizes! dim(X) not equal to dim(F)");
84: PetscCall(MatSetSizes(B, m, n, M, N));
85: PetscCall(PetscLayoutSetUp(B->rmap));
86: PetscCall(PetscLayoutSetUp(B->cmap));
87: PetscCall(VecDuplicate(X, &lmvm->Xprev));
88: PetscCall(VecDuplicate(F, &lmvm->Fprev));
89: if (lmvm->m > 0) {
90: PetscMPIInt rank;
91: PetscInt m, M;
93: PetscCallMPI(MPI_Comm_rank(comm, &rank));
94: M = lmvm->m;
95: m = (rank == 0) ? M : 0;
97: /* For DBFGS: Create data needed for MatSolve() eagerly; data needed for MatMult() will be created on demand
98: * For DDFP : Create data needed for MatMult() eagerly; data needed for MatSolve() will be created on demand
99: * For DQN : Create all data eagerly */
100: PetscCall(VecGetType(X, &vec_type));
101: PetscCall(MatCreateDenseFromVecType(comm, vec_type, n, m, N, M, -1, NULL, &lqn->Sfull));
102: PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->Yfull));
103: if (is_dqn) {
104: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
105: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
106: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
107: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
108: } else if (is_ddfp) {
109: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
110: PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->HY));
111: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->diag_vec, &lqn->rwork1));
112: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->rwork2, &lqn->rwork3));
113: } else if (is_dbfgs) {
114: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
115: PetscCall(MatDuplicate(lqn->Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->BS));
116: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
117: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
118: } else {
119: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatAllocate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
120: }
121: /* initialize StY_triu and YtS_triu to identity, if they exist, so it is invertible */
122: if (lqn->StY_triu) {
123: PetscCall(MatZeroEntries(lqn->StY_triu));
124: PetscCall(MatShift(lqn->StY_triu, 1.0));
125: }
126: if (lqn->YtS_triu) {
127: PetscCall(MatZeroEntries(lqn->YtS_triu));
128: PetscCall(MatShift(lqn->YtS_triu, 1.0));
129: }
130: PetscCall(VecDuplicate(lqn->rwork2, &lqn->cyclic_work_vec));
131: PetscCall(VecZeroEntries(lqn->rwork1));
132: PetscCall(VecZeroEntries(lqn->rwork2));
133: PetscCall(VecZeroEntries(lqn->rwork3));
134: PetscCall(VecZeroEntries(lqn->diag_vec));
135: }
136: PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work));
137: if (!(lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale)) { PetscCall(MatLMVMAllocate(lqn->diag_qn, X, F)); }
138: lmvm->allocated = PETSC_TRUE;
139: B->preallocated = PETSC_TRUE;
140: B->assembled = PETSC_TRUE;
141: }
142: PetscFunctionReturn(PETSC_SUCCESS);
143: }
145: static PetscErrorCode MatSetUp_LMVMDQN(Mat B)
146: {
147: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
149: PetscInt m, n, M, N;
150: PetscMPIInt size;
151: MPI_Comm comm = PetscObjectComm((PetscObject)B);
152: Vec Xtmp, Ftmp;
154: PetscFunctionBegin;
155: PetscCall(MatGetSize(B, &M, &N));
156: PetscCheck(M != 0 && N != 0, comm, PETSC_ERR_ORDER, "MatSetSizes() must be called before MatSetUp()");
157: if (!lmvm->allocated) {
158: PetscCallMPI(MPI_Comm_size(comm, &size));
159: if (size == 1) {
160: PetscCall(VecCreateSeq(comm, N, &Xtmp));
161: PetscCall(VecCreateSeq(comm, M, &Ftmp));
162: } else {
163: PetscCall(MatGetLocalSize(B, &m, &n));
164: PetscCall(VecCreateMPI(comm, n, N, &Xtmp));
165: PetscCall(VecCreateMPI(comm, m, M, &Ftmp));
166: }
167: PetscCall(MatAllocate_LMVMDQN(B, Xtmp, Ftmp));
168: PetscCall(VecDestroy(&Xtmp));
169: PetscCall(VecDestroy(&Ftmp));
170: }
171: PetscFunctionReturn(PETSC_SUCCESS);
172: }
174: static PetscErrorCode MatSetFromOptions_LMVMDQN_Private(Mat B, PetscOptionItems *PetscOptionsObject)
175: {
176: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
177: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
178: PetscBool is_dbfgs, is_ddfp, is_dqn;
180: PetscFunctionBegin;
181: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
182: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
183: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
184: if (is_dqn) {
185: PetscCall(PetscOptionsEnum("-mat_lqn_type", "Implementation options for L-QN", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
186: PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
187: } else if (is_dbfgs) {
188: PetscCall(PetscOptionsEnum("-mat_lbfgs_type", "Implementation options for L-BFGS", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
189: PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
190: } else if (is_ddfp) {
191: PetscCall(PetscOptionsEnum("-mat_ldfp_type", "Implementation options for L-DFP", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
192: PetscCall(PetscOptionsEnum("-mat_lmvm_scale_type", "(developer) scaling type applied to J0", "MatLMVMSymBrdnScaleType", MatLMVMSymBroydenScaleTypes, (PetscEnum)lqn->scale_type, (PetscEnum *)&lqn->scale_type, NULL));
193: } else {
194: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatSetFromOptions_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
195: }
196: PetscFunctionReturn(PETSC_SUCCESS);
197: }
199: static PetscErrorCode MatSetFromOptions_LMVMDQN(Mat B, PetscOptionItems *PetscOptionsObject)
200: {
201: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
202: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
204: PetscFunctionBegin;
205: PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
206: PetscOptionsBegin(PetscObjectComm((PetscObject)B), ((PetscObject)B)->prefix, "Dense QN method (MATLMVMDQN,MATLMVMDBFGS,MATLMVMDDFP)", NULL);
207: PetscCall(MatSetFromOptions_LMVMDQN_Private(B, PetscOptionsObject));
208: PetscOptionsEnd();
209: lqn->allocated = PETSC_FALSE;
210: if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
211: const char *prefix;
213: PetscCall(MatGetOptionsPrefix(B, &prefix));
214: PetscCall(MatSetOptionsPrefix(lqn->diag_qn, prefix));
215: PetscCall(MatAppendOptionsPrefix(lqn->diag_qn, "J0_"));
216: PetscCall(MatSetFromOptions(lqn->diag_qn));
217: }
218: PetscFunctionReturn(PETSC_SUCCESS);
219: }
221: static PetscErrorCode MatLMVMDQNResetDestructive(Mat B)
222: {
223: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
224: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
226: PetscFunctionBegin;
227: PetscCall(MatDestroy(&lqn->Sfull));
228: PetscCall(MatDestroy(&lqn->Yfull));
229: PetscCall(MatDestroy(&lqn->HY));
230: PetscCall(MatDestroy(&lqn->BS));
231: PetscCall(MatDestroy(&lqn->StY_triu));
232: PetscCall(MatDestroy(&lqn->YtS_triu));
233: PetscCall(VecDestroy(&lqn->StFprev));
234: PetscCall(VecDestroy(&lqn->Fprev_ref));
235: lqn->Fprev_state = 0;
236: PetscCall(MatDestroy(&lqn->YtS_triu_strict));
237: PetscCall(MatDestroy(&lqn->StY_triu_strict));
238: PetscCall(MatDestroy(&lqn->StBS));
239: PetscCall(MatDestroy(&lqn->YtHY));
240: PetscCall(MatDestroy(&lqn->J));
241: PetscCall(MatDestroy(&lqn->temp_mat));
242: PetscCall(VecDestroy(&lqn->diag_vec));
243: PetscCall(VecDestroy(&lqn->diag_vec_recycle_order));
244: PetscCall(VecDestroy(&lqn->inv_diag_vec));
245: PetscCall(VecDestroy(&lqn->column_work));
246: PetscCall(VecDestroy(&lqn->rwork1));
247: PetscCall(VecDestroy(&lqn->rwork2));
248: PetscCall(VecDestroy(&lqn->rwork3));
249: PetscCall(VecDestroy(&lqn->rwork2_local));
250: PetscCall(VecDestroy(&lqn->rwork3_local));
251: PetscCall(VecDestroy(&lqn->cyclic_work_vec));
252: lqn->allocated = PETSC_FALSE;
253: PetscFunctionReturn(PETSC_SUCCESS);
254: }
256: static PetscErrorCode MatDestroy_LMVMDQN(Mat B)
257: {
258: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
259: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
261: PetscFunctionBegin;
262: PetscCall(MatLMVMDQNResetDestructive(B));
263: PetscCall(MatDestroy(&lqn->diag_qn));
264: PetscCall(PetscFree(lmvm->ctx));
265: PetscCall(MatDestroy_LMVM(B));
266: PetscFunctionReturn(PETSC_SUCCESS);
267: }
269: static PetscErrorCode MatReset_LMVMDQN(Mat B, PetscBool destructive)
270: {
271: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
272: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
274: PetscFunctionBegin;
275: lqn->watchdog = 0;
276: if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
277: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
278: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
279: if (!diagctx->allocated) PetscCall(MatLMVMAllocate(lqn->diag_qn, lmvm->Xprev, lmvm->Fprev));
280: PetscCall(MatLMVMReset(lqn->diag_qn, destructive));
281: }
282: if (lqn->Sfull) PetscCall(MatZeroEntries(lqn->Sfull));
283: if (lqn->Yfull) PetscCall(MatZeroEntries(lqn->Yfull));
284: if (lqn->BS) PetscCall(MatZeroEntries(lqn->BS));
285: if (lqn->HY) PetscCall(MatZeroEntries(lqn->HY));
286: if (lqn->StY_triu) { /* Set to identity by default so it is invertible */
287: PetscCall(MatZeroEntries(lqn->StY_triu));
288: PetscCall(MatShift(lqn->StY_triu, 1.0));
289: }
290: if (lqn->YtS_triu) {
291: PetscCall(MatZeroEntries(lqn->YtS_triu));
292: PetscCall(MatShift(lqn->YtS_triu, 1.0));
293: }
294: if (lqn->YtS_triu_strict) PetscCall(MatZeroEntries(lqn->YtS_triu_strict));
295: if (lqn->StY_triu_strict) PetscCall(MatZeroEntries(lqn->StY_triu_strict));
296: if (lqn->StBS) {
297: PetscCall(MatZeroEntries(lqn->StBS));
298: PetscCall(MatShift(lqn->StBS, 1.0));
299: }
300: if (lqn->YtHY) {
301: PetscCall(MatZeroEntries(lqn->YtHY));
302: PetscCall(MatShift(lqn->YtHY, 1.0));
303: }
304: if (lqn->Fprev_ref) PetscCall(VecDestroy(&lqn->Fprev_ref));
305: lqn->Fprev_state = 0;
306: if (lqn->StFprev) PetscCall(VecZeroEntries(lqn->StFprev));
307: if (destructive) { PetscCall(MatLMVMDQNResetDestructive(B)); }
308: lqn->num_updates = 0;
309: lqn->num_mult_updates = 0;
310: PetscCall(MatReset_LMVM(B, destructive));
311: PetscFunctionReturn(PETSC_SUCCESS);
312: }
314: static PetscErrorCode MatUpdate_LMVMDQN(Mat B, Vec X, Vec F)
315: {
316: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
317: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
318: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
319: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
321: PetscBool is_ddfp, is_dbfgs, is_dqn;
322: PetscDeviceContext dctx;
324: PetscFunctionBegin;
325: if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
326: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
327: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
328: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
329: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
330: if (lmvm->prev_set) {
331: Vec FX[2];
332: PetscScalar dotFX[2];
333: PetscScalar stFprev;
334: PetscScalar curvature, yTy;
335: PetscReal curvtol;
336: Vec workvec1;
338: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
339: PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
340: /* Test if the updates can be accepted */
341: FX[0] = lmvm->Fprev; /* dotFX[0] = s^T Fprev */
342: FX[1] = F; /* dotFX[1] = s^T F */
343: PetscCall(VecMDot(lmvm->Xprev, 2, FX, dotFX));
344: PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
345: PetscCall(VecDot(lmvm->Fprev, lmvm->Fprev, &yTy));
346: stFprev = PetscConj(dotFX[0]);
347: curvature = PetscConj(dotFX[1] - dotFX[0]); /* s^T y */
348: if (PetscRealPart(yTy) < lmvm->eps) {
349: curvtol = 0.0;
350: } else {
351: curvtol = lmvm->eps * PetscRealPart(yTy);
352: }
353: if (PetscRealPart(curvature) > curvtol) {
354: PetscInt m = lmvm->m;
355: PetscInt k = lqn->num_updates;
356: PetscInt h_new = k + 1 - oldest_update(m, k + 1);
357: PetscInt idx = recycle_index(m, k);
359: /* Update is good, accept it */
360: lmvm->nupdates++;
361: lqn->num_updates++;
362: lqn->watchdog = 0;
364: if (lmvm->k != m - 1) {
365: lmvm->k++;
366: } else if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
367: if (is_dqn) {
368: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
369: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
370: } else if (is_dbfgs) {
371: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
372: } else if (is_ddfp) {
373: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
374: } else {
375: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
376: }
377: }
379: /* First update the S^T matrix */
380: PetscCall(MatDenseGetColumnVecWrite(lqn->Sfull, idx, &workvec1));
381: PetscCall(VecCopy(lmvm->Xprev, workvec1));
382: PetscCall(MatDenseRestoreColumnVecWrite(lqn->Sfull, idx, &workvec1));
384: /* Now repeat update for the Y^T matrix */
385: PetscCall(MatDenseGetColumnVecWrite(lqn->Yfull, idx, &workvec1));
386: PetscCall(VecCopy(lmvm->Fprev, workvec1));
387: PetscCall(MatDenseRestoreColumnVecWrite(lqn->Yfull, idx, &workvec1));
389: if (is_dqn || is_dbfgs) { /* implement the scheme of Byrd, Nocedal, and Schnabel to save a MatMultTranspose call in the common case the *
390: * H_k is immediately applied to F after begin updated. The S^T y computation can be split up as S^T (F - F_prev) */
391: PetscInt local_n;
392: PetscScalar *StFprev;
393: PetscMemType memtype;
394: PetscInt StYidx;
396: StYidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
397: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
398: PetscCall(VecGetLocalSize(lqn->StFprev, &local_n));
399: PetscCall(VecGetArrayAndMemType(lqn->StFprev, &StFprev, &memtype));
400: if (local_n) {
401: if (PetscMemTypeHost(memtype)) {
402: StFprev[idx] = stFprev;
403: } else {
404: PetscCall(PetscDeviceRegisterMemory(&stFprev, PETSC_MEMTYPE_HOST, 1 * sizeof(stFprev)));
405: PetscCall(PetscDeviceRegisterMemory(StFprev, memtype, local_n * sizeof(*StFprev)));
406: PetscCall(PetscDeviceArrayCopy(dctx, &StFprev[idx], &stFprev, 1));
407: }
408: }
409: PetscCall(VecRestoreArrayAndMemType(lqn->StFprev, &StFprev));
411: {
412: Vec this_sy_col;
413: /* Now StFprev is updated for the new S vector. Write -StFprev into the appropriate row */
414: PetscCall(MatDenseGetColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
415: PetscCall(VecAXPBY(this_sy_col, -1.0, 0.0, lqn->StFprev));
417: /* Now compute the new StFprev */
418: PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h_new));
419: lqn->St_count++;
421: /* Now add StFprev: this_sy_col == S^T (F - Fprev) == S^T y */
422: PetscCall(VecAXPY(this_sy_col, 1.0, lqn->StFprev));
424: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_sy_col, lqn->num_updates, lqn->cyclic_work_vec));
425: PetscCall(MatDenseRestoreColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
426: }
427: }
429: if (is_ddfp || is_dqn) {
430: PetscInt YtSidx;
432: YtSidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
434: {
435: Vec this_ys_col;
437: PetscCall(MatDenseGetColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
438: PetscCall(MatMultHermitianTransposeColumnRange(lqn->Yfull, lmvm->Xprev, this_ys_col, 0, h_new));
439: lqn->Yt_count++;
441: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_ys_col, lqn->num_updates, lqn->cyclic_work_vec));
442: PetscCall(MatDenseRestoreColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
443: }
444: }
446: if (is_dbfgs || is_dqn) {
447: PetscCall(MatGetDiagonal(lqn->StY_triu, lqn->diag_vec));
448: } else if (is_ddfp) {
449: PetscCall(MatGetDiagonal(lqn->YtS_triu, lqn->diag_vec));
450: } else {
451: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
452: }
454: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
455: if (!lqn->diag_vec_recycle_order) PetscCall(VecDuplicate(lqn->diag_vec, &lqn->diag_vec_recycle_order));
456: PetscCall(VecCopy(lqn->diag_vec, lqn->diag_vec_recycle_order));
457: PetscCall(VecHistoryOrderToRecycleOrder(B, lqn->diag_vec_recycle_order, lqn->num_updates, lqn->cyclic_work_vec));
458: } else {
459: if (!lqn->diag_vec_recycle_order) {
460: PetscCall(PetscObjectReference((PetscObject)lqn->diag_vec));
461: lqn->diag_vec_recycle_order = lqn->diag_vec;
462: }
463: }
465: if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_SCALAR) {
466: PetscScalar sTy = curvature;
468: diagctx->sigma = PetscRealPart(sTy) / PetscRealPart(yTy);
469: } else if (lqn->scale_type == MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL) {
470: PetscScalar sTy = curvature;
471: PetscScalar sTDs, yTDy;
473: if (!diagctx->invD) {
474: PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->invD));
475: PetscCall(VecSet(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTy)));
476: }
477: if (!diagctx->U) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->U));
478: if (!diagctx->V) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->V));
479: if (!diagctx->W) PetscCall(VecDuplicate(lmvm->Fprev, &diagctx->W));
481: /* diagonal Broyden */
482: PetscCall(VecReciprocal(diagctx->invD));
483: PetscCall(VecPointwiseMult(diagctx->V, diagctx->invD, lmvm->Xprev));
484: PetscCall(VecPointwiseMult(diagctx->U, lmvm->Fprev, lmvm->Fprev));
485: if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->U));
486: PetscCall(VecAXPY(diagctx->invD, 1.0 / sTy, diagctx->U));
487: PetscCall(VecDot(diagctx->V, lmvm->Xprev, &sTDs));
488: if (PetscDefined(USE_COMPLEX)) PetscCall(VecAbs(diagctx->V));
489: PetscCall(VecPointwiseMult(diagctx->V, diagctx->V, diagctx->V));
490: PetscCall(VecAXPY(diagctx->invD, -1.0 / PetscMax(PetscRealPart(sTDs), diagctx->tol), diagctx->V));
491: PetscCall(VecReciprocal(diagctx->invD));
492: PetscCall(VecAbs(diagctx->invD));
493: PetscCall(VecDot(diagctx->U, diagctx->invD, &yTDy));
494: PetscCall(VecScale(diagctx->invD, PetscRealPart(sTy) / PetscRealPart(yTDy)));
495: }
496: } else {
497: /* Update is bad, skip it */
498: ++lmvm->nrejects;
499: ++lqn->watchdog;
500: PetscInt m = lmvm->m;
501: PetscInt k = lqn->num_updates;
502: PetscInt h = k - oldest_update(m, k);
504: /* we still have to maintain StFprev */
505: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
506: PetscCall(MatMultHermitianTransposeColumnRange(lqn->Sfull, F, lqn->StFprev, 0, h));
507: lqn->St_count++;
508: }
509: } else {
510: switch (lqn->scale_type) {
511: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
512: PetscCall(VecSet(diagctx->invD, diagctx->delta));
513: break;
514: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
515: diagctx->sigma = diagctx->delta;
516: break;
517: default:
518: diagctx->sigma = 1.0;
519: break;
520: }
521: }
523: if (lqn->watchdog > lqn->max_seq_rejects) PetscCall(MatLMVMReset(B, PETSC_FALSE));
525: /* Save the solution and function to be used in the next update */
526: PetscCall(VecCopy(X, lmvm->Xprev));
527: PetscCall(VecCopy(F, lmvm->Fprev));
528: PetscCall(PetscObjectReference((PetscObject)F));
529: PetscCall(VecDestroy(&lqn->Fprev_ref));
530: lqn->Fprev_ref = F;
531: PetscCall(PetscObjectStateGet((PetscObject)F, &lqn->Fprev_state));
532: lmvm->prev_set = PETSC_TRUE;
533: PetscFunctionReturn(PETSC_SUCCESS);
534: }
536: static PetscErrorCode MatDestroyThenCopy(Mat src, Mat *dst)
537: {
538: PetscFunctionBegin;
539: PetscCall(MatDestroy(dst));
540: if (src) { PetscCall(MatDuplicate(src, MAT_COPY_VALUES, dst)); }
541: PetscFunctionReturn(PETSC_SUCCESS);
542: }
544: static PetscErrorCode VecDestroyThenCopy(Vec src, Vec *dst)
545: {
546: PetscFunctionBegin;
547: PetscCall(VecDestroy(dst));
548: if (src) {
549: PetscCall(VecDuplicate(src, dst));
550: PetscCall(VecCopy(src, *dst));
551: }
552: PetscFunctionReturn(PETSC_SUCCESS);
553: }
555: static PetscErrorCode MatCopy_LMVMDQN(Mat B, Mat M, MatStructure str)
556: {
557: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
558: Mat_DQN *blqn = (Mat_DQN *)bdata->ctx;
559: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
560: Mat_DQN *mlqn = (Mat_DQN *)mdata->ctx;
562: PetscFunctionBegin;
563: mlqn->num_updates = blqn->num_updates;
564: mlqn->num_mult_updates = blqn->num_mult_updates;
565: PetscCall(MatDestroyThenCopy(blqn->Sfull, &mlqn->Sfull));
566: PetscCall(MatDestroyThenCopy(blqn->Yfull, &mlqn->Yfull));
567: PetscCall(MatDestroyThenCopy(blqn->HY, &mlqn->BS));
568: PetscCall(VecDestroyThenCopy(blqn->StFprev, &mlqn->StFprev));
569: PetscCall(MatDestroyThenCopy(blqn->StY_triu, &mlqn->StY_triu));
570: PetscCall(MatDestroyThenCopy(blqn->StY_triu_strict, &mlqn->StY_triu_strict));
571: PetscCall(MatDestroyThenCopy(blqn->YtS_triu, &mlqn->YtS_triu));
572: PetscCall(MatDestroyThenCopy(blqn->YtS_triu_strict, &mlqn->YtS_triu_strict));
573: PetscCall(MatDestroyThenCopy(blqn->YtHY, &mlqn->YtHY));
574: PetscCall(MatDestroyThenCopy(blqn->StBS, &mlqn->StBS));
575: PetscCall(MatDestroyThenCopy(blqn->J, &mlqn->J));
576: PetscCall(VecDestroyThenCopy(blqn->diag_vec, &mlqn->diag_vec));
577: PetscCall(VecDestroyThenCopy(blqn->diag_vec_recycle_order, &mlqn->diag_vec_recycle_order));
578: PetscCall(VecDestroyThenCopy(blqn->inv_diag_vec, &mlqn->inv_diag_vec));
579: mlqn->dense_type = blqn->dense_type;
580: mlqn->strategy = blqn->strategy;
581: mlqn->scale_type = blqn->scale_type;
582: mlqn->S_count = 0;
583: mlqn->St_count = 0;
584: mlqn->Y_count = 0;
585: mlqn->Yt_count = 0;
586: mlqn->watchdog = blqn->watchdog;
587: mlqn->max_seq_rejects = blqn->max_seq_rejects;
588: mlqn->allocated = blqn->allocated;
589: PetscCall(PetscObjectReference((PetscObject)blqn->Fprev_ref));
590: PetscCall(VecDestroy(&mlqn->Fprev_ref));
591: mlqn->Fprev_ref = blqn->Fprev_ref;
592: mlqn->Fprev_state = blqn->Fprev_state;
593: if (!(bdata->J0 || bdata->user_pc || bdata->user_ksp || bdata->user_scale)) { PetscCall(MatCopy(blqn->diag_qn, mlqn->diag_qn, SAME_NONZERO_PATTERN)); }
594: PetscFunctionReturn(PETSC_SUCCESS);
595: }
597: static PetscErrorCode MatMult_LMVMDQN(Mat B, Vec X, Vec Z)
598: {
599: PetscFunctionBegin;
600: PetscCall(MatMult_LMVMDDFP(B, X, Z));
601: PetscFunctionReturn(PETSC_SUCCESS);
602: }
604: static PetscErrorCode MatSolve_LMVMDQN(Mat H, Vec F, Vec dX)
605: {
606: PetscFunctionBegin;
607: PetscCall(MatSolve_LMVMDBFGS(H, F, dX));
608: PetscFunctionReturn(PETSC_SUCCESS);
609: }
611: /*
612: This dense representation uses Davidon-Fletcher-Powell (DFP) for MatMult,
613: and Broyden-Fletcher-Goldfarb-Shanno (BFGS) for MatSolve. This implementation
614: results in avoiding costly Cholesky factorization, at the cost of duality cap.
615: Please refer to MatLMVMDDFP and MatLMVMDBFGS for more information.
616: */
617: PetscErrorCode MatCreate_LMVMDQN(Mat B)
618: {
619: Mat_LMVM *lmvm;
620: Mat_DQN *ldfp;
622: PetscFunctionBegin;
623: PetscCall(MatCreate_LMVM(B));
624: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDQN));
625: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
626: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
627: B->ops->view = MatView_LMVMDQN;
628: B->ops->setup = MatSetUp_LMVMDQN;
629: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
630: B->ops->destroy = MatDestroy_LMVMDQN;
632: lmvm = (Mat_LMVM *)B->data;
633: lmvm->square = PETSC_TRUE;
634: lmvm->ops->allocate = MatAllocate_LMVMDQN;
635: lmvm->ops->reset = MatReset_LMVMDQN;
636: lmvm->ops->update = MatUpdate_LMVMDQN;
637: lmvm->ops->mult = MatMult_LMVMDQN;
638: lmvm->ops->solve = MatSolve_LMVMDQN;
639: lmvm->ops->copy = MatCopy_LMVMDQN;
641: PetscCall(PetscNew(&ldfp));
642: lmvm->ctx = (void *)ldfp;
643: ldfp->allocated = PETSC_FALSE;
644: ldfp->watchdog = 0;
645: ldfp->max_seq_rejects = lmvm->m / 2;
646: ldfp->strategy = MAT_LMVM_DENSE_INPLACE;
647: ldfp->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
649: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ldfp->diag_qn));
650: PetscCall(MatSetType(ldfp->diag_qn, MATLMVMDIAGBROYDEN));
651: PetscCall(MatSetOptionsPrefix(ldfp->diag_qn, "J0_"));
652: PetscFunctionReturn(PETSC_SUCCESS);
653: }
655: /*@
656: MatCreateLMVMDQN - Creates a dense representation of the limited-memory
657: Quasi-Newton approximation to a Hessian.
659: Collective
661: Input Parameters:
662: + comm - MPI communicator
663: . n - number of local rows for storage vectors
664: - N - global size of the storage vectors
666: Output Parameter:
667: . B - the matrix
669: Level: advanced
671: Note:
672: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
673: paradigm instead of this routine directly.
675: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatCreateLMVMDDFP()`, `MatCreateLMVMDBFGS()`
676: @*/
677: PetscErrorCode MatCreateLMVMDQN(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
678: {
679: PetscFunctionBegin;
680: PetscCall(KSPInitializePackage());
681: PetscCall(MatCreate(comm, B));
682: PetscCall(MatSetSizes(*B, n, n, N, N));
683: PetscCall(MatSetType(*B, MATLMVMDQN));
684: PetscCall(MatSetUp(*B));
685: PetscFunctionReturn(PETSC_SUCCESS);
686: }
688: static PetscErrorCode MatDQNApplyJ0Fwd(Mat B, Vec X, Vec Z)
689: {
690: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
691: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
693: PetscFunctionBegin;
694: if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
695: lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
696: PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
697: } else {
698: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
699: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
701: switch (lqn->scale_type) {
702: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
703: PetscCall(VecAXPBY(Z, 1.0 / diagctx->sigma, 0.0, X));
704: break;
705: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
706: PetscCall(VecPointwiseDivide(Z, X, diagctx->invD));
707: break;
708: case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
709: default:
710: PetscCall(VecCopy(X, Z));
711: break;
712: }
713: }
714: PetscFunctionReturn(PETSC_SUCCESS);
715: }
717: static PetscErrorCode MatDQNApplyJ0Inv(Mat B, Vec F, Vec dX)
718: {
719: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
720: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
722: PetscFunctionBegin;
723: if (lmvm->J0 || lmvm->user_pc || lmvm->user_ksp || lmvm->user_scale) {
724: lqn->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_USER;
725: PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
726: } else {
727: Mat_LMVM *dbase = (Mat_LMVM *)lqn->diag_qn->data;
728: Mat_DiagBrdn *diagctx = (Mat_DiagBrdn *)dbase->ctx;
730: switch (lqn->scale_type) {
731: case MAT_LMVM_SYMBROYDEN_SCALE_SCALAR:
732: PetscCall(VecAXPBY(dX, diagctx->sigma, 0.0, F));
733: break;
734: case MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL:
735: PetscCall(VecPointwiseMult(dX, F, diagctx->invD));
736: break;
737: case MAT_LMVM_SYMBROYDEN_SCALE_NONE:
738: default:
739: PetscCall(VecCopy(F, dX));
740: break;
741: }
742: }
743: PetscFunctionReturn(PETSC_SUCCESS);
744: }
746: /* This is not Bunch-Kaufman LDLT: here L is strictly lower triangular part of STY */
747: static PetscErrorCode MatGetLDLT(Mat B, Mat result)
748: {
749: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
750: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
751: PetscInt m_local;
753: PetscFunctionBegin;
754: if (!lbfgs->temp_mat) PetscCall(MatDuplicate(lbfgs->YtS_triu_strict, MAT_SHARE_NONZERO_PATTERN, &lbfgs->temp_mat));
755: PetscCall(MatCopy(lbfgs->YtS_triu_strict, lbfgs->temp_mat, SAME_NONZERO_PATTERN));
756: PetscCall(MatDiagonalScale(lbfgs->temp_mat, lbfgs->inv_diag_vec, NULL));
757: PetscCall(MatGetLocalSize(result, &m_local, NULL));
758: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
759: PetscCall(MatConjugate(lbfgs->temp_mat));
760: if (m_local) {
761: Mat temp_local, YtS_local, result_local;
762: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
763: PetscCall(MatDenseGetLocalMatrix(lbfgs->temp_mat, &temp_local));
764: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
765: PetscCall(MatTransposeMatMult(YtS_local, temp_local, MAT_REUSE_MATRIX, PETSC_DEFAULT, &result_local));
766: }
767: PetscCall(MatConjugate(result));
768: PetscFunctionReturn(PETSC_SUCCESS);
769: }
771: static PetscErrorCode MatLMVMDBFGSUpdateMultData(Mat B)
772: {
773: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
774: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
775: PetscInt m = lmvm->m, m_local;
776: PetscInt k = lbfgs->num_updates;
777: PetscInt h = k - oldest_update(m, k);
778: PetscInt j_0;
779: PetscInt prev_oldest;
780: Mat J_local;
782: PetscFunctionBegin;
783: if (!lbfgs->YtS_triu_strict) {
784: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->YtS_triu_strict));
785: PetscCall(MatDestroy(&lbfgs->StBS));
786: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->StBS));
787: PetscCall(MatDestroy(&lbfgs->J));
788: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->J));
789: PetscCall(MatDestroy(&lbfgs->BS));
790: PetscCall(MatDuplicate(lbfgs->Yfull, MAT_SHARE_NONZERO_PATTERN, &lbfgs->BS));
791: PetscCall(MatShift(lbfgs->StBS, 1.0));
792: lbfgs->num_mult_updates = oldest_update(m, k);
793: }
794: if (lbfgs->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
796: /* B_0 may have been updated, we must recompute B_0 S and S^T B_0 S */
797: for (PetscInt j = oldest_update(m, k); j < k; j++) {
798: Vec s_j;
799: Vec Bs_j;
800: Vec StBs_j;
801: PetscInt S_idx = recycle_index(m, j);
802: PetscInt StBS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
804: PetscCall(MatDenseGetColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
805: PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
806: PetscCall(MatDQNApplyJ0Fwd(B, s_j, Bs_j));
807: PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
808: PetscCall(MatDenseGetColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
809: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Bs_j, StBs_j, 0, h));
810: lbfgs->St_count++;
811: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, StBs_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
812: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
813: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
814: }
815: prev_oldest = oldest_update(m, lbfgs->num_mult_updates);
816: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
817: /* move the YtS entries that have been computed and need to be kept back up */
818: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
820: PetscCall(MatMove_LR3(B, lbfgs->YtS_triu_strict, m_keep));
821: }
822: PetscCall(MatGetLocalSize(lbfgs->YtS_triu_strict, &m_local, NULL));
823: j_0 = PetscMax(lbfgs->num_mult_updates, oldest_update(m, k));
824: for (PetscInt j = j_0; j < k; j++) {
825: PetscInt S_idx = recycle_index(m, j);
826: PetscInt YtS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
827: Vec s_j, Yts_j;
829: PetscCall(MatDenseGetColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
830: PetscCall(MatDenseGetColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
831: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, s_j, Yts_j, 0, h));
832: lbfgs->Yt_count++;
833: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Yts_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
834: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
835: PetscCall(MatDenseRestoreColumnVecRead(lbfgs->Sfull, S_idx, &s_j));
836: /* zero the corresponding row */
837: if (m_local > 0) {
838: Mat YtS_local, YtS_row;
840: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
841: PetscCall(MatDenseGetSubMatrix(YtS_local, YtS_idx, YtS_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &YtS_row));
842: PetscCall(MatZeroEntries(YtS_row));
843: PetscCall(MatDenseRestoreSubMatrix(YtS_local, &YtS_row));
844: }
845: }
846: if (!lbfgs->inv_diag_vec) PetscCall(VecDuplicate(lbfgs->diag_vec, &lbfgs->inv_diag_vec));
847: PetscCall(VecCopy(lbfgs->diag_vec, lbfgs->inv_diag_vec));
848: PetscCall(VecReciprocal(lbfgs->inv_diag_vec));
849: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
850: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
851: PetscCall(MatGetLDLT(B, lbfgs->J));
852: PetscCall(MatAXPY(lbfgs->J, 1.0, lbfgs->StBS, SAME_NONZERO_PATTERN));
853: if (m_local) {
854: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
855: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
856: }
857: lbfgs->num_mult_updates = lbfgs->num_updates;
858: PetscFunctionReturn(PETSC_SUCCESS);
859: }
861: /* Solves for
862: * [ I | -S R^{-T} ] [ I | 0 ] [ H_0 | 0 ] [ I | Y ] [ I ]
863: * [-----+---] [-----+---] [---+---] [-------------]
864: * [ Y^T | I ] [ 0 | D ] [ 0 | I ] [ -R^{-1} S^T ] */
866: static PetscErrorCode MatSolve_LMVMDBFGS(Mat H, Vec F, Vec dX)
867: {
868: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
869: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
870: Vec rwork1 = lbfgs->rwork1;
871: PetscInt m = lmvm->m;
872: PetscInt k = lbfgs->num_updates;
873: PetscInt h = k - oldest_update(m, k);
874: PetscObjectState Fstate;
876: PetscFunctionBegin;
877: VecCheckSameSize(F, 2, dX, 3);
878: VecCheckMatCompatible(H, dX, 3, F, 2);
880: /* Block Version */
881: if (!lbfgs->num_updates) {
882: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
883: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
884: }
886: PetscCall(PetscObjectStateGet((PetscObject)F, &Fstate));
887: if (F == lbfgs->Fprev_ref && Fstate == lbfgs->Fprev_state) {
888: PetscCall(VecCopy(lbfgs->StFprev, rwork1));
889: } else {
890: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, F, rwork1, 0, h));
891: lbfgs->St_count++;
892: }
894: /* Reordering rwork1, as STY is in history order, while S is in recycled order */
895: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
896: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_FALSE, lbfgs->num_updates, lbfgs->strategy));
897: PetscCall(VecScale(rwork1, -1.0));
898: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
900: PetscCall(VecCopy(F, lbfgs->column_work));
901: PetscCall(MatMultAddColumnRange(lbfgs->Yfull, rwork1, lbfgs->column_work, lbfgs->column_work, 0, h));
902: lbfgs->Y_count++;
904: PetscCall(VecPointwiseMult(rwork1, lbfgs->diag_vec_recycle_order, rwork1));
905: PetscCall(MatDQNApplyJ0Inv(H, lbfgs->column_work, dX));
907: PetscCall(MatMultHermitianTransposeAddColumnRange(lbfgs->Yfull, dX, rwork1, rwork1, 0, h));
908: lbfgs->Yt_count++;
910: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
911: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_TRUE, lbfgs->num_updates, lbfgs->strategy));
912: PetscCall(VecScale(rwork1, -1.0));
913: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
915: PetscCall(MatMultAddColumnRange(lbfgs->Sfull, rwork1, dX, dX, 0, h));
916: lbfgs->S_count++;
917: PetscFunctionReturn(PETSC_SUCCESS);
918: }
920: /* Solves for
921: B_0 - [ Y | B_0 S] [ -D | L^T ]^-1 [ Y^T ]
922: [-----+-----------] [---------]
923: [ L | S^T B_0 S ] [ S^T B_0 ]
925: Above is equivalent to
927: B_0 - [ Y | B_0 S] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} L^T ]]^-1 [ Y^T ]
928: [[-----------+---][-----+---][---+-------------]] [---------]
929: [[ -L D^{-1} | I ][ 0 | J ][ 0 | I ]] [ S^T B_0 ]
931: where J = S^T B_0 S + L D^{-1} L^T
933: becomes
935: B_0 - [ Y | B_0 S] [ I | D^{-1} L^T ][ -D^{-1} | 0 ][ I | 0 ] [ Y^T ]
936: [---+------------][----------+--------][----------+---] [---------]
937: [ 0 | I ][ 0 | J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
939: =
941: B_0 + [ Y | B_0 S] [ D^{-1} | 0 ][ I | L^T ][ I | 0 ][ I | 0 ] [ Y^T ]
942: [--------+---][---+-----][---+---------][----------+---] [---------]
943: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
945: (Note that YtS_triu_strict is L^T)
946: Byrd, Nocedal, Schnabel 1994
948: Alternative approach: considering the fact that DFP is dual to BFGS, use MatMult of DPF:
949: (See ddfp.c's MatMult_LMVMDDFP)
951: */
953: static PetscErrorCode MatMult_LMVMDBFGS(Mat B, Vec X, Vec Z)
954: {
955: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
956: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
957: Mat J_local;
958: PetscInt m_local;
959: PetscInt m = lmvm->m;
960: PetscInt k = lbfgs->num_updates;
961: PetscInt h = k - oldest_update(m, k);
963: PetscFunctionBegin;
964: VecCheckSameSize(X, 2, Z, 3);
965: VecCheckMatCompatible(B, X, 2, Z, 3);
967: /* Cholesky Version */
968: /* Start with the B0 term */
969: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
970: if (!lbfgs->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }
972: PetscCall(MatLMVMDBFGSUpdateMultData(B));
973: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Yfull, X, lbfgs->rwork1, 0, h));
974: lbfgs->Yt_count++;
975: PetscCall(MatMultHermitianTransposeColumnRange(lbfgs->Sfull, Z, lbfgs->rwork2, 0, h));
976: lbfgs->St_count++;
977: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
978: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
979: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork2, lbfgs->num_updates, lbfgs->cyclic_work_vec));
980: }
982: PetscCall(VecPointwiseMult(lbfgs->rwork3, lbfgs->rwork1, lbfgs->inv_diag_vec));
983: PetscCall(MatMultTransposeAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork2, lbfgs->rwork2));
985: if (!lbfgs->rwork2_local) PetscCall(VecCreateLocalVector(lbfgs->rwork2, &lbfgs->rwork2_local));
986: if (!lbfgs->rwork3_local) PetscCall(VecCreateLocalVector(lbfgs->rwork3, &lbfgs->rwork3_local));
987: PetscCall(VecGetLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
988: PetscCall(VecGetLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
989: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
990: PetscCall(VecGetSize(lbfgs->rwork2_local, &m_local));
991: if (m_local) {
992: Mat J_local;
994: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
995: PetscCall(MatSolve(J_local, lbfgs->rwork2_local, lbfgs->rwork3_local));
996: }
997: PetscCall(VecRestoreLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
998: PetscCall(VecRestoreLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
999: PetscCall(VecScale(lbfgs->rwork3, -1.0));
1001: PetscCall(MatMultAdd(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork1, lbfgs->rwork1));
1002: PetscCall(VecPointwiseMult(lbfgs->rwork1, lbfgs->rwork1, lbfgs->inv_diag_vec));
1004: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
1005: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1006: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork3, lbfgs->num_updates, lbfgs->cyclic_work_vec));
1007: }
1009: PetscCall(MatMultAddColumnRange(lbfgs->Yfull, lbfgs->rwork1, Z, Z, 0, h));
1010: lbfgs->Y_count++;
1011: PetscCall(MatMultAddColumnRange(lbfgs->BS, lbfgs->rwork3, Z, Z, 0, h));
1012: lbfgs->S_count++;
1013: PetscFunctionReturn(PETSC_SUCCESS);
1014: }
1016: /*
1017: This dense representation reduces the L-BFGS update to a series of
1018: matrix-vector products with dense matrices in lieu of the conventional matrix-free
1019: two-loop algorithm.
1020: */
1021: PetscErrorCode MatCreate_LMVMDBFGS(Mat B)
1022: {
1023: Mat_LMVM *lmvm;
1024: Mat_DQN *lbfgs;
1026: PetscFunctionBegin;
1027: PetscCall(MatCreate_LMVM(B));
1028: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDBFGS));
1029: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1030: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1031: B->ops->view = MatView_LMVMDQN;
1032: B->ops->setup = MatSetUp_LMVMDQN;
1033: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1034: B->ops->destroy = MatDestroy_LMVMDQN;
1036: lmvm = (Mat_LMVM *)B->data;
1037: lmvm->square = PETSC_TRUE;
1038: lmvm->ops->allocate = MatAllocate_LMVMDQN;
1039: lmvm->ops->reset = MatReset_LMVMDQN;
1040: lmvm->ops->update = MatUpdate_LMVMDQN;
1041: lmvm->ops->mult = MatMult_LMVMDBFGS;
1042: lmvm->ops->solve = MatSolve_LMVMDBFGS;
1043: lmvm->ops->copy = MatCopy_LMVMDQN;
1045: PetscCall(PetscNew(&lbfgs));
1046: lmvm->ctx = (void *)lbfgs;
1047: lbfgs->allocated = PETSC_FALSE;
1048: lbfgs->watchdog = 0;
1049: lbfgs->max_seq_rejects = lmvm->m / 2;
1050: lbfgs->strategy = MAT_LMVM_DENSE_INPLACE;
1051: lbfgs->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
1053: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &lbfgs->diag_qn));
1054: PetscCall(MatSetType(lbfgs->diag_qn, MATLMVMDIAGBROYDEN));
1055: PetscCall(MatSetOptionsPrefix(lbfgs->diag_qn, "J0_"));
1056: PetscFunctionReturn(PETSC_SUCCESS);
1057: }
1059: /*@
1060: MatCreateLMVMDBFGS - Creates a dense representation of the limited-memory
1061: Broyden-Fletcher-Goldfarb-Shanno (BFGS) approximation to a Hessian.
1063: Collective
1065: Input Parameters:
1066: + comm - MPI communicator
1067: . n - number of local rows for storage vectors
1068: - N - global size of the storage vectors
1070: Output Parameter:
1071: . B - the matrix
1073: Level: advanced
1075: Note:
1076: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1077: paradigm instead of this routine directly.
1079: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MatCreateLMVMBFGS()`
1080: @*/
1081: PetscErrorCode MatCreateLMVMDBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1082: {
1083: PetscFunctionBegin;
1084: PetscCall(KSPInitializePackage());
1085: PetscCall(MatCreate(comm, B));
1086: PetscCall(MatSetSizes(*B, n, n, N, N));
1087: PetscCall(MatSetType(*B, MATLMVMDBFGS));
1088: PetscCall(MatSetUp(*B));
1089: PetscFunctionReturn(PETSC_SUCCESS);
1090: }
1092: /* here R is strictly upper triangular part of STY */
1093: static PetscErrorCode MatGetRTDR(Mat B, Mat result)
1094: {
1095: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1096: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1097: PetscInt m_local;
1099: PetscFunctionBegin;
1100: if (!ldfp->temp_mat) PetscCall(MatDuplicate(ldfp->StY_triu_strict, MAT_SHARE_NONZERO_PATTERN, &ldfp->temp_mat));
1101: PetscCall(MatCopy(ldfp->StY_triu_strict, ldfp->temp_mat, SAME_NONZERO_PATTERN));
1102: PetscCall(MatDiagonalScale(ldfp->temp_mat, ldfp->inv_diag_vec, NULL));
1103: PetscCall(MatGetLocalSize(result, &m_local, NULL));
1104: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
1105: PetscCall(MatConjugate(ldfp->temp_mat));
1106: if (m_local) {
1107: Mat temp_local, StY_local, result_local;
1108: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1109: PetscCall(MatDenseGetLocalMatrix(ldfp->temp_mat, &temp_local));
1110: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
1111: PetscCall(MatTransposeMatMult(StY_local, temp_local, MAT_REUSE_MATRIX, PETSC_DEFAULT, &result_local));
1112: }
1113: PetscCall(MatConjugate(result));
1114: PetscFunctionReturn(PETSC_SUCCESS);
1115: }
1117: static PetscErrorCode MatLMVMDDFPUpdateSolveData(Mat B)
1118: {
1119: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1120: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1121: PetscInt m = lmvm->m, m_local;
1122: PetscInt k = ldfp->num_updates;
1123: PetscInt h = k - oldest_update(m, k);
1124: PetscInt j_0;
1125: PetscInt prev_oldest;
1126: Mat J_local;
1128: PetscFunctionBegin;
1129: if (!ldfp->StY_triu_strict) {
1130: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->StY_triu_strict));
1131: PetscCall(MatDestroy(&ldfp->YtHY));
1132: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->YtHY));
1133: PetscCall(MatDestroy(&ldfp->J));
1134: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->J));
1135: PetscCall(MatDestroy(&ldfp->HY));
1136: PetscCall(MatDuplicate(ldfp->Yfull, MAT_SHARE_NONZERO_PATTERN, &ldfp->HY));
1137: PetscCall(MatShift(ldfp->YtHY, 1.0));
1138: ldfp->num_mult_updates = oldest_update(m, k);
1139: }
1140: if (ldfp->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
1142: /* H_0 may have been updated, we must recompute H_0 Y and Y^T H_0 Y */
1143: for (PetscInt j = oldest_update(m, k); j < k; j++) {
1144: Vec y_j;
1145: Vec Hy_j;
1146: Vec YtHy_j;
1147: PetscInt Y_idx = recycle_index(m, j);
1148: PetscInt YtHY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1150: PetscCall(MatDenseGetColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1151: PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1152: PetscCall(MatDQNApplyJ0Inv(B, y_j, Hy_j));
1153: PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1154: PetscCall(MatDenseGetColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1155: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, Hy_j, YtHy_j, 0, h));
1156: ldfp->Yt_count++;
1157: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, YtHy_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1158: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1159: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1160: }
1161: prev_oldest = oldest_update(m, ldfp->num_mult_updates);
1162: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
1163: /* move the YtS entries that have been computed and need to be kept back up */
1164: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
1166: PetscCall(MatMove_LR3(B, ldfp->StY_triu_strict, m_keep));
1167: }
1168: PetscCall(MatGetLocalSize(ldfp->StY_triu_strict, &m_local, NULL));
1169: j_0 = PetscMax(ldfp->num_mult_updates, oldest_update(m, k));
1170: for (PetscInt j = j_0; j < k; j++) {
1171: PetscInt Y_idx = recycle_index(m, j);
1172: PetscInt StY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1173: Vec y_j, Sty_j;
1175: PetscCall(MatDenseGetColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1176: PetscCall(MatDenseGetColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1177: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, y_j, Sty_j, 0, h));
1178: ldfp->St_count++;
1179: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Sty_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1180: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1181: PetscCall(MatDenseRestoreColumnVecRead(ldfp->Yfull, Y_idx, &y_j));
1182: /* zero the corresponding row */
1183: if (m_local > 0) {
1184: Mat StY_local, StY_row;
1186: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1187: PetscCall(MatDenseGetSubMatrix(StY_local, StY_idx, StY_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &StY_row));
1188: PetscCall(MatZeroEntries(StY_row));
1189: PetscCall(MatDenseRestoreSubMatrix(StY_local, &StY_row));
1190: }
1191: }
1192: if (!ldfp->inv_diag_vec) PetscCall(VecDuplicate(ldfp->diag_vec, &ldfp->inv_diag_vec));
1193: PetscCall(VecCopy(ldfp->diag_vec, ldfp->inv_diag_vec));
1194: PetscCall(VecReciprocal(ldfp->inv_diag_vec));
1195: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1196: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
1197: PetscCall(MatGetRTDR(B, ldfp->J));
1198: PetscCall(MatAXPY(ldfp->J, 1.0, ldfp->YtHY, SAME_NONZERO_PATTERN));
1199: if (m_local) {
1200: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
1201: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
1202: }
1203: ldfp->num_mult_updates = ldfp->num_updates;
1204: PetscFunctionReturn(PETSC_SUCCESS);
1205: }
1207: /* Solves for
1209: H_0 - [ S | H_0 Y] [ -D | R.T ]^-1 [ S^T ]
1210: [-----+-----------] [---------]
1211: [ R | Y^T H_0 Y ] [ Y^T H_0 ]
1213: Above is equivalent to
1215: H_0 - [ S | H_0 Y] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} R^T ]]^-1 [ S^T ]
1216: [[-----------+---][----+---][---+-------------]] [---------]
1217: [[ -R D^{-1} | I ][ 0 | J ][ 0 | I ]] [ Y^T H_0 ]
1219: where J = Y^T H_0 Y + R D^{-1} R.T
1221: becomes
1223: H_0 - [ S | H_0 Y] [ I | D^{-1} R^T ][ -D^{-1} | 0 ][ I | 0 ] [ S^T ]
1224: [---+------------][----------+--------][----------+---] [---------]
1225: [ 0 | I ][ 0 | J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1227: =
1229: H_0 + [ S | H_0 Y] [ D^{-1} | 0 ][ I | R^T ][ I | 0 ][ I | 0 ] [ S^T ]
1230: [--------+---][---+-----][---+---------][----------+---] [---------]
1231: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1233: (Note that StY_triu_strict is R)
1234: Byrd, Nocedal, Schnabel 1994
1236: */
1237: static PetscErrorCode MatSolve_LMVMDDFP(Mat H, Vec F, Vec dX)
1238: {
1239: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
1240: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1241: PetscInt m = lmvm->m;
1242: PetscInt k = ldfp->num_updates;
1243: PetscInt h = k - oldest_update(m, k);
1244: PetscInt m_local;
1245: Mat J_local;
1247: PetscFunctionBegin;
1248: VecCheckSameSize(F, 2, dX, 3);
1249: VecCheckMatCompatible(H, dX, 3, F, 2);
1251: /* Cholesky Version */
1252: /* Start with the B0 term */
1253: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
1254: if (!ldfp->num_updates) { PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */ }
1256: PetscCall(MatLMVMDDFPUpdateSolveData(H));
1258: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Sfull, F, ldfp->rwork1, 0, h));
1259: ldfp->St_count++;
1260: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, dX, ldfp->rwork2, 0, h));
1261: ldfp->Yt_count++;
1262: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1263: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1264: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork2, ldfp->num_updates, ldfp->cyclic_work_vec));
1265: }
1267: PetscCall(VecPointwiseMult(ldfp->rwork3, ldfp->rwork1, ldfp->inv_diag_vec));
1268: PetscCall(MatMultTransposeAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork2, ldfp->rwork2));
1270: if (!ldfp->rwork2_local) PetscCall(VecCreateLocalVector(ldfp->rwork2, &ldfp->rwork2_local));
1271: if (!ldfp->rwork3_local) PetscCall(VecCreateLocalVector(ldfp->rwork3, &ldfp->rwork3_local));
1272: PetscCall(VecGetLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1273: PetscCall(VecGetLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1274: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1275: PetscCall(VecGetSize(ldfp->rwork2_local, &m_local));
1276: if (m_local) {
1277: Mat J_local;
1279: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1280: PetscCall(MatSolve(J_local, ldfp->rwork2_local, ldfp->rwork3_local));
1281: }
1282: PetscCall(VecRestoreLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1283: PetscCall(VecRestoreLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1284: PetscCall(VecScale(ldfp->rwork3, -1.0));
1286: PetscCall(MatMultAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork1, ldfp->rwork1));
1287: PetscCall(VecPointwiseMult(ldfp->rwork1, ldfp->rwork1, ldfp->inv_diag_vec));
1289: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1290: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1291: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork3, ldfp->num_updates, ldfp->cyclic_work_vec));
1292: }
1294: PetscCall(MatMultAddColumnRange(ldfp->Sfull, ldfp->rwork1, dX, dX, 0, h));
1295: ldfp->S_count++;
1296: PetscCall(MatMultAddColumnRange(ldfp->HY, ldfp->rwork3, dX, dX, 0, h));
1297: ldfp->Y_count++;
1298: PetscFunctionReturn(PETSC_SUCCESS);
1299: }
1301: /* Solves for
1302: (Theorem 1, Erway, Jain, and Marcia, 2013)
1304: B_0 - [ Y | B_0 S] [ -R^{-T} (D + S^T B_0 S) R^{-1} | R^{-T} ] [ Y^T ]
1305: ---------------------------------+--------] [---------]
1306: [ R^{-1} | 0 ] [ S^T B_0 ]
1308: (Note: R above is right triangular part of YTS)
1309: which becomes,
1311: [ I | -Y L^{-T} ] [ I | 0 ] [ B_0 | 0 ] [ I | S ] [ I ]
1312: [-----+---] [-----+---] [---+---] [-------------]
1313: [ S^T | I ] [ 0 | D ] [ 0 | I ] [ -L^{-1} Y^T ]
1315: (Note: L above is right triangular part of STY)
1317: */
1318: static PetscErrorCode MatMult_LMVMDDFP(Mat B, Vec X, Vec Z)
1319: {
1320: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1321: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1322: Vec rwork1 = ldfp->rwork1;
1323: PetscInt m = lmvm->m;
1324: PetscInt k = ldfp->num_updates;
1325: PetscInt h = k - oldest_update(m, k);
1326: PetscObjectState Xstate;
1328: PetscFunctionBegin;
1329: VecCheckSameSize(X, 2, Z, 3);
1330: VecCheckMatCompatible(B, X, 2, Z, 3);
1332: /* DFP Version. Erway, Jain, Marcia, 2013, Theorem 1 */
1333: /* Block Version */
1334: if (!ldfp->num_updates) {
1335: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1336: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1337: }
1339: PetscCall(PetscObjectStateGet((PetscObject)X, &Xstate));
1340: PetscCall(MatMultHermitianTransposeColumnRange(ldfp->Yfull, X, rwork1, 0, h));
1342: /* Reordering rwork1, as STY is in history order, while Y is in recycled order */
1343: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1344: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_FALSE, ldfp->num_updates, ldfp->strategy));
1345: PetscCall(VecScale(rwork1, -1.0));
1346: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1348: PetscCall(VecCopy(X, ldfp->column_work));
1349: PetscCall(MatMultAddColumnRange(ldfp->Sfull, rwork1, ldfp->column_work, ldfp->column_work, 0, h));
1350: ldfp->S_count++;
1352: PetscCall(VecPointwiseMult(rwork1, ldfp->diag_vec_recycle_order, rwork1));
1353: PetscCall(MatDQNApplyJ0Fwd(B, ldfp->column_work, Z));
1355: PetscCall(MatMultHermitianTransposeAddColumnRange(ldfp->Sfull, Z, rwork1, rwork1, 0, h));
1356: ldfp->St_count++;
1358: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1359: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_TRUE, ldfp->num_updates, ldfp->strategy));
1360: PetscCall(VecScale(rwork1, -1.0));
1361: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1363: PetscCall(MatMultAddColumnRange(ldfp->Yfull, rwork1, Z, Z, 0, h));
1364: ldfp->Y_count++;
1365: PetscFunctionReturn(PETSC_SUCCESS);
1366: }
1368: /*
1369: This dense representation reduces the L-DFP update to a series of
1370: matrix-vector products with dense matrices in lieu of the conventional
1371: matrix-free two-loop algorithm.
1372: */
1373: PetscErrorCode MatCreate_LMVMDDFP(Mat B)
1374: {
1375: Mat_LMVM *lmvm;
1376: Mat_DQN *ldfp;
1378: PetscFunctionBegin;
1379: PetscCall(MatCreate_LMVM(B));
1380: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDDFP));
1381: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1382: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1383: B->ops->view = MatView_LMVMDQN;
1384: B->ops->setup = MatSetUp_LMVMDQN;
1385: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1386: B->ops->destroy = MatDestroy_LMVMDQN;
1388: lmvm = (Mat_LMVM *)B->data;
1389: lmvm->square = PETSC_TRUE;
1390: lmvm->ops->allocate = MatAllocate_LMVMDQN;
1391: lmvm->ops->reset = MatReset_LMVMDQN;
1392: lmvm->ops->update = MatUpdate_LMVMDQN;
1393: lmvm->ops->mult = MatMult_LMVMDDFP;
1394: lmvm->ops->solve = MatSolve_LMVMDDFP;
1395: lmvm->ops->copy = MatCopy_LMVMDQN;
1397: PetscCall(PetscNew(&ldfp));
1398: lmvm->ctx = (void *)ldfp;
1399: ldfp->allocated = PETSC_FALSE;
1400: ldfp->watchdog = 0;
1401: ldfp->max_seq_rejects = lmvm->m / 2;
1402: ldfp->strategy = MAT_LMVM_DENSE_INPLACE;
1403: ldfp->scale_type = MAT_LMVM_SYMBROYDEN_SCALE_DIAGONAL;
1405: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &ldfp->diag_qn));
1406: PetscCall(MatSetType(ldfp->diag_qn, MATLMVMDIAGBROYDEN));
1407: PetscCall(MatSetOptionsPrefix(ldfp->diag_qn, "J0_"));
1408: PetscFunctionReturn(PETSC_SUCCESS);
1409: }
1411: /*@
1412: MatCreateLMVMDDFP - Creates a dense representation of the limited-memory
1413: Davidon-Fletcher-Powell (DFP) approximation to a Hessian.
1415: Collective
1417: Input Parameters:
1418: + comm - MPI communicator
1419: . n - number of local rows for storage vectors
1420: - N - global size of the storage vectors
1422: Output Parameter:
1423: . B - the matrix
1425: Level: advanced
1427: Note:
1428: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1429: paradigm instead of this routine directly.
1431: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDDFP`, `MatCreateLMVMDFP()`
1432: @*/
1433: PetscErrorCode MatCreateLMVMDDFP(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1434: {
1435: PetscFunctionBegin;
1436: PetscCall(KSPInitializePackage());
1437: PetscCall(MatCreate(comm, B));
1438: PetscCall(MatSetSizes(*B, n, n, N, N));
1439: PetscCall(MatSetType(*B, MATLMVMDDFP));
1440: PetscCall(MatSetUp(*B));
1441: PetscFunctionReturn(PETSC_SUCCESS);
1442: }
1444: /*@
1445: MatLMVMDenseSetType - Sets the memory storage type for dense `MATLMVM`
1447: Input Parameters:
1448: + B - the `MATLMVM` matrix
1449: - type - scale type, see `MatLMVMDenseSetType`
1451: Options Database Keys:
1452: + -mat_lqn_type <reorder,inplace> - set the strategy
1453: . -mat_lbfgs_type <reorder,inplace> - set the strategy
1454: - -mat_ldfp_type <reorder,inplace> - set the strategy
1456: Level: intermediate
1458: MatLMVMDenseTypes\:
1459: + `MAT_LMVM_DENSE_REORDER` - reorders memory to minimize kernel launch
1460: - `MAT_LMVM_DENSE_INPLACE` - launches kernel inplace to minimize memory movement
1462: .seealso: [](ch_ksp), `MATLMVMDQN`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatLMVMDenseType`
1463: @*/
1464: PetscErrorCode MatLMVMDenseSetType(Mat B, MatLMVMDenseType type)
1465: {
1466: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1467: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
1469: PetscFunctionBegin;
1471: lqn->strategy = type;
1472: PetscFunctionReturn(PETSC_SUCCESS);
1473: }