Actual source code: sr1.c
1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h>
3: /*
4: Limited-memory Symmetric-Rank-1 method for approximating both
5: the forward product and inverse application of a Jacobian.
6: */
8: typedef struct {
9: Vec *P, *Q;
10: Vec work;
11: PetscBool allocated, needP, needQ;
12: PetscReal *stp, *ytq;
13: } Mat_LSR1;
15: /*
16: The solution method is adapted from Algorithm 8 of Erway and Marcia
17: "On Solving Large-Scale Limited-Memory Quasi-Newton Equations"
18: (https://arxiv.org/abs/1510.06378).
20: Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
21: the matrix is updated with a new (S[i], Y[i]) pair. This allows
22: repeated calls of MatMult inside KSP solvers without unnecessarily
23: recomputing Q[i] terms in expensive nested-loops.
25: dX <- J0^{-1} * F
26: for i = 0,1,2,...,k
27: # Q[i] = S[i] - (B_i)^{-1}*Y[i]
28: zeta = (Q[i]^T F) / (Q[i]^T Y[i])
29: dX <- dX + (zeta * Q[i])
30: end
31: */
32: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
33: {
34: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
35: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
36: PetscInt i, j;
37: PetscScalar qjtyi, qtf, ytq;
39: PetscFunctionBegin;
40: VecCheckSameSize(F, 2, dX, 3);
41: VecCheckMatCompatible(B, dX, 3, F, 2);
43: if (lsr1->needQ) {
44: /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
45: for (i = 0; i <= lmvm->k; ++i) {
46: PetscCall(MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]));
47: PetscCall(VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]));
48: for (j = 0; j <= i - 1; ++j) {
49: PetscCall(VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi));
50: PetscCall(VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi) / lsr1->ytq[j], lsr1->Q[j]));
51: }
52: PetscCall(VecDot(lmvm->Y[i], lsr1->Q[i], &ytq));
53: lsr1->ytq[i] = PetscRealPart(ytq);
54: }
55: lsr1->needQ = PETSC_FALSE;
56: }
58: /* Invert the initial Jacobian onto F (or apply scaling) */
59: PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
60: /* Start outer loop */
61: for (i = 0; i <= lmvm->k; ++i) {
62: PetscCall(VecDot(lsr1->Q[i], F, &qtf));
63: PetscCall(VecAXPY(dX, PetscRealPart(qtf) / lsr1->ytq[i], lsr1->Q[i]));
64: }
65: PetscFunctionReturn(PETSC_SUCCESS);
66: }
68: /*
69: The forward product is the matrix-free implementation of
70: Equation (6.24) in Nocedal and Wright "Numerical Optimization"
71: 2nd edition, pg 144.
73: Note that the structure of the forward product is identical to
74: the solution, with S and Y exchanging roles.
76: P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever
77: the matrix is updated with a new (S[i], Y[i]) pair. This allows
78: repeated calls of MatMult inside KSP solvers without unnecessarily
79: recomputing P[i] terms in expensive nested-loops.
81: Z <- J0 * X
82: for i = 0,1,2,...,k
83: # P[i] = Y[i] - (B_i)*S[i]
84: zeta = (P[i]^T X) / (P[i]^T S[i])
85: Z <- Z + (zeta * P[i])
86: end
87: */
88: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
89: {
90: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
91: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
92: PetscInt i, j;
93: PetscScalar pjtsi, ptx, stp;
95: PetscFunctionBegin;
96: VecCheckSameSize(X, 2, Z, 3);
97: VecCheckMatCompatible(B, X, 2, Z, 3);
99: if (lsr1->needP) {
100: /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
101: for (i = 0; i <= lmvm->k; ++i) {
102: PetscCall(MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]));
103: PetscCall(VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]));
104: for (j = 0; j <= i - 1; ++j) {
105: PetscCall(VecDot(lsr1->P[j], lmvm->S[i], &pjtsi));
106: PetscCall(VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi) / lsr1->stp[j], lsr1->P[j]));
107: }
108: PetscCall(VecDot(lmvm->S[i], lsr1->P[i], &stp));
109: lsr1->stp[i] = PetscRealPart(stp);
110: }
111: lsr1->needP = PETSC_FALSE;
112: }
114: PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
115: for (i = 0; i <= lmvm->k; ++i) {
116: PetscCall(VecDot(lsr1->P[i], X, &ptx));
117: PetscCall(VecAXPY(Z, PetscRealPart(ptx) / lsr1->stp[i], lsr1->P[i]));
118: }
119: PetscFunctionReturn(PETSC_SUCCESS);
120: }
122: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
123: {
124: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
125: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
126: PetscReal snorm, pnorm;
127: PetscScalar sktw;
129: PetscFunctionBegin;
130: if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
131: if (lmvm->prev_set) {
132: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
133: PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
134: PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
135: /* See if the updates can be accepted
136: NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
137: PetscCall(MatMult(B, lmvm->Xprev, lsr1->work));
138: PetscCall(VecAYPX(lsr1->work, -1.0, lmvm->Fprev));
139: PetscCall(VecDot(lmvm->Xprev, lsr1->work, &sktw));
140: PetscCall(VecNorm(lmvm->Xprev, NORM_2, &snorm));
141: PetscCall(VecNorm(lsr1->work, NORM_2, &pnorm));
142: if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
143: /* Update is good, accept it */
144: lsr1->needP = lsr1->needQ = PETSC_TRUE;
145: PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
146: } else {
147: /* Update is bad, skip it */
148: ++lmvm->nrejects;
149: }
150: }
151: /* Save the solution and function to be used in the next update */
152: PetscCall(VecCopy(X, lmvm->Xprev));
153: PetscCall(VecCopy(F, lmvm->Fprev));
154: lmvm->prev_set = PETSC_TRUE;
155: PetscFunctionReturn(PETSC_SUCCESS);
156: }
158: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
159: {
160: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
161: Mat_LSR1 *bctx = (Mat_LSR1 *)bdata->ctx;
162: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
163: Mat_LSR1 *mctx = (Mat_LSR1 *)mdata->ctx;
164: PetscInt i;
166: PetscFunctionBegin;
167: mctx->needP = bctx->needP;
168: mctx->needQ = bctx->needQ;
169: for (i = 0; i <= bdata->k; ++i) {
170: mctx->stp[i] = bctx->stp[i];
171: mctx->ytq[i] = bctx->ytq[i];
172: PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
173: PetscCall(VecCopy(bctx->Q[i], mctx->Q[i]));
174: }
175: PetscFunctionReturn(PETSC_SUCCESS);
176: }
178: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
179: {
180: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
181: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
183: PetscFunctionBegin;
184: lsr1->needP = lsr1->needQ = PETSC_TRUE;
185: if (destructive && lsr1->allocated) {
186: PetscCall(VecDestroy(&lsr1->work));
187: PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
188: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
189: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
190: lsr1->allocated = PETSC_FALSE;
191: }
192: PetscCall(MatReset_LMVM(B, destructive));
193: PetscFunctionReturn(PETSC_SUCCESS);
194: }
196: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
197: {
198: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
199: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
201: PetscFunctionBegin;
202: PetscCall(MatAllocate_LMVM(B, X, F));
203: if (!lsr1->allocated) {
204: PetscCall(VecDuplicate(X, &lsr1->work));
205: PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
206: if (lmvm->m > 0) {
207: PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->P));
208: PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->Q));
209: }
210: lsr1->allocated = PETSC_TRUE;
211: }
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
216: {
217: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
218: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
220: PetscFunctionBegin;
221: if (lsr1->allocated) {
222: PetscCall(VecDestroy(&lsr1->work));
223: PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
224: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
225: PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
226: lsr1->allocated = PETSC_FALSE;
227: }
228: PetscCall(PetscFree(lmvm->ctx));
229: PetscCall(MatDestroy_LMVM(B));
230: PetscFunctionReturn(PETSC_SUCCESS);
231: }
233: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
234: {
235: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
236: Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;
238: PetscFunctionBegin;
239: PetscCall(MatSetUp_LMVM(B));
240: if (!lsr1->allocated && lmvm->m > 0) {
241: PetscCall(VecDuplicate(lmvm->Xprev, &lsr1->work));
242: PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
243: if (lmvm->m > 0) {
244: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P));
245: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q));
246: }
247: lsr1->allocated = PETSC_TRUE;
248: }
249: PetscFunctionReturn(PETSC_SUCCESS);
250: }
252: PetscErrorCode MatCreate_LMVMSR1(Mat B)
253: {
254: Mat_LMVM *lmvm;
255: Mat_LSR1 *lsr1;
257: PetscFunctionBegin;
258: PetscCall(MatCreate_LMVM(B));
259: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1));
260: PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
261: B->ops->setup = MatSetUp_LMVMSR1;
262: B->ops->destroy = MatDestroy_LMVMSR1;
263: B->ops->solve = MatSolve_LMVMSR1;
265: lmvm = (Mat_LMVM *)B->data;
266: lmvm->square = PETSC_TRUE;
267: lmvm->ops->allocate = MatAllocate_LMVMSR1;
268: lmvm->ops->reset = MatReset_LMVMSR1;
269: lmvm->ops->update = MatUpdate_LMVMSR1;
270: lmvm->ops->mult = MatMult_LMVMSR1;
271: lmvm->ops->copy = MatCopy_LMVMSR1;
273: PetscCall(PetscNew(&lsr1));
274: lmvm->ctx = (void *)lsr1;
275: lsr1->allocated = PETSC_FALSE;
276: lsr1->needP = lsr1->needQ = PETSC_TRUE;
277: PetscFunctionReturn(PETSC_SUCCESS);
278: }
280: /*@
281: MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
282: matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not
283: guaranteed to be positive-definite.
285: To use the L-SR1 matrix with other vector types, the matrix must be
286: created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
287: This ensures that the internal storage and work vectors are duplicated from the
288: correct type of vector.
290: Collective
292: Input Parameters:
293: + comm - MPI communicator
294: . n - number of local rows for storage vectors
295: - N - global size of the storage vectors
297: Output Parameter:
298: . B - the matrix
300: Level: intermediate
302: Note:
303: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
304: paradigm instead of this routine directly.
306: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMSR1`, `MatCreateLMVMBFGS()`, `MatCreateLMVMDFP()`,
307: `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
308: @*/
309: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
310: {
311: PetscFunctionBegin;
312: PetscCall(MatCreate(comm, B));
313: PetscCall(MatSetSizes(*B, n, n, N, N));
314: PetscCall(MatSetType(*B, MATLMVMSR1));
315: PetscCall(MatSetUp(*B));
316: PetscFunctionReturn(PETSC_SUCCESS);
317: }