/* brdn.c: limited-memory "good" Broyden (MATLMVMBROYDEN) approximation matrix */
1: #include <../src/ksp/ksp/utils/lmvm/brdn/brdn.h>
/*
  The solution method is the matrix-free implementation of the inverse Hessian
  representation in page 312 of Griewank "Broyden Updating, The Good and The Bad!"
  (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).

  Q[i] = (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
  the matrix is updated with a new (S[i], Y[i]) pair. This allows
  repeated calls of MatSolve without incurring redundant computation.

  dX <- J0^{-1} * F

  for i=0,1,2,...,k
    # Q[i] = (B_i)^{-1} * Y[i]
    tau = (S[i]^T dX) / (S[i]^T Q[i])
    dX <- dX + (tau * (S[i] - Q[i]))
  end
*/
static PetscErrorCode MatSolve_LMVMBrdn(Mat B, Vec F, Vec dX)
{
  Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
  Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;
  PetscInt i, j;
  PetscScalar sjtqi, stx, stq;

  PetscFunctionBegin;
  VecCheckSameSize(F, 2, dX, 3);
  VecCheckMatCompatible(B, dX, 3, F, 2);
  if (lbrdn->needQ) {
    /* Lazily rebuild the cached products Q[i] = (B_i)^{-1} * Y[i];
       they were invalidated by the last accepted (S, Y) update. */
    for (i = 0; i <= lmvm->k; ++i) {
      /* Seed with the base Jacobian action: Q[i] <- J0^{-1} * Y[i] */
      PetscCall(MatLMVMApplyJ0Inv(B, lmvm->Y[i], lbrdn->Q[i]));
      /* Apply the first i rank-one inverse corrections, in order */
      for (j = 0; j <= i - 1; ++j) {
        PetscCall(VecDot(lmvm->S[j], lbrdn->Q[i], &sjtqi));
        /* Q[i] += ((S[j]^T Q[i]) / (S[j]^T Q[j])) * (S[j] - Q[j]) */
        PetscCall(VecAXPBYPCZ(lbrdn->Q[i], PetscRealPart(sjtqi) / lbrdn->stq[j], -PetscRealPart(sjtqi) / lbrdn->stq[j], 1.0, lmvm->S[j], lbrdn->Q[j]));
      }
      /* Cache the denominator S[i]^T Q[i] reused by every subsequent solve */
      PetscCall(VecDot(lmvm->S[i], lbrdn->Q[i], &stq));
      lbrdn->stq[i] = PetscRealPart(stq);
    }
    lbrdn->needQ = PETSC_FALSE;
  }

  /* dX <- J0^{-1} * F, then apply the k+1 rank-one corrections */
  PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
  for (i = 0; i <= lmvm->k; ++i) {
    PetscCall(VecDot(lmvm->S[i], dX, &stx));
    /* dX += ((S[i]^T dX) / (S[i]^T Q[i])) * (S[i] - Q[i]) */
    PetscCall(VecAXPBYPCZ(dX, PetscRealPart(stx) / lbrdn->stq[i], -PetscRealPart(stx) / lbrdn->stq[i], 1.0, lmvm->S[i], lbrdn->Q[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  The forward product is the matrix-free implementation of Equation 2 in
  page 302 of Griewank "Broyden Updating, The Good and The Bad!"
  (http://www.emis.ams.org/journals/DMJDMV/vol-ismp/45_griewank-andreas-broyden.pdf).

  P[i] = (B_i)*S[i] terms are computed ahead of time whenever
  the matrix is updated with a new (S[i], Y[i]) pair. This allows
  repeated calls of MatMult inside KSP solvers without unnecessarily
  recomputing P[i] terms in expensive nested-loops.

  Z <- J0 * X

  for i=0,1,2,...,k
    # P[i] = B_i * S[i]
    tau = (S[i]^T X) / (S[i]^T S[i])
    Z <- Z + (tau * (Y[i] - P[i]))
  end
*/
static PetscErrorCode MatMult_LMVMBrdn(Mat B, Vec X, Vec Z)
{
  Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
  Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;
  PetscInt i, j;
  PetscScalar sjtsi, stx;

  PetscFunctionBegin;
  VecCheckSameSize(X, 2, Z, 3);
  VecCheckMatCompatible(B, X, 2, Z, 3);
  if (lbrdn->needP) {
    /* Lazily rebuild the cached products P[i] = (B_i) * S[i];
       they were invalidated by the last accepted (S, Y) update. */
    for (i = 0; i <= lmvm->k; ++i) {
      /* Seed with the base Jacobian action: P[i] <- J0 * S[i] */
      PetscCall(MatLMVMApplyJ0Fwd(B, lmvm->S[i], lbrdn->P[i]));
      /* Apply the first i rank-one forward corrections, in order */
      for (j = 0; j <= i - 1; ++j) {
        PetscCall(VecDot(lmvm->S[j], lmvm->S[i], &sjtsi));
        /* P[i] += ((S[j]^T S[i]) / (S[j]^T S[j])) * (Y[j] - P[j]) */
        PetscCall(VecAXPBYPCZ(lbrdn->P[i], PetscRealPart(sjtsi) / lbrdn->sts[j], -PetscRealPart(sjtsi) / lbrdn->sts[j], 1.0, lmvm->Y[j], lbrdn->P[j]));
      }
    }
    lbrdn->needP = PETSC_FALSE;
  }

  /* Z <- J0 * X, then apply the k+1 rank-one corrections
     (sts[i] = S[i]^T S[i] is maintained incrementally by MatUpdate) */
  PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
  for (i = 0; i <= lmvm->k; ++i) {
    PetscCall(VecDot(lmvm->S[i], X, &stx));
    /* Z += ((S[i]^T X) / (S[i]^T S[i])) * (Y[i] - P[i]) */
    PetscCall(VecAXPBYPCZ(Z, PetscRealPart(stx) / lbrdn->sts[i], -PetscRealPart(stx) / lbrdn->sts[i], 1.0, lmvm->Y[i], lbrdn->P[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatUpdate_LMVMBrdn(Mat B, Vec X, Vec F)
{
  Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
  Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;
  PetscInt old_k, i;
  PetscScalar sts;

  PetscFunctionBegin;
  /* A zero-memory matrix never stores any updates */
  if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
  if (lmvm->prev_set) {
    /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
    PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
    PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
    /* Accept the update; the cached P and Q products are now stale */
    lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
    old_k = lmvm->k;
    PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
    /* If we hit the memory limit (k did not grow), shift the sts array
       to stay aligned with the shifted S/Y history. stq needs no shift:
       needQ above forces its full recomputation on the next MatSolve. */
    if (old_k == lmvm->k) {
      for (i = 0; i <= lmvm->k - 1; ++i) lbrdn->sts[i] = lbrdn->sts[i + 1];
    }
    /* Store S[k]^T S[k] for the newest pair */
    PetscCall(VecDot(lmvm->S[lmvm->k], lmvm->S[lmvm->k], &sts));
    lbrdn->sts[lmvm->k] = PetscRealPart(sts);
  }
  /* Save the solution and function to be used in the next update */
  PetscCall(VecCopy(X, lmvm->Xprev));
  PetscCall(VecCopy(F, lmvm->Fprev));
  lmvm->prev_set = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
135: static PetscErrorCode MatCopy_LMVMBrdn(Mat B, Mat M, MatStructure str)
136: {
137: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
138: Mat_Brdn *bctx = (Mat_Brdn *)bdata->ctx;
139: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
140: Mat_Brdn *mctx = (Mat_Brdn *)mdata->ctx;
141: PetscInt i;
143: PetscFunctionBegin;
144: mctx->needP = bctx->needP;
145: mctx->needQ = bctx->needQ;
146: for (i = 0; i <= bdata->k; ++i) {
147: mctx->sts[i] = bctx->sts[i];
148: mctx->stq[i] = bctx->stq[i];
149: PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
150: PetscCall(VecCopy(bctx->Q[i], mctx->Q[i]));
151: }
152: PetscFunctionReturn(PETSC_SUCCESS);
153: }
155: static PetscErrorCode MatReset_LMVMBrdn(Mat B, PetscBool destructive)
156: {
157: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
158: Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;
160: PetscFunctionBegin;
161: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
162: if (destructive && lbrdn->allocated) {
163: PetscCall(PetscFree2(lbrdn->sts, lbrdn->stq));
164: PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->P));
165: PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->Q));
166: lbrdn->allocated = PETSC_FALSE;
167: }
168: PetscCall(MatReset_LMVM(B, destructive));
169: PetscFunctionReturn(PETSC_SUCCESS);
170: }
172: static PetscErrorCode MatAllocate_LMVMBrdn(Mat B, Vec X, Vec F)
173: {
174: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
175: Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;
177: PetscFunctionBegin;
178: PetscCall(MatAllocate_LMVM(B, X, F));
179: if (!lbrdn->allocated) {
180: PetscCall(PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq));
181: if (lmvm->m > 0) {
182: PetscCall(VecDuplicateVecs(X, lmvm->m, &lbrdn->P));
183: PetscCall(VecDuplicateVecs(X, lmvm->m, &lbrdn->Q));
184: }
185: lbrdn->allocated = PETSC_TRUE;
186: }
187: PetscFunctionReturn(PETSC_SUCCESS);
188: }
190: static PetscErrorCode MatDestroy_LMVMBrdn(Mat B)
191: {
192: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
193: Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;
195: PetscFunctionBegin;
196: if (lbrdn->allocated) {
197: PetscCall(PetscFree2(lbrdn->sts, lbrdn->stq));
198: PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->P));
199: PetscCall(VecDestroyVecs(lmvm->m, &lbrdn->Q));
200: lbrdn->allocated = PETSC_FALSE;
201: }
202: PetscCall(PetscFree(lmvm->ctx));
203: PetscCall(MatDestroy_LMVM(B));
204: PetscFunctionReturn(PETSC_SUCCESS);
205: }
207: static PetscErrorCode MatSetUp_LMVMBrdn(Mat B)
208: {
209: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
210: Mat_Brdn *lbrdn = (Mat_Brdn *)lmvm->ctx;
212: PetscFunctionBegin;
213: PetscCall(MatSetUp_LMVM(B));
214: if (!lbrdn->allocated) {
215: PetscCall(PetscMalloc2(lmvm->m, &lbrdn->sts, lmvm->m, &lbrdn->stq));
216: if (lmvm->m > 0) {
217: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->P));
218: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lbrdn->Q));
219: }
220: lbrdn->allocated = PETSC_TRUE;
221: }
222: PetscFunctionReturn(PETSC_SUCCESS);
223: }
225: PetscErrorCode MatCreate_LMVMBrdn(Mat B)
226: {
227: Mat_LMVM *lmvm;
228: Mat_Brdn *lbrdn;
230: PetscFunctionBegin;
231: PetscCall(MatCreate_LMVM(B));
232: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMBROYDEN));
233: B->ops->setup = MatSetUp_LMVMBrdn;
234: B->ops->destroy = MatDestroy_LMVMBrdn;
235: B->ops->solve = MatSolve_LMVMBrdn;
237: lmvm = (Mat_LMVM *)B->data;
238: lmvm->square = PETSC_TRUE;
239: lmvm->ops->allocate = MatAllocate_LMVMBrdn;
240: lmvm->ops->reset = MatReset_LMVMBrdn;
241: lmvm->ops->mult = MatMult_LMVMBrdn;
242: lmvm->ops->update = MatUpdate_LMVMBrdn;
243: lmvm->ops->solve = MatSolve_LMVMBrdn;
244: lmvm->ops->copy = MatCopy_LMVMBrdn;
246: PetscCall(PetscNew(&lbrdn));
247: lmvm->ctx = (void *)lbrdn;
248: lbrdn->allocated = PETSC_FALSE;
249: lbrdn->needP = lbrdn->needQ = PETSC_TRUE;
250: PetscFunctionReturn(PETSC_SUCCESS);
251: }
253: /*@
254: MatCreateLMVMBroyden - Creates a limited-memory "good" Broyden-type approximation
255: matrix used for a Jacobian. L-Brdn is not guaranteed to be symmetric or
256: positive-definite.
258: To use the L-Brdn matrix with other vector types, the matrix must be
259: created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
260: This ensures that the internal storage and work vectors are duplicated from the
261: correct type of vector.
263: Collective
265: Input Parameters:
266: + comm - MPI communicator
267: . n - number of local rows for storage vectors
268: - N - global size of the storage vectors
270: Output Parameter:
271: . B - the matrix
273: Level: intermediate
275: Note:
276: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
277: paradigm instead of this routine directly.
279: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMBRDN`, `MatCreateLMVMDFP()`, `MatCreateLMVMSR1()`,
280: `MatCreateLMVMBFGS()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
281: @*/
PetscErrorCode MatCreateLMVMBroyden(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
{
  PetscFunctionBegin;
  /* Ensure the KSP package (which registers the MATLMVM types) is initialized */
  PetscCall(KSPInitializePackage());
  PetscCall(MatCreate(comm, B));
  /* Square matrix: n local / N global rows and columns */
  PetscCall(MatSetSizes(*B, n, n, N, N));
  PetscCall(MatSetType(*B, MATLMVMBROYDEN));
  PetscCall(MatSetUp(*B));
  PetscFunctionReturn(PETSC_SUCCESS);
}