Actual source code: sr1.c

  1: #include <../src/ksp/ksp/utils/lmvm/lmvm.h>

  3: /*
  4:   Limited-memory Symmetric-Rank-1 method for approximating both
  5:   the forward product and inverse application of a Jacobian.
  6: */

  8: typedef struct {
  9:   Vec       *P, *Q;
 10:   Vec        work;
 11:   PetscBool  allocated, needP, needQ;
 12:   PetscReal *stp, *ytq;
 13: } Mat_LSR1;

 15: /*
 16:   The solution method is adapted from Algorithm 8 of Erway and Marcia
 17:   "On Solving Large-Scale Limited-Memory Quasi-Newton Equations"
 18:   (https://arxiv.org/abs/1510.06378).

 20:   Q[i] = S[i] - (B_i)^{-1}*Y[i] terms are computed ahead of time whenever
 21:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 22:   repeated calls of MatMult inside KSP solvers without unnecessarily
 23:   recomputing Q[i] terms in expensive nested-loops.

 25:   dX <- J0^{-1} * F
 26:   for i = 0,1,2,...,k
 27:     # Q[i] = S[i] - (B_i)^{-1}*Y[i]
 28:     zeta = (Q[i]^T F) / (Q[i]^T Y[i])
 29:     dX <- dX + (zeta * Q[i])
 30:   end
 31: */
 32: static PetscErrorCode MatSolve_LMVMSR1(Mat B, Vec F, Vec dX)
 33: {
 34:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
 35:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
 36:   PetscInt    i, j;
 37:   PetscScalar qjtyi, qtf, ytq;

 39:   PetscFunctionBegin;
 40:   VecCheckSameSize(F, 2, dX, 3);
 41:   VecCheckMatCompatible(B, dX, 3, F, 2);

 43:   if (lsr1->needQ) {
 44:     /* Pre-compute (Q[i] = S[i] - (B_i)^{-1} * Y[i]) and (Y[i]^T Q[i]) */
 45:     for (i = 0; i <= lmvm->k; ++i) {
 46:       PetscCall(MatLMVMApplyJ0Inv(B, lmvm->Y[i], lsr1->Q[i]));
 47:       PetscCall(VecAYPX(lsr1->Q[i], -1.0, lmvm->S[i]));
 48:       for (j = 0; j <= i - 1; ++j) {
 49:         PetscCall(VecDot(lsr1->Q[j], lmvm->Y[i], &qjtyi));
 50:         PetscCall(VecAXPY(lsr1->Q[i], -PetscRealPart(qjtyi) / lsr1->ytq[j], lsr1->Q[j]));
 51:       }
 52:       PetscCall(VecDot(lmvm->Y[i], lsr1->Q[i], &ytq));
 53:       lsr1->ytq[i] = PetscRealPart(ytq);
 54:     }
 55:     lsr1->needQ = PETSC_FALSE;
 56:   }

 58:   /* Invert the initial Jacobian onto F (or apply scaling) */
 59:   PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
 60:   /* Start outer loop */
 61:   for (i = 0; i <= lmvm->k; ++i) {
 62:     PetscCall(VecDot(lsr1->Q[i], F, &qtf));
 63:     PetscCall(VecAXPY(dX, PetscRealPart(qtf) / lsr1->ytq[i], lsr1->Q[i]));
 64:   }
 65:   PetscFunctionReturn(PETSC_SUCCESS);
 66: }

 68: /*
 69:   The forward product is the matrix-free implementation of
 70:   Equation (6.24) in Nocedal and Wright "Numerical Optimization"
 71:   2nd edition, pg 144.

 73:   Note that the structure of the forward product is identical to
 74:   the solution, with S and Y exchanging roles.

 76:   P[i] = Y[i] - (B_i)*S[i] terms are computed ahead of time whenever
 77:   the matrix is updated with a new (S[i], Y[i]) pair. This allows
 78:   repeated calls of MatMult inside KSP solvers without unnecessarily
 79:   recomputing P[i] terms in expensive nested-loops.

 81:   Z <- J0 * X
 82:   for i = 0,1,2,...,k
 83:     # P[i] = Y[i] - (B_i)*S[i]
 84:     zeta = (P[i]^T X) / (P[i]^T S[i])
 85:     Z <- Z + (zeta * P[i])
 86:   end
 87: */
 88: static PetscErrorCode MatMult_LMVMSR1(Mat B, Vec X, Vec Z)
 89: {
 90:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
 91:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
 92:   PetscInt    i, j;
 93:   PetscScalar pjtsi, ptx, stp;

 95:   PetscFunctionBegin;
 96:   VecCheckSameSize(X, 2, Z, 3);
 97:   VecCheckMatCompatible(B, X, 2, Z, 3);

 99:   if (lsr1->needP) {
100:     /* Pre-compute (P[i] = Y[i] - (B_i) * S[i]) and (S[i]^T P[i]) */
101:     for (i = 0; i <= lmvm->k; ++i) {
102:       PetscCall(MatLMVMApplyJ0Fwd(B, lmvm->S[i], lsr1->P[i]));
103:       PetscCall(VecAYPX(lsr1->P[i], -1.0, lmvm->Y[i]));
104:       for (j = 0; j <= i - 1; ++j) {
105:         PetscCall(VecDot(lsr1->P[j], lmvm->S[i], &pjtsi));
106:         PetscCall(VecAXPY(lsr1->P[i], -PetscRealPart(pjtsi) / lsr1->stp[j], lsr1->P[j]));
107:       }
108:       PetscCall(VecDot(lmvm->S[i], lsr1->P[i], &stp));
109:       lsr1->stp[i] = PetscRealPart(stp);
110:     }
111:     lsr1->needP = PETSC_FALSE;
112:   }

114:   PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
115:   for (i = 0; i <= lmvm->k; ++i) {
116:     PetscCall(VecDot(lsr1->P[i], X, &ptx));
117:     PetscCall(VecAXPY(Z, PetscRealPart(ptx) / lsr1->stp[i], lsr1->P[i]));
118:   }
119:   PetscFunctionReturn(PETSC_SUCCESS);
120: }

122: static PetscErrorCode MatUpdate_LMVMSR1(Mat B, Vec X, Vec F)
123: {
124:   Mat_LMVM   *lmvm = (Mat_LMVM *)B->data;
125:   Mat_LSR1   *lsr1 = (Mat_LSR1 *)lmvm->ctx;
126:   PetscReal   snorm, pnorm;
127:   PetscScalar sktw;

129:   PetscFunctionBegin;
130:   if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
131:   if (lmvm->prev_set) {
132:     /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
133:     PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
134:     PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
135:     /* See if the updates can be accepted
136:        NOTE: This tests abs(S[k]^T (Y[k] - B_k*S[k])) >= eps * norm(S[k]) * norm(Y[k] - B_k*S[k]) */
137:     PetscCall(MatMult(B, lmvm->Xprev, lsr1->work));
138:     PetscCall(VecAYPX(lsr1->work, -1.0, lmvm->Fprev));
139:     PetscCall(VecDot(lmvm->Xprev, lsr1->work, &sktw));
140:     PetscCall(VecNorm(lmvm->Xprev, NORM_2, &snorm));
141:     PetscCall(VecNorm(lsr1->work, NORM_2, &pnorm));
142:     if (PetscAbsReal(PetscRealPart(sktw)) >= lmvm->eps * snorm * pnorm) {
143:       /* Update is good, accept it */
144:       lsr1->needP = lsr1->needQ = PETSC_TRUE;
145:       PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
146:     } else {
147:       /* Update is bad, skip it */
148:       ++lmvm->nrejects;
149:     }
150:   }
151:   /* Save the solution and function to be used in the next update */
152:   PetscCall(VecCopy(X, lmvm->Xprev));
153:   PetscCall(VecCopy(F, lmvm->Fprev));
154:   lmvm->prev_set = PETSC_TRUE;
155:   PetscFunctionReturn(PETSC_SUCCESS);
156: }

158: static PetscErrorCode MatCopy_LMVMSR1(Mat B, Mat M, MatStructure str)
159: {
160:   Mat_LMVM *bdata = (Mat_LMVM *)B->data;
161:   Mat_LSR1 *bctx  = (Mat_LSR1 *)bdata->ctx;
162:   Mat_LMVM *mdata = (Mat_LMVM *)M->data;
163:   Mat_LSR1 *mctx  = (Mat_LSR1 *)mdata->ctx;
164:   PetscInt  i;

166:   PetscFunctionBegin;
167:   mctx->needP = bctx->needP;
168:   mctx->needQ = bctx->needQ;
169:   for (i = 0; i <= bdata->k; ++i) {
170:     mctx->stp[i] = bctx->stp[i];
171:     mctx->ytq[i] = bctx->ytq[i];
172:     PetscCall(VecCopy(bctx->P[i], mctx->P[i]));
173:     PetscCall(VecCopy(bctx->Q[i], mctx->Q[i]));
174:   }
175:   PetscFunctionReturn(PETSC_SUCCESS);
176: }

178: static PetscErrorCode MatReset_LMVMSR1(Mat B, PetscBool destructive)
179: {
180:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
181:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

183:   PetscFunctionBegin;
184:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
185:   if (destructive && lsr1->allocated) {
186:     PetscCall(VecDestroy(&lsr1->work));
187:     PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
188:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
189:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
190:     lsr1->allocated = PETSC_FALSE;
191:   }
192:   PetscCall(MatReset_LMVM(B, destructive));
193:   PetscFunctionReturn(PETSC_SUCCESS);
194: }

196: static PetscErrorCode MatAllocate_LMVMSR1(Mat B, Vec X, Vec F)
197: {
198:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
199:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

201:   PetscFunctionBegin;
202:   PetscCall(MatAllocate_LMVM(B, X, F));
203:   if (!lsr1->allocated) {
204:     PetscCall(VecDuplicate(X, &lsr1->work));
205:     PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
206:     if (lmvm->m > 0) {
207:       PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->P));
208:       PetscCall(VecDuplicateVecs(X, lmvm->m, &lsr1->Q));
209:     }
210:     lsr1->allocated = PETSC_TRUE;
211:   }
212:   PetscFunctionReturn(PETSC_SUCCESS);
213: }

215: static PetscErrorCode MatDestroy_LMVMSR1(Mat B)
216: {
217:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
218:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

220:   PetscFunctionBegin;
221:   if (lsr1->allocated) {
222:     PetscCall(VecDestroy(&lsr1->work));
223:     PetscCall(PetscFree2(lsr1->stp, lsr1->ytq));
224:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->P));
225:     PetscCall(VecDestroyVecs(lmvm->m, &lsr1->Q));
226:     lsr1->allocated = PETSC_FALSE;
227:   }
228:   PetscCall(PetscFree(lmvm->ctx));
229:   PetscCall(MatDestroy_LMVM(B));
230:   PetscFunctionReturn(PETSC_SUCCESS);
231: }

233: static PetscErrorCode MatSetUp_LMVMSR1(Mat B)
234: {
235:   Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
236:   Mat_LSR1 *lsr1 = (Mat_LSR1 *)lmvm->ctx;

238:   PetscFunctionBegin;
239:   PetscCall(MatSetUp_LMVM(B));
240:   if (!lsr1->allocated && lmvm->m > 0) {
241:     PetscCall(VecDuplicate(lmvm->Xprev, &lsr1->work));
242:     PetscCall(PetscMalloc2(lmvm->m, &lsr1->stp, lmvm->m, &lsr1->ytq));
243:     if (lmvm->m > 0) {
244:       PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->P));
245:       PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lsr1->Q));
246:     }
247:     lsr1->allocated = PETSC_TRUE;
248:   }
249:   PetscFunctionReturn(PETSC_SUCCESS);
250: }

252: PetscErrorCode MatCreate_LMVMSR1(Mat B)
253: {
254:   Mat_LMVM *lmvm;
255:   Mat_LSR1 *lsr1;

257:   PetscFunctionBegin;
258:   PetscCall(MatCreate_LMVM(B));
259:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMSR1));
260:   PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
261:   B->ops->setup   = MatSetUp_LMVMSR1;
262:   B->ops->destroy = MatDestroy_LMVMSR1;
263:   B->ops->solve   = MatSolve_LMVMSR1;

265:   lmvm                = (Mat_LMVM *)B->data;
266:   lmvm->square        = PETSC_TRUE;
267:   lmvm->ops->allocate = MatAllocate_LMVMSR1;
268:   lmvm->ops->reset    = MatReset_LMVMSR1;
269:   lmvm->ops->update   = MatUpdate_LMVMSR1;
270:   lmvm->ops->mult     = MatMult_LMVMSR1;
271:   lmvm->ops->copy     = MatCopy_LMVMSR1;

273:   PetscCall(PetscNew(&lsr1));
274:   lmvm->ctx       = (void *)lsr1;
275:   lsr1->allocated = PETSC_FALSE;
276:   lsr1->needP = lsr1->needQ = PETSC_TRUE;
277:   PetscFunctionReturn(PETSC_SUCCESS);
278: }

280: /*@
281:   MatCreateLMVMSR1 - Creates a limited-memory Symmetric-Rank-1 approximation
282:   matrix used for a Jacobian. L-SR1 is symmetric by construction, but is not
283:   guaranteed to be positive-definite.

285:   To use the L-SR1 matrix with other vector types, the matrix must be
286:   created using `MatCreate()` and `MatSetType()`, followed by `MatLMVMAllocate()`.
287:   This ensures that the internal storage and work vectors are duplicated from the
288:   correct type of vector.

290:   Collective

292:   Input Parameters:
293: + comm - MPI communicator
294: . n    - number of local rows for storage vectors
295: - N    - global size of the storage vectors

297:   Output Parameter:
298: . B - the matrix

300:   Level: intermediate

302:   Note:
303:   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
304:   paradigm instead of this routine directly.

306: .seealso: [](ch_ksp), `MatCreate()`, `MATLMVM`, `MATLMVMSR1`, `MatCreateLMVMBFGS()`, `MatCreateLMVMDFP()`,
307:           `MatCreateLMVMBrdn()`, `MatCreateLMVMBadBrdn()`, `MatCreateLMVMSymBrdn()`
308: @*/
309: PetscErrorCode MatCreateLMVMSR1(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
310: {
311:   PetscFunctionBegin;
312:   PetscCall(MatCreate(comm, B));
313:   PetscCall(MatSetSizes(*B, n, n, N, N));
314:   PetscCall(MatSetType(*B, MATLMVMSR1));
315:   PetscCall(MatSetUp(*B));
316:   PetscFunctionReturn(PETSC_SUCCESS);
317: }