Actual source code: pounders.c

  1: #include <../src/tao/leastsquares/impls/pounders/pounders.h>

  3: static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx)
  4: {
  5:   PetscFunctionBegin;
  6:   PetscFunctionReturn(PETSC_SUCCESS);
  7: }

  9: static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx)
 10: {
 11:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx;
 12:   PetscReal     d1, d2;

 14:   PetscFunctionBegin;
 15:   /* g = A*x  (add b later)*/
 16:   PetscCall(MatMult(mfqP->subH, x, g));

 18:   /* f = 1/2 * x'*(Ax) + b'*x  */
 19:   PetscCall(VecDot(x, g, &d1));
 20:   PetscCall(VecDot(mfqP->subb, x, &d2));
 21:   *f = 0.5 * d1 + d2;

 23:   /* now  g = g + b */
 24:   PetscCall(VecAXPY(g, 1.0, mfqP->subb));
 25:   PetscFunctionReturn(PETSC_SUCCESS);
 26: }

 28: static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum)
 29: {
 30:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
 31:   PetscInt      i, row, col;
 32:   PetscReal     fr, fc;

 34:   PetscFunctionBegin;
 35:   PetscCall(TaoComputeResidual(tao, x, F));
 36:   if (tao->res_weights_v) {
 37:     PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F));
 38:     PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum));
 39:   } else if (tao->res_weights_w) {
 40:     *fsum = 0;
 41:     for (i = 0; i < tao->res_weights_n; i++) {
 42:       row = tao->res_weights_rows[i];
 43:       col = tao->res_weights_cols[i];
 44:       PetscCall(VecGetValues(F, 1, &row, &fr));
 45:       PetscCall(VecGetValues(F, 1, &col, &fc));
 46:       *fsum += tao->res_weights_w[i] * fc * fr;
 47:     }
 48:   } else {
 49:     PetscCall(VecDot(F, F, fsum));
 50:   }
 51:   PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum));
 52:   PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
 53:   PetscFunctionReturn(PETSC_SUCCESS);
 54: }

 56: static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin)
 57: {
 58: #if defined(PETSC_USE_REAL_SINGLE)
 59:   PetscReal atol = 1.0e-5;
 60: #else
 61:   PetscReal atol = 1.0e-10;
 62: #endif
 63:   PetscInt      info, its;
 64:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;

 66:   PetscFunctionBegin;
 67:   if (!mfqP->usegqt) {
 68:     PetscReal maxval;
 69:     PetscInt  i, j;

 71:     PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
 72:     PetscCall(VecAssemblyBegin(mfqP->subb));
 73:     PetscCall(VecAssemblyEnd(mfqP->subb));

 75:     PetscCall(VecSet(mfqP->subx, 0.0));

 77:     PetscCall(VecSet(mfqP->subndel, -1.0));
 78:     PetscCall(VecSet(mfqP->subpdel, +1.0));

 80:     /* Complete the lower triangle of the Hessian matrix */
 81:     for (i = 0; i < mfqP->n; i++) {
 82:       for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i];
 83:     }
 84:     PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES));
 85:     PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY));
 86:     PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY));

 88:     PetscCall(TaoResetStatistics(mfqP->subtao));
 89:     /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_CURRENT)); */
 90:     /* enforce bound constraints -- experimental */
 91:     if (tao->XU && tao->XL) {
 92:       PetscCall(VecCopy(tao->XU, mfqP->subxu));
 93:       PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution));
 94:       PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta));
 95:       PetscCall(VecCopy(tao->XL, mfqP->subxl));
 96:       PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution));
 97:       PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta));

 99:       PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel));
100:       PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel));
101:     } else {
102:       PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu));
103:       PetscCall(VecCopy(mfqP->subndel, mfqP->subxl));
104:     }
105:     /* Make sure xu > xl */
106:     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
107:     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
108:     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
109:     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem");
110:     /* Make sure xu > tao->solution > xl */
111:     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
112:     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
113:     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
114:     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem");

116:     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
117:     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
118:     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
119:     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem");

121:     PetscCall(TaoSolve(mfqP->subtao));
122:     PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL));

124:     /* test bounds post-solution*/
125:     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
126:     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
127:     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
128:     if (maxval > 1e-5) {
129:       PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n"));
130:       tao->reason = TAO_DIVERGED_TR_REDUCTION;
131:     }

133:     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
134:     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
135:     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
136:     if (maxval > 1e-5) {
137:       PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n"));
138:       tao->reason = TAO_DIVERGED_TR_REDUCTION;
139:     }
140:   } else {
141:     PetscCall(gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3));
142:   }
143:   *qmin *= -1;
144:   PetscFunctionReturn(PETSC_SUCCESS);
145: }

147: static PetscErrorCode pounders_update_res(Tao tao)
148: {
149:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
150:   PetscInt      i, row, col;
151:   PetscBLASInt  blasn, blasn2, blasm, ione = 1;
152:   PetscReal     zero = 0.0, one = 1.0, wii, factor;

154:   PetscFunctionBegin;
155:   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
156:   PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
157:   PetscCall(PetscBLASIntCast(mfqP->n * mfqP->n, &blasn2));
158:   for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0;
159:   for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0;

161:   /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */
162:   if (tao->res_weights_v) {
163:     /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */
164:     for (i = 0; i < mfqP->m; i++) {
165:       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor));
166:       factor = factor * mfqP->C[i];
167:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione));
168:     }

170:     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
171:     /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/
172:     for (i = 0; i < mfqP->m; i++) {
173:       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii));
174:       if (tao->niter > 1) {
175:         factor = wii * mfqP->C[i];
176:         /* add wii * ci * Hi */
177:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
178:       }
179:       /* add wii * gi * gi' */
180:       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn));
181:     }
182:   } else if (tao->res_weights_w) {
183:     /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */
184:     for (i = 0; i < tao->res_weights_n; i++) {
185:       row = tao->res_weights_rows[i];
186:       col = tao->res_weights_cols[i];

188:       factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
189:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione));
190:       factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
191:       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione));
192:     }

194:     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
195:     /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
196:     for (i = 0; i < tao->res_weights_n; i++) {
197:       row    = tao->res_weights_rows[i];
198:       col    = tao->res_weights_cols[i];
199:       factor = tao->res_weights_w[i] / 2.0;
200:       /* add wij * gi gj' + wij * gj gi' */
201:       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn));
202:       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn));
203:     }
204:     if (tao->niter > 1) {
205:       for (i = 0; i < tao->res_weights_n; i++) {
206:         row = tao->res_weights_rows[i];
207:         col = tao->res_weights_cols[i];

209:         /* add  wij*cj*Hi */
210:         factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
211:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione));

213:         /* add wij*ci*Hj */
214:         factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
215:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione));
216:       }
217:     }
218:   } else {
219:     /* Default: Gres= sum_i[cigi] = G*c' */
220:     PetscCall(PetscInfo(tao, "Identity weights\n"));
221:     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione));

223:     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
224:     /*  Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)}  */
225:     PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn));

227:     /* sum(F(xkin,i)*H(:,:,i)) */
228:     if (tao->niter > 1) {
229:       for (i = 0; i < mfqP->m; i++) {
230:         factor = mfqP->C[i];
231:         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
232:       }
233:     }
234:   }
235:   PetscFunctionReturn(PETSC_SUCCESS);
236: }

238: static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi)
239: {
240:   /* Phi = .5*[x(1)^2  sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
241:   PetscInt  i, j, k;
242:   PetscReal sqrt2 = PetscSqrtReal(2.0);

244:   PetscFunctionBegin;
245:   j = 0;
246:   for (i = 0; i < n; i++) {
247:     phi[j] = 0.5 * x[i] * x[i];
248:     j++;
249:     for (k = i + 1; k < n; k++) {
250:       phi[j] = x[i] * x[k] / sqrt2;
251:       j++;
252:     }
253:   }
254:   PetscFunctionReturn(PETSC_SUCCESS);
255: }

257: static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP)
258: {
259:   /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
260:    that satisfies the interpolation conditions Q(X[:,j]) = f(j)
261:    for j=1,...,m and with a Hessian matrix of least Frobenius norm */

263:   /* NB --we are ignoring c */
264:   PetscInt     i, j, k, num, np = mfqP->nmodelpoints;
265:   PetscReal    one = 1.0, zero = 0.0, negone = -1.0;
266:   PetscBLASInt blasnpmax, blasnplus1, blasnp, blasint, blasint2;
267:   PetscBLASInt info, ione = 1;
268:   PetscReal    sqrt2 = PetscSqrtReal(2.0);

270:   PetscFunctionBegin;
271:   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
272:   PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1));
273:   PetscCall(PetscBLASIntCast(np, &blasnp));
274:   PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint));
275:   PetscCall(PetscBLASIntCast(np - mfqP->n - 1, &blasint2));
276:   for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0;
277:   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0;

279:   /* factor M */
280:   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info));
281:   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info);

283:   if (np == mfqP->n + 1) {
284:     for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0;
285:     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0;
286:   } else {
287:     /* Let Ltmp = (L'*L) */
288:     PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint));

290:     /* factor Ltmp */
291:     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info));
292:     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info);
293:   }

295:   for (k = 0; k < mfqP->m; k++) {
296:     if (np != mfqP->n + 1) {
297:       /* Solve L'*L*Omega = Z' * RESk*/
298:       PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione));
299:       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info));
300:       PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info);

302:       /* Beta = L*Omega */
303:       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione));
304:     }

306:     /* solve M'*Alpha = RESk - N'*Beta */
307:     PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione));
308:     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info));
309:     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info);

311:     /* Gdel(:,k) = Alpha(2:n+1) */
312:     for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1];

314:     /* Set Hdels */
315:     num = 0;
316:     for (i = 0; i < mfqP->n; i++) {
317:       /* H[i,i,k] = Beta(num) */
318:       mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num];
319:       num++;
320:       for (j = i + 1; j < mfqP->n; j++) {
321:         /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
322:         mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
323:         mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
324:         num++;
325:       }
326:     }
327:   }
328:   PetscFunctionReturn(PETSC_SUCCESS);
329: }

331: static PetscErrorCode morepoints(TAO_POUNDERS *mfqP)
332: {
333:   /* Assumes mfqP->model_indices[0]  is minimum index
334:    Finishes adding points to mfqP->model_indices (up to npmax)
335:    Computes L,Z,M,N
336:    np is actual number of points in model (should equal npmax?) */
337:   PetscInt         point, i, j, offset;
338:   PetscInt         reject;
339:   PetscBLASInt     blasn, blasnpmax, blasnplus1, info, blasnmax, blasint, blasint2, blasnp, blasmaxmn;
340:   const PetscReal *x;
341:   PetscReal        normd;

343:   PetscFunctionBegin;
344:   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
345:   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
346:   PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax));
347:   PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1));
348:   PetscCall(PetscBLASIntCast(mfqP->n, &blasnp));
349:   /* Initialize M,N */
350:   for (i = 0; i < mfqP->n + 1; i++) {
351:     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
352:     mfqP->M[(mfqP->n + 1) * i] = 1.0;
353:     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
354:     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
355:     PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i]));
356:   }

358:   /* Now we add points until we have npmax starting with the most recent ones */
359:   point              = mfqP->nHist - 1;
360:   mfqP->nmodelpoints = mfqP->n + 1;
361:   while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) {
362:     /* Reject any points already in the model */
363:     reject = 0;
364:     for (j = 0; j < mfqP->n + 1; j++) {
365:       if (point == mfqP->model_indices[j]) {
366:         reject = 1;
367:         break;
368:       }
369:     }

371:     /* Reject if norm(d) >c2 */
372:     if (!reject) {
373:       PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec));
374:       PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex]));
375:       PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd));
376:       normd /= mfqP->delta;
377:       if (normd > mfqP->c2) reject = 1;
378:     }
379:     if (reject) {
380:       point--;
381:       continue;
382:     }

384:     PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x));
385:     mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0;
386:     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
387:     PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x));
388:     PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)]));

390:     /* Update QR factorization */
391:     /* Copy M' to Q_tmp */
392:     for (i = 0; i < mfqP->n + 1; i++) {
393:       for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j];
394:     }
395:     PetscCall(PetscBLASIntCast(mfqP->nmodelpoints + 1, &blasnp));
396:     /* Q_tmp,R = qr(M') */
397:     PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n + 1), &blasmaxmn));
398:     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info));
399:     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info);

401:     /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
402:     /* L = N*Qtmp */
403:     PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint2));
404:     /* Copy N to L_tmp */
405:     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i];
406:     /* Copy L_save to L_tmp */

408:     /* L_tmp = N*Qtmp' */
409:     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info));
410:     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);

412:     /* Copy L_tmp to L_save */
413:     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i];

415:     /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
416:     PetscCall(PetscBLASIntCast(mfqP->nmodelpoints - mfqP->n, &blasint));
417:     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
418:     PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info));
419:     PetscCall(PetscFPTrapPop());
420:     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info);

422:     if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) {
423:       /* accept point */
424:       mfqP->model_indices[mfqP->nmodelpoints] = point;
425:       /* Copy Q_tmp to Q */
426:       for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i];
427:       for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i];
428:       mfqP->nmodelpoints++;
429:       PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp));

431:       /* Copy L_save to L */
432:       for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i];
433:     }
434:     point--;
435:   }

437:   PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp));
438:   /* Copy Q(:,n+2:np) to Z */
439:   /* First set Q_tmp to I */
440:   for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0;
441:   for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0;

443:   /* Q_tmp = I * Q */
444:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
445:   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);

447:   /* Copy Q_tmp(:,n+2:np) to Z) */
448:   offset = mfqP->npmax * (mfqP->n + 1);
449:   for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i];

451:   if (mfqP->nmodelpoints == mfqP->n + 1) {
452:     /* Set L to I_{n+1} */
453:     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0;
454:     for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0;
455:   }
456:   PetscFunctionReturn(PETSC_SUCCESS);
457: }

459: /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
460: static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index)
461: {
462:   PetscFunctionBegin;
463:   /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
464:   PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist]));
465:   PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES));
466:   PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
467:   PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
468:   PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex]));

470:   /* Project into feasible region */
471:   if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist]));

473:   /* Compute value of new vector */
474:   PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist]));
475:   CHKMEMQ;
476:   PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));

478:   /* Add new vector to model */
479:   mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
480:   mfqP->nmodelpoints++;
481:   mfqP->nHist++;
482:   PetscFunctionReturn(PETSC_SUCCESS);
483: }

485: static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints)
486: {
487:   /* modeld = Q(:,np+1:n)' */
488:   PetscInt     i, j, minindex = 0;
489:   PetscReal    dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY;
490:   PetscBLASInt blasn, blasnpmax, blask, info;
491:   PetscBLASInt blas1 = 1, blasnmax;

493:   PetscFunctionBegin;
494:   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
495:   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
496:   PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
497:   PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax));

499:   /* Qtmp = I(n x n) */
500:   for (i = 0; i < mfqP->n; i++) {
501:     for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0;
502:   }
503:   for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0;

505:   /* Qtmp = Q * I */
506:   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));

508:   for (i = mfqP->nmodelpoints; i < mfqP->n; i++) {
509:     PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1));
510:     if (dp > 0.0) { /* Model says use the other direction! */
511:       for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1;
512:     }
513:     /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
514:     for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j];
515:     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1));
516:     PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1));
517:     if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
518:       minindex = i;
519:       minvalue = mfqP->work[i];
520:     }
521:     if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i));
522:   }
523:   if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex));
524:   PetscFunctionReturn(PETSC_SUCCESS);
525: }

527: static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c)
528: {
529:   PetscInt         i, j;
530:   PetscBLASInt     blasm, blasj, blask, blasn, ione = 1, info;
531:   PetscBLASInt     blasnpmax, blasmaxmn;
532:   PetscReal        proj, normd;
533:   const PetscReal *x;

535:   PetscFunctionBegin;
536:   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
537:   PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
538:   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
539:   for (i = mfqP->nHist - 1; i >= 0; i--) {
540:     PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x));
541:     for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta;
542:     PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x));
543:     PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione));
544:     PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione));
545:     if (normd <= c) {
546:       PetscCall(PetscBLASIntCast(PetscMax(mfqP->n - mfqP->nmodelpoints, 0), &blasj));
547:       if (!mfqP->q_is_I) {
548:         /* project D onto null */
549:         PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
550:         PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info));
551:         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info);
552:       }
553:       PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione));

555:       if (proj >= mfqP->theta1) { /* add this index to model */
556:         mfqP->model_indices[mfqP->nmodelpoints] = i;
557:         mfqP->nmodelpoints++;
558:         PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione));
559:         PetscCall(PetscBLASIntCast(mfqP->npmax * (mfqP->nmodelpoints), &blask));
560:         PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione));
561:         PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
562:         PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n), &blasmaxmn));
563:         PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info));
564:         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info);
565:         mfqP->q_is_I = 0;
566:       }
567:       if (mfqP->nmodelpoints == mfqP->n) break;
568:     }
569:   }
570:   PetscFunctionReturn(PETSC_SUCCESS);
571: }

573: static PetscErrorCode TaoSolve_POUNDERS(Tao tao)
574: {
575:   TAO_POUNDERS    *mfqP = (TAO_POUNDERS *)tao->data;
576:   PetscInt         i, ii, j, k, l;
577:   PetscReal        step = 1.0;
578:   PetscInt         low, high;
579:   PetscReal        minnorm;
580:   PetscReal       *x, *f;
581:   const PetscReal *xmint, *fmin;
582:   PetscReal        deltaold;
583:   PetscReal        gnorm;
584:   PetscBLASInt     info, ione = 1, iblas;
585:   PetscBool        valid, same;
586:   PetscReal        mdec, rho, normxsp;
587:   PetscReal        one = 1.0, zero = 0.0, ratio;
588:   PetscBLASInt     blasm, blasn, blasncopy, blasnpmax;
589:   static PetscBool set = PETSC_FALSE;

591:   /* n = # of parameters
592:      m = dimension (components) of function  */
593:   PetscFunctionBegin;
594:   PetscCall(PetscCitationsRegister("@article{UNEDF0,\n"
595:                                    "title = {Nuclear energy density optimization},\n"
596:                                    "author = {Kortelainen, M.  and Lesinski, T.  and Mor\'e, J.  and Nazarewicz, W.\n"
597:                                    "          and Sarich, J.  and Schunck, N.  and Stoitsov, M. V. and Wild, S. },\n"
598:                                    "journal = {Phys. Rev. C},\n"
599:                                    "volume = {82},\n"
600:                                    "number = {2},\n"
601:                                    "pages = {024313},\n"
602:                                    "numpages = {18},\n"
603:                                    "year = {2010},\n"
604:                                    "month = {Aug},\n"
605:                                    "doi = {10.1103/PhysRevC.82.024313}\n}\n",
606:                                    &set));
607:   tao->niter = 0;
608:   if (tao->XL && tao->XU) {
609:     /* Check x0 <= XU */
610:     PetscReal val;

612:     PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
613:     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
614:     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
615:     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound");

617:     /* Check x0 >= xl */
618:     PetscCall(VecCopy(tao->XL, mfqP->Xhist[0]));
619:     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution));
620:     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
621:     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound");

623:     /* Check x0 + delta < XU  -- should be able to get around this eventually */

625:     PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta));
626:     PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution));
627:     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
628:     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
629:     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound");
630:   }

632:   PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
633:   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
634:   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
635:   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0;

637:   PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));

639:   /* This provides enough information to approximate the gradient of the objective */
640:   /* using a forward difference scheme. */

642:   PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta));
643:   PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0]));
644:   mfqP->minindex = 0;
645:   minnorm        = mfqP->Fres[0];

647:   PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high));
648:   for (i = 1; i < mfqP->n + 1; ++i) {
649:     PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i]));

651:     if (i - 1 >= low && i - 1 < high) {
652:       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
653:       x[i - 1 - low] += mfqP->delta;
654:       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
655:     }
656:     CHKMEMQ;
657:     PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i]));
658:     if (mfqP->Fres[i] < minnorm) {
659:       mfqP->minindex = i;
660:       minnorm        = mfqP->Fres[i];
661:     }
662:   }
663:   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
664:   PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
665:   PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm));

667:   /* Gather mpi vecs to one big local vec */

669:   /* Begin serial code */

671:   /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
672:   /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
673:   /* (Column oriented for blas calls) */
674:   ii = 0;

676:   PetscCall(PetscInfo(tao, "Build matrix: %d\n", mfqP->size));
677:   if (1 == mfqP->size) {
678:     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
679:     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
680:     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
681:     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
682:     for (i = 0; i < mfqP->n + 1; i++) {
683:       if (i == mfqP->minindex) continue;

685:       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
686:       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
687:       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));

689:       PetscCall(VecGetArray(mfqP->Fhist[i], &f));
690:       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
691:       PetscCall(VecRestoreArray(mfqP->Fhist[i], &f));

693:       mfqP->model_indices[ii++] = i;
694:     }
695:     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
696:     PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
697:   } else {
698:     PetscCall(VecSet(mfqP->localxmin, 0));
699:     PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
700:     PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));

702:     PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint));
703:     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
704:     PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint));

706:     PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
707:     PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
708:     PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin));
709:     for (i = 0; i < mfqP->n + 1; i++) {
710:       if (i == mfqP->minindex) continue;

712:       PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
713:       PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
714:       PetscCall(VecGetArray(mfqP->localx, &x));
715:       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
716:       PetscCall(VecRestoreArray(mfqP->localx, &x));

718:       PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
719:       PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
720:       PetscCall(VecGetArray(mfqP->localf, &f));
721:       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
722:       PetscCall(VecRestoreArray(mfqP->localf, &f));

724:       mfqP->model_indices[ii++] = i;
725:     }
726:     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
727:     PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin));
728:   }

730:   /* Determine the initial quadratic models */
731:   /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
732:   /* D (nxn) Fdiff (nxm)  => G (nxm) */
733:   blasncopy = blasn;
734:   PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info));
735:   PetscCall(PetscInfo(tao, "Linear solve return: %" PetscBLASInt_FMT "\n", info));

737:   PetscCall(pounders_update_res(tao));

739:   valid = PETSC_TRUE;

741:   PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
742:   PetscCall(VecAssemblyBegin(tao->gradient));
743:   PetscCall(VecAssemblyEnd(tao->gradient));
744:   PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
745:   gnorm *= mfqP->delta;
746:   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));

748:   tao->reason = TAO_CONTINUE_ITERATING;
749:   PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
750:   PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
751:   PetscUseTypeMethod(tao, convergencetest, tao->cnvP);

753:   mfqP->nHist        = mfqP->n + 1;
754:   mfqP->nmodelpoints = mfqP->n + 1;
755:   PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm));

757:   while (tao->reason == TAO_CONTINUE_ITERATING) {
758:     PetscReal gnm = 1e-4;
759:     /* Call general purpose update function */
760:     PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);
761:     tao->niter++;
762:     /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */
763:     PetscCall(gqtwrap(tao, &gnm, &mdec));
764:     /* Evaluate the function at the new point */

766:     for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i];
767:     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist]));
768:     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist]));
769:     PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES));
770:     PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
771:     PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));

773:     PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));

775:     rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
776:     mfqP->nHist++;

778:     /* Update the center */
779:     if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) {
780:       /* Update model to reflect new base point */
781:       for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta;
782:       for (j = 0; j < mfqP->m; j++) {
783:         /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
784:          G(:,j) = G(:,j) + H(:,:,j)*work' */
785:         for (k = 0; k < mfqP->n; k++) {
786:           mfqP->work2[k] = 0.0;
787:           for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l];
788:         }
789:         for (i = 0; i < mfqP->n; i++) {
790:           mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]);
791:           mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i];
792:         }
793:       }
794:       /* Cres += work*Gres + .5*work*Hres*work';
795:        Gres += Hres*work'; */

797:       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione));
798:       for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i];
799:       mfqP->minindex = mfqP->nHist - 1;
800:       minnorm        = mfqP->Fres[mfqP->minindex];
801:       PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
802:       /* Change current center */
803:       PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
804:       for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
805:       PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
806:     }

808:     /* Evaluate at a model-improving point if necessary */
809:     if (valid == PETSC_FALSE) {
810:       mfqP->q_is_I       = 1;
811:       mfqP->nmodelpoints = 0;
812:       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
813:       if (mfqP->nmodelpoints < mfqP->n) {
814:         PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n"));
815:         PetscCall(modelimprove(tao, mfqP, 1));
816:       }
817:     }

819:     /* Update the trust region radius */
820:     deltaold = mfqP->delta;
821:     normxsp  = 0;
822:     for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i];
823:     normxsp = PetscSqrtReal(normxsp);
824:     if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) {
825:       mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax);
826:     } else if (valid == PETSC_TRUE) {
827:       mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin);
828:     }

830:     /* Compute the next interpolation set */
831:     mfqP->q_is_I       = 1;
832:     mfqP->nmodelpoints = 0;
833:     PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1));
834:     PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
835:     if (mfqP->nmodelpoints == mfqP->n) {
836:       valid = PETSC_TRUE;
837:     } else {
838:       valid = PETSC_FALSE;
839:       PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2));
840:       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2));
841:       if (mfqP->n > mfqP->nmodelpoints) {
842:         PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n"));
843:         PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints));
844:       }
845:     }
846:     for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1];
847:     mfqP->nmodelpoints++;
848:     mfqP->model_indices[0] = mfqP->minindex;
849:     PetscCall(morepoints(mfqP));
850:     for (i = 0; i < mfqP->nmodelpoints; i++) {
851:       PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
852:       for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold;
853:       PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
854:       PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
855:       for (j = 0; j < mfqP->m; j++) {
856:         for (k = 0; k < mfqP->n; k++) {
857:           mfqP->work[k] = 0.0;
858:           for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l];
859:         }
860:         PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]);
861:       }
862:       PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
863:     }

865:     /* Update the quadratic model */
866:     PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints));
867:     PetscCall(getquadpounders(mfqP));
868:     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
869:     PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione));
870:     /* G = G*(delta/deltaold) + Gdel */
871:     ratio = mfqP->delta / deltaold;
872:     iblas = blasm * blasn;
873:     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione));
874:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione));
875:     /* H = H*(delta/deltaold)^2 + Hdel */
876:     iblas = blasm * blasn * blasn;
877:     ratio *= ratio;
878:     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione));
879:     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione));

881:     /* Get residuals */
882:     PetscCall(pounders_update_res(tao));

884:     /* Export solution and gradient residual to TAO */
885:     PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
886:     PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
887:     PetscCall(VecAssemblyBegin(tao->gradient));
888:     PetscCall(VecAssemblyEnd(tao->gradient));
889:     PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
890:     gnorm *= mfqP->delta;
891:     /*  final criticality test */
892:     PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
893:     PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
894:     PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
895:     /* test for repeated model */
896:     if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) {
897:       same = PETSC_TRUE;
898:     } else {
899:       same = PETSC_FALSE;
900:     }
901:     for (i = 0; i < mfqP->nmodelpoints; i++) {
902:       if (same) {
903:         if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) {
904:           same = PETSC_TRUE;
905:         } else {
906:           same = PETSC_FALSE;
907:         }
908:       }
909:       mfqP->last_model_indices[i] = mfqP->model_indices[i];
910:     }
911:     mfqP->last_nmodelpoints = mfqP->nmodelpoints;
912:     if (same && mfqP->delta == deltaold) {
913:       PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n"));
914:       tao->reason = TAO_CONVERGED_STEPTOL;
915:     }
916:   }
917:   PetscFunctionReturn(PETSC_SUCCESS);
918: }

920: static PetscErrorCode TaoSetUp_POUNDERS(Tao tao)
921: {
922:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
923:   PetscInt      i, j;
924:   IS            isfloc, isfglob, isxloc, isxglob;

926:   PetscFunctionBegin;
927:   if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
928:   if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
929:   PetscCall(VecGetSize(tao->solution, &mfqP->n));
930:   PetscCall(VecGetSize(tao->ls_res, &mfqP->m));
931:   mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
932:   if (mfqP->npmax == PETSC_CURRENT) mfqP->npmax = 2 * mfqP->n + 1;
933:   mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax);
934:   mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2);

936:   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist));
937:   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist));
938:   for (i = 0; i < mfqP->n + 1; i++) {
939:     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i]));
940:     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i]));
941:   }
942:   PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec));
943:   PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec));
944:   mfqP->nHist = 0;

946:   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres));
947:   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES));
948:   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work));
949:   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2));
950:   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3));
951:   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork));
952:   PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega));
953:   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta));
954:   PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha));

956:   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H));
957:   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q));
958:   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp));
959:   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L));
960:   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp));
961:   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save));
962:   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N));
963:   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M));
964:   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z));
965:   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau));
966:   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp));
967:   mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2);
968:   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork));
969:   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork));
970:   PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin));
971:   PetscCall(PetscMalloc1(mfqP->m, &mfqP->C));
972:   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff));
973:   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp));
974:   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres));
975:   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres));
976:   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints));
977:   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices));
978:   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices));
979:   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem));
980:   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel));
981:   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel));
982:   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices));
983:   PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork));
984:   PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w));
985:   for (i = 0; i < mfqP->m; i++) {
986:     for (j = 0; j < mfqP->m; j++) {
987:       if (i == j) {
988:         mfqP->w[i + mfqP->m * j] = 1.0;
989:       } else {
990:         mfqP->w[i + mfqP->m * j] = 0.0;
991:       }
992:     }
993:   }
994:   for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i;
995:   PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size));
996:   if (mfqP->size > 1) {
997:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx));
998:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin));
999:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf));
1000:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin));
1001:     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc));
1002:     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob));
1003:     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc));
1004:     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob));

1006:     PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx));
1007:     PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf));

1009:     PetscCall(ISDestroy(&isxloc));
1010:     PetscCall(ISDestroy(&isxglob));
1011:     PetscCall(ISDestroy(&isfloc));
1012:     PetscCall(ISDestroy(&isfglob));
1013:   }

1015:   if (!mfqP->usegqt) {
1016:     KSP ksp;
1017:     PC  pc;
1018:     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx));
1019:     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl));
1020:     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb));
1021:     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu));
1022:     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel));
1023:     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel));
1024:     PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao));
1025:     PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1));
1026:     PetscCall(TaoSetType(mfqP->subtao, TAOBNTR));
1027:     PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_"));
1028:     PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx));
1029:     PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP));
1030:     PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits));
1031:     PetscCall(TaoSetFromOptions(mfqP->subtao));
1032:     PetscCall(TaoGetKSP(mfqP->subtao, &ksp));
1033:     if (ksp) {
1034:       PetscCall(KSPGetPC(ksp, &pc));
1035:       PetscCall(PCSetType(pc, PCNONE));
1036:     }
1037:     PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu));
1038:     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH));
1039:     PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP));
1040:   }
1041:   PetscFunctionReturn(PETSC_SUCCESS);
1042: }

1044: static PetscErrorCode TaoDestroy_POUNDERS(Tao tao)
1045: {
1046:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1047:   PetscInt      i;

1049:   PetscFunctionBegin;
1050:   if (!mfqP->usegqt) {
1051:     PetscCall(TaoDestroy(&mfqP->subtao));
1052:     PetscCall(VecDestroy(&mfqP->subx));
1053:     PetscCall(VecDestroy(&mfqP->subxl));
1054:     PetscCall(VecDestroy(&mfqP->subxu));
1055:     PetscCall(VecDestroy(&mfqP->subb));
1056:     PetscCall(VecDestroy(&mfqP->subpdel));
1057:     PetscCall(VecDestroy(&mfqP->subndel));
1058:     PetscCall(MatDestroy(&mfqP->subH));
1059:   }
1060:   PetscCall(PetscFree(mfqP->Fres));
1061:   PetscCall(PetscFree(mfqP->RES));
1062:   PetscCall(PetscFree(mfqP->work));
1063:   PetscCall(PetscFree(mfqP->work2));
1064:   PetscCall(PetscFree(mfqP->work3));
1065:   PetscCall(PetscFree(mfqP->mwork));
1066:   PetscCall(PetscFree(mfqP->omega));
1067:   PetscCall(PetscFree(mfqP->beta));
1068:   PetscCall(PetscFree(mfqP->alpha));
1069:   PetscCall(PetscFree(mfqP->H));
1070:   PetscCall(PetscFree(mfqP->Q));
1071:   PetscCall(PetscFree(mfqP->Q_tmp));
1072:   PetscCall(PetscFree(mfqP->L));
1073:   PetscCall(PetscFree(mfqP->L_tmp));
1074:   PetscCall(PetscFree(mfqP->L_save));
1075:   PetscCall(PetscFree(mfqP->N));
1076:   PetscCall(PetscFree(mfqP->M));
1077:   PetscCall(PetscFree(mfqP->Z));
1078:   PetscCall(PetscFree(mfqP->tau));
1079:   PetscCall(PetscFree(mfqP->tau_tmp));
1080:   PetscCall(PetscFree(mfqP->npmaxwork));
1081:   PetscCall(PetscFree(mfqP->npmaxiwork));
1082:   PetscCall(PetscFree(mfqP->xmin));
1083:   PetscCall(PetscFree(mfqP->C));
1084:   PetscCall(PetscFree(mfqP->Fdiff));
1085:   PetscCall(PetscFree(mfqP->Disp));
1086:   PetscCall(PetscFree(mfqP->Gres));
1087:   PetscCall(PetscFree(mfqP->Hres));
1088:   PetscCall(PetscFree(mfqP->Gpoints));
1089:   PetscCall(PetscFree(mfqP->model_indices));
1090:   PetscCall(PetscFree(mfqP->last_model_indices));
1091:   PetscCall(PetscFree(mfqP->Xsubproblem));
1092:   PetscCall(PetscFree(mfqP->Gdel));
1093:   PetscCall(PetscFree(mfqP->Hdel));
1094:   PetscCall(PetscFree(mfqP->indices));
1095:   PetscCall(PetscFree(mfqP->iwork));
1096:   PetscCall(PetscFree(mfqP->w));
1097:   for (i = 0; i < mfqP->nHist; i++) {
1098:     PetscCall(VecDestroy(&mfqP->Xhist[i]));
1099:     PetscCall(VecDestroy(&mfqP->Fhist[i]));
1100:   }
1101:   PetscCall(VecDestroy(&mfqP->workxvec));
1102:   PetscCall(VecDestroy(&mfqP->workfvec));
1103:   PetscCall(PetscFree(mfqP->Xhist));
1104:   PetscCall(PetscFree(mfqP->Fhist));

1106:   if (mfqP->size > 1) {
1107:     PetscCall(VecDestroy(&mfqP->localx));
1108:     PetscCall(VecDestroy(&mfqP->localxmin));
1109:     PetscCall(VecDestroy(&mfqP->localf));
1110:     PetscCall(VecDestroy(&mfqP->localfmin));
1111:   }
1112:   PetscCall(PetscFree(tao->data));
1113:   PetscFunctionReturn(PETSC_SUCCESS);
1114: }

1116: static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject)
1117: {
1118:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;

1120:   PetscFunctionBegin;
1121:   PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization");
1122:   PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL));
1123:   mfqP->delta = mfqP->delta0;
1124:   PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL));
1125:   PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL));
1126:   PetscOptionsHeadEnd();
1127:   PetscFunctionReturn(PETSC_SUCCESS);
1128: }

1130: static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer)
1131: {
1132:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1133:   PetscBool     isascii;

1135:   PetscFunctionBegin;
1136:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1137:   if (isascii) {
1138:     PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0));
1139:     PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta));
1140:     PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints));
1141:     if (mfqP->usegqt) {
1142:       PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n"));
1143:     } else {
1144:       PetscCall(TaoView(mfqP->subtao, viewer));
1145:     }
1146:   }
1147:   PetscFunctionReturn(PETSC_SUCCESS);
1148: }
1149: /*MC
1150:   TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares

1152:   Options Database Keys:
1153: + -tao_pounders_delta - initial step length
1154: . -tao_pounders_npmax - maximum number of points in model
1155: - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON

1157:   Level: beginner

1159: M*/

1161: PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao)
1162: {
1163:   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;

1165:   PetscFunctionBegin;
1166:   tao->ops->setup          = TaoSetUp_POUNDERS;
1167:   tao->ops->solve          = TaoSolve_POUNDERS;
1168:   tao->ops->view           = TaoView_POUNDERS;
1169:   tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1170:   tao->ops->destroy        = TaoDestroy_POUNDERS;

1172:   PetscCall(PetscNew(&mfqP));
1173:   tao->data = (void *)mfqP;

1175:   /* Override default settings (unless already changed) */
1176:   PetscCall(TaoParametersInitialize(tao));
1177:   PetscObjectParameterSetDefault(tao, max_it, 2000);
1178:   PetscObjectParameterSetDefault(tao, max_funcs, 4000);

1180:   mfqP->npmax      = PETSC_CURRENT;
1181:   mfqP->delta0     = 0.1;
1182:   mfqP->delta      = 0.1;
1183:   mfqP->deltamax   = 1e3;
1184:   mfqP->deltamin   = 1e-6;
1185:   mfqP->c2         = 10.0;
1186:   mfqP->theta1     = 1e-5;
1187:   mfqP->theta2     = 1e-4;
1188:   mfqP->gamma0     = 0.5;
1189:   mfqP->gamma1     = 2.0;
1190:   mfqP->eta0       = 0.0;
1191:   mfqP->eta1       = 0.1;
1192:   mfqP->usegqt     = PETSC_FALSE;
1193:   mfqP->gqt_rtol   = 0.001;
1194:   mfqP->gqt_maxits = 50;
1195:   mfqP->workxvec   = NULL;
1196:   PetscFunctionReturn(PETSC_SUCCESS);
1197: }