Actual source code: elliptic.c

  1: #include <petsc/private/taoimpl.h>

  3: typedef struct {
  4:   PetscInt n; /* Number of total variables */
  5:   PetscInt m; /* Number of constraints */
  6:   PetscInt nstate;
  7:   PetscInt ndesign;
  8:   PetscInt mx;    /* grid points in each direction */
  9:   PetscInt ns;    /* Number of data samples (1<=ns<=8)
 10:                   Currently only ns=1 is supported */
 11:   PetscInt ndata; /* Number of data points per sample */
 12:   IS       s_is;
 13:   IS       d_is;

 15:   VecScatter  state_scatter;
 16:   VecScatter  design_scatter;
 17:   VecScatter *yi_scatter, *di_scatter;
 18:   Vec         suby, subq, subd;
 19:   Mat         Js, Jd, JsPrec, JsInv, JsBlock;

 21:   PetscReal  alpha; /* Regularization parameter */
 22:   PetscReal  beta;  /* Weight attributed to ||u||^2 in regularization functional */
 23:   PetscReal  noise; /* Amount of noise to add to data */
 24:   PetscReal *ones;
 25:   Mat        Q;
 26:   Mat        MQ;
 27:   Mat        L;

 29:   Mat Grad;
 30:   Mat Av, Avwork;
 31:   Mat Div, Divwork;
 32:   Mat DSG;
 33:   Mat Diag, Ones;

 35:   Vec q;
 36:   Vec ur; /* reference */

 38:   Vec d;
 39:   Vec dwork;

 41:   Vec x; /* super vec of y,u */

 43:   Vec y; /* state variables */
 44:   Vec ywork;

 46:   Vec ytrue;

 48:   Vec u; /* design variables */
 49:   Vec uwork;

 51:   Vec utrue;

 53:   Vec js_diag;

 55:   Vec c; /* constraint vector */
 56:   Vec cwork;

 58:   Vec lwork;
 59:   Vec S;
 60:   Vec Swork, Twork, Sdiag, Ywork;
 61:   Vec Av_u;

 63:   KSP solver;
 64:   PC  prec;

 66:   PetscReal     tola, tolb, tolc, told;
 67:   PetscInt      ksp_its;
 68:   PetscInt      ksp_its_initial;
 69:   PetscLogStage stages[10];
 70:   PetscBool     use_ptap;
 71:   PetscBool     use_lrc;
 72: } AppCtx;

 74: PetscErrorCode FormFunction(Tao, Vec, PetscReal *, void *);
 75: PetscErrorCode FormGradient(Tao, Vec, Vec, void *);
 76: PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal *, Vec, void *);
 77: PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void *);
 78: PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void *);
 79: PetscErrorCode FormConstraints(Tao, Vec, Vec, void *);
 80: PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void *);
 81: PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
 82: PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
 83: PetscErrorCode EllipticInitialize(AppCtx *);
 84: PetscErrorCode EllipticDestroy(AppCtx *);
 85: PetscErrorCode EllipticMonitor(Tao, void *);

 87: PetscErrorCode StateBlockMatMult(Mat, Vec, Vec);
 88: PetscErrorCode StateMatMult(Mat, Vec, Vec);

 90: PetscErrorCode StateInvMatMult(Mat, Vec, Vec);
 91: PetscErrorCode DesignMatMult(Mat, Vec, Vec);
 92: PetscErrorCode DesignMatMultTranspose(Mat, Vec, Vec);

 94: PetscErrorCode QMatMult(Mat, Vec, Vec);
 95: PetscErrorCode QMatMultTranspose(Mat, Vec, Vec);

 97: static char help[] = "";

 99: int main(int argc, char **argv)
100: {
101:   Vec      x0;
102:   Tao      tao;
103:   AppCtx   user;
104:   PetscInt ntests = 1;
105:   PetscInt i;

108:   PetscInitialize(&argc, &argv, (char *)0, help);
109:   user.mx = 8;
110:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "elliptic example", NULL);
111:   PetscOptionsInt("-mx", "Number of grid points in each direction", "", user.mx, &user.mx, NULL);
112:   user.ns = 6;
113:   PetscOptionsInt("-ns", "Number of data samples (1<=ns<=8)", "", user.ns, &user.ns, NULL);
114:   user.ndata = 64;
115:   PetscOptionsInt("-ndata", "Numbers of data points per sample", "", user.ndata, &user.ndata, NULL);
116:   user.alpha = 0.1;
117:   PetscOptionsReal("-alpha", "Regularization parameter", "", user.alpha, &user.alpha, NULL);
118:   user.beta = 0.00001;
119:   PetscOptionsReal("-beta", "Weight attributed to ||u||^2 in regularization functional", "", user.beta, &user.beta, NULL);
120:   user.noise = 0.01;
121:   PetscOptionsReal("-noise", "Amount of noise to add to data", "", user.noise, &user.noise, NULL);

123:   user.use_ptap = PETSC_FALSE;
124:   PetscOptionsBool("-use_ptap", "Use ptap matrix for DSG", "", user.use_ptap, &user.use_ptap, NULL);
125:   user.use_lrc = PETSC_FALSE;
126:   PetscOptionsBool("-use_lrc", "Use lrc matrix for Js", "", user.use_lrc, &user.use_lrc, NULL);
127:   PetscOptionsInt("-ntests", "Number of times to repeat TaoSolve", "", ntests, &ntests, NULL);
128:   PetscOptionsEnd();

130:   user.m       = user.ns * user.mx * user.mx * user.mx; /* number of constraints */
131:   user.nstate  = user.m;
132:   user.ndesign = user.mx * user.mx * user.mx;
133:   user.n       = user.nstate + user.ndesign; /* number of variables */

135:   /* Create TAO solver and set desired solution method */
136:   TaoCreate(PETSC_COMM_WORLD, &tao);
137:   TaoSetType(tao, TAOLCL);

139:   /* Set up initial vectors and matrices */
140:   EllipticInitialize(&user);

142:   Gather(user.x, user.y, user.state_scatter, user.u, user.design_scatter);
143:   VecDuplicate(user.x, &x0);
144:   VecCopy(user.x, x0);

146:   /* Set solution vector with an initial guess */
147:   TaoSetSolution(tao, user.x);
148:   TaoSetObjective(tao, FormFunction, &user);
149:   TaoSetGradient(tao, NULL, FormGradient, &user);
150:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user);

152:   TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user);
153:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user);

155:   TaoSetStateDesignIS(tao, user.s_is, user.d_is);
156:   TaoSetFromOptions(tao);

158:   /* SOLVE THE APPLICATION */
159:   PetscLogStageRegister("Trials", &user.stages[1]);
160:   PetscLogStagePush(user.stages[1]);
161:   for (i = 0; i < ntests; i++) {
162:     TaoSolve(tao);
163:     PetscPrintf(PETSC_COMM_WORLD, "KSP Iterations = %" PetscInt_FMT "\n", user.ksp_its);
164:     VecCopy(x0, user.x);
165:   }
166:   PetscLogStagePop();
167:   PetscBarrier((PetscObject)user.x);
168:   PetscPrintf(PETSC_COMM_WORLD, "KSP iterations within initialization: ");
169:   PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its_initial);

171:   TaoDestroy(&tao);
172:   VecDestroy(&x0);
173:   EllipticDestroy(&user);
174:   PetscFinalize();
175:   return 0;
176: }
177: /* ------------------------------------------------------------------- */
178: /*
179:    dwork = Qy - d
180:    lwork = L*(u-ur)
181:    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
182: */
183: PetscErrorCode FormFunction(Tao tao, Vec X, PetscReal *f, void *ptr)
184: {
185:   PetscReal d1 = 0, d2 = 0;
186:   AppCtx   *user = (AppCtx *)ptr;

188:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
189:   MatMult(user->MQ, user->y, user->dwork);
190:   VecAXPY(user->dwork, -1.0, user->d);
191:   VecDot(user->dwork, user->dwork, &d1);
192:   VecWAXPY(user->uwork, -1.0, user->ur, user->u);
193:   MatMult(user->L, user->uwork, user->lwork);
194:   VecDot(user->lwork, user->lwork, &d2);
195:   *f = 0.5 * (d1 + user->alpha * d2);
196:   return 0;
197: }

199: /* ------------------------------------------------------------------- */
200: /*
201:     state: g_s = Q' *(Qy - d)
202:     design: g_d = alpha*L'*L*(u-ur)
203: */
204: PetscErrorCode FormGradient(Tao tao, Vec X, Vec G, void *ptr)
205: {
206:   AppCtx *user = (AppCtx *)ptr;

208:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
209:   MatMult(user->MQ, user->y, user->dwork);
210:   VecAXPY(user->dwork, -1.0, user->d);
211:   MatMultTranspose(user->MQ, user->dwork, user->ywork);
212:   VecWAXPY(user->uwork, -1.0, user->ur, user->u);
213:   MatMult(user->L, user->uwork, user->lwork);
214:   MatMultTranspose(user->L, user->lwork, user->uwork);
215:   VecScale(user->uwork, user->alpha);
216:   Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter);
217:   return 0;
218: }

220: PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
221: {
222:   PetscReal d1, d2;
223:   AppCtx   *user = (AppCtx *)ptr;

225:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
226:   MatMult(user->MQ, user->y, user->dwork);
227:   VecAXPY(user->dwork, -1.0, user->d);
228:   VecDot(user->dwork, user->dwork, &d1);
229:   MatMultTranspose(user->MQ, user->dwork, user->ywork);

231:   VecWAXPY(user->uwork, -1.0, user->ur, user->u);
232:   MatMult(user->L, user->uwork, user->lwork);
233:   VecDot(user->lwork, user->lwork, &d2);
234:   MatMultTranspose(user->L, user->lwork, user->uwork);
235:   VecScale(user->uwork, user->alpha);
236:   *f = 0.5 * (d1 + user->alpha * d2);
237:   Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter);
238:   return 0;
239: }

241: /* ------------------------------------------------------------------- */
242: /* A
243: MatShell object
244: */
245: PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
246: {
247:   AppCtx *user = (AppCtx *)ptr;

249:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
250:   /* DSG = Div * (1/Av_u) * Grad */
251:   VecSet(user->uwork, 0);
252:   VecAXPY(user->uwork, -1.0, user->u);
253:   VecExp(user->uwork);
254:   MatMult(user->Av, user->uwork, user->Av_u);
255:   VecCopy(user->Av_u, user->Swork);
256:   VecReciprocal(user->Swork);
257:   if (user->use_ptap) {
258:     MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES);
259:     MatPtAP(user->Diag, user->Grad, MAT_REUSE_MATRIX, 1.0, &user->DSG);
260:   } else {
261:     MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN);
262:     MatDiagonalScale(user->Divwork, NULL, user->Swork);
263:     MatProductNumeric(user->DSG);
264:   }
265:   return 0;
266: }
267: /* ------------------------------------------------------------------- */
268: /* B */
269: PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
270: {
271:   AppCtx *user = (AppCtx *)ptr;

273:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
274:   return 0;
275: }

277: PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
278: {
279:   PetscReal sum;
280:   AppCtx   *user;

282:   MatShellGetContext(J_shell, &user);
283:   MatMult(user->DSG, X, Y);
284:   VecSum(X, &sum);
285:   sum /= user->ndesign;
286:   VecShift(Y, sum);
287:   return 0;
288: }

290: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
291: {
292:   PetscInt i;
293:   AppCtx  *user;

295:   MatShellGetContext(J_shell, &user);
296:   if (user->ns == 1) {
297:     MatMult(user->JsBlock, X, Y);
298:   } else {
299:     for (i = 0; i < user->ns; i++) {
300:       Scatter(X, user->subq, user->yi_scatter[i], 0, 0);
301:       Scatter(Y, user->suby, user->yi_scatter[i], 0, 0);
302:       MatMult(user->JsBlock, user->subq, user->suby);
303:       Gather(Y, user->suby, user->yi_scatter[i], 0, 0);
304:     }
305:   }
306:   return 0;
307: }

309: PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
310: {
311:   PetscInt its, i;
312:   AppCtx  *user;

314:   MatShellGetContext(J_shell, &user);
315:   KSPSetOperators(user->solver, user->JsBlock, user->DSG);
316:   if (Y == user->ytrue) {
317:     /* First solve is done using true solution to set up problem */
318:     KSPSetTolerances(user->solver, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
319:   } else {
320:     KSPSetTolerances(user->solver, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
321:   }
322:   if (user->ns == 1) {
323:     KSPSolve(user->solver, X, Y);
324:     KSPGetIterationNumber(user->solver, &its);
325:     user->ksp_its += its;
326:   } else {
327:     for (i = 0; i < user->ns; i++) {
328:       Scatter(X, user->subq, user->yi_scatter[i], 0, 0);
329:       Scatter(Y, user->suby, user->yi_scatter[i], 0, 0);
330:       KSPSolve(user->solver, user->subq, user->suby);
331:       KSPGetIterationNumber(user->solver, &its);
332:       user->ksp_its += its;
333:       Gather(Y, user->suby, user->yi_scatter[i], 0, 0);
334:     }
335:   }
336:   return 0;
337: }
338: PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
339: {
340:   AppCtx  *user;
341:   PetscInt i;

343:   MatShellGetContext(J_shell, &user);
344:   if (user->ns == 1) {
345:     MatMult(user->Q, X, Y);
346:   } else {
347:     for (i = 0; i < user->ns; i++) {
348:       Scatter(X, user->subq, user->yi_scatter[i], 0, 0);
349:       Scatter(Y, user->subd, user->di_scatter[i], 0, 0);
350:       MatMult(user->Q, user->subq, user->subd);
351:       Gather(Y, user->subd, user->di_scatter[i], 0, 0);
352:     }
353:   }
354:   return 0;
355: }

357: PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
358: {
359:   AppCtx  *user;
360:   PetscInt i;

362:   MatShellGetContext(J_shell, &user);
363:   if (user->ns == 1) {
364:     MatMultTranspose(user->Q, X, Y);
365:   } else {
366:     for (i = 0; i < user->ns; i++) {
367:       Scatter(X, user->subd, user->di_scatter[i], 0, 0);
368:       Scatter(Y, user->suby, user->yi_scatter[i], 0, 0);
369:       MatMultTranspose(user->Q, user->subd, user->suby);
370:       Gather(Y, user->suby, user->yi_scatter[i], 0, 0);
371:     }
372:   }
373:   return 0;
374: }

376: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
377: {
378:   PetscInt i;
379:   AppCtx  *user;

381:   MatShellGetContext(J_shell, &user);

383:   /* sdiag(1./v) */
384:   VecSet(user->uwork, 0);
385:   VecAXPY(user->uwork, -1.0, user->u);
386:   VecExp(user->uwork);

388:   /* sdiag(1./((Av*(1./v)).^2)) */
389:   MatMult(user->Av, user->uwork, user->Swork);
390:   VecPointwiseMult(user->Swork, user->Swork, user->Swork);
391:   VecReciprocal(user->Swork);

393:   /* (Av * (sdiag(1./v) * b)) */
394:   VecPointwiseMult(user->uwork, user->uwork, X);
395:   MatMult(user->Av, user->uwork, user->Twork);

397:   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
398:   VecPointwiseMult(user->Swork, user->Twork, user->Swork);

400:   if (user->ns == 1) {
401:     /* (sdiag(Grad*y(:,i)) */
402:     MatMult(user->Grad, user->y, user->Twork);

404:     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
405:     VecPointwiseMult(user->Swork, user->Twork, user->Swork);
406:     MatMultTranspose(user->Grad, user->Swork, Y);
407:   } else {
408:     for (i = 0; i < user->ns; i++) {
409:       Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0);
410:       Scatter(Y, user->subq, user->yi_scatter[i], 0, 0);

412:       MatMult(user->Grad, user->suby, user->Twork);
413:       VecPointwiseMult(user->Twork, user->Twork, user->Swork);
414:       MatMultTranspose(user->Grad, user->Twork, user->subq);
415:       Gather(user->y, user->suby, user->yi_scatter[i], 0, 0);
416:       Gather(Y, user->subq, user->yi_scatter[i], 0, 0);
417:     }
418:   }
419:   return 0;
420: }

422: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
423: {
424:   PetscInt i;
425:   AppCtx  *user;

427:   MatShellGetContext(J_shell, &user);
428:   VecZeroEntries(Y);

430:   /* Sdiag = 1./((Av*(1./v)).^2) */
431:   VecSet(user->uwork, 0);
432:   VecAXPY(user->uwork, -1.0, user->u);
433:   VecExp(user->uwork);
434:   MatMult(user->Av, user->uwork, user->Swork);
435:   VecPointwiseMult(user->Sdiag, user->Swork, user->Swork);
436:   VecReciprocal(user->Sdiag);

438:   for (i = 0; i < user->ns; i++) {
439:     Scatter(X, user->subq, user->yi_scatter[i], 0, 0);
440:     Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0);

442:     /* Swork = (Div' * b(:,i)) */
443:     MatMult(user->Grad, user->subq, user->Swork);

445:     /* Twork = Grad*y(:,i) */
446:     MatMult(user->Grad, user->suby, user->Twork);

448:     /* Twork = sdiag(Twork) * Swork */
449:     VecPointwiseMult(user->Twork, user->Swork, user->Twork);

451:     /* Swork = pointwisemult(Sdiag,Twork) */
452:     VecPointwiseMult(user->Swork, user->Twork, user->Sdiag);

454:     /* Ywork = Av' * Swork */
455:     MatMultTranspose(user->Av, user->Swork, user->Ywork);

457:     /* Ywork = pointwisemult(uwork,Ywork) */
458:     VecPointwiseMult(user->Ywork, user->uwork, user->Ywork);
459:     VecAXPY(Y, 1.0, user->Ywork);
460:     Gather(user->y, user->suby, user->yi_scatter[i], 0, 0);
461:   }
462:   return 0;
463: }

465: PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
466: {
467:   /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
468:   PetscReal sum;
469:   PetscInt  i;
470:   AppCtx   *user = (AppCtx *)ptr;

472:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
473:   if (user->ns == 1) {
474:     MatMult(user->Grad, user->y, user->Swork);
475:     VecPointwiseDivide(user->Swork, user->Swork, user->Av_u);
476:     MatMultTranspose(user->Grad, user->Swork, C);
477:     VecSum(user->y, &sum);
478:     sum /= user->ndesign;
479:     VecShift(C, sum);
480:   } else {
481:     for (i = 0; i < user->ns; i++) {
482:       Scatter(user->y, user->suby, user->yi_scatter[i], 0, 0);
483:       Scatter(C, user->subq, user->yi_scatter[i], 0, 0);
484:       MatMult(user->Grad, user->suby, user->Swork);
485:       VecPointwiseDivide(user->Swork, user->Swork, user->Av_u);
486:       MatMultTranspose(user->Grad, user->Swork, user->subq);

488:       VecSum(user->suby, &sum);
489:       sum /= user->ndesign;
490:       VecShift(user->subq, sum);

492:       Gather(user->y, user->suby, user->yi_scatter[i], 0, 0);
493:       Gather(C, user->subq, user->yi_scatter[i], 0, 0);
494:     }
495:   }
496:   VecAXPY(C, -1.0, user->q);
497:   return 0;
498: }

500: PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
501: {
502:   VecScatterBegin(scat1, x, sub1, INSERT_VALUES, SCATTER_FORWARD);
503:   VecScatterEnd(scat1, x, sub1, INSERT_VALUES, SCATTER_FORWARD);
504:   if (sub2) {
505:     VecScatterBegin(scat2, x, sub2, INSERT_VALUES, SCATTER_FORWARD);
506:     VecScatterEnd(scat2, x, sub2, INSERT_VALUES, SCATTER_FORWARD);
507:   }
508:   return 0;
509: }

511: PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
512: {
513:   VecScatterBegin(scat1, sub1, x, INSERT_VALUES, SCATTER_REVERSE);
514:   VecScatterEnd(scat1, sub1, x, INSERT_VALUES, SCATTER_REVERSE);
515:   if (sub2) {
516:     VecScatterBegin(scat2, sub2, x, INSERT_VALUES, SCATTER_REVERSE);
517:     VecScatterEnd(scat2, sub2, x, INSERT_VALUES, SCATTER_REVERSE);
518:   }
519:   return 0;
520: }

522: PetscErrorCode EllipticInitialize(AppCtx *user)
523: {
524:   PetscInt        m, n, i, j, k, l, linear_index, is, js, ks, ls, istart, iend, iblock;
525:   Vec             XX, YY, ZZ, XXwork, YYwork, ZZwork, UTwork;
526:   PetscReal      *x, *y, *z;
527:   PetscReal       h, meanut;
528:   PetscScalar     hinv, neg_hinv, half = 0.5, sqrt_beta;
529:   PetscInt        im, indx1, indx2, indy1, indy2, indz1, indz2, nx, ny, nz;
530:   IS              is_alldesign, is_allstate;
531:   IS              is_from_d;
532:   IS              is_from_y;
533:   PetscInt        lo, hi, hi2, lo2, ysubnlocal, dsubnlocal;
534:   const PetscInt *ranges, *subranges;
535:   PetscMPIInt     size;
536:   PetscReal       xri, yri, zri, xim, yim, zim, dx1, dx2, dy1, dy2, dz1, dz2, Dx, Dy, Dz;
537:   PetscScalar     v, vx, vy, vz;
538:   PetscInt        offset, subindex, subvec, nrank, kk;

540:   PetscScalar xr[64] = {0.4970, 0.8498, 0.7814, 0.6268, 0.7782, 0.6402, 0.3617, 0.3160, 0.3610, 0.5298, 0.6987, 0.3331, 0.7962, 0.5596, 0.3866, 0.6774, 0.5407, 0.4518, 0.6702, 0.6061, 0.7580, 0.8997,
541:                         0.5198, 0.8326, 0.2138, 0.9198, 0.3000, 0.2833, 0.8288, 0.7076, 0.1820, 0.0728, 0.8447, 0.2367, 0.3239, 0.6413, 0.3114, 0.4731, 0.1192, 0.9273, 0.5724, 0.4331, 0.5136, 0.3547,
542:                         0.4413, 0.2602, 0.5698, 0.7278, 0.5261, 0.6230, 0.2454, 0.3948, 0.7479, 0.6582, 0.4660, 0.5594, 0.7574, 0.1143, 0.5900, 0.1065, 0.4260, 0.3294, 0.8276, 0.0756};

544:   PetscScalar yr[64] = {0.7345, 0.9120, 0.9288, 0.7528, 0.4463, 0.4985, 0.2497, 0.6256, 0.3425, 0.9026, 0.6983, 0.4230, 0.7140, 0.2970, 0.4474, 0.8792, 0.6604, 0.2485, 0.7968, 0.6127, 0.1796, 0.2437,
545:                         0.5938, 0.6137, 0.3867, 0.5658, 0.4575, 0.1009, 0.0863, 0.3361, 0.0738, 0.3985, 0.6602, 0.1437, 0.0934, 0.5983, 0.5950, 0.0763, 0.0768, 0.2288, 0.5761, 0.1129, 0.3841, 0.6150,
546:                         0.6904, 0.6686, 0.1361, 0.4601, 0.4491, 0.3716, 0.1969, 0.6537, 0.6743, 0.6991, 0.4811, 0.5480, 0.1684, 0.4569, 0.6889, 0.8437, 0.3015, 0.2854, 0.8199, 0.2658};

548:   PetscScalar zr[64] = {0.7668, 0.8573, 0.2654, 0.2719, 0.1060, 0.1311, 0.6232, 0.2295, 0.8009, 0.2147, 0.2119, 0.9325, 0.4473, 0.3600, 0.3374, 0.3819, 0.4066, 0.5801, 0.1673, 0.0959, 0.4638, 0.8236,
549:                         0.8800, 0.2939, 0.2028, 0.8262, 0.2706, 0.6276, 0.9085, 0.6443, 0.8241, 0.0712, 0.1824, 0.7789, 0.4389, 0.8415, 0.7055, 0.6639, 0.3653, 0.2078, 0.1987, 0.2297, 0.4321, 0.8115,
550:                         0.4915, 0.7764, 0.4657, 0.4627, 0.4569, 0.4232, 0.8514, 0.0674, 0.3227, 0.1055, 0.6690, 0.6313, 0.9226, 0.5461, 0.4126, 0.2364, 0.6096, 0.7042, 0.3914, 0.0711};

552:   MPI_Comm_size(PETSC_COMM_WORLD, &size);
553:   PetscLogStageRegister("Elliptic Setup", &user->stages[0]);
554:   PetscLogStagePush(user->stages[0]);

556:   /* Create u,y,c,x */
557:   VecCreate(PETSC_COMM_WORLD, &user->u);
558:   VecCreate(PETSC_COMM_WORLD, &user->y);
559:   VecCreate(PETSC_COMM_WORLD, &user->c);
560:   VecSetSizes(user->u, PETSC_DECIDE, user->ndesign);
561:   VecSetFromOptions(user->u);
562:   VecGetLocalSize(user->u, &ysubnlocal);
563:   VecSetSizes(user->y, ysubnlocal * user->ns, user->nstate);
564:   VecSetSizes(user->c, ysubnlocal * user->ns, user->m);
565:   VecSetFromOptions(user->y);
566:   VecSetFromOptions(user->c);

568:   /*
569:      *******************************
570:      Create scatters for x <-> y,u
571:      *******************************

573:      If the state vector y and design vector u are partitioned as
574:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
575:      then the solution vector x is organized as
576:      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
577:      The index sets user->s_is and user->d_is correspond to the indices of the
578:      state and design variables owned by the current processor.
579:   */
580:   VecCreate(PETSC_COMM_WORLD, &user->x);

582:   VecGetOwnershipRange(user->y, &lo, &hi);
583:   VecGetOwnershipRange(user->u, &lo2, &hi2);

585:   ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_allstate);
586:   ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + lo2, 1, &user->s_is);
587:   ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, lo2, 1, &is_alldesign);
588:   ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, hi + lo2, 1, &user->d_is);

590:   VecSetSizes(user->x, hi - lo + hi2 - lo2, user->n);
591:   VecSetFromOptions(user->x);

593:   VecScatterCreate(user->x, user->s_is, user->y, is_allstate, &user->state_scatter);
594:   VecScatterCreate(user->x, user->d_is, user->u, is_alldesign, &user->design_scatter);
595:   ISDestroy(&is_alldesign);
596:   ISDestroy(&is_allstate);
597:   /*
598:      *******************************
599:      Create scatter from y to y_1,y_2,...,y_ns
600:      *******************************
601:   */
602:   PetscMalloc1(user->ns, &user->yi_scatter);
603:   VecDuplicate(user->u, &user->suby);
604:   VecDuplicate(user->u, &user->subq);

606:   VecGetOwnershipRange(user->y, &lo2, &hi2);
607:   istart = 0;
608:   for (i = 0; i < user->ns; i++) {
609:     VecGetOwnershipRange(user->suby, &lo, &hi);
610:     ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_y);
611:     VecScatterCreate(user->y, is_from_y, user->suby, NULL, &user->yi_scatter[i]);
612:     istart = istart + hi - lo;
613:     ISDestroy(&is_from_y);
614:   }
615:   /*
616:      *******************************
617:      Create scatter from d to d_1,d_2,...,d_ns
618:      *******************************
619:   */
620:   VecCreate(PETSC_COMM_WORLD, &user->subd);
621:   VecSetSizes(user->subd, PETSC_DECIDE, user->ndata);
622:   VecSetFromOptions(user->subd);
623:   VecCreate(PETSC_COMM_WORLD, &user->d);
624:   VecGetLocalSize(user->subd, &dsubnlocal);
625:   VecSetSizes(user->d, dsubnlocal * user->ns, user->ndata * user->ns);
626:   VecSetFromOptions(user->d);
627:   PetscMalloc1(user->ns, &user->di_scatter);

629:   VecGetOwnershipRange(user->d, &lo2, &hi2);
630:   istart = 0;
631:   for (i = 0; i < user->ns; i++) {
632:     VecGetOwnershipRange(user->subd, &lo, &hi);
633:     ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_d);
634:     VecScatterCreate(user->d, is_from_d, user->subd, NULL, &user->di_scatter[i]);
635:     istart = istart + hi - lo;
636:     ISDestroy(&is_from_d);
637:   }

639:   PetscMalloc1(user->mx, &x);
640:   PetscMalloc1(user->mx, &y);
641:   PetscMalloc1(user->mx, &z);

643:   user->ksp_its         = 0;
644:   user->ksp_its_initial = 0;

646:   n         = user->mx * user->mx * user->mx;
647:   m         = 3 * user->mx * user->mx * (user->mx - 1);
648:   sqrt_beta = PetscSqrtScalar(user->beta);

650:   VecCreate(PETSC_COMM_WORLD, &XX);
651:   VecCreate(PETSC_COMM_WORLD, &user->q);
652:   VecSetSizes(XX, ysubnlocal, n);
653:   VecSetSizes(user->q, ysubnlocal * user->ns, user->m);
654:   VecSetFromOptions(XX);
655:   VecSetFromOptions(user->q);

657:   VecDuplicate(XX, &YY);
658:   VecDuplicate(XX, &ZZ);
659:   VecDuplicate(XX, &XXwork);
660:   VecDuplicate(XX, &YYwork);
661:   VecDuplicate(XX, &ZZwork);
662:   VecDuplicate(XX, &UTwork);
663:   VecDuplicate(XX, &user->utrue);

665:   /* map for striding q */
666:   VecGetOwnershipRanges(user->q, &ranges);
667:   VecGetOwnershipRanges(user->u, &subranges);

669:   VecGetOwnershipRange(user->q, &lo2, &hi2);
670:   VecGetOwnershipRange(user->u, &lo, &hi);
671:   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
672:   h        = 1.0 / user->mx;
673:   hinv     = user->mx;
674:   neg_hinv = -hinv;

676:   VecGetOwnershipRange(XX, &istart, &iend);
677:   for (linear_index = istart; linear_index < iend; linear_index++) {
678:     i  = linear_index % user->mx;
679:     j  = ((linear_index - i) / user->mx) % user->mx;
680:     k  = ((linear_index - i) / user->mx - j) / user->mx;
681:     vx = h * (i + 0.5);
682:     vy = h * (j + 0.5);
683:     vz = h * (k + 0.5);
684:     VecSetValues(XX, 1, &linear_index, &vx, INSERT_VALUES);
685:     VecSetValues(YY, 1, &linear_index, &vy, INSERT_VALUES);
686:     VecSetValues(ZZ, 1, &linear_index, &vz, INSERT_VALUES);
687:     for (is = 0; is < 2; is++) {
688:       for (js = 0; js < 2; js++) {
689:         for (ks = 0; ks < 2; ks++) {
690:           ls = is * 4 + js * 2 + ks;
691:           if (ls < user->ns) {
692:             l = ls * n + linear_index;
693:             /* remap */
694:             subindex = l % n;
695:             subvec   = l / n;
696:             nrank    = 0;
697:             while (subindex >= subranges[nrank + 1]) nrank++;
698:             offset = subindex - subranges[nrank];
699:             istart = 0;
700:             for (kk = 0; kk < nrank; kk++) istart += user->ns * (subranges[kk + 1] - subranges[kk]);
701:             istart += (subranges[nrank + 1] - subranges[nrank]) * subvec;
702:             l = istart + offset;
703:             v = 100 * PetscSinScalar(2 * PETSC_PI * (vx + 0.25 * is)) * PetscSinScalar(2 * PETSC_PI * (vy + 0.25 * js)) * PetscSinScalar(2 * PETSC_PI * (vz + 0.25 * ks));
704:             VecSetValues(user->q, 1, &l, &v, INSERT_VALUES);
705:           }
706:         }
707:       }
708:     }
709:   }

711:   VecAssemblyBegin(XX);
712:   VecAssemblyEnd(XX);
713:   VecAssemblyBegin(YY);
714:   VecAssemblyEnd(YY);
715:   VecAssemblyBegin(ZZ);
716:   VecAssemblyEnd(ZZ);
717:   VecAssemblyBegin(user->q);
718:   VecAssemblyEnd(user->q);

720:   /* Compute true parameter function
721:      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
722:   VecCopy(XX, XXwork);
723:   VecCopy(YY, YYwork);
724:   VecCopy(ZZ, ZZwork);

726:   VecShift(XXwork, -0.25);
727:   VecShift(YYwork, -0.25);
728:   VecShift(ZZwork, -0.25);

730:   VecPointwiseMult(XXwork, XXwork, XXwork);
731:   VecPointwiseMult(YYwork, YYwork, YYwork);
732:   VecPointwiseMult(ZZwork, ZZwork, ZZwork);

734:   VecCopy(XXwork, UTwork);
735:   VecAXPY(UTwork, 1.0, YYwork);
736:   VecAXPY(UTwork, 1.0, ZZwork);
737:   VecScale(UTwork, -20.0);
738:   VecExp(UTwork);
739:   VecCopy(UTwork, user->utrue);

741:   VecCopy(XX, XXwork);
742:   VecCopy(YY, YYwork);
743:   VecCopy(ZZ, ZZwork);

745:   VecShift(XXwork, -0.75);
746:   VecShift(YYwork, -0.75);
747:   VecShift(ZZwork, -0.75);

749:   VecPointwiseMult(XXwork, XXwork, XXwork);
750:   VecPointwiseMult(YYwork, YYwork, YYwork);
751:   VecPointwiseMult(ZZwork, ZZwork, ZZwork);

753:   VecCopy(XXwork, UTwork);
754:   VecAXPY(UTwork, 1.0, YYwork);
755:   VecAXPY(UTwork, 1.0, ZZwork);
756:   VecScale(UTwork, -20.0);
757:   VecExp(UTwork);

759:   VecAXPY(user->utrue, -1.0, UTwork);

761:   VecDestroy(&XX);
762:   VecDestroy(&YY);
763:   VecDestroy(&ZZ);
764:   VecDestroy(&XXwork);
765:   VecDestroy(&YYwork);
766:   VecDestroy(&ZZwork);
767:   VecDestroy(&UTwork);

769:   /* Initial guess and reference model */
770:   VecDuplicate(user->utrue, &user->ur);
771:   VecSum(user->utrue, &meanut);
772:   meanut = meanut / n;
773:   VecSet(user->ur, meanut);
774:   VecCopy(user->ur, user->u);

776:   /* Generate Grad matrix */
777:   MatCreate(PETSC_COMM_WORLD, &user->Grad);
778:   MatSetSizes(user->Grad, PETSC_DECIDE, ysubnlocal, m, n);
779:   MatSetFromOptions(user->Grad);
780:   MatMPIAIJSetPreallocation(user->Grad, 2, NULL, 2, NULL);
781:   MatSeqAIJSetPreallocation(user->Grad, 2, NULL);
782:   MatGetOwnershipRange(user->Grad, &istart, &iend);

784:   for (i = istart; i < iend; i++) {
785:     if (i < m / 3) {
786:       iblock = i / (user->mx - 1);
787:       j      = iblock * user->mx + (i % (user->mx - 1));
788:       MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
789:       j = j + 1;
790:       MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES);
791:     }
792:     if (i >= m / 3 && i < 2 * m / 3) {
793:       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
794:       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
795:       MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
796:       j = j + user->mx;
797:       MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES);
798:     }
799:     if (i >= 2 * m / 3) {
800:       j = i - 2 * m / 3;
801:       MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
802:       j = j + user->mx * user->mx;
803:       MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES);
804:     }
805:   }

807:   MatAssemblyBegin(user->Grad, MAT_FINAL_ASSEMBLY);
808:   MatAssemblyEnd(user->Grad, MAT_FINAL_ASSEMBLY);

810:   /* Generate arithmetic averaging matrix Av */
811:   MatCreate(PETSC_COMM_WORLD, &user->Av);
812:   MatSetSizes(user->Av, PETSC_DECIDE, ysubnlocal, m, n);
813:   MatSetFromOptions(user->Av);
814:   MatMPIAIJSetPreallocation(user->Av, 2, NULL, 2, NULL);
815:   MatSeqAIJSetPreallocation(user->Av, 2, NULL);
816:   MatGetOwnershipRange(user->Av, &istart, &iend);

818:   for (i = istart; i < iend; i++) {
819:     if (i < m / 3) {
820:       iblock = i / (user->mx - 1);
821:       j      = iblock * user->mx + (i % (user->mx - 1));
822:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
823:       j = j + 1;
824:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
825:     }
826:     if (i >= m / 3 && i < 2 * m / 3) {
827:       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
828:       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
829:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
830:       j = j + user->mx;
831:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
832:     }
833:     if (i >= 2 * m / 3) {
834:       j = i - 2 * m / 3;
835:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
836:       j = j + user->mx * user->mx;
837:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
838:     }
839:   }

841:   MatAssemblyBegin(user->Av, MAT_FINAL_ASSEMBLY);
842:   MatAssemblyEnd(user->Av, MAT_FINAL_ASSEMBLY);

844:   MatCreate(PETSC_COMM_WORLD, &user->L);
845:   MatSetSizes(user->L, PETSC_DECIDE, ysubnlocal, m + n, n);
846:   MatSetFromOptions(user->L);
847:   MatMPIAIJSetPreallocation(user->L, 2, NULL, 2, NULL);
848:   MatSeqAIJSetPreallocation(user->L, 2, NULL);
849:   MatGetOwnershipRange(user->L, &istart, &iend);

851:   for (i = istart; i < iend; i++) {
852:     if (i < m / 3) {
853:       iblock = i / (user->mx - 1);
854:       j      = iblock * user->mx + (i % (user->mx - 1));
855:       MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
856:       j = j + 1;
857:       MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES);
858:     }
859:     if (i >= m / 3 && i < 2 * m / 3) {
860:       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
861:       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
862:       MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
863:       j = j + user->mx;
864:       MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES);
865:     }
866:     if (i >= 2 * m / 3 && i < m) {
867:       j = i - 2 * m / 3;
868:       MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
869:       j = j + user->mx * user->mx;
870:       MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES);
871:     }
872:     if (i >= m) {
873:       j = i - m;
874:       MatSetValues(user->L, 1, &i, 1, &j, &sqrt_beta, INSERT_VALUES);
875:     }
876:   }
877:   MatAssemblyBegin(user->L, MAT_FINAL_ASSEMBLY);
878:   MatAssemblyEnd(user->L, MAT_FINAL_ASSEMBLY);
879:   MatScale(user->L, PetscPowScalar(h, 1.5));

881:   /* Generate Div matrix */
882:   if (!user->use_ptap) {
883:     /* Generate Div matrix */
884:     MatCreate(PETSC_COMM_WORLD, &user->Div);
885:     MatSetSizes(user->Div, ysubnlocal, PETSC_DECIDE, n, m);
886:     MatSetFromOptions(user->Div);
887:     MatMPIAIJSetPreallocation(user->Div, 4, NULL, 4, NULL);
888:     MatSeqAIJSetPreallocation(user->Div, 6, NULL);
889:     MatGetOwnershipRange(user->Grad, &istart, &iend);

891:     for (i = istart; i < iend; i++) {
892:       if (i < m / 3) {
893:         iblock = i / (user->mx - 1);
894:         j      = iblock * user->mx + (i % (user->mx - 1));
895:         MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES);
896:         j = j + 1;
897:         MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES);
898:       }
899:       if (i >= m / 3 && i < 2 * m / 3) {
900:         iblock = (i - m / 3) / (user->mx * (user->mx - 1));
901:         j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
902:         MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES);
903:         j = j + user->mx;
904:         MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES);
905:       }
906:       if (i >= 2 * m / 3) {
907:         j = i - 2 * m / 3;
908:         MatSetValues(user->Div, 1, &j, 1, &i, &neg_hinv, INSERT_VALUES);
909:         j = j + user->mx * user->mx;
910:         MatSetValues(user->Div, 1, &j, 1, &i, &hinv, INSERT_VALUES);
911:       }
912:     }

914:     MatAssemblyBegin(user->Div, MAT_FINAL_ASSEMBLY);
915:     MatAssemblyEnd(user->Div, MAT_FINAL_ASSEMBLY);
916:     MatDuplicate(user->Div, MAT_SHARE_NONZERO_PATTERN, &user->Divwork);
917:   } else {
918:     MatCreate(PETSC_COMM_WORLD, &user->Diag);
919:     MatSetSizes(user->Diag, PETSC_DECIDE, PETSC_DECIDE, m, m);
920:     MatSetFromOptions(user->Diag);
921:     MatMPIAIJSetPreallocation(user->Diag, 1, NULL, 0, NULL);
922:     MatSeqAIJSetPreallocation(user->Diag, 1, NULL);
923:   }

925:   /* Build work vectors and matrices */
926:   VecCreate(PETSC_COMM_WORLD, &user->S);
927:   VecSetSizes(user->S, PETSC_DECIDE, m);
928:   VecSetFromOptions(user->S);

930:   VecCreate(PETSC_COMM_WORLD, &user->lwork);
931:   VecSetSizes(user->lwork, PETSC_DECIDE, m + user->mx * user->mx * user->mx);
932:   VecSetFromOptions(user->lwork);

934:   MatDuplicate(user->Av, MAT_SHARE_NONZERO_PATTERN, &user->Avwork);

936:   VecDuplicate(user->S, &user->Swork);
937:   VecDuplicate(user->S, &user->Sdiag);
938:   VecDuplicate(user->S, &user->Av_u);
939:   VecDuplicate(user->S, &user->Twork);
940:   VecDuplicate(user->y, &user->ywork);
941:   VecDuplicate(user->u, &user->Ywork);
942:   VecDuplicate(user->u, &user->uwork);
943:   VecDuplicate(user->u, &user->js_diag);
944:   VecDuplicate(user->c, &user->cwork);
945:   VecDuplicate(user->d, &user->dwork);

947:   /* Create a matrix-free shell user->Jd for computing B*x */
948:   MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal, user->nstate, user->ndesign, user, &user->Jd);
949:   MatShellSetOperation(user->Jd, MATOP_MULT, (void (*)(void))DesignMatMult);
950:   MatShellSetOperation(user->Jd, MATOP_MULT_TRANSPOSE, (void (*)(void))DesignMatMultTranspose);

952:   /* Compute true state function ytrue given utrue */
953:   VecDuplicate(user->y, &user->ytrue);

955:   /* First compute Av_u = Av*exp(-u) */
956:   VecSet(user->uwork, 0);
957:   VecAXPY(user->uwork, -1.0, user->utrue); /* Note: user->utrue */
958:   VecExp(user->uwork);
959:   MatMult(user->Av, user->uwork, user->Av_u);

961:   /* Next form DSG = Div*S*Grad */
962:   VecCopy(user->Av_u, user->Swork);
963:   VecReciprocal(user->Swork);
964:   if (user->use_ptap) {
965:     MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES);
966:     MatPtAP(user->Diag, user->Grad, MAT_INITIAL_MATRIX, 1.0, &user->DSG);
967:   } else {
968:     MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN);
969:     MatDiagonalScale(user->Divwork, NULL, user->Swork);

971:     MatMatMult(user->Divwork, user->Grad, MAT_INITIAL_MATRIX, 1.0, &user->DSG);
972:   }

974:   MatSetOption(user->DSG, MAT_SYMMETRIC, PETSC_TRUE);
975:   MatSetOption(user->DSG, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);

977:   if (user->use_lrc == PETSC_TRUE) {
978:     v = PetscSqrtReal(1.0 / user->ndesign);
979:     PetscMalloc1(user->ndesign, &user->ones);

981:     for (i = 0; i < user->ndesign; i++) user->ones[i] = v;
982:     MatCreateDense(PETSC_COMM_WORLD, ysubnlocal, PETSC_DECIDE, user->ndesign, 1, user->ones, &user->Ones);
983:     MatCreateLRC(user->DSG, user->Ones, NULL, user->Ones, &user->JsBlock);
984:     MatSetUp(user->JsBlock);
985:   } else {
986:     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
987:     MatCreateShell(PETSC_COMM_WORLD, ysubnlocal, ysubnlocal, user->ndesign, user->ndesign, user, &user->JsBlock);
988:     MatShellSetOperation(user->JsBlock, MATOP_MULT, (void (*)(void))StateBlockMatMult);
989:     MatShellSetOperation(user->JsBlock, MATOP_MULT_TRANSPOSE, (void (*)(void))StateBlockMatMult);
990:   }
991:   MatSetOption(user->JsBlock, MAT_SYMMETRIC, PETSC_TRUE);
992:   MatSetOption(user->JsBlock, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);
993:   MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal * user->ns, user->nstate, user->nstate, user, &user->Js);
994:   MatShellSetOperation(user->Js, MATOP_MULT, (void (*)(void))StateMatMult);
995:   MatShellSetOperation(user->Js, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMult);
996:   MatSetOption(user->Js, MAT_SYMMETRIC, PETSC_TRUE);
997:   MatSetOption(user->Js, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);

999:   MatCreateShell(PETSC_COMM_WORLD, ysubnlocal * user->ns, ysubnlocal * user->ns, user->nstate, user->nstate, user, &user->JsInv);
1000:   MatShellSetOperation(user->JsInv, MATOP_MULT, (void (*)(void))StateInvMatMult);
1001:   MatShellSetOperation(user->JsInv, MATOP_MULT_TRANSPOSE, (void (*)(void))StateInvMatMult);
1002:   MatSetOption(user->JsInv, MAT_SYMMETRIC, PETSC_TRUE);
1003:   MatSetOption(user->JsInv, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);

1005:   MatSetOption(user->DSG, MAT_SYMMETRIC, PETSC_TRUE);
1006:   MatSetOption(user->DSG, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);
1007:   /* Now solve for ytrue */
1008:   KSPCreate(PETSC_COMM_WORLD, &user->solver);
1009:   KSPSetFromOptions(user->solver);

1011:   KSPSetOperators(user->solver, user->JsBlock, user->DSG);

1013:   MatMult(user->JsInv, user->q, user->ytrue);
1014:   /* First compute Av_u = Av*exp(-u) */
1015:   VecSet(user->uwork, 0);
1016:   VecAXPY(user->uwork, -1.0, user->u); /* Note: user->u */
1017:   VecExp(user->uwork);
1018:   MatMult(user->Av, user->uwork, user->Av_u);

1020:   /* Next update DSG = Div*S*Grad  with user->u */
1021:   VecCopy(user->Av_u, user->Swork);
1022:   VecReciprocal(user->Swork);
1023:   if (user->use_ptap) {
1024:     MatDiagonalSet(user->Diag, user->Swork, INSERT_VALUES);
1025:     MatPtAP(user->Diag, user->Grad, MAT_REUSE_MATRIX, 1.0, &user->DSG);
1026:   } else {
1027:     MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN);
1028:     MatDiagonalScale(user->Divwork, NULL, user->Av_u);
1029:     MatProductNumeric(user->DSG);
1030:   }

1032:   /* Now solve for y */

1034:   MatMult(user->JsInv, user->q, user->y);

1036:   user->ksp_its_initial = user->ksp_its;
1037:   user->ksp_its         = 0;
1038:   /* Construct projection matrix Q (blocks) */
1039:   MatCreate(PETSC_COMM_WORLD, &user->Q);
1040:   MatSetSizes(user->Q, dsubnlocal, ysubnlocal, user->ndata, user->ndesign);
1041:   MatSetFromOptions(user->Q);
1042:   MatMPIAIJSetPreallocation(user->Q, 8, NULL, 8, NULL);
1043:   MatSeqAIJSetPreallocation(user->Q, 8, NULL);

1045:   for (i = 0; i < user->mx; i++) {
1046:     x[i] = h * (i + 0.5);
1047:     y[i] = h * (i + 0.5);
1048:     z[i] = h * (i + 0.5);
1049:   }
1050:   MatGetOwnershipRange(user->Q, &istart, &iend);

1052:   nx = user->mx;
1053:   ny = user->mx;
1054:   nz = user->mx;
1055:   for (i = istart; i < iend; i++) {
1056:     xri = xr[i];
1057:     im  = 0;
1058:     xim = x[im];
1059:     while (xri > xim && im < nx) {
1060:       im  = im + 1;
1061:       xim = x[im];
1062:     }
1063:     indx1 = im - 1;
1064:     indx2 = im;
1065:     dx1   = xri - x[indx1];
1066:     dx2   = x[indx2] - xri;

1068:     yri = yr[i];
1069:     im  = 0;
1070:     yim = y[im];
1071:     while (yri > yim && im < ny) {
1072:       im  = im + 1;
1073:       yim = y[im];
1074:     }
1075:     indy1 = im - 1;
1076:     indy2 = im;
1077:     dy1   = yri - y[indy1];
1078:     dy2   = y[indy2] - yri;

1080:     zri = zr[i];
1081:     im  = 0;
1082:     zim = z[im];
1083:     while (zri > zim && im < nz) {
1084:       im  = im + 1;
1085:       zim = z[im];
1086:     }
1087:     indz1 = im - 1;
1088:     indz2 = im;
1089:     dz1   = zri - z[indz1];
1090:     dz2   = z[indz2] - zri;

1092:     Dx = x[indx2] - x[indx1];
1093:     Dy = y[indy2] - y[indy1];
1094:     Dz = z[indz2] - z[indz1];

1096:     j = indx1 + indy1 * nx + indz1 * nx * ny;
1097:     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1098:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);

1100:     j = indx1 + indy1 * nx + indz2 * nx * ny;
1101:     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1102:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);

1104:     j = indx1 + indy2 * nx + indz1 * nx * ny;
1105:     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1106:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);

1108:     j = indx1 + indy2 * nx + indz2 * nx * ny;
1109:     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1110:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);

1112:     j = indx2 + indy1 * nx + indz1 * nx * ny;
1113:     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1114:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);

1116:     j = indx2 + indy1 * nx + indz2 * nx * ny;
1117:     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1118:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);

1120:     j = indx2 + indy2 * nx + indz1 * nx * ny;
1121:     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1122:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);

1124:     j = indx2 + indy2 * nx + indz2 * nx * ny;
1125:     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1126:     MatSetValues(user->Q, 1, &i, 1, &j, &v, INSERT_VALUES);
1127:   }

1129:   MatAssemblyBegin(user->Q, MAT_FINAL_ASSEMBLY);
1130:   MatAssemblyEnd(user->Q, MAT_FINAL_ASSEMBLY);
1131:   /* Create MQ (composed of blocks of Q */
1132:   MatCreateShell(PETSC_COMM_WORLD, dsubnlocal * user->ns, PETSC_DECIDE, user->ndata * user->ns, user->nstate, user, &user->MQ);
1133:   MatShellSetOperation(user->MQ, MATOP_MULT, (void (*)(void))QMatMult);
1134:   MatShellSetOperation(user->MQ, MATOP_MULT_TRANSPOSE, (void (*)(void))QMatMultTranspose);

1136:   /* Add noise to the measurement data */
1137:   VecSet(user->ywork, 1.0);
1138:   VecAYPX(user->ywork, user->noise, user->ytrue);
1139:   MatMult(user->MQ, user->ywork, user->d);

1141:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1142:   PetscFree(x);
1143:   PetscFree(y);
1144:   PetscFree(z);
1145:   PetscLogStagePop();
1146:   return 0;
1147: }

1149: PetscErrorCode EllipticDestroy(AppCtx *user)
1150: {
1151:   PetscInt i;

1153:   MatDestroy(&user->DSG);
1154:   KSPDestroy(&user->solver);
1155:   MatDestroy(&user->Q);
1156:   MatDestroy(&user->MQ);
1157:   if (!user->use_ptap) {
1158:     MatDestroy(&user->Div);
1159:     MatDestroy(&user->Divwork);
1160:   } else {
1161:     MatDestroy(&user->Diag);
1162:   }
1163:   if (user->use_lrc) MatDestroy(&user->Ones);

1165:   MatDestroy(&user->Grad);
1166:   MatDestroy(&user->Av);
1167:   MatDestroy(&user->Avwork);
1168:   MatDestroy(&user->L);
1169:   MatDestroy(&user->Js);
1170:   MatDestroy(&user->Jd);
1171:   MatDestroy(&user->JsBlock);
1172:   MatDestroy(&user->JsInv);

1174:   VecDestroy(&user->x);
1175:   VecDestroy(&user->u);
1176:   VecDestroy(&user->uwork);
1177:   VecDestroy(&user->utrue);
1178:   VecDestroy(&user->y);
1179:   VecDestroy(&user->ywork);
1180:   VecDestroy(&user->ytrue);
1181:   VecDestroy(&user->c);
1182:   VecDestroy(&user->cwork);
1183:   VecDestroy(&user->ur);
1184:   VecDestroy(&user->q);
1185:   VecDestroy(&user->d);
1186:   VecDestroy(&user->dwork);
1187:   VecDestroy(&user->lwork);
1188:   VecDestroy(&user->S);
1189:   VecDestroy(&user->Swork);
1190:   VecDestroy(&user->Sdiag);
1191:   VecDestroy(&user->Ywork);
1192:   VecDestroy(&user->Twork);
1193:   VecDestroy(&user->Av_u);
1194:   VecDestroy(&user->js_diag);
1195:   ISDestroy(&user->s_is);
1196:   ISDestroy(&user->d_is);
1197:   VecDestroy(&user->suby);
1198:   VecDestroy(&user->subd);
1199:   VecDestroy(&user->subq);
1200:   VecScatterDestroy(&user->state_scatter);
1201:   VecScatterDestroy(&user->design_scatter);
1202:   for (i = 0; i < user->ns; i++) {
1203:     VecScatterDestroy(&user->yi_scatter[i]);
1204:     VecScatterDestroy(&user->di_scatter[i]);
1205:   }
1206:   PetscFree(user->yi_scatter);
1207:   PetscFree(user->di_scatter);
1208:   if (user->use_lrc) {
1209:     PetscFree(user->ones);
1210:     MatDestroy(&user->Ones);
1211:   }
1212:   return 0;
1213: }

1215: PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1216: {
1217:   Vec       X;
1218:   PetscReal unorm, ynorm;
1219:   AppCtx   *user = (AppCtx *)ptr;

1221:   TaoGetSolution(tao, &X);
1222:   Scatter(X, user->ywork, user->state_scatter, user->uwork, user->design_scatter);
1223:   VecAXPY(user->ywork, -1.0, user->ytrue);
1224:   VecAXPY(user->uwork, -1.0, user->utrue);
1225:   VecNorm(user->uwork, NORM_2, &unorm);
1226:   VecNorm(user->ywork, NORM_2, &ynorm);
1227:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n", (double)unorm, (double)ynorm);
1228:   return 0;
1229: }

1231: /*TEST

1233:    build:
1234:       requires: !complex

1236:    test:
1237:       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1238:       requires: !single

1240:    test:
1241:       suffix: 2
1242:       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1243:       requires: !single

1245: TEST*/