Actual source code: parabolic.c

  1: #include <petsc/private/taoimpl.h>

  3: typedef struct {
  4:   PetscInt  n;            /*  Number of variables */
  5:   PetscInt  m;            /*  Number of constraints per time step */
  6:   PetscInt  mx;           /*  grid points in each direction */
  7:   PetscInt  nt;           /*  Number of time steps; as of now, must be divisible by 8 */
  8:   PetscInt  ndata;        /*  Number of data points per sample */
  9:   PetscInt  ns;           /*  Number of samples */
 10:   PetscInt *sample_times; /*  Times of samples */
 11:   IS        s_is;
 12:   IS        d_is;

 14:   VecScatter  state_scatter;
 15:   VecScatter  design_scatter;
 16:   VecScatter *yi_scatter;
 17:   VecScatter *di_scatter;

 19:   Mat       Js, Jd, JsBlockPrec, JsInv, JsBlock;
 20:   PetscBool jformed, dsg_formed;

 22:   PetscReal alpha; /*  Regularization parameter */
 23:   PetscReal beta;  /*  Weight attributed to ||u||^2 in regularization functional */
 24:   PetscReal noise; /*  Amount of noise to add to data */
 25:   PetscReal ht;    /*  Time step */

 27:   Mat Qblock, QblockT;
 28:   Mat L, LT;
 29:   Mat Div, Divwork;
 30:   Mat Grad;
 31:   Mat Av, Avwork, AvT;
 32:   Mat DSG;
 33:   Vec q;
 34:   Vec ur; /*  reference */

 36:   Vec  d;
 37:   Vec  dwork;
 38:   Vec *di;

 40:   Vec y; /*  state variables */
 41:   Vec ywork;

 43:   Vec  ytrue;
 44:   Vec *yi, *yiwork;

 46:   Vec u; /*  design variables */
 47:   Vec uwork;

 49:   Vec utrue;
 50:   Vec js_diag;
 51:   Vec c; /*  constraint vector */
 52:   Vec cwork;

 54:   Vec lwork;
 55:   Vec S;
 56:   Vec Rwork, Swork, Twork;
 57:   Vec Av_u;

 59:   KSP solver;
 60:   PC  prec;

 62:   PetscInt ksp_its;
 63:   PetscInt ksp_its_initial;
 64: } AppCtx;

 66: PetscErrorCode FormFunction(Tao, Vec, PetscReal *, void *);
 67: PetscErrorCode FormGradient(Tao, Vec, Vec, void *);
 68: PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal *, Vec, void *);
 69: PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void *);
 70: PetscErrorCode FormJacobianDesign(Tao, Vec, Mat, void *);
 71: PetscErrorCode FormConstraints(Tao, Vec, Vec, void *);
 72: PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void *);
 73: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 74: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat);
 75: PetscErrorCode ParabolicInitialize(AppCtx *user);
 76: PetscErrorCode ParabolicDestroy(AppCtx *user);
 77: PetscErrorCode ParabolicMonitor(Tao, void *);

 79: PetscErrorCode StateMatMult(Mat, Vec, Vec);
 80: PetscErrorCode StateMatBlockMult(Mat, Vec, Vec);
 81: PetscErrorCode StateMatMultTranspose(Mat, Vec, Vec);
 82: PetscErrorCode StateMatGetDiagonal(Mat, Vec);
 83: PetscErrorCode StateMatDuplicate(Mat, MatDuplicateOption, Mat *);
 84: PetscErrorCode StateMatInvMult(Mat, Vec, Vec);
 85: PetscErrorCode StateMatInvTransposeMult(Mat, Vec, Vec);
 86: PetscErrorCode StateMatBlockPrecMult(PC, Vec, Vec);

 88: PetscErrorCode DesignMatMult(Mat, Vec, Vec);
 89: PetscErrorCode DesignMatMultTranspose(Mat, Vec, Vec);

 91: PetscErrorCode Gather_i(Vec, Vec *, VecScatter *, PetscInt);
 92: PetscErrorCode Scatter_i(Vec, Vec *, VecScatter *, PetscInt);

 94: static char help[] = "";

 96: int main(int argc, char **argv)
 97: {
 98:   Vec      x, x0;
 99:   Tao      tao;
100:   AppCtx   user;
101:   IS       is_allstate, is_alldesign;
102:   PetscInt lo, hi, hi2, lo2, ksp_old;
103:   PetscInt ntests = 1;
104:   PetscInt i;
105: #if defined(PETSC_USE_LOG)
106:   PetscLogStage stages[1];
107: #endif

110:   PetscInitialize(&argc, &argv, (char *)0, help);
111:   user.mx = 8;
112:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "parabolic example", NULL);
113:   PetscOptionsInt("-mx", "Number of grid points in each direction", "", user.mx, &user.mx, NULL);
114:   user.nt = 8;
115:   PetscOptionsInt("-nt", "Number of time steps", "", user.nt, &user.nt, NULL);
116:   user.ndata = 64;
117:   PetscOptionsInt("-ndata", "Numbers of data points per sample", "", user.ndata, &user.ndata, NULL);
118:   user.ns = 8;
119:   PetscOptionsInt("-ns", "Number of samples", "", user.ns, &user.ns, NULL);
120:   user.alpha = 1.0;
121:   PetscOptionsReal("-alpha", "Regularization parameter", "", user.alpha, &user.alpha, NULL);
122:   user.beta = 0.01;
123:   PetscOptionsReal("-beta", "Weight attributed to ||u||^2 in regularization functional", "", user.beta, &user.beta, NULL);
124:   user.noise = 0.01;
125:   PetscOptionsReal("-noise", "Amount of noise to add to data", "", user.noise, &user.noise, NULL);
126:   PetscOptionsInt("-ntests", "Number of times to repeat TaoSolve", "", ntests, &ntests, NULL);
127:   PetscOptionsEnd();

129:   user.m  = user.mx * user.mx * user.mx; /*  number of constraints per time step */
130:   user.n  = user.m * (user.nt + 1);      /*  number of variables */
131:   user.ht = (PetscReal)1 / user.nt;

133:   VecCreate(PETSC_COMM_WORLD, &user.u);
134:   VecCreate(PETSC_COMM_WORLD, &user.y);
135:   VecCreate(PETSC_COMM_WORLD, &user.c);
136:   VecSetSizes(user.u, PETSC_DECIDE, user.n - user.m * user.nt);
137:   VecSetSizes(user.y, PETSC_DECIDE, user.m * user.nt);
138:   VecSetSizes(user.c, PETSC_DECIDE, user.m * user.nt);
139:   VecSetFromOptions(user.u);
140:   VecSetFromOptions(user.y);
141:   VecSetFromOptions(user.c);

143:   /* Create scatters for reduced spaces.
144:      If the state vector y and design vector u are partitioned as
145:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
146:      then the solution vector x is organized as
147:      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
148:      The index sets user.s_is and user.d_is correspond to the indices of the
149:      state and design variables owned by the current processor.
150:   */
151:   VecCreate(PETSC_COMM_WORLD, &x);

153:   VecGetOwnershipRange(user.y, &lo, &hi);
154:   VecGetOwnershipRange(user.u, &lo2, &hi2);

156:   ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_allstate);
157:   ISCreateStride(PETSC_COMM_SELF, hi - lo, lo + lo2, 1, &user.s_is);

159:   ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, lo2, 1, &is_alldesign);
160:   ISCreateStride(PETSC_COMM_SELF, hi2 - lo2, hi + lo2, 1, &user.d_is);

162:   VecSetSizes(x, hi - lo + hi2 - lo2, user.n);
163:   VecSetFromOptions(x);

165:   VecScatterCreate(x, user.s_is, user.y, is_allstate, &user.state_scatter);
166:   VecScatterCreate(x, user.d_is, user.u, is_alldesign, &user.design_scatter);
167:   ISDestroy(&is_alldesign);
168:   ISDestroy(&is_allstate);

170:   /* Create TAO solver and set desired solution method */
171:   TaoCreate(PETSC_COMM_WORLD, &tao);
172:   TaoSetType(tao, TAOLCL);

174:   /* Set up initial vectors and matrices */
175:   ParabolicInitialize(&user);

177:   Gather(x, user.y, user.state_scatter, user.u, user.design_scatter);
178:   VecDuplicate(x, &x0);
179:   VecCopy(x, x0);

181:   /* Set solution vector with an initial guess */
182:   TaoSetSolution(tao, x);
183:   TaoSetObjective(tao, FormFunction, &user);
184:   TaoSetGradient(tao, NULL, FormGradient, &user);
185:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user);

187:   TaoSetJacobianStateRoutine(tao, user.Js, user.JsBlockPrec, user.JsInv, FormJacobianState, &user);
188:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user);

190:   TaoSetFromOptions(tao);
191:   TaoSetStateDesignIS(tao, user.s_is, user.d_is);

193:   /* SOLVE THE APPLICATION */
194:   PetscLogStageRegister("Trials", &stages[0]);
195:   PetscLogStagePush(stages[0]);
196:   user.ksp_its_initial = user.ksp_its;
197:   ksp_old              = user.ksp_its;
198:   for (i = 0; i < ntests; i++) {
199:     TaoSolve(tao);
200:     PetscPrintf(PETSC_COMM_WORLD, "KSP Iterations = %" PetscInt_FMT "\n", user.ksp_its - ksp_old);
201:     VecCopy(x0, x);
202:     TaoSetSolution(tao, x);
203:   }
204:   PetscLogStagePop();
205:   PetscBarrier((PetscObject)x);
206:   PetscPrintf(PETSC_COMM_WORLD, "KSP iterations within initialization: ");
207:   PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its_initial);
208:   PetscPrintf(PETSC_COMM_WORLD, "Total KSP iterations over %" PetscInt_FMT " trial(s): ", ntests);
209:   PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", user.ksp_its);
210:   PetscPrintf(PETSC_COMM_WORLD, "KSP iterations per trial: ");
211:   PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "\n", (user.ksp_its - user.ksp_its_initial) / ntests);

213:   TaoDestroy(&tao);
214:   VecDestroy(&x);
215:   VecDestroy(&x0);
216:   ParabolicDestroy(&user);

218:   PetscFinalize();
219:   return 0;
220: }
221: /* ------------------------------------------------------------------- */
222: /*
223:    dwork = Qy - d
224:    lwork = L*(u-ur)
225:    f = 1/2 * (dwork.dork + alpha*lwork.lwork)
226: */
227: PetscErrorCode FormFunction(Tao tao, Vec X, PetscReal *f, void *ptr)
228: {
229:   PetscReal d1 = 0, d2 = 0;
230:   PetscInt  i, j;
231:   AppCtx   *user = (AppCtx *)ptr;

233:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
234:   Scatter_i(user->y, user->yi, user->yi_scatter, user->nt);
235:   for (j = 0; j < user->ns; j++) {
236:     i = user->sample_times[j];
237:     MatMult(user->Qblock, user->yi[i], user->di[j]);
238:   }
239:   Gather_i(user->dwork, user->di, user->di_scatter, user->ns);
240:   VecAXPY(user->dwork, -1.0, user->d);
241:   VecDot(user->dwork, user->dwork, &d1);

243:   VecWAXPY(user->uwork, -1.0, user->ur, user->u);
244:   MatMult(user->L, user->uwork, user->lwork);
245:   VecDot(user->lwork, user->lwork, &d2);

247:   *f = 0.5 * (d1 + user->alpha * d2);
248:   return 0;
249: }

251: /* ------------------------------------------------------------------- */
252: /*
253:     state: g_s = Q' *(Qy - d)
254:     design: g_d = alpha*L'*L*(u-ur)
255: */
256: PetscErrorCode FormGradient(Tao tao, Vec X, Vec G, void *ptr)
257: {
258:   PetscInt i, j;
259:   AppCtx  *user = (AppCtx *)ptr;

261:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
262:   Scatter_i(user->y, user->yi, user->yi_scatter, user->nt);
263:   for (j = 0; j < user->ns; j++) {
264:     i = user->sample_times[j];
265:     MatMult(user->Qblock, user->yi[i], user->di[j]);
266:   }
267:   Gather_i(user->dwork, user->di, user->di_scatter, user->ns);
268:   VecAXPY(user->dwork, -1.0, user->d);
269:   Scatter_i(user->dwork, user->di, user->di_scatter, user->ns);
270:   VecSet(user->ywork, 0.0);
271:   Scatter_i(user->ywork, user->yiwork, user->yi_scatter, user->nt);
272:   for (j = 0; j < user->ns; j++) {
273:     i = user->sample_times[j];
274:     MatMult(user->QblockT, user->di[j], user->yiwork[i]);
275:   }
276:   Gather_i(user->ywork, user->yiwork, user->yi_scatter, user->nt);

278:   VecWAXPY(user->uwork, -1.0, user->ur, user->u);
279:   MatMult(user->L, user->uwork, user->lwork);
280:   MatMult(user->LT, user->lwork, user->uwork);
281:   VecScale(user->uwork, user->alpha);
282:   Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter);
283:   return 0;
284: }

286: PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
287: {
288:   PetscReal d1, d2;
289:   PetscInt  i, j;
290:   AppCtx   *user = (AppCtx *)ptr;

292:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
293:   Scatter_i(user->y, user->yi, user->yi_scatter, user->nt);
294:   for (j = 0; j < user->ns; j++) {
295:     i = user->sample_times[j];
296:     MatMult(user->Qblock, user->yi[i], user->di[j]);
297:   }
298:   Gather_i(user->dwork, user->di, user->di_scatter, user->ns);
299:   VecAXPY(user->dwork, -1.0, user->d);
300:   VecDot(user->dwork, user->dwork, &d1);
301:   Scatter_i(user->dwork, user->di, user->di_scatter, user->ns);
302:   VecSet(user->ywork, 0.0);
303:   Scatter_i(user->ywork, user->yiwork, user->yi_scatter, user->nt);
304:   for (j = 0; j < user->ns; j++) {
305:     i = user->sample_times[j];
306:     MatMult(user->QblockT, user->di[j], user->yiwork[i]);
307:   }
308:   Gather_i(user->ywork, user->yiwork, user->yi_scatter, user->nt);

310:   VecWAXPY(user->uwork, -1.0, user->ur, user->u);
311:   MatMult(user->L, user->uwork, user->lwork);
312:   VecDot(user->lwork, user->lwork, &d2);
313:   MatMult(user->LT, user->lwork, user->uwork);
314:   VecScale(user->uwork, user->alpha);
315:   *f = 0.5 * (d1 + user->alpha * d2);

317:   Gather(G, user->ywork, user->state_scatter, user->uwork, user->design_scatter);
318:   return 0;
319: }

321: /* ------------------------------------------------------------------- */
322: /* A
323: MatShell object
324: */
325: PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
326: {
327:   AppCtx *user = (AppCtx *)ptr;

329:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
330:   VecSet(user->uwork, 0);
331:   VecAXPY(user->uwork, -1.0, user->u);
332:   VecExp(user->uwork);
333:   MatMult(user->Av, user->uwork, user->Av_u);
334:   VecCopy(user->Av_u, user->Swork);
335:   VecReciprocal(user->Swork);
336:   MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN);
337:   MatDiagonalScale(user->Divwork, NULL, user->Swork);
338:   if (user->dsg_formed) {
339:     MatProductNumeric(user->DSG);
340:   } else {
341:     MatMatMult(user->Divwork, user->Grad, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &user->DSG);
342:     user->dsg_formed = PETSC_TRUE;
343:   }

345:   /* B = speye(nx^3) + ht*DSG; */
346:   MatScale(user->DSG, user->ht);
347:   MatShift(user->DSG, 1.0);
348:   return 0;
349: }

351: /* ------------------------------------------------------------------- */
352: /* B */
353: PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
354: {
355:   AppCtx *user = (AppCtx *)ptr;

357:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
358:   return 0;
359: }

361: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
362: {
363:   PetscInt i;
364:   AppCtx  *user;

366:   MatShellGetContext(J_shell, &user);
367:   Scatter_i(X, user->yi, user->yi_scatter, user->nt);
368:   MatMult(user->JsBlock, user->yi[0], user->yiwork[0]);
369:   for (i = 1; i < user->nt; i++) {
370:     MatMult(user->JsBlock, user->yi[i], user->yiwork[i]);
371:     VecAXPY(user->yiwork[i], -1.0, user->yi[i - 1]);
372:   }
373:   Gather_i(Y, user->yiwork, user->yi_scatter, user->nt);
374:   return 0;
375: }

377: PetscErrorCode StateMatMultTranspose(Mat J_shell, Vec X, Vec Y)
378: {
379:   PetscInt i;
380:   AppCtx  *user;

382:   MatShellGetContext(J_shell, &user);
383:   Scatter_i(X, user->yi, user->yi_scatter, user->nt);
384:   for (i = 0; i < user->nt - 1; i++) {
385:     MatMult(user->JsBlock, user->yi[i], user->yiwork[i]);
386:     VecAXPY(user->yiwork[i], -1.0, user->yi[i + 1]);
387:   }
388:   i = user->nt - 1;
389:   MatMult(user->JsBlock, user->yi[i], user->yiwork[i]);
390:   Gather_i(Y, user->yiwork, user->yi_scatter, user->nt);
391:   return 0;
392: }

394: PetscErrorCode StateMatBlockMult(Mat J_shell, Vec X, Vec Y)
395: {
396:   AppCtx *user;

398:   MatShellGetContext(J_shell, &user);
399:   MatMult(user->Grad, X, user->Swork);
400:   VecPointwiseDivide(user->Swork, user->Swork, user->Av_u);
401:   MatMult(user->Div, user->Swork, Y);
402:   VecAYPX(Y, user->ht, X);
403:   return 0;
404: }

406: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
407: {
408:   PetscInt i;
409:   AppCtx  *user;

411:   MatShellGetContext(J_shell, &user);

413:   /* sdiag(1./v) */
414:   VecSet(user->uwork, 0);
415:   VecAXPY(user->uwork, -1.0, user->u);
416:   VecExp(user->uwork);

418:   /* sdiag(1./((Av*(1./v)).^2)) */
419:   MatMult(user->Av, user->uwork, user->Swork);
420:   VecPointwiseMult(user->Swork, user->Swork, user->Swork);
421:   VecReciprocal(user->Swork);

423:   /* (Av * (sdiag(1./v) * b)) */
424:   VecPointwiseMult(user->uwork, user->uwork, X);
425:   MatMult(user->Av, user->uwork, user->Twork);

427:   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
428:   VecPointwiseMult(user->Swork, user->Twork, user->Swork);

430:   Scatter_i(user->y, user->yi, user->yi_scatter, user->nt);
431:   for (i = 0; i < user->nt; i++) {
432:     /* (sdiag(Grad*y(:,i)) */
433:     MatMult(user->Grad, user->yi[i], user->Twork);

435:     /* ht * Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
436:     VecPointwiseMult(user->Twork, user->Twork, user->Swork);
437:     MatMult(user->Div, user->Twork, user->yiwork[i]);
438:     VecScale(user->yiwork[i], user->ht);
439:   }
440:   Gather_i(Y, user->yiwork, user->yi_scatter, user->nt);

442:   return 0;
443: }

445: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
446: {
447:   PetscInt i;
448:   AppCtx  *user;

450:   MatShellGetContext(J_shell, &user);

452:   /* sdiag(1./((Av*(1./v)).^2)) */
453:   VecSet(user->uwork, 0);
454:   VecAXPY(user->uwork, -1.0, user->u);
455:   VecExp(user->uwork);
456:   MatMult(user->Av, user->uwork, user->Rwork);
457:   VecPointwiseMult(user->Rwork, user->Rwork, user->Rwork);
458:   VecReciprocal(user->Rwork);

460:   VecSet(Y, 0.0);
461:   Scatter_i(user->y, user->yi, user->yi_scatter, user->nt);
462:   Scatter_i(X, user->yiwork, user->yi_scatter, user->nt);
463:   for (i = 0; i < user->nt; i++) {
464:     /* (Div' * b(:,i)) */
465:     MatMult(user->Grad, user->yiwork[i], user->Swork);

467:     /* sdiag(Grad*y(:,i)) */
468:     MatMult(user->Grad, user->yi[i], user->Twork);

470:     /* (sdiag(Grad*y(:,i)) * (Div' * b(:,i))) */
471:     VecPointwiseMult(user->Twork, user->Swork, user->Twork);

473:     /* (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i)))) */
474:     VecPointwiseMult(user->Twork, user->Rwork, user->Twork);

476:     /* (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
477:     MatMult(user->AvT, user->Twork, user->yiwork[i]);

479:     /* sdiag(1./v) * (Av' * (sdiag(1./((Av*(1./v)).^2)) * (sdiag(Grad*y(:,i)) * (Div' * b(:,i))))) */
480:     VecPointwiseMult(user->yiwork[i], user->uwork, user->yiwork[i]);
481:     VecAXPY(Y, user->ht, user->yiwork[i]);
482:   }
483:   return 0;
484: }

486: PetscErrorCode StateMatBlockPrecMult(PC PC_shell, Vec X, Vec Y)
487: {
488:   AppCtx *user;

490:   PCShellGetContext(PC_shell, &user);

492:   if (user->dsg_formed) {
493:     MatSOR(user->DSG, X, 1.0, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_SYMMETRIC_SWEEP), 0.0, 1, 1, Y);
494:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "DSG not formed");
495:   return 0;
496: }

498: PetscErrorCode StateMatInvMult(Mat J_shell, Vec X, Vec Y)
499: {
500:   AppCtx  *user;
501:   PetscInt its, i;

503:   MatShellGetContext(J_shell, &user);

505:   if (Y == user->ytrue) {
506:     /* First solve is done with true solution to set up problem */
507:     KSPSetTolerances(user->solver, 1e-8, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
508:   } else {
509:     KSPSetTolerances(user->solver, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT);
510:   }

512:   Scatter_i(X, user->yi, user->yi_scatter, user->nt);
513:   KSPSolve(user->solver, user->yi[0], user->yiwork[0]);
514:   KSPGetIterationNumber(user->solver, &its);
515:   user->ksp_its = user->ksp_its + its;

517:   for (i = 1; i < user->nt; i++) {
518:     VecAXPY(user->yi[i], 1.0, user->yiwork[i - 1]);
519:     KSPSolve(user->solver, user->yi[i], user->yiwork[i]);
520:     KSPGetIterationNumber(user->solver, &its);
521:     user->ksp_its = user->ksp_its + its;
522:   }
523:   Gather_i(Y, user->yiwork, user->yi_scatter, user->nt);
524:   return 0;
525: }

527: PetscErrorCode StateMatInvTransposeMult(Mat J_shell, Vec X, Vec Y)
528: {
529:   AppCtx  *user;
530:   PetscInt its, i;

532:   MatShellGetContext(J_shell, &user);

534:   Scatter_i(X, user->yi, user->yi_scatter, user->nt);

536:   i = user->nt - 1;
537:   KSPSolve(user->solver, user->yi[i], user->yiwork[i]);

539:   KSPGetIterationNumber(user->solver, &its);
540:   user->ksp_its = user->ksp_its + its;

542:   for (i = user->nt - 2; i >= 0; i--) {
543:     VecAXPY(user->yi[i], 1.0, user->yiwork[i + 1]);
544:     KSPSolve(user->solver, user->yi[i], user->yiwork[i]);

546:     KSPGetIterationNumber(user->solver, &its);
547:     user->ksp_its = user->ksp_its + its;
548:   }

550:   Gather_i(Y, user->yiwork, user->yi_scatter, user->nt);
551:   return 0;
552: }

554: PetscErrorCode StateMatDuplicate(Mat J_shell, MatDuplicateOption opt, Mat *new_shell)
555: {
556:   AppCtx *user;

558:   MatShellGetContext(J_shell, &user);

560:   MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, new_shell);
561:   MatShellSetOperation(*new_shell, MATOP_MULT, (void (*)(void))StateMatMult);
562:   MatShellSetOperation(*new_shell, MATOP_DUPLICATE, (void (*)(void))StateMatDuplicate);
563:   MatShellSetOperation(*new_shell, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMultTranspose);
564:   MatShellSetOperation(*new_shell, MATOP_GET_DIAGONAL, (void (*)(void))StateMatGetDiagonal);
565:   return 0;
566: }

568: PetscErrorCode StateMatGetDiagonal(Mat J_shell, Vec X)
569: {
570:   AppCtx *user;

572:   MatShellGetContext(J_shell, &user);
573:   VecCopy(user->js_diag, X);
574:   return 0;
575: }

577: PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
578: {
579:   /* con = Ay - q, A = [B  0  0 ... 0;
580:                        -I  B  0 ... 0;
581:                         0 -I  B ... 0;
582:                              ...     ;
583:                         0    ... -I B]
584:      B = ht * Div * Sigma * Grad + eye */
585:   PetscInt i;
586:   AppCtx  *user = (AppCtx *)ptr;

588:   Scatter(X, user->y, user->state_scatter, user->u, user->design_scatter);
589:   Scatter_i(user->y, user->yi, user->yi_scatter, user->nt);
590:   MatMult(user->JsBlock, user->yi[0], user->yiwork[0]);
591:   for (i = 1; i < user->nt; i++) {
592:     MatMult(user->JsBlock, user->yi[i], user->yiwork[i]);
593:     VecAXPY(user->yiwork[i], -1.0, user->yi[i - 1]);
594:   }
595:   Gather_i(C, user->yiwork, user->yi_scatter, user->nt);
596:   VecAXPY(C, -1.0, user->q);
597:   return 0;
598: }

600: PetscErrorCode Scatter(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
601: {
602:   VecScatterBegin(s_scat, x, state, INSERT_VALUES, SCATTER_FORWARD);
603:   VecScatterEnd(s_scat, x, state, INSERT_VALUES, SCATTER_FORWARD);
604:   VecScatterBegin(d_scat, x, design, INSERT_VALUES, SCATTER_FORWARD);
605:   VecScatterEnd(d_scat, x, design, INSERT_VALUES, SCATTER_FORWARD);
606:   return 0;
607: }

609: PetscErrorCode Scatter_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
610: {
611:   PetscInt i;

613:   for (i = 0; i < nt; i++) {
614:     VecScatterBegin(scat[i], y, yi[i], INSERT_VALUES, SCATTER_FORWARD);
615:     VecScatterEnd(scat[i], y, yi[i], INSERT_VALUES, SCATTER_FORWARD);
616:   }
617:   return 0;
618: }

620: PetscErrorCode Gather(Vec x, Vec state, VecScatter s_scat, Vec design, VecScatter d_scat)
621: {
622:   VecScatterBegin(s_scat, state, x, INSERT_VALUES, SCATTER_REVERSE);
623:   VecScatterEnd(s_scat, state, x, INSERT_VALUES, SCATTER_REVERSE);
624:   VecScatterBegin(d_scat, design, x, INSERT_VALUES, SCATTER_REVERSE);
625:   VecScatterEnd(d_scat, design, x, INSERT_VALUES, SCATTER_REVERSE);
626:   return 0;
627: }

629: PetscErrorCode Gather_i(Vec y, Vec *yi, VecScatter *scat, PetscInt nt)
630: {
631:   PetscInt i;

633:   for (i = 0; i < nt; i++) {
634:     VecScatterBegin(scat[i], yi[i], y, INSERT_VALUES, SCATTER_REVERSE);
635:     VecScatterEnd(scat[i], yi[i], y, INSERT_VALUES, SCATTER_REVERSE);
636:   }
637:   return 0;
638: }

640: PetscErrorCode ParabolicInitialize(AppCtx *user)
641: {
642:   PetscInt    m, n, i, j, k, linear_index, istart, iend, iblock, lo, hi, lo2, hi2;
643:   Vec         XX, YY, ZZ, XXwork, YYwork, ZZwork, UTwork, yi, di, bc;
644:   PetscReal  *x, *y, *z;
645:   PetscReal   h, stime;
646:   PetscScalar hinv, neg_hinv, half = 0.5, sqrt_beta;
647:   PetscInt    im, indx1, indx2, indy1, indy2, indz1, indz2, nx, ny, nz;
648:   PetscReal   xri, yri, zri, xim, yim, zim, dx1, dx2, dy1, dy2, dz1, dz2, Dx, Dy, Dz;
649:   PetscScalar v, vx, vy, vz;
650:   IS          is_from_y, is_to_yi, is_from_d, is_to_di;
651:   /* Data locations */
652:   PetscScalar xr[64] = {0.4970, 0.8498, 0.7814, 0.6268, 0.7782, 0.6402, 0.3617, 0.3160, 0.3610, 0.5298, 0.6987, 0.3331, 0.7962, 0.5596, 0.3866, 0.6774, 0.5407, 0.4518, 0.6702, 0.6061, 0.7580, 0.8997,
653:                         0.5198, 0.8326, 0.2138, 0.9198, 0.3000, 0.2833, 0.8288, 0.7076, 0.1820, 0.0728, 0.8447, 0.2367, 0.3239, 0.6413, 0.3114, 0.4731, 0.1192, 0.9273, 0.5724, 0.4331, 0.5136, 0.3547,
654:                         0.4413, 0.2602, 0.5698, 0.7278, 0.5261, 0.6230, 0.2454, 0.3948, 0.7479, 0.6582, 0.4660, 0.5594, 0.7574, 0.1143, 0.5900, 0.1065, 0.4260, 0.3294, 0.8276, 0.0756};

656:   PetscScalar yr[64] = {0.7345, 0.9120, 0.9288, 0.7528, 0.4463, 0.4985, 0.2497, 0.6256, 0.3425, 0.9026, 0.6983, 0.4230, 0.7140, 0.2970, 0.4474, 0.8792, 0.6604, 0.2485, 0.7968, 0.6127, 0.1796, 0.2437,
657:                         0.5938, 0.6137, 0.3867, 0.5658, 0.4575, 0.1009, 0.0863, 0.3361, 0.0738, 0.3985, 0.6602, 0.1437, 0.0934, 0.5983, 0.5950, 0.0763, 0.0768, 0.2288, 0.5761, 0.1129, 0.3841, 0.6150,
658:                         0.6904, 0.6686, 0.1361, 0.4601, 0.4491, 0.3716, 0.1969, 0.6537, 0.6743, 0.6991, 0.4811, 0.5480, 0.1684, 0.4569, 0.6889, 0.8437, 0.3015, 0.2854, 0.8199, 0.2658};

660:   PetscScalar zr[64] = {0.7668, 0.8573, 0.2654, 0.2719, 0.1060, 0.1311, 0.6232, 0.2295, 0.8009, 0.2147, 0.2119, 0.9325, 0.4473, 0.3600, 0.3374, 0.3819, 0.4066, 0.5801, 0.1673, 0.0959, 0.4638, 0.8236,
661:                         0.8800, 0.2939, 0.2028, 0.8262, 0.2706, 0.6276, 0.9085, 0.6443, 0.8241, 0.0712, 0.1824, 0.7789, 0.4389, 0.8415, 0.7055, 0.6639, 0.3653, 0.2078, 0.1987, 0.2297, 0.4321, 0.8115,
662:                         0.4915, 0.7764, 0.4657, 0.4627, 0.4569, 0.4232, 0.8514, 0.0674, 0.3227, 0.1055, 0.6690, 0.6313, 0.9226, 0.5461, 0.4126, 0.2364, 0.6096, 0.7042, 0.3914, 0.0711};

664:   PetscMalloc1(user->mx, &x);
665:   PetscMalloc1(user->mx, &y);
666:   PetscMalloc1(user->mx, &z);
667:   user->jformed    = PETSC_FALSE;
668:   user->dsg_formed = PETSC_FALSE;

670:   n         = user->mx * user->mx * user->mx;
671:   m         = 3 * user->mx * user->mx * (user->mx - 1);
672:   sqrt_beta = PetscSqrtScalar(user->beta);

674:   user->ksp_its         = 0;
675:   user->ksp_its_initial = 0;

677:   stime = (PetscReal)user->nt / user->ns;
678:   PetscMalloc1(user->ns, &user->sample_times);
679:   for (i = 0; i < user->ns; i++) user->sample_times[i] = (PetscInt)(stime * ((PetscReal)i + 1.0) - 0.5);

681:   VecCreate(PETSC_COMM_WORLD, &XX);
682:   VecCreate(PETSC_COMM_WORLD, &user->q);
683:   VecSetSizes(XX, PETSC_DECIDE, n);
684:   VecSetSizes(user->q, PETSC_DECIDE, n * user->nt);
685:   VecSetFromOptions(XX);
686:   VecSetFromOptions(user->q);

688:   VecDuplicate(XX, &YY);
689:   VecDuplicate(XX, &ZZ);
690:   VecDuplicate(XX, &XXwork);
691:   VecDuplicate(XX, &YYwork);
692:   VecDuplicate(XX, &ZZwork);
693:   VecDuplicate(XX, &UTwork);
694:   VecDuplicate(XX, &user->utrue);
695:   VecDuplicate(XX, &bc);

697:   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
698:   h        = 1.0 / user->mx;
699:   hinv     = user->mx;
700:   neg_hinv = -hinv;

702:   VecGetOwnershipRange(XX, &istart, &iend);
703:   for (linear_index = istart; linear_index < iend; linear_index++) {
704:     i  = linear_index % user->mx;
705:     j  = ((linear_index - i) / user->mx) % user->mx;
706:     k  = ((linear_index - i) / user->mx - j) / user->mx;
707:     vx = h * (i + 0.5);
708:     vy = h * (j + 0.5);
709:     vz = h * (k + 0.5);
710:     VecSetValues(XX, 1, &linear_index, &vx, INSERT_VALUES);
711:     VecSetValues(YY, 1, &linear_index, &vy, INSERT_VALUES);
712:     VecSetValues(ZZ, 1, &linear_index, &vz, INSERT_VALUES);
713:     if ((vx < 0.6) && (vx > 0.4) && (vy < 0.6) && (vy > 0.4) && (vy < 0.6) && (vz < 0.6) && (vz > 0.4)) {
714:       v = 1000.0;
715:       VecSetValues(bc, 1, &linear_index, &v, INSERT_VALUES);
716:     }
717:   }

719:   VecAssemblyBegin(XX);
720:   VecAssemblyEnd(XX);
721:   VecAssemblyBegin(YY);
722:   VecAssemblyEnd(YY);
723:   VecAssemblyBegin(ZZ);
724:   VecAssemblyEnd(ZZ);
725:   VecAssemblyBegin(bc);
726:   VecAssemblyEnd(bc);

728:   /* Compute true parameter function
729:      ut = 0.5 + exp(-10*((x-0.5)^2+(y-0.5)^2+(z-0.5)^2)) */
730:   VecCopy(XX, XXwork);
731:   VecCopy(YY, YYwork);
732:   VecCopy(ZZ, ZZwork);

734:   VecShift(XXwork, -0.5);
735:   VecShift(YYwork, -0.5);
736:   VecShift(ZZwork, -0.5);

738:   VecPointwiseMult(XXwork, XXwork, XXwork);
739:   VecPointwiseMult(YYwork, YYwork, YYwork);
740:   VecPointwiseMult(ZZwork, ZZwork, ZZwork);

742:   VecCopy(XXwork, user->utrue);
743:   VecAXPY(user->utrue, 1.0, YYwork);
744:   VecAXPY(user->utrue, 1.0, ZZwork);
745:   VecScale(user->utrue, -10.0);
746:   VecExp(user->utrue);
747:   VecShift(user->utrue, 0.5);

749:   VecDestroy(&XX);
750:   VecDestroy(&YY);
751:   VecDestroy(&ZZ);
752:   VecDestroy(&XXwork);
753:   VecDestroy(&YYwork);
754:   VecDestroy(&ZZwork);
755:   VecDestroy(&UTwork);

757:   /* Initial guess and reference model */
758:   VecDuplicate(user->utrue, &user->ur);
759:   VecSet(user->ur, 0.0);

761:   /* Generate Grad matrix */
762:   MatCreate(PETSC_COMM_WORLD, &user->Grad);
763:   MatSetSizes(user->Grad, PETSC_DECIDE, PETSC_DECIDE, m, n);
764:   MatSetFromOptions(user->Grad);
765:   MatMPIAIJSetPreallocation(user->Grad, 2, NULL, 2, NULL);
766:   MatSeqAIJSetPreallocation(user->Grad, 2, NULL);
767:   MatGetOwnershipRange(user->Grad, &istart, &iend);

769:   for (i = istart; i < iend; i++) {
770:     if (i < m / 3) {
771:       iblock = i / (user->mx - 1);
772:       j      = iblock * user->mx + (i % (user->mx - 1));
773:       MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
774:       j = j + 1;
775:       MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES);
776:     }
777:     if (i >= m / 3 && i < 2 * m / 3) {
778:       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
779:       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
780:       MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
781:       j = j + user->mx;
782:       MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES);
783:     }
784:     if (i >= 2 * m / 3) {
785:       j = i - 2 * m / 3;
786:       MatSetValues(user->Grad, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
787:       j = j + user->mx * user->mx;
788:       MatSetValues(user->Grad, 1, &i, 1, &j, &hinv, INSERT_VALUES);
789:     }
790:   }

792:   MatAssemblyBegin(user->Grad, MAT_FINAL_ASSEMBLY);
793:   MatAssemblyEnd(user->Grad, MAT_FINAL_ASSEMBLY);

795:   /* Generate arithmetic averaging matrix Av */
796:   MatCreate(PETSC_COMM_WORLD, &user->Av);
797:   MatSetSizes(user->Av, PETSC_DECIDE, PETSC_DECIDE, m, n);
798:   MatSetFromOptions(user->Av);
799:   MatMPIAIJSetPreallocation(user->Av, 2, NULL, 2, NULL);
800:   MatSeqAIJSetPreallocation(user->Av, 2, NULL);
801:   MatGetOwnershipRange(user->Av, &istart, &iend);

803:   for (i = istart; i < iend; i++) {
804:     if (i < m / 3) {
805:       iblock = i / (user->mx - 1);
806:       j      = iblock * user->mx + (i % (user->mx - 1));
807:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
808:       j = j + 1;
809:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
810:     }
811:     if (i >= m / 3 && i < 2 * m / 3) {
812:       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
813:       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
814:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
815:       j = j + user->mx;
816:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
817:     }
818:     if (i >= 2 * m / 3) {
819:       j = i - 2 * m / 3;
820:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
821:       j = j + user->mx * user->mx;
822:       MatSetValues(user->Av, 1, &i, 1, &j, &half, INSERT_VALUES);
823:     }
824:   }

826:   MatAssemblyBegin(user->Av, MAT_FINAL_ASSEMBLY);
827:   MatAssemblyEnd(user->Av, MAT_FINAL_ASSEMBLY);

829:   /* Generate transpose of averaging matrix Av */
830:   MatTranspose(user->Av, MAT_INITIAL_MATRIX, &user->AvT);

832:   MatCreate(PETSC_COMM_WORLD, &user->L);
833:   MatSetSizes(user->L, PETSC_DECIDE, PETSC_DECIDE, m + n, n);
834:   MatSetFromOptions(user->L);
835:   MatMPIAIJSetPreallocation(user->L, 2, NULL, 2, NULL);
836:   MatSeqAIJSetPreallocation(user->L, 2, NULL);
837:   MatGetOwnershipRange(user->L, &istart, &iend);

839:   for (i = istart; i < iend; i++) {
840:     if (i < m / 3) {
841:       iblock = i / (user->mx - 1);
842:       j      = iblock * user->mx + (i % (user->mx - 1));
843:       MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
844:       j = j + 1;
845:       MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES);
846:     }
847:     if (i >= m / 3 && i < 2 * m / 3) {
848:       iblock = (i - m / 3) / (user->mx * (user->mx - 1));
849:       j      = iblock * user->mx * user->mx + ((i - m / 3) % (user->mx * (user->mx - 1)));
850:       MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
851:       j = j + user->mx;
852:       MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES);
853:     }
854:     if (i >= 2 * m / 3 && i < m) {
855:       j = i - 2 * m / 3;
856:       MatSetValues(user->L, 1, &i, 1, &j, &neg_hinv, INSERT_VALUES);
857:       j = j + user->mx * user->mx;
858:       MatSetValues(user->L, 1, &i, 1, &j, &hinv, INSERT_VALUES);
859:     }
860:     if (i >= m) {
861:       j = i - m;
862:       MatSetValues(user->L, 1, &i, 1, &j, &sqrt_beta, INSERT_VALUES);
863:     }
864:   }

866:   MatAssemblyBegin(user->L, MAT_FINAL_ASSEMBLY);
867:   MatAssemblyEnd(user->L, MAT_FINAL_ASSEMBLY);

869:   MatScale(user->L, PetscPowScalar(h, 1.5));

871:   /* Generate Div matrix */
872:   MatTranspose(user->Grad, MAT_INITIAL_MATRIX, &user->Div);

874:   /* Build work vectors and matrices */
875:   VecCreate(PETSC_COMM_WORLD, &user->S);
876:   VecSetSizes(user->S, PETSC_DECIDE, user->mx * user->mx * (user->mx - 1) * 3);
877:   VecSetFromOptions(user->S);

879:   VecCreate(PETSC_COMM_WORLD, &user->lwork);
880:   VecSetSizes(user->lwork, PETSC_DECIDE, m + user->mx * user->mx * user->mx);
881:   VecSetFromOptions(user->lwork);

883:   MatDuplicate(user->Div, MAT_SHARE_NONZERO_PATTERN, &user->Divwork);
884:   MatDuplicate(user->Av, MAT_SHARE_NONZERO_PATTERN, &user->Avwork);

886:   VecCreate(PETSC_COMM_WORLD, &user->d);
887:   VecSetSizes(user->d, PETSC_DECIDE, user->ndata * user->nt);
888:   VecSetFromOptions(user->d);

890:   VecDuplicate(user->S, &user->Swork);
891:   VecDuplicate(user->S, &user->Av_u);
892:   VecDuplicate(user->S, &user->Twork);
893:   VecDuplicate(user->S, &user->Rwork);
894:   VecDuplicate(user->y, &user->ywork);
895:   VecDuplicate(user->u, &user->uwork);
896:   VecDuplicate(user->u, &user->js_diag);
897:   VecDuplicate(user->c, &user->cwork);
898:   VecDuplicate(user->d, &user->dwork);

900:   /* Create matrix-free shell user->Js for computing A*x */
901:   MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m * user->nt, user->m * user->nt, user, &user->Js);
902:   MatShellSetOperation(user->Js, MATOP_MULT, (void (*)(void))StateMatMult);
903:   MatShellSetOperation(user->Js, MATOP_DUPLICATE, (void (*)(void))StateMatDuplicate);
904:   MatShellSetOperation(user->Js, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatMultTranspose);
905:   MatShellSetOperation(user->Js, MATOP_GET_DIAGONAL, (void (*)(void))StateMatGetDiagonal);

907:   /* Diagonal blocks of user->Js */
908:   MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, &user->JsBlock);
909:   MatShellSetOperation(user->JsBlock, MATOP_MULT, (void (*)(void))StateMatBlockMult);
910:   /* Blocks are symmetric */
911:   MatShellSetOperation(user->JsBlock, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatBlockMult);

913:   /* Create a matrix-free shell user->JsBlockPrec for computing (U+D)\D*(L+D)\x, where JsBlock = L+D+U,
914:      D is diagonal, L is strictly lower triangular, and U is strictly upper triangular.
915:      This is an SSOR preconditioner for user->JsBlock. */
916:   MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m, user->m, user, &user->JsBlockPrec);
917:   MatShellSetOperation(user->JsBlockPrec, MATOP_MULT, (void (*)(void))StateMatBlockPrecMult);
918:   /* JsBlockPrec is symmetric */
919:   MatShellSetOperation(user->JsBlockPrec, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatBlockPrecMult);
920:   MatSetOption(user->JsBlockPrec, MAT_SYMMETRIC, PETSC_TRUE);
921:   MatSetOption(user->JsBlockPrec, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);

923:   /* Create a matrix-free shell user->Jd for computing B*x */
924:   MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m * user->nt, user->m, user, &user->Jd);
925:   MatShellSetOperation(user->Jd, MATOP_MULT, (void (*)(void))DesignMatMult);
926:   MatShellSetOperation(user->Jd, MATOP_MULT_TRANSPOSE, (void (*)(void))DesignMatMultTranspose);

928:   /* User-defined routines for computing user->Js\x and user->Js^T\x*/
929:   MatCreateShell(PETSC_COMM_WORLD, PETSC_DETERMINE, PETSC_DETERMINE, user->m * user->nt, user->m * user->nt, user, &user->JsInv);
930:   MatShellSetOperation(user->JsInv, MATOP_MULT, (void (*)(void))StateMatInvMult);
931:   MatShellSetOperation(user->JsInv, MATOP_MULT_TRANSPOSE, (void (*)(void))StateMatInvTransposeMult);

933:   /* Solver options and tolerances */
934:   KSPCreate(PETSC_COMM_WORLD, &user->solver);
935:   KSPSetType(user->solver, KSPCG);
936:   KSPSetOperators(user->solver, user->JsBlock, user->JsBlockPrec);
937:   KSPSetInitialGuessNonzero(user->solver, PETSC_FALSE);
938:   KSPSetTolerances(user->solver, 1e-4, 1e-20, 1e3, 500);
939:   KSPSetFromOptions(user->solver);
940:   KSPGetPC(user->solver, &user->prec);
941:   PCSetType(user->prec, PCSHELL);

943:   PCShellSetApply(user->prec, StateMatBlockPrecMult);
944:   PCShellSetApplyTranspose(user->prec, StateMatBlockPrecMult);
945:   PCShellSetContext(user->prec, user);

947:   /* Create scatter from y to y_1,y_2,...,y_nt */
948:   PetscMalloc1(user->nt * user->m, &user->yi_scatter);
949:   VecCreate(PETSC_COMM_WORLD, &yi);
950:   VecSetSizes(yi, PETSC_DECIDE, user->mx * user->mx * user->mx);
951:   VecSetFromOptions(yi);
952:   VecDuplicateVecs(yi, user->nt, &user->yi);
953:   VecDuplicateVecs(yi, user->nt, &user->yiwork);

955:   VecGetOwnershipRange(user->y, &lo2, &hi2);
956:   istart = 0;
957:   for (i = 0; i < user->nt; i++) {
958:     VecGetOwnershipRange(user->yi[i], &lo, &hi);
959:     ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_yi);
960:     ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_y);
961:     VecScatterCreate(user->y, is_from_y, user->yi[i], is_to_yi, &user->yi_scatter[i]);
962:     istart = istart + hi - lo;
963:     ISDestroy(&is_to_yi);
964:     ISDestroy(&is_from_y);
965:   }
966:   VecDestroy(&yi);

968:   /* Create scatter from d to d_1,d_2,...,d_ns */
969:   PetscMalloc1(user->ns * user->ndata, &user->di_scatter);
970:   VecCreate(PETSC_COMM_WORLD, &di);
971:   VecSetSizes(di, PETSC_DECIDE, user->ndata);
972:   VecSetFromOptions(di);
973:   VecDuplicateVecs(di, user->ns, &user->di);
974:   VecGetOwnershipRange(user->d, &lo2, &hi2);
975:   istart = 0;
976:   for (i = 0; i < user->ns; i++) {
977:     VecGetOwnershipRange(user->di[i], &lo, &hi);
978:     ISCreateStride(PETSC_COMM_SELF, hi - lo, lo, 1, &is_to_di);
979:     ISCreateStride(PETSC_COMM_SELF, hi - lo, lo2 + istart, 1, &is_from_d);
980:     VecScatterCreate(user->d, is_from_d, user->di[i], is_to_di, &user->di_scatter[i]);
981:     istart = istart + hi - lo;
982:     ISDestroy(&is_to_di);
983:     ISDestroy(&is_from_d);
984:   }
985:   VecDestroy(&di);

987:   /* Assemble RHS of forward problem */
988:   VecCopy(bc, user->yiwork[0]);
989:   for (i = 1; i < user->nt; i++) VecSet(user->yiwork[i], 0.0);
990:   Gather_i(user->q, user->yiwork, user->yi_scatter, user->nt);
991:   VecDestroy(&bc);

993:   /* Compute true state function yt given ut */
994:   VecCreate(PETSC_COMM_WORLD, &user->ytrue);
995:   VecSetSizes(user->ytrue, PETSC_DECIDE, n * user->nt);
996:   VecSetFromOptions(user->ytrue);

998:   /* First compute Av_u = Av*exp(-u) */
999:   VecSet(user->uwork, 0);
1000:   VecAXPY(user->uwork, -1.0, user->utrue); /*  Note: user->utrue */
1001:   VecExp(user->uwork);
1002:   MatMult(user->Av, user->uwork, user->Av_u);

1004:   /* Symbolic DSG = Div * Grad */
1005:   MatProductCreate(user->Div, user->Grad, NULL, &user->DSG);
1006:   MatProductSetType(user->DSG, MATPRODUCT_AB);
1007:   MatProductSetAlgorithm(user->DSG, "default");
1008:   MatProductSetFill(user->DSG, PETSC_DEFAULT);
1009:   MatProductSetFromOptions(user->DSG);
1010:   MatProductSymbolic(user->DSG);

1012:   user->dsg_formed = PETSC_TRUE;

1014:   /* Next form DSG = Div*Grad */
1015:   MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN);
1016:   MatDiagonalScale(user->Divwork, NULL, user->Av_u);
1017:   if (user->dsg_formed) {
1018:     MatProductNumeric(user->DSG);
1019:   } else {
1020:     MatMatMult(user->Div, user->Grad, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &user->DSG);
1021:     user->dsg_formed = PETSC_TRUE;
1022:   }
1023:   /* B = speye(nx^3) + ht*DSG; */
1024:   MatScale(user->DSG, user->ht);
1025:   MatShift(user->DSG, 1.0);

1027:   /* Now solve for ytrue */
1028:   StateMatInvMult(user->Js, user->q, user->ytrue);

1030:   /* Initial guess y0 for state given u0 */

1032:   /* First compute Av_u = Av*exp(-u) */
1033:   VecSet(user->uwork, 0);
1034:   VecAXPY(user->uwork, -1.0, user->u); /*  Note: user->u */
1035:   VecExp(user->uwork);
1036:   MatMult(user->Av, user->uwork, user->Av_u);

1038:   /* Next form DSG = Div*S*Grad */
1039:   MatCopy(user->Div, user->Divwork, SAME_NONZERO_PATTERN);
1040:   MatDiagonalScale(user->Divwork, NULL, user->Av_u);
1041:   if (user->dsg_formed) {
1042:     MatProductNumeric(user->DSG);
1043:   } else {
1044:     MatMatMult(user->Div, user->Grad, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &user->DSG);

1046:     user->dsg_formed = PETSC_TRUE;
1047:   }
1048:   /* B = speye(nx^3) + ht*DSG; */
1049:   MatScale(user->DSG, user->ht);
1050:   MatShift(user->DSG, 1.0);

1052:   /* Now solve for y */
1053:   StateMatInvMult(user->Js, user->q, user->y);

1055:   /* Construct projection matrix Q, a block diagonal matrix consisting of nt copies of Qblock along the diagonal */
1056:   MatCreate(PETSC_COMM_WORLD, &user->Qblock);
1057:   MatSetSizes(user->Qblock, PETSC_DECIDE, PETSC_DECIDE, user->ndata, n);
1058:   MatSetFromOptions(user->Qblock);
1059:   MatMPIAIJSetPreallocation(user->Qblock, 8, NULL, 8, NULL);
1060:   MatSeqAIJSetPreallocation(user->Qblock, 8, NULL);

1062:   for (i = 0; i < user->mx; i++) {
1063:     x[i] = h * (i + 0.5);
1064:     y[i] = h * (i + 0.5);
1065:     z[i] = h * (i + 0.5);
1066:   }

1068:   MatGetOwnershipRange(user->Qblock, &istart, &iend);
1069:   nx = user->mx;
1070:   ny = user->mx;
1071:   nz = user->mx;
1072:   for (i = istart; i < iend; i++) {
1073:     xri = xr[i];
1074:     im  = 0;
1075:     xim = x[im];
1076:     while (xri > xim && im < nx) {
1077:       im  = im + 1;
1078:       xim = x[im];
1079:     }
1080:     indx1 = im - 1;
1081:     indx2 = im;
1082:     dx1   = xri - x[indx1];
1083:     dx2   = x[indx2] - xri;

1085:     yri = yr[i];
1086:     im  = 0;
1087:     yim = y[im];
1088:     while (yri > yim && im < ny) {
1089:       im  = im + 1;
1090:       yim = y[im];
1091:     }
1092:     indy1 = im - 1;
1093:     indy2 = im;
1094:     dy1   = yri - y[indy1];
1095:     dy2   = y[indy2] - yri;

1097:     zri = zr[i];
1098:     im  = 0;
1099:     zim = z[im];
1100:     while (zri > zim && im < nz) {
1101:       im  = im + 1;
1102:       zim = z[im];
1103:     }
1104:     indz1 = im - 1;
1105:     indz2 = im;
1106:     dz1   = zri - z[indz1];
1107:     dz2   = z[indz2] - zri;

1109:     Dx = x[indx2] - x[indx1];
1110:     Dy = y[indy2] - y[indy1];
1111:     Dz = z[indz2] - z[indz1];

1113:     j = indx1 + indy1 * nx + indz1 * nx * ny;
1114:     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1115:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);

1117:     j = indx1 + indy1 * nx + indz2 * nx * ny;
1118:     v = (1 - dx1 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1119:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);

1121:     j = indx1 + indy2 * nx + indz1 * nx * ny;
1122:     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1123:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);

1125:     j = indx1 + indy2 * nx + indz2 * nx * ny;
1126:     v = (1 - dx1 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1127:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);

1129:     j = indx2 + indy1 * nx + indz1 * nx * ny;
1130:     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz1 / Dz);
1131:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);

1133:     j = indx2 + indy1 * nx + indz2 * nx * ny;
1134:     v = (1 - dx2 / Dx) * (1 - dy1 / Dy) * (1 - dz2 / Dz);
1135:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);

1137:     j = indx2 + indy2 * nx + indz1 * nx * ny;
1138:     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz1 / Dz);
1139:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);

1141:     j = indx2 + indy2 * nx + indz2 * nx * ny;
1142:     v = (1 - dx2 / Dx) * (1 - dy2 / Dy) * (1 - dz2 / Dz);
1143:     MatSetValues(user->Qblock, 1, &i, 1, &j, &v, INSERT_VALUES);
1144:   }
1145:   MatAssemblyBegin(user->Qblock, MAT_FINAL_ASSEMBLY);
1146:   MatAssemblyEnd(user->Qblock, MAT_FINAL_ASSEMBLY);

1148:   MatTranspose(user->Qblock, MAT_INITIAL_MATRIX, &user->QblockT);
1149:   MatTranspose(user->L, MAT_INITIAL_MATRIX, &user->LT);

1151:   /* Add noise to the measurement data */
1152:   VecSet(user->ywork, 1.0);
1153:   VecAYPX(user->ywork, user->noise, user->ytrue);
1154:   Scatter_i(user->ywork, user->yiwork, user->yi_scatter, user->nt);
1155:   for (j = 0; j < user->ns; j++) {
1156:     i = user->sample_times[j];
1157:     MatMult(user->Qblock, user->yiwork[i], user->di[j]);
1158:   }
1159:   Gather_i(user->d, user->di, user->di_scatter, user->ns);

1161:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1162:   KSPSetFromOptions(user->solver);
1163:   PetscFree(x);
1164:   PetscFree(y);
1165:   PetscFree(z);
1166:   return 0;
1167: }

1169: PetscErrorCode ParabolicDestroy(AppCtx *user)
1170: {
1171:   PetscInt i;

1173:   MatDestroy(&user->Qblock);
1174:   MatDestroy(&user->QblockT);
1175:   MatDestroy(&user->Div);
1176:   MatDestroy(&user->Divwork);
1177:   MatDestroy(&user->Grad);
1178:   MatDestroy(&user->Av);
1179:   MatDestroy(&user->Avwork);
1180:   MatDestroy(&user->AvT);
1181:   MatDestroy(&user->DSG);
1182:   MatDestroy(&user->L);
1183:   MatDestroy(&user->LT);
1184:   KSPDestroy(&user->solver);
1185:   MatDestroy(&user->Js);
1186:   MatDestroy(&user->Jd);
1187:   MatDestroy(&user->JsInv);
1188:   MatDestroy(&user->JsBlock);
1189:   MatDestroy(&user->JsBlockPrec);
1190:   VecDestroy(&user->u);
1191:   VecDestroy(&user->uwork);
1192:   VecDestroy(&user->utrue);
1193:   VecDestroy(&user->y);
1194:   VecDestroy(&user->ywork);
1195:   VecDestroy(&user->ytrue);
1196:   VecDestroyVecs(user->nt, &user->yi);
1197:   VecDestroyVecs(user->nt, &user->yiwork);
1198:   VecDestroyVecs(user->ns, &user->di);
1199:   PetscFree(user->yi);
1200:   PetscFree(user->yiwork);
1201:   PetscFree(user->di);
1202:   VecDestroy(&user->c);
1203:   VecDestroy(&user->cwork);
1204:   VecDestroy(&user->ur);
1205:   VecDestroy(&user->q);
1206:   VecDestroy(&user->d);
1207:   VecDestroy(&user->dwork);
1208:   VecDestroy(&user->lwork);
1209:   VecDestroy(&user->S);
1210:   VecDestroy(&user->Swork);
1211:   VecDestroy(&user->Av_u);
1212:   VecDestroy(&user->Twork);
1213:   VecDestroy(&user->Rwork);
1214:   VecDestroy(&user->js_diag);
1215:   ISDestroy(&user->s_is);
1216:   ISDestroy(&user->d_is);
1217:   VecScatterDestroy(&user->state_scatter);
1218:   VecScatterDestroy(&user->design_scatter);
1219:   for (i = 0; i < user->nt; i++) VecScatterDestroy(&user->yi_scatter[i]);
1220:   for (i = 0; i < user->ns; i++) VecScatterDestroy(&user->di_scatter[i]);
1221:   PetscFree(user->yi_scatter);
1222:   PetscFree(user->di_scatter);
1223:   PetscFree(user->sample_times);
1224:   return 0;
1225: }

1227: PetscErrorCode ParabolicMonitor(Tao tao, void *ptr)
1228: {
1229:   Vec       X;
1230:   PetscReal unorm, ynorm;
1231:   AppCtx   *user = (AppCtx *)ptr;

1233:   TaoGetSolution(tao, &X);
1234:   Scatter(X, user->ywork, user->state_scatter, user->uwork, user->design_scatter);
1235:   VecAXPY(user->ywork, -1.0, user->ytrue);
1236:   VecAXPY(user->uwork, -1.0, user->utrue);
1237:   VecNorm(user->uwork, NORM_2, &unorm);
1238:   VecNorm(user->ywork, NORM_2, &ynorm);
1239:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n", (double)unorm, (double)ynorm);
1240:   return 0;
1241: }

1243: /*TEST

1245:    build:
1246:       requires: !complex

1248:    test:
1249:       args: -tao_cmonitor -tao_type lcl -ns 1 -tao_gatol 1.e-4 -ksp_max_it 30
1250:       requires: !single

1252: TEST*/