Actual source code: ex74.c
static char help[] = "Solves the constant-coefficient 1D heat equation \n\
with an Implicit Runge-Kutta method using MatKAIJ. \n\
\n\
    du      d^2 u \n\
    --  = a ----- ;  0 <= x <= 1; \n\
    dt      dx^2 \n\
\n\
  with periodic boundary conditions \n\
\n\
2nd order central discretization in space: \n\
\n\
   [ d^2 u ]     u_{i+1} - 2u_i + u_{i-1} \n\
   [ ----- ]  =  ------------------------ \n\
   [ dx^2  ]i              h^2 \n\
\n\
   i = grid index;   h = x_{i+1} - x_i (uniform) \n\
   0 <= i < n        h = 1.0/n \n\
\n\
Thus, \n\
\n\
    du \n\
    --  = Ju;   J = (a/h^2) tridiagonal(1,-2,1)_n \n\
    dt \n\
\n\
Implicit Runge-Kutta method: \n\
\n\
  U^(k)   = u^n + dt \\sum_i a_{ki} JU^{i} \n\
  u^{n+1} = u^n + dt \\sum_i b_i JU^{i} \n\
\n\
  i = 1,...,s (s -> number of stages) \n\
\n\
At each time step, we solve \n\
\n\
  [ 1                                  ]      1 \n\
  [ -- I \\otimes A^{-1} - J \\otimes I ] U = -- u^n \\otimes (A^{-1} e) \n\
  [ dt                                 ]      dt \n\
\n\
  where A = (a_{ki}) is the coefficient matrix of the Butcher tableau \n\
  of the implicit Runge-Kutta method and e = (1,...,1)^T, \n\
\n\
with MATKAIJ and KSP. \n\
\n\
Available IRK Methods: \n\
  gauss       n-stage Gauss method \n\
\n";

/*
  Include "petscksp.h" so that we can use KSP solvers. Note that this file
  automatically includes:
    petscsys.h    - base PETSc routines
    petscvec.h    - vectors
    petscmat.h    - matrices
    petscis.h     - index sets
    petscviewer.h - viewers
    petscpc.h     - preconditioners
*/
#include <petscksp.h>
#include <petscdt.h>

/* define the IRK methods available */
#define IRKGAUSS "gauss"

typedef enum {
  PHYSICS_DIFFUSION,
  PHYSICS_ADVECTION
} PhysicsType;
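/* String names for PhysicsType, in the layout PetscOptionsEnum() expects: the
   value names, then the enum type name, then the option prefix, then a NULL
   sentinel */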
const char *const PhysicsTypes[] = {"DIFFUSION", "ADVECTION", "PhysicsType", "PHYSICS_", NULL};

typedef struct __context__ {
  PetscReal   a;          /* diffusion coefficient */
  PetscReal   xmin, xmax; /* domain bounds */
  PetscInt    imax;       /* number of grid points */
  PetscInt    niter;      /* number of time iterations */
  PetscReal   dt;         /* time step size */
  PhysicsType physics_type;
} UserContext;

static PetscErrorCode ExactSolution(Vec, void *, PetscReal);
static PetscErrorCode RKCreate_Gauss(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
static PetscErrorCode Assemble_AdvDiff(MPI_Comm, UserContext *, Mat *);

int main(int argc, char **argv)
{
  Vec               u, uex, rhs, z;
  UserContext       ctxt;
  PetscInt          nstages, is, ie, matis, matie, *ix, *ix2;
  PetscInt          n, i, s, t, total_its;
  PetscScalar      *A, *B, *At, *b, *zvals, one = 1.0;
  PetscReal        *c, err, time;
  Mat               Identity, J, TA, SC, R;
  KSP               ksp;
  PetscFunctionList IRKList = NULL;
  char              irktype[256] = IRKGAUSS;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(PetscFunctionListAdd(&IRKList, IRKGAUSS, RKCreate_Gauss));

  /* default values */
  ctxt.a            = 1.0;
  ctxt.xmin         = 0.0;
  ctxt.xmax         = 1.0;
  ctxt.imax         = 20;
  ctxt.niter        = 0;
  ctxt.dt           = 0.0;
  ctxt.physics_type = PHYSICS_DIFFUSION;

  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "IRK options", "");
  PetscCall(PetscOptionsReal("-a", "diffusion coefficient", "<1.0>", ctxt.a, &ctxt.a, NULL));
  PetscCall(PetscOptionsInt("-imax", "grid size", "<20>", ctxt.imax, &ctxt.imax, NULL));
  PetscCall(PetscOptionsReal("-xmin", "xmin", "<0.0>", ctxt.xmin, &ctxt.xmin, NULL));
  PetscCall(PetscOptionsReal("-xmax", "xmax", "<1.0>", ctxt.xmax, &ctxt.xmax, NULL));
  PetscCall(PetscOptionsInt("-niter", "number of time steps", "<0>", ctxt.niter, &ctxt.niter, NULL));
  PetscCall(PetscOptionsReal("-dt", "time step size", "<0.0>", ctxt.dt, &ctxt.dt, NULL));
  PetscCall(PetscOptionsFList("-irk_type", "IRK method family", "", IRKList, irktype, irktype, sizeof(irktype), NULL));
  nstages = 2;
  PetscCall(PetscOptionsInt("-irk_nstages", "Number of stages in IRK method", "", nstages, &nstages, NULL));
  PetscCall(PetscOptionsEnum("-physics_type", "Type of process to discretize", "", PhysicsTypes, (PetscEnum)ctxt.physics_type, (PetscEnum *)&ctxt.physics_type, NULL));
  PetscOptionsEnd();

  /* allocate and initialize solution vector and exact solution */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
  PetscCall(VecSetSizes(u, PETSC_DECIDE, ctxt.imax));
  PetscCall(VecSetFromOptions(u));
  PetscCall(VecDuplicate(u, &uex));
  /* initial solution */
  PetscCall(ExactSolution(u, &ctxt, 0.0));
  /* exact solution */
  PetscCall(ExactSolution(uex, &ctxt, ctxt.dt * ctxt.niter));

  { /* Create A,b,c */
    PetscErrorCode (*irkcreate)(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
    PetscCall(PetscFunctionListFind(IRKList, irktype, &irkcreate));
    PetscCall((*irkcreate)(nstages, &A, &b, &c));
  }
  { /* Invert A */
    /* PETSc does not provide a routine to calculate the inverse of a general matrix.
     * To get the inverse of A, we form a sequential BAIJ matrix from it, consisting of a single block
     * with block size equal to the dimension of A, and then use MatInvertBlockDiagonal(). */
    Mat                A_baij;
    PetscInt           idxm[1] = {0}, idxn[1] = {0};
    const PetscScalar *A_inv;
    PetscCall(MatCreateSeqBAIJ(PETSC_COMM_SELF, nstages, nstages, nstages, 1, NULL, &A_baij));
    PetscCall(MatSetOption(A_baij, MAT_ROW_ORIENTED, PETSC_FALSE));
    PetscCall(MatSetValuesBlocked(A_baij, 1, idxm, 1, idxn, A, INSERT_VALUES));
    PetscCall(MatAssemblyBegin(A_baij, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A_baij, MAT_FINAL_ASSEMBLY));
    PetscCall(MatInvertBlockDiagonal(A_baij, &A_inv));
    PetscCall(PetscMemcpy(A, A_inv, nstages * nstages * sizeof(PetscScalar)));
    PetscCall(MatDestroy(&A_baij));
  }
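  /* A now holds A^{-1} in column-major order: MatInvertBlockDiagonal() returns
     the block inverses in column-major order, consistent with how the array was
     inserted above (MAT_ROW_ORIENTED set to PETSC_FALSE) and with the indexing
     A[s + nstages*t] (row s, column t) used below */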
  /* Scale A^{-1} by 1/dt and b by -dt (the minus sign compensates for the
     assembled operator below being -J) */
  for (s = 0; s < nstages * nstages; s++) A[s] *= 1.0 / ctxt.dt;
  for (s = 0; s < nstages; s++) b[s] *= (-ctxt.dt);
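
  /* With these scalings the KSP below solves the system from the header,
       [ (1/dt) I \otimes A^{-1} - J \otimes I ] U = (1/dt) u^n \otimes (A^{-1} e),  e = (1,...,1)^T,
     which is the standard stage system U = u^n \otimes e + dt (A \otimes J) U
     left-multiplied by (1/dt) A^{-1}; note that dt must be nonzero (set it
     with -dt), otherwise the scaling above divides by zero */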

  /* Compute row sums At and identity B */
  PetscCall(PetscMalloc2(nstages, &At, PetscSqr(nstages), &B));
  for (s = 0; s < nstages; s++) {
    At[s] = 0;
    for (t = 0; t < nstages; t++) {
      At[s] += A[s + nstages * t];        /* row sums of (1/dt)*A^{-1} */
      B[s + nstages * t] = 1. * (s == t); /* s x s identity */
    }
  }
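
  /* At = (1/dt) A^{-1} e feeds the right-hand-side operator R below; B is the
     identity factor of the stage operator TA */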

  /* allocate and calculate the (-J) matrix */
  switch (ctxt.physics_type) {
  case PHYSICS_ADVECTION:
  case PHYSICS_DIFFUSION:
    PetscCall(Assemble_AdvDiff(PETSC_COMM_WORLD, &ctxt, &J));
  }
  PetscCall(MatCreate(PETSC_COMM_WORLD, &Identity));
  PetscCall(MatSetType(Identity, MATAIJ));
  PetscCall(MatGetOwnershipRange(J, &matis, &matie));
  PetscCall(MatSetSizes(Identity, matie - matis, matie - matis, ctxt.imax, ctxt.imax));
  PetscCall(MatSetUp(Identity));
  for (i = matis; i < matie; i++) PetscCall(MatSetValues(Identity, 1, &i, 1, &i, &one, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(Identity, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Identity, MAT_FINAL_ASSEMBLY));

  /* Create the KAIJ matrix for solving the stages */
  PetscCall(MatCreateKAIJ(J, nstages, nstages, A, B, &TA));
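  /* A MATKAIJ matrix applies [I \otimes S + J \otimes T] without assembling it;
     here S = (1/dt) A^{-1} and T = I_s, which is exactly the stage operator */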

  /* Create the KAIJ matrix for step completion */
  PetscCall(MatCreateKAIJ(J, 1, nstages, NULL, b, &SC));
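  /* Here S is zero and T = -dt b^T, so MatMultAdd(SC, z, u, u) in the loop below
     computes u^{n+1} = u^n + dt \sum_i b_i J U^{i} (the factor -dt cancels the
     sign of the assembled -J) */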

  /* Create the KAIJ matrix R that builds the stage right-hand side from u^n */
  PetscCall(MatCreateKAIJ(Identity, nstages, 1, NULL, At, &R));
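  /* Here S is zero and T = At (nstages x 1), so MatMult(R, u, rhs) stacks
     At_k * u^n into stage k of rhs, forming (1/dt) u^n \otimes (A^{-1} e) */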

  /* Create and set options for KSP */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, TA, TA));
  PetscCall(KSPSetFromOptions(ksp));

  /* Allocate work and right-hand-side vectors */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &z));
  PetscCall(VecSetFromOptions(z));
  PetscCall(VecSetSizes(z, PETSC_DECIDE, ctxt.imax * nstages));
  PetscCall(VecDuplicate(z, &rhs));

  PetscCall(VecGetOwnershipRange(u, &is, &ie));
  PetscCall(PetscMalloc3(nstages, &ix, nstages, &zvals, ie - is, &ix2)); /* scratch arrays; unused in this version of the example */
  /* iterate in time */
  for (n = 0, time = 0., total_its = 0; n < ctxt.niter; n++) {
    PetscInt its;

    /* compute and set the right-hand side */
    PetscCall(MatMult(R, u, rhs));

    /* Solve the system */
    PetscCall(KSPSolve(ksp, rhs, z));
    PetscCall(KSPGetIterationNumber(ksp, &its));
    total_its += its;

    /* Update the solution */
    PetscCall(MatMultAdd(SC, z, u, u));

    /* time step complete */
    time += ctxt.dt;
  }
  PetscCall(PetscFree3(ix, zvals, ix2)); /* arguments must match the PetscMalloc3() order above */

  /* Deallocate work and right-hand-side vectors */
  PetscCall(VecDestroy(&z));
  PetscCall(VecDestroy(&rhs));

  /* Calculate error in final solution: scale the discrete 2-norm by 1/sqrt(imax)
     to get a grid-size-independent (RMS) measure */
  PetscCall(VecAYPX(uex, -1.0, u));
  PetscCall(VecNorm(uex, NORM_2, &err));
  err = PetscSqrtReal(err * err / ((PetscReal)ctxt.imax));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "L2 norm of the numerical error = %g (time=%g)\n", (double)err, (double)time));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of time steps: %" PetscInt_FMT " (%" PetscInt_FMT " Krylov iterations)\n", ctxt.niter, total_its));

  /* Free up memory */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(MatDestroy(&TA));
  PetscCall(MatDestroy(&SC));
  PetscCall(MatDestroy(&R));
  PetscCall(MatDestroy(&J));
  PetscCall(MatDestroy(&Identity));
  PetscCall(PetscFree3(A, b, c));
  PetscCall(PetscFree2(At, B));
  PetscCall(VecDestroy(&uex));
  PetscCall(VecDestroy(&u));
  PetscCall(PetscFunctionListDestroy(&IRKList));

  PetscCall(PetscFinalize());
  return 0;
}

PetscErrorCode ExactSolution(Vec u, void *c, PetscReal t)
{
  UserContext *ctxt = (UserContext *)c;
  PetscInt     i, is, ie;
  PetscScalar *uarr;
  PetscReal    x, dx, a = ctxt->a, pi = PETSC_PI;

  PetscFunctionBegin;
  dx = (ctxt->xmax - ctxt->xmin) / ((PetscReal)ctxt->imax);
  PetscCall(VecGetOwnershipRange(u, &is, &ie));
  PetscCall(VecGetArray(u, &uarr));
  for (i = is; i < ie; i++) {
    x = ctxt->xmin + i * dx; /* include xmin so the coordinates honor the domain bounds */
    switch (ctxt->physics_type) {
    case PHYSICS_DIFFUSION:
      uarr[i - is] = PetscExpScalar(-4.0 * pi * pi * a * t) * PetscSinScalar(2 * pi * x);
      break;
    case PHYSICS_ADVECTION:
      uarr[i - is] = PetscSinScalar(2 * pi * (x - a * t));
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[ctxt->physics_type]);
    }
  }
  PetscCall(VecRestoreArray(u, &uarr));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Arrays should be freed with PetscFree3(A,b,c) */
static PetscErrorCode RKCreate_Gauss(PetscInt nstages, PetscScalar **gauss_A, PetscScalar **gauss_b, PetscReal **gauss_c)
{
  PetscScalar *A, *G0, *G1;
  PetscReal   *b, *c;
  PetscInt     i, j;
  Mat          G0mat, G1mat, Amat;

  PetscFunctionBegin;
  PetscCall(PetscMalloc3(PetscSqr(nstages), &A, nstages, gauss_b, nstages, &c));
  PetscCall(PetscMalloc3(nstages, &b, PetscSqr(nstages), &G0, PetscSqr(nstages), &G1));
  PetscCall(PetscDTGaussQuadrature(nstages, 0., 1., c, b));
  for (i = 0; i < nstages; i++) (*gauss_b)[i] = b[i]; /* copy to possibly-complex array */

  /* A^T = G0^{-1} G1 */
  for (i = 0; i < nstages; i++) {
    for (j = 0; j < nstages; j++) {
      G0[i * nstages + j] = PetscPowRealInt(c[i], j);
      G1[i * nstages + j] = PetscPowRealInt(c[i], j + 1) / (j + 1);
    }
  }
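  /* These encode the collocation conditions
       \sum_j a_{ij} c_j^k = c_i^{k+1} / (k+1),  k = 0, ..., nstages-1,
     i.e. each row of A integrates polynomials of degree < nstages exactly
     over [0, c_i] */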
  /* The arrays above are filled in row-major order, but MatCreateSeqDense()
     treats them as column-major, so the dense matrices created here are the transposes */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G0, &G0mat));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G1, &G1mat));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, A, &Amat));
  PetscCall(MatLUFactor(G0mat, NULL, NULL, NULL));
  PetscCall(MatMatSolve(G0mat, G1mat, Amat));
  PetscCall(MatTranspose(Amat, MAT_INPLACE_MATRIX, &Amat));

  PetscCall(MatDestroy(&G0mat));
  PetscCall(MatDestroy(&G1mat));
  PetscCall(MatDestroy(&Amat));
  PetscCall(PetscFree3(b, G0, G1));
  *gauss_A = A;
  *gauss_c = c;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode Assemble_AdvDiff(MPI_Comm comm, UserContext *user, Mat *J)
{
  PetscInt  matis, matie, i;
  PetscReal dx, dx2;

  PetscFunctionBegin;
  dx  = (user->xmax - user->xmin) / ((PetscReal)user->imax);
  dx2 = dx * dx;
  PetscCall(MatCreate(comm, J));
  PetscCall(MatSetType(*J, MATAIJ));
  PetscCall(MatSetSizes(*J, PETSC_DECIDE, PETSC_DECIDE, user->imax, user->imax));
  PetscCall(MatSetUp(*J));
  PetscCall(MatGetOwnershipRange(*J, &matis, &matie));
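  /* Each row receives the 3-point periodic stencil of -J: for diffusion,
     -(a/dx^2)*(1, -2, 1); for advection (u_t = -a u_x), (a/(2*dx))*(-1, 0, 1) */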
  for (i = matis; i < matie; i++) {
    PetscScalar values[3];
    PetscInt    col[3];
    switch (user->physics_type) {
    case PHYSICS_DIFFUSION:
      values[0] = -user->a * 1.0 / dx2;
      values[1] = user->a * 2.0 / dx2;
      values[2] = -user->a * 1.0 / dx2;
      break;
    case PHYSICS_ADVECTION:
      values[0] = -user->a * .5 / dx;
      values[1] = 0.;
      values[2] = user->a * .5 / dx;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[user->physics_type]);
    }
    /* periodic boundaries */
    if (i == 0) {
      col[0] = user->imax - 1;
      col[1] = i;
      col[2] = i + 1;
    } else if (i == user->imax - 1) {
      col[0] = i - 1;
      col[1] = i;
      col[2] = 0;
    } else {
      col[0] = i - 1;
      col[1] = i;
      col[2] = i + 1;
    }
    PetscCall(MatSetValues(*J, 1, &i, 3, col, values, INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(*J, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*J, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
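
/* Example run, mirroring the first test below:
     ./ex74 -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -irk_type gauss -irk_nstages 2 -ksp_atol 1e-6
*/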

/*TEST

   testset:
      args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -irk_type gauss -irk_nstages 2
      test:
         suffix: 1
         requires: !single
         args: -ksp_atol 1e-6 -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
      test:
         requires: hpddm !single
         suffix: hpddm
         output_file: output/ex74_1.out
         args: -ksp_atol 1e-6 -ksp_type hpddm
      test:
         requires: hpddm
         suffix: hpddm_gcrodr
         output_file: output/ex74_1_hpddm.out
         args: -ksp_atol 1e-4 -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 2

   test:
      suffix: 2
      args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100

   testset:
      suffix: 3
      requires: !single
      args: -a 1 -dt .33 -niter 3 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100 -physics_type advection
      test:
         args: -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
      test:
         requires: hpddm
         suffix: hpddm
         output_file: output/ex74_3.out
         args: -ksp_type hpddm
      test:
         requires: hpddm
         suffix: hpddm_gcrodr
         output_file: output/ex74_3_hpddm.out
         args: -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 5

TEST*/