Actual source code: ex74.c
static char help[] = "Solves the constant-coefficient 1D heat equation \n\
with an Implicit Runge-Kutta method using MatKAIJ. \n\
\n\
    du      d^2 u \n\
    --  = a ----- ;  0 <= x <= 1; \n\
    dt      dx^2 \n\
\n\
  with periodic boundary conditions \n\
\n\
2nd order central discretization in space: \n\
\n\
   [ d^2 u ]     u_{i+1} - 2u_i + u_{i-1} \n\
   [ ----- ]  =  ------------------------ \n\
   [ dx^2  ]i              h^2 \n\
\n\
   i = grid index;   h = x_{i+1} - x_i (uniform) \n\
   0 <= i < n        h = 1.0/n \n\
\n\
Thus, \n\
\n\
    du \n\
    --  = Ju;   J = (a/h^2) tridiagonal(1,-2,1)_n \n\
    dt \n\
\n\
Implicit Runge-Kutta method: \n\
\n\
  U^(k)   = u^n + dt \\sum_i a_{ki} JU^{i} \n\
  u^{n+1} = u^n + dt \\sum_i b_i JU^{i} \n\
\n\
  i = 1,...,s (s -> number of stages) \n\
\n\
At each time step, we solve \n\
\n\
  [ 1                                  ]      1 \n\
  [ -- I \\otimes A^{-1} - J \\otimes I ] U = -- u^n \\otimes (A^{-1} e) \n\
  [ dt                                 ]      dt \n\
\n\
  where A = (a_{ki}) is the coefficient matrix of the Butcher tableau \n\
  of the implicit Runge-Kutta method and e = (1,...,1)^T, \n\
\n\
with MATKAIJ and KSP. \n\
\n\
Available IRK Methods: \n\
  gauss       n-stage Gauss method \n\
\n";

/*
  Include "petscksp.h" so that we can use KSP solvers. Note that this file
  automatically includes:
    petscsys.h    - base PETSc routines
    petscvec.h    - vectors
    petscmat.h    - matrices
    petscis.h     - index sets
    petscviewer.h - viewers
    petscpc.h     - preconditioners
*/
#include <petscksp.h>
#include <petscdt.h>

/* define the IRK methods available */
#define IRKGAUSS "gauss"

typedef enum {
  PHYSICS_DIFFUSION,
  PHYSICS_ADVECTION
} PhysicsType;
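/* String names for PhysicsType, in the layout PetscOptionsEnum() expects: the
   value names, then the enum type name, then the option prefix, then a NULL
   sentinel */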
const char *const PhysicsTypes[] = {"DIFFUSION", "ADVECTION", "PhysicsType", "PHYSICS_", NULL};

typedef struct __context__ {
  PetscReal   a;          /* diffusion coefficient */
  PetscReal   xmin, xmax; /* domain bounds */
  PetscInt    imax;       /* number of grid points */
  PetscInt    niter;      /* number of time iterations */
  PetscReal   dt;         /* time step size */
  PhysicsType physics_type;
} UserContext;

static PetscErrorCode ExactSolution(Vec, void *, PetscReal);
static PetscErrorCode RKCreate_Gauss(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
static PetscErrorCode Assemble_AdvDiff(MPI_Comm, UserContext *, Mat *);

int main(int argc, char **argv)
{
  Vec               u, uex, rhs, z;
  UserContext       ctxt;
  PetscInt          nstages, is, ie, matis, matie, *ix, *ix2;
  PetscInt          n, i, s, t, total_its;
  PetscScalar      *A, *B, *At, *b, *zvals, one = 1.0;
  PetscReal        *c, err, time;
  Mat               Identity, J, TA, SC, R;
  KSP               ksp;
  PetscFunctionList IRKList = NULL;
  char              irktype[256] = IRKGAUSS;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(PetscFunctionListAdd(&IRKList, IRKGAUSS, RKCreate_Gauss));

  /* default values */
  ctxt.a            = 1.0;
  ctxt.xmin         = 0.0;
  ctxt.xmax         = 1.0;
  ctxt.imax         = 20;
  ctxt.niter        = 0;
  ctxt.dt           = 0.0;
  ctxt.physics_type = PHYSICS_DIFFUSION;

  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "IRK options", "");
  PetscCall(PetscOptionsReal("-a", "diffusion coefficient", "<1.0>", ctxt.a, &ctxt.a, NULL));
  PetscCall(PetscOptionsInt("-imax", "grid size", "<20>", ctxt.imax, &ctxt.imax, NULL));
  PetscCall(PetscOptionsReal("-xmin", "xmin", "<0.0>", ctxt.xmin, &ctxt.xmin, NULL));
  PetscCall(PetscOptionsReal("-xmax", "xmax", "<1.0>", ctxt.xmax, &ctxt.xmax, NULL));
  PetscCall(PetscOptionsInt("-niter", "number of time steps", "<0>", ctxt.niter, &ctxt.niter, NULL));
  PetscCall(PetscOptionsReal("-dt", "time step size", "<0.0>", ctxt.dt, &ctxt.dt, NULL));
  PetscCall(PetscOptionsFList("-irk_type", "IRK method family", "", IRKList, irktype, irktype, sizeof(irktype), NULL));
  nstages = 2;
  PetscCall(PetscOptionsInt("-irk_nstages", "Number of stages in IRK method", "", nstages, &nstages, NULL));
  PetscCall(PetscOptionsEnum("-physics_type", "Type of process to discretize", "", PhysicsTypes, (PetscEnum)ctxt.physics_type, (PetscEnum *)&ctxt.physics_type, NULL));
  PetscOptionsEnd();

  /* allocate and initialize solution vector and exact solution */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
  PetscCall(VecSetSizes(u, PETSC_DECIDE, ctxt.imax));
  PetscCall(VecSetFromOptions(u));
  PetscCall(VecDuplicate(u, &uex));
  /* initial solution */
  PetscCall(ExactSolution(u, &ctxt, 0.0));
  /* exact solution */
  PetscCall(ExactSolution(uex, &ctxt, ctxt.dt * ctxt.niter));

  { /* Create A,b,c */
    PetscErrorCode (*irkcreate)(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
    PetscCall(PetscFunctionListFind(IRKList, irktype, &irkcreate));
    PetscCall((*irkcreate)(nstages, &A, &b, &c));
  }
  { /* Invert A */
    /* PETSc does not provide a routine to calculate the inverse of a general matrix.
     * To get the inverse of A, we form a sequential BAIJ matrix from it, consisting of a single block
     * with block size equal to the dimension of A, and then use MatInvertBlockDiagonal(). */
    Mat                A_baij;
    PetscInt           idxm[1] = {0}, idxn[1] = {0};
    const PetscScalar *A_inv;
    PetscCall(MatCreateSeqBAIJ(PETSC_COMM_SELF, nstages, nstages, nstages, 1, NULL, &A_baij));
    PetscCall(MatSetOption(A_baij, MAT_ROW_ORIENTED, PETSC_FALSE));
    PetscCall(MatSetValuesBlocked(A_baij, 1, idxm, 1, idxn, A, INSERT_VALUES));
    PetscCall(MatAssemblyBegin(A_baij, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A_baij, MAT_FINAL_ASSEMBLY));
    PetscCall(MatInvertBlockDiagonal(A_baij, &A_inv));
    PetscCall(PetscMemcpy(A, A_inv, nstages * nstages * sizeof(PetscScalar)));
    PetscCall(MatDestroy(&A_baij));
  }
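  /* A now holds A^{-1} in column-major order: MatInvertBlockDiagonal() returns
     the block inverses in column-major order, consistent with how the array was
     inserted above (MAT_ROW_ORIENTED set to PETSC_FALSE) and with the indexing
     A[s + nstages*t] (row s, column t) used below */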
  /* Scale A^{-1} by 1/dt and b by -dt (the minus sign compensates for the
     assembled operator below being -J) */
  for (s = 0; s < nstages * nstages; s++) A[s] *= 1.0 / ctxt.dt;
  for (s = 0; s < nstages; s++) b[s] *= (-ctxt.dt);
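
  /* With these scalings the KSP below solves the system from the header,
       [ (1/dt) I \otimes A^{-1} - J \otimes I ] U = (1/dt) u^n \otimes (A^{-1} e),  e = (1,...,1)^T,
     which is the standard stage system U = u^n \otimes e + dt (A \otimes J) U
     left-multiplied by (1/dt) A^{-1}; note that dt must be nonzero (set it
     with -dt), otherwise the scaling above divides by zero */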

  /* Compute row sums At and identity B */
  PetscCall(PetscMalloc2(nstages, &At, PetscSqr(nstages), &B));
  for (s = 0; s < nstages; s++) {
    At[s] = 0;
    for (t = 0; t < nstages; t++) {
      At[s] += A[s + nstages * t];        /* row sums of (1/dt)*A^{-1} */
      B[s + nstages * t] = 1. * (s == t); /* s x s identity */
    }
  }
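
  /* At = (1/dt) A^{-1} e feeds the right-hand-side operator R below; B is the
     identity factor of the stage operator TA */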

  /* allocate and calculate the (-J) matrix */
  switch (ctxt.physics_type) {
  case PHYSICS_ADVECTION:
  case PHYSICS_DIFFUSION:
    PetscCall(Assemble_AdvDiff(PETSC_COMM_WORLD, &ctxt, &J));
  }
  PetscCall(MatCreate(PETSC_COMM_WORLD, &Identity));
  PetscCall(MatSetType(Identity, MATAIJ));
  PetscCall(MatGetOwnershipRange(J, &matis, &matie));
  PetscCall(MatSetSizes(Identity, matie - matis, matie - matis, ctxt.imax, ctxt.imax));
  PetscCall(MatSetUp(Identity));
  for (i = matis; i < matie; i++) PetscCall(MatSetValues(Identity, 1, &i, 1, &i, &one, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(Identity, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Identity, MAT_FINAL_ASSEMBLY));

  /* Create the KAIJ matrix for solving the stages */
  PetscCall(MatCreateKAIJ(J, nstages, nstages, A, B, &TA));
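  /* A MATKAIJ matrix applies [I \otimes S + J \otimes T] without assembling it;
     here S = (1/dt) A^{-1} and T = I_s, which is exactly the stage operator */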

  /* Create the KAIJ matrix for step completion */
  PetscCall(MatCreateKAIJ(J, 1, nstages, NULL, b, &SC));
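  /* Here S is zero and T = -dt b^T, so MatMultAdd(SC, z, u, u) in the loop below
     computes u^{n+1} = u^n + dt \sum_i b_i J U^{i} (the factor -dt cancels the
     sign of the assembled -J) */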

  /* Create the KAIJ matrix R that builds the stage right-hand side from u^n */
  PetscCall(MatCreateKAIJ(Identity, nstages, 1, NULL, At, &R));
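  /* Here S is zero and T = At (nstages x 1), so MatMult(R, u, rhs) stacks
     At_k * u^n into stage k of rhs, forming (1/dt) u^n \otimes (A^{-1} e) */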

  /* Create and set options for KSP */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, TA, TA));
  PetscCall(KSPSetFromOptions(ksp));

  /* Allocate work and right-hand-side vectors */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &z));
  PetscCall(VecSetFromOptions(z));
  PetscCall(VecSetSizes(z, PETSC_DECIDE, ctxt.imax * nstages));
  PetscCall(VecDuplicate(z, &rhs));

  PetscCall(VecGetOwnershipRange(u, &is, &ie));
  PetscCall(PetscMalloc3(nstages, &ix, nstages, &zvals, ie - is, &ix2)); /* scratch arrays; unused in this version of the example */
  /* iterate in time */
  for (n = 0, time = 0., total_its = 0; n < ctxt.niter; n++) {
    PetscInt its;

    /* compute and set the right-hand side */
    PetscCall(MatMult(R, u, rhs));

    /* Solve the system */
    PetscCall(KSPSolve(ksp, rhs, z));
    PetscCall(KSPGetIterationNumber(ksp, &its));
    total_its += its;

    /* Update the solution */
    PetscCall(MatMultAdd(SC, z, u, u));

    /* time step complete */
    time += ctxt.dt;
  }
  PetscCall(PetscFree3(ix, zvals, ix2)); /* arguments must match the PetscMalloc3() order above */

  /* Deallocate work and right-hand-side vectors */
  PetscCall(VecDestroy(&z));
  PetscCall(VecDestroy(&rhs));

  /* Calculate error in final solution: scale the discrete 2-norm by 1/sqrt(imax)
     to get a grid-size-independent (RMS) measure */
  PetscCall(VecAYPX(uex, -1.0, u));
  PetscCall(VecNorm(uex, NORM_2, &err));
  err = PetscSqrtReal(err * err / ((PetscReal)ctxt.imax));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "L2 norm of the numerical error = %g (time=%g)\n", (double)err, (double)time));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of time steps: %" PetscInt_FMT " (%" PetscInt_FMT " Krylov iterations)\n", ctxt.niter, total_its));

  /* Free up memory */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(MatDestroy(&TA));
  PetscCall(MatDestroy(&SC));
  PetscCall(MatDestroy(&R));
  PetscCall(MatDestroy(&J));
  PetscCall(MatDestroy(&Identity));
  PetscCall(PetscFree3(A, b, c));
  PetscCall(PetscFree2(At, B));
  PetscCall(VecDestroy(&uex));
  PetscCall(VecDestroy(&u));
  PetscCall(PetscFunctionListDestroy(&IRKList));

  PetscCall(PetscFinalize());
  return 0;
}

PetscErrorCode ExactSolution(Vec u, void *c, PetscReal t)
{
  UserContext *ctxt = (UserContext *)c;
  PetscInt     i, is, ie;
  PetscScalar *uarr;
  PetscReal    x, dx, a = ctxt->a, pi = PETSC_PI;

  PetscFunctionBegin;
  dx = (ctxt->xmax - ctxt->xmin) / ((PetscReal)ctxt->imax);
  PetscCall(VecGetOwnershipRange(u, &is, &ie));
  PetscCall(VecGetArray(u, &uarr));
  for (i = is; i < ie; i++) {
    x = ctxt->xmin + i * dx; /* include xmin so the coordinates honor the domain bounds */
    switch (ctxt->physics_type) {
    case PHYSICS_DIFFUSION:
      uarr[i - is] = PetscExpScalar(-4.0 * pi * pi * a * t) * PetscSinScalar(2 * pi * x);
      break;
    case PHYSICS_ADVECTION:
      uarr[i - is] = PetscSinScalar(2 * pi * (x - a * t));
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[ctxt->physics_type]);
    }
  }
  PetscCall(VecRestoreArray(u, &uarr));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Arrays should be freed with PetscFree3(A,b,c) */
static PetscErrorCode RKCreate_Gauss(PetscInt nstages, PetscScalar **gauss_A, PetscScalar **gauss_b, PetscReal **gauss_c)
{
  PetscScalar *A, *G0, *G1;
  PetscReal   *b, *c;
  PetscInt     i, j;
  Mat          G0mat, G1mat, Amat;

  PetscFunctionBegin;
  PetscCall(PetscMalloc3(PetscSqr(nstages), &A, nstages, gauss_b, nstages, &c));
  PetscCall(PetscMalloc3(nstages, &b, PetscSqr(nstages), &G0, PetscSqr(nstages), &G1));
  PetscCall(PetscDTGaussQuadrature(nstages, 0., 1., c, b));
  for (i = 0; i < nstages; i++) (*gauss_b)[i] = b[i]; /* copy to possibly-complex array */

  /* A^T = G0^{-1} G1 */
  for (i = 0; i < nstages; i++) {
    for (j = 0; j < nstages; j++) {
      G0[i * nstages + j] = PetscPowRealInt(c[i], j);
      G1[i * nstages + j] = PetscPowRealInt(c[i], j + 1) / (j + 1);
    }
  }
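  /* These encode the collocation conditions
       \sum_j a_{ij} c_j^k = c_i^{k+1} / (k+1),  k = 0, ..., nstages-1,
     i.e. each row of A integrates polynomials of degree < nstages exactly
     over [0, c_i] */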
  /* The arrays above are filled in row-major order, but MatCreateSeqDense()
     treats them as column-major, so the dense matrices created here are the transposes */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G0, &G0mat));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G1, &G1mat));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, A, &Amat));
  PetscCall(MatLUFactor(G0mat, NULL, NULL, NULL));
  PetscCall(MatMatSolve(G0mat, G1mat, Amat));
  PetscCall(MatTranspose(Amat, MAT_INPLACE_MATRIX, &Amat));

  PetscCall(MatDestroy(&G0mat));
  PetscCall(MatDestroy(&G1mat));
  PetscCall(MatDestroy(&Amat));
  PetscCall(PetscFree3(b, G0, G1));
  *gauss_A = A;
  *gauss_c = c;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode Assemble_AdvDiff(MPI_Comm comm, UserContext *user, Mat *J)
{
  PetscInt  matis, matie, i;
  PetscReal dx, dx2;

  PetscFunctionBegin;
  dx  = (user->xmax - user->xmin) / ((PetscReal)user->imax);
  dx2 = dx * dx;
  PetscCall(MatCreate(comm, J));
  PetscCall(MatSetType(*J, MATAIJ));
  PetscCall(MatSetSizes(*J, PETSC_DECIDE, PETSC_DECIDE, user->imax, user->imax));
  PetscCall(MatSetUp(*J));
  PetscCall(MatGetOwnershipRange(*J, &matis, &matie));
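  /* Each row receives the 3-point periodic stencil of -J: for diffusion,
     -(a/dx^2)*(1, -2, 1); for advection (u_t = -a u_x), (a/(2*dx))*(-1, 0, 1) */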
  for (i = matis; i < matie; i++) {
    PetscScalar values[3];
    PetscInt    col[3];
    switch (user->physics_type) {
    case PHYSICS_DIFFUSION:
      values[0] = -user->a * 1.0 / dx2;
      values[1] = user->a * 2.0 / dx2;
      values[2] = -user->a * 1.0 / dx2;
      break;
    case PHYSICS_ADVECTION:
      values[0] = -user->a * .5 / dx;
      values[1] = 0.;
      values[2] = user->a * .5 / dx;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[user->physics_type]);
    }
    /* periodic boundaries */
    if (i == 0) {
      col[0] = user->imax - 1;
      col[1] = i;
      col[2] = i + 1;
    } else if (i == user->imax - 1) {
      col[0] = i - 1;
      col[1] = i;
      col[2] = 0;
    } else {
      col[0] = i - 1;
      col[1] = i;
      col[2] = i + 1;
    }
    PetscCall(MatSetValues(*J, 1, &i, 3, col, values, INSERT_VALUES));
  }
  PetscCall(MatAssemblyBegin(*J, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*J, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
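
/* Example run, mirroring the first test below:
     ./ex74 -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -irk_type gauss -irk_nstages 2 -ksp_atol 1e-6
*/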

/*TEST

   testset:
      args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -irk_type gauss -irk_nstages 2
      test:
         suffix: 1
         requires: !single
         args: -ksp_atol 1e-6 -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
      test:
         requires: hpddm !single
         suffix: hpddm
         output_file: output/ex74_1.out
         args: -ksp_atol 1e-6 -ksp_type hpddm
      test:
         requires: hpddm
         suffix: hpddm_gcrodr
         output_file: output/ex74_1_hpddm.out
         args: -ksp_atol 1e-4 -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 2

   test:
      suffix: 2
      args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100

   testset:
      suffix: 3
      requires: !single
      args: -a 1 -dt .33 -niter 3 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100 -physics_type advection
      test:
         args: -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
      test:
         requires: hpddm
         suffix: hpddm
         output_file: output/ex74_3.out
         args: -ksp_type hpddm
      test:
         requires: hpddm
         suffix: hpddm_gcrodr
         output_file: output/ex74_3_hpddm.out
         args: -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 5

TEST*/