Actual source code: ex74.c

  1: static char help[] = "Solves the constant-coefficient 1D heat equation \n\
  2: with an Implicit Runge-Kutta method using MatKAIJ.                  \n\
  3:                                                                     \n\
  4:     du      d^2 u                                                   \n\
  5:     --  = a ----- ; 0 <= x <= 1;                                    \n\
  6:     dt      dx^2                                                    \n\
  7:                                                                     \n\
  8:   with periodic boundary conditions                                 \n\
  9:                                                                     \n\
 10: 2nd order central discretization in space:                          \n\
 11:                                                                     \n\
 12:    [ d^2 u ]     u_{i+1} - 2u_i + u_{i-1}                           \n\
 13:    [ ----- ]  =  ------------------------                           \n\
 14:    [ dx^2  ]i              h^2                                      \n\
 15:                                                                     \n\
 16:     i = grid index;    h = x_{i+1}-x_i (Uniform)                    \n\
 17:     0 <= i < n         h = 1.0/n                                    \n\
 18:                                                                     \n\
 19: Thus,                                                               \n\
 20:                                                                     \n\
 21:    du                                                               \n\
 22:    --  = Ju;  J = (a/h^2) tridiagonal(1,-2,1)_n                     \n\
 23:    dt                                                               \n\
 24:                                                                     \n\
 25: Implicit Runge-Kutta method:                                        \n\
 26:                                                                     \n\
 27:   U^(k)   = u^n + dt \\sum_i a_{ki} JU^{i}                          \n\
 28:   u^{n+1} = u^n + dt \\sum_i b_i JU^{i}                             \n\
 29:                                                                     \n\
 30:   i = 1,...,s (s -> number of stages)                               \n\
 31:                                                                     \n\
 32: At each time step, we solve                                         \n\
 33:                                                                     \n\
 34:  [  1                                  ]     1                      \n\
 35:  [ -- I \\otimes A^{-1} - J \\otimes I ] U = -- u^n \\otimes A^{-1} \n\
 36:  [ dt                                  ]     dt                     \n\
 37:                                                                     \n\
 38:   where A is the Butcher tableaux of the implicit                   \n\
 39:   Runge-Kutta method,                                               \n\
 40:                                                                     \n\
 41: with MATKAIJ and KSP.                                               \n\
 42:                                                                     \n\
 43: Available IRK Methods:                                              \n\
 44:   gauss       n-stage Gauss method                                  \n\
 45:                                                                     \n";

 47: /*
 48:   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
 49:   automatically includes:
 50:   petscsys.h      - base PETSc routines
 51:   petscvec.h      - vectors
 52:   petscmat.h      - matrices
 53:   petscis.h       - index sets
 54:   petscviewer.h   - viewers
 55:   petscpc.h       - preconditioners
 56: */
 57: #include <petscksp.h>
 58: #include <petscdt.h>

 60: /* define the IRK methods available */
 61: #define IRKGAUSS "gauss"

 63: typedef enum {
 64:   PHYSICS_DIFFUSION,
 65:   PHYSICS_ADVECTION
 66: } PhysicsType;
 67: const char *const PhysicsTypes[] = {"DIFFUSION", "ADVECTION", "PhysicsType", "PHYSICS_", NULL};

 69: typedef struct __context__ {
 70:   PetscReal   a;          /* diffusion coefficient      */
 71:   PetscReal   xmin, xmax; /* domain bounds              */
 72:   PetscInt    imax;       /* number of grid points      */
 73:   PetscInt    niter;      /* number of time iterations  */
 74:   PetscReal   dt;         /* time step size             */
 75:   PhysicsType physics_type;
 76: } UserContext;

 78: static PetscErrorCode ExactSolution(Vec, void *, PetscReal);
 79: static PetscErrorCode RKCreate_Gauss(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
 80: static PetscErrorCode Assemble_AdvDiff(MPI_Comm, UserContext *, Mat *);

 82: #include <petsc/private/kernels/blockinvert.h>

 84: int main(int argc, char **argv)
 85: {
 86:   Vec               u, uex, rhs, z;
 87:   UserContext       ctxt;
 88:   PetscInt          nstages, is, ie, matis, matie, *ix, *ix2;
 89:   PetscInt          n, i, s, t, total_its;
 90:   PetscScalar      *A, *B, *At, *b, *zvals, one = 1.0;
 91:   PetscReal        *c, err, time;
 92:   Mat               Identity, J, TA, SC, R;
 93:   KSP               ksp;
 94:   PetscFunctionList IRKList      = NULL;
 95:   char              irktype[256] = IRKGAUSS;

 98:   PetscInitialize(&argc, &argv, (char *)0, help);
 99:   PetscFunctionListAdd(&IRKList, IRKGAUSS, RKCreate_Gauss);

101:   /* default value */
102:   ctxt.a            = 1.0;
103:   ctxt.xmin         = 0.0;
104:   ctxt.xmax         = 1.0;
105:   ctxt.imax         = 20;
106:   ctxt.niter        = 0;
107:   ctxt.dt           = 0.0;
108:   ctxt.physics_type = PHYSICS_DIFFUSION;

110:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "IRK options", "");
111:   PetscOptionsReal("-a", "diffusion coefficient", "<1.0>", ctxt.a, &ctxt.a, NULL);
112:   PetscOptionsInt("-imax", "grid size", "<20>", ctxt.imax, &ctxt.imax, NULL);
113:   PetscOptionsReal("-xmin", "xmin", "<0.0>", ctxt.xmin, &ctxt.xmin, NULL);
114:   PetscOptionsReal("-xmax", "xmax", "<1.0>", ctxt.xmax, &ctxt.xmax, NULL);
115:   PetscOptionsInt("-niter", "number of time steps", "<0>", ctxt.niter, &ctxt.niter, NULL);
116:   PetscOptionsReal("-dt", "time step size", "<0.0>", ctxt.dt, &ctxt.dt, NULL);
117:   PetscOptionsFList("-irk_type", "IRK method family", "", IRKList, irktype, irktype, sizeof(irktype), NULL);
118:   nstages = 2;
119:   PetscOptionsInt("-irk_nstages", "Number of stages in IRK method", "", nstages, &nstages, NULL);
120:   PetscOptionsEnum("-physics_type", "Type of process to discretize", "", PhysicsTypes, (PetscEnum)ctxt.physics_type, (PetscEnum *)&ctxt.physics_type, NULL);
121:   PetscOptionsEnd();

123:   /* allocate and initialize solution vector and exact solution */
124:   VecCreate(PETSC_COMM_WORLD, &u);
125:   VecSetSizes(u, PETSC_DECIDE, ctxt.imax);
126:   VecSetFromOptions(u);
127:   VecDuplicate(u, &uex);
128:   /* initial solution */
129:   ExactSolution(u, &ctxt, 0.0);
130:   /* exact   solution */
131:   ExactSolution(uex, &ctxt, ctxt.dt * ctxt.niter);

133:   { /* Create A,b,c */
134:     PetscErrorCode (*irkcreate)(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
135:     PetscFunctionListFind(IRKList, irktype, &irkcreate);
136:     (*irkcreate)(nstages, &A, &b, &c);
137:   }
138:   { /* Invert A */
139:     /* PETSc does not provide a routine to calculate the inverse of a general matrix.
140:      * To get the inverse of A, we form a sequential BAIJ matrix from it, consisting of a single block with block size
141:      * equal to the dimension of A, and then use MatInvertBlockDiagonal(). */
142:     Mat                A_baij;
143:     PetscInt           idxm[1] = {0}, idxn[1] = {0};
144:     const PetscScalar *A_inv;
145:     MatCreateSeqBAIJ(PETSC_COMM_SELF, nstages, nstages, nstages, 1, NULL, &A_baij);
146:     MatSetOption(A_baij, MAT_ROW_ORIENTED, PETSC_FALSE);
147:     MatSetValuesBlocked(A_baij, 1, idxm, 1, idxn, A, INSERT_VALUES);
148:     MatAssemblyBegin(A_baij, MAT_FINAL_ASSEMBLY);
149:     MatAssemblyEnd(A_baij, MAT_FINAL_ASSEMBLY);
150:     MatInvertBlockDiagonal(A_baij, &A_inv);
151:     PetscMemcpy(A, A_inv, nstages * nstages * sizeof(PetscScalar));
152:     MatDestroy(&A_baij);
153:   }
154:   /* Scale (1/dt)*A^{-1} and (1/dt)*b */
155:   for (s = 0; s < nstages * nstages; s++) A[s] *= 1.0 / ctxt.dt;
156:   for (s = 0; s < nstages; s++) b[s] *= (-ctxt.dt);

158:   /* Compute row sums At and identity B */
159:   PetscMalloc2(nstages, &At, PetscSqr(nstages), &B);
160:   for (s = 0; s < nstages; s++) {
161:     At[s] = 0;
162:     for (t = 0; t < nstages; t++) {
163:       At[s] += A[s + nstages * t];        /* Row sums of  */
164:       B[s + nstages * t] = 1. * (s == t); /* identity */
165:     }
166:   }

168:   /* allocate and calculate the (-J) matrix */
169:   switch (ctxt.physics_type) {
170:   case PHYSICS_ADVECTION:
171:   case PHYSICS_DIFFUSION:
172:     Assemble_AdvDiff(PETSC_COMM_WORLD, &ctxt, &J);
173:   }
174:   MatCreate(PETSC_COMM_WORLD, &Identity);
175:   MatSetType(Identity, MATAIJ);
176:   MatGetOwnershipRange(J, &matis, &matie);
177:   MatSetSizes(Identity, matie - matis, matie - matis, ctxt.imax, ctxt.imax);
178:   MatSetUp(Identity);
179:   for (i = matis; i < matie; i++) MatSetValues(Identity, 1, &i, 1, &i, &one, INSERT_VALUES);
180:   MatAssemblyBegin(Identity, MAT_FINAL_ASSEMBLY);
181:   MatAssemblyEnd(Identity, MAT_FINAL_ASSEMBLY);

183:   /* Create the KAIJ matrix for solving the stages */
184:   MatCreateKAIJ(J, nstages, nstages, A, B, &TA);

186:   /* Create the KAIJ matrix for step completion */
187:   MatCreateKAIJ(J, 1, nstages, NULL, b, &SC);

189:   /* Create the KAIJ matrix to create the R for solving the stages */
190:   MatCreateKAIJ(Identity, nstages, 1, NULL, At, &R);

192:   /* Create and set options for KSP */
193:   KSPCreate(PETSC_COMM_WORLD, &ksp);
194:   KSPSetOperators(ksp, TA, TA);
195:   KSPSetFromOptions(ksp);

197:   /* Allocate work and right-hand-side vectors */
198:   VecCreate(PETSC_COMM_WORLD, &z);
199:   VecSetFromOptions(z);
200:   VecSetSizes(z, PETSC_DECIDE, ctxt.imax * nstages);
201:   VecDuplicate(z, &rhs);

203:   VecGetOwnershipRange(u, &is, &ie);
204:   PetscMalloc3(nstages, &ix, nstages, &zvals, ie - is, &ix2);
205:   /* iterate in time */
206:   for (n = 0, time = 0., total_its = 0; n < ctxt.niter; n++) {
207:     PetscInt its;

209:     /* compute and set the right hand side */
210:     MatMult(R, u, rhs);

212:     /* Solve the system */
213:     KSPSolve(ksp, rhs, z);
214:     KSPGetIterationNumber(ksp, &its);
215:     total_its += its;

217:     /* Update the solution */
218:     MatMultAdd(SC, z, u, u);

220:     /* time step complete */
221:     time += ctxt.dt;
222:   }
223:   PetscFree3(ix, ix2, zvals);

225:   /* Deallocate work and right-hand-side vectors */
226:   VecDestroy(&z);
227:   VecDestroy(&rhs);

229:   /* Calculate error in final solution */
230:   VecAYPX(uex, -1.0, u);
231:   VecNorm(uex, NORM_2, &err);
232:   err = PetscSqrtReal(err * err / ((PetscReal)ctxt.imax));
233:   PetscPrintf(PETSC_COMM_WORLD, "L2 norm of the numerical error = %g (time=%g)\n", (double)err, (double)time);
234:   PetscPrintf(PETSC_COMM_WORLD, "Number of time steps: %" PetscInt_FMT " (%" PetscInt_FMT " Krylov iterations)\n", ctxt.niter, total_its);

236:   /* Free up memory */
237:   KSPDestroy(&ksp);
238:   MatDestroy(&TA);
239:   MatDestroy(&SC);
240:   MatDestroy(&R);
241:   MatDestroy(&J);
242:   MatDestroy(&Identity);
243:   PetscFree3(A, b, c);
244:   PetscFree2(At, B);
245:   VecDestroy(&uex);
246:   VecDestroy(&u);
247:   PetscFunctionListDestroy(&IRKList);

249:   PetscFinalize();
250:   return 0;
251: }

253: PetscErrorCode ExactSolution(Vec u, void *c, PetscReal t)
254: {
255:   UserContext *ctxt = (UserContext *)c;
256:   PetscInt     i, is, ie;
257:   PetscScalar *uarr;
258:   PetscReal    x, dx, a = ctxt->a, pi = PETSC_PI;

260:   dx = (ctxt->xmax - ctxt->xmin) / ((PetscReal)ctxt->imax);
261:   VecGetOwnershipRange(u, &is, &ie);
262:   VecGetArray(u, &uarr);
263:   for (i = is; i < ie; i++) {
264:     x = i * dx;
265:     switch (ctxt->physics_type) {
266:     case PHYSICS_DIFFUSION:
267:       uarr[i - is] = PetscExpScalar(-4.0 * pi * pi * a * t) * PetscSinScalar(2 * pi * x);
268:       break;
269:     case PHYSICS_ADVECTION:
270:       uarr[i - is] = PetscSinScalar(2 * pi * (x - a * t));
271:       break;
272:     default:
273:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[ctxt->physics_type]);
274:     }
275:   }
276:   VecRestoreArray(u, &uarr);
277:   return 0;
278: }

280: /* Arrays should be freed with PetscFree3(A,b,c) */
281: static PetscErrorCode RKCreate_Gauss(PetscInt nstages, PetscScalar **gauss_A, PetscScalar **gauss_b, PetscReal **gauss_c)
282: {
283:   PetscScalar *A, *G0, *G1;
284:   PetscReal   *b, *c;
285:   PetscInt     i, j;
286:   Mat          G0mat, G1mat, Amat;

288:   PetscMalloc3(PetscSqr(nstages), &A, nstages, gauss_b, nstages, &c);
289:   PetscMalloc3(nstages, &b, PetscSqr(nstages), &G0, PetscSqr(nstages), &G1);
290:   PetscDTGaussQuadrature(nstages, 0., 1., c, b);
291:   for (i = 0; i < nstages; i++) (*gauss_b)[i] = b[i]; /* copy to possibly-complex array */

293:   /* A^T = G0^{-1} G1 */
294:   for (i = 0; i < nstages; i++) {
295:     for (j = 0; j < nstages; j++) {
296:       G0[i * nstages + j] = PetscPowRealInt(c[i], j);
297:       G1[i * nstages + j] = PetscPowRealInt(c[i], j + 1) / (j + 1);
298:     }
299:   }
300:   /* The arrays above are row-aligned, but we create dense matrices as the transpose */
301:   MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G0, &G0mat);
302:   MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G1, &G1mat);
303:   MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, A, &Amat);
304:   MatLUFactor(G0mat, NULL, NULL, NULL);
305:   MatMatSolve(G0mat, G1mat, Amat);
306:   MatTranspose(Amat, MAT_INPLACE_MATRIX, &Amat);

308:   MatDestroy(&G0mat);
309:   MatDestroy(&G1mat);
310:   MatDestroy(&Amat);
311:   PetscFree3(b, G0, G1);
312:   *gauss_A = A;
313:   *gauss_c = c;
314:   return 0;
315: }

317: static PetscErrorCode Assemble_AdvDiff(MPI_Comm comm, UserContext *user, Mat *J)
318: {
319:   PetscInt  matis, matie, i;
320:   PetscReal dx, dx2;

322:   dx  = (user->xmax - user->xmin) / ((PetscReal)user->imax);
323:   dx2 = dx * dx;
324:   MatCreate(comm, J);
325:   MatSetType(*J, MATAIJ);
326:   MatSetSizes(*J, PETSC_DECIDE, PETSC_DECIDE, user->imax, user->imax);
327:   MatSetUp(*J);
328:   MatGetOwnershipRange(*J, &matis, &matie);
329:   for (i = matis; i < matie; i++) {
330:     PetscScalar values[3];
331:     PetscInt    col[3];
332:     switch (user->physics_type) {
333:     case PHYSICS_DIFFUSION:
334:       values[0] = -user->a * 1.0 / dx2;
335:       values[1] = user->a * 2.0 / dx2;
336:       values[2] = -user->a * 1.0 / dx2;
337:       break;
338:     case PHYSICS_ADVECTION:
339:       values[0] = -user->a * .5 / dx;
340:       values[1] = 0.;
341:       values[2] = user->a * .5 / dx;
342:       break;
343:     default:
344:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[user->physics_type]);
345:     }
346:     /* periodic boundaries */
347:     if (i == 0) {
348:       col[0] = user->imax - 1;
349:       col[1] = i;
350:       col[2] = i + 1;
351:     } else if (i == user->imax - 1) {
352:       col[0] = i - 1;
353:       col[1] = i;
354:       col[2] = 0;
355:     } else {
356:       col[0] = i - 1;
357:       col[1] = i;
358:       col[2] = i + 1;
359:     }
360:     MatSetValues(*J, 1, &i, 3, col, values, INSERT_VALUES);
361:   }
362:   MatAssemblyBegin(*J, MAT_FINAL_ASSEMBLY);
363:   MatAssemblyEnd(*J, MAT_FINAL_ASSEMBLY);
364:   return 0;
365: }

367: /*TEST
368:  testset:
369:    suffix: 1
370:    args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -irk_type gauss -irk_nstages 2
371:    test:
372:      args: -ksp_atol 1e-6
373:    test:
374:      requires: hpddm !single
375:      suffix: hpddm
376:      output_file: output/ex74_1.out
377:      args: -ksp_atol 1e-6 -ksp_type hpddm
378:    test:
379:      requires: hpddm
380:      suffix: hpddm_gcrodr
381:      output_file: output/ex74_1_hpddm.out
382:      args: -ksp_atol 1e-4 -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 2
383:  test:
384:    suffix: 2
385:    args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100
386:  testset:
387:    suffix: 3
388:    requires: !single
389:    args: -a 1 -dt .33 -niter 3 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100 -physics_type advection
390:    test:
391:      args:
392:    test:
393:      requires: hpddm
394:      suffix: hpddm
395:      output_file: output/ex74_3.out
396:      args: -ksp_type hpddm
397:    test:
398:      requires: hpddm
399:      suffix: hpddm_gcrodr
400:      output_file: output/ex74_3_hpddm.out
401:      args: -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 5

403: TEST*/