Actual source code: ex51.c
1: static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\
2: uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\
3: matrix assembly, the matrix is intentionally laid out across processors\n\
4: differently from the way it is assembled. Input arguments are:\n\
5: -m <size> -p <order> : mesh size and polynomial order\n\n";
7: /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
8: based on src/ksp/ksp/tutorials/ex3.c
9: */
11: #include <petscksp.h>
13: /* Declare user-defined routines */
14: static PetscReal src(PetscReal, PetscReal);
15: static PetscReal ubdy(PetscReal, PetscReal);
16: static PetscReal polyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal);
17: static PetscReal derivPolyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal);
18: static PetscErrorCode Form1DElementMass(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *);
19: static PetscErrorCode Form1DElementStiffness(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *);
20: static PetscErrorCode Form2DElementMass(PetscInt, PetscScalar *, PetscScalar *);
21: static PetscErrorCode Form2DElementStiffness(PetscInt, PetscScalar *, PetscScalar *, PetscScalar *);
22: static PetscErrorCode FormNodalRhs(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *);
23: static PetscErrorCode FormNodalSoln(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *);
24: static void leggaulob(PetscReal, PetscReal, PetscReal[], PetscReal[], PetscInt);
25: static void qAndLEvaluation(PetscInt, PetscReal, PetscReal *, PetscReal *, PetscReal *);
27: int main(int argc, char **args)
28: {
29: PetscInt p = 2, m = 5;
30: PetscInt num1Dnodes, num2Dnodes;
31: PetscScalar *Ke1D, *Ke2D, *Me1D, *Me2D;
32: PetscScalar *r, *ue, val;
33: Vec u, ustar, b, q;
34: Mat A, Mass;
35: KSP ksp;
36: PetscInt M, N;
37: PetscMPIInt rank, size;
38: PetscReal x, y, h, norm;
39: PetscInt *idx, indx, count, *rows, i, j, k, start, end, its;
40: PetscReal *rowsx, *rowsy;
41: PetscReal *gllNode, *gllWgts;
43: PetscFunctionBeginUser;
44: PetscCall(PetscInitialize(&argc, &args, NULL, help));
45: PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Options for p-FEM", "");
46: PetscCall(PetscOptionsInt("-m", "Number of elements in each direction", "None", m, &m, NULL));
47: PetscCall(PetscOptionsInt("-p", "Order of each element (tensor product basis)", "None", p, &p, NULL));
48: PetscOptionsEnd();
49: PetscCheck(p > 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Option -p value should be greater than zero");
50: N = (p * m + 1) * (p * m + 1); /* dimension of matrix */
51: M = m * m; /* number of elements */
52: h = 1.0 / m; /* mesh width */
53: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
54: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
56: /* Create stiffness matrix */
57: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
58: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
59: PetscCall(MatSetFromOptions(A));
60: PetscCall(MatSetUp(A));
62: /* Create matrix */
63: PetscCall(MatCreate(PETSC_COMM_WORLD, &Mass));
64: PetscCall(MatSetSizes(Mass, PETSC_DECIDE, PETSC_DECIDE, N, N));
65: PetscCall(MatSetFromOptions(Mass));
66: PetscCall(MatSetUp(Mass));
67: start = rank * (M / size) + ((M % size) < rank ? (M % size) : rank);
68: end = start + M / size + ((M % size) > rank);
70: /* Allocate element stiffness matrices */
71: num1Dnodes = (p + 1);
72: num2Dnodes = num1Dnodes * num1Dnodes;
74: PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Me1D));
75: PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Ke1D));
76: PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Me2D));
77: PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Ke2D));
78: PetscCall(PetscMalloc1(num2Dnodes, &idx));
79: PetscCall(PetscMalloc1(num2Dnodes, &r));
80: PetscCall(PetscMalloc1(num2Dnodes, &ue));
82: /* Allocate quadrature and create stiffness matrices */
83: PetscCall(PetscMalloc1(p + 1, &gllNode));
84: PetscCall(PetscMalloc1(p + 1, &gllWgts));
85: leggaulob(0.0, 1.0, gllNode, gllWgts, p); /* Get GLL nodes and weights */
86: PetscCall(Form1DElementMass(h, p, gllNode, gllWgts, Me1D));
87: PetscCall(Form1DElementStiffness(h, p, gllNode, gllWgts, Ke1D));
88: PetscCall(Form2DElementMass(p, Me1D, Me2D));
89: PetscCall(Form2DElementStiffness(p, Ke1D, Me1D, Ke2D));
91: /* Assemble matrix */
92: for (i = start; i < end; i++) {
93: indx = 0;
94: for (k = 0; k < (p + 1); ++k) {
95: for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j;
96: }
97: PetscCall(MatSetValues(A, num2Dnodes, idx, num2Dnodes, idx, Ke2D, ADD_VALUES));
98: PetscCall(MatSetValues(Mass, num2Dnodes, idx, num2Dnodes, idx, Me2D, ADD_VALUES));
99: }
100: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
101: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
102: PetscCall(MatAssemblyBegin(Mass, MAT_FINAL_ASSEMBLY));
103: PetscCall(MatAssemblyEnd(Mass, MAT_FINAL_ASSEMBLY));
105: PetscCall(PetscFree(Me1D));
106: PetscCall(PetscFree(Ke1D));
107: PetscCall(PetscFree(Me2D));
108: PetscCall(PetscFree(Ke2D));
110: /* Create right-hand side and solution vectors */
111: PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
112: PetscCall(VecSetSizes(u, PETSC_DECIDE, N));
113: PetscCall(VecSetFromOptions(u));
114: PetscCall(PetscObjectSetName((PetscObject)u, "Approx. Solution"));
115: PetscCall(VecDuplicate(u, &b));
116: PetscCall(PetscObjectSetName((PetscObject)b, "Right hand side"));
117: PetscCall(VecDuplicate(u, &q));
118: PetscCall(PetscObjectSetName((PetscObject)q, "Right hand side 2"));
119: PetscCall(VecDuplicate(b, &ustar));
120: PetscCall(VecSet(u, 0.0));
121: PetscCall(VecSet(b, 0.0));
122: PetscCall(VecSet(q, 0.0));
124: /* Assemble nodal right-hand side and soln vector */
125: for (i = start; i < end; i++) {
126: x = h * (i % m);
127: y = h * (i / m);
128: indx = 0;
129: for (k = 0; k < (p + 1); ++k) {
130: for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j;
131: }
132: PetscCall(FormNodalRhs(p, x, y, h, gllNode, r));
133: PetscCall(FormNodalSoln(p, x, y, h, gllNode, ue));
134: PetscCall(VecSetValues(q, num2Dnodes, idx, r, INSERT_VALUES));
135: PetscCall(VecSetValues(ustar, num2Dnodes, idx, ue, INSERT_VALUES));
136: }
137: PetscCall(VecAssemblyBegin(q));
138: PetscCall(VecAssemblyEnd(q));
139: PetscCall(VecAssemblyBegin(ustar));
140: PetscCall(VecAssemblyEnd(ustar));
142: PetscCall(PetscFree(idx));
143: PetscCall(PetscFree(r));
144: PetscCall(PetscFree(ue));
146: /* Get FE right-hand side vector */
147: PetscCall(MatMult(Mass, q, b));
149: /* Modify matrix and right-hand side for Dirichlet boundary conditions */
150: PetscCall(PetscMalloc1(4 * p * m, &rows));
151: PetscCall(PetscMalloc1(4 * p * m, &rowsx));
152: PetscCall(PetscMalloc1(4 * p * m, &rowsy));
153: for (i = 0; i < p * m + 1; i++) {
154: rows[i] = i; /* bottom */
155: rowsx[i] = (i / p) * h + gllNode[i % p] * h;
156: rowsy[i] = 0.0;
157: rows[3 * p * m - 1 + i] = (p * m) * (p * m + 1) + i; /* top */
158: rowsx[3 * p * m - 1 + i] = (i / p) * h + gllNode[i % p] * h;
159: rowsy[3 * p * m - 1 + i] = 1.0;
160: }
161: count = p * m + 1; /* left side */
162: indx = 1;
163: for (i = p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) {
164: rows[count] = i;
165: rowsx[count] = 0.0;
166: rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h;
167: indx++;
168: }
169: count = 2 * p * m; /* right side */
170: indx = 1;
171: for (i = 2 * p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) {
172: rows[count] = i;
173: rowsx[count] = 1.0;
174: rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h;
175: indx++;
176: }
177: for (i = 0; i < 4 * p * m; i++) {
178: x = rowsx[i];
179: y = rowsy[i];
180: val = ubdy(x, y);
181: PetscCall(VecSetValues(b, 1, &rows[i], &val, INSERT_VALUES));
182: PetscCall(VecSetValues(u, 1, &rows[i], &val, INSERT_VALUES));
183: }
184: PetscCall(MatZeroRows(A, 4 * p * m, rows, 1.0, 0, 0));
185: PetscCall(PetscFree(rows));
186: PetscCall(PetscFree(rowsx));
187: PetscCall(PetscFree(rowsy));
189: PetscCall(VecAssemblyBegin(u));
190: PetscCall(VecAssemblyEnd(u));
191: PetscCall(VecAssemblyBegin(b));
192: PetscCall(VecAssemblyEnd(b));
194: /* Solve linear system */
195: PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
196: PetscCall(KSPSetOperators(ksp, A, A));
197: PetscCall(KSPSetInitialGuessNonzero(ksp, PETSC_TRUE));
198: PetscCall(KSPSetFromOptions(ksp));
199: PetscCall(KSPSolve(ksp, b, u));
201: /* Check error */
202: PetscCall(VecAXPY(u, -1.0, ustar));
203: PetscCall(VecNorm(u, NORM_2, &norm));
204: PetscCall(KSPGetIterationNumber(ksp, &its));
205: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)(norm * h), its));
207: PetscCall(PetscFree(gllNode));
208: PetscCall(PetscFree(gllWgts));
210: PetscCall(KSPDestroy(&ksp));
211: PetscCall(VecDestroy(&u));
212: PetscCall(VecDestroy(&b));
213: PetscCall(VecDestroy(&q));
214: PetscCall(VecDestroy(&ustar));
215: PetscCall(MatDestroy(&A));
216: PetscCall(MatDestroy(&Mass));
218: PetscCall(PetscFinalize());
219: return 0;
220: }
222: /* --------------------------------------------------------------------- */
224: /* 1d element stiffness mass matrix */
225: static PetscErrorCode Form1DElementMass(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Me1D)
226: {
227: PetscInt i, j, k;
228: PetscInt indx;
230: PetscFunctionBeginUser;
231: for (j = 0; j < (P + 1); ++j) {
232: for (i = 0; i < (P + 1); ++i) {
233: indx = j * (P + 1) + i;
234: Me1D[indx] = 0.0;
235: for (k = 0; k < (P + 1); ++k) Me1D[indx] += H * gqw[k] * polyBasisFunc(P, i, gqn, gqn[k]) * polyBasisFunc(P, j, gqn, gqn[k]);
236: }
237: }
238: PetscFunctionReturn(PETSC_SUCCESS);
239: }
241: /* --------------------------------------------------------------------- */
243: /* 1d element stiffness matrix for derivative */
244: static PetscErrorCode Form1DElementStiffness(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Ke1D)
245: {
246: PetscInt i, j, k;
247: PetscInt indx;
249: PetscFunctionBeginUser;
250: for (j = 0; j < (P + 1); ++j) {
251: for (i = 0; i < (P + 1); ++i) {
252: indx = j * (P + 1) + i;
253: Ke1D[indx] = 0.0;
254: for (k = 0; k < (P + 1); ++k) Ke1D[indx] += (1. / H) * gqw[k] * derivPolyBasisFunc(P, i, gqn, gqn[k]) * derivPolyBasisFunc(P, j, gqn, gqn[k]);
255: }
256: }
257: PetscFunctionReturn(PETSC_SUCCESS);
258: }
260: /* --------------------------------------------------------------------- */
262: /* element mass matrix */
263: static PetscErrorCode Form2DElementMass(PetscInt P, PetscScalar *Me1D, PetscScalar *Me2D)
264: {
265: PetscInt i1, j1, i2, j2;
266: PetscInt indx1, indx2, indx3;
268: PetscFunctionBeginUser;
269: for (j2 = 0; j2 < (P + 1); ++j2) {
270: for (i2 = 0; i2 < (P + 1); ++i2) {
271: for (j1 = 0; j1 < (P + 1); ++j1) {
272: for (i1 = 0; i1 < (P + 1); ++i1) {
273: indx1 = j1 * (P + 1) + i1;
274: indx2 = j2 * (P + 1) + i2;
275: indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1);
276: Me2D[indx3] = Me1D[indx1] * Me1D[indx2];
277: }
278: }
279: }
280: }
281: PetscFunctionReturn(PETSC_SUCCESS);
282: }
284: /* --------------------------------------------------------------------- */
286: /* element stiffness for Laplacian */
287: static PetscErrorCode Form2DElementStiffness(PetscInt P, PetscScalar *Ke1D, PetscScalar *Me1D, PetscScalar *Ke2D)
288: {
289: PetscInt i1, j1, i2, j2;
290: PetscInt indx1, indx2, indx3;
292: PetscFunctionBeginUser;
293: for (j2 = 0; j2 < (P + 1); ++j2) {
294: for (i2 = 0; i2 < (P + 1); ++i2) {
295: for (j1 = 0; j1 < (P + 1); ++j1) {
296: for (i1 = 0; i1 < (P + 1); ++i1) {
297: indx1 = j1 * (P + 1) + i1;
298: indx2 = j2 * (P + 1) + i2;
299: indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1);
300: Ke2D[indx3] = Ke1D[indx1] * Me1D[indx2] + Me1D[indx1] * Ke1D[indx2];
301: }
302: }
303: }
304: }
305: PetscFunctionReturn(PETSC_SUCCESS);
306: }
308: /* --------------------------------------------------------------------- */
310: static PetscErrorCode FormNodalRhs(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *r)
311: {
312: PetscInt i, j, indx;
314: PetscFunctionBeginUser;
315: indx = 0;
316: for (j = 0; j < (P + 1); ++j) {
317: for (i = 0; i < (P + 1); ++i) {
318: r[indx] = src(x + H * nds[i], y + H * nds[j]);
319: indx++;
320: }
321: }
322: PetscFunctionReturn(PETSC_SUCCESS);
323: }
325: /* --------------------------------------------------------------------- */
327: static PetscErrorCode FormNodalSoln(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *u)
328: {
329: PetscInt i, j, indx;
331: PetscFunctionBeginUser;
332: indx = 0;
333: for (j = 0; j < (P + 1); ++j) {
334: for (i = 0; i < (P + 1); ++i) {
335: u[indx] = ubdy(x + H * nds[i], y + H * nds[j]);
336: indx++;
337: }
338: }
339: PetscFunctionReturn(PETSC_SUCCESS);
340: }
342: /* --------------------------------------------------------------------- */
344: static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
345: {
346: PetscReal denominator = 1.;
347: PetscReal numerator = 1.;
348: PetscInt i = 0;
350: for (i = 0; i < (order + 1); i++) {
351: if (i != basis) {
352: numerator *= (xval - xLocVal[i]);
353: denominator *= (xLocVal[basis] - xLocVal[i]);
354: }
355: }
356: return numerator / denominator;
357: }
359: /* --------------------------------------------------------------------- */
361: static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
362: {
363: PetscReal denominator;
364: PetscReal numerator;
365: PetscReal numtmp;
366: PetscInt i = 0, j = 0;
368: denominator = 1.;
369: for (i = 0; i < (order + 1); i++) {
370: if (i != basis) denominator *= (xLocVal[basis] - xLocVal[i]);
371: }
372: numerator = 0.;
373: for (j = 0; j < (order + 1); ++j) {
374: if (j != basis) {
375: numtmp = 1.;
376: for (i = 0; i < (order + 1); ++i) {
377: if (i != basis && j != i) numtmp *= (xval - xLocVal[i]);
378: }
379: numerator += numtmp;
380: }
381: }
383: return numerator / denominator;
384: }
386: /* --------------------------------------------------------------------- */
388: static PetscReal ubdy(PetscReal x, PetscReal y)
389: {
390: return x * x * y * y;
391: }
393: static PetscReal src(PetscReal x, PetscReal y)
394: {
395: return -2. * y * y - 2. * x * x;
396: }
397: /* --------------------------------------------------------------------- */
399: static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], PetscInt n)
400: /*******************************************************************************
401: Given the lower and upper limits of integration x1 and x2, and given n, this
402: routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
403: and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
404: *******************************************************************************/
405: {
406: PetscInt j, m;
407: PetscReal z1, z, xm, xl, q, qp, Ln, scale;
408: if (n == 1) {
409: x[0] = x1; /* Scale the root to the desired interval, */
410: x[1] = x2; /* and put in its symmetric counterpart. */
411: w[0] = 1.; /* Compute the weight */
412: w[1] = 1.; /* and its symmetric counterpart. */
413: } else {
414: x[0] = x1; /* Scale the root to the desired interval, */
415: x[n] = x2; /* and put in its symmetric counterpart. */
416: w[0] = 2. / (n * (n + 1)); /* Compute the weight */
417: w[n] = 2. / (n * (n + 1)); /* and its symmetric counterpart. */
418: m = (n + 1) / 2; /* The roots are symmetric, so we only find half of them. */
419: xm = 0.5 * (x2 + x1);
420: xl = 0.5 * (x2 - x1);
421: for (j = 1; j <= (m - 1); j++) { /* Loop over the desired roots. */
422: z = -1.0 * PetscCosReal((PETSC_PI * (j + 0.25) / (n)) - (3.0 / (8.0 * n * PETSC_PI)) * (1.0 / (j + 0.25)));
423: /* Starting with the above approximation to the ith root, we enter */
424: /* the main loop of refinement by Newton's method. */
425: do {
426: qAndLEvaluation(n, z, &q, &qp, &Ln);
427: z1 = z;
428: z = z1 - q / qp; /* Newton's method. */
429: } while (PetscAbsReal(z - z1) > 3.0e-11);
430: qAndLEvaluation(n, z, &q, &qp, &Ln);
431: x[j] = xm + xl * z; /* Scale the root to the desired interval, */
432: x[n - j] = xm - xl * z; /* and put in its symmetric counterpart. */
433: w[j] = 2.0 / (n * (n + 1) * Ln * Ln); /* Compute the weight */
434: w[n - j] = w[j]; /* and its symmetric counterpart. */
435: }
436: }
437: if (n % 2 == 0) {
438: qAndLEvaluation(n, 0.0, &q, &qp, &Ln);
439: x[n / 2] = (x2 - x1) / 2.0;
440: w[n / 2] = 2.0 / (n * (n + 1) * Ln * Ln);
441: }
442: /* scale the weights according to mapping from [-1,1] to [0,1] */
443: scale = (x2 - x1) / 2.0;
444: for (j = 0; j <= n; ++j) w[j] = w[j] * scale;
445: }
447: /******************************************************************************/
448: static void qAndLEvaluation(PetscInt n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln)
449: /*******************************************************************************
450: Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
451: addition to L_N(x) as these are needed for the GLL points. See the book titled
452: "Implementing Spectral Methods for Partial Differential Equations: Algorithms
453: for Scientists and Engineers" by David A. Kopriva.
454: *******************************************************************************/
455: {
456: PetscInt k;
458: PetscReal Lnp;
459: PetscReal Lnp1, Lnp1p;
460: PetscReal Lnm1, Lnm1p;
461: PetscReal Lnm2, Lnm2p;
463: Lnm1 = 1.0;
464: *Ln = x;
465: Lnm1p = 0.0;
466: Lnp = 1.0;
468: for (k = 2; k <= n; ++k) {
469: Lnm2 = Lnm1;
470: Lnm1 = *Ln;
471: Lnm2p = Lnm1p;
472: Lnm1p = Lnp;
473: *Ln = (2. * k - 1.) / (1.0 * k) * x * Lnm1 - (k - 1.) / (1.0 * k) * Lnm2;
474: Lnp = Lnm2p + (2.0 * k - 1) * Lnm1;
475: }
476: k = n + 1;
477: Lnp1 = (2. * k - 1.) / (1.0 * k) * x * (*Ln) - (k - 1.) / (1.0 * k) * Lnm1;
478: Lnp1p = Lnm1p + (2.0 * k - 1) * (*Ln);
479: *q = Lnp1 - Lnm1;
480: *qp = Lnp1p - Lnm1p;
481: }
483: /*TEST
485: test:
486: nsize: 2
487: args: -ksp_monitor_short
489: TEST*/