Actual source code: ex2.c
static char help[] = "Solves a linear system in parallel with KSP.\n\
Input parameters include:\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";
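
/*
   Example invocation (a sketch; the executable name and process count are
   illustrative only). The options in the help string above are ordinary
   PETSc command-line options, so a typical parallel run might look like

       mpiexec -n 4 ./ex2 -m 16 -n 16 -ksp_monitor_short -view_exact_sol
*/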

/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>

int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A;       /* linear system matrix */
  KSP         ksp;     /* linear solver context */
  PetscReal   norm;    /* norm of solution error */
  PetscInt    i, j, Ii, J, Istart, Iend, m = 8, n = 7, its;
  PetscBool   flg;
  PetscScalar v;

  PetscInitialize(&argc, &args, (char *)0, help);
  PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
  PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
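
  /*
     Note: the calls in this listing are shown without error checking. In
     recent PETSc releases each call is normally wrapped in PetscCall() so
     that errors propagate cleanly out of main(), e.g.

       PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  */
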
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.

     Performance tuning note: For problems of substantial size,
     preallocation of matrix memory is crucial for attaining good
     performance. See the matrix chapter of the users manual for details.
  */
  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(A);
  MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
  MatSeqAIJSetPreallocation(A, 5, NULL);
  MatSeqSBAIJSetPreallocation(A, 1, 5, NULL);
  MatMPISBAIJSetPreallocation(A, 1, 5, NULL, 5, NULL);
  MatMPISELLSetPreallocation(A, 5, NULL, 5, NULL);
  MatSeqSELLSetPreallocation(A, 5, NULL);
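
  /*
     The "5" above is a per-row upper bound on the number of nonzeros,
     since each row of the 2-D five-point stencil has at most 5 entries;
     for the parallel formats the bound is given separately for the
     diagonal and off-diagonal blocks. When exact per-row counts are
     known they can be supplied through the array arguments instead; as a
     sketch (d_nnz and o_nnz are hypothetical arrays, one entry per local
     row):

       MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz);
  */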

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(A, &Istart, &Iend);
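
  /*
     A sketch of how the local row range could be inspected per rank,
     using the synchronized print routines so the output is ordered:

       PetscSynchronizedPrintf(PETSC_COMM_WORLD, "rows %" PetscInt_FMT " to %" PetscInt_FMT "\n", Istart, Iend);
       PetscSynchronizedFlush(PETSC_COMM_WORLD, PETSC_STDOUT);
  */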

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this uses the less common natural ordering that orders first
     all the unknowns for x = h, then for x = 2h, etc.; hence you see
     J = Ii +- n instead of J = Ii +- m as you might expect. The more
     standard ordering would first do all variables for y = h, then y = 2h, etc.
  */
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (i < m - 1) {
      J = Ii + n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j > 0) {
      J = Ii - 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j < n - 1) {
      J = Ii + 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    v = 4.0;
    MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
  }
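
  /*
     Worked example: with the default m = 8, n = 7, global row Ii = 10
     corresponds to grid indices i = 10 / 7 = 1 and j = 10 - 1 * 7 = 3, so
     the loop above inserts -1 at columns J = 3 (Ii - n), 17 (Ii + n),
     9 (Ii - 1), 11 (Ii + 1), and 4 on the diagonal -- the usual
     five-point stencil row.
  */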

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
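
  /*
     For small grids the assembled matrix can be examined with the runtime
     option -mat_view (used in one of the tests below) or, as a sketch,
     programmatically:

       MatView(A, PETSC_VIEWER_STDOUT_WORLD);
  */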

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);

  /*
     Create parallel vectors.
      - We form 1 vector from scratch and then duplicate as needed.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
        in this example, we specify only the vector's global dimension;
        the parallel partitioning is determined at runtime.
      - When solving a linear system, the vectors and matrices MUST
        be partitioned accordingly. PETSc automatically generates
        appropriately partitioned matrices and vectors when MatCreate()
        and VecCreate() are used with the same communicator.
      - The user can alternatively specify the local vector and matrix
        dimensions when more sophisticated partitioning is needed
        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
        below).
  */
  VecCreate(PETSC_COMM_WORLD, &u);
  VecSetSizes(u, PETSC_DECIDE, m * n);
  VecSetFromOptions(u);
  VecDuplicate(u, &b);
  VecDuplicate(b, &x);
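
  /*
     A sketch of an alternative that guarantees a layout compatible with A:
     let the matrix create the vector, e.g.

       MatCreateVecs(A, &u, NULL);

     which sizes and partitions u to match the columns of A.
  */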

  /*
     Set exact solution; then compute right-hand-side vector.
     We use an exact solution vector with all elements equal to 1.0.
  */
  VecSet(u, 1.0);
  MatMult(A, u, b);

  /*
     View the exact solution vector if desired.
  */
  flg = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL);
  if (flg) VecView(u, PETSC_VIEWER_STDOUT_WORLD);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPCreate(PETSC_COMM_WORLD, &ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp, A, A);

  /*
     Set linear solver defaults for this problem (optional).
     - By extracting the KSP and PC contexts from the KSP context,
       we can then directly call any KSP and PC routines to set
       various options.
     - The following statement is optional; all of these
       parameters could alternatively be specified at runtime via
       KSPSetFromOptions(). All of these defaults can be
       overridden at runtime, as indicated below.
  */
  KSPSetTolerances(ksp, 1.e-2 / ((m + 1) * (n + 1)), 1.e-50, PETSC_DEFAULT, PETSC_DEFAULT);
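
  /*
     A sketch of what "extracting the KSP and PC contexts" looks like; the
     PC variable and the solver/preconditioner choices here are
     illustrative only:

       PC pc;
       KSPGetPC(ksp, &pc);
       PCSetType(pc, PCJACOBI);
       KSPSetType(ksp, KSPCG);
  */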

  /*
    Set runtime options, e.g.,
        -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
    These options will override those specified above as long as
    KSPSetFromOptions() is called _after_ any other customization
    routines.
  */
  KSPSetFromOptions(ksp);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPSolve(ksp, b, x);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  VecAXPY(x, -1.0, u);
  VecNorm(x, NORM_2, &norm);
  KSPGetIterationNumber(ksp, &its);
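
  /*
     The iteration count alone does not say whether the solve actually
     converged; a sketch of an explicit check:

       KSPConvergedReason reason;
       KSPGetConvergedReason(ksp, &reason);
       if (reason < 0) PetscPrintf(PETSC_COMM_WORLD, "KSP diverged\n");
  */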

  /*
     Print convergence information. PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its);

  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&A);

  /*
     Always call PetscFinalize() before exiting a program. This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscFinalize();
  return 0;
}

/*TEST

   build:
      requires: !single

   test:
      suffix: chebyest_1
      args: -m 80 -n 80 -ksp_pc_side right -pc_type ksp -ksp_ksp_type chebyshev -ksp_ksp_max_it 5 -ksp_ksp_chebyshev_esteig 0.9,0,0,1.1 -ksp_monitor_short

   test:
      suffix: chebyest_2
      args: -m 80 -n 80 -ksp_pc_side right -pc_type ksp -ksp_ksp_type chebyshev -ksp_ksp_max_it 5 -ksp_ksp_chebyshev_esteig 0.9,0,0,1.1 -ksp_esteig_ksp_type cg -ksp_monitor_short

   test:
      args: -ksp_monitor_short -m 5 -n 5 -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -ksp_monitor_short -m 5 -n 5 -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 3
      args: -pc_type sor -pc_sor_symmetric -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 4
      args: -pc_type eisenstat -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 5
      nsize: 2
      args: -ksp_monitor_short -m 5 -n 5 -mat_view draw -ksp_gmres_cgs_refinement_type refine_always -nox
      output_file: output/ex2_2.out

   test:
      suffix: bjacobi
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 1 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres

   test:
      suffix: bjacobi_2
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 2 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres -ksp_view

   test:
      suffix: bjacobi_3
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 4 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres

   test:
      suffix: qmrcgs
      args: -ksp_type qmrcgs -pc_type ilu
      output_file: output/ex2_fbcgs.out

   test:
      suffix: qmrcgs_2
      nsize: 3
      args: -ksp_type qmrcgs -pc_type bjacobi
      output_file: output/ex2_fbcgs_2.out

   test:
      suffix: fbcgs
      args: -ksp_type fbcgs -pc_type ilu

   test:
      suffix: fbcgs_2
      nsize: 3
      args: -ksp_type fbcgsr -pc_type bjacobi

   test:
      suffix: groppcg
      args: -ksp_monitor_short -ksp_type groppcg -m 9 -n 9

   test:
      suffix: mkl_pardiso_cholesky
      requires: mkl_pardiso
      args: -ksp_type preonly -pc_type cholesky -mat_type sbaij -pc_factor_mat_solver_type mkl_pardiso

   test:
      suffix: mkl_pardiso_lu
      requires: mkl_pardiso
      args: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mkl_pardiso

   test:
      suffix: pipebcgs
      args: -ksp_monitor_short -ksp_type pipebcgs -m 9 -n 9

   test:
      suffix: pipecg
      args: -ksp_monitor_short -ksp_type pipecg -m 9 -n 9

   test:
      suffix: pipecgrr
      args: -ksp_monitor_short -ksp_type pipecgrr -m 9 -n 9

   test:
      suffix: pipecr
      args: -ksp_monitor_short -ksp_type pipecr -m 9 -n 9

   test:
      suffix: pipelcg
      args: -ksp_monitor_short -ksp_type pipelcg -m 9 -n 9 -pc_type none -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmax 2
      filter: grep -v "sqrt breakdown in iteration"

   test:
      suffix: sell
      args: -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -m 9 -n 9 -mat_type sell

   test:
      requires: mumps
      suffix: sell_mumps
      args: -ksp_type preonly -m 9 -n 12 -mat_type sell -pc_type lu -pc_factor_mat_solver_type mumps -pc_factor_mat_ordering_type natural

   test:
      suffix: telescope
      nsize: 4
      args: -m 100 -n 100 -ksp_converged_reason -pc_type telescope -pc_telescope_reduction_factor 4 -telescope_pc_type bjacobi

   test:
      suffix: umfpack
      requires: suitesparse
      args: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type umfpack

   test:
      suffix: spqr
      requires: suitesparse
      args: -ksp_type preonly -pc_type qr -pc_factor_mat_solver_type spqr

   test:
      suffix: pc_symmetric
      args: -m 10 -n 9 -ksp_converged_reason -ksp_type gmres -ksp_pc_side symmetric -pc_type cholesky

   test:
      suffix: pipeprcg
      args: -ksp_monitor_short -ksp_type pipeprcg -m 9 -n 9

   test:
      suffix: pipeprcg_rcw
      args: -ksp_monitor_short -ksp_type pipeprcg -recompute_w false -m 9 -n 9

   test:
      suffix: pipecg2
      args: -ksp_monitor_short -ksp_type pipecg2 -m 9 -n 9 -ksp_norm_type {{preconditioned unpreconditioned natural}}

   test:
      suffix: pipecg2_2
      nsize: 4
      args: -ksp_monitor_short -ksp_type pipecg2 -m 15 -n 9 -ksp_norm_type {{preconditioned unpreconditioned natural}}

   test:
      suffix: hpddm
      nsize: 4
      requires: hpddm !__float128 !__fp16
      filter: sed -e "s/ iterations 9/ iterations 8/g"
      args: -ksp_converged_reason -ksp_type hpddm -ksp_hpddm_precision {{single double}shared output} -ksp_pc_side {{left right}shared output}

   test:
      suffix: hpddm___float128
      output_file: output/ex2_hpddm.out
      nsize: 4
      requires: hpddm __float128
      filter: sed -e "s/ iterations 9/ iterations 8/g"
      args: -ksp_converged_reason -ksp_type hpddm -ksp_hpddm_precision {{double quadruple}shared output} -ksp_pc_side {{left right}shared output}

   test:
      suffix: symmetric_pc
      nsize: 1
      args: -ksp_monitor -ksp_type gmres -pc_type bjacobi -sub_pc_type icc -ksp_pc_side symmetric

   test:
      suffix: symmetric_pc2
      nsize: 1
      args: -ksp_monitor -ksp_type gmres -pc_type bjacobi -sub_pc_type icc -ksp_pc_side symmetric -pc_bjacobi_blocks 2

TEST*/