Actual source code: ex7.c
static char help[] = "Block Jacobi preconditioner for solving a linear system in parallel with KSP.\n\
The code demonstrates the procedures for setting particular block sizes and\n\
for using different linear solvers on the individual blocks.\n\n";

/*
   Note: This example focuses on ways to customize the block Jacobi
   preconditioner. See ex1.c and ex2.c for more detailed comments on
   the basic usage of KSP (including working with matrices and vectors).

   Recall: The block Jacobi method is equivalent to the ASM preconditioner
   with zero overlap.
*/
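
/*
   For illustration only: a hedged sketch of the equivalent ASM configuration,
   using standard PETSc calls that this example does not exercise,

     PetscCall(PCSetType(pc, PCASM));
     PetscCall(PCASMSetOverlap(pc, 0));

   or, at runtime,

     -pc_type asm -pc_asm_overlap 0
*/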

/*
  Include "petscksp.h" so that we can use KSP solvers. Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines
     petscvec.h    - vectors
     petscmat.h    - matrices
     petscis.h     - index sets
     petscksp.h    - Krylov subspace methods
     petscviewer.h - viewers
     petscpc.h     - preconditioners
*/
#include <petscksp.h>

int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A;       /* linear system matrix */
  KSP         ksp;     /* KSP context */
  KSP        *subksp;  /* array of local KSP contexts on this processor */
  PC          pc;      /* PC context */
  PC          subpc;   /* PC context for subdomain */
  PetscReal   norm;    /* norm of solution error */
  PetscInt    i, j, Ii, J, *blks, m = 4, n;
  PetscMPIInt rank, size;
  PetscInt    its, nlocal, first, Istart, Iend;
  PetscScalar v, one = 1.0, none = -1.0;
  PetscBool   isbjacobi;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  n = m + 2;

  /* -------------------------------------------------------------------
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     ------------------------------------------------------------------- */

  /*
     Create and assemble parallel matrix
  */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
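  /* The five-point stencil assembled below has at most 5 nonzeros per row, which is
     why 5 entries per row are preallocated above (for both the diagonal and
     off-diagonal portions of the parallel matrix, a safe upper bound). */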
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));

  /*
     Create parallel vectors
  */
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(VecDuplicate(u, &x));

  /*
     Set exact solution; then compute right-hand-side vector.
  */
  PetscCall(VecSet(u, one));
  PetscCall(MatMult(A, u, b));

  /*
     Create linear solver context
  */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  PetscCall(KSPSetOperators(ksp, A, A));
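
  /*
     For reference, KSPSetOperators() takes the preconditioning matrix as its
     third argument, so a separate (e.g., simplified) matrix could be supplied
     instead; the sketch below uses a hypothetical matrix P for illustration:

       PetscCall(KSPSetOperators(ksp, A, P));
  */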

  /*
     Set default preconditioner for this program to be block Jacobi.
     This choice can be overridden at runtime with the option
        -pc_type <type>

     IMPORTANT NOTE: Since the inner solves below are constructed to use
     iterative methods (such as KSPGMRES), the outer Krylov method should
     be set to KSPFGMRES, since it is the only Krylov method (besides KSPFCG)
     that allows the preconditioner to be nonlinear, that is, to contain
     iterative methods inside it (a sketch follows the code below). The reason
     these examples work is that the number of iterations on the inner solves
     is left at the default (which is 10,000) and the tolerance on the inner
     solves is set to a tight value of around 10^-6.
  */
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCBJACOBI));
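
  /*
     A minimal sketch (not enabled here) of selecting a flexible outer Krylov
     method as described above; both forms use standard PETSc interfaces and
     are shown for illustration only:

       PetscCall(KSPSetType(ksp, KSPFGMRES));

     or, at runtime,

       -ksp_type fgmres
  */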

  /* -------------------------------------------------------------------
                   Define the problem decomposition
     ------------------------------------------------------------------- */

  /*
     Call PCBJacobiSetTotalBlocks() to set the size of each block in the
     preconditioner individually. The number of blocks could also be set
     with the runtime option
         -pc_bjacobi_blocks <blocks>
     Also, see the command PCBJacobiSetLocalBlocks() to set the blocks
     local to each process (a hedged sketch follows the code below).

      Note: The default decomposition is 1 block per processor.
  */
  PetscCall(PetscMalloc1(m, &blks));
  for (i = 0; i < m; i++) blks[i] = n;
  PetscCall(PCBJacobiSetTotalBlocks(pc, m, blks));
  PetscCall(PetscFree(blks));
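
  /*
     A hedged sketch of the per-process alternative mentioned above: each process
     may instead describe only the blocks it owns via PCBJacobiSetLocalBlocks().
     The block count and sizes below are illustrative values, not the ones this
     example uses.

       PetscInt lens[2] = {n, n};                       // two local blocks of size n
       PetscCall(PCBJacobiSetLocalBlocks(pc, 2, lens));
  */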

  /*
     Set runtime options
  */
  PetscCall(KSPSetFromOptions(ksp));

  /* -------------------------------------------------------------------
               Set the linear solvers for the subblocks
     ------------------------------------------------------------------- */

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       Basic method, should be sufficient for the needs of most users.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

     By default, the block Jacobi method uses the same solver on each
     block of the problem. To set the same solver options on all blocks,
     prepend the prefix -sub_ to the usual PC and KSP option names, e.g.,
          -sub_pc_type <pc> -sub_ksp_type <ksp> -sub_ksp_rtol 1.e-4
     (an illustrative command line follows this comment).
  */
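
  /*
     For example, a command line of the form below (illustrative; it assumes the
     executable built from this file is named ex7) would apply the same
     ILU-preconditioned GMRES solver, with a relative tolerance of 1.e-4, to
     every block:

       mpiexec -n 2 ./ex7 -sub_ksp_type gmres -sub_pc_type ilu -sub_ksp_rtol 1.e-4
  */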

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Advanced method, setting different solvers for various blocks.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

     Note that each block's KSP context is completely independent of
     the others, and the full range of uniprocessor KSP options is
     available for each block. The following section of code is intended
     to be a simple illustration of setting different linear solvers for
     the individual blocks. These choices are obviously not recommended
     for solving this particular problem.
  */
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCBJACOBI, &isbjacobi));
  if (isbjacobi) {
    /*
       Call KSPSetUp() to set the block Jacobi data structures (including
       creation of an internal KSP context for each block).

       Note: KSPSetUp() MUST be called before PCBJacobiGetSubKSP().
    */
    PetscCall(KSPSetUp(ksp));

    /*
       Extract the array of KSP contexts for the local blocks
    */
    PetscCall(PCBJacobiGetSubKSP(pc, &nlocal, &first, &subksp));

    /*
       Loop over the local blocks, setting various KSP options
       for each block.
    */
    for (i = 0; i < nlocal; i++) {
      PetscCall(KSPGetPC(subksp[i], &subpc));
      if (rank == 0) {
        if (i % 2) {
          PetscCall(PCSetType(subpc, PCILU));
        } else {
          PetscCall(PCSetType(subpc, PCNONE));
          PetscCall(KSPSetType(subksp[i], KSPBCGS));
          PetscCall(KSPSetTolerances(subksp[i], 1.e-6, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT));
        }
      } else {
        PetscCall(PCSetType(subpc, PCJACOBI));
        PetscCall(KSPSetType(subksp[i], KSPGMRES));
        PetscCall(KSPSetTolerances(subksp[i], 1.e-6, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT));
      }
    }
  }
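
  /*
     Note on the KSPSetTolerances() calls in the loop above: the argument order is
     (ksp, rtol, abstol, dtol, maxits), so only the relative tolerance is tightened
     to 1.e-6 while PETSC_CURRENT retains the existing values of the remaining
     parameters.
  */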

  /* -------------------------------------------------------------------
                      Solve the linear system
     ------------------------------------------------------------------- */

  /*
     Solve the linear system
  */
  PetscCall(KSPSolve(ksp, b, x));

  /* -------------------------------------------------------------------
                      Check solution and clean up
     ------------------------------------------------------------------- */

  /*
     Check the error
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));

  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      suffix: 1
      nsize: 2
      args: -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -ksp_view ::ascii_info_detail

   test:
      suffix: viennacl
      requires: viennacl
      args: -ksp_monitor_short -mat_type aijviennacl
      output_file: output/ex7_mpiaijcusparse.out

   test:
      suffix: viennacl_2
      nsize: 2
      requires: viennacl
      args: -ksp_monitor_short -mat_type aijviennacl
      output_file: output/ex7_mpiaijcusparse_2.out

   test:
      suffix: mpiaijcusparse
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse

   test:
      suffix: mpiaijcusparse_2
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse

   test:
      suffix: mpiaijcusparse_simple
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

   test:
      suffix: mpiaijcusparse_simple_2
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

   test:
      suffix: mpiaijcusparse_3
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

   test:
      suffix: mpiaijcusparse_4
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

   testset:
      args: -ksp_monitor_short -pc_type gamg -ksp_view -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10
      test:
         suffix: gamg_cuda
         nsize: {{1 2}separate output}
         requires: cuda
         args: -mat_type aijcusparse
         args: -pc_gamg_threshold -1
      test:
         suffix: gamg_kokkos
         nsize: {{1 2}separate output}
         requires: kokkos_kernels
         args: -mat_type aijkokkos

TEST*/