/* ex7.c */
/* Runtime help text printed by PetscInitialize() when -help is given. */
static char help[] = "Block Jacobi preconditioner for solving a linear system in parallel with KSP.\n\
The code indicates the\n\
procedures for setting the particular block sizes and for using different\n\
linear solvers on the individual blocks.\n\n";
6: /*
7: Note: This example focuses on ways to customize the block Jacobi
8: preconditioner. See ex1.c and ex2.c for more detailed comments on
9: the basic usage of KSP (including working with matrices and vectors).
11: Recall: The block Jacobi method is equivalent to the ASM preconditioner
12: with zero overlap.
13: */
15: /*
16: Include "petscksp.h" so that we can use KSP solvers. Note that this file
17: automatically includes:
18: petscsys.h - base PETSc routines petscvec.h - vectors
19: petscmat.h - matrices
20: petscis.h - index sets petscksp.h - Krylov subspace methods
21: petscviewer.h - viewers petscpc.h - preconditioners
22: */
23: #include <petscksp.h>
25: int main(int argc, char **args)
26: {
27: Vec x, b, u; /* approx solution, RHS, exact solution */
28: Mat A; /* linear system matrix */
29: KSP ksp; /* KSP context */
30: KSP *subksp; /* array of local KSP contexts on this processor */
31: PC pc; /* PC context */
32: PC subpc; /* PC context for subdomain */
33: PetscReal norm; /* norm of solution error */
34: PetscInt i, j, Ii, J, *blks, m = 4, n;
35: PetscMPIInt rank, size;
36: PetscInt its, nlocal, first, Istart, Iend;
37: PetscScalar v, one = 1.0, none = -1.0;
38: PetscBool isbjacobi;
40: PetscFunctionBeginUser;
41: PetscCall(PetscInitialize(&argc, &args, NULL, help));
42: PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
43: PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
44: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
45: n = m + 2;
47: /* -------------------------------------------------------------------
48: Compute the matrix and right-hand-side vector that define
49: the linear system, Ax = b.
50: ------------------------------------------------------------------- */
52: /*
53: Create and assemble parallel matrix
54: */
55: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
56: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
57: PetscCall(MatSetFromOptions(A));
58: PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
59: PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
60: PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
61: for (Ii = Istart; Ii < Iend; Ii++) {
62: v = -1.0;
63: i = Ii / n;
64: j = Ii - i * n;
65: if (i > 0) {
66: J = Ii - n;
67: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
68: }
69: if (i < m - 1) {
70: J = Ii + n;
71: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
72: }
73: if (j > 0) {
74: J = Ii - 1;
75: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
76: }
77: if (j < n - 1) {
78: J = Ii + 1;
79: PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
80: }
81: v = 4.0;
82: PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
83: }
84: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
85: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
86: PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
88: /*
89: Create parallel vectors
90: */
91: PetscCall(MatCreateVecs(A, &u, &b));
92: PetscCall(VecDuplicate(u, &x));
94: /*
95: Set exact solution; then compute right-hand-side vector.
96: */
97: PetscCall(VecSet(u, one));
98: PetscCall(MatMult(A, u, b));
100: /*
101: Create linear solver context
102: */
103: PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
105: /*
106: Set operators. Here the matrix that defines the linear system
107: also serves as the matrix from which the preconditioner is constructed.
108: */
109: PetscCall(KSPSetOperators(ksp, A, A));
111: /*
112: Set default preconditioner for this program to be block Jacobi.
113: This choice can be overridden at runtime with the option
114: -pc_type <type>
116: IMPORTANT NOTE: Since the inners solves below are constructed to use
117: iterative methods (such as KSPGMRES) the outer Krylov method should
118: be set to use KSPFGMRES since it is the only Krylov method (plus KSPFCG)
119: that allows the preconditioners to be nonlinear (that is have iterative methods
120: inside them). The reason these examples work is because the number of
121: iterations on the inner solves is left at the default (which is 10,000)
122: and the tolerance on the inner solves is set to be a tight value of around 10^-6.
123: */
124: PetscCall(KSPGetPC(ksp, &pc));
125: PetscCall(PCSetType(pc, PCBJACOBI));
127: /* -------------------------------------------------------------------
128: Define the problem decomposition
129: ------------------------------------------------------------------- */
131: /*
132: Call PCBJacobiSetTotalBlocks() to set individually the size of
133: each block in the preconditioner. This could also be done with
134: the runtime option
135: -pc_bjacobi_blocks <blocks>
136: Also, see the command PCBJacobiSetLocalBlocks() to set the
137: local blocks.
139: Note: The default decomposition is 1 block per processor.
140: */
141: PetscCall(PetscMalloc1(m, &blks));
142: for (i = 0; i < m; i++) blks[i] = n;
143: PetscCall(PCBJacobiSetTotalBlocks(pc, m, blks));
144: PetscCall(PetscFree(blks));
146: /*
147: Set runtime options
148: */
149: PetscCall(KSPSetFromOptions(ksp));
151: /* -------------------------------------------------------------------
152: Set the linear solvers for the subblocks
153: ------------------------------------------------------------------- */
155: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
156: Basic method, should be sufficient for the needs of most users.
157: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
159: By default, the block Jacobi method uses the same solver on each
160: block of the problem. To set the same solver options on all blocks,
161: use the prefix -sub before the usual PC and KSP options, e.g.,
162: -sub_pc_type <pc> -sub_ksp_type <ksp> -sub_ksp_rtol 1.e-4
163: */
165: /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
166: Advanced method, setting different solvers for various blocks.
167: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
169: Note that each block's KSP context is completely independent of
170: the others, and the full range of uniprocessor KSP options is
171: available for each block. The following section of code is intended
172: to be a simple illustration of setting different linear solvers for
173: the individual blocks. These choices are obviously not recommended
174: for solving this particular problem.
175: */
176: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCBJACOBI, &isbjacobi));
177: if (isbjacobi) {
178: /*
179: Call KSPSetUp() to set the block Jacobi data structures (including
180: creation of an internal KSP context for each block).
182: Note: KSPSetUp() MUST be called before PCBJacobiGetSubKSP().
183: */
184: PetscCall(KSPSetUp(ksp));
186: /*
187: Extract the array of KSP contexts for the local blocks
188: */
189: PetscCall(PCBJacobiGetSubKSP(pc, &nlocal, &first, &subksp));
191: /*
192: Loop over the local blocks, setting various KSP options
193: for each block.
194: */
195: for (i = 0; i < nlocal; i++) {
196: PetscCall(KSPGetPC(subksp[i], &subpc));
197: if (rank == 0) {
198: if (i % 2) PetscCall(PCSetType(subpc, PCILU));
199: else {
200: PetscCall(PCSetType(subpc, PCNONE));
201: PetscCall(KSPSetType(subksp[i], KSPBCGS));
202: PetscCall(KSPSetTolerances(subksp[i], 1.e-6, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT));
203: }
204: } else {
205: PetscCall(PCSetType(subpc, PCJACOBI));
206: PetscCall(KSPSetType(subksp[i], KSPGMRES));
207: PetscCall(KSPSetTolerances(subksp[i], 1.e-6, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT));
208: }
209: }
210: }
212: /* -------------------------------------------------------------------
213: Solve the linear system
214: ------------------------------------------------------------------- */
216: /*
217: Solve the linear system
218: */
219: PetscCall(KSPSolve(ksp, b, x));
221: /* -------------------------------------------------------------------
222: Check solution and clean up
223: ------------------------------------------------------------------- */
225: /*
226: Check the error
227: */
228: PetscCall(VecAXPY(x, none, u));
229: PetscCall(VecNorm(x, NORM_2, &norm));
230: PetscCall(KSPGetIterationNumber(ksp, &its));
231: PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));
233: /*
234: Free work space. All PETSc objects should be destroyed when they
235: are no longer needed.
236: */
237: PetscCall(KSPDestroy(&ksp));
238: PetscCall(VecDestroy(&u));
239: PetscCall(VecDestroy(&x));
240: PetscCall(VecDestroy(&b));
241: PetscCall(MatDestroy(&A));
242: PetscCall(PetscFinalize());
243: return 0;
244: }
/*TEST

   test:
      suffix: 1
      nsize: 2
      args: -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -ksp_view ::ascii_info_detail

   test:
      suffix: viennacl
      requires: viennacl
      args: -ksp_monitor_short -mat_type aijviennacl
      output_file: output/ex7_mpiaijcusparse.out

   test:
      suffix: viennacl_2
      nsize: 2
      requires: viennacl
      args: -ksp_monitor_short -mat_type aijviennacl
      output_file: output/ex7_mpiaijcusparse_2.out

   test:
      suffix: mpiaijcusparse
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse

   test:
      suffix: mpiaijcusparse_2
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse

   test:
      suffix: mpiaijcusparse_simple
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

   test:
      suffix: mpiaijcusparse_simple_2
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

   test:
      suffix: mpiaijcusparse_3
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

   test:
      suffix: mpiaijcusparse_4
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

   testset:
      args: -ksp_monitor_short -pc_type gamg -ksp_view -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10
      test:
         suffix: gamg_cuda
         nsize: {{1 2}separate output}
         requires: cuda
         args: -mat_type aijcusparse
         args: -pc_gamg_threshold -1
      test:
         suffix: gamg_kokkos
         nsize: {{1 2}separate output}
         requires: kokkos_kernels
         args: -mat_type aijkokkos

TEST*/