Actual source code: ex7.c
static char help[] = "Block Jacobi preconditioner for solving a linear system in parallel with KSP.\n\
The code demonstrates the procedures for setting particular block sizes and\n\
for using different linear solvers on the individual blocks.\n\n";

/*
   Note: This example focuses on ways to customize the block Jacobi
   preconditioner. See ex1.c and ex2.c for more detailed comments on
   the basic usage of KSP (including working with matrices and vectors).

   Recall: The block Jacobi method is equivalent to the ASM preconditioner
   with zero overlap.
*/
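
/*
   For illustration only: a hedged sketch of the equivalent ASM configuration,
   using standard PETSc calls that this example does not exercise,

     PetscCall(PCSetType(pc, PCASM));
     PetscCall(PCASMSetOverlap(pc, 0));

   or, at runtime,

     -pc_type asm -pc_asm_overlap 0
*/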

/*
  Include "petscksp.h" so that we can use KSP solvers. Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines
     petscvec.h    - vectors
     petscmat.h    - matrices
     petscis.h     - index sets
     petscksp.h    - Krylov subspace methods
     petscviewer.h - viewers
     petscpc.h     - preconditioners
*/
#include <petscksp.h>

int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A;       /* linear system matrix */
  KSP         ksp;     /* KSP context */
  KSP        *subksp;  /* array of local KSP contexts on this processor */
  PC          pc;      /* PC context */
  PC          subpc;   /* PC context for subdomain */
  PetscReal   norm;    /* norm of solution error */
  PetscInt    i, j, Ii, J, *blks, m = 4, n;
  PetscMPIInt rank, size;
  PetscInt    its, nlocal, first, Istart, Iend;
  PetscScalar v, one = 1.0, none = -1.0;
  PetscBool   isbjacobi;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  n = m + 2;

  /* -------------------------------------------------------------------
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     ------------------------------------------------------------------- */

  /*
     Create and assemble parallel matrix
  */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
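  /* The five-point stencil assembled below has at most 5 nonzeros per row, which is
     why 5 entries per row are preallocated above (for both the diagonal and
     off-diagonal portions of the parallel matrix, a safe upper bound). */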
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));

  /*
     Create parallel vectors
  */
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(VecDuplicate(u, &x));

  /*
     Set exact solution; then compute right-hand-side vector.
  */
  PetscCall(VecSet(u, one));
  PetscCall(MatMult(A, u, b));

  /*
     Create linear solver context
  */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  PetscCall(KSPSetOperators(ksp, A, A));
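
  /*
     For reference, KSPSetOperators() takes the preconditioning matrix as its
     third argument, so a separate (e.g., simplified) matrix could be supplied
     instead; the sketch below uses a hypothetical matrix P for illustration:

       PetscCall(KSPSetOperators(ksp, A, P));
  */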

  /*
     Set default preconditioner for this program to be block Jacobi.
     This choice can be overridden at runtime with the option
        -pc_type <type>

     IMPORTANT NOTE: Since the inner solves below are constructed to use
     iterative methods (such as KSPGMRES), the outer Krylov method should
     be set to KSPFGMRES, since it is the only Krylov method (besides KSPFCG)
     that allows the preconditioner to be nonlinear, that is, to contain
     iterative methods inside it (a sketch follows the code below). The reason
     these examples work is that the number of iterations on the inner solves
     is left at the default (which is 10,000) and the tolerance on the inner
     solves is set to a tight value of around 10^-6.
  */
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PCSetType(pc, PCBJACOBI));
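
  /*
     A minimal sketch (not enabled here) of selecting a flexible outer Krylov
     method as described above; both forms use standard PETSc interfaces and
     are shown for illustration only:

       PetscCall(KSPSetType(ksp, KSPFGMRES));

     or, at runtime,

       -ksp_type fgmres
  */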

  /* -------------------------------------------------------------------
                   Define the problem decomposition
     ------------------------------------------------------------------- */

  /*
     Call PCBJacobiSetTotalBlocks() to set the size of each block in the
     preconditioner individually. The number of blocks could also be set
     with the runtime option
         -pc_bjacobi_blocks <blocks>
     Also, see the command PCBJacobiSetLocalBlocks() to set the blocks
     local to each process (a hedged sketch follows the code below).

      Note: The default decomposition is 1 block per processor.
  */
  PetscCall(PetscMalloc1(m, &blks));
  for (i = 0; i < m; i++) blks[i] = n;
  PetscCall(PCBJacobiSetTotalBlocks(pc, m, blks));
  PetscCall(PetscFree(blks));
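
  /*
     A hedged sketch of the per-process alternative mentioned above: each process
     may instead describe only the blocks it owns via PCBJacobiSetLocalBlocks().
     The block count and sizes below are illustrative values, not the ones this
     example uses.

       PetscInt lens[2] = {n, n};                       // two local blocks of size n
       PetscCall(PCBJacobiSetLocalBlocks(pc, 2, lens));
  */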

  /*
     Set runtime options
  */
  PetscCall(KSPSetFromOptions(ksp));

  /* -------------------------------------------------------------------
               Set the linear solvers for the subblocks
     ------------------------------------------------------------------- */

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       Basic method, should be sufficient for the needs of most users.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

     By default, the block Jacobi method uses the same solver on each
     block of the problem. To set the same solver options on all blocks,
     prepend the prefix -sub_ to the usual PC and KSP option names, e.g.,
          -sub_pc_type <pc> -sub_ksp_type <ksp> -sub_ksp_rtol 1.e-4
     (an illustrative command line follows this comment).
  */
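
  /*
     For example, a command line of the form below (illustrative; it assumes the
     executable built from this file is named ex7) would apply the same
     ILU-preconditioned GMRES solver, with a relative tolerance of 1.e-4, to
     every block:

       mpiexec -n 2 ./ex7 -sub_ksp_type gmres -sub_pc_type ilu -sub_ksp_rtol 1.e-4
  */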

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Advanced method, setting different solvers for various blocks.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

     Note that each block's KSP context is completely independent of
     the others, and the full range of uniprocessor KSP options is
     available for each block. The following section of code is intended
     to be a simple illustration of setting different linear solvers for
     the individual blocks. These choices are obviously not recommended
     for solving this particular problem.
  */
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCBJACOBI, &isbjacobi));
  if (isbjacobi) {
    /*
       Call KSPSetUp() to set the block Jacobi data structures (including
       creation of an internal KSP context for each block).

       Note: KSPSetUp() MUST be called before PCBJacobiGetSubKSP().
    */
    PetscCall(KSPSetUp(ksp));

    /*
       Extract the array of KSP contexts for the local blocks
    */
    PetscCall(PCBJacobiGetSubKSP(pc, &nlocal, &first, &subksp));

    /*
       Loop over the local blocks, setting various KSP options
       for each block.
    */
    for (i = 0; i < nlocal; i++) {
      PetscCall(KSPGetPC(subksp[i], &subpc));
      if (rank == 0) {
        if (i % 2) {
          PetscCall(PCSetType(subpc, PCILU));
        } else {
          PetscCall(PCSetType(subpc, PCNONE));
          PetscCall(KSPSetType(subksp[i], KSPBCGS));
          PetscCall(KSPSetTolerances(subksp[i], 1.e-6, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT));
        }
      } else {
        PetscCall(PCSetType(subpc, PCJACOBI));
        PetscCall(KSPSetType(subksp[i], KSPGMRES));
        PetscCall(KSPSetTolerances(subksp[i], 1.e-6, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT));
      }
    }
  }
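
  /*
     Note on the KSPSetTolerances() calls in the loop above: the argument order is
     (ksp, rtol, abstol, dtol, maxits), so only the relative tolerance is tightened
     to 1.e-6 while PETSC_CURRENT retains the existing values of the remaining
     parameters.
  */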

  /* -------------------------------------------------------------------
                      Solve the linear system
     ------------------------------------------------------------------- */

  /*
     Solve the linear system
  */
  PetscCall(KSPSolve(ksp, b, x));

  /* -------------------------------------------------------------------
                      Check solution and clean up
     ------------------------------------------------------------------- */

  /*
     Check the error
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its));

  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      suffix: 1
      nsize: 2
      args: -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -ksp_view ::ascii_info_detail

   test:
      suffix: viennacl
      requires: viennacl
      args: -ksp_monitor_short -mat_type aijviennacl
      output_file: output/ex7_mpiaijcusparse.out

   test:
      suffix: viennacl_2
      nsize: 2
      requires: viennacl
      args: -ksp_monitor_short -mat_type aijviennacl
      output_file: output/ex7_mpiaijcusparse_2.out

   test:
      suffix: mpiaijcusparse
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse

   test:
      suffix: mpiaijcusparse_2
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse

   test:
      suffix: mpiaijcusparse_simple
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

   test:
      suffix: mpiaijcusparse_simple_2
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

   test:
      suffix: mpiaijcusparse_3
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

   test:
      suffix: mpiaijcusparse_4
      nsize: 2
      requires: cuda
      args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

   testset:
      args: -ksp_monitor_short -pc_type gamg -ksp_view -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10
      test:
         suffix: gamg_cuda
         nsize: {{1 2}separate output}
         requires: cuda
         args: -mat_type aijcusparse
         args: -pc_gamg_threshold -1
      test:
         suffix: gamg_kokkos
         nsize: {{1 2}separate output}
         requires: kokkos_kernels
         args: -mat_type aijkokkos

TEST*/