Actual source code: ex7.c

```  1: static char help[] = "Block Jacobi preconditioner for solving a linear system in parallel with KSP.\n\
2: The code indicates the\n\
3: procedures for setting the particular block sizes and for using different\n\
4: linear solvers on the individual blocks.\n\n";

6: /*
7:    Note:  This example focuses on ways to customize the block Jacobi
8:    preconditioner. See ex1.c and ex2.c for more detailed comments on
9:    the basic usage of KSP (including working with matrices and vectors).

11:    Recall: The block Jacobi method is equivalent to the ASM preconditioner
12:    with zero overlap.
13:  */

15: /*T
16:    Concepts: KSP^customizing the block Jacobi preconditioner
17:    Processors: n
18: T*/

20: /*
21:   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
22:   automatically includes:
23:      petscsys.h       - base PETSc routines   petscvec.h - vectors
24:      petscmat.h - matrices
25:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
26:      petscviewer.h - viewers               petscpc.h  - preconditioners
27: */
28: #include <petscksp.h>

30: int main(int argc,char **args)
31: {
32:   Vec            x,b,u;      /* approx solution, RHS, exact solution */
33:   Mat            A;            /* linear system matrix */
34:   KSP            ksp;         /* KSP context */
35:   KSP            *subksp;     /* array of local KSP contexts on this processor */
36:   PC             pc;           /* PC context */
37:   PC             subpc;        /* PC context for subdomain */
38:   PetscReal      norm;         /* norm of solution error */
40:   PetscInt       i,j,Ii,J,*blks,m = 4,n;
41:   PetscMPIInt    rank,size;
42:   PetscInt       its,nlocal,first,Istart,Iend;
43:   PetscScalar    v,one = 1.0,none = -1.0;
44:   PetscBool      isbjacobi;

46:   PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
47:   PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
48:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
49:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
50:   n    = m+2;

52:   /* -------------------------------------------------------------------
53:          Compute the matrix and right-hand-side vector that define
54:          the linear system, Ax = b.
55:      ------------------------------------------------------------------- */

57:   /*
58:      Create and assemble parallel matrix
59:   */
60:   MatCreate(PETSC_COMM_WORLD,&A);
61:   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
62:   MatSetFromOptions(A);
63:   MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
64:   MatSeqAIJSetPreallocation(A,5,NULL);
65:   MatGetOwnershipRange(A,&Istart,&Iend);
66:   for (Ii=Istart; Ii<Iend; Ii++) {
67:     v = -1.0; i = Ii/n; j = Ii - i*n;
68:     if (i>0)   {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
69:     if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
70:     if (j>0)   {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
71:     if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
73:   }
74:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
75:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
76:   MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);

78:   /*
79:      Create parallel vectors
80:   */
81:   MatCreateVecs(A,&u,&b);
82:   VecDuplicate(u,&x);

84:   /*
85:      Set exact solution; then compute right-hand-side vector.
86:   */
87:   VecSet(u,one);
88:   MatMult(A,u,b);

90:   /*
91:      Create linear solver context
92:   */
93:   KSPCreate(PETSC_COMM_WORLD,&ksp);

95:   /*
96:      Set operators. Here the matrix that defines the linear system
97:      also serves as the preconditioning matrix.
98:   */
99:   KSPSetOperators(ksp,A,A);

101:   /*
102:      Set default preconditioner for this program to be block Jacobi.
103:      This choice can be overridden at runtime with the option
104:         -pc_type <type>

106:      IMPORTANT NOTE: Since the inners solves below are constructed to use
107:      iterative methods (such as KSPGMRES) the outer Krylov method should
108:      be set to use KSPFGMRES since it is the only Krylov method (plus KSPFCG)
109:      that allows the preconditioners to be nonlinear (that is have iterative methods
110:      inside them). The reason these examples work is because the number of
111:      iterations on the inner solves is left at the default (which is 10,000)
112:      and the tolerance on the inner solves is set to be a tight value of around 10^-6.
113:   */
114:   KSPGetPC(ksp,&pc);
115:   PCSetType(pc,PCBJACOBI);

117:   /* -------------------------------------------------------------------
118:                    Define the problem decomposition
119:      ------------------------------------------------------------------- */

121:   /*
122:      Call PCBJacobiSetTotalBlocks() to set individually the size of
123:      each block in the preconditioner.  This could also be done with
124:      the runtime option
125:          -pc_bjacobi_blocks <blocks>
126:      Also, see the command PCBJacobiSetLocalBlocks() to set the
127:      local blocks.

129:       Note: The default decomposition is 1 block per processor.
130:   */
131:   PetscMalloc1(m,&blks);
132:   for (i=0; i<m; i++) blks[i] = n;
133:   PCBJacobiSetTotalBlocks(pc,m,blks);
134:   PetscFree(blks);

136:   /*
137:     Set runtime options
138:   */
139:   KSPSetFromOptions(ksp);

141:   /* -------------------------------------------------------------------
142:                Set the linear solvers for the subblocks
143:      ------------------------------------------------------------------- */

145:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
146:        Basic method, should be sufficient for the needs of most users.
147:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

149:      By default, the block Jacobi method uses the same solver on each
150:      block of the problem.  To set the same solver options on all blocks,
151:      use the prefix -sub before the usual PC and KSP options, e.g.,
152:           -sub_pc_type <pc> -sub_ksp_type <ksp> -sub_ksp_rtol 1.e-4
153:   */

155:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
156:         Advanced method, setting different solvers for various blocks.
157:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

159:      Note that each block's KSP context is completely independent of
160:      the others, and the full range of uniprocessor KSP options is
161:      available for each block. The following section of code is intended
162:      to be a simple illustration of setting different linear solvers for
163:      the individual blocks.  These choices are obviously not recommended
164:      for solving this particular problem.
165:   */
166:   PetscObjectTypeCompare((PetscObject)pc,PCBJACOBI,&isbjacobi);
167:   if (isbjacobi) {
168:     /*
169:        Call KSPSetUp() to set the block Jacobi data structures (including
170:        creation of an internal KSP context for each block).

172:        Note: KSPSetUp() MUST be called before PCBJacobiGetSubKSP().
173:     */
174:     KSPSetUp(ksp);

176:     /*
177:        Extract the array of KSP contexts for the local blocks
178:     */
179:     PCBJacobiGetSubKSP(pc,&nlocal,&first,&subksp);

181:     /*
182:        Loop over the local blocks, setting various KSP options
183:        for each block.
184:     */
185:     for (i=0; i<nlocal; i++) {
186:       KSPGetPC(subksp[i],&subpc);
187:       if (!rank) {
188:         if (i%2) {
189:           PCSetType(subpc,PCILU);
190:         } else {
191:           PCSetType(subpc,PCNONE);
192:           KSPSetType(subksp[i],KSPBCGS);
193:           KSPSetTolerances(subksp[i],1.e-6,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
194:         }
195:       } else {
196:         PCSetType(subpc,PCJACOBI);
197:         KSPSetType(subksp[i],KSPGMRES);
198:         KSPSetTolerances(subksp[i],1.e-6,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
199:       }
200:     }
201:   }

203:   /* -------------------------------------------------------------------
204:                       Solve the linear system
205:      ------------------------------------------------------------------- */

207:   /*
208:      Solve the linear system
209:   */
210:   KSPSolve(ksp,b,x);

212:   /* -------------------------------------------------------------------
213:                       Check solution and clean up
214:      ------------------------------------------------------------------- */

216:   /*
217:      Check the error
218:   */
219:   VecAXPY(x,none,u);
220:   VecNorm(x,NORM_2,&norm);
221:   KSPGetIterationNumber(ksp,&its);
222:   PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);

224:   /*
225:      Free work space.  All PETSc objects should be destroyed when they
226:      are no longer needed.
227:   */
228:   KSPDestroy(&ksp);
229:   VecDestroy(&u);  VecDestroy(&x);
230:   VecDestroy(&b);  MatDestroy(&A);
231:   PetscFinalize();
232:   return ierr;
233: }

235: /*TEST

237:    test:
238:       suffix: 1
239:       nsize: 2
240:       args: -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

242:    test:
243:       suffix: 2
244:       nsize: 2
245:       args: -ksp_view ::ascii_info_detail

247:    test:
248:       suffix: viennacl
249:       requires: viennacl
250:       args: -ksp_monitor_short -mat_type aijviennacl
251:       output_file: output/ex7_mpiaijcusparse.out

253:    test:
254:       suffix: viennacl_2
255:       nsize: 2
256:       requires: viennacl
257:       args: -ksp_monitor_short -mat_type aijviennacl
258:       output_file: output/ex7_mpiaijcusparse_2.out

260:    test:
261:       suffix: mpiaijcusparse
262:       requires: cuda
263:       args: -ksp_monitor_short -mat_type aijcusparse

265:    test:
266:       suffix: mpiaijcusparse_2
267:       nsize: 2
268:       requires: cuda
269:       args: -ksp_monitor_short -mat_type aijcusparse

271:    test:
272:       suffix: mpiaijcusparse_simple
273:       requires: cuda
274:       args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

276:    test:
277:       suffix: mpiaijcusparse_simple_2
278:       nsize: 2
279:       requires: cuda
280:       args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse -sub_ksp_type preonly -sub_pc_type ilu

282:    test:
283:       suffix: mpiaijcusparse_3
284:       requires: cuda
285:       args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

287:    test:
288:       suffix: mpiaijcusparse_4
289:       nsize: 2
290:       requires: cuda
291:       args: -ksp_monitor_short -mat_type aijcusparse -sub_pc_factor_mat_solver_type cusparse

293:    testset:
294:       args: -ksp_monitor_short -pc_type gamg -ksp_view
295:       test:
296:         suffix: gamg_cuda
297:         nsize: {{1 2}separate output}
298:         requires: cuda
299:         args: -mat_type aijcusparse
300:         # triggers cusparse MatTransposeMat operation when squaring the graph
301:         args: -pc_gamg_sym_graph 0 -pc_gamg_threshold -1 -pc_gamg_square_graph 1
302:       test:
303:         suffix: gamg_kokkos
304:         nsize: {{1 2}separate output}
305:         requires: kokkos_kernels
306:         args: -mat_type aijkokkos

308: TEST*/
```