Actual source code: ex2.c


static char help[] = "Solves a linear system in parallel with KSP.\n\
Input parameters include:\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";

/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>

int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A;       /* linear system matrix */
  KSP         ksp;     /* linear solver context */
  PetscReal   norm;    /* norm of solution error */
  PetscInt    i, j, Ii, J, Istart, Iend, m = 8, n = 7, its;
  PetscBool   flg;
  PetscScalar v;

  PetscInitialize(&argc, &args, (char *)0, help);
  PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
  PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.

     Performance tuning note:  For problems of substantial size,
     preallocation of matrix memory is crucial for attaining good
     performance. See the matrix chapter of the users manual for details.
  */
  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(A);
  MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
  MatSeqAIJSetPreallocation(A, 5, NULL);
  MatSeqSBAIJSetPreallocation(A, 1, 5, NULL);
  MatMPISBAIJSetPreallocation(A, 1, 5, NULL, 5, NULL);
  MatMPISELLSetPreallocation(A, 5, NULL, 5, NULL);
  MatSeqSELLSetPreallocation(A, 5, NULL);
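
  /*
     Only the preallocation routine that matches the matrix's actual type
     takes effect; the others are ignored, which is why it is safe to call
     all of them here.  The type itself could be fixed in the code before
     MatSetFromOptions(), e.g. with MatSetType(A, MATAIJ), or chosen at
     runtime, for example:

        ./ex2 -mat_type aij
        ./ex2 -mat_type sbaij
        ./ex2 -mat_type sell
  */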

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(A, &Istart, &Iend);
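
  /*
     For example, with the default m = 8, n = 7 the matrix has 56 rows; run
     on two MPI processes, PETSc's default layout assigns rows 0..27 to
     rank 0 and rows 28..55 to rank 1, so Istart and Iend differ on each rank.
  */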

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this uses the less common natural ordering that orders all the
     unknowns for x = h first, then those for x = 2h, etc.; hence you see
     J = Ii +- n instead of J = Ii +- m as you might expect. The more
     standard ordering would first do all variables for y = h, then
     y = 2h, etc.
   */
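  /*
     For orientation: for an interior grid point, the row Ii = i*n + j
     assembled by the loop below carries the (unscaled) 5-point Laplacian
     stencil

        A(Ii, Ii)      =  4
        A(Ii, Ii +- 1) = -1   (neighbors in the y-direction)
        A(Ii, Ii +- n) = -1   (neighbors in the x-direction)

     i.e. 4*u(i,j) - u(i-1,j) - u(i+1,j) - u(i,j-1) - u(i,j+1); rows for
     boundary points simply omit the couplings that fall outside the grid.
   */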
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (i < m - 1) {
      J = Ii + n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j > 0) {
      J = Ii - 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j < n - 1) {
      J = Ii + 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    v = 4.0;
    MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
  }

  /*
     Assemble the matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
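
  /*
     For example (an illustrative rearrangement only, not what this program
     does), independent set-up work such as creating the vectors used below
     could be placed between the two assembly calls so that it overlaps the
     communication of off-process matrix entries:

        MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
        VecCreate(PETSC_COMM_WORLD, &u);      // independent of the assembly messages
        VecSetSizes(u, PETSC_DECIDE, m * n);
        VecSetFromOptions(u);
        MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  */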

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);

  /*
     Create parallel vectors.
      - We form 1 vector from scratch and then duplicate as needed.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
        in this example, we specify only the vector's global dimension;
        the parallel partitioning is determined at runtime.
      - When solving a linear system, the vectors and matrices MUST
        be partitioned accordingly.  PETSc automatically generates
        appropriately partitioned matrices and vectors when MatCreate()
        and VecCreate() are used with the same communicator.
      - The user can alternatively specify the local vector and matrix
        dimensions when more sophisticated partitioning is needed
        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
        below; a sketch follows the vector creation).
  */
  VecCreate(PETSC_COMM_WORLD, &u);
  VecSetSizes(u, PETSC_DECIDE, m * n);
  VecSetFromOptions(u);
  VecDuplicate(u, &b);
  VecDuplicate(b, &x);
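
  /*
     If a particular partitioning is required, the local size can be given
     explicitly in place of PETSC_DECIDE.  A sketch that matches the vector
     layout to the rows of A owned by this process (nlocal is a name used
     only for this illustration):

        PetscInt nlocal;
        MatGetLocalSize(A, &nlocal, NULL);
        VecSetSizes(u, nlocal, m * n);
  */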

  /*
     Set the exact solution; then compute the right-hand-side vector.
     By default we use an exact solution vector with all elements
     equal to 1.0.
  */
  VecSet(u, 1.0);
  MatMult(A, u, b);

  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL);
  if (flg) VecView(u, PETSC_VIEWER_STDOUT_WORLD);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPCreate(PETSC_COMM_WORLD, &ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp, A, A);
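
  /*
     A different matrix could be supplied as the second Mat argument, to be
     used in building the preconditioner, for example a cheaper approximation
     of A (P below is only a placeholder name for such an assembled Mat):

        KSPSetOperators(ksp, A, P);

     In this example A plays both roles.
  */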

  /*
     Set linear solver defaults for this problem (optional).
     - By extracting the KSP and PC contexts from the KSP context,
       we can then directly call any KSP and PC routines to set
       various options (a sketch follows the call below).
     - The following statement is optional; all of these
       parameters could alternatively be specified at runtime via
       KSPSetFromOptions().  All of these defaults can be
       overridden at runtime, as indicated below.
  */
  KSPSetTolerances(ksp, 1.e-2 / ((m + 1) * (n + 1)), 1.e-50, PETSC_DEFAULT, PETSC_DEFAULT);
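
  /*
     A sketch of setting such options directly from the code, as mentioned
     above (the particular choices of CG and Jacobi are only illustrative):

        PC pc;
        KSPGetPC(ksp, &pc);
        PCSetType(pc, PCJACOBI);
        KSPSetType(ksp, KSPCG);
  */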

  /*
    Set runtime options, e.g.,
        -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
    These options will override those specified above as long as
    KSPSetFromOptions() is called _after_ any other customization
    routines.
  */
  KSPSetFromOptions(ksp);
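
  /*
     Typical invocations (illustrative; any KSP/PC options database keys can
     be used), for example:

        mpiexec -n 4 ./ex2 -ksp_type cg -pc_type bjacobi -sub_pc_type icc -ksp_monitor_short
        mpiexec -n 2 ./ex2 -m 100 -n 100 -ksp_type gmres -ksp_rtol 1.e-7 -ksp_view
  */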

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  KSPSolve(ksp, b, x);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  VecAXPY(x, -1.0, u);
  VecNorm(x, NORM_2, &norm);
  KSPGetIterationNumber(ksp, &its);

  /*
     Print convergence information.  PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its);
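
  /*
     A sketch of the file variant (fd is a placeholder for a FILE * opened by
     the caller, e.g. with PetscFOpen()):

        PetscFPrintf(PETSC_COMM_WORLD, fd, "Norm of error %g\n", (double)norm);
  */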

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&A);

  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscFinalize();
  return 0;
}

/*TEST

   build:
      requires: !single

   test:
      suffix: chebyest_1
      args: -m 80 -n 80 -ksp_pc_side right -pc_type ksp -ksp_ksp_type chebyshev -ksp_ksp_max_it 5 -ksp_ksp_chebyshev_esteig 0.9,0,0,1.1 -ksp_monitor_short

   test:
      suffix: chebyest_2
      args: -m 80 -n 80 -ksp_pc_side right -pc_type ksp -ksp_ksp_type chebyshev -ksp_ksp_max_it 5 -ksp_ksp_chebyshev_esteig 0.9,0,0,1.1 -ksp_esteig_ksp_type cg -ksp_monitor_short

   test:
      args: -ksp_monitor_short -m 5 -n 5 -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -ksp_monitor_short -m 5 -n 5 -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 3
      args: -pc_type sor -pc_sor_symmetric -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 4
      args: -pc_type eisenstat -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 5
      nsize: 2
      args: -ksp_monitor_short -m 5 -n 5 -mat_view draw -ksp_gmres_cgs_refinement_type refine_always -nox
      output_file: output/ex2_2.out

   test:
      suffix: bjacobi
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 1 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres

   test:
      suffix: bjacobi_2
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 2 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres -ksp_view

   test:
      suffix: bjacobi_3
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 4 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres

   test:
      suffix: qmrcgs
      args: -ksp_type qmrcgs -pc_type ilu
      output_file: output/ex2_fbcgs.out

   test:
      suffix: qmrcgs_2
      nsize: 3
      args: -ksp_type qmrcgs -pc_type bjacobi
      output_file: output/ex2_fbcgs_2.out

   test:
      suffix: fbcgs
      args: -ksp_type fbcgs -pc_type ilu

   test:
      suffix: fbcgs_2
      nsize: 3
      args: -ksp_type fbcgsr -pc_type bjacobi

   test:
      suffix: groppcg
      args: -ksp_monitor_short -ksp_type groppcg -m 9 -n 9

   test:
      suffix: mkl_pardiso_cholesky
      requires: mkl_pardiso
      args: -ksp_type preonly -pc_type cholesky -mat_type sbaij -pc_factor_mat_solver_type mkl_pardiso

   test:
      suffix: mkl_pardiso_lu
      requires: mkl_pardiso
      args: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mkl_pardiso

   test:
      suffix: pipebcgs
      args: -ksp_monitor_short -ksp_type pipebcgs -m 9 -n 9

   test:
      suffix: pipecg
      args: -ksp_monitor_short -ksp_type pipecg -m 9 -n 9

   test:
      suffix: pipecgrr
      args: -ksp_monitor_short -ksp_type pipecgrr -m 9 -n 9

   test:
      suffix: pipecr
      args: -ksp_monitor_short -ksp_type pipecr -m 9 -n 9

   test:
      suffix: pipelcg
      args: -ksp_monitor_short -ksp_type pipelcg -m 9 -n 9 -pc_type none -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmax 2
      filter: grep -v "sqrt breakdown in iteration"

   test:
      suffix: sell
      args: -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -m 9 -n 9 -mat_type sell

   test:
      requires: mumps
      suffix: sell_mumps
      args: -ksp_type preonly -m 9 -n 12 -mat_type sell -pc_type lu -pc_factor_mat_solver_type mumps -pc_factor_mat_ordering_type natural

   test:
      suffix: telescope
      nsize: 4
      args: -m 100 -n 100 -ksp_converged_reason -pc_type telescope -pc_telescope_reduction_factor 4 -telescope_pc_type bjacobi

   test:
      suffix: umfpack
      requires: suitesparse
      args: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type umfpack

   test:
      suffix: spqr
      requires: suitesparse
      args: -ksp_type preonly -pc_type qr -pc_factor_mat_solver_type spqr

   test:
      suffix: pc_symmetric
      args: -m 10 -n 9 -ksp_converged_reason -ksp_type gmres -ksp_pc_side symmetric -pc_type cholesky

   test:
      suffix: pipeprcg
      args: -ksp_monitor_short -ksp_type pipeprcg -m 9 -n 9

   test:
      suffix: pipeprcg_rcw
      args: -ksp_monitor_short -ksp_type pipeprcg -recompute_w false -m 9 -n 9

   test:
      suffix: pipecg2
      args: -ksp_monitor_short -ksp_type pipecg2 -m 9 -n 9 -ksp_norm_type {{preconditioned unpreconditioned natural}}

   test:
      suffix: pipecg2_2
      nsize: 4
      args: -ksp_monitor_short -ksp_type pipecg2 -m 15 -n 9 -ksp_norm_type {{preconditioned unpreconditioned natural}}

   test:
      suffix: hpddm
      nsize: 4
      requires: hpddm !__float128 !__fp16
      filter: sed -e "s/ iterations 9/ iterations 8/g"
      args: -ksp_converged_reason -ksp_type hpddm -ksp_hpddm_precision {{single double}shared output} -ksp_pc_side {{left right}shared output}

   test:
      suffix: hpddm___float128
      output_file: output/ex2_hpddm.out
      nsize: 4
      requires: hpddm __float128
      filter: sed -e "s/ iterations 9/ iterations 8/g"
      args: -ksp_converged_reason -ksp_type hpddm -ksp_hpddm_precision {{double quadruple}shared output} -ksp_pc_side {{left right}shared output}

   test:
      suffix: symmetric_pc
      nsize: 1
      args: -ksp_monitor -ksp_type gmres -pc_type bjacobi -sub_pc_type icc -ksp_pc_side symmetric

   test:
      suffix: symmetric_pc2
      nsize: 1
      args: -ksp_monitor -ksp_type gmres -pc_type bjacobi -sub_pc_type icc -ksp_pc_side symmetric -pc_bjacobi_blocks 2

 TEST*/