Actual source code: ex52.c
static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
Illustrates how to use the external packages MUMPS, SuperLU and STRUMPACK \n\
Input parameters include:\n\
  -random_exact_sol : use a random exact solution vector\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";
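/*
  Example invocations (added for illustration; the executable name, process
  counts and option values are assumptions based on the options handled in
  this file and the test section at the bottom):

    mpiexec -n 3 ./ex52 -use_mumps_lu -m 50 -n 50 -print_mumps_memory
    mpiexec -n 2 ./ex52 -use_superlu_lu
    ./ex52 -use_strumpack_ilu
    ./ex52 -use_petsc_ch -random_exact_sol
*/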
#include <petscksp.h>

#if defined(PETSC_HAVE_MUMPS)
/* Subroutine contributed by Varun Hiremath */
PetscErrorCode printMumpsMemoryInfo(Mat F)
{
  PetscInt maxMem, sumMem;

  MatMumpsGetInfog(F, 16, &maxMem);
  MatMumpsGetInfog(F, 17, &sumMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(16) :: Max memory in MB = %" PetscInt_FMT, maxMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(17) :: Sum memory in MB = %" PetscInt_FMT "\n", sumMem);
  return 0;
}
#endif
int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A, F;
  KSP         ksp; /* linear solver context */
  PC          pc;
  PetscRandom rctx; /* random number generator context */
  PetscReal   norm; /* norm of solution error */
  PetscInt    i, j, Ii, J, Istart, Iend, m = 8, n = 7, its;
  PetscBool   flg = PETSC_FALSE, flg_ilu = PETSC_FALSE, flg_ch = PETSC_FALSE;
#if defined(PETSC_HAVE_MUMPS)
  PetscBool flg_mumps = PETSC_FALSE, flg_mumps_ch = PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  PetscBool flg_superlu = PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_STRUMPACK)
  PetscBool flg_strumpack = PETSC_FALSE;
#endif
  PetscScalar v;
  PetscMPIInt rank, size;
#if defined(PETSC_USE_LOG)
  PetscLogStage stage;
#endif
  PetscInitialize(&argc, &args, (char *)0, help);
  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  MPI_Comm_size(PETSC_COMM_WORLD, &size);
  PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
  PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(A);
  MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
  MatSeqAIJSetPreallocation(A, 5, NULL);
  MatSetUp(A);
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(A, &Istart, &Iend);

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this example uses the less common natural ordering that orders first
     all the unknowns for x = h, then for x = 2h, etc.; hence you see J = Ii +- n
     instead of J = Ii +- m as you might expect. The more standard ordering
     would first do all variables for y = h, then y = 2h, etc.
  */
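  /*
     Illustration (added for clarity; the index convention is inferred from the
     comment above): the global index of the unknown at (x = (i+1)h, y = (j+1)h)
     is Ii = i*n + j, so for m = 3, n = 2 the unknowns are numbered

        x = h : 0, 1      x = 2h : 2, 3      x = 3h : 4, 5

     which is why neighbors in the x-direction are Ii +- n and neighbors in the
     y-direction are Ii +- 1.
  */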
  PetscLogStageRegister("Assembly", &stage);
  PetscLogStagePush(stage);
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) { /* neighbor at x - h */
      J = Ii - n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    if (i < m - 1) { /* neighbor at x + h */
      J = Ii + n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    if (j > 0) { /* neighbor at y - h */
      J = Ii - 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    if (j < n - 1) { /* neighbor at y + h */
      J = Ii + 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    v = 4.0;
    MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES); /* diagonal entry */
  }
  /*
     Assemble the matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  PetscLogStagePop();

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);
  /*
     Create parallel vectors.
      - We form one vector from scratch and then duplicate it as needed.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
        in this example, we specify only the vector's global dimension;
        the parallel partitioning is determined at runtime.
      - When solving a linear system, the vectors and matrices MUST
        be partitioned accordingly. PETSc automatically generates
        appropriately partitioned matrices and vectors when MatCreate()
        and VecCreate() are used with the same communicator.
      - The user can alternatively specify the local vector and matrix
        dimensions when more sophisticated partitioning is needed
        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
        below).
  */
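  /*
     For example (a sketch added for illustration, not used in this example):
     to impose your own partitioning you could compute a local size nlocal on
     each rank (with the local sizes summing to m*n) and call
       VecSetSizes(u, nlocal, m * n);
     instead of passing PETSC_DECIDE for the local dimension.
  */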
  VecCreate(PETSC_COMM_WORLD, &u);
  VecSetSizes(u, PETSC_DECIDE, m * n);
  VecSetFromOptions(u);
  VecDuplicate(u, &b);
  VecDuplicate(b, &x);

  /*
     Set the exact solution; then compute the right-hand-side vector.
     By default the exact solution is a vector with all elements equal to 1.0;
     alternatively, the runtime option -random_exact_sol forms a solution
     vector with random components.
  */
  PetscOptionsGetBool(NULL, NULL, "-random_exact_sol", &flg, NULL);
  if (flg) {
    PetscRandomCreate(PETSC_COMM_WORLD, &rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u, rctx);
    PetscRandomDestroy(&rctx);
  } else {
    VecSet(u, 1.0);
  }
  MatMult(A, u, b);
  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL);
  if (flg) VecView(u, PETSC_VIEWER_STDOUT_WORLD);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
               Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create the linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD, &ksp);
  KSPSetOperators(ksp, A, A);
  /*
     Example of how to use the external package MUMPS
     Note: runtime options
           '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 3 -mat_mumps_icntl_1 0.0'
     are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_MUMPS)
  flg_mumps    = PETSC_FALSE;
  flg_mumps_ch = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_mumps_lu", &flg_mumps, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_mumps_ch", &flg_mumps_ch, NULL);
  if (flg_mumps || flg_mumps_ch) {
    KSPSetType(ksp, KSPPREONLY);
    PetscInt  ival, icntl;
    PetscReal val;
    KSPGetPC(ksp, &pc);
    if (flg_mumps) {
      PCSetType(pc, PCLU);
    } else if (flg_mumps_ch) {
      MatSetOption(A, MAT_SPD, PETSC_TRUE); /* set MUMPS id%SYM=1 */
      PCSetType(pc, PCCHOLESKY);
    }
    PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);

    if (flg_mumps) {
      /* Get memory estimates from MUMPS' MatLUFactorSymbolic(), e.g. INFOG(16), INFOG(17).
         KSPSetUp() below will do nothing inside MatLUFactorSymbolic() */
      MatFactorInfo info;
      MatLUFactorSymbolic(F, A, NULL, NULL, &info);
      flg = PETSC_FALSE;
      PetscOptionsGetBool(NULL, NULL, "-print_mumps_memory", &flg, NULL);
      if (flg) printMumpsMemoryInfo(F);
    }

    /* sequential ordering */
    icntl = 7;
    ival  = 2;
    MatMumpsSetIcntl(F, icntl, ival);

    /* threshold for row pivot detection */
    MatMumpsSetIcntl(F, 24, 1);
    icntl = 3;
    val   = 1.e-6;
    MatMumpsSetCntl(F, icntl, val);

    /* compute determinant of A */
    MatMumpsSetIcntl(F, 33, 1);
  }
#endif
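  /*
     Added note: the procedural MUMPS tuning above roughly corresponds to the
     runtime options (an illustrative mapping, not exercised by the tests below)
       -mat_mumps_icntl_7 2 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1.e-6 -mat_mumps_icntl_33 1
  */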
  /*
     Example of how to use the external package SuperLU
     Note: runtime options
           '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
     are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  flg_ilu     = PETSC_FALSE;
  flg_superlu = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_superlu_lu", &flg_superlu, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_superlu_ilu", &flg_ilu, NULL);
  if (flg_superlu || flg_ilu) {
    KSPSetType(ksp, KSPPREONLY);
    KSPGetPC(ksp, &pc);
    if (flg_superlu) PCSetType(pc, PCLU);
    else if (flg_ilu) PCSetType(pc, PCILU);
    if (size == 1) {
#if !defined(PETSC_HAVE_SUPERLU)
      SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires SUPERLU");
#else
      PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU);
#endif
    } else {
#if !defined(PETSC_HAVE_SUPERLU_DIST)
      SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires SUPERLU_DIST");
#else
      PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST);
#endif
    }
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);
#if defined(PETSC_HAVE_SUPERLU)
    if (size == 1) MatSuperluSetILUDropTol(F, 1.e-8);
#endif
  }
#endif
  /*
     Example of how to use the external package STRUMPACK
     Note: runtime options
           '-pc_type lu/ilu \
            -pc_factor_mat_solver_type strumpack \
            -mat_strumpack_reordering METIS \
            -mat_strumpack_colperm 0 \
            -mat_strumpack_hss_rel_tol 1.e-3 \
            -mat_strumpack_hss_min_sep_size 50 \
            -mat_strumpack_max_rank 100 \
            -mat_strumpack_leaf_size 4'
     are equivalent to these procedural calls

     We refer to the STRUMPACK-sparse manual, section 5, for more info on
     how to tune the preconditioner.
  */
#if defined(PETSC_HAVE_STRUMPACK)
  flg_ilu       = PETSC_FALSE;
  flg_strumpack = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_strumpack_lu", &flg_strumpack, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_strumpack_ilu", &flg_ilu, NULL);
  if (flg_strumpack || flg_ilu) {
    KSPSetType(ksp, KSPPREONLY);
    KSPGetPC(ksp, &pc);
    if (flg_strumpack) PCSetType(pc, PCLU);
    else if (flg_ilu) PCSetType(pc, PCILU);
#if !defined(PETSC_HAVE_STRUMPACK)
    SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires STRUMPACK");
#endif
    PCFactorSetMatSolverType(pc, MATSOLVERSTRUMPACK);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);
#if defined(PETSC_HAVE_STRUMPACK)
    /* Set the fill-reducing reordering. */
    MatSTRUMPACKSetReordering(F, MAT_STRUMPACK_METIS);
    /* Since this is a simple discretization, the diagonal is always
       nonzero, and there is no need for the extra MC64 permutation. */
    MatSTRUMPACKSetColPerm(F, PETSC_FALSE);
    /* The compression tolerance used when doing low-rank compression
       in the preconditioner. This is problem specific! */
    MatSTRUMPACKSetHSSRelTol(F, 1.e-3);
    /* Set the minimum matrix size for HSS compression to 15 in order to
       demonstrate the preconditioner on small problems. For performance,
       a value of, say, 500 is better. */
    MatSTRUMPACKSetHSSMinSepSize(F, 15);
    /* You can further limit the fill in the preconditioner by
       setting a maximum rank. */
    MatSTRUMPACKSetHSSMaxRank(F, 100);
    /* Set the size of the diagonal blocks (the leaves) in the HSS
       approximation. The default value should be better for real
       problems. This is mostly for illustration on a small problem. */
    MatSTRUMPACKSetHSSLeafSize(F, 4);
#endif
  }
#endif
  /*
     Example of how to use procedural calls that are equivalent to
     '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
  */
  flg     = PETSC_FALSE;
  flg_ilu = PETSC_FALSE;
  flg_ch  = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_petsc_lu", &flg, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_petsc_ilu", &flg_ilu, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_petsc_ch", &flg_ch, NULL);
  if (flg || flg_ilu || flg_ch) {
    Vec diag;

    KSPSetType(ksp, KSPPREONLY);
    KSPGetPC(ksp, &pc);
    if (flg) PCSetType(pc, PCLU);
    else if (flg_ilu) PCSetType(pc, PCILU);
    else if (flg_ch) PCSetType(pc, PCCHOLESKY);
    PCFactorSetMatSolverType(pc, MATSOLVERPETSC);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);

    /* Test MatGetDiagonal() */
    KSPSetUp(ksp);
    VecDuplicate(x, &diag);
    MatGetDiagonal(F, diag);
    /* VecView(diag,PETSC_VIEWER_STDOUT_WORLD); */
    VecDestroy(&diag);
  }

  KSPSetFromOptions(ksp);

  /* Get info from matrix factors */
  KSPSetUp(ksp);
#if defined(PETSC_HAVE_MUMPS)
  if (flg_mumps || flg_mumps_ch) {
    PetscInt  icntl, infog34;
    PetscReal cntl, rinfo12, rinfo13;
    icntl = 3;
    MatMumpsGetCntl(F, icntl, &cntl);

    /* compute determinant */
    if (rank == 0) {
      MatMumpsGetInfog(F, 34, &infog34);
      MatMumpsGetRinfog(F, 12, &rinfo12);
      MatMumpsGetRinfog(F, 13, &rinfo13);
      PetscPrintf(PETSC_COMM_SELF, " Mumps row pivot threshold = %g\n", (double)cntl);
      PetscPrintf(PETSC_COMM_SELF, " Mumps determinant = (%g, %g) * 2^%" PetscInt_FMT " \n", (double)rinfo12, (double)rinfo13, infog34);
    }
  }
#endif
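  /*
     Added note: with ICNTL(33)=1 set above, MUMPS reports the determinant as
     (RINFOG(12) + i*RINFOG(13)) * 2^INFOG(34); for real matrices the imaginary
     part RINFOG(13) is zero (interpretation based on the MUMPS user's guide).
  */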
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPSolve(ksp, b, x);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  VecAXPY(x, -1.0, u);
  VecNorm(x, NORM_2, &norm);
  KSPGetIterationNumber(ksp, &its);

  /*
     Print convergence information. PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  if (norm < 1.e-12) {
    PetscPrintf(PETSC_COMM_WORLD, "Norm of error < 1.e-12 iterations %" PetscInt_FMT "\n", its);
  } else {
    PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its);
  }
  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&A);

  /*
     Always call PetscFinalize() before exiting a program. This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscFinalize();
  return 0;
}
/*TEST

   test:
      args: -use_petsc_lu
      output_file: output/ex52_2.out

   test:
      suffix: mumps
      nsize: 3
      requires: mumps
      args: -use_mumps_lu
      output_file: output/ex52_1.out

   test:
      suffix: mumps_2
      nsize: 3
      requires: mumps
      args: -use_mumps_ch
      output_file: output/ex52_1.out

   test:
      suffix: mumps_3
      nsize: 3
      requires: mumps
      args: -use_mumps_ch -mat_type sbaij
      output_file: output/ex52_1.out

   test:
      suffix: mumps_4
      nsize: 3
      requires: mumps !complex !single
      args: -use_mumps_lu -m 50 -n 50 -print_mumps_memory
      output_file: output/ex52_4.out

   test:
      suffix: mumps_omp_2
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_lu -mat_mumps_use_omp_threads 2
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_3
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_ch -mat_mumps_use_omp_threads 3
      # Ignore the warning since we are intentionally testing the imbalanced case
      filter: grep -v "Warning: number of OpenMP threads"
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_4
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      # let petsc guess a proper number for threads
      args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
      output_file: output/ex52_1.out

   test:
      suffix: strumpack
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: superlu
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_dist
      nsize: 2
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_ilu
      requires: superlu superlu_dist
      args: -use_superlu_ilu
      output_file: output/ex52_2.out

TEST*/