Actual source code: ex52.c
static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
Illustrate how to use external packages MUMPS, SUPERLU and STRUMPACK \n\
Input parameters include:\n\
  -random_exact_sol : use a random exact solution vector\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";
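
/*
   Illustrative invocations of this example (the MPI launcher and executable
   name depend on the local build; flags are the ones declared above):
     mpiexec -n 4 ./ex52 -m 40 -n 40 -random_exact_sol
     mpiexec -n 4 ./ex52 -m 40 -n 40 -view_exact_sol
*/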
#include <petscksp.h>
#if defined(PETSC_HAVE_MUMPS)
/* Subroutine contributed by Varun Hiremath */
PetscErrorCode printMumpsMemoryInfo(Mat F)
{
  PetscInt maxMem, sumMem;

  MatMumpsGetInfog(F,16,&maxMem);
  MatMumpsGetInfog(F,17,&sumMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(16) :: Max memory in MB = %" PetscInt_FMT, maxMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(17) :: Sum memory in MB = %" PetscInt_FMT "\n", sumMem);
  return 0;
}
#endif
int main(int argc,char **args)
{
  Vec           x,b,u;    /* approx solution, RHS, exact solution */
  Mat           A,F;
  KSP           ksp;      /* linear solver context */
  PC            pc;
  PetscRandom   rctx;     /* random number generator context */
  PetscReal     norm;     /* norm of solution error */
  PetscInt      i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
  PetscBool     flg=PETSC_FALSE,flg_ilu=PETSC_FALSE,flg_ch=PETSC_FALSE;
#if defined(PETSC_HAVE_MUMPS)
  PetscBool     flg_mumps=PETSC_FALSE,flg_mumps_ch=PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  PetscBool     flg_superlu=PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_STRUMPACK)
  PetscBool     flg_strumpack=PETSC_FALSE;
#endif
  PetscScalar   v;
  PetscMPIInt   rank,size;
#if defined(PETSC_USE_LOG)
  PetscLogStage stage;
#endif

  PetscInitialize(&argc,&args,(char*)0,help);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(A);
  MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
  MatSeqAIJSetPreallocation(A,5,NULL);
  MatSetUp(A);

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(A,&Istart,&Iend);

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this uses the less common natural ordering that orders first
     all the unknowns for x = h then for x = 2h etc; Hence you see J = Ii +- n
     instead of J = I +- m as you might expect. The more standard ordering
     would first do all variables for y = h, then y = 2h etc.
  */
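
  /*
     A concrete instance of this ordering (the numbers follow directly from
     the index arithmetic in the loop below): with the default n = 7, global
     row Ii = 10 is grid point (i,j) = (Ii/n, Ii - i*n) = (1,3); its
     x-direction neighbors are rows Ii - n = 3 and Ii + n = 17, and its
     y-direction neighbors are rows Ii - 1 = 9 and Ii + 1 = 11.
  */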
  PetscLogStageRegister("Assembly", &stage);
  PetscLogStagePush(stage);
  for (Ii=Istart; Ii<Iend; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0)   {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    if (j>0)   {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  PetscLogStagePop();

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);

  /*
     Create parallel vectors.
      - We form 1 vector from scratch and then duplicate as needed.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
        in this example, we specify only the vector's global dimension;
        the parallel partitioning is determined at runtime.
      - When solving a linear system, the vectors and matrices MUST
        be partitioned accordingly.  PETSc automatically generates
        appropriately partitioned matrices and vectors when MatCreate()
        and VecCreate() are used with the same communicator.
      - The user can alternatively specify the local vector and matrix
        dimensions when more sophisticated partitioning is needed
        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
        below).
  */
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b);
  VecDuplicate(b,&x);
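
  /*
     A minimal sketch of the alternative mentioned above: pass an explicit
     local size instead of PETSC_DECIDE.  "nlocal" is a hypothetical per-rank
     size; the values must sum to m*n across the communicator.

       PetscInt nlocal = ...;
       VecSetSizes(u,nlocal,m*n);
  */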

  /*
     Set exact solution; then compute right-hand-side vector.
     By default we use an exact solution of a vector with all
     elements of 1.0; Alternatively, using the runtime option
     -random_exact_sol forms a solution vector with random components.
  */
  PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);
  if (flg) {
    PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u,rctx);
    PetscRandomDestroy(&rctx);
  } else {
    VecSet(u,1.0);
  }
  MatMult(A,u,b);

  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
  if (flg) VecView(u,PETSC_VIEWER_STDOUT_WORLD);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp);
  KSPSetOperators(ksp,A,A);

  /*
     Example of how to use external package MUMPS
     Note: runtime options
       '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 3 -mat_mumps_icntl_1 0.0'
     are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_MUMPS)
  flg_mumps    = PETSC_FALSE;
  flg_mumps_ch = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_mumps_lu",&flg_mumps,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_mumps_ch",&flg_mumps_ch,NULL);
  if (flg_mumps || flg_mumps_ch) {
    KSPSetType(ksp,KSPPREONLY);
    PetscInt  ival,icntl;
    PetscReal val;
    KSPGetPC(ksp,&pc);
    if (flg_mumps) {
      PCSetType(pc,PCLU);
    } else if (flg_mumps_ch) {
      MatSetOption(A,MAT_SPD,PETSC_TRUE); /* set MUMPS id%SYM=1 */
      PCSetType(pc,PCCHOLESKY);
    }
    PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);

    if (flg_mumps) {
      /* Get memory estimates from MUMPS' MatLUFactorSymbolic(), e.g. INFOG(16), INFOG(17).
         KSPSetUp() below will do nothing inside MatLUFactorSymbolic() */
      MatFactorInfo info;
      MatLUFactorSymbolic(F,A,NULL,NULL,&info);
      flg = PETSC_FALSE;
      PetscOptionsGetBool(NULL,NULL,"-print_mumps_memory",&flg,NULL);
      if (flg) printMumpsMemoryInfo(F);
    }

    /* sequential ordering: ICNTL(7)=2 selects the AMF fill-reducing ordering */
    icntl = 7; ival = 2;
    MatMumpsSetIcntl(F,icntl,ival);

    /* threshold for row pivot detection: ICNTL(24)=1 turns on null-pivot
       detection, CNTL(3) sets its tolerance */
    MatMumpsSetIcntl(F,24,1);
    icntl = 3; val = 1.e-6;
    MatMumpsSetCntl(F,icntl,val);

    /* compute determinant of A */
    MatMumpsSetIcntl(F,33,1);
  }
#endif
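
  /*
     Illustrative MUMPS runs (assuming a MUMPS-enabled build; the flags are
     the ones defined in this example and exercised in its TEST section):
       mpiexec -n 3 ./ex52 -use_mumps_lu -m 50 -n 50 -print_mumps_memory
       mpiexec -n 3 ./ex52 -use_mumps_ch -mat_type sbaij
  */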

  /*
     Example of how to use external package SuperLU
     Note: runtime options
       '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
     are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  flg_ilu     = PETSC_FALSE;
  flg_superlu = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_superlu_lu",&flg_superlu,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_superlu_ilu",&flg_ilu,NULL);
  if (flg_superlu || flg_ilu) {
    KSPSetType(ksp,KSPPREONLY);
    KSPGetPC(ksp,&pc);
    if (flg_superlu) PCSetType(pc,PCLU);
    else if (flg_ilu) PCSetType(pc,PCILU);
    if (size == 1) {
#if !defined(PETSC_HAVE_SUPERLU)
      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU");
#else
      PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU);
#endif
    } else {
#if !defined(PETSC_HAVE_SUPERLU_DIST)
      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU_DIST");
#else
      PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU_DIST);
#endif
    }
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);
#if defined(PETSC_HAVE_SUPERLU)
    if (size == 1) {
      MatSuperluSetILUDropTol(F,1.e-8);
    }
#endif
  }
#endif
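
  /*
     Illustrative SuperLU runs (assuming SuperLU and/or SuperLU_DIST are
     available; on one process the sequential SuperLU is used, on several
     processes SuperLU_DIST):
       ./ex52 -use_superlu_ilu
       mpiexec -n 2 ./ex52 -use_superlu_lu
  */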

  /*
     Example of how to use external package STRUMPACK
     Note: runtime options
       '-pc_type lu/ilu \
        -pc_factor_mat_solver_type strumpack \
        -mat_strumpack_reordering METIS \
        -mat_strumpack_colperm 0 \
        -mat_strumpack_hss_rel_tol 1.e-3 \
        -mat_strumpack_hss_min_sep_size 50 \
        -mat_strumpack_max_rank 100 \
        -mat_strumpack_leaf_size 4'
     are equivalent to these procedural calls

     We refer to the STRUMPACK-sparse manual, section 5, for more info on
     how to tune the preconditioner.
  */
#if defined(PETSC_HAVE_STRUMPACK)
  flg_ilu       = PETSC_FALSE;
  flg_strumpack = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_strumpack_lu",&flg_strumpack,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_strumpack_ilu",&flg_ilu,NULL);
  if (flg_strumpack || flg_ilu) {
    KSPSetType(ksp,KSPPREONLY);
    KSPGetPC(ksp,&pc);
    if (flg_strumpack) PCSetType(pc,PCLU);
    else if (flg_ilu) PCSetType(pc,PCILU);
#if !defined(PETSC_HAVE_STRUMPACK)
    SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires STRUMPACK");
#endif
    PCFactorSetMatSolverType(pc,MATSOLVERSTRUMPACK);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);
#if defined(PETSC_HAVE_STRUMPACK)
    /* Set the fill-reducing reordering. */
    MatSTRUMPACKSetReordering(F,MAT_STRUMPACK_METIS);
    /* Since this is a simple discretization, the diagonal is always
       nonzero, and there is no need for the extra MC64 permutation. */
    MatSTRUMPACKSetColPerm(F,PETSC_FALSE);
    /* The compression tolerance used when doing low-rank compression
       in the preconditioner. This is problem specific! */
    MatSTRUMPACKSetHSSRelTol(F,1.e-3);
    /* Set minimum matrix size for HSS compression to 15 in order to
       demonstrate the preconditioner on small problems. For performance
       a value of, say, 500 is better. */
    MatSTRUMPACKSetHSSMinSepSize(F,15);
    /* You can further limit the fill in the preconditioner by
       setting a maximum rank */
    MatSTRUMPACKSetHSSMaxRank(F,100);
    /* Set the size of the diagonal blocks (the leaves) in the HSS
       approximation. The default value should be better for real
       problems. This is mostly for illustration on a small problem. */
    MatSTRUMPACKSetHSSLeafSize(F,4);
#endif
  }
#endif
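
  /*
     Illustrative STRUMPACK runs (assuming STRUMPACK is available):
       ./ex52 -use_strumpack_lu
       mpiexec -n 2 ./ex52 -use_strumpack_ilu
  */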

  /*
     Example of how to use procedural calls that are equivalent to
     '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
  */
  flg     = PETSC_FALSE;
  flg_ilu = PETSC_FALSE;
  flg_ch  = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_petsc_lu",&flg,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_petsc_ilu",&flg_ilu,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_petsc_ch",&flg_ch,NULL);
  if (flg || flg_ilu || flg_ch) {
    Vec diag;

    KSPSetType(ksp,KSPPREONLY);
    KSPGetPC(ksp,&pc);
    if (flg) PCSetType(pc,PCLU);
    else if (flg_ilu) PCSetType(pc,PCILU);
    else if (flg_ch) PCSetType(pc,PCCHOLESKY);
    PCFactorSetMatSolverType(pc,MATSOLVERPETSC);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);

    /* Test MatGetDiagonal() */
    KSPSetUp(ksp);
    VecDuplicate(x,&diag);
    MatGetDiagonal(F,diag);
    /* VecView(diag,PETSC_VIEWER_STDOUT_WORLD); */
    VecDestroy(&diag);
  }
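
  /*
     Illustrative runs with PETSc's built-in factorizations (run on a single
     process, since PETSc's native direct factorizations are sequential):
       ./ex52 -use_petsc_lu
       ./ex52 -use_petsc_ch
  */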

  KSPSetFromOptions(ksp);

  /* Get info from matrix factors */
  KSPSetUp(ksp);

#if defined(PETSC_HAVE_MUMPS)
  if (flg_mumps || flg_mumps_ch) {
    PetscInt  icntl,infog34;
    PetscReal cntl,rinfo12,rinfo13;
    icntl = 3;
    MatMumpsGetCntl(F,icntl,&cntl);

    /* compute determinant: MUMPS returns it as
       (RINFOG(12) + i*RINFOG(13)) * 2^INFOG(34) */
    if (rank == 0) {
      MatMumpsGetInfog(F,34,&infog34);
      MatMumpsGetRinfog(F,12,&rinfo12);
      MatMumpsGetRinfog(F,13,&rinfo13);
      PetscPrintf(PETSC_COMM_SELF," Mumps row pivot threshold = %g\n",cntl);
      PetscPrintf(PETSC_COMM_SELF," Mumps determinant = (%g, %g) * 2^%" PetscInt_FMT " \n",(double)rinfo12,(double)rinfo13,infog34);
    }
  }
#endif

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPSolve(ksp,b,x);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  VecAXPY(x,-1.0,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);

  /*
     Print convergence information.  PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  if (norm < 1.e-12) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error < 1.e-12 iterations %" PetscInt_FMT "\n",its);
  } else {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %" PetscInt_FMT "\n",(double)norm,its);
  }

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u); VecDestroy(&x);
  VecDestroy(&b); MatDestroy(&A);

  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscFinalize();
  return 0;
}

/*TEST

   test:
      args: -use_petsc_lu
      output_file: output/ex52_2.out

   test:
      suffix: mumps
      nsize: 3
      requires: mumps
      args: -use_mumps_lu
      output_file: output/ex52_1.out

   test:
      suffix: mumps_2
      nsize: 3
      requires: mumps
      args: -use_mumps_ch
      output_file: output/ex52_1.out

   test:
      suffix: mumps_3
      nsize: 3
      requires: mumps
      args: -use_mumps_ch -mat_type sbaij
      output_file: output/ex52_1.out

   test:
      suffix: mumps_4
      nsize: 3
      requires: mumps !complex !single
      args: -use_mumps_lu -m 50 -n 50 -print_mumps_memory
      output_file: output/ex52_4.out

   test:
      suffix: mumps_omp_2
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_lu -mat_mumps_use_omp_threads 2
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_3
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_ch -mat_mumps_use_omp_threads 3
      # Ignore the warning since we are intentionally testing the imbalanced case
      filter: grep -v "Warning: number of OpenMP threads"
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_4
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      # let petsc guess a proper number for threads
      args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
      output_file: output/ex52_1.out

   test:
      suffix: strumpack
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: superlu
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_dist
      nsize: 2
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_ilu
      requires: superlu superlu_dist
      args: -use_superlu_ilu
      output_file: output/ex52_2.out

TEST*/