Actual source code: ex52.c


static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
                      Illustrates how to use the external packages MUMPS, SuperLU and STRUMPACK \n\
Input parameters include:\n\
  -random_exact_sol : use a random exact solution vector\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";

#include <petscksp.h>

#if defined(PETSC_HAVE_MUMPS)
/* Subroutine contributed by Varun Hiremath */
PetscErrorCode printMumpsMemoryInfo(Mat F)
{
  PetscInt maxMem, sumMem;

  MatMumpsGetInfog(F, 16, &maxMem);
  MatMumpsGetInfog(F, 17, &sumMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(16) :: Max memory in MB = %" PetscInt_FMT, maxMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(17) :: Sum memory in MB = %" PetscInt_FMT "\n", sumMem);
  return 0;
}
#endif

int main(int argc, char **args)
{
  Vec         x, b, u; /* approx solution, RHS, exact solution */
  Mat         A, F;
  KSP         ksp; /* linear solver context */
  PC          pc;
  PetscRandom rctx; /* random number generator context */
  PetscReal   norm; /* norm of solution error */
  PetscInt    i, j, Ii, J, Istart, Iend, m = 8, n = 7, its;
  PetscBool   flg = PETSC_FALSE, flg_ilu = PETSC_FALSE, flg_ch = PETSC_FALSE;
#if defined(PETSC_HAVE_MUMPS)
  PetscBool flg_mumps = PETSC_FALSE, flg_mumps_ch = PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  PetscBool flg_superlu = PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_STRUMPACK)
  PetscBool flg_strumpack = PETSC_FALSE;
#endif
  PetscScalar v;
  PetscMPIInt rank, size;
#if defined(PETSC_USE_LOG)
  PetscLogStage stage;
#endif

  PetscInitialize(&argc, &args, (char *)0, help);
  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  MPI_Comm_size(PETSC_COMM_WORLD, &size);
  PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
  PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(A);
  MatMPIAIJSetPreallocation(A, 5, NULL, 5, NULL);
  MatSeqAIJSetPreallocation(A, 5, NULL);
  MatSetUp(A);

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(A, &Istart, &Iend);

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this uses the less common natural ordering that orders first
     all the unknowns for x = h, then for x = 2h, etc.; hence you see
     J = Ii +- n instead of J = Ii +- m as you might expect. The more
     standard ordering would first do all variables for y = h, then y = 2h, etc.
   */
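  /*
     A quick worked example of the index arithmetic below (values are
     illustrative): with the default m = 8, n = 7, global row Ii = 10
     maps to i = 10/7 = 1, j = 10 - 1*7 = 3, so its stencil neighbors
     are rows Ii - n = 3, Ii + n = 17, Ii - 1 = 9, and Ii + 1 = 11.
  */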
  PetscLogStageRegister("Assembly", &stage);
  PetscLogStagePush(stage);
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    if (i < m - 1) {
      J = Ii + n;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    if (j > 0) {
      J = Ii - 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    if (j < n - 1) {
      J = Ii + 1;
      MatSetValues(A, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    v = 4.0;
    MatSetValues(A, 1, &Ii, 1, &Ii, &v, INSERT_VALUES);
  }

  /*
     Assemble the matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
  PetscLogStagePop();

  /* A is symmetric. Set the symmetric flag to enable ICC/Cholesky preconditioners */
  MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE);

  /*
     Create parallel vectors.
      - We form 1 vector from scratch and then duplicate as needed.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
        in this example, we specify only the vector's global dimension;
        the parallel partitioning is determined at runtime.
      - When solving a linear system, the vectors and matrices MUST
        be partitioned accordingly.  PETSc automatically generates
        appropriately partitioned matrices and vectors when MatCreate()
        and VecCreate() are used with the same communicator.
      - The user can alternatively specify the local vector and matrix
        dimensions when more sophisticated partitioning is needed
        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
        below).
  */
  VecCreate(PETSC_COMM_WORLD, &u);
  VecSetSizes(u, PETSC_DECIDE, m * n);
  VecSetFromOptions(u);
  VecDuplicate(u, &b);
  VecDuplicate(b, &x);

  /*
     Set the exact solution; then compute the right-hand-side vector.
     By default we use an exact solution with all elements set to 1.0.
     Alternatively, the runtime option -random_exact_sol forms a solution
     vector with random components.
  */
  PetscOptionsGetBool(NULL, NULL, "-random_exact_sol", &flg, NULL);
  if (flg) {
    PetscRandomCreate(PETSC_COMM_WORLD, &rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u, rctx);
    PetscRandomDestroy(&rctx);
  } else {
    VecSet(u, 1.0);
  }
  MatMult(A, u, b);

  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL);
  if (flg) VecView(u, PETSC_VIEWER_STDOUT_WORLD);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD, &ksp);
  KSPSetOperators(ksp, A, A);

  /*
    Example of how to use the external package MUMPS
    Note: the runtime options
          '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1.e-6 -mat_mumps_icntl_33 1'
          are equivalent to the procedural calls below
  */
#if defined(PETSC_HAVE_MUMPS)
  flg_mumps    = PETSC_FALSE;
  flg_mumps_ch = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_mumps_lu", &flg_mumps, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_mumps_ch", &flg_mumps_ch, NULL);
  if (flg_mumps || flg_mumps_ch) {
    KSPSetType(ksp, KSPPREONLY);
    PetscInt  ival, icntl;
    PetscReal val;
    KSPGetPC(ksp, &pc);
    if (flg_mumps) {
      PCSetType(pc, PCLU);
    } else if (flg_mumps_ch) {
      MatSetOption(A, MAT_SPD, PETSC_TRUE); /* set MUMPS id%SYM=1 */
      PCSetType(pc, PCCHOLESKY);
    }
    PCFactorSetMatSolverType(pc, MATSOLVERMUMPS);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);

    if (flg_mumps) {
      /* Get memory estimates from MUMPS' MatLUFactorSymbolic(), e.g. INFOG(16), INFOG(17);
         the symbolic factorization is then already done, so KSPSetUp() below will skip it */
      MatFactorInfo info;
      MatLUFactorSymbolic(F, A, NULL, NULL, &info);
      flg = PETSC_FALSE;
      PetscOptionsGetBool(NULL, NULL, "-print_mumps_memory", &flg, NULL);
      if (flg) printMumpsMemoryInfo(F);
    }

    /* sequential ordering */
    icntl = 7;
    ival  = 2;
    MatMumpsSetIcntl(F, icntl, ival);

    /* threshold for row pivot detection */
    MatMumpsSetIcntl(F, 24, 1);
    icntl = 3;
    val   = 1.e-6;
    MatMumpsSetCntl(F, icntl, val);

    /* compute determinant of A */
    MatMumpsSetIcntl(F, 33, 1);
  }
#endif
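/*
  For instance, the MUMPS path above can be exercised with (one possible
  invocation, mirroring the MUMPS tests at the bottom of this file; the
  executable name ex52 is assumed):
    mpiexec -n 3 ./ex52 -use_mumps_lu -print_mumps_memory
*/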

  /*
    Example of how to use the external package SuperLU
    Note: the runtime options
          '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
          are equivalent to the procedural calls below
  */
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  flg_ilu     = PETSC_FALSE;
  flg_superlu = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_superlu_lu", &flg_superlu, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_superlu_ilu", &flg_ilu, NULL);
  if (flg_superlu || flg_ilu) {
    KSPSetType(ksp, KSPPREONLY);
    KSPGetPC(ksp, &pc);
    if (flg_superlu) PCSetType(pc, PCLU);
    else if (flg_ilu) PCSetType(pc, PCILU);
    if (size == 1) {
  #if !defined(PETSC_HAVE_SUPERLU)
      SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires SUPERLU");
  #else
      PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU);
  #endif
    } else {
  #if !defined(PETSC_HAVE_SUPERLU_DIST)
      SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This test requires SUPERLU_DIST");
  #else
      PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST);
  #endif
    }
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);
  #if defined(PETSC_HAVE_SUPERLU)
    if (size == 1) MatSuperluSetILUDropTol(F, 1.e-8);
  #endif
  }
#endif
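/*
  For instance (mirroring the SuperLU tests at the bottom of this file; the
  executable name ex52 is assumed):
    ./ex52 -use_superlu_lu                (one process: SuperLU)
    mpiexec -n 2 ./ex52 -use_superlu_lu   (two processes: SuperLU_DIST)
*/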

  /*
    Example of how to use the external package STRUMPACK
    Note: the runtime options
          '-pc_type lu/ilu \
           -pc_factor_mat_solver_type strumpack \
           -mat_strumpack_reordering METIS \
           -mat_strumpack_colperm 0 \
           -mat_strumpack_hss_rel_tol 1.e-3 \
           -mat_strumpack_hss_min_sep_size 15 \
           -mat_strumpack_max_rank 100 \
           -mat_strumpack_leaf_size 4'
          are equivalent to the procedural calls below

    See the STRUMPACK-sparse manual, section 5, for more information on
    how to tune the preconditioner.
  */
#if defined(PETSC_HAVE_STRUMPACK)
  flg_ilu       = PETSC_FALSE;
  flg_strumpack = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_strumpack_lu", &flg_strumpack, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_strumpack_ilu", &flg_ilu, NULL);
  if (flg_strumpack || flg_ilu) {
    KSPSetType(ksp, KSPPREONLY);
    KSPGetPC(ksp, &pc);
    if (flg_strumpack) PCSetType(pc, PCLU);
    else if (flg_ilu) PCSetType(pc, PCILU);
    PCFactorSetMatSolverType(pc, MATSOLVERSTRUMPACK);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);
    /* Set the fill-reducing reordering.                              */
    MatSTRUMPACKSetReordering(F, MAT_STRUMPACK_METIS);
    /* Since this is a simple discretization, the diagonal is always  */
    /* nonzero, and there is no need for the extra MC64 permutation.  */
    MatSTRUMPACKSetColPerm(F, PETSC_FALSE);
    /* The compression tolerance used when doing low-rank compression */
    /* in the preconditioner. This is problem specific!               */
    MatSTRUMPACKSetHSSRelTol(F, 1.e-3);
    /* Set the minimum matrix size for HSS compression to 15 in order */
    /* to demonstrate the preconditioner on small problems. For       */
    /* performance, a value of, say, 500 is better.                   */
    MatSTRUMPACKSetHSSMinSepSize(F, 15);
    /* You can further limit the fill in the preconditioner by        */
    /* setting a maximum rank.                                        */
    MatSTRUMPACKSetHSSMaxRank(F, 100);
    /* Set the size of the diagonal blocks (the leaves) in the HSS    */
    /* approximation. The default value should be better for real     */
    /* problems. This is mostly for illustration on a small problem.  */
    MatSTRUMPACKSetHSSLeafSize(F, 4);
  }
#endif
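/*
  For instance (mirroring the STRUMPACK tests at the bottom of this file;
  the executable name ex52 is assumed):
    mpiexec -n 2 ./ex52 -use_strumpack_ilu
*/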

  /*
    Example of how to use procedural calls that are equivalent to
          '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
  */
  flg     = PETSC_FALSE;
  flg_ilu = PETSC_FALSE;
  flg_ch  = PETSC_FALSE;
  PetscOptionsGetBool(NULL, NULL, "-use_petsc_lu", &flg, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_petsc_ilu", &flg_ilu, NULL);
  PetscOptionsGetBool(NULL, NULL, "-use_petsc_ch", &flg_ch, NULL);
  if (flg || flg_ilu || flg_ch) {
    Vec diag;

    KSPSetType(ksp, KSPPREONLY);
    KSPGetPC(ksp, &pc);
    if (flg) PCSetType(pc, PCLU);
    else if (flg_ilu) PCSetType(pc, PCILU);
    else if (flg_ch) PCSetType(pc, PCCHOLESKY);
    PCFactorSetMatSolverType(pc, MATSOLVERPETSC);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc, &F);

    /* Test MatGetDiagonal() */
    KSPSetUp(ksp);
    VecDuplicate(x, &diag);
    MatGetDiagonal(F, diag);
    /* VecView(diag,PETSC_VIEWER_STDOUT_WORLD); */
    VecDestroy(&diag);
  }
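  /*
    For instance (mirroring the first test at the bottom of this file; the
    executable name ex52 is assumed):
      ./ex52 -use_petsc_lu
  */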

  KSPSetFromOptions(ksp);

  /* Get info from matrix factors */
  KSPSetUp(ksp);

#if defined(PETSC_HAVE_MUMPS)
  if (flg_mumps || flg_mumps_ch) {
    PetscInt  icntl, infog34;
    PetscReal cntl, rinfo12, rinfo13;
    icntl = 3;
    MatMumpsGetCntl(F, icntl, &cntl);

    /* compute determinant */
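    /* With ICNTL(33) = 1, MUMPS returns the determinant as
       (RINFOG(12) + i*RINFOG(13)) * 2^INFOG(34); RINFOG(13) is only
       meaningful for complex matrices (see the MUMPS manual). */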
    if (rank == 0) {
      MatMumpsGetInfog(F, 34, &infog34);
      MatMumpsGetRinfog(F, 12, &rinfo12);
      MatMumpsGetRinfog(F, 13, &rinfo13);
      PetscPrintf(PETSC_COMM_SELF, "  Mumps row pivot threshold = %g\n", (double)cntl);
      PetscPrintf(PETSC_COMM_SELF, "  Mumps determinant = (%g, %g) * 2^%" PetscInt_FMT " \n", (double)rinfo12, (double)rinfo13, infog34);
    }
  }
#endif

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPSolve(ksp, b, x);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  VecAXPY(x, -1.0, u);
  VecNorm(x, NORM_2, &norm);
  KSPGetIterationNumber(ksp, &its);

  /*
     Print convergence information.  PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  if (norm < 1.e-12) {
    PetscPrintf(PETSC_COMM_WORLD, "Norm of error < 1.e-12 iterations %" PetscInt_FMT "\n", its);
  } else {
    PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g iterations %" PetscInt_FMT "\n", (double)norm, its);
  }

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&A);

  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscFinalize();
  return 0;
}

/*TEST

   test:
      args: -use_petsc_lu
      output_file: output/ex52_2.out

   test:
      suffix: mumps
      nsize: 3
      requires: mumps
      args: -use_mumps_lu
      output_file: output/ex52_1.out

   test:
      suffix: mumps_2
      nsize: 3
      requires: mumps
      args: -use_mumps_ch
      output_file: output/ex52_1.out

   test:
      suffix: mumps_3
      nsize: 3
      requires: mumps
      args: -use_mumps_ch -mat_type sbaij
      output_file: output/ex52_1.out

   test:
      suffix: mumps_4
      nsize: 3
      requires: mumps !complex !single
      args: -use_mumps_lu -m 50 -n 50 -print_mumps_memory
      output_file: output/ex52_4.out

   test:
      suffix: mumps_omp_2
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_lu -mat_mumps_use_omp_threads 2
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_3
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_ch -mat_mumps_use_omp_threads 3
      # Ignore the warning since we are intentionally testing the imbalanced case
      filter: grep -v "Warning: number of OpenMP threads"
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_4
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      # let PETSc guess a proper number of threads
      args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
      output_file: output/ex52_1.out

   test:
      suffix: strumpack
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: superlu
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_dist
      nsize: 2
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_ilu
      requires: superlu superlu_dist
      args: -use_superlu_ilu
      output_file: output/ex52_2.out

TEST*/