Actual source code: ex52.c


static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
                      Illustrates how to use the external packages MUMPS, SUPERLU and STRUMPACK \n\
Input parameters include:\n\
  -random_exact_sol : use a random exact solution vector\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";

#include <petscksp.h>

#if defined(PETSC_HAVE_MUMPS)
/* Subroutine contributed by Varun Hiremath */
PetscErrorCode printMumpsMemoryInfo(Mat F)
{
  PetscInt       maxMem, sumMem;

  MatMumpsGetInfog(F,16,&maxMem);
  MatMumpsGetInfog(F,17,&sumMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(16) :: Max memory in MB = %D", maxMem);
  PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(17) :: Sum memory in MB = %D \n", sumMem);
  return 0;
}
#endif

int main(int argc,char **args)
{
  Vec            x,b,u;    /* approx solution, RHS, exact solution */
  Mat            A,F;
  KSP            ksp;      /* linear solver context */
  PC             pc;
  PetscRandom    rctx;     /* random number generator context */
  PetscReal      norm;     /* norm of solution error */
  PetscInt       i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
  PetscBool      flg=PETSC_FALSE,flg_ilu=PETSC_FALSE,flg_ch=PETSC_FALSE;
#if defined(PETSC_HAVE_MUMPS)
  PetscBool      flg_mumps=PETSC_FALSE,flg_mumps_ch=PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  PetscBool      flg_superlu=PETSC_FALSE;
#endif
#if defined(PETSC_HAVE_STRUMPACK)
  PetscBool      flg_strumpack=PETSC_FALSE;
#endif
  PetscScalar    v;
  PetscMPIInt    rank,size;
#if defined(PETSC_USE_LOG)
  PetscLogStage  stage;
#endif

  PetscInitialize(&argc,&args,(char*)0,help);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(A);
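  /* Preallocate 5 nonzeros per row, enough for the 2-D five-point stencil
     assembled below (the MPI variant bounds both the diagonal and
     off-diagonal blocks of each locally owned row) */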
  MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
  MatSeqAIJSetPreallocation(A,5,NULL);
  MatSetUp(A);

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(A,&Istart,&Iend);

  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this uses the less common natural ordering that orders first
     all the unknowns for x = h, then for x = 2h, etc.; hence you see J = Ii +- n
     instead of J = Ii +- m as you might expect. The more standard ordering
     would first do all variables for y = h, then y = 2h, etc.
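     For example, the unknown at grid point (i,j) has global row Ii = i*n + j,
     so its neighbours in the x-direction are rows Ii-n and Ii+n, and its
     neighbours in the y-direction are rows Ii-1 and Ii+1, exactly as computed
     in the assembly loop below.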

   */
  PetscLogStageRegister("Assembly", &stage);
  PetscLogStagePush(stage);
  for (Ii=Istart; Ii<Iend; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0)   {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    if (j>0)   {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
    v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
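  /* (for example, purely local computations that do not touch A could be placed
     here, overlapping with the communication started by MatAssemblyBegin();
     this example has no such work to do) */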
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  PetscLogStagePop();

  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);

  /*
     Create parallel vectors.
      - We form 1 vector from scratch and then duplicate as needed.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
        in this example, we specify only the
        vector's global dimension; the parallel partitioning is determined
        at runtime.
      - When solving a linear system, the vectors and matrices MUST
        be partitioned accordingly.  PETSc automatically generates
        appropriately partitioned matrices and vectors when MatCreate()
        and VecCreate() are used with the same communicator.
      - The user can alternatively specify the local vector and matrix
        dimensions when more sophisticated partitioning is needed
        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
        below).
  */
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b);
  VecDuplicate(b,&x);

  /*
     Set exact solution; then compute right-hand-side vector.
     By default we use an exact solution vector with all elements
     equal to 1.0. Alternatively, the runtime option
     -random_exact_sol forms a solution vector with random components.
  */
  PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);
  if (flg) {
    PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u,rctx);
    PetscRandomDestroy(&rctx);
  } else {
    VecSet(u,1.0);
  }
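  /* compute b = A*u so that u is the exact solution of the linear system solved below */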
  MatMult(A,u,b);

  /*
     View the exact solution vector if desired
  */
  flg  = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
  if (flg) VecView(u,PETSC_VIEWER_STDOUT_WORLD);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /*
     Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp);
  KSPSetOperators(ksp,A,A);

  /*
    Example of how to use external package MUMPS
    Note: runtime options
          '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 2 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1.e-6 -mat_mumps_icntl_33 1'
          are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_MUMPS)
  flg_mumps    = PETSC_FALSE;
  flg_mumps_ch = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_mumps_lu",&flg_mumps,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_mumps_ch",&flg_mumps_ch,NULL);
  if (flg_mumps || flg_mumps_ch) {
    KSPSetType(ksp,KSPPREONLY);
    PetscInt  ival,icntl;
    PetscReal val;
    KSPGetPC(ksp,&pc);
    if (flg_mumps) {
      PCSetType(pc,PCLU);
    } else if (flg_mumps_ch) {
      MatSetOption(A,MAT_SPD,PETSC_TRUE); /* set MUMPS id%SYM=1 */
      PCSetType(pc,PCCHOLESKY);
    }
    PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);

    if (flg_mumps) {
      /* Get memory estimates from MUMPS' MatLUFactorSymbolic(), e.g. INFOG(16), INFOG(17).
         KSPSetUp() below will do nothing inside MatLUFactorSymbolic() */
      MatFactorInfo info;
      MatLUFactorSymbolic(F,A,NULL,NULL,&info);
      flg = PETSC_FALSE;
      PetscOptionsGetBool(NULL,NULL,"-print_mumps_memory",&flg,NULL);
      if (flg) {
        printMumpsMemoryInfo(F);
      }
    }

    /* sequential ordering */
    icntl = 7; ival = 2;
    MatMumpsSetIcntl(F,icntl,ival);

    /* threshold for row pivot detection */
    MatMumpsSetIcntl(F,24,1);
    icntl = 3; val = 1.e-6;
    MatMumpsSetCntl(F,icntl,val);

    /* compute determinant of A */
    MatMumpsSetIcntl(F,33,1);
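    /* (the determinant is read back after KSPSetUp() below, from RINFOG(12), RINFOG(13) and INFOG(34)) */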
  }
#endif

  /*
    Example of how to use external package SuperLU
    Note: runtime options
          '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
          are equivalent to these procedural calls
  */
#if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
  flg_ilu     = PETSC_FALSE;
  flg_superlu = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_superlu_lu",&flg_superlu,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_superlu_ilu",&flg_ilu,NULL);
  if (flg_superlu || flg_ilu) {
    KSPSetType(ksp,KSPPREONLY);
    KSPGetPC(ksp,&pc);
    if (flg_superlu) {
      PCSetType(pc,PCLU);
    } else if (flg_ilu) {
      PCSetType(pc,PCILU);
    }
    if (size == 1) {
#if !defined(PETSC_HAVE_SUPERLU)
      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU");
#else
      PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU);
#endif
    } else {
#if !defined(PETSC_HAVE_SUPERLU_DIST)
      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU_DIST");
#else
      PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU_DIST);
#endif
    }
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);
#if defined(PETSC_HAVE_SUPERLU)
    if (size == 1) {
      MatSuperluSetILUDropTol(F,1.e-8);
    }
#endif
  }
#endif

  /*
    Example of how to use external package STRUMPACK
    Note: runtime options
          '-pc_type lu/ilu \
           -pc_factor_mat_solver_type strumpack \
           -mat_strumpack_reordering METIS \
           -mat_strumpack_colperm 0 \
           -mat_strumpack_hss_rel_tol 1.e-3 \
           -mat_strumpack_hss_min_sep_size 50 \
           -mat_strumpack_max_rank 100 \
           -mat_strumpack_leaf_size 4'
       are equivalent to these procedural calls

    We refer to the STRUMPACK-sparse manual, section 5, for more info on
    how to tune the preconditioner.
  */
#if defined(PETSC_HAVE_STRUMPACK)
  flg_ilu       = PETSC_FALSE;
  flg_strumpack = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_strumpack_lu",&flg_strumpack,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_strumpack_ilu",&flg_ilu,NULL);
  if (flg_strumpack || flg_ilu) {
    KSPSetType(ksp,KSPPREONLY);
    KSPGetPC(ksp,&pc);
    if (flg_strumpack) {
      PCSetType(pc,PCLU);
    } else if (flg_ilu) {
      PCSetType(pc,PCILU);
    }
#if !defined(PETSC_HAVE_STRUMPACK)
    SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires STRUMPACK");
#endif
    PCFactorSetMatSolverType(pc,MATSOLVERSTRUMPACK);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);
#if defined(PETSC_HAVE_STRUMPACK)
    /* Set the fill-reducing reordering.                              */
    MatSTRUMPACKSetReordering(F,MAT_STRUMPACK_METIS);
    /* Since this is a simple discretization, the diagonal is always  */
    /* nonzero, and there is no need for the extra MC64 permutation.  */
    MatSTRUMPACKSetColPerm(F,PETSC_FALSE);
    /* The compression tolerance used when doing low-rank compression */
    /* in the preconditioner. This is problem specific!               */
    MatSTRUMPACKSetHSSRelTol(F,1.e-3);
    /* Set minimum matrix size for HSS compression to 15 in order to  */
    /* demonstrate preconditioner on small problems. For performance  */
    /* a value of say 500 is better.                                  */
    MatSTRUMPACKSetHSSMinSepSize(F,15);
    /* You can further limit the fill in the preconditioner by        */
    /* setting a maximum rank                                         */
    MatSTRUMPACKSetHSSMaxRank(F,100);
    /* Set the size of the diagonal blocks (the leafs) in the HSS     */
    /* approximation. The default value should be better for real     */
    /* problems. This is mostly for illustration on a small problem.  */
    MatSTRUMPACKSetHSSLeafSize(F,4);
#endif
  }
#endif

  /*
    Example of how to use procedural calls that are equivalent to
          '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
  */
  flg     = PETSC_FALSE;
  flg_ilu = PETSC_FALSE;
  flg_ch  = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-use_petsc_lu",&flg,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_petsc_ilu",&flg_ilu,NULL);
  PetscOptionsGetBool(NULL,NULL,"-use_petsc_ch",&flg_ch,NULL);
  if (flg || flg_ilu || flg_ch) {
    Vec diag;

    KSPSetType(ksp,KSPPREONLY);
    KSPGetPC(ksp,&pc);
    if (flg) {
      PCSetType(pc,PCLU);
    } else if (flg_ilu) {
      PCSetType(pc,PCILU);
    } else if (flg_ch) {
      PCSetType(pc,PCCHOLESKY);
    }
    PCFactorSetMatSolverType(pc,MATSOLVERPETSC);
    PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
    PCFactorGetMatrix(pc,&F);

    /* Test MatGetDiagonal() */
    KSPSetUp(ksp);
    VecDuplicate(x,&diag);
    MatGetDiagonal(F,diag);
    /* VecView(diag,PETSC_VIEWER_STDOUT_WORLD); */
    VecDestroy(&diag);
  }

  KSPSetFromOptions(ksp);

  /* Get info from matrix factors */
  KSPSetUp(ksp);

#if defined(PETSC_HAVE_MUMPS)
  if (flg_mumps || flg_mumps_ch) {
    PetscInt  icntl,infog34;
    PetscReal cntl,rinfo12,rinfo13;
    icntl = 3;
    MatMumpsGetCntl(F,icntl,&cntl);

    /* compute determinant */
    if (rank == 0) {
      MatMumpsGetInfog(F,34,&infog34);
      MatMumpsGetRinfog(F,12,&rinfo12);
      MatMumpsGetRinfog(F,13,&rinfo13);
      PetscPrintf(PETSC_COMM_SELF,"  Mumps row pivot threshold = %g\n",cntl);
      PetscPrintf(PETSC_COMM_SELF,"  Mumps determinant = (%g, %g) * 2^%D \n",(double)rinfo12,(double)rinfo13,infog34);
    }
  }
#endif

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPSolve(ksp,b,x);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
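  /* compute the 2-norm of the error x - u, where u is the exact solution set above */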
  VecAXPY(x,-1.0,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);

  /*
     Print convergence information.  PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  if (norm < 1.e-12) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error < 1.e-12 iterations %D\n",its);
  } else {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);
  }

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);  VecDestroy(&x);
  VecDestroy(&b);  MatDestroy(&A);

  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscFinalize();
  return 0;
}

/*TEST

   test:
      args: -use_petsc_lu
      output_file: output/ex52_2.out

   test:
      suffix: mumps
      nsize: 3
      requires: mumps
      args: -use_mumps_lu
      output_file: output/ex52_1.out

   test:
      suffix: mumps_2
      nsize: 3
      requires: mumps
      args: -use_mumps_ch
      output_file: output/ex52_1.out

   test:
      suffix: mumps_3
      nsize: 3
      requires: mumps
      args: -use_mumps_ch -mat_type sbaij
      output_file: output/ex52_1.out

   test:
      suffix: mumps_4
      nsize: 3
      requires: mumps !complex !single
      args: -use_mumps_lu -m 50 -n 50 -print_mumps_memory
      output_file: output/ex52_4.out

   test:
      suffix: mumps_omp_2
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_lu -mat_mumps_use_omp_threads 2
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_3
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      args: -use_mumps_ch -mat_mumps_use_omp_threads 3
      # Ignore the warning since we are intentionally testing the imbalanced case
      filter: grep -v "Warning: number of OpenMP threads"
      output_file: output/ex52_1.out

   test:
      suffix: mumps_omp_4
      nsize: 4
      requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
      # let petsc guess a proper number for threads
      args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
      output_file: output/ex52_1.out

   test:
      suffix: strumpack
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_lu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: strumpack_ilu_2
      nsize: 2
      requires: strumpack
      args: -use_strumpack_ilu
      output_file: output/ex52_3.out

   test:
      suffix: superlu
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_dist
      nsize: 2
      requires: superlu superlu_dist
      args: -use_superlu_lu
      output_file: output/ex52_2.out

   test:
      suffix: superlu_ilu
      requires: superlu superlu_dist
      args: -use_superlu_ilu
      output_file: output/ex52_2.out

TEST*/