Actual source code: ex52.c


  2: static char help[] = "Solves a linear system in parallel with KSP. Modified from ex2.c \n\
  3:                       Illustrates how to use the external packages MUMPS, SUPERLU and STRUMPACK \n\
  4: Input parameters include:\n\
  5:   -random_exact_sol : use a random exact solution vector\n\
  6:   -view_exact_sol   : write exact solution vector to stdout\n\
  7:   -m <mesh_x>       : number of mesh points in x-direction\n\
  8:   -n <mesh_y>       : number of mesh points in y-direction\n\n";
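
/* Illustrative usage (an assumption: the executable has been built as ex52 and
   PETSc was configured with the package being exercised), e.g.
       mpiexec -n 2 ./ex52 -m 50 -n 50 -random_exact_sol -use_mumps_lu
   runs the MUMPS LU path on a 50 x 50 grid with a random exact solution. */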

 10: #include <petscksp.h>

 12: #if defined(PETSC_HAVE_MUMPS)
 13: /* Subroutine contributed by Varun Hiremath */
 14: PetscErrorCode printMumpsMemoryInfo(Mat F)
 15: {
 16:   PetscInt       maxMem, sumMem;

 19:   MatMumpsGetInfog(F,16,&maxMem);
 20:   MatMumpsGetInfog(F,17,&sumMem);
 21:   PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(16) :: Max memory in MB = %" PetscInt_FMT, maxMem);
 22:   PetscPrintf(PETSC_COMM_WORLD, "\n MUMPS INFOG(17) :: Sum memory in MB = %" PetscInt_FMT "\n", sumMem);
 23:   return 0;
 24: }
 25: #endif
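
/* The helper above omits error checking for brevity. A sketch of the same queries
   using PETSc's error-checking macro (an assumption: the PETSc version in use
   provides PetscCall()) would read:

     PetscCall(MatMumpsGetInfog(F,16,&maxMem));
     PetscCall(MatMumpsGetInfog(F,17,&sumMem));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD,"\n MUMPS INFOG(16) :: Max memory in MB = %" PetscInt_FMT,maxMem));
*/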

 27: int main(int argc,char **args)
 28: {
 29:   Vec            x,b,u;    /* approx solution, RHS, exact solution */
 30:   Mat            A,F;
 31:   KSP            ksp;      /* linear solver context */
 32:   PC             pc;
 33:   PetscRandom    rctx;     /* random number generator context */
 34:   PetscReal      norm;     /* norm of solution error */
 35:   PetscInt       i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
 36:   PetscBool      flg=PETSC_FALSE,flg_ilu=PETSC_FALSE,flg_ch=PETSC_FALSE;
 37: #if defined(PETSC_HAVE_MUMPS)
 38:   PetscBool      flg_mumps=PETSC_FALSE,flg_mumps_ch=PETSC_FALSE;
 39: #endif
 40: #if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
 41:   PetscBool      flg_superlu=PETSC_FALSE;
 42: #endif
 43: #if defined(PETSC_HAVE_STRUMPACK)
 44:   PetscBool      flg_strumpack=PETSC_FALSE;
 45: #endif
 46:   PetscScalar    v;
 47:   PetscMPIInt    rank,size;
 48: #if defined(PETSC_USE_LOG)
 49:   PetscLogStage  stage;
 50: #endif

 53:   PetscInitialize(&argc,&args,(char*)0,help);
 54:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 55:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 56:   PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
 57:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
 58:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 59:          Compute the matrix and right-hand-side vector that define
 60:          the linear system, Ax = b.
 61:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 62:   MatCreate(PETSC_COMM_WORLD,&A);
 63:   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
 64:   MatSetFromOptions(A);
 65:   MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
 66:   MatSeqAIJSetPreallocation(A,5,NULL);
 67:   MatSetUp(A);

 69:   /*
 70:      Currently, all PETSc parallel matrix formats are partitioned by
 71:      contiguous chunks of rows across the processors.  Determine which
 72:      rows of the matrix are locally owned.
 73:   */
 74:   MatGetOwnershipRange(A,&Istart,&Iend);

 76:   /*
 77:      Set matrix elements for the 2-D, five-point stencil in parallel.
 78:       - Each processor needs to insert only elements that it owns
 79:         locally (but any non-local elements will be sent to the
 80:         appropriate processor during matrix assembly).
 81:       - Always specify global rows and columns of matrix entries.

 83:      Note: this uses the less common natural ordering that orders first
 84:      all the unknowns for x = h, then for x = 2h, etc.; hence you see J = Ii +- n
 85:      instead of J = Ii +- m as you might expect. The more standard ordering
 86:      would first do all variables for y = h, then y = 2h etc.

 88:    */
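  /* A small worked example of the index arithmetic below (with the default
     m = 8, n = 7): global row Ii = 10 gives i = 10/7 = 1 and j = 10 - 1*7 = 3,
     so its stencil neighbours are rows 3 and 17 (Ii -/+ n, the x direction)
     and rows 9 and 11 (Ii -/+ 1, the y direction). */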
 89:   PetscLogStageRegister("Assembly", &stage);
 90:   PetscLogStagePush(stage);
 91:   for (Ii=Istart; Ii<Iend; Ii++) {
 92:     v = -1.0; i = Ii/n; j = Ii - i*n;
 93:     if (i>0)   {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 94:     if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 95:     if (j>0)   {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 96:     if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,INSERT_VALUES);}
 97:     v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,INSERT_VALUES);
 98:   }

100:   /*
101:      Assemble matrix, using the 2-step process:
102:        MatAssemblyBegin(), MatAssemblyEnd()
103:      Computations can be done while messages are in transit
104:      by placing code between these two statements.
105:   */
106:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
107:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
108:   PetscLogStagePop();

110:   /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
111:   MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);

113:   /*
114:      Create parallel vectors.
115:       - We form 1 vector from scratch and then duplicate as needed.
116:       - When using VecCreate(), VecSetSizes and VecSetFromOptions()
117:         in this example, we specify only the
118:         vector's global dimension; the parallel partitioning is determined
119:         at runtime.
120:       - When solving a linear system, the vectors and matrices MUST
121:         be partitioned accordingly.  PETSc automatically generates
122:         appropriately partitioned matrices and vectors when MatCreate()
123:         and VecCreate() are used with the same communicator.
124:       - The user can alternatively specify the local vector and matrix
125:         dimensions when more sophisticated partitioning is needed
126:         (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
127:         below).
128:   */
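  /* Illustrative only (not executed here): if the application computes its own row
     partition, each process could pass an explicit local length, say a hypothetical
     variable nlocal, in place of the PETSC_DECIDE argument used below:
         VecSetSizes(u,nlocal,m*n);
  */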
129:   VecCreate(PETSC_COMM_WORLD,&u);
130:   VecSetSizes(u,PETSC_DECIDE,m*n);
131:   VecSetFromOptions(u);
132:   VecDuplicate(u,&b);
133:   VecDuplicate(b,&x);

135:   /*
136:      Set exact solution; then compute right-hand-side vector.
137:      By default we use an exact solution vector with all
138:      elements equal to 1.0. Alternatively, the runtime option
139:      -random_exact_sol forms a solution vector with random components.
140:   */
141:   PetscOptionsGetBool(NULL,NULL,"-random_exact_sol",&flg,NULL);
142:   if (flg) {
143:     PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
144:     PetscRandomSetFromOptions(rctx);
145:     VecSetRandom(u,rctx);
146:     PetscRandomDestroy(&rctx);
147:   } else {
148:     VecSet(u,1.0);
149:   }
150:   MatMult(A,u,b);

152:   /*
153:      View the exact solution vector if desired
154:   */
155:   flg  = PETSC_FALSE;
156:   PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
157:   if (flg) VecView(u,PETSC_VIEWER_STDOUT_WORLD);

159:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
160:                 Create the linear solver and set various options
161:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

163:   /*
164:      Create linear solver context
165:   */
166:   KSPCreate(PETSC_COMM_WORLD,&ksp);
167:   KSPSetOperators(ksp,A,A);

169:   /*
170:     Example of how to use external package MUMPS
171:     Note: runtime options
172:           '-ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 3 -mat_mumps_icntl_1 0.0'
173:           are equivalent to these procedural calls
174:   */
175: #if defined(PETSC_HAVE_MUMPS)
176:   flg_mumps    = PETSC_FALSE;
177:   flg_mumps_ch = PETSC_FALSE;
178:   PetscOptionsGetBool(NULL,NULL,"-use_mumps_lu",&flg_mumps,NULL);
179:   PetscOptionsGetBool(NULL,NULL,"-use_mumps_ch",&flg_mumps_ch,NULL);
180:   if (flg_mumps || flg_mumps_ch) {
181:     KSPSetType(ksp,KSPPREONLY);
182:     PetscInt  ival,icntl;
183:     PetscReal val;
184:     KSPGetPC(ksp,&pc);
185:     if (flg_mumps) {
186:       PCSetType(pc,PCLU);
187:     } else if (flg_mumps_ch) {
188:       MatSetOption(A,MAT_SPD,PETSC_TRUE); /* set MUMPS id%SYM=1 */
189:       PCSetType(pc,PCCHOLESKY);
190:     }
191:     PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);
192:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
193:     PCFactorGetMatrix(pc,&F);

195:     if (flg_mumps) {
196:       /* Get memory estimates from MUMPS' MatLUFactorSymbolic(), e.g. INFOG(16), INFOG(17).
197:          KSPSetUp() below will do nothing inside MatLUFactorSymbolic() */
198:       MatFactorInfo info;
199:       MatLUFactorSymbolic(F,A,NULL,NULL,&info);
200:       flg = PETSC_FALSE;
201:       PetscOptionsGetBool(NULL,NULL,"-print_mumps_memory",&flg,NULL);
202:       if (flg) printMumpsMemoryInfo(F);
203:     }

205:     /* sequential ordering */
206:     icntl = 7; ival = 2;
207:     MatMumpsSetIcntl(F,icntl,ival);

209:     /* threshold for row pivot detection */
210:     MatMumpsSetIcntl(F,24,1);
211:     icntl = 3; val = 1.e-6;
212:     MatMumpsSetCntl(F,icntl,val);

214:     /* compute determinant of A */
215:     MatMumpsSetIcntl(F,33,1);
216:   }
217: #endif
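
  /* Illustrative command lines for the MUMPS block above (assuming PETSc was
     configured with MUMPS and the executable is named ex52):
         mpiexec -n 3 ./ex52 -use_mumps_lu -m 50 -n 50 -print_mumps_memory
         mpiexec -n 3 ./ex52 -use_mumps_ch -mat_type sbaij
  */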

219:   /*
220:     Example of how to use external package SuperLU
221:     Note: runtime options
222:           '-ksp_type preonly -pc_type ilu -pc_factor_mat_solver_type superlu -mat_superlu_ilu_droptol 1.e-8'
223:           are equivalent to these procedural calls
224:   */
225: #if defined(PETSC_HAVE_SUPERLU) || defined(PETSC_HAVE_SUPERLU_DIST)
226:   flg_ilu     = PETSC_FALSE;
227:   flg_superlu = PETSC_FALSE;
228:   PetscOptionsGetBool(NULL,NULL,"-use_superlu_lu",&flg_superlu,NULL);
229:   PetscOptionsGetBool(NULL,NULL,"-use_superlu_ilu",&flg_ilu,NULL);
230:   if (flg_superlu || flg_ilu) {
231:     KSPSetType(ksp,KSPPREONLY);
232:     KSPGetPC(ksp,&pc);
233:     if (flg_superlu) PCSetType(pc,PCLU);
234:     else if (flg_ilu) PCSetType(pc,PCILU);
235:     if (size == 1) {
236: #if !defined(PETSC_HAVE_SUPERLU)
237:       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU");
238: #else
239:       PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU);
240: #endif
241:     } else {
242: #if !defined(PETSC_HAVE_SUPERLU_DIST)
243:       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires SUPERLU_DIST");
244: #else
245:       PCFactorSetMatSolverType(pc,MATSOLVERSUPERLU_DIST);
246: #endif
247:     }
248:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
249:     PCFactorGetMatrix(pc,&F);
250: #if defined(PETSC_HAVE_SUPERLU)
251:     if (size == 1) {
252:       MatSuperluSetILUDropTol(F,1.e-8);
253:     }
254: #endif
255:   }
256: #endif
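
  /* Illustrative runs of the SuperLU block above: on one process
         ./ex52 -use_superlu_ilu
     selects MATSOLVERSUPERLU, while on two processes
         mpiexec -n 2 ./ex52 -use_superlu_lu
     selects MATSOLVERSUPERLU_DIST (assuming both packages were enabled when
     PETSc was configured). */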

258:   /*
259:     Example of how to use external package STRUMPACK
260:     Note: runtime options
261:           '-pc_type lu/ilu \
262:            -pc_factor_mat_solver_type strumpack \
263:            -mat_strumpack_reordering METIS \
264:            -mat_strumpack_colperm 0 \
265:            -mat_strumpack_hss_rel_tol 1.e-3 \
266:            -mat_strumpack_hss_min_sep_size 50 \
267:            -mat_strumpack_max_rank 100 \
268:            -mat_strumpack_leaf_size 4'
269:        are equivalent to these procedural calls

271:     We refer to the STRUMPACK-sparse manual, section 5, for more info on
272:     how to tune the preconditioner.
273:   */
274: #if defined(PETSC_HAVE_STRUMPACK)
275:   flg_ilu       = PETSC_FALSE;
276:   flg_strumpack = PETSC_FALSE;
277:   PetscOptionsGetBool(NULL,NULL,"-use_strumpack_lu",&flg_strumpack,NULL);
278:   PetscOptionsGetBool(NULL,NULL,"-use_strumpack_ilu",&flg_ilu,NULL);
279:   if (flg_strumpack || flg_ilu) {
280:     KSPSetType(ksp,KSPPREONLY);
281:     KSPGetPC(ksp,&pc);
282:     if (flg_strumpack) PCSetType(pc,PCLU);
283:     else if (flg_ilu) PCSetType(pc,PCILU);
284: #if !defined(PETSC_HAVE_STRUMPACK)
285:     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This test requires STRUMPACK");
286: #endif
287:     PCFactorSetMatSolverType(pc,MATSOLVERSTRUMPACK);
288:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
289:     PCFactorGetMatrix(pc,&F);
290: #if defined(PETSC_HAVE_STRUMPACK)
291:     /* Set the fill-reducing reordering.                              */
292:     MatSTRUMPACKSetReordering(F,MAT_STRUMPACK_METIS);
293:     /* Since this is a simple discretization, the diagonal is always  */
294:     /* nonzero, and there is no need for the extra MC64 permutation.  */
295:     MatSTRUMPACKSetColPerm(F,PETSC_FALSE);
296:     /* The compression tolerance used when doing low-rank compression */
297:     /* in the preconditioner. This is problem specific!               */
298:     MatSTRUMPACKSetHSSRelTol(F,1.e-3);
299:     /* Set minimum matrix size for HSS compression to 15 in order to  */
300:     /* demonstrate preconditioner on small problems. For performance  */
301:     /* a value of say 500 is better.                                  */
302:     MatSTRUMPACKSetHSSMinSepSize(F,15);
303:     /* You can further limit the fill in the preconditioner by        */
304:     /* setting a maximum rank                                         */
305:     MatSTRUMPACKSetHSSMaxRank(F,100);
306:     /* Set the size of the diagonal blocks (the leafs) in the HSS     */
307:     /* approximation. The default value should be better for real     */
308:     /* problems. This is mostly for illustration on a small problem.  */
309:     MatSTRUMPACKSetHSSLeafSize(F,4);
310: #endif
311:   }
312: #endif
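
  /* An illustrative run of the STRUMPACK block above (assuming PETSc was
     configured with STRUMPACK):
         mpiexec -n 2 ./ex52 -use_strumpack_ilu
  */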

314:   /*
315:     Example of how to use procedural calls that are equivalent to
316:           '-ksp_type preonly -pc_type lu/ilu -pc_factor_mat_solver_type petsc'
317:   */
318:   flg     = PETSC_FALSE;
319:   flg_ilu = PETSC_FALSE;
320:   flg_ch  = PETSC_FALSE;
321:   PetscOptionsGetBool(NULL,NULL,"-use_petsc_lu",&flg,NULL);
322:   PetscOptionsGetBool(NULL,NULL,"-use_petsc_ilu",&flg_ilu,NULL);
323:   PetscOptionsGetBool(NULL,NULL,"-use_petsc_ch",&flg_ch,NULL);
324:   if (flg || flg_ilu || flg_ch) {
325:     Vec diag;

327:     KSPSetType(ksp,KSPPREONLY);
328:     KSPGetPC(ksp,&pc);
329:     if (flg) PCSetType(pc,PCLU);
330:     else if (flg_ilu) PCSetType(pc,PCILU);
331:     else if (flg_ch) PCSetType(pc,PCCHOLESKY);
332:     PCFactorSetMatSolverType(pc,MATSOLVERPETSC);
333:     PCFactorSetUpMatSolverType(pc); /* call MatGetFactor() to create F */
334:     PCFactorGetMatrix(pc,&F);

336:     /* Test MatGetDiagonal() */
337:     KSPSetUp(ksp);
338:     VecDuplicate(x,&diag);
339:     MatGetDiagonal(F,diag);
340:     /* VecView(diag,PETSC_VIEWER_STDOUT_WORLD); */
341:     VecDestroy(&diag);
342:   }
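
  /* Illustrative runs of the built-in PETSc factorizations above:
         ./ex52 -use_petsc_lu
         ./ex52 -use_petsc_ch
  */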

344:   KSPSetFromOptions(ksp);

346:   /* Get info from matrix factors */
347:   KSPSetUp(ksp);

349: #if defined(PETSC_HAVE_MUMPS)
350:   if (flg_mumps || flg_mumps_ch) {
351:     PetscInt  icntl,infog34;
352:     PetscReal cntl,rinfo12,rinfo13;
353:     icntl = 3;
354:     MatMumpsGetCntl(F,icntl,&cntl);

356:     /* compute determinant */
357:     if (rank == 0) {
358:       MatMumpsGetInfog(F,34,&infog34);
359:       MatMumpsGetRinfog(F,12,&rinfo12);
360:       MatMumpsGetRinfog(F,13,&rinfo13);
361:       PetscPrintf(PETSC_COMM_SELF,"  Mumps row pivot threshold = %g\n",(double)cntl);
362:       PetscPrintf(PETSC_COMM_SELF,"  Mumps determinant = (%g, %g) * 2^%" PetscInt_FMT " \n",(double)rinfo12,(double)rinfo13,infog34);
363:     }
364:   }
365: #endif

367:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
368:                       Solve the linear system
369:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
370:   KSPSolve(ksp,b,x);

372:   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
373:                       Check solution and clean up
374:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
375:   VecAXPY(x,-1.0,u);
376:   VecNorm(x,NORM_2,&norm);
377:   KSPGetIterationNumber(ksp,&its);

379:   /*
380:      Print convergence information.  PetscPrintf() produces a single
381:      print statement from all processes that share a communicator.
382:      An alternative is PetscFPrintf(), which prints to a file.
383:   */
384:   if (norm < 1.e-12) {
385:     PetscPrintf(PETSC_COMM_WORLD,"Norm of error < 1.e-12 iterations %" PetscInt_FMT "\n",its);
386:   } else {
387:     PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %" PetscInt_FMT "\n",(double)norm,its);
388:   }

390:   /*
391:      Free work space.  All PETSc objects should be destroyed when they
392:      are no longer needed.
393:   */
394:   KSPDestroy(&ksp);
395:   VecDestroy(&u);  VecDestroy(&x);
396:   VecDestroy(&b);  MatDestroy(&A);

398:   /*
399:      Always call PetscFinalize() before exiting a program.  This routine
400:        - finalizes the PETSc libraries as well as MPI
401:        - provides summary and diagnostic information if certain runtime
402:          options are chosen (e.g., -log_view).
403:   */
404:   PetscFinalize();
405:   return 0;
406: }

408: /*TEST

410:    test:
411:       args: -use_petsc_lu
412:       output_file: output/ex52_2.out

414:    test:
415:       suffix: mumps
416:       nsize: 3
417:       requires: mumps
418:       args: -use_mumps_lu
419:       output_file: output/ex52_1.out

421:    test:
422:       suffix: mumps_2
423:       nsize: 3
424:       requires: mumps
425:       args: -use_mumps_ch
426:       output_file: output/ex52_1.out

428:    test:
429:       suffix: mumps_3
430:       nsize: 3
431:       requires: mumps
432:       args: -use_mumps_ch -mat_type sbaij
433:       output_file: output/ex52_1.out

435:    test:
436:       suffix: mumps_4
437:       nsize: 3
438:       requires: mumps !complex !single
439:       args: -use_mumps_lu -m 50 -n 50 -print_mumps_memory
440:       output_file: output/ex52_4.out

442:    test:
443:       suffix: mumps_omp_2
444:       nsize: 4
445:       requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
446:       args: -use_mumps_lu -mat_mumps_use_omp_threads 2
447:       output_file: output/ex52_1.out

449:    test:
450:       suffix: mumps_omp_3
451:       nsize: 4
452:       requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
453:       args: -use_mumps_ch -mat_mumps_use_omp_threads 3
454:       # Ignore the warning since we are intentionally testing the imbalanced case
455:       filter: grep -v "Warning: number of OpenMP threads"
456:       output_file: output/ex52_1.out

458:    test:
459:       suffix: mumps_omp_4
460:       nsize: 4
461:       requires: mumps hwloc openmp pthread defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
462:       # let petsc guess a proper number for threads
463:       args: -use_mumps_ch -mat_type sbaij -mat_mumps_use_omp_threads
464:       output_file: output/ex52_1.out

466:    test:
467:       suffix: strumpack
468:       requires: strumpack
469:       args: -use_strumpack_lu
470:       output_file: output/ex52_3.out

472:    test:
473:       suffix: strumpack_2
474:       nsize: 2
475:       requires: strumpack
476:       args: -use_strumpack_lu
477:       output_file: output/ex52_3.out

479:    test:
480:       suffix: strumpack_ilu
481:       requires: strumpack
482:       args: -use_strumpack_ilu
483:       output_file: output/ex52_3.out

485:    test:
486:       suffix: strumpack_ilu_2
487:       nsize: 2
488:       requires: strumpack
489:       args: -use_strumpack_ilu
490:       output_file: output/ex52_3.out

492:    test:
493:       suffix: superlu
494:       requires: superlu superlu_dist
495:       args: -use_superlu_lu
496:       output_file: output/ex52_2.out

498:    test:
499:       suffix: superlu_dist
500:       nsize: 2
501:       requires: superlu superlu_dist
502:       args: -use_superlu_lu
503:       output_file: output/ex52_2.out

505:    test:
506:       suffix: superlu_ilu
507:       requires: superlu superlu_dist
508:       args: -use_superlu_ilu
509:       output_file: output/ex52_2.out

511: TEST*/