Actual source code: ex5.c


static char help[] = "Solves two linear systems in parallel with KSP.  The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure).  The code\n\
also uses multiple profiling stages.  Input arguments are\n\
  -m <size> : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";
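/*
  Example invocations (illustrative; any of the options above can be combined):
    ./ex5 -m 10
    mpiexec -n 2 ./ex5 -m 10 -mat_nonsym -log_view
*/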

/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h       - base PETSc routines   petscvec.h - vectors
     petscmat.h       - matrices
     petscis.h        - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h    - viewers               petscpc.h  - preconditioners
*/
#include <petscksp.h>

int main(int argc, char **args)
{
  KSP         ksp;         /* linear solver context */
  Mat         C;           /* matrix */
  Vec         x, u, b;     /* approx solution, RHS, exact solution */
  PetscReal   norm, bnorm; /* norms of the solution error and the RHS */
  PetscScalar v, none = -1.0;
  PetscInt    Ii, J, ldim, low, high, iglobal, Istart, Iend;
  PetscInt    i, j, m = 3, n = 2, its;
  PetscMPIInt size, rank;
  PetscBool   mat_nonsymmetric = PETSC_FALSE;
  PetscBool   testnewC = PETSC_FALSE, testscaledMat = PETSC_FALSE;
#if defined(PETSC_USE_LOG)
  PetscLogStage stages[2];
#endif

  PetscInitialize(&argc, &args, (char *)0, help);
  PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  MPI_Comm_size(PETSC_COMM_WORLD, &size);
  n = 2 * size;

  /*
     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscOptionsGetBool(NULL, NULL, "-mat_nonsym", &mat_nonsymmetric, NULL);

  PetscOptionsGetBool(NULL, NULL, "-test_scaledMat", &testscaledMat, NULL);

  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscLogStageRegister("Original Solve", &stages[0]);
  PetscLogStageRegister("Second Solve", &stages[1]);
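  /*
     With -log_view, the performance summary then contains one section per
     registered stage (a sketch only; the exact layout and numbers depend on
     the PETSc build and the run):

       --- Event Stage 1: Original Solve
       MatMult   ...
       KSPSolve  ...

       --- Event Stage 2: Second Solve
       ...
  */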

  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscLogStagePush(stages[0]);

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  MatCreate(PETSC_COMM_WORLD, &C);
  MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(C);
  MatSetUp(C);
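  /*
     For example, the storage format can be chosen on the command line
     (these are standard PETSc matrix types; availability of some depends
     on the build):
       ./ex5 -mat_type aij     (default sparse format)
       ./ex5 -mat_type baij    (block sparse)
       ./ex5 -mat_type sbaij   (symmetric block sparse)
       ./ex5 -mat_type dense   (dense)
  */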

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(C, &Istart, &Iend);
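  /*
     To make the partitioning concrete: with the default m = 3 and two MPI
     processes (so n = 4 and the global size is 12), PETSc splits the rows
     evenly, giving [Istart, Iend) = [0, 6) on rank 0 and [6, 12) on rank 1.
  */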

  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (i < m - 1) {
      J = Ii + n;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j > 0) {
      J = Ii - 1;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    if (j < n - 1) {
      J = Ii + 1;
      MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
    }
    v = 4.0;
    MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
  }
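  /*
     The loop above builds the standard 5-point stencil on an m x n grid:
     global row Ii corresponds to grid point (i, j) with Ii = i*n + j, its
     vertical neighbors are rows Ii -/+ n, and its horizontal neighbors are
     rows Ii -/+ 1, so each row gets 4 on the diagonal and -1 for each
     existing neighbor.
  */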

  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
    }
  } else {
    MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE);
    MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE);
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);
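  /*
     Note: this example only uses ADD_VALUES.  If a code needs to mix
     ADD_VALUES and INSERT_VALUES in the same matrix, an intermediate
     assembly with MAT_FLUSH_ASSEMBLY is required between the two phases:
       MatAssemblyBegin(C, MAT_FLUSH_ASSEMBLY);
       MatAssemblyEnd(C, MAT_FLUSH_ASSEMBLY);
  */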

  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  VecCreate(PETSC_COMM_WORLD, &u);
  VecSetSizes(u, PETSC_DECIDE, m * n);
  VecSetFromOptions(u);
  VecDuplicate(u, &b);
  VecDuplicate(b, &x);
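  /*
     Because of VecSetFromOptions(), the vector implementation can also be
     selected at runtime, e.g. -vec_type cuda for a GPU vector (only
     available if PETSc was configured with CUDA support).
  */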

  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors.  Determine which
     range of entries is locally owned.
  */
  VecGetOwnershipRange(x, &low, &high);

  /*
    Set elements within the exact solution vector in parallel.
     - Each processor needs to insert only elements that it owns
       locally (but any non-local entries will be sent to the
       appropriate processor during vector assembly).
     - Always specify global locations of vector entries.
  */
  VecGetLocalSize(x, &ldim);
  for (i = 0; i < ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100 * rank);
    VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES);
  }

  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);

  /*
     Compute right-hand-side vector
  */
  MatMult(C, u, b);

  /*
    Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD, &ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp, C, C);

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
  KSPSetFromOptions(ksp);
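  /*
     A few illustrative solver configurations (all standard KSP/PC options;
     CG and ICC assume the symmetric variant of the problem, and ICC runs
     only in serial):
       ./ex5 -ksp_type gmres -pc_type jacobi -ksp_rtol 1e-6
       ./ex5 -ksp_type cg -pc_type icc
       mpiexec -n 4 ./ex5 -ksp_type gmres -pc_type asm -ksp_monitor_short
  */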

  /*
     Solve linear system.  Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU.  This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  KSPSetUp(ksp);
  KSPSolve(ksp, b, x);

  /*
     Check the error: since b = C*u, the computed solution x should equal
     u.  We report ||x - u|| relative to ||b||.
  */
  VecAXPY(x, none, u);
  VecNorm(x, NORM_2, &norm);
  VecNorm(b, NORM_2, &bnorm);
  KSPGetIterationNumber(ksp, &its);
  if (!testscaledMat || norm / bnorm > 1.e-5) PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its);

  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method.  We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscLogStagePop();
  PetscLogStagePush(stages[1]);

  /*
     Initialize all matrix entries to zero.  MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  MatZeroEntries(C);
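  /*
     Retaining the nonzero structure matters for performance: the entries
     inserted below land in already-allocated locations, so no fresh
     preallocation or communication setup is needed.  (Destroying C and
     creating a new matrix would work too, but would repeat that setup.)
  */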

  /*
     Assemble matrix again.  Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
  for (i = 0; i < m; i++) {
    for (j = 2 * rank; j < 2 * rank + 2; j++) {
      v  = -1.0;
      Ii = j + n * i;
      if (i > 0) {
        J = Ii - n;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (i < m - 1) {
        J = Ii + n;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (j > 0) {
        J = Ii - 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (j < n - 1) {
        J = Ii + 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      v = 6.0;
      MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
    }
  }
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
    }
  }
  MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);

  if (testscaledMat) {
    PetscRandom rctx;

    /* Scale the diagonal entries a(0,0) and a(M-1,M-1), where M = m*n is the global size */
    if (rank == 0) {
      v  = 6.0 * 0.00001;
      Ii = 0;
      J  = 0;
      MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    } else if (rank == size - 1) {
      v  = 6.0 * 0.00001;
      Ii = m * n - 1;
      J  = m * n - 1;
      MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES);
    }
    MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY);

    /* Compute a new right-hand-side vector */
    VecDestroy(&u);
    VecCreate(PETSC_COMM_WORLD, &u);
    VecSetSizes(u, PETSC_DECIDE, m * n);
    VecSetFromOptions(u);

    PetscRandomCreate(PETSC_COMM_WORLD, &rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u, rctx);
    PetscRandomDestroy(&rctx);
    VecAssemblyBegin(u);
    VecAssemblyEnd(u);
  }

  PetscOptionsGetBool(NULL, NULL, "-test_newMat", &testnewC, NULL);
  if (testnewC) {
    /*
     Test solving with a brand-new matrix C that has the same nonzero pattern, e.g.
      ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
    */
    Mat Ctmp;
    MatDuplicate(C, MAT_COPY_VALUES, &Ctmp);
    MatDestroy(&C);
    MatDuplicate(Ctmp, MAT_COPY_VALUES, &C);
    MatDestroy(&Ctmp);
  }

  MatMult(C, u, b);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp, C, C);
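  /*
     Because C was refilled in place with an unchanged nonzero pattern (the
     default path above), PETSc can typically reuse setup work from the
     first solve; with a factorization preconditioner such as ILU or LU,
     generally only the numeric factorization is redone, not the symbolic
     one.  (With -test_newMat, C is a brand-new object, so the
     preconditioner setup starts over.)
  */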

  /*
     Solve linear system
  */
  KSPSetUp(ksp);
  KSPSolve(ksp, b, x);

  /*
     Check the error again (note that bnorm is reused from the first system)
  */
  VecAXPY(x, none, u);
  VecNorm(x, NORM_2, &norm);
  KSPGetIterationNumber(ksp, &its);
  if (!testscaledMat || norm / bnorm > PETSC_SMALL) PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its);

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&C);

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscLogStagePop();

  PetscFinalize();
  return 0;
}
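/*
   Building and running (a typical PETSc example workflow; the makefile
   target and MPI launcher depend on your installation):
     make ex5
     mpiexec -n 2 ./ex5 -ksp_monitor_short -log_view
*/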

/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/ex5_superlu_dist.out

TEST*/