Actual source code: ex9.c


static char help[] = "Solves two different linear systems with two different linear solvers.\n\
This example also illustrates the repeated solution of linear systems\n\
while reusing matrix, vector, and solver data structures throughout\n\
the process.  Note the various stages of event logging.\n\n";

/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines     petscvec.h    - vectors
     petscmat.h    - matrices                petscis.h     - index sets
     petscksp.h    - Krylov subspace methods petscviewer.h - viewers
     petscpc.h     - preconditioners
*/
#include <petscksp.h>

/*
   Declare user-defined routines
*/
extern PetscErrorCode CheckError(Vec, Vec, Vec, PetscInt, PetscReal, PetscLogEvent);
extern PetscErrorCode MyKSPMonitor(KSP, PetscInt, PetscReal, void *);

int main(int argc, char **args)
{
  Vec           x1, b1, x2, b2; /* solution and RHS vectors for systems #1 and #2 */
  Vec           u;              /* exact solution vector */
  Mat           C1, C2;         /* matrices for systems #1 and #2 */
  KSP           ksp1, ksp2;     /* KSP contexts for systems #1 and #2 */
  PetscInt      ntimes = 3;     /* number of times to solve the linear systems */
  PetscLogEvent CHECK_ERROR;    /* event number for error checking */
  PetscInt      ldim, low, high, iglobal, Istart, Iend, Istart2, Iend2;
  PetscInt      Ii, J, i, j, m = 3, n = 2, its, t;
  PetscBool     flg = PETSC_FALSE, unsym = PETSC_TRUE;
  PetscScalar   v;
  PetscMPIInt   rank, size;
#if defined(PETSC_USE_LOG)
  PetscLogStage stages[3];
#endif

  PetscInitialize(&argc, &args, (char *)0, help);
  PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL);
  PetscOptionsGetInt(NULL, NULL, "-t", &ntimes, NULL);
  PetscOptionsGetBool(NULL, NULL, "-unsym", &unsym, NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  MPI_Comm_size(PETSC_COMM_WORLD, &size);
  n = 2 * size;
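
  /*
     Example invocation (a sketch; "ex9" is an assumed executable name):
     running on 2 MPI ranks gives n = 4, and with -m 4 each system has
     m*n = 16 unknowns, solved 5 times in succession:
         mpiexec -n 2 ./ex9 -m 4 -t 5
  */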

  /*
     Register various stages for profiling
  */
  PetscLogStageRegister("Prelim setup", &stages[0]);
  PetscLogStageRegister("Linear System 1", &stages[1]);
  PetscLogStageRegister("Linear System 2", &stages[2]);

  /*
     Register a user-defined event for profiling (error checking).
  */
  CHECK_ERROR = 0;
  PetscLogEventRegister("Check Error", KSP_CLASSID, &CHECK_ERROR);
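
  /*
     The stages and the "Check Error" event registered above appear in the
     profiling summary printed by the standard PETSc runtime option
     -log_view (requires PETSc built with logging enabled), e.g.:
         mpiexec -n 2 ./ex9 -log_view
  */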

  /* - - - - - - - - - - - - Stage 0: - - - - - - - - - - - - - -
                        Preliminary Setup
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  PetscLogStagePush(stages[0]);

  /*
     Create data structures for first linear system.
      - Create parallel matrix, specifying only its global dimensions.
        When using MatCreate(), the matrix format can be specified at
        runtime. Also, the parallel partitioning of the matrix is
        determined by PETSc at runtime.
      - Create parallel vectors.
        - When using VecSetSizes(), we specify only the vector's global
          dimension; the parallel partitioning is determined at runtime.
        - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  MatCreate(PETSC_COMM_WORLD, &C1);
  MatSetSizes(C1, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(C1);
  MatSetUp(C1);
  MatGetOwnershipRange(C1, &Istart, &Iend);
  VecCreate(PETSC_COMM_WORLD, &u);
  VecSetSizes(u, PETSC_DECIDE, m * n);
  VecSetFromOptions(u);
  VecDuplicate(u, &b1);
  VecDuplicate(u, &x1);

  /*
     Create first linear solver context.
     Set runtime options (e.g., -pc_type <type>).
     Note that the first linear system uses the default option
     names, while the second linear system uses a different
     options prefix.
  */
  KSPCreate(PETSC_COMM_WORLD, &ksp1);
  KSPSetFromOptions(ksp1);
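
  /*
     Since ksp1 uses the default (empty) options prefix, it responds to the
     unprefixed option names, e.g. (options taken from the test cases at
     the end of this file):
         ./ex9 -ksp_type gmres -pc_type jacobi -ksp_monitor_short
  */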

  /*
     Set user-defined monitoring routine for first linear system.
  */
  PetscOptionsGetBool(NULL, NULL, "-my_ksp_monitor", &flg, NULL);
  if (flg) KSPMonitorSet(ksp1, MyKSPMonitor, NULL, 0);

  /*
     Create data structures for second linear system.
  */
  MatCreate(PETSC_COMM_WORLD, &C2);
  MatSetSizes(C2, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n);
  MatSetFromOptions(C2);
  MatSetUp(C2);
  MatGetOwnershipRange(C2, &Istart2, &Iend2);
  VecDuplicate(u, &b2);
  VecDuplicate(u, &x2);

  /*
     Create second linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD, &ksp2);

  /*
     Set different options prefix for second linear system.
     Set runtime options (e.g., -s2_pc_type <type>)
  */
  KSPAppendOptionsPrefix(ksp2, "s2_");
  KSPSetFromOptions(ksp2);
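
  /*
     With the "s2_" prefix, ksp2 can be configured independently of ksp1 on
     the same command line, e.g. (options taken from the test cases at the
     end of this file):
         ./ex9 -ksp_type gmres -pc_type jacobi -s2_ksp_type bcgs -s2_pc_type jacobi
  */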

  /*
     Assemble exact solution vector in parallel.  Note that each
     processor needs to set only its local part of the vector.
  */
  VecGetLocalSize(u, &ldim);
  VecGetOwnershipRange(u, &low, &high);
  for (i = 0; i < ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100 * rank);
    VecSetValues(u, 1, &iglobal, &v, ADD_VALUES);
  }
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);

  /*
     Log the number of flops for computing the vector entries above
     (one multiply and one add per local entry)
  */
  PetscLogFlops(2.0 * ldim);

  /*
     End current profiling stage
  */
  PetscLogStagePop();

  /* --------------------------------------------------------------
                        Linear solver loop:
      Solve 2 different linear systems several times in succession
     -------------------------------------------------------------- */

  for (t = 0; t < ntimes; t++) {
    /* - - - - - - - - - - - - Stage 1: - - - - - - - - - - - - - -
                 Assemble and solve first linear system
       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

    /*
       Begin profiling stage #1
    */
    PetscLogStagePush(stages[1]);

    /*
       Initialize all matrix entries to zero.  MatZeroEntries() retains
       the nonzero structure of the matrix for sparse formats.
    */
    if (t > 0) MatZeroEntries(C1);

    /*
       Set matrix entries in parallel.  Also, log the number of flops
       for computing matrix entries.
        - Each processor needs to insert only elements that it owns
          locally (but any non-local elements will be sent to the
          appropriate processor during matrix assembly).
        - Always specify global rows and columns of matrix entries.
    */
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.0;
      i = Ii / n;
      j = Ii - i * n;
      if (i > 0) {
        J = Ii - n;
        MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (i < m - 1) {
        J = Ii + n;
        MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (j > 0) {
        J = Ii - 1;
        MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      if (j < n - 1) {
        J = Ii + 1;
        MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES);
      }
      v = 4.0;
      MatSetValues(C1, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
    }
    if (unsym) {
      for (Ii = Istart; Ii < Iend; Ii++) { /* Make matrix nonsymmetric */
        v = -1.0 * (t + 0.5);
        i = Ii / n;
        if (i > 0) {
          J = Ii - n;
          MatSetValues(C1, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
      }
      PetscLogFlops(2.0 * (Iend - Istart));
    }

    /*
       Assemble matrix, using the 2-step process:
         MatAssemblyBegin(), MatAssemblyEnd()
       Computations can be done while messages are in transition
       by placing code between these two statements.
    */
    MatAssemblyBegin(C1, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C1, MAT_FINAL_ASSEMBLY);
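
    /*
       A sketch of that overlap, with LocalWork() standing in for a
       hypothetical routine performing purely local computation (it is
       not part of this example):
           MatAssemblyBegin(C1, MAT_FINAL_ASSEMBLY);
           LocalWork();   (runs while off-process entries are in transit)
           MatAssemblyEnd(C1, MAT_FINAL_ASSEMBLY);
    */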

    /*
       Allow new nonzero locations to be inserted during later
       assemblies of this matrix (PETSC_TRUE is the default behavior)
    */
    MatSetOption(C1, MAT_NEW_NONZERO_LOCATIONS, PETSC_TRUE);

    /*
       Compute right-hand-side vector
    */
    MatMult(C1, u, b1);

    /*
       Set operators. Here the matrix that defines the linear system
       also serves as the preconditioning matrix.
    */
    KSPSetOperators(ksp1, C1, C1);

    /*
       Use the previous solution of linear system #1 as the initial
       guess for the next solve of linear system #1.  The user MUST
       call KSPSetInitialGuessNonzero() to indicate use of an initial
       guess vector; otherwise, an initial guess of zero is used.
    */
    if (t > 0) KSPSetInitialGuessNonzero(ksp1, PETSC_TRUE);
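
    /*
       The same behavior can also be requested at runtime with the
       standard option -ksp_initial_guess_nonzero (here,
       -s2_ksp_initial_guess_nonzero for the prefixed solver).
    */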

    /*
       Solve the first linear system.  Here we explicitly call
       KSPSetUp() for more detailed performance monitoring of
       certain preconditioners, such as ICC and ILU.  This call
       is optional, as KSPSetUp() will automatically be called
       within KSPSolve() if it hasn't been called already.
    */
    KSPSetUp(ksp1);
    KSPSolve(ksp1, b1, x1);
    KSPGetIterationNumber(ksp1, &its);

    /*
       Check error of solution to first linear system
    */
    CheckError(u, x1, b1, its, 1.e-4, CHECK_ERROR);

    /* - - - - - - - - - - - - Stage 2: - - - - - - - - - - - - - -
                 Assemble and solve second linear system
       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

    /*
       Conclude profiling stage #1; begin profiling stage #2
    */
    PetscLogStagePop();
    PetscLogStagePush(stages[2]);

    /*
       Initialize all matrix entries to zero
    */
    if (t > 0) MatZeroEntries(C2);

    /*
       Assemble matrix in parallel. Also, log the number of flops
       for computing matrix entries.
        - To illustrate the features of parallel matrix assembly, we
          intentionally set the values differently from the way in
          which the matrix is distributed across the processors.  Each
          entry that is not owned locally will be sent to the appropriate
          processor during MatAssemblyBegin() and MatAssemblyEnd().
        - For best efficiency the user should strive to set as many
          entries locally as possible.
    */
    for (i = 0; i < m; i++) {
      for (j = 2 * rank; j < 2 * rank + 2; j++) {
        v  = -1.0;
        Ii = j + n * i;
        if (i > 0) {
          J = Ii - n;
          MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (i < m - 1) {
          J = Ii + n;
          MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (j > 0) {
          J = Ii - 1;
          MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        if (j < n - 1) {
          J = Ii + 1;
          MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
        v = 6.0 + t * 0.5;
        MatSetValues(C2, 1, &Ii, 1, &Ii, &v, ADD_VALUES);
      }
    }
    if (unsym) {
      for (Ii = Istart2; Ii < Iend2; Ii++) { /* Make matrix nonsymmetric */
        v = -1.0 * (t + 0.5);
        i = Ii / n;
        if (i > 0) {
          J = Ii - n;
          MatSetValues(C2, 1, &Ii, 1, &J, &v, ADD_VALUES);
        }
      }
    }
    MatAssemblyBegin(C2, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C2, MAT_FINAL_ASSEMBLY);
    PetscLogFlops(2.0 * (Iend2 - Istart2));

    /*
       Indicate same nonzero structure of successive linear system matrices
    */
    MatSetOption(C2, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE);

    /*
       Compute right-hand-side vector
    */
    MatMult(C2, u, b2);

    /*
       Set operators. Here the matrix that defines the linear system
       also serves as the preconditioning matrix.
    */
    KSPSetOperators(ksp2, C2, C2);

    /*
       Solve the second linear system
    */
    KSPSetUp(ksp2);
    KSPSolve(ksp2, b2, x2);
    KSPGetIterationNumber(ksp2, &its);

    /*
       Check error of solution to second linear system
    */
    CheckError(u, x2, b2, its, 1.e-4, CHECK_ERROR);

    /*
       Conclude profiling stage #2
    */
    PetscLogStagePop();
  }
  /* --------------------------------------------------------------
                       End of linear solver loop
     -------------------------------------------------------------- */

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp1);
  KSPDestroy(&ksp2);
  VecDestroy(&x1);
  VecDestroy(&x2);
  VecDestroy(&b1);
  VecDestroy(&b2);
  MatDestroy(&C1);
  MatDestroy(&C2);
  VecDestroy(&u);

  PetscFinalize();
  return 0;
}
/* ------------------------------------------------------------- */
/*
    CheckError - Checks the error of the solution.

    Input Parameters:
    u - exact solution
    x - approximate solution
    b - work vector
    its - number of iterations for convergence
    tol - tolerance
    CHECK_ERROR - the event number for error checking
                  (for use with profiling)

    Notes:
    In order to profile this section of code separately from the
    rest of the program, we register it as an "event" with
    PetscLogEventRegister() in the main program.  Then, we indicate
    the start and end of this event by respectively calling
        PetscLogEventBegin(CHECK_ERROR,u,x,b,0);
        PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
    Here, we specify the objects most closely associated with
    the event (the vectors u,x,b).  Such information is optional;
    we could instead simply pass 0 for all objects.
*/
PetscErrorCode CheckError(Vec u, Vec x, Vec b, PetscInt its, PetscReal tol, PetscLogEvent CHECK_ERROR)
{
  PetscScalar none = -1.0;
  PetscReal   norm;

  PetscLogEventBegin(CHECK_ERROR, u, x, b, 0);

  /*
     Compute error of the solution, using b as a work vector.
  */
  VecCopy(x, b);
  VecAXPY(b, none, u);
  VecNorm(b, NORM_2, &norm);
  if (norm > tol) PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g, Iterations %" PetscInt_FMT "\n", (double)norm, its);
  PetscLogEventEnd(CHECK_ERROR, u, x, b, 0);
  return 0;
}
/* ------------------------------------------------------------- */
/*
   MyKSPMonitor - This is a user-defined routine for monitoring
   the KSP iterative solvers.

   Input Parameters:
     ksp   - iterative context
     n     - iteration number
     rnorm - 2-norm (preconditioned) residual value (may be estimated)
     dummy - optional user-defined monitor context (unused here)
*/
PetscErrorCode MyKSPMonitor(KSP ksp, PetscInt n, PetscReal rnorm, void *dummy)
{
  Vec x;

  /*
     Build the solution vector
  */
  KSPBuildSolution(ksp, NULL, &x);

  /*
     Write the solution vector and residual norm to stdout.
      - PetscPrintf() handles output for multiprocessor jobs
        by printing from only one processor in the communicator.
      - The parallel viewer PETSC_VIEWER_STDOUT_WORLD handles
        data from multiple processors so that the output
        is not jumbled.
  */
  PetscPrintf(PETSC_COMM_WORLD, "iteration %" PetscInt_FMT " solution vector:\n", n);
  VecView(x, PETSC_VIEWER_STDOUT_WORLD);
  PetscPrintf(PETSC_COMM_WORLD, "iteration %" PetscInt_FMT " KSP Residual norm %14.12e \n", n, (double)rnorm);
  return 0;
}
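
/*
   Note: MyKSPMonitor is attached to ksp1 in main() only when the example
   is run with -my_ksp_monitor; the built-in monitors remain available
   through the standard runtime options such as -ksp_monitor.
*/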

/*TEST

   test:
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -ksp_gmres_cgs_refinement_type refine_always -s2_ksp_type bcgs -s2_pc_type jacobi -s2_ksp_monitor_short

   test:
      requires: hpddm
      suffix: hpddm
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type {{gmres hpddm}} -s2_ksp_type {{gmres hpddm}} -s2_pc_type jacobi -s2_ksp_monitor_short

   test:
      requires: hpddm
      suffix: hpddm_2
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -s2_ksp_type hpddm -s2_ksp_hpddm_type gcrodr -s2_ksp_hpddm_recycle 10 -s2_pc_type jacobi -s2_ksp_monitor_short

   testset:
      requires: hpddm
      output_file: output/ex9_hpddm_cg.out
      args: -unsym 0 -t 2 -pc_type jacobi -ksp_monitor_short -s2_pc_type jacobi -s2_ksp_monitor_short -ksp_rtol 1.e-2 -s2_ksp_rtol 1.e-2
      test:
         suffix: hpddm_cg_p_p
         args: -ksp_type cg -s2_ksp_type cg
      test:
         suffix: hpddm_cg_p_h
         args: -ksp_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}
      test:
         suffix: hpddm_cg_h_h
         args: -ksp_type hpddm -ksp_hpddm_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}

TEST*/