Actual source code: ex9.c


  2: static char help[] = "The solution of 2 different linear systems with different linear solvers.\n\
  3: Also, this example illustrates the repeated\n\
  4: solution of linear systems, while reusing matrix, vector, and solver data\n\
  5: structures throughout the process.  Note the various stages of event logging.\n\n";

  7: /*
  8:   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  9:   automatically includes:
 10:      petscsys.h    - base PETSc routines     petscvec.h - vectors
 11:      petscmat.h    - matrices                petscis.h  - index sets
 12:      petscksp.h    - Krylov subspace methods
 13:      petscviewer.h - viewers                 petscpc.h  - preconditioners
 14: */
 15: #include <petscksp.h>

 17: /*
 18:    Declare user-defined routines
 19: */
 20: extern PetscErrorCode CheckError(Vec,Vec,Vec,PetscInt,PetscReal,PetscLogEvent);
 21: extern PetscErrorCode MyKSPMonitor(KSP,PetscInt,PetscReal,void*);

 23: int main(int argc,char **args)
 24: {
 25:   Vec            x1,b1,x2,b2; /* solution and RHS vectors for systems #1 and #2 */
 26:   Vec            u;              /* exact solution vector */
 27:   Mat            C1,C2;         /* matrices for systems #1 and #2 */
 28:   KSP            ksp1,ksp2;   /* KSP contexts for systems #1 and #2 */
 29:   PetscInt       ntimes = 3;     /* number of times to solve the linear systems */
 30:   PetscLogEvent  CHECK_ERROR;    /* event number for error checking */
 31:   PetscInt       ldim,low,high,iglobal,Istart,Iend,Istart2,Iend2;
 32:   PetscInt       Ii,J,i,j,m = 3,n = 2,its,t;
 33:   PetscBool      flg = PETSC_FALSE, unsym = PETSC_TRUE;
 34:   PetscScalar    v;
 35:   PetscMPIInt    rank,size;
 36: #if defined(PETSC_USE_LOG)
 37:   PetscLogStage stages[3];
 38: #endif

 40:   PetscInitialize(&argc,&args,(char*)0,help);
 41:   PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
 42:   PetscOptionsGetInt(NULL,NULL,"-t",&ntimes,NULL);
 43:   PetscOptionsGetBool(NULL,NULL,"-unsym",&unsym,NULL);
 44:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 45:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
 46:   n    = 2*size;

 48:   /*
 49:      Register various stages for profiling
 50:   */
 51:   PetscLogStageRegister("Prelim setup",&stages[0]);
 52:   PetscLogStageRegister("Linear System 1",&stages[1]);
 53:   PetscLogStageRegister("Linear System 2",&stages[2]);

 55:   /*
 56:      Register a user-defined event for profiling (error checking).
 57:   */
 58:   CHECK_ERROR = 0;
 59:   PetscLogEventRegister("Check Error",KSP_CLASSID,&CHECK_ERROR);
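  /*
     Usage sketch (not part of this program): the stages and the event
     registered above appear in the profiling summary that is printed when
     the example is run with the standard -log_view option, e.g.

         mpiexec -n 2 ./ex9 -t 2 -log_view
  */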

 61:   /* - - - - - - - - - - - - Stage 0: - - - - - - - - - - - - - -
 62:                         Preliminary Setup
 63:      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

 65:   PetscLogStagePush(stages[0]);

 67:   /*
 68:      Create data structures for first linear system.
 69:       - Create parallel matrix, specifying only its global dimensions.
 70:         When using MatCreate(), the matrix format can be specified at
 71:         runtime. Also, the parallel partitioning of the matrix is
 72:         determined by PETSc at runtime.
 73:       - Create parallel vectors.
 74:         - When using VecSetSizes(), we specify only the vector's global
 75:           dimension; the parallel partitioning is determined at runtime.
 76:         - Note: We form 1 vector from scratch and then duplicate as needed.
 77:   */
 78:   MatCreate(PETSC_COMM_WORLD,&C1);
 79:   MatSetSizes(C1,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
 80:   MatSetFromOptions(C1);
 81:   MatSetUp(C1);
 82:   MatGetOwnershipRange(C1,&Istart,&Iend);
 83:   VecCreate(PETSC_COMM_WORLD,&u);
 84:   VecSetSizes(u,PETSC_DECIDE,m*n);
 85:   VecSetFromOptions(u);
 86:   VecDuplicate(u,&b1);
 87:   VecDuplicate(u,&x1);
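  /*
     A possible refinement (a sketch, not used in this example): for AIJ
     matrices, preallocation could be supplied between MatSetFromOptions()
     and MatSetUp() to speed up assembly, e.g.

         MatSeqAIJSetPreallocation(C1,5,NULL);
         MatMPIAIJSetPreallocation(C1,5,NULL,2,NULL);

     Here 5 and 2 are illustrative per-row estimates for the diagonal and
     off-diagonal blocks of this 5-point stencil.
  */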

 89:   /*
 90:      Create first linear solver context.
 91:      Set runtime options (e.g., -pc_type <type>).
 92:      Note that the first linear system uses the default option
 93:      names, while the second linear system uses a different
 94:      options prefix.
 95:   */
 96:   KSPCreate(PETSC_COMM_WORLD,&ksp1);
 97:   KSPSetFromOptions(ksp1);

 99:   /*
100:      Set user-defined monitoring routine for first linear system.
101:   */
102:   PetscOptionsGetBool(NULL,NULL,"-my_ksp_monitor",&flg,NULL);
103:   if (flg) KSPMonitorSet(ksp1,MyKSPMonitor,NULL,0);
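  /*
     Alternatively (not exercised by this example), the built-in residual
     monitor can be requested at runtime instead of installing a custom one:

         ./ex9 -ksp_monitor
  */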

105:   /*
106:      Create data structures for second linear system.
107:   */
108:   MatCreate(PETSC_COMM_WORLD,&C2);
109:   MatSetSizes(C2,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
110:   MatSetFromOptions(C2);
111:   MatSetUp(C2);
112:   MatGetOwnershipRange(C2,&Istart2,&Iend2);
113:   VecDuplicate(u,&b2);
114:   VecDuplicate(u,&x2);

116:   /*
117:      Create second linear solver context
118:   */
119:   KSPCreate(PETSC_COMM_WORLD,&ksp2);

121:   /*
122:      Set different options prefix for second linear system.
123:      Set runtime options (e.g., -s2_pc_type <type>)
124:   */
125:   KSPAppendOptionsPrefix(ksp2,"s2_");
126:   KSPSetFromOptions(ksp2);
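  /*
     Example invocation (a sketch; the solver choices are illustrative): with
     the "s2_" prefix set above, the second solver is controlled by prefixed
     options while the first uses the default option names, e.g.

         mpiexec -n 2 ./ex9 -ksp_type gmres -pc_type jacobi \
                            -s2_ksp_type bcgs -s2_pc_type jacobi
  */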

128:   /*
129:      Assemble exact solution vector in parallel.  Note that each
130:      processor needs to set only its local part of the vector.
131:   */
132:   VecGetLocalSize(u,&ldim);
133:   VecGetOwnershipRange(u,&low,&high);
134:   for (i=0; i<ldim; i++) {
135:     iglobal = i + low;
136:     v       = (PetscScalar)(i + 100*rank);
137:     VecSetValues(u,1,&iglobal,&v,ADD_VALUES);
138:   }
139:   VecAssemblyBegin(u);
140:   VecAssemblyEnd(u);
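  /*
     An equivalent way to fill the local part (a sketch, not used here) is to
     access the local array directly, which makes the assembly calls above
     unnecessary:

         PetscScalar *array;
         VecGetArray(u,&array);
         for (i=0; i<ldim; i++) array[i] = (PetscScalar)(i + 100*rank);
         VecRestoreArray(u,&array);
  */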

142:   /*
143:      Log the number of flops for computing vector entries
144:   */
145:   PetscLogFlops(2.0*ldim);

147:   /*
 148:      End current profiling stage
149:   */
150:   PetscLogStagePop();

152:   /* --------------------------------------------------------------
153:                         Linear solver loop:
154:       Solve 2 different linear systems several times in succession
155:      -------------------------------------------------------------- */

157:   for (t=0; t<ntimes; t++) {

159:     /* - - - - - - - - - - - - Stage 1: - - - - - - - - - - - - - -
160:                  Assemble and solve first linear system
161:        - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

163:     /*
164:        Begin profiling stage #1
165:     */
166:     PetscLogStagePush(stages[1]);

168:     /*
169:        Initialize all matrix entries to zero.  MatZeroEntries() retains
170:        the nonzero structure of the matrix for sparse formats.
171:     */
172:     if (t > 0) MatZeroEntries(C1);

174:     /*
175:        Set matrix entries in parallel.  Also, log the number of flops
176:        for computing matrix entries.
177:         - Each processor needs to insert only elements that it owns
178:           locally (but any non-local elements will be sent to the
179:           appropriate processor during matrix assembly).
 180:         - Always specify global rows and columns of matrix entries.
181:     */
182:     for (Ii=Istart; Ii<Iend; Ii++) {
183:       v = -1.0; i = Ii/n; j = Ii - i*n;
184:       if (i>0)   {J = Ii - n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
185:       if (i<m-1) {J = Ii + n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
186:       if (j>0)   {J = Ii - 1; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
187:       if (j<n-1) {J = Ii + 1; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
188:       v = 4.0; MatSetValues(C1,1,&Ii,1,&Ii,&v,ADD_VALUES);
189:     }
190:     if (unsym) {
191:       for (Ii=Istart; Ii<Iend; Ii++) { /* Make matrix nonsymmetric */
192:         v = -1.0*(t+0.5); i = Ii/n;
193:         if (i>0)   {J = Ii - n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
194:       }
195:       PetscLogFlops(2.0*(Iend-Istart));
196:     }

198:     /*
199:        Assemble matrix, using the 2-step process:
200:          MatAssemblyBegin(), MatAssemblyEnd()
 201:        Computations can be done while messages are in transit
202:        by placing code between these two statements.
203:     */
204:     MatAssemblyBegin(C1,MAT_FINAL_ASSEMBLY);
205:     MatAssemblyEnd(C1,MAT_FINAL_ASSEMBLY);
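    /*
       For instance (a sketch, not done here), independent local work could be
       overlapped with the assembly communication:

           MatAssemblyBegin(C1,MAT_FINAL_ASSEMBLY);
           (local computation that does not touch C1 goes here)
           MatAssemblyEnd(C1,MAT_FINAL_ASSEMBLY);
    */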

 207:     /*
 208:        Allow new nonzero locations in C1; contrast with C2 below, whose nonzero structure is kept fixed
 209:     */
 210:     MatSetOption(C1,MAT_NEW_NONZERO_LOCATIONS,PETSC_TRUE);

212:     /*
213:        Compute right-hand-side vector
214:     */
215:     MatMult(C1,u,b1);

217:     /*
218:        Set operators. Here the matrix that defines the linear system
219:        also serves as the preconditioning matrix.
220:     */
221:     KSPSetOperators(ksp1,C1,C1);

223:     /*
224:        Use the previous solution of linear system #1 as the initial
225:        guess for the next solve of linear system #1.  The user MUST
 226:      call KSPSetInitialGuessNonzero() to indicate use of an initial
227:        guess vector; otherwise, an initial guess of zero is used.
228:     */
229:     if (t>0) {
230:       KSPSetInitialGuessNonzero(ksp1,PETSC_TRUE);
231:     }
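    /*
       The same behavior can also be requested at runtime (not done in this
       example) with the option

           -ksp_initial_guess_nonzero

       which is read by KSPSetFromOptions().
    */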

233:     /*
234:        Solve the first linear system.  Here we explicitly call
235:        KSPSetUp() for more detailed performance monitoring of
236:        certain preconditioners, such as ICC and ILU.  This call
 237:      is optional, as KSPSetUp() will automatically be called
238:        within KSPSolve() if it hasn't been called already.
239:     */
240:     KSPSetUp(ksp1);
241:     KSPSolve(ksp1,b1,x1);
242:     KSPGetIterationNumber(ksp1,&its);
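    /*
       A sketch (not part of this example) of checking the convergence outcome
       explicitly after the solve:

           KSPConvergedReason reason;
           KSPGetConvergedReason(ksp1,&reason);
           if (reason < 0) PetscPrintf(PETSC_COMM_WORLD,"System 1 diverged, reason %d\n",(int)reason);
    */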

244:     /*
245:        Check error of solution to first linear system
246:     */
247:     CheckError(u,x1,b1,its,1.e-4,CHECK_ERROR);

249:     /* - - - - - - - - - - - - Stage 2: - - - - - - - - - - - - - -
250:                  Assemble and solve second linear system
251:        - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

253:     /*
254:        Conclude profiling stage #1; begin profiling stage #2
255:     */
256:     PetscLogStagePop();
257:     PetscLogStagePush(stages[2]);

259:     /*
260:        Initialize all matrix entries to zero
261:     */
262:     if (t > 0) MatZeroEntries(C2);

264:     /*
265:        Assemble matrix in parallel. Also, log the number of flops
266:        for computing matrix entries.
267:         - To illustrate the features of parallel matrix assembly, we
268:           intentionally set the values differently from the way in
269:           which the matrix is distributed across the processors.  Each
270:           entry that is not owned locally will be sent to the appropriate
271:           processor during MatAssemblyBegin() and MatAssemblyEnd().
272:         - For best efficiency the user should strive to set as many
273:           entries locally as possible.
274:      */
275:     for (i=0; i<m; i++) {
276:       for (j=2*rank; j<2*rank+2; j++) {
277:         v = -1.0;  Ii = j + n*i;
278:         if (i>0)   {J = Ii - n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
279:         if (i<m-1) {J = Ii + n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
280:         if (j>0)   {J = Ii - 1; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
281:         if (j<n-1) {J = Ii + 1; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
282:         v = 6.0 + t*0.5; MatSetValues(C2,1,&Ii,1,&Ii,&v,ADD_VALUES);
283:       }
284:     }
285:     if (unsym) {
286:       for (Ii=Istart2; Ii<Iend2; Ii++) { /* Make matrix nonsymmetric */
287:         v = -1.0*(t+0.5); i = Ii/n;
288:         if (i>0)   {J = Ii - n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
289:       }
290:     }
291:     MatAssemblyBegin(C2,MAT_FINAL_ASSEMBLY);
292:     MatAssemblyEnd(C2,MAT_FINAL_ASSEMBLY);
293:     PetscLogFlops(2.0*(Iend-Istart));

295:     /*
296:        Indicate same nonzero structure of successive linear system matrices
297:     */
298:     MatSetOption(C2,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
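    /*
       A stricter variant (a sketch, not used here) is to have any attempt to
       introduce a new nonzero location raise an error:

           MatSetOption(C2,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
    */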

300:     /*
301:        Compute right-hand-side vector
302:     */
303:     MatMult(C2,u,b2);

 305:     /*
 306:        Set operators. Here the matrix that defines the linear system
 307:        also serves as the preconditioning matrix.  The nonzero structure
 308:        of C2 is the same for every solve, as indicated above by setting
 309:        MAT_NEW_NONZERO_LOCATIONS to PETSC_FALSE.
 310:     */
311:     KSPSetOperators(ksp2,C2,C2);
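    /*
       If a separate (typically cheaper) preconditioning matrix were available,
       say a hypothetical Mat P2, it would be passed as the third argument
       instead (a sketch, not done here):

           KSPSetOperators(ksp2,C2,P2);
    */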

313:     /*
314:        Solve the second linear system
315:     */
316:     KSPSetUp(ksp2);
317:     KSPSolve(ksp2,b2,x2);
318:     KSPGetIterationNumber(ksp2,&its);

320:     /*
321:        Check error of solution to second linear system
322:     */
323:     CheckError(u,x2,b2,its,1.e-4,CHECK_ERROR);

325:     /*
326:        Conclude profiling stage #2
327:     */
328:     PetscLogStagePop();
329:   }
330:   /* --------------------------------------------------------------
331:                        End of linear solver loop
332:      -------------------------------------------------------------- */

334:   /*
335:      Free work space.  All PETSc objects should be destroyed when they
336:      are no longer needed.
337:   */
 338:   KSPDestroy(&ksp1); KSPDestroy(&ksp2);
 339:   VecDestroy(&x1);   VecDestroy(&x2);
 340:   VecDestroy(&b1);   VecDestroy(&b2);
 341:   MatDestroy(&C1);   MatDestroy(&C2);
342:   VecDestroy(&u);

344:   PetscFinalize();
345:   return 0;
346: }
347: /* ------------------------------------------------------------- */
348: /*
349:     CheckError - Checks the error of the solution.

351:     Input Parameters:
352:     u - exact solution
353:     x - approximate solution
354:     b - work vector
355:     its - number of iterations for convergence
356:     tol - tolerance
357:     CHECK_ERROR - the event number for error checking
358:                   (for use with profiling)

360:     Notes:
361:     In order to profile this section of code separately from the
362:     rest of the program, we register it as an "event" with
363:     PetscLogEventRegister() in the main program.  Then, we indicate
364:     the start and end of this event by respectively calling
365:         PetscLogEventBegin(CHECK_ERROR,u,x,b,0);
366:         PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
367:     Here, we specify the objects most closely associated with
368:     the event (the vectors u,x,b).  Such information is optional;
 369:     we could instead use 0 for all objects.
370: */
371: PetscErrorCode CheckError(Vec u,Vec x,Vec b,PetscInt its,PetscReal tol,PetscLogEvent CHECK_ERROR)
372: {
373:   PetscScalar    none = -1.0;
374:   PetscReal      norm;

376:   PetscLogEventBegin(CHECK_ERROR,u,x,b,0);

378:   /*
379:      Compute error of the solution, using b as a work vector.
380:   */
381:   VecCopy(x,b);
382:   VecAXPY(b,none,u);
383:   VecNorm(b,NORM_2,&norm);
384:   if (norm > tol) {
385:     PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
386:   }
387:   PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
388:   return 0;
389: }
390: /* ------------------------------------------------------------- */
391: /*
392:    MyKSPMonitor - This is a user-defined routine for monitoring
393:    the KSP iterative solvers.

395:    Input Parameters:
396:      ksp   - iterative context
397:      n     - iteration number
398:      rnorm - 2-norm (preconditioned) residual value (may be estimated)
399:      dummy - optional user-defined monitor context (unused here)
400: */
401: PetscErrorCode MyKSPMonitor(KSP ksp,PetscInt n,PetscReal rnorm,void *dummy)
402: {
403:   Vec            x;

405:   /*
406:      Build the solution vector
407:   */
408:   KSPBuildSolution(ksp,NULL,&x);

410:   /*
411:      Write the solution vector and residual norm to stdout.
412:       - PetscPrintf() handles output for multiprocessor jobs
413:         by printing from only one processor in the communicator.
414:       - The parallel viewer PETSC_VIEWER_STDOUT_WORLD handles
415:         data from multiple processors so that the output
416:         is not jumbled.
417:   */
418:   PetscPrintf(PETSC_COMM_WORLD,"iteration %D solution vector:\n",n);
419:   VecView(x,PETSC_VIEWER_STDOUT_WORLD);
420:   PetscPrintf(PETSC_COMM_WORLD,"iteration %D KSP Residual norm %14.12e \n",n,rnorm);
421:   return 0;
422: }

424: /*TEST

426:    test:
427:       args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -ksp_gmres_cgs_refinement_type refine_always -s2_ksp_type bcgs -s2_pc_type jacobi -s2_ksp_monitor_short

429:    test:
430:       requires: hpddm
431:       suffix: hpddm
432:       args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type {{gmres hpddm}} -s2_ksp_type {{gmres hpddm}} -s2_pc_type jacobi -s2_ksp_monitor_short

434:    test:
435:       requires: hpddm
436:       suffix: hpddm_2
437:       args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -s2_ksp_type hpddm -s2_ksp_hpddm_type gcrodr -s2_ksp_hpddm_recycle 10 -s2_pc_type jacobi -s2_ksp_monitor_short

439:    testset:
440:       requires: hpddm
441:       output_file: output/ex9_hpddm_cg.out
442:       args: -unsym 0 -t 2 -pc_type jacobi -ksp_monitor_short -s2_pc_type jacobi -s2_ksp_monitor_short -ksp_rtol 1.e-2 -s2_ksp_rtol 1.e-2
443:       test:
444:          suffix: hpddm_cg_p_p
445:          args: -ksp_type cg -s2_ksp_type cg
446:       test:
447:          suffix: hpddm_cg_p_h
448:          args: -ksp_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}
449:       test:
450:          suffix: hpddm_cg_h_h
451:          args: -ksp_type hpddm -ksp_hpddm_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}

453: TEST*/