Actual source code: ex5.c


static char help[] = "Solves two linear systems in parallel with KSP.  The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure).  The code\n\
also uses multiple profiling stages.  Input arguments are\n\
  -m <size> : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";

/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines      petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets               petscksp.h - Krylov subspace methods
     petscviewer.h - viewers                  petscpc.h  - preconditioners
*/
#include <petscksp.h>
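
/*
  Example usage (this assumes a standard PETSc example makefile target named
  ex5; adjust the build command to your own setup):
      make ex5
      mpiexec -n 2 ./ex5 -m 4 -mat_nonsym -log_view
*/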

int main(int argc,char **args)
{
  KSP            ksp;              /* linear solver context */
  Mat            C;                /* matrix */
  Vec            x,u,b;            /* approx solution, exact solution, RHS */
  PetscReal      norm;             /* norm of solution error */
  PetscScalar    v,none = -1.0;
  PetscInt       Ii,J,ldim,low,high,iglobal,Istart,Iend;
  PetscInt       i,j,m = 3,n = 2,its;
  PetscMPIInt    size,rank;
  PetscBool      mat_nonsymmetric = PETSC_FALSE;
  PetscBool      testnewC = PETSC_FALSE,testscaledMat = PETSC_FALSE;
#if defined(PETSC_USE_LOG)
  PetscLogStage stages[2];
#endif

  PetscInitialize(&argc,&args,(char*)0,help);
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n    = 2*size;

  /*
     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscOptionsGetBool(NULL,NULL,"-mat_nonsym",&mat_nonsymmetric,NULL);

  PetscOptionsGetBool(NULL,NULL,"-test_scaledMat",&testscaledMat,NULL);

  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscLogStageRegister("Original Solve",&stages[0]);
  PetscLogStageRegister("Second Solve",&stages[1]);

  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscLogStagePush(stages[0]);

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  MatCreate(PETSC_COMM_WORLD,&C);
  MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(C);
  MatSetUp(C);

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(C,&Istart,&Iend);

  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
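  /*
     The loop below assembles a standard 5-point stencil on an m x n grid:
     global row Ii corresponds to grid point (i,j) with i = Ii/n and
     j = Ii - i*n; the four neighbor couplings get the value -1 and the
     diagonal gets 4.
  */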
  for (Ii=Istart; Ii<Iend; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0)   {J = Ii - n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (i<m-1) {J = Ii + n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j>0)   {J = Ii - 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j<n-1) {J = Ii + 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    v = 4.0; MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);
  }

  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.5; i = Ii/n;
      if (i>1)   {J = Ii-n-1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    }
  } else {
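    /*
       Declare the matrix symmetric so that solvers can exploit it:
       MAT_SYMMETRIC marks the matrix as symmetric, and MAT_SYMMETRY_ETERNAL
       indicates that it stays symmetric even after its values change
       (as they do for the second solve below).
    */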
    MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);
    MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b);
  VecDuplicate(b,&x);

  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors.  Determine which
     range of entries is locally owned.
  */
  VecGetOwnershipRange(x,&low,&high);

  /*
    Set elements within the exact solution vector in parallel.
     - Each processor needs to insert only elements that it owns
       locally (but any non-local entries will be sent to the
       appropriate processor during vector assembly).
     - Always specify global locations of vector entries.
  */
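  /*
     Fill the locally owned entries of the exact solution u with the value
     i + 100*rank (i is the local index), so each rank contributes a
     distinct, reproducible block of values.
  */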
  VecGetLocalSize(x,&ldim);
  for (i=0; i<ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100*rank);
    VecSetValues(u,1,&iglobal,&v,INSERT_VALUES);
  }

  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);

  /*
     Compute right-hand-side vector
  */
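  /*
     b = C*u, so u is the known exact solution of C x = b and is used for
     the error check after the solve.
  */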
  MatMult(C,u,b);

  /*
    Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp,C,C);

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
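  /*
     For example, running with
         -ksp_type gmres -pc_type bjacobi -ksp_monitor_short
     selects GMRES with block Jacobi preconditioning and prints a short
     residual history; these are ordinary PETSc options, not specific to
     this example.
  */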
  KSPSetFromOptions(ksp);

  /*
     Solve linear system.  Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU.  This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  KSPSetUp(ksp);
  KSPSolve(ksp,b,x);

  /*
     Check the error
  */
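  /*
     VecAXPY() computes x = x - u, i.e. the error in the computed solution;
     its 2-norm is printed below.  With -test_scaledMat the message is
     printed only when the norm exceeds 1.e-7.
  */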
  VecAXPY(x,none,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);
  if (!testscaledMat || norm > 1.e-7) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }

  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method.  We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscLogStagePop();
  PetscLogStagePush(stages[1]);

  /*
     Initialize all matrix entries to zero.  MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  MatZeroEntries(C);

  /*
     Assemble matrix again.  Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
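  /*
     This time each process sets the entries for grid columns j = 2*rank and
     2*rank+1 over all rows i (off-process values are communicated during
     assembly), and the diagonal value is 6.0 instead of 4.0: the values
     change but the nonzero pattern does not.
  */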
  for (i=0; i<m; i++) {
    for (j=2*rank; j<2*rank+2; j++) {
      v = -1.0;  Ii = j + n*i;
      if (i>0)   {J = Ii - n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (i<m-1) {J = Ii + n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j>0)   {J = Ii - 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j<n-1) {J = Ii + 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      v = 6.0; MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);
    }
  }
  if (mat_nonsymmetric) {
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.5; i = Ii/n;
      if (i>1)   {J = Ii-n-1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    }
  }
  MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

  if (testscaledMat) {
    PetscRandom    rctx;

    /* Scale a(0,0) and a(M-1,M-1) */
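    /*
       The first and last diagonal entries are reduced by a factor of 1e-5,
       producing a badly scaled matrix; this path is exercised by the
       superlu_dist tests listed at the end of the file.
    */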
    if (rank == 0) {
      v = 6.0*0.00001; Ii = 0; J = 0;
      MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);
    } else if (rank == size-1) {
      v = 6.0*0.00001; Ii = m*n-1; J = m*n-1;
      MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);
    }
    MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

    /* Generate a new random exact solution vector u; the new right-hand
       side b = C*u is computed below */
    VecDestroy(&u);
    VecCreate(PETSC_COMM_WORLD,&u);
    VecSetSizes(u,PETSC_DECIDE,m*n);
    VecSetFromOptions(u);

    PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u,rctx);
    PetscRandomDestroy(&rctx);
    VecAssemblyBegin(u);
    VecAssemblyEnd(u);
  }

  PetscOptionsGetBool(NULL,NULL,"-test_newMat",&testnewC,NULL);
  if (testnewC) {
    /*
     The user may supply a new matrix C with the same nonzero pattern, e.g.
      ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
    */
    Mat Ctmp;
    MatDuplicate(C,MAT_COPY_VALUES,&Ctmp);
    MatDestroy(&C);
    MatDuplicate(Ctmp,MAT_COPY_VALUES,&C);
    MatDestroy(&Ctmp);
  }

  MatMult(C,u,b);

  /*
     Set operators again.  Since the matrix values (and, with -test_newMat,
     the Mat object itself) have changed, the preconditioner will be
     recomputed during the next KSPSetUp()/KSPSolve().
  */
  KSPSetOperators(ksp,C,C);

  /*
     Solve linear system
  */
  KSPSetUp(ksp);
  KSPSolve(ksp,b,x);

  /*
     Check the error
  */
  VecAXPY(x,none,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);
  if (!testscaledMat || norm > 1.e-7) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&C);

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscLogStagePop();

  PetscFinalize();
  return 0;
}

/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/ex5_superlu_dist.out

TEST*/