Actual source code: ex19.c

static char help[] = "Solve a 2D 5-point stencil in parallel with Kokkos batched KSP and ASM solvers.\n\
Input parameters include:\n\
  -n                : number of mesh points in x direction\n\
  -m                : number of mesh points in y direction\n\
  -num_local_blocks : number of local subdomains for block Jacobi\n\n";
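/*
  Example invocations, a sketch based on the TEST block at the end of this file; the
  executable name, MPI launcher, and option subset shown here are assumptions and may
  differ on your system:

    # batched Kokkos block-Jacobi preconditioner on 4 MPI ranks
    mpiexec -n 4 ./ex19 -m 37 -n 23 -num_local_blocks 4 -ksp_type cg -ksp_rtol 1e-4 \
      -pc_type bjkokkos -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi \
      -pc_bjkokkos_ksp_rtol 1e-3 -pc_bjkokkos_ksp_max_it 60 -mat_type aijkokkos

    # the equivalent ASM preconditioner with a TFQMR/Jacobi solve on each subdomain
    mpiexec -n 4 ./ex19 -m 37 -n 23 -num_local_blocks 4 -ksp_type cg -ksp_rtol 1e-4 \
      -pc_type asm -sub_ksp_type tfqmr -sub_pc_type jacobi -sub_ksp_rtol 1e-3
*/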

/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>
#include <petscmat.h>

int main(int argc, char **args)
{
  Vec         x, b, u;           /* approx solution, RHS, exact solution */
  Mat         A, Pmat, Aseq, AA; /* system matrix, preconditioning matrix, and work matrices */
  KSP         ksp;               /* linear solver context */
  PetscReal   norm, norm0;       /* norms of the error and of the RHS */
  PetscInt    i, j, Ii, J, Istart, Iend, n = 7, m = 8, its, nblocks = 2;
  PetscBool   flg, ismpi;
  PetscScalar v;
  PetscMPIInt size, rank;
  IS         *loc_blocks = NULL;
  PC          pc;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-num_local_blocks", &nblocks, NULL));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, n * m, n * m));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 3, NULL));
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(A, &Istart, &Iend));
  /*
     Set matrix elements for the 2-D, five-point stencil.
  */
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;     /* grid row (y index), 0 <= i < m */
    j = Ii - i * n; /* grid column (x index), 0 <= j < n */
    if (i > 0) { /* neighbor in the previous grid row */
      J = Ii - n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) { /* neighbor in the next grid row */
      J = Ii + n;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) { /* neighbor to the left */
      J = Ii - 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) { /* neighbor to the right */
      J = Ii + 1;
      PetscCall(MatSetValues(A, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0; /* diagonal entry */
    PetscCall(MatSetValues(A, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Set up ASM solver and batched KSP solver data
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /* make explicit block matrix for batch solver */
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpi));
  if (!ismpi) {
    Aseq = A;
  } else {
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Aseq, NULL, NULL));
  }
  PetscCall(PCASMCreateSubdomains(Aseq, nblocks, &loc_blocks)); /* index sets for the local subdomains of A */
  Mat nest, array[10000]; /* nblocks x nblocks (row-major) array of submatrices; only the diagonal is filled (assumes nblocks*nblocks <= 10000) */
  for (Ii = 0; Ii < 10000; Ii++) array[Ii] = NULL;
  for (PetscInt bid = 0; bid < nblocks; bid++) {
    Mat matblock;
    PetscCall(MatCreateSubMatrix(Aseq, loc_blocks[bid], loc_blocks[bid], MAT_INITIAL_MATRIX, &matblock));
    //PetscCall(MatViewFromOptions(matblock, NULL, "-view_b"));
    array[bid * nblocks + bid] = matblock;
  }
  /* assemble the diagonal blocks into one block-diagonal matrix for the batched solver */
  PetscCall(MatCreate(PETSC_COMM_SELF, &nest));
  PetscCall(MatSetFromOptions(nest));
  PetscCall(MatSetType(nest, MATNEST));
  PetscCall(MatNestSetSubMats(nest, nblocks, NULL, nblocks, NULL, array));
  PetscCall(MatSetUp(nest));
  PetscCall(MatConvert(nest, MATAIJKOKKOS, MAT_INITIAL_MATRIX, &AA));
  PetscCall(MatDestroy(&nest));
  for (PetscInt bid = 0; bid < nblocks; bid++) PetscCall(MatDestroy(&array[bid * nblocks + bid]));
  if (ismpi) {
    /* embed the block-diagonal matrix AA into the local diagonal block of a parallel Pmat */
    Mat AAseq;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &Pmat));
    PetscCall(MatSetSizes(Pmat, Iend - Istart, Iend - Istart, n * m, n * m));
    PetscCall(MatSetFromOptions(Pmat));
    PetscCall(MatSeqAIJSetPreallocation(Pmat, 5, NULL));
    PetscCall(MatMPIAIJSetPreallocation(Pmat, 5, NULL, 3, NULL));
    PetscCall(MatAssemblyBegin(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(Pmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatMPIAIJGetSeqAIJ(Pmat, &AAseq, NULL, NULL));
    PetscCheck(AAseq, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "No A mat");
    PetscCall(MatAXPY(AAseq, 1.0, AA, DIFFERENT_NONZERO_PATTERN));
    PetscCall(MatDestroy(&AA));
  } else {
    Pmat = AA;
  }
  PetscCall(MatViewFromOptions(Pmat, NULL, "-view_p"));
  PetscCall(MatViewFromOptions(A, NULL, "-view_a"));

  /* A is symmetric. Set the symmetric flag to enable ICC/Cholesky preconditioners */
  PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
  PetscCall(MatCreateVecs(A, &u, &b));
  PetscCall(MatCreateVecs(A, &x, NULL));
  /*
     Set the exact solution; then compute the right-hand-side vector.
     By default the exact solution is the vector with all elements equal to 1.0.
  */
  PetscCall(VecSet(u, 1.0));
  PetscCall(MatMult(A, u, b));
  /*
     View the exact solution vector if desired.
  */
  flg = PETSC_FALSE;
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_exact_sol", &flg, NULL));
  if (flg) PetscCall(VecView(u, PETSC_VIEWER_STDOUT_WORLD));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
  PetscCall(KSPSetOperators(ksp, A, Pmat));
  PetscCall(KSPSetFromOptions(ksp));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Set up the ASM solver
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPGetPC(ksp, &pc));
  PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCASM, &flg));
  if (flg && nblocks > 0) {
    /* convert the local (sequential) index sets into contiguous global index sets for ASM */
    for (PetscInt bid = 0, gid0 = Istart; bid < nblocks; bid++) {
      PetscInt nn;
      IS       new_loc_blocks;
      PetscCall(ISGetSize(loc_blocks[bid], &nn)); // number of indices in this block
      PetscCall(ISCreateStride(PETSC_COMM_SELF, nn, gid0, 1, &new_loc_blocks));
      PetscCall(ISDestroy(&loc_blocks[bid]));
      loc_blocks[bid] = new_loc_blocks;
      gid0 += nn; // start of next block
    }
    PetscCall(PCASMSetLocalSubdomains(pc, nblocks, loc_blocks, NULL));
  }
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(KSPSolve(ksp, b, x));
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(VecAXPY(x, -1.0, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(VecNorm(b, NORM_2, &norm0));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  /*
     Print convergence information.
  */
  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of error %g iterations %" PetscInt_FMT "\n", (double)(norm / norm0), its));
  /*
     Clean up.
  */
  PetscCall(KSPDestroy(&ksp));
  if (loc_blocks) PetscCall(PCASMDestroySubdomains(nblocks, loc_blocks, NULL)); /* frees the index sets and the array */
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&A));
  PetscCall(MatDestroy(&Pmat));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST
  build:
    requires: kokkos_kernels
  testset:
    requires: parmetis
    args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 4
    nsize: 4
    output_file: output/ex19_0.out
    test:
      suffix: batch
      args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos
    test:
      suffix: asm
      args: -ksp_type cg -pc_type asm -sub_pc_type jacobi -sub_ksp_type tfqmr -sub_ksp_rtol 1e-3
    test:
      suffix: batch_bicg
      args: -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type bicg -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

  test:
    nsize: 4
    suffix: no_metis_batch
    args: -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-6 -m 37 -n 23 -num_local_blocks 4 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-3 -mat_type aijkokkos

  test:
    nsize: 1
    suffix: serial_batch
    args: -ksp_monitor -ksp_converged_reason -ksp_norm_type unpreconditioned -ksp_rtol 1e-4 -m 37 -n 23 -num_local_blocks 16 -ksp_type cg -pc_type bjkokkos -pc_bjkokkos_ksp_max_it 60 -pc_bjkokkos_ksp_type tfqmr -pc_bjkokkos_pc_type jacobi -pc_bjkokkos_ksp_rtol 1e-6 -mat_type aijkokkos -pc_bjkokkos_ksp_converged_reason

 TEST*/