Actual source code: ex192.c

  1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
  2: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";

  4: #include <petscmat.h>

  6: int main(int argc, char **args)
  7: {
  8:   Mat         A, RHS, C, F, X, S;
  9:   Vec         u, x, b;
 10:   Vec         xschur, bschur, uschur;
 11:   IS          is_schur;
 12:   PetscMPIInt size;
 13:   PetscInt    isolver = 0, size_schur, m, n, nfact, nsolve, nrhs;
 14:   PetscReal   norm, tol = PETSC_SQRT_MACHINE_EPSILON;
 15:   PetscRandom rand;
 16:   PetscBool   data_provided, herm, symm, use_lu, cuda = PETSC_FALSE;
 17:   PetscBool   isdata_provided;
 18:   PetscReal   sratio = 5.1 / 12.;
 19:   PetscViewer fd; /* viewer */
 20:   char        solver[256];
 21:   char        file[PETSC_MAX_PATH_LEN];   /* input Mat file name */
 22:   char        isfile[PETSC_MAX_PATH_LEN]; /* input IS file name */

 24:   PetscFunctionBeginUser;
 25:   PetscCall(PetscInitialize(&argc, &args, NULL, help));
 26:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
 27:   PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test");
 28:   /* Determine which type of solver we want to test for */
 29:   herm = PETSC_FALSE;
 30:   symm = PETSC_FALSE;
 31:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL));
 32:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL));
 33:   if (herm) symm = PETSC_TRUE;
 34:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL));
 35:   PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL));

 37:   /* Determine file from which we read the matrix A */
 38:   PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided));
 39:   if (!data_provided) { /* get matrices from PETSc distribution */
 40:     PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file)));
 41:     if (symm) {
 42: #if defined(PETSC_USE_COMPLEX)
 43:       PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file)));
 44: #else
 45:       PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file)));
 46: #endif
 47:     } else {
 48: #if defined(PETSC_USE_COMPLEX)
 49:       PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file)));
 50: #else
 51:       PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file)));
 52: #endif
 53:     }
 54: #if defined(PETSC_USE_64BIT_INDICES)
 55:     PetscCall(PetscStrlcat(file, "int64-", sizeof(file)));
 56: #else
 57:     PetscCall(PetscStrlcat(file, "int32-", sizeof(file)));
 58: #endif
 59: #if defined(PETSC_USE_REAL_SINGLE)
 60:     PetscCall(PetscStrlcat(file, "float32", sizeof(file)));
 61: #else
 62:     PetscCall(PetscStrlcat(file, "float64", sizeof(file)));
 63: #endif
 64:   }

 66:   /* Load matrix A */
 67:   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd));
 68:   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
 69:   PetscCall(MatLoad(A, fd));
 70:   PetscCall(MatGetSize(A, &m, &n));
 71:   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n);

 73:   PetscCall(PetscOptionsGetString(NULL, NULL, "-fis", isfile, sizeof(isfile), &isdata_provided));
 74:   if (isdata_provided) {
 75:     PetscBool samefile;

 77:     PetscCall(PetscStrcmp(isfile, file, &samefile));
 78:     if (!samefile) {
 79:       PetscCall(PetscViewerDestroy(&fd));
 80:       PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, isfile, FILE_MODE_READ, &fd));
 81:     }
 82:     PetscCall(ISCreate(PETSC_COMM_SELF, &is_schur));
 83:     PetscCall(ISLoad(is_schur, fd));
 84:   } else {
 85:     PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL));
 86:     PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio);
 87:     size_schur = (PetscInt)(sratio * m);
 88:     PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur));
 89:   }
 90:   PetscCall(ISGetSize(is_schur, &size_schur));
 91:   PetscCall(PetscViewerDestroy(&fd));

 93:   /* Create dense matrix C and X; C holds true solution with identical columns */
 94:   nrhs = 2;
 95:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
 96:   PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
 97:   PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs));
 98:   PetscCall(MatSetType(C, MATDENSE));
 99:   PetscCall(MatSetFromOptions(C));
100:   PetscCall(MatSetUp(C));

102:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
103:   PetscCall(PetscRandomSetFromOptions(rand));
104:   PetscCall(MatSetRandom(C, rand));
105:   PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X));

107:   /* Create vectors */
108:   PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
109:   PetscCall(VecSetSizes(x, n, PETSC_DECIDE));
110:   PetscCall(VecSetFromOptions(x));
111:   PetscCall(VecDuplicate(x, &b));
112:   PetscCall(VecDuplicate(x, &u)); /* save the true solution */

114:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL));
115:   switch (isolver) {
116: #if defined(PETSC_HAVE_MUMPS)
117:   case 0:
118:     PetscCall(PetscStrncpy(solver, MATSOLVERMUMPS, sizeof(solver)));
119:     break;
120: #endif
121: #if defined(PETSC_HAVE_MKL_PARDISO)
122:   case 1:
123:     PetscCall(PetscStrncpy(solver, MATSOLVERMKL_PARDISO, sizeof(solver)));
124:     break;
125: #endif
126:   default:
127:     PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
128:     break;
129:   }

131: #if defined(PETSC_USE_COMPLEX)
132:   if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for Hermitian matrices, so make them symmetric */
133:     PetscScalar im  = PetscSqrtScalar((PetscScalar)-1.);
134:     PetscScalar val = -1.0;
135:     val             = val + im;
136:     PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES));
137:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
138:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
139:   }
140: #endif

142:   PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m));

144:   /* Test LU/Cholesky Factorization */
145:   use_lu = PETSC_FALSE;
146:   if (!symm) use_lu = PETSC_TRUE;
147:   if (PetscDefined(USE_COMPLEX) && isolver == 1) use_lu = PETSC_TRUE;
148:   if (cuda && symm && !herm) use_lu = PETSC_TRUE;

150:   if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
151:     PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
152:     PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A));
153:   }

155:   if (use_lu) {
156:     PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F));
157:   } else {
158:     if (herm) {
159:       PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
160:     } else {
161:       PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
162:       PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE));
163:     }
164:     PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F));
165:   }

167:   /* Set Schur complement indices */
168:   PetscCall(MatFactorSetSchurIS(F, is_schur));
169:   PetscCall(ISDestroy(&is_schur));

171:   if (use_lu) {
172:     PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
173:   } else {
174:     PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
175:   }

177:   for (nfact = 0; nfact < 3; nfact++) {
178:     Mat AD;

180:     if (nfact == 1) {
181:       PetscCall(VecSetRandom(x, rand));
182:       if (symm && herm) PetscCall(VecAbs(x));
183:       PetscCall(MatDiagonalSet(A, x, ADD_VALUES));
184:     }
185:     if (use_lu) {
186:       PetscCall(MatLUFactorNumeric(F, A, NULL));
187:     } else {
188:       PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
189:     }

191:     if (cuda) {
192:       PetscCall(MatFactorGetSchurComplement(F, &S, NULL));
193:       PetscCall(MatSetType(S, MATSEQDENSECUDA));
194:       PetscCall(MatCreateVecs(S, &xschur, &bschur));
195:       PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED));
196:     }
197:     PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
198:     if (!cuda) PetscCall(MatCreateVecs(S, &xschur, &bschur));
199:     PetscCall(VecDuplicate(xschur, &uschur));
200:     if (nfact == 1 && (!cuda || (herm && symm))) PetscCall(MatFactorInvertSchurComplement(F));
201:     for (nsolve = 0; nsolve < 2; nsolve++) {
202:       PetscCall(VecSetRandom(x, rand));
203:       PetscCall(VecCopy(x, u));

205:       if (nsolve) {
206:         PetscCall(MatMult(A, x, b));
207:         PetscCall(MatSolve(F, b, x));
208:       } else {
209:         PetscCall(MatMultTranspose(A, x, b));
210:         PetscCall(MatSolveTranspose(F, b, x));
211:       }
212:       /* Check the error */
213:       PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */
214:       PetscCall(VecNorm(u, NORM_2, &norm));
215:       if (norm > tol) {
216:         PetscReal resi;
217:         if (nsolve) {
218:           PetscCall(MatMult(A, x, u)); /* u = A*x */
219:         } else {
220:           PetscCall(MatMultTranspose(A, x, u)); /* u = A*x */
221:         }
222:         PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */
223:         PetscCall(VecNorm(u, NORM_2, &resi));
224:         if (nsolve) {
225:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
226:         } else {
227:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi));
228:         }
229:       }
230:       PetscCall(VecSetRandom(xschur, rand));
231:       PetscCall(VecCopy(xschur, uschur));
232:       if (nsolve) {
233:         PetscCall(MatMult(S, xschur, bschur));
234:         PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur));
235:       } else {
236:         PetscCall(MatMultTranspose(S, xschur, bschur));
237:         PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur));
238:       }
239:       /* Check the error */
240:       PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */
241:       PetscCall(VecNorm(uschur, NORM_2, &norm));
242:       if (norm > tol) {
243:         PetscReal resi;
244:         if (nsolve) {
245:           PetscCall(MatMult(S, xschur, uschur)); /* u = A*x */
246:         } else {
247:           PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = A*x */
248:         }
249:         PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */
250:         PetscCall(VecNorm(uschur, NORM_2, &resi));
251:         if (nsolve) {
252:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
253:         } else {
254:           PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi));
255:         }
256:       }
257:     }
258:     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD));
259:     if (!nfact) PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS));
260:     else PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS));
261:     PetscCall(MatDestroy(&AD));
262:     for (nsolve = 0; nsolve < 2; nsolve++) {
263:       PetscCall(MatMatSolve(F, RHS, X));

265:       /* Check the error */
266:       PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
267:       PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
268:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
269: #if PetscDefined(HAVE_MUMPS)
270:       PetscCall(MatMumpsSetIcntl(F, 26, 1));
271:       PetscCall(MatMatSolve(F, RHS, X));
272:       PetscCall(MatMumpsSetIcntl(F, 26, 2));
273:       PetscCall(MatMatSolve(F, RHS, X));
274:       PetscCall(MatMumpsSetIcntl(F, 26, -1));

276:       /* Check the error */
277:       PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
278:       PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
279:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
280: #endif
281:     }
282:     if (isolver == 0) {
283:       Mat spRHS, spRHST, RHST;

285:       PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST));
286:       PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST));
287:       PetscCall(MatCreateTranspose(spRHST, &spRHS));
288:       for (nsolve = 0; nsolve < 2; nsolve++) {
289:         PetscCall(MatMatSolve(F, spRHS, X));

291:         /* Check the error */
292:         PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
293:         PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
294:         if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
295:       }
296:       PetscCall(MatDestroy(&spRHST));
297:       PetscCall(MatDestroy(&spRHS));
298:       PetscCall(MatDestroy(&RHST));
299:     }
300:     PetscCall(MatDestroy(&S));
301:     PetscCall(VecDestroy(&xschur));
302:     PetscCall(VecDestroy(&bschur));
303:     PetscCall(VecDestroy(&uschur));
304:   }
305:   /* Free data structures */
306:   PetscCall(MatDestroy(&A));
307:   PetscCall(MatDestroy(&C));
308:   PetscCall(MatDestroy(&F));
309:   PetscCall(MatDestroy(&X));
310:   PetscCall(MatDestroy(&RHS));
311:   PetscCall(PetscRandomDestroy(&rand));
312:   PetscCall(VecDestroy(&x));
313:   PetscCall(VecDestroy(&b));
314:   PetscCall(VecDestroy(&u));
315:   PetscCall(PetscFinalize());
316:   return 0;
317: }

319: /*TEST

321:    testset:
322:      requires: mkl_pardiso double !complex
323:      args: -solver 1

325:      test:
326:        suffix: mkl_pardiso
327:      test:
328:        requires: cuda
329:        suffix: mkl_pardiso_cuda
330:        args: -cuda_solve
331:        output_file: output/ex192_mkl_pardiso.out
332:      test:
333:        suffix: mkl_pardiso_1
334:        args: -symmetric_solve
335:        output_file: output/ex192_mkl_pardiso_1.out
336:      test:
337:        requires: cuda
338:        suffix: mkl_pardiso_cuda_1
339:        args: -symmetric_solve -cuda_solve
340:        output_file: output/ex192_mkl_pardiso_1.out
341:      test:
342:        suffix: mkl_pardiso_3
343:        args: -symmetric_solve -hermitian_solve
344:        output_file: output/ex192_mkl_pardiso_3.out
345:      test:
346:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
347:        suffix: mkl_pardiso_cuda_3
348:        args: -symmetric_solve -hermitian_solve -cuda_solve
349:        output_file: output/ex192_mkl_pardiso_3.out

351:    testset:
352:      requires: mumps double !complex
353:      args: -solver 0

355:      test:
356:        suffix: mumps
357:      test:
358:        requires: cuda
359:        suffix: mumps_cuda
360:        args: -cuda_solve
361:        output_file: output/ex192_mumps.out
362:      test:
363:        suffix: mumps_2
364:        args: -symmetric_solve
365:        output_file: output/ex192_mumps_2.out
366:      test:
367:        requires: cuda
368:        suffix: mumps_cuda_2
369:        args: -symmetric_solve -cuda_solve
370:        output_file: output/ex192_mumps_2.out
371:      test:
372:        suffix: mumps_3
373:        args: -symmetric_solve -hermitian_solve
374:        output_file: output/ex192_mumps_3.out
375:      test:
376:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
377:        suffix: mumps_cuda_3
378:        args: -symmetric_solve -hermitian_solve -cuda_solve
379:        output_file: output/ex192_mumps_3.out

381:    testset:
382:      requires: mumps double !complex defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
383:      args: -solver 0 -pc_precision single -tol 3.4e-4

385:      test:
386:        suffix: mumps_s
387:        output_file: output/ex192_mumps.out

389:      test:
390:        requires: cuda
391:        suffix: mumps_cuda_s
392:        args: -cuda_solve
393:        output_file: output/ex192_mumps.out
394:      test:
395:        suffix: mumps_2_s
396:        args: -symmetric_solve
397:        output_file: output/ex192_mumps_2.out
398:      test:
399:        requires: cuda
400:        suffix: mumps_cuda_2_s
401:        args: -symmetric_solve -cuda_solve
402:        output_file: output/ex192_mumps_2.out
403:      test:
404:        suffix: mumps_3_s
405:        args: -symmetric_solve -hermitian_solve
406:        output_file: output/ex192_mumps_3.out
407:      test:
408:        requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
409:        suffix: mumps_cuda_3_s
410:        args: -symmetric_solve -hermitian_solve -cuda_solve
411:        output_file: output/ex192_mumps_3.out

413: TEST*/