Actual source code: ex192.c
1: static char help[] = "Tests MatSolve() and MatMatSolve() with MUMPS or MKL_PARDISO sequential solvers in Schur complement mode.\n\
2: Example: mpiexec -n 1 ./ex192 -f <matrix binary file> -nrhs 4 -symmetric_solve -hermitian_solve -schur_ratio 0.3\n\n";
4: #include <petscmat.h>
6: int main(int argc, char **args)
7: {
8: Mat A, RHS, C, F, X, S;
9: Vec u, x, b;
10: Vec xschur, bschur, uschur;
11: IS is_schur;
12: PetscMPIInt size;
13: PetscInt isolver = 0, size_schur, m, n, nfact, nsolve, nrhs;
14: PetscReal norm, tol = PETSC_SQRT_MACHINE_EPSILON;
15: PetscRandom rand;
16: PetscBool data_provided, herm, symm, use_lu, cuda = PETSC_FALSE;
17: PetscBool isdata_provided;
18: PetscReal sratio = 5.1 / 12.;
19: PetscViewer fd; /* viewer */
20: char solver[256];
21: char file[PETSC_MAX_PATH_LEN]; /* input Mat file name */
22: char isfile[PETSC_MAX_PATH_LEN]; /* input IS file name */
24: PetscFunctionBeginUser;
25: PetscCall(PetscInitialize(&argc, &args, NULL, help));
26: PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
27: PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor test");
28: /* Determine which type of solver we want to test for */
29: herm = PETSC_FALSE;
30: symm = PETSC_FALSE;
31: PetscCall(PetscOptionsGetBool(NULL, NULL, "-symmetric_solve", &symm, NULL));
32: PetscCall(PetscOptionsGetBool(NULL, NULL, "-hermitian_solve", &herm, NULL));
33: if (herm) symm = PETSC_TRUE;
34: PetscCall(PetscOptionsGetBool(NULL, NULL, "-cuda_solve", &cuda, NULL));
35: PetscCall(PetscOptionsGetReal(NULL, NULL, "-tol", &tol, NULL));
37: /* Determine file from which we read the matrix A */
38: PetscCall(PetscOptionsGetString(NULL, NULL, "-f", file, sizeof(file), &data_provided));
39: if (!data_provided) { /* get matrices from PETSc distribution */
40: PetscCall(PetscStrncpy(file, "${PETSC_DIR}/share/petsc/datafiles/matrices/", sizeof(file)));
41: if (symm) {
42: #if defined(PETSC_USE_COMPLEX)
43: PetscCall(PetscStrlcat(file, "hpd-complex-", sizeof(file)));
44: #else
45: PetscCall(PetscStrlcat(file, "spd-real-", sizeof(file)));
46: #endif
47: } else {
48: #if defined(PETSC_USE_COMPLEX)
49: PetscCall(PetscStrlcat(file, "nh-complex-", sizeof(file)));
50: #else
51: PetscCall(PetscStrlcat(file, "ns-real-", sizeof(file)));
52: #endif
53: }
54: #if defined(PETSC_USE_64BIT_INDICES)
55: PetscCall(PetscStrlcat(file, "int64-", sizeof(file)));
56: #else
57: PetscCall(PetscStrlcat(file, "int32-", sizeof(file)));
58: #endif
59: #if defined(PETSC_USE_REAL_SINGLE)
60: PetscCall(PetscStrlcat(file, "float32", sizeof(file)));
61: #else
62: PetscCall(PetscStrlcat(file, "float64", sizeof(file)));
63: #endif
64: }
66: /* Load matrix A */
67: PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, file, FILE_MODE_READ, &fd));
68: PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
69: PetscCall(MatLoad(A, fd));
70: PetscCall(MatGetSize(A, &m, &n));
71: PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "This example is not intended for rectangular matrices (%" PetscInt_FMT ", %" PetscInt_FMT ")", m, n);
73: PetscCall(PetscOptionsGetString(NULL, NULL, "-fis", isfile, sizeof(isfile), &isdata_provided));
74: if (isdata_provided) {
75: PetscBool samefile;
77: PetscCall(PetscStrcmp(isfile, file, &samefile));
78: if (!samefile) {
79: PetscCall(PetscViewerDestroy(&fd));
80: PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, isfile, FILE_MODE_READ, &fd));
81: }
82: PetscCall(ISCreate(PETSC_COMM_SELF, &is_schur));
83: PetscCall(ISLoad(is_schur, fd));
84: } else {
85: PetscCall(PetscOptionsGetReal(NULL, NULL, "-schur_ratio", &sratio, NULL));
86: PetscCheck(sratio >= 0. && sratio <= 1., PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Invalid ratio for schur degrees of freedom %g", (double)sratio);
87: size_schur = (PetscInt)(sratio * m);
88: PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, m - size_schur, 1, &is_schur));
89: }
90: PetscCall(ISGetSize(is_schur, &size_schur));
91: PetscCall(PetscViewerDestroy(&fd));
93: /* Create dense matrix C and X; C holds true solution with identical columns */
94: nrhs = 2;
95: PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
96: PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
97: PetscCall(MatSetSizes(C, m, PETSC_DECIDE, PETSC_DECIDE, nrhs));
98: PetscCall(MatSetType(C, MATDENSE));
99: PetscCall(MatSetFromOptions(C));
100: PetscCall(MatSetUp(C));
102: PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
103: PetscCall(PetscRandomSetFromOptions(rand));
104: PetscCall(MatSetRandom(C, rand));
105: PetscCall(MatDuplicate(C, MAT_DO_NOT_COPY_VALUES, &X));
107: /* Create vectors */
108: PetscCall(VecCreate(PETSC_COMM_WORLD, &x));
109: PetscCall(VecSetSizes(x, n, PETSC_DECIDE));
110: PetscCall(VecSetFromOptions(x));
111: PetscCall(VecDuplicate(x, &b));
112: PetscCall(VecDuplicate(x, &u)); /* save the true solution */
114: PetscCall(PetscOptionsGetInt(NULL, NULL, "-solver", &isolver, NULL));
115: switch (isolver) {
116: #if defined(PETSC_HAVE_MUMPS)
117: case 0:
118: PetscCall(PetscStrncpy(solver, MATSOLVERMUMPS, sizeof(solver)));
119: break;
120: #endif
121: #if defined(PETSC_HAVE_MKL_PARDISO)
122: case 1:
123: PetscCall(PetscStrncpy(solver, MATSOLVERMKL_PARDISO, sizeof(solver)));
124: break;
125: #endif
126: default:
127: PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
128: break;
129: }
131: #if defined(PETSC_USE_COMPLEX)
132: if (isolver == 0 && symm && !data_provided) { /* MUMPS (5.0.0) does not have support for Hermitian matrices, so make them symmetric */
133: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.);
134: PetscScalar val = -1.0;
135: val = val + im;
136: PetscCall(MatSetValue(A, 1, 0, val, INSERT_VALUES));
137: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
138: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
139: }
140: #endif
142: PetscCall(PetscPrintf(PETSC_COMM_SELF, "Solving with %s: nrhs %" PetscInt_FMT ", sym %d, herm %d, size schur %" PetscInt_FMT ", size mat %" PetscInt_FMT "\n", solver, nrhs, symm, herm, size_schur, m));
144: /* Test LU/Cholesky Factorization */
145: use_lu = PETSC_FALSE;
146: if (!symm) use_lu = PETSC_TRUE;
147: if (PetscDefined(USE_COMPLEX) && isolver == 1) use_lu = PETSC_TRUE;
148: if (cuda && symm && !herm) use_lu = PETSC_TRUE;
150: if (herm && !use_lu) { /* test also conversion routines inside the solver packages */
151: PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
152: PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INPLACE_MATRIX, &A));
153: }
155: if (use_lu) {
156: PetscCall(MatGetFactor(A, solver, MAT_FACTOR_LU, &F));
157: } else {
158: if (herm) {
159: PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
160: } else {
161: PetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE));
162: PetscCall(MatSetOption(A, MAT_SPD, PETSC_FALSE));
163: }
164: PetscCall(MatGetFactor(A, solver, MAT_FACTOR_CHOLESKY, &F));
165: }
167: /* Set Schur complement indices */
168: PetscCall(MatFactorSetSchurIS(F, is_schur));
169: PetscCall(ISDestroy(&is_schur));
171: if (use_lu) {
172: PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
173: } else {
174: PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
175: }
177: for (nfact = 0; nfact < 3; nfact++) {
178: Mat AD;
180: if (nfact == 1) {
181: PetscCall(VecSetRandom(x, rand));
182: if (symm && herm) PetscCall(VecAbs(x));
183: PetscCall(MatDiagonalSet(A, x, ADD_VALUES));
184: }
185: if (use_lu) {
186: PetscCall(MatLUFactorNumeric(F, A, NULL));
187: } else {
188: PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
189: }
191: if (cuda) {
192: PetscCall(MatFactorGetSchurComplement(F, &S, NULL));
193: PetscCall(MatSetType(S, MATSEQDENSECUDA));
194: PetscCall(MatCreateVecs(S, &xschur, &bschur));
195: PetscCall(MatFactorRestoreSchurComplement(F, &S, MAT_FACTOR_SCHUR_UNFACTORED));
196: }
197: PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
198: if (!cuda) PetscCall(MatCreateVecs(S, &xschur, &bschur));
199: PetscCall(VecDuplicate(xschur, &uschur));
200: if (nfact == 1 && (!cuda || (herm && symm))) PetscCall(MatFactorInvertSchurComplement(F));
201: for (nsolve = 0; nsolve < 2; nsolve++) {
202: PetscCall(VecSetRandom(x, rand));
203: PetscCall(VecCopy(x, u));
205: if (nsolve) {
206: PetscCall(MatMult(A, x, b));
207: PetscCall(MatSolve(F, b, x));
208: } else {
209: PetscCall(MatMultTranspose(A, x, b));
210: PetscCall(MatSolveTranspose(F, b, x));
211: }
212: /* Check the error */
213: PetscCall(VecAXPY(u, -1.0, x)); /* u <- (-1.0)x + u */
214: PetscCall(VecNorm(u, NORM_2, &norm));
215: if (norm > tol) {
216: PetscReal resi;
217: if (nsolve) {
218: PetscCall(MatMult(A, x, u)); /* u = A*x */
219: } else {
220: PetscCall(MatMultTranspose(A, x, u)); /* u = A*x */
221: }
222: PetscCall(VecAXPY(u, -1.0, b)); /* u <- (-1.0)b + u */
223: PetscCall(VecNorm(u, NORM_2, &resi));
224: if (nsolve) {
225: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolve error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
226: } else {
227: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatSolveTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi));
228: }
229: }
230: PetscCall(VecSetRandom(xschur, rand));
231: PetscCall(VecCopy(xschur, uschur));
232: if (nsolve) {
233: PetscCall(MatMult(S, xschur, bschur));
234: PetscCall(MatFactorSolveSchurComplement(F, bschur, xschur));
235: } else {
236: PetscCall(MatMultTranspose(S, xschur, bschur));
237: PetscCall(MatFactorSolveSchurComplementTranspose(F, bschur, xschur));
238: }
239: /* Check the error */
240: PetscCall(VecAXPY(uschur, -1.0, xschur)); /* u <- (-1.0)x + u */
241: PetscCall(VecNorm(uschur, NORM_2, &norm));
242: if (norm > tol) {
243: PetscReal resi;
244: if (nsolve) {
245: PetscCall(MatMult(S, xschur, uschur)); /* u = A*x */
246: } else {
247: PetscCall(MatMultTranspose(S, xschur, uschur)); /* u = A*x */
248: }
249: PetscCall(VecAXPY(uschur, -1.0, bschur)); /* u <- (-1.0)b + u */
250: PetscCall(VecNorm(uschur, NORM_2, &resi));
251: if (nsolve) {
252: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplement error: Norm of error %g, residual %g\n", nfact, nsolve, (double)norm, (double)resi));
253: } else {
254: PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatFactorSolveSchurComplementTranspose error: Norm of error %g, residual %f\n", nfact, nsolve, (double)norm, (double)resi));
255: }
256: }
257: }
258: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &AD));
259: if (!nfact) PetscCall(MatMatMult(AD, C, MAT_INITIAL_MATRIX, 2.0, &RHS));
260: else PetscCall(MatMatMult(AD, C, MAT_REUSE_MATRIX, 2.0, &RHS));
261: PetscCall(MatDestroy(&AD));
262: for (nsolve = 0; nsolve < 2; nsolve++) {
263: PetscCall(MatMatSolve(F, RHS, X));
265: /* Check the error */
266: PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
267: PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
268: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
269: #if PetscDefined(HAVE_MUMPS)
270: PetscCall(MatMumpsSetIcntl(F, 26, 1));
271: PetscCall(MatMatSolve(F, RHS, X));
272: PetscCall(MatMumpsSetIcntl(F, 26, 2));
273: PetscCall(MatMatSolve(F, RHS, X));
274: PetscCall(MatMumpsSetIcntl(F, 26, -1));
276: /* Check the error */
277: PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
278: PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
279: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
280: #endif
281: }
282: if (isolver == 0) {
283: Mat spRHS, spRHST, RHST;
285: PetscCall(MatTranspose(RHS, MAT_INITIAL_MATRIX, &RHST));
286: PetscCall(MatConvert(RHST, MATSEQAIJ, MAT_INITIAL_MATRIX, &spRHST));
287: PetscCall(MatCreateTranspose(spRHST, &spRHS));
288: for (nsolve = 0; nsolve < 2; nsolve++) {
289: PetscCall(MatMatSolve(F, spRHS, X));
291: /* Check the error */
292: PetscCall(MatAXPY(X, -1.0, C, SAME_NONZERO_PATTERN));
293: PetscCall(MatNorm(X, NORM_FROBENIUS, &norm));
294: if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_SELF, "(f %" PetscInt_FMT ", s %" PetscInt_FMT ") sparse MatMatSolve: Norm of error %g\n", nfact, nsolve, (double)norm));
295: }
296: PetscCall(MatDestroy(&spRHST));
297: PetscCall(MatDestroy(&spRHS));
298: PetscCall(MatDestroy(&RHST));
299: }
300: PetscCall(MatDestroy(&S));
301: PetscCall(VecDestroy(&xschur));
302: PetscCall(VecDestroy(&bschur));
303: PetscCall(VecDestroy(&uschur));
304: }
305: /* Free data structures */
306: PetscCall(MatDestroy(&A));
307: PetscCall(MatDestroy(&C));
308: PetscCall(MatDestroy(&F));
309: PetscCall(MatDestroy(&X));
310: PetscCall(MatDestroy(&RHS));
311: PetscCall(PetscRandomDestroy(&rand));
312: PetscCall(VecDestroy(&x));
313: PetscCall(VecDestroy(&b));
314: PetscCall(VecDestroy(&u));
315: PetscCall(PetscFinalize());
316: return 0;
317: }
319: /*TEST
321: testset:
322: requires: mkl_pardiso double !complex
323: args: -solver 1
325: test:
326: suffix: mkl_pardiso
327: test:
328: requires: cuda
329: suffix: mkl_pardiso_cuda
330: args: -cuda_solve
331: output_file: output/ex192_mkl_pardiso.out
332: test:
333: suffix: mkl_pardiso_1
334: args: -symmetric_solve
335: output_file: output/ex192_mkl_pardiso_1.out
336: test:
337: requires: cuda
338: suffix: mkl_pardiso_cuda_1
339: args: -symmetric_solve -cuda_solve
340: output_file: output/ex192_mkl_pardiso_1.out
341: test:
342: suffix: mkl_pardiso_3
343: args: -symmetric_solve -hermitian_solve
344: output_file: output/ex192_mkl_pardiso_3.out
345: test:
346: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
347: suffix: mkl_pardiso_cuda_3
348: args: -symmetric_solve -hermitian_solve -cuda_solve
349: output_file: output/ex192_mkl_pardiso_3.out
351: testset:
352: requires: mumps double !complex
353: args: -solver 0
355: test:
356: suffix: mumps
357: test:
358: requires: cuda
359: suffix: mumps_cuda
360: args: -cuda_solve
361: output_file: output/ex192_mumps.out
362: test:
363: suffix: mumps_2
364: args: -symmetric_solve
365: output_file: output/ex192_mumps_2.out
366: test:
367: requires: cuda
368: suffix: mumps_cuda_2
369: args: -symmetric_solve -cuda_solve
370: output_file: output/ex192_mumps_2.out
371: test:
372: suffix: mumps_3
373: args: -symmetric_solve -hermitian_solve
374: output_file: output/ex192_mumps_3.out
375: test:
376: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
377: suffix: mumps_cuda_3
378: args: -symmetric_solve -hermitian_solve -cuda_solve
379: output_file: output/ex192_mumps_3.out
381: testset:
382: requires: mumps double !complex defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
383: args: -solver 0 -pc_precision single -tol 3.4e-4
385: test:
386: suffix: mumps_s
387: output_file: output/ex192_mumps.out
389: test:
390: requires: cuda
391: suffix: mumps_cuda_s
392: args: -cuda_solve
393: output_file: output/ex192_mumps.out
394: test:
395: suffix: mumps_2_s
396: args: -symmetric_solve
397: output_file: output/ex192_mumps_2.out
398: test:
399: requires: cuda
400: suffix: mumps_cuda_2_s
401: args: -symmetric_solve -cuda_solve
402: output_file: output/ex192_mumps_2.out
403: test:
404: suffix: mumps_3_s
405: args: -symmetric_solve -hermitian_solve
406: output_file: output/ex192_mumps_3.out
407: test:
408: requires: cuda defined(PETSC_HAVE_CUSOLVERDNDPOTRI)
409: suffix: mumps_cuda_3_s
410: args: -symmetric_solve -hermitian_solve -cuda_solve
411: output_file: output/ex192_mumps_3.out
413: TEST*/