Actual source code: ex79.c
1: #include <petsc.h>
3: #if PetscDefined(HAVE_HYPRE_DEVICE)
4: #include <petsc/private/petschypre.h>
5: #endif
7: static char help[] = "Solves a linear system with a block of right-hand sides, apply a preconditioner to the same block.\n\n";
9: PetscErrorCode MatApply(PC pc, Mat X, Mat Y)
10: {
11: PetscFunctionBeginUser;
12: PetscCall(MatCopy(X, Y, SAME_NONZERO_PATTERN));
13: PetscFunctionReturn(PETSC_SUCCESS);
14: }
16: int main(int argc, char **args)
17: {
18: Mat A, X, B; /* computed solutions and RHS */
19: KSP ksp; /* linear solver context */
20: PC pc; /* preconditioner context */
21: PetscInt m = 10;
22: PetscBool flg, transpose = PETSC_FALSE, hypre_mat_on_device = PETSC_FALSE;
23: PetscLogEvent event;
24: PetscEventPerfInfo info;
26: PetscFunctionBeginUser;
27: PetscCall(PetscInitialize(&argc, &args, NULL, help));
28: PetscCall(PetscLogDefaultBegin());
29: PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
30: PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, m, m, PETSC_DECIDE, PETSC_DECIDE, m, NULL, m, NULL, &A));
31: PetscCall(MatSetRandom(A, NULL));
32: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
33: PetscCall(PetscOptionsGetBool(NULL, NULL, "-transpose", &transpose, NULL));
34: PetscCall(MatShift(A, 10.0));
35: PetscCall(MatCreateDense(PETSC_COMM_WORLD, m, PETSC_DECIDE, PETSC_DECIDE, m, NULL, &B));
36: PetscCall(MatCreateDense(PETSC_COMM_WORLD, m, PETSC_DECIDE, PETSC_DECIDE, m, NULL, &X));
37: PetscCall(MatSetRandom(B, NULL));
38: PetscCall(MatSetFromOptions(A));
39: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
40: if (flg) {
41: PetscCall(MatConvert(B, MATDENSECUDA, MAT_INPLACE_MATRIX, &B));
42: PetscCall(MatConvert(X, MATDENSECUDA, MAT_INPLACE_MATRIX, &X));
43: } else {
44: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
45: if (flg) {
46: PetscCall(MatConvert(B, MATDENSEHIP, MAT_INPLACE_MATRIX, &B));
47: PetscCall(MatConvert(X, MATDENSEHIP, MAT_INPLACE_MATRIX, &X));
48: }
49: }
50: PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
51: PetscCall(KSPSetOperators(ksp, A, A));
52: PetscCall(KSPSetFromOptions(ksp));
53: PetscCall(KSPGetPC(ksp, &pc));
54: /* PCFIELDSPLIT: set fields */
55: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCFIELDSPLIT, &flg));
56: if (flg) {
57: PetscInt rbegin, rend, rsize = m / 2;
58: IS is;
60: PetscCall(MatGetOwnershipRange(A, &rbegin, &rend));
61: PetscCall(ISCreateStride(PETSC_COMM_WORLD, rsize, rbegin, 1, &is));
62: PetscCall(PCFieldSplitSetIS(pc, NULL, is));
63: PetscCall(ISDestroy(&is));
64: }
65: #if defined(PETSC_HAVE_HYPRE_DEVICE)
66: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCHYPRE, &flg));
67: if (flg) {
68: HYPRE_MemoryLocation hmem;
69: PetscCallHYPRE(HYPRE_GetMemoryLocation(&hmem));
70: if (hmem == HYPRE_MEMORY_DEVICE) hypre_mat_on_device = PETSC_TRUE;
71: }
72: #endif
73: if (!hypre_mat_on_device) {
74: PetscCall(PCShellSetMatApply(pc, MatApply));
75: PetscCall(PCShellSetMatApplyTranspose(pc, MatApply));
76: }
77: PetscCall(KSPMatSolve(ksp, B, X));
78: PetscCall(PCMatApply(pc, B, X));
79: if (transpose) {
80: PetscCall(KSPMatSolveTranspose(ksp, B, X));
81: PetscCall(PCMatApply(pc, B, X));
82: PetscCall(KSPMatSolve(ksp, B, X));
83: }
84: PetscCall(MatDestroy(&X));
85: PetscCall(MatDestroy(&B));
86: PetscCall(MatDestroy(&A));
87: PetscCall(KSPDestroy(&ksp));
88: PetscCall(PetscLogEventRegister("PCApply", PC_CLASSID, &event)); // there is no PCApplyTranspose event
89: PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &info));
90: PetscCheck(!PetscDefined(USE_LOG) || m <= 1 || !info.count, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "PCApply() called %d times", info.count);
91: PetscCall(PetscLogEventRegister("PCMatApply", PC_CLASSID, &event)); // there is no PCMatApplyTranspose event
92: PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &info));
93: PetscCheck(!PetscDefined(USE_LOG) || m <= 1 || info.count, PetscObjectComm((PetscObject)ksp), PETSC_ERR_PLIB, "PCMatApply() never called");
94: PetscCall(PetscFinalize());
95: return 0;
96: }
98: /*TEST
99: # KSPHPDDM does either pseudo-blocking or "true" blocking, all tests should succeed with other -ksp_hpddm_type
100: testset:
101: nsize: 1
102: args: -pc_type {{mat none shell gasm}shared output}
103: test:
104: suffix: 1
105: output_file: output/empty.out
106: args: -ksp_type preonly
107: test:
108: suffix: 1_hpddm
109: output_file: output/empty.out
110: requires: hpddm
111: args: -ksp_type hpddm -ksp_hpddm_type preonly
113: testset:
114: nsize: 1
115: args: -pc_type ksp -transpose
116: test:
117: suffix: 2
118: output_file: output/empty.out
119: args: -ksp_ksp_type preonly -ksp_type preonly
120: test:
121: suffix: 2_hpddm
122: output_file: output/empty.out
123: requires: hpddm
124: args: -ksp_ksp_type hpddm -ksp_type hpddm -ksp_hpddm_type preonly -ksp_ksp_hpddm_type preonly
126: testset:
127: nsize: 1
128: requires: h2opus
129: args: -pc_type h2opus -pc_h2opus_init_mat_h2opus_leafsize 10
130: test:
131: suffix: 3
132: output_file: output/empty.out
133: args: -ksp_type preonly
134: test:
135: suffix: 3_hpddm
136: output_file: output/empty.out
137: requires: hpddm
138: args: -ksp_type hpddm -ksp_hpddm_type preonly
140: testset:
141: nsize: 1
142: requires: spai
143: args: -pc_type spai
144: test:
145: suffix: 4
146: output_file: output/empty.out
147: args: -ksp_type preonly
148: test:
149: suffix: 4_hpddm
150: output_file: output/empty.out
151: requires: hpddm
152: args: -ksp_type hpddm -ksp_hpddm_type preonly
153: # special code path in PCMatApply() for PCBJACOBI when a block is shared by multiple processes
154: testset:
155: nsize: 2
156: args: -pc_type bjacobi -pc_bjacobi_blocks 1 -sub_pc_type none
157: test:
158: suffix: 5
159: output_file: output/empty.out
160: args: -ksp_type preonly -sub_ksp_type preonly
161: test:
162: suffix: 5_hpddm
163: output_file: output/empty.out
164: requires: hpddm
165: args: -ksp_type hpddm -ksp_hpddm_type preonly -sub_ksp_type hpddm
166: # special code path in PCMatApply() for PCGASM when a block is shared by multiple processes
167: testset:
168: nsize: 2
169: args: -pc_type gasm -pc_gasm_total_subdomains 1 -sub_pc_type none
170: test:
171: suffix: 6
172: output_file: output/empty.out
173: args: -ksp_type preonly -sub_ksp_type preonly
174: test:
175: suffix: 6_hpddm
176: output_file: output/empty.out
177: requires: hpddm
178: args: -ksp_type hpddm -ksp_hpddm_type preonly -sub_ksp_type hpddm
180: testset:
181: nsize: 1
182: requires: suitesparse
183: args: -pc_type qr
184: test:
185: suffix: 7
186: output_file: output/empty.out
187: args: -ksp_type preonly
188: test:
189: suffix: 7_hpddm
190: output_file: output/empty.out
191: requires: hpddm
192: args: -ksp_type hpddm -ksp_hpddm_type preonly
194: testset:
195: nsize: 1
196: requires: hpddm cuda
197: args: -mat_type aijcusparse -ksp_type hpddm
198: test:
199: suffix: 8_hpddm
200: output_file: output/empty.out
201: test:
202: suffix: 8_hpddm_transpose
203: output_file: output/empty.out
204: args: -pc_type icc -transpose
206: testset:
207: nsize: 1
208: args: -pc_type {{bjacobi lu ilu cholesky icc none shell}shared output} -transpose
209: test:
210: suffix: 1_transpose
211: output_file: output/empty.out
212: args: -ksp_type preonly
213: test:
214: suffix: 1_hpddm_transpose
215: output_file: output/empty.out
216: requires: hpddm
217: args: -ksp_type hpddm -ksp_hpddm_type preonly
219: testset:
220: nsize: 2
221: args: -pc_type asm -transpose
222: test:
223: suffix: 2_transpose
224: output_file: output/empty.out
225: args: -ksp_type preonly
226: test:
227: suffix: 2_hpddm_transpose
228: output_file: output/empty.out
229: requires: hpddm
230: args: -ksp_type hpddm -ksp_hpddm_type preonly
232: testset:
233: requires: hypre !complex
234: args: -pc_type hypre -pc_hypre_boomeramg_relax_type_all l1scaled-Jacobi -pc_hypre_boomeramg_no_CF
235: test:
236: suffix: 9
237: output_file: output/empty.out
238: args: -ksp_type preonly
239: test:
240: suffix: 9_hpddm
241: output_file: output/empty.out
242: requires: hpddm !hip
243: args: -ksp_type hpddm -ksp_max_it 15 -ksp_error_if_not_converged
244: test:
245: suffix: 9_cuda
246: output_file: output/empty.out
247: requires: cuda
248: args: -mat_type aijcusparse -ksp_type preonly
250: testset:
251: nsize: 2
252: args: -pc_type fieldsplit -pc_fieldsplit_type {{additive multiplicative symmetric_multiplicative}shared output}
253: test:
254: suffix: 10
255: output_file: output/empty.out
256: args: -ksp_type preonly
257: test:
258: suffix: 10_hpddm
259: output_file: output/empty.out
260: requires: hpddm
261: args: -ksp_type hpddm -ksp_hpddm_type preonly
263: TEST*/