Actual source code: pbjacobi.c
1: #include <../src/ksp/pc/impls/pbjacobi/pbjacobi.h>
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode PCApply_PBJacobi(PC pc, Vec x, Vec y)
5: {
6: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
7: PetscInt i, ib, jb;
8: const PetscInt m = jac->mbs;
9: const PetscInt bs = jac->bs;
10: const MatScalar *diag = jac->diag;
11: PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6;
12: const PetscScalar *xx;
14: PetscFunctionBegin;
15: PetscCall(VecGetArrayRead(x, &xx));
16: PetscCall(VecGetArray(y, &yy));
17: switch (bs) {
18: case 1:
19: for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
20: break;
21: case 2:
22: for (i = 0; i < m; i++) {
23: x0 = xx[2 * i];
24: x1 = xx[2 * i + 1];
25: yy[2 * i] = diag[0] * x0 + diag[2] * x1;
26: yy[2 * i + 1] = diag[1] * x0 + diag[3] * x1;
27: diag += 4;
28: }
29: break;
30: case 3:
31: for (i = 0; i < m; i++) {
32: x0 = xx[3 * i];
33: x1 = xx[3 * i + 1];
34: x2 = xx[3 * i + 2];
36: yy[3 * i] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
37: yy[3 * i + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
38: yy[3 * i + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
39: diag += 9;
40: }
41: break;
42: case 4:
43: for (i = 0; i < m; i++) {
44: x0 = xx[4 * i];
45: x1 = xx[4 * i + 1];
46: x2 = xx[4 * i + 2];
47: x3 = xx[4 * i + 3];
49: yy[4 * i] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
50: yy[4 * i + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
51: yy[4 * i + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
52: yy[4 * i + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
53: diag += 16;
54: }
55: break;
56: case 5:
57: for (i = 0; i < m; i++) {
58: x0 = xx[5 * i];
59: x1 = xx[5 * i + 1];
60: x2 = xx[5 * i + 2];
61: x3 = xx[5 * i + 3];
62: x4 = xx[5 * i + 4];
64: yy[5 * i] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
65: yy[5 * i + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
66: yy[5 * i + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
67: yy[5 * i + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
68: yy[5 * i + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
69: diag += 25;
70: }
71: break;
72: case 6:
73: for (i = 0; i < m; i++) {
74: x0 = xx[6 * i];
75: x1 = xx[6 * i + 1];
76: x2 = xx[6 * i + 2];
77: x3 = xx[6 * i + 3];
78: x4 = xx[6 * i + 4];
79: x5 = xx[6 * i + 5];
81: yy[6 * i] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
82: yy[6 * i + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
83: yy[6 * i + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
84: yy[6 * i + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
85: yy[6 * i + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
86: yy[6 * i + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
87: diag += 36;
88: }
89: break;
90: case 7:
91: for (i = 0; i < m; i++) {
92: x0 = xx[7 * i];
93: x1 = xx[7 * i + 1];
94: x2 = xx[7 * i + 2];
95: x3 = xx[7 * i + 3];
96: x4 = xx[7 * i + 4];
97: x5 = xx[7 * i + 5];
98: x6 = xx[7 * i + 6];
100: yy[7 * i] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
101: yy[7 * i + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
102: yy[7 * i + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
103: yy[7 * i + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
104: yy[7 * i + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
105: yy[7 * i + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
106: yy[7 * i + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
107: diag += 49;
108: }
109: break;
110: default:
111: for (i = 0; i < m; i++) {
112: for (ib = 0; ib < bs; ib++) {
113: PetscScalar rowsum = 0;
114: for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[bs * i + jb];
115: yy[bs * i + ib] = rowsum;
116: }
117: diag += bs * bs;
118: }
119: }
120: PetscCall(VecRestoreArrayRead(x, &xx));
121: PetscCall(VecRestoreArray(y, &yy));
122: PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m)); /* 2*bs2 - bs */
123: PetscFunctionReturn(PETSC_SUCCESS);
124: }
126: static PetscErrorCode PCApplyTranspose_PBJacobi(PC pc, Vec x, Vec y)
127: {
128: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
129: PetscInt i, ib, jb;
130: const PetscInt m = jac->mbs;
131: const PetscInt bs = jac->bs;
132: const MatScalar *diag = jac->diag;
133: PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6;
134: const PetscScalar *xx;
136: PetscFunctionBegin;
137: PetscCall(VecGetArrayRead(x, &xx));
138: PetscCall(VecGetArray(y, &yy));
139: switch (bs) {
140: case 1:
141: for (i = 0; i < m; i++) yy[i] = diag[i] * xx[i];
142: break;
143: case 2:
144: for (i = 0; i < m; i++) {
145: x0 = xx[2 * i];
146: x1 = xx[2 * i + 1];
147: yy[2 * i] = diag[0] * x0 + diag[1] * x1;
148: yy[2 * i + 1] = diag[2] * x0 + diag[3] * x1;
149: diag += 4;
150: }
151: break;
152: case 3:
153: for (i = 0; i < m; i++) {
154: x0 = xx[3 * i];
155: x1 = xx[3 * i + 1];
156: x2 = xx[3 * i + 2];
158: yy[3 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
159: yy[3 * i + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
160: yy[3 * i + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
161: diag += 9;
162: }
163: break;
164: case 4:
165: for (i = 0; i < m; i++) {
166: x0 = xx[4 * i];
167: x1 = xx[4 * i + 1];
168: x2 = xx[4 * i + 2];
169: x3 = xx[4 * i + 3];
171: yy[4 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
172: yy[4 * i + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
173: yy[4 * i + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
174: yy[4 * i + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
175: diag += 16;
176: }
177: break;
178: case 5:
179: for (i = 0; i < m; i++) {
180: x0 = xx[5 * i];
181: x1 = xx[5 * i + 1];
182: x2 = xx[5 * i + 2];
183: x3 = xx[5 * i + 3];
184: x4 = xx[5 * i + 4];
186: yy[5 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
187: yy[5 * i + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
188: yy[5 * i + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
189: yy[5 * i + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
190: yy[5 * i + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
191: diag += 25;
192: }
193: break;
194: case 6:
195: for (i = 0; i < m; i++) {
196: x0 = xx[6 * i];
197: x1 = xx[6 * i + 1];
198: x2 = xx[6 * i + 2];
199: x3 = xx[6 * i + 3];
200: x4 = xx[6 * i + 4];
201: x5 = xx[6 * i + 5];
203: yy[6 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
204: yy[6 * i + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
205: yy[6 * i + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
206: yy[6 * i + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
207: yy[6 * i + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
208: yy[6 * i + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
209: diag += 36;
210: }
211: break;
212: case 7:
213: for (i = 0; i < m; i++) {
214: x0 = xx[7 * i];
215: x1 = xx[7 * i + 1];
216: x2 = xx[7 * i + 2];
217: x3 = xx[7 * i + 3];
218: x4 = xx[7 * i + 4];
219: x5 = xx[7 * i + 5];
220: x6 = xx[7 * i + 6];
222: yy[7 * i] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
223: yy[7 * i + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
224: yy[7 * i + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
225: yy[7 * i + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
226: yy[7 * i + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
227: yy[7 * i + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
228: yy[7 * i + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
229: diag += 49;
230: }
231: break;
232: default:
233: for (i = 0; i < m; i++) {
234: for (ib = 0; ib < bs; ib++) {
235: PetscScalar rowsum = 0;
236: for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[bs * i + jb];
237: yy[bs * i + ib] = rowsum;
238: }
239: diag += bs * bs;
240: }
241: }
242: PetscCall(VecRestoreArrayRead(x, &xx));
243: PetscCall(VecRestoreArray(y, &yy));
244: PetscCall(PetscLogFlops((2.0 * bs * bs - bs) * m));
245: PetscFunctionReturn(PETSC_SUCCESS);
246: }
248: PETSC_INTERN PetscErrorCode PCSetUp_PBJacobi_Host(PC pc, Mat diagPB)
249: {
250: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
251: Mat A = diagPB ? diagPB : pc->pmat;
252: MatFactorError err;
253: PetscInt nlocal;
255: PetscFunctionBegin;
256: PetscCall(MatInvertBlockDiagonal(A, &jac->diag));
257: PetscCall(MatFactorGetError(A, &err));
258: if (err) pc->failedreason = (PCFailedReason)err;
260: PetscCall(MatGetBlockSize(A, &jac->bs));
261: PetscCall(MatGetLocalSize(A, &nlocal, NULL));
262: jac->mbs = nlocal / jac->bs;
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
266: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
267: {
268: PetscBool flg;
269: Mat diagPB = NULL;
271: PetscFunctionBegin;
272: /* In PCCreate_PBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch */
274: // pmat (e.g., MatCEED from libCEED) might have its own method to provide a matrix (diagPB)
275: // made of the diagonal blocks. So we check both pmat and diagVPB.
276: PetscCall(MatHasOperation(pc->pmat, MATOP_GET_BLOCK_DIAGONAL, &flg));
277: if (flg) PetscUseTypeMethod(pc->pmat, getblockdiagonal, &diagPB); // diagPB's reference count is increased upon return
279: #if defined(PETSC_HAVE_CUDA)
280: PetscBool isCuda;
281: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
282: if (!isCuda && diagPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagPB, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
283: #endif
284: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
285: PetscBool isKok;
286: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
287: if (!isKok && diagPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagPB, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
288: #endif
290: #if defined(PETSC_HAVE_CUDA)
291: if (isCuda) PetscCall(PCSetUp_PBJacobi_CUDA(pc, diagPB));
292: else
293: #endif
294: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
295: if (isKok)
296: PetscCall(PCSetUp_PBJacobi_Kokkos(pc, diagPB));
297: else
298: #endif
299: {
300: PetscCall(PCSetUp_PBJacobi_Host(pc, diagPB));
301: }
302: PetscCall(MatDestroy(&diagPB)); // since we don't need it anymore, we don't stash it in PC_PBJacobi
303: PetscFunctionReturn(PETSC_SUCCESS);
304: }
306: PetscErrorCode PCDestroy_PBJacobi(PC pc)
307: {
308: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
310: PetscFunctionBegin;
311: /*
312: Free the private data structure that was hanging off the PC
313: */
314: // PetscCall(PetscFree(jac->diag)); // the memory is owned by e.g., a->ibdiag in Mat_SeqAIJ, so don't free it here.
315: PetscCall(MatDestroy(&jac->diagPB));
316: PetscCall(PetscFree(pc->data));
317: PetscFunctionReturn(PETSC_SUCCESS);
318: }
320: static PetscErrorCode PCView_PBJacobi(PC pc, PetscViewer viewer)
321: {
322: PC_PBJacobi *jac = (PC_PBJacobi *)pc->data;
323: PetscBool iascii;
325: PetscFunctionBegin;
326: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
327: if (iascii) PetscCall(PetscViewerASCIIPrintf(viewer, " point-block size %" PetscInt_FMT "\n", jac->bs));
328: PetscFunctionReturn(PETSC_SUCCESS);
329: }
331: /*MC
332: PCPBJACOBI - Point block Jacobi preconditioner
334: Notes:
335: See `PCJACOBI` for diagonal Jacobi, `PCVPBJACOBI` for variable-size point block, and `PCBJACOBI` for large size blocks
337: This works for `MATAIJ` and `MATBAIJ` matrices and uses the blocksize provided to the matrix
339: Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
340: is detected a PETSc error is generated.
342: Developer Notes:
343: This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
344: the factorization to continue even after a zero pivot is found resulting in a Nan and hence
345: terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
346: a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
348: Perhaps should provide an option that allows generation of a valid preconditioner
349: even if a block is singular as the `PCJACOBI` does.
351: Level: beginner
353: .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCVPBJACOBI`, `PCBJACOBI`
354: M*/
356: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
357: {
358: PC_PBJacobi *jac;
360: PetscFunctionBegin;
361: /*
362: Creates the private data structure for this preconditioner and
363: attach it to the PC object.
364: */
365: PetscCall(PetscNew(&jac));
366: pc->data = (void *)jac;
368: /*
369: Initialize the pointers to vectors to ZERO; these will be used to store
370: diagonal entries of the matrix for fast preconditioner application.
371: */
372: jac->diag = NULL;
374: /*
375: Set the pointers for the functions that are provided above.
376: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
377: are called, they will automatically call these functions. Note we
378: choose not to provide a couple of these functions since they are
379: not needed.
380: */
381: pc->ops->apply = PCApply_PBJacobi;
382: pc->ops->applytranspose = PCApplyTranspose_PBJacobi;
383: pc->ops->setup = PCSetUp_PBJacobi;
384: pc->ops->destroy = PCDestroy_PBJacobi;
385: pc->ops->setfromoptions = NULL;
386: pc->ops->view = PCView_PBJacobi;
387: pc->ops->applyrichardson = NULL;
388: pc->ops->applysymmetricleft = NULL;
389: pc->ops->applysymmetricright = NULL;
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }