Actual source code: vpbjacobi.c
1: #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y)
5: {
6: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
7: PetscInt i, ncnt = 0;
8: const MatScalar *diag = jac->diag;
9: PetscInt ib, jb, bs;
10: const PetscScalar *xx;
11: PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6;
12: PetscInt nblocks;
13: const PetscInt *bsizes;
15: PetscFunctionBegin;
16: PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
17: PetscCall(VecGetArrayRead(x, &xx));
18: PetscCall(VecGetArray(y, &yy));
19: for (i = 0; i < nblocks; i++) {
20: bs = bsizes[i];
21: switch (bs) {
22: case 1:
23: yy[ncnt] = *diag * xx[ncnt];
24: break;
25: case 2:
26: x0 = xx[ncnt];
27: x1 = xx[ncnt + 1];
28: yy[ncnt] = diag[0] * x0 + diag[2] * x1;
29: yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
30: break;
31: case 3:
32: x0 = xx[ncnt];
33: x1 = xx[ncnt + 1];
34: x2 = xx[ncnt + 2];
35: yy[ncnt] = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
36: yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
37: yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
38: break;
39: case 4:
40: x0 = xx[ncnt];
41: x1 = xx[ncnt + 1];
42: x2 = xx[ncnt + 2];
43: x3 = xx[ncnt + 3];
44: yy[ncnt] = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
45: yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
46: yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
47: yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
48: break;
49: case 5:
50: x0 = xx[ncnt];
51: x1 = xx[ncnt + 1];
52: x2 = xx[ncnt + 2];
53: x3 = xx[ncnt + 3];
54: x4 = xx[ncnt + 4];
55: yy[ncnt] = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
56: yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
57: yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
58: yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
59: yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
60: break;
61: case 6:
62: x0 = xx[ncnt];
63: x1 = xx[ncnt + 1];
64: x2 = xx[ncnt + 2];
65: x3 = xx[ncnt + 3];
66: x4 = xx[ncnt + 4];
67: x5 = xx[ncnt + 5];
68: yy[ncnt] = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
69: yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
70: yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
71: yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
72: yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
73: yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
74: break;
75: case 7:
76: x0 = xx[ncnt];
77: x1 = xx[ncnt + 1];
78: x2 = xx[ncnt + 2];
79: x3 = xx[ncnt + 3];
80: x4 = xx[ncnt + 4];
81: x5 = xx[ncnt + 5];
82: x6 = xx[ncnt + 6];
83: yy[ncnt] = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
84: yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
85: yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
86: yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
87: yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
88: yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
89: yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
90: break;
91: default:
92: for (ib = 0; ib < bs; ib++) {
93: PetscScalar rowsum = 0;
94: for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb];
95: yy[ncnt + ib] = rowsum;
96: }
97: }
98: ncnt += bsizes[i];
99: diag += bsizes[i] * bsizes[i];
100: }
101: PetscCall(VecRestoreArrayRead(x, &xx));
102: PetscCall(VecRestoreArray(y, &yy));
103: PetscFunctionReturn(PETSC_SUCCESS);
104: }
106: static PetscErrorCode PCApplyTranspose_VPBJacobi(PC pc, Vec x, Vec y)
107: {
108: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
109: PetscInt i, ncnt = 0;
110: const MatScalar *diag = jac->diag;
111: PetscInt ib, jb, bs;
112: const PetscScalar *xx;
113: PetscScalar *yy, x0, x1, x2, x3, x4, x5, x6;
114: PetscInt nblocks;
115: const PetscInt *bsizes;
117: PetscFunctionBegin;
118: PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
119: PetscCall(VecGetArrayRead(x, &xx));
120: PetscCall(VecGetArray(y, &yy));
121: for (i = 0; i < nblocks; i++) {
122: bs = bsizes[i];
123: switch (bs) {
124: case 1:
125: yy[ncnt] = *diag * xx[ncnt];
126: break;
127: case 2:
128: x0 = xx[ncnt];
129: x1 = xx[ncnt + 1];
130: yy[ncnt] = diag[0] * x0 + diag[1] * x1;
131: yy[ncnt + 1] = diag[2] * x0 + diag[3] * x1;
132: break;
133: case 3:
134: x0 = xx[ncnt];
135: x1 = xx[ncnt + 1];
136: x2 = xx[ncnt + 2];
137: yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
138: yy[ncnt + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
139: yy[ncnt + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
140: break;
141: case 4:
142: x0 = xx[ncnt];
143: x1 = xx[ncnt + 1];
144: x2 = xx[ncnt + 2];
145: x3 = xx[ncnt + 3];
146: yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
147: yy[ncnt + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
148: yy[ncnt + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
149: yy[ncnt + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
150: break;
151: case 5:
152: x0 = xx[ncnt];
153: x1 = xx[ncnt + 1];
154: x2 = xx[ncnt + 2];
155: x3 = xx[ncnt + 3];
156: x4 = xx[ncnt + 4];
157: yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
158: yy[ncnt + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
159: yy[ncnt + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
160: yy[ncnt + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
161: yy[ncnt + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
162: break;
163: case 6:
164: x0 = xx[ncnt];
165: x1 = xx[ncnt + 1];
166: x2 = xx[ncnt + 2];
167: x3 = xx[ncnt + 3];
168: x4 = xx[ncnt + 4];
169: x5 = xx[ncnt + 5];
170: yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
171: yy[ncnt + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
172: yy[ncnt + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
173: yy[ncnt + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
174: yy[ncnt + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
175: yy[ncnt + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
176: break;
177: case 7:
178: x0 = xx[ncnt];
179: x1 = xx[ncnt + 1];
180: x2 = xx[ncnt + 2];
181: x3 = xx[ncnt + 3];
182: x4 = xx[ncnt + 4];
183: x5 = xx[ncnt + 5];
184: x6 = xx[ncnt + 6];
185: yy[ncnt] = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
186: yy[ncnt + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
187: yy[ncnt + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
188: yy[ncnt + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
189: yy[ncnt + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
190: yy[ncnt + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
191: yy[ncnt + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
192: break;
193: default:
194: for (ib = 0; ib < bs; ib++) {
195: PetscScalar rowsum = 0;
196: for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[ncnt + jb];
197: yy[ncnt + ib] = rowsum;
198: }
199: }
200: ncnt += bsizes[i];
201: diag += bsizes[i] * bsizes[i];
202: }
203: PetscCall(VecRestoreArrayRead(x, &xx));
204: PetscCall(VecRestoreArray(y, &yy));
205: PetscFunctionReturn(PETSC_SUCCESS);
206: }
208: PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc, Mat diagVPB)
209: {
210: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
211: Mat A = diagVPB ? diagVPB : pc->pmat;
212: MatFactorError err;
213: PetscInt i, nsize = 0, nlocal;
214: PetscInt nblocks;
215: const PetscInt *bsizes;
217: PetscFunctionBegin;
218: PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
219: PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL));
220: PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
221: if (!jac->diag) {
222: PetscInt max_bs = -1, min_bs = PETSC_INT_MAX;
223: for (i = 0; i < nblocks; i++) {
224: min_bs = PetscMin(min_bs, bsizes[i]);
225: max_bs = PetscMax(max_bs, bsizes[i]);
226: nsize += bsizes[i] * bsizes[i];
227: }
228: PetscCall(PetscMalloc1(nsize, &jac->diag));
229: jac->nblocks = nblocks;
230: jac->min_bs = min_bs;
231: jac->max_bs = max_bs;
232: }
233: PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag));
234: PetscCall(MatFactorGetError(A, &err));
235: if (err) pc->failedreason = (PCFailedReason)err;
236: pc->ops->apply = PCApply_VPBJacobi;
237: pc->ops->applytranspose = PCApplyTranspose_VPBJacobi;
238: PetscFunctionReturn(PETSC_SUCCESS);
239: }
241: static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
242: {
243: PetscBool flg;
244: Mat diagVPB = NULL;
246: PetscFunctionBegin;
247: // In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch
249: // pmat (e.g., MatCEED from libCEED) might have its own method to provide a matrix (diagVPB)
250: // made of the diagonal blocks. So we check both pmat and diagVPB.
251: PetscCall(MatHasOperation(pc->pmat, MATOP_GET_VBLOCK_DIAGONAL, &flg));
252: if (flg) PetscUseTypeMethod(pc->pmat, getvblockdiagonal, &diagVPB); // diagVPB's reference count is increased upon return
254: #if defined(PETSC_HAVE_CUDA)
255: PetscBool isCuda;
256: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
257: if (!isCuda && diagVPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagVPB, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
258: #endif
259: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
260: PetscBool isKok;
261: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
262: if (!isKok && diagVPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagVPB, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
263: #endif
265: #if defined(PETSC_HAVE_CUDA)
266: if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc, diagVPB));
267: else
268: #endif
269: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
270: if (isKok)
271: PetscCall(PCSetUp_VPBJacobi_Kokkos(pc, diagVPB));
272: else
273: #endif
274: {
275: PetscCall(PCSetUp_VPBJacobi_Host(pc, diagVPB));
276: }
277: PetscCall(MatDestroy(&diagVPB)); // since we don't need it anymore, we don't need to stash it in PC_VPBJacobi
278: PetscFunctionReturn(PETSC_SUCCESS);
279: }
281: static PetscErrorCode PCView_VPBJacobi(PC pc, PetscViewer viewer)
282: {
283: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
284: PetscBool iascii;
286: PetscFunctionBegin;
287: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
288: if (iascii) {
289: PetscCall(PetscViewerASCIIPrintf(viewer, " number of blocks: %" PetscInt_FMT "\n", jac->nblocks));
290: PetscCall(PetscViewerASCIIPrintf(viewer, " block sizes: min=%" PetscInt_FMT " max=%" PetscInt_FMT "\n", jac->min_bs, jac->max_bs));
291: }
292: PetscFunctionReturn(PETSC_SUCCESS);
293: }
295: PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc)
296: {
297: PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
299: PetscFunctionBegin;
300: /*
301: Free the private data structure that was hanging off the PC
302: */
303: PetscCall(PetscFree(jac->diag));
304: PetscCall(MatDestroy(&jac->diagVPB));
305: PetscCall(PetscFree(pc->data));
306: PetscFunctionReturn(PETSC_SUCCESS);
307: }
309: /*MC
310: PCVPBJACOBI - Variable size point block Jacobi preconditioner
312: Level: beginner
314: Notes:
315: See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks
317: This works for `MATAIJ` matrices
319: Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
320: is detected a PETSc error is generated.
322: One must call `MatSetVariableBlockSizes()` to use this preconditioner
324: Developer Notes:
325: This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
326: the factorization to continue even after a zero pivot is found resulting in a Nan and hence
327: terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
328: a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.
330: Perhaps should provide an option that allows generation of a valid preconditioner
331: even if a block is singular as the `PCJACOBI` does.
333: .seealso: [](ch_ksp), `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
334: M*/
336: PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
337: {
338: PC_VPBJacobi *jac;
340: PetscFunctionBegin;
341: /*
342: Creates the private data structure for this preconditioner and
343: attach it to the PC object.
344: */
345: PetscCall(PetscNew(&jac));
346: pc->data = (void *)jac;
348: /*
349: Initialize the pointers to vectors to ZERO; these will be used to store
350: diagonal entries of the matrix for fast preconditioner application.
351: */
352: jac->diag = NULL;
354: /*
355: Set the pointers for the functions that are provided above.
356: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
357: are called, they will automatically call these functions. Note we
358: choose not to provide a couple of these functions since they are
359: not needed.
360: */
361: pc->ops->apply = PCApply_VPBJacobi;
362: pc->ops->applytranspose = NULL;
363: pc->ops->setup = PCSetUp_VPBJacobi;
364: pc->ops->destroy = PCDestroy_VPBJacobi;
365: pc->ops->setfromoptions = NULL;
366: pc->ops->view = PCView_VPBJacobi;
367: pc->ops->applyrichardson = NULL;
368: pc->ops->applysymmetricleft = NULL;
369: pc->ops->applysymmetricright = NULL;
370: PetscFunctionReturn(PETSC_SUCCESS);
371: }