Actual source code: vpbjacobi.c

  1: #include <../src/ksp/pc/impls/vpbjacobi/vpbjacobi.h>
  2: #include <petsc/private/matimpl.h>

  4: static PetscErrorCode PCApply_VPBJacobi(PC pc, Vec x, Vec y)
  5: {
  6:   PC_VPBJacobi      *jac = (PC_VPBJacobi *)pc->data;
  7:   PetscInt           i, ncnt = 0;
  8:   const MatScalar   *diag = jac->diag;
  9:   PetscInt           ib, jb, bs;
 10:   const PetscScalar *xx;
 11:   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
 12:   PetscInt           nblocks;
 13:   const PetscInt    *bsizes;

 15:   PetscFunctionBegin;
 16:   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
 17:   PetscCall(VecGetArrayRead(x, &xx));
 18:   PetscCall(VecGetArray(y, &yy));
 19:   for (i = 0; i < nblocks; i++) {
 20:     bs = bsizes[i];
 21:     switch (bs) {
 22:     case 1:
 23:       yy[ncnt] = *diag * xx[ncnt];
 24:       break;
 25:     case 2:
 26:       x0           = xx[ncnt];
 27:       x1           = xx[ncnt + 1];
 28:       yy[ncnt]     = diag[0] * x0 + diag[2] * x1;
 29:       yy[ncnt + 1] = diag[1] * x0 + diag[3] * x1;
 30:       break;
 31:     case 3:
 32:       x0           = xx[ncnt];
 33:       x1           = xx[ncnt + 1];
 34:       x2           = xx[ncnt + 2];
 35:       yy[ncnt]     = diag[0] * x0 + diag[3] * x1 + diag[6] * x2;
 36:       yy[ncnt + 1] = diag[1] * x0 + diag[4] * x1 + diag[7] * x2;
 37:       yy[ncnt + 2] = diag[2] * x0 + diag[5] * x1 + diag[8] * x2;
 38:       break;
 39:     case 4:
 40:       x0           = xx[ncnt];
 41:       x1           = xx[ncnt + 1];
 42:       x2           = xx[ncnt + 2];
 43:       x3           = xx[ncnt + 3];
 44:       yy[ncnt]     = diag[0] * x0 + diag[4] * x1 + diag[8] * x2 + diag[12] * x3;
 45:       yy[ncnt + 1] = diag[1] * x0 + diag[5] * x1 + diag[9] * x2 + diag[13] * x3;
 46:       yy[ncnt + 2] = diag[2] * x0 + diag[6] * x1 + diag[10] * x2 + diag[14] * x3;
 47:       yy[ncnt + 3] = diag[3] * x0 + diag[7] * x1 + diag[11] * x2 + diag[15] * x3;
 48:       break;
 49:     case 5:
 50:       x0           = xx[ncnt];
 51:       x1           = xx[ncnt + 1];
 52:       x2           = xx[ncnt + 2];
 53:       x3           = xx[ncnt + 3];
 54:       x4           = xx[ncnt + 4];
 55:       yy[ncnt]     = diag[0] * x0 + diag[5] * x1 + diag[10] * x2 + diag[15] * x3 + diag[20] * x4;
 56:       yy[ncnt + 1] = diag[1] * x0 + diag[6] * x1 + diag[11] * x2 + diag[16] * x3 + diag[21] * x4;
 57:       yy[ncnt + 2] = diag[2] * x0 + diag[7] * x1 + diag[12] * x2 + diag[17] * x3 + diag[22] * x4;
 58:       yy[ncnt + 3] = diag[3] * x0 + diag[8] * x1 + diag[13] * x2 + diag[18] * x3 + diag[23] * x4;
 59:       yy[ncnt + 4] = diag[4] * x0 + diag[9] * x1 + diag[14] * x2 + diag[19] * x3 + diag[24] * x4;
 60:       break;
 61:     case 6:
 62:       x0           = xx[ncnt];
 63:       x1           = xx[ncnt + 1];
 64:       x2           = xx[ncnt + 2];
 65:       x3           = xx[ncnt + 3];
 66:       x4           = xx[ncnt + 4];
 67:       x5           = xx[ncnt + 5];
 68:       yy[ncnt]     = diag[0] * x0 + diag[6] * x1 + diag[12] * x2 + diag[18] * x3 + diag[24] * x4 + diag[30] * x5;
 69:       yy[ncnt + 1] = diag[1] * x0 + diag[7] * x1 + diag[13] * x2 + diag[19] * x3 + diag[25] * x4 + diag[31] * x5;
 70:       yy[ncnt + 2] = diag[2] * x0 + diag[8] * x1 + diag[14] * x2 + diag[20] * x3 + diag[26] * x4 + diag[32] * x5;
 71:       yy[ncnt + 3] = diag[3] * x0 + diag[9] * x1 + diag[15] * x2 + diag[21] * x3 + diag[27] * x4 + diag[33] * x5;
 72:       yy[ncnt + 4] = diag[4] * x0 + diag[10] * x1 + diag[16] * x2 + diag[22] * x3 + diag[28] * x4 + diag[34] * x5;
 73:       yy[ncnt + 5] = diag[5] * x0 + diag[11] * x1 + diag[17] * x2 + diag[23] * x3 + diag[29] * x4 + diag[35] * x5;
 74:       break;
 75:     case 7:
 76:       x0           = xx[ncnt];
 77:       x1           = xx[ncnt + 1];
 78:       x2           = xx[ncnt + 2];
 79:       x3           = xx[ncnt + 3];
 80:       x4           = xx[ncnt + 4];
 81:       x5           = xx[ncnt + 5];
 82:       x6           = xx[ncnt + 6];
 83:       yy[ncnt]     = diag[0] * x0 + diag[7] * x1 + diag[14] * x2 + diag[21] * x3 + diag[28] * x4 + diag[35] * x5 + diag[42] * x6;
 84:       yy[ncnt + 1] = diag[1] * x0 + diag[8] * x1 + diag[15] * x2 + diag[22] * x3 + diag[29] * x4 + diag[36] * x5 + diag[43] * x6;
 85:       yy[ncnt + 2] = diag[2] * x0 + diag[9] * x1 + diag[16] * x2 + diag[23] * x3 + diag[30] * x4 + diag[37] * x5 + diag[44] * x6;
 86:       yy[ncnt + 3] = diag[3] * x0 + diag[10] * x1 + diag[17] * x2 + diag[24] * x3 + diag[31] * x4 + diag[38] * x5 + diag[45] * x6;
 87:       yy[ncnt + 4] = diag[4] * x0 + diag[11] * x1 + diag[18] * x2 + diag[25] * x3 + diag[32] * x4 + diag[39] * x5 + diag[46] * x6;
 88:       yy[ncnt + 5] = diag[5] * x0 + diag[12] * x1 + diag[19] * x2 + diag[26] * x3 + diag[33] * x4 + diag[40] * x5 + diag[47] * x6;
 89:       yy[ncnt + 6] = diag[6] * x0 + diag[13] * x1 + diag[20] * x2 + diag[27] * x3 + diag[34] * x4 + diag[41] * x5 + diag[48] * x6;
 90:       break;
 91:     default:
 92:       for (ib = 0; ib < bs; ib++) {
 93:         PetscScalar rowsum = 0;
 94:         for (jb = 0; jb < bs; jb++) rowsum += diag[ib + jb * bs] * xx[ncnt + jb];
 95:         yy[ncnt + ib] = rowsum;
 96:       }
 97:     }
 98:     ncnt += bsizes[i];
 99:     diag += bsizes[i] * bsizes[i];
100:   }
101:   PetscCall(VecRestoreArrayRead(x, &xx));
102:   PetscCall(VecRestoreArray(y, &yy));
103:   PetscFunctionReturn(PETSC_SUCCESS);
104: }

106: static PetscErrorCode PCApplyTranspose_VPBJacobi(PC pc, Vec x, Vec y)
107: {
108:   PC_VPBJacobi      *jac = (PC_VPBJacobi *)pc->data;
109:   PetscInt           i, ncnt = 0;
110:   const MatScalar   *diag = jac->diag;
111:   PetscInt           ib, jb, bs;
112:   const PetscScalar *xx;
113:   PetscScalar       *yy, x0, x1, x2, x3, x4, x5, x6;
114:   PetscInt           nblocks;
115:   const PetscInt    *bsizes;

117:   PetscFunctionBegin;
118:   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
119:   PetscCall(VecGetArrayRead(x, &xx));
120:   PetscCall(VecGetArray(y, &yy));
121:   for (i = 0; i < nblocks; i++) {
122:     bs = bsizes[i];
123:     switch (bs) {
124:     case 1:
125:       yy[ncnt] = *diag * xx[ncnt];
126:       break;
127:     case 2:
128:       x0           = xx[ncnt];
129:       x1           = xx[ncnt + 1];
130:       yy[ncnt]     = diag[0] * x0 + diag[1] * x1;
131:       yy[ncnt + 1] = diag[2] * x0 + diag[3] * x1;
132:       break;
133:     case 3:
134:       x0           = xx[ncnt];
135:       x1           = xx[ncnt + 1];
136:       x2           = xx[ncnt + 2];
137:       yy[ncnt]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2;
138:       yy[ncnt + 1] = diag[3] * x0 + diag[4] * x1 + diag[5] * x2;
139:       yy[ncnt + 2] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2;
140:       break;
141:     case 4:
142:       x0           = xx[ncnt];
143:       x1           = xx[ncnt + 1];
144:       x2           = xx[ncnt + 2];
145:       x3           = xx[ncnt + 3];
146:       yy[ncnt]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3;
147:       yy[ncnt + 1] = diag[4] * x0 + diag[5] * x1 + diag[6] * x2 + diag[7] * x3;
148:       yy[ncnt + 2] = diag[8] * x0 + diag[9] * x1 + diag[10] * x2 + diag[11] * x3;
149:       yy[ncnt + 3] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3;
150:       break;
151:     case 5:
152:       x0           = xx[ncnt];
153:       x1           = xx[ncnt + 1];
154:       x2           = xx[ncnt + 2];
155:       x3           = xx[ncnt + 3];
156:       x4           = xx[ncnt + 4];
157:       yy[ncnt]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4;
158:       yy[ncnt + 1] = diag[5] * x0 + diag[6] * x1 + diag[7] * x2 + diag[8] * x3 + diag[9] * x4;
159:       yy[ncnt + 2] = diag[10] * x0 + diag[11] * x1 + diag[12] * x2 + diag[13] * x3 + diag[14] * x4;
160:       yy[ncnt + 3] = diag[15] * x0 + diag[16] * x1 + diag[17] * x2 + diag[18] * x3 + diag[19] * x4;
161:       yy[ncnt + 4] = diag[20] * x0 + diag[21] * x1 + diag[22] * x2 + diag[23] * x3 + diag[24] * x4;
162:       break;
163:     case 6:
164:       x0           = xx[ncnt];
165:       x1           = xx[ncnt + 1];
166:       x2           = xx[ncnt + 2];
167:       x3           = xx[ncnt + 3];
168:       x4           = xx[ncnt + 4];
169:       x5           = xx[ncnt + 5];
170:       yy[ncnt]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5;
171:       yy[ncnt + 1] = diag[6] * x0 + diag[7] * x1 + diag[8] * x2 + diag[9] * x3 + diag[10] * x4 + diag[11] * x5;
172:       yy[ncnt + 2] = diag[12] * x0 + diag[13] * x1 + diag[14] * x2 + diag[15] * x3 + diag[16] * x4 + diag[17] * x5;
173:       yy[ncnt + 3] = diag[18] * x0 + diag[19] * x1 + diag[20] * x2 + diag[21] * x3 + diag[22] * x4 + diag[23] * x5;
174:       yy[ncnt + 4] = diag[24] * x0 + diag[25] * x1 + diag[26] * x2 + diag[27] * x3 + diag[28] * x4 + diag[29] * x5;
175:       yy[ncnt + 5] = diag[30] * x0 + diag[31] * x1 + diag[32] * x2 + diag[33] * x3 + diag[34] * x4 + diag[35] * x5;
176:       break;
177:     case 7:
178:       x0           = xx[ncnt];
179:       x1           = xx[ncnt + 1];
180:       x2           = xx[ncnt + 2];
181:       x3           = xx[ncnt + 3];
182:       x4           = xx[ncnt + 4];
183:       x5           = xx[ncnt + 5];
184:       x6           = xx[ncnt + 6];
185:       yy[ncnt]     = diag[0] * x0 + diag[1] * x1 + diag[2] * x2 + diag[3] * x3 + diag[4] * x4 + diag[5] * x5 + diag[6] * x6;
186:       yy[ncnt + 1] = diag[7] * x0 + diag[8] * x1 + diag[9] * x2 + diag[10] * x3 + diag[11] * x4 + diag[12] * x5 + diag[13] * x6;
187:       yy[ncnt + 2] = diag[14] * x0 + diag[15] * x1 + diag[16] * x2 + diag[17] * x3 + diag[18] * x4 + diag[19] * x5 + diag[20] * x6;
188:       yy[ncnt + 3] = diag[21] * x0 + diag[22] * x1 + diag[23] * x2 + diag[24] * x3 + diag[25] * x4 + diag[26] * x5 + diag[27] * x6;
189:       yy[ncnt + 4] = diag[28] * x0 + diag[29] * x1 + diag[30] * x2 + diag[31] * x3 + diag[32] * x4 + diag[33] * x5 + diag[34] * x6;
190:       yy[ncnt + 5] = diag[35] * x0 + diag[36] * x1 + diag[37] * x2 + diag[38] * x3 + diag[39] * x4 + diag[40] * x5 + diag[41] * x6;
191:       yy[ncnt + 6] = diag[42] * x0 + diag[43] * x1 + diag[44] * x2 + diag[45] * x3 + diag[46] * x4 + diag[47] * x5 + diag[48] * x6;
192:       break;
193:     default:
194:       for (ib = 0; ib < bs; ib++) {
195:         PetscScalar rowsum = 0;
196:         for (jb = 0; jb < bs; jb++) rowsum += diag[ib * bs + jb] * xx[ncnt + jb];
197:         yy[ncnt + ib] = rowsum;
198:       }
199:     }
200:     ncnt += bsizes[i];
201:     diag += bsizes[i] * bsizes[i];
202:   }
203:   PetscCall(VecRestoreArrayRead(x, &xx));
204:   PetscCall(VecRestoreArray(y, &yy));
205:   PetscFunctionReturn(PETSC_SUCCESS);
206: }

208: PETSC_INTERN PetscErrorCode PCSetUp_VPBJacobi_Host(PC pc, Mat diagVPB)
209: {
210:   PC_VPBJacobi   *jac = (PC_VPBJacobi *)pc->data;
211:   Mat             A   = diagVPB ? diagVPB : pc->pmat;
212:   MatFactorError  err;
213:   PetscInt        i, nsize = 0, nlocal;
214:   PetscInt        nblocks;
215:   const PetscInt *bsizes;

217:   PetscFunctionBegin;
218:   PetscCall(MatGetVariableBlockSizes(pc->pmat, &nblocks, &bsizes));
219:   PetscCall(MatGetLocalSize(pc->pmat, &nlocal, NULL));
220:   PetscCheck(!nlocal || nblocks, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call MatSetVariableBlockSizes() before using PCVPBJACOBI");
221:   if (!jac->diag) {
222:     PetscInt max_bs = -1, min_bs = PETSC_INT_MAX;
223:     for (i = 0; i < nblocks; i++) {
224:       min_bs = PetscMin(min_bs, bsizes[i]);
225:       max_bs = PetscMax(max_bs, bsizes[i]);
226:       nsize += bsizes[i] * bsizes[i];
227:     }
228:     PetscCall(PetscMalloc1(nsize, &jac->diag));
229:     jac->nblocks = nblocks;
230:     jac->min_bs  = min_bs;
231:     jac->max_bs  = max_bs;
232:   }
233:   PetscCall(MatInvertVariableBlockDiagonal(A, nblocks, bsizes, jac->diag));
234:   PetscCall(MatFactorGetError(A, &err));
235:   if (err) pc->failedreason = (PCFailedReason)err;
236:   pc->ops->apply          = PCApply_VPBJacobi;
237:   pc->ops->applytranspose = PCApplyTranspose_VPBJacobi;
238:   PetscFunctionReturn(PETSC_SUCCESS);
239: }

241: static PetscErrorCode PCSetUp_VPBJacobi(PC pc)
242: {
243:   PetscBool flg;
244:   Mat       diagVPB = NULL;

246:   PetscFunctionBegin;
247:   // In PCCreate_VPBJacobi() pmat might have not been set, so we wait to the last minute to do the dispatch

249:   // pmat (e.g., MatCEED from libCEED) might have its own method to provide a matrix (diagVPB)
250:   // made of the diagonal blocks. So we check both pmat and diagVPB.
251:   PetscCall(MatHasOperation(pc->pmat, MATOP_GET_VBLOCK_DIAGONAL, &flg));
252:   if (flg) PetscUseTypeMethod(pc->pmat, getvblockdiagonal, &diagVPB); // diagVPB's reference count is increased upon return

254: #if defined(PETSC_HAVE_CUDA)
255:   PetscBool isCuda;
256:   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
257:   if (!isCuda && diagVPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagVPB, &isCuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
258: #endif
259: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
260:   PetscBool isKok;
261:   PetscCall(PetscObjectTypeCompareAny((PetscObject)pc->pmat, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
262:   if (!isKok && diagVPB) PetscCall(PetscObjectTypeCompareAny((PetscObject)diagVPB, &isKok, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
263: #endif

265: #if defined(PETSC_HAVE_CUDA)
266:   if (isCuda) PetscCall(PCSetUp_VPBJacobi_CUDA(pc, diagVPB));
267:   else
268: #endif
269: #if defined(PETSC_HAVE_KOKKOS_KERNELS)
270:     if (isKok)
271:     PetscCall(PCSetUp_VPBJacobi_Kokkos(pc, diagVPB));
272:   else
273: #endif
274:   {
275:     PetscCall(PCSetUp_VPBJacobi_Host(pc, diagVPB));
276:   }
277:   PetscCall(MatDestroy(&diagVPB)); // since we don't need it anymore, we don't need to stash it in PC_VPBJacobi
278:   PetscFunctionReturn(PETSC_SUCCESS);
279: }

281: static PetscErrorCode PCView_VPBJacobi(PC pc, PetscViewer viewer)
282: {
283:   PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;
284:   PetscBool     iascii;

286:   PetscFunctionBegin;
287:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
288:   if (iascii) {
289:     PetscCall(PetscViewerASCIIPrintf(viewer, "  number of blocks: %" PetscInt_FMT "\n", jac->nblocks));
290:     PetscCall(PetscViewerASCIIPrintf(viewer, "  block sizes: min=%" PetscInt_FMT " max=%" PetscInt_FMT "\n", jac->min_bs, jac->max_bs));
291:   }
292:   PetscFunctionReturn(PETSC_SUCCESS);
293: }

295: PETSC_INTERN PetscErrorCode PCDestroy_VPBJacobi(PC pc)
296: {
297:   PC_VPBJacobi *jac = (PC_VPBJacobi *)pc->data;

299:   PetscFunctionBegin;
300:   /*
301:       Free the private data structure that was hanging off the PC
302:   */
303:   PetscCall(PetscFree(jac->diag));
304:   PetscCall(MatDestroy(&jac->diagVPB));
305:   PetscCall(PetscFree(pc->data));
306:   PetscFunctionReturn(PETSC_SUCCESS);
307: }

309: /*MC
310:      PCVPBJACOBI - Variable size point block Jacobi preconditioner

312:    Level: beginner

314:    Notes:
315:      See `PCJACOBI` for point Jacobi preconditioning, `PCPBJACOBI` for fixed point block size, and `PCBJACOBI` for large size blocks

317:      This works for `MATAIJ` matrices

319:      Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
320:      is detected a PETSc error is generated.

322:      One must call `MatSetVariableBlockSizes()` to use this preconditioner

324:    Developer Notes:
325:      This should support the `PCSetErrorIfFailure()` flag set to `PETSC_TRUE` to allow
326:      the factorization to continue even after a zero pivot is found resulting in a Nan and hence
327:      terminating `KSP` with a `KSP_DIVERGED_NANORINF` allowing
328:      a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

330:      Perhaps should provide an option that allows generation of a valid preconditioner
331:      even if a block is singular as the `PCJACOBI` does.

333: .seealso: [](ch_ksp), `MatSetVariableBlockSizes()`, `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `PCJACOBI`, `PCPBJACOBI`, `PCBJACOBI`
334: M*/

336: PETSC_EXTERN PetscErrorCode PCCreate_VPBJacobi(PC pc)
337: {
338:   PC_VPBJacobi *jac;

340:   PetscFunctionBegin;
341:   /*
342:      Creates the private data structure for this preconditioner and
343:      attach it to the PC object.
344:   */
345:   PetscCall(PetscNew(&jac));
346:   pc->data = (void *)jac;

348:   /*
349:      Initialize the pointers to vectors to ZERO; these will be used to store
350:      diagonal entries of the matrix for fast preconditioner application.
351:   */
352:   jac->diag = NULL;

354:   /*
355:       Set the pointers for the functions that are provided above.
356:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
357:       are called, they will automatically call these functions.  Note we
358:       choose not to provide a couple of these functions since they are
359:       not needed.
360:   */
361:   pc->ops->apply               = PCApply_VPBJacobi;
362:   pc->ops->applytranspose      = NULL;
363:   pc->ops->setup               = PCSetUp_VPBJacobi;
364:   pc->ops->destroy             = PCDestroy_VPBJacobi;
365:   pc->ops->setfromoptions      = NULL;
366:   pc->ops->view                = PCView_VPBJacobi;
367:   pc->ops->applyrichardson     = NULL;
368:   pc->ops->applysymmetricleft  = NULL;
369:   pc->ops->applysymmetricright = NULL;
370:   PetscFunctionReturn(PETSC_SUCCESS);
371: }