Actual source code: pbjacobi.c


  2: /*
  3:    Include files needed for the PBJacobi preconditioner:
  4:      pcimpl.h - private include file intended for use by all preconditioners
  5: */

  7: #include <petsc/private/pcimpl.h>

  9: /*
 10:    Private context (data structure) for the PBJacobi preconditioner.
 11: */
 12: typedef struct {
 13:   const MatScalar *diag;
 14:   PetscInt        bs,mbs;
 15: } PC_PBJacobi;

 17: static PetscErrorCode PCApply_PBJacobi_1(PC pc,Vec x,Vec y)
 18: {
 19:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
 20:   PetscErrorCode    ierr;
 21:   PetscInt          i,m = jac->mbs;
 22:   const MatScalar   *diag = jac->diag;
 23:   const PetscScalar *xx;
 24:   PetscScalar       *yy;

 27:   VecGetArrayRead(x,&xx);
 28:   VecGetArray(y,&yy);
 29:   for (i=0; i<m; i++) yy[i] = diag[i]*xx[i];
 30:   VecRestoreArrayRead(x,&xx);
 31:   VecRestoreArray(y,&yy);
 32:   PetscLogFlops(m);
 33:   return(0);
 34: }

 36: static PetscErrorCode PCApply_PBJacobi_2(PC pc,Vec x,Vec y)
 37: {
 38:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 39:   PetscErrorCode  ierr;
 40:   PetscInt        i,m = jac->mbs;
 41:   const MatScalar *diag = jac->diag;
 42:   PetscScalar     x0,x1,*yy;
 43:   const PetscScalar *xx;

 46:   VecGetArrayRead(x,&xx);
 47:   VecGetArray(y,&yy);
 48:   for (i=0; i<m; i++) {
 49:     x0        = xx[2*i]; x1 = xx[2*i+1];
 50:     yy[2*i]   = diag[0]*x0 + diag[2]*x1;
 51:     yy[2*i+1] = diag[1]*x0 + diag[3]*x1;
 52:     diag     += 4;
 53:   }
 54:   VecRestoreArrayRead(x,&xx);
 55:   VecRestoreArray(y,&yy);
 56:   PetscLogFlops(6.0*m);
 57:   return(0);
 58: }
 59: static PetscErrorCode PCApply_PBJacobi_3(PC pc,Vec x,Vec y)
 60: {
 61:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 62:   PetscErrorCode  ierr;
 63:   PetscInt        i,m = jac->mbs;
 64:   const MatScalar *diag = jac->diag;
 65:   PetscScalar     x0,x1,x2,*yy;
 66:   const PetscScalar *xx;

 69:   VecGetArrayRead(x,&xx);
 70:   VecGetArray(y,&yy);
 71:   for (i=0; i<m; i++) {
 72:     x0 = xx[3*i]; x1 = xx[3*i+1]; x2 = xx[3*i+2];

 74:     yy[3*i]   = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
 75:     yy[3*i+1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
 76:     yy[3*i+2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
 77:     diag     += 9;
 78:   }
 79:   VecRestoreArrayRead(x,&xx);
 80:   VecRestoreArray(y,&yy);
 81:   PetscLogFlops(15.0*m);
 82:   return(0);
 83: }
 84: static PetscErrorCode PCApply_PBJacobi_4(PC pc,Vec x,Vec y)
 85: {
 86:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
 87:   PetscErrorCode  ierr;
 88:   PetscInt        i,m = jac->mbs;
 89:   const MatScalar *diag = jac->diag;
 90:   PetscScalar     x0,x1,x2,x3,*yy;
 91:   const PetscScalar *xx;

 94:   VecGetArrayRead(x,&xx);
 95:   VecGetArray(y,&yy);
 96:   for (i=0; i<m; i++) {
 97:     x0 = xx[4*i]; x1 = xx[4*i+1]; x2 = xx[4*i+2]; x3 = xx[4*i+3];

 99:     yy[4*i]   = diag[0]*x0 + diag[4]*x1 + diag[8]*x2  + diag[12]*x3;
100:     yy[4*i+1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2  + diag[13]*x3;
101:     yy[4*i+2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2 + diag[14]*x3;
102:     yy[4*i+3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2 + diag[15]*x3;
103:     diag     += 16;
104:   }
105:   VecRestoreArrayRead(x,&xx);
106:   VecRestoreArray(y,&yy);
107:   PetscLogFlops(28.0*m);
108:   return(0);
109: }
110: static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
111: {
112:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
113:   PetscErrorCode  ierr;
114:   PetscInt        i,m = jac->mbs;
115:   const MatScalar *diag = jac->diag;
116:   PetscScalar     x0,x1,x2,x3,x4,*yy;
117:   const PetscScalar *xx;

120:   VecGetArrayRead(x,&xx);
121:   VecGetArray(y,&yy);
122:   for (i=0; i<m; i++) {
123:     x0 = xx[5*i]; x1 = xx[5*i+1]; x2 = xx[5*i+2]; x3 = xx[5*i+3]; x4 = xx[5*i+4];

125:     yy[5*i]   = diag[0]*x0 + diag[5]*x1 + diag[10]*x2  + diag[15]*x3 + diag[20]*x4;
126:     yy[5*i+1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2  + diag[16]*x3 + diag[21]*x4;
127:     yy[5*i+2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
128:     yy[5*i+3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
129:     yy[5*i+4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
130:     diag     += 25;
131:   }
132:   VecRestoreArrayRead(x,&xx);
133:   VecRestoreArray(y,&yy);
134:   PetscLogFlops(45.0*m);
135:   return(0);
136: }
137: static PetscErrorCode PCApply_PBJacobi_6(PC pc,Vec x,Vec y)
138: {
139:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
140:   PetscErrorCode  ierr;
141:   PetscInt        i,m = jac->mbs;
142:   const MatScalar *diag = jac->diag;
143:   PetscScalar     x0,x1,x2,x3,x4,x5,*yy;
144:   const PetscScalar *xx;

147:   VecGetArrayRead(x,&xx);
148:   VecGetArray(y,&yy);
149:   for (i=0; i<m; i++) {
150:     x0 = xx[6*i]; x1 = xx[6*i+1]; x2 = xx[6*i+2]; x3 = xx[6*i+3]; x4 = xx[6*i+4]; x5 = xx[6*i+5];

152:     yy[6*i]   = diag[0]*x0 + diag[6]*x1  + diag[12]*x2  + diag[18]*x3 + diag[24]*x4 + diag[30]*x5;
153:     yy[6*i+1] = diag[1]*x0 + diag[7]*x1  + diag[13]*x2  + diag[19]*x3 + diag[25]*x4 + diag[31]*x5;
154:     yy[6*i+2] = diag[2]*x0 + diag[8]*x1  + diag[14]*x2  + diag[20]*x3 + diag[26]*x4 + diag[32]*x5;
155:     yy[6*i+3] = diag[3]*x0 + diag[9]*x1  + diag[15]*x2  + diag[21]*x3 + diag[27]*x4 + diag[33]*x5;
156:     yy[6*i+4] = diag[4]*x0 + diag[10]*x1 + diag[16]*x2  + diag[22]*x3 + diag[28]*x4 + diag[34]*x5;
157:     yy[6*i+5] = diag[5]*x0 + diag[11]*x1 + diag[17]*x2  + diag[23]*x3 + diag[29]*x4 + diag[35]*x5;
158:     diag     += 36;
159:   }
160:   VecRestoreArrayRead(x,&xx);
161:   VecRestoreArray(y,&yy);
162:   PetscLogFlops(66.0*m);
163:   return(0);
164: }
165: static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y)
166: {
167:   PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
168:   PetscErrorCode  ierr;
169:   PetscInt        i,m = jac->mbs;
170:   const MatScalar *diag = jac->diag;
171:   PetscScalar     x0,x1,x2,x3,x4,x5,x6,*yy;
172:   const PetscScalar *xx;

175:   VecGetArrayRead(x,&xx);
176:   VecGetArray(y,&yy);
177:   for (i=0; i<m; i++) {
178:     x0 = xx[7*i]; x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3]; x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6];

180:     yy[7*i]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2  + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
181:     yy[7*i+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2  + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
182:     yy[7*i+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2  + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
183:     yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2  + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
184:     yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2  + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
185:     yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2  + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
186:     yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2  + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;
187:     diag     += 49;
188:   }
189:   VecRestoreArrayRead(x,&xx);
190:   VecRestoreArray(y,&yy);
191:   PetscLogFlops(91.0*m); /* 2*bs2 - bs */
192:   return(0);
193: }
194: static PetscErrorCode PCApply_PBJacobi_N(PC pc,Vec x,Vec y)
195: {
196:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
197:   PetscErrorCode    ierr;
198:   PetscInt          i,ib,jb;
199:   const PetscInt    m = jac->mbs;
200:   const PetscInt    bs = jac->bs;
201:   const MatScalar   *diag = jac->diag;
202:   PetscScalar       *yy;
203:   const PetscScalar *xx;

206:   VecGetArrayRead(x,&xx);
207:   VecGetArray(y,&yy);
208:   for (i=0; i<m; i++) {
209:     for (ib=0; ib<bs; ib++) {
210:       PetscScalar rowsum = 0;
211:       for (jb=0; jb<bs; jb++) {
212:         rowsum += diag[ib+jb*bs] * xx[bs*i+jb];
213:       }
214:       yy[bs*i+ib] = rowsum;
215:     }
216:     diag += bs*bs;
217:   }
218:   VecRestoreArrayRead(x,&xx);
219:   VecRestoreArray(y,&yy);
220:   PetscLogFlops((2.0*bs*bs-bs)*m); /* 2*bs2 - bs */
221:   return(0);
222: }

224: static PetscErrorCode PCApplyTranspose_PBJacobi_N(PC pc,Vec x,Vec y)
225: {
226:   PC_PBJacobi       *jac = (PC_PBJacobi*)pc->data;
227:   PetscErrorCode    ierr;
228:   PetscInt          i,j,k,m = jac->mbs,bs=jac->bs;
229:   const MatScalar   *diag = jac->diag;
230:   const PetscScalar *xx;
231:   PetscScalar       *yy;

234:   VecGetArrayRead(x,&xx);
235:   VecGetArray(y,&yy);
236:   for (i=0; i<m; i++) {
237:     for (j=0; j<bs; j++) yy[i*bs+j] = 0.;
238:     for (j=0; j<bs; j++) {
239:       for (k=0; k<bs; k++) {
240:         yy[i*bs+k] += diag[k*bs+j]*xx[i*bs+j];
241:       }
242:     }
243:     diag += bs*bs;
244:   }
245:   VecRestoreArrayRead(x,&xx);
246:   VecRestoreArray(y,&yy);
247:   PetscLogFlops(m*bs*(2*bs-1));
248:   return(0);
249: }

251: /* -------------------------------------------------------------------------- */
252: static PetscErrorCode PCSetUp_PBJacobi(PC pc)
253: {
254:   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
256:   Mat            A = pc->pmat;
257:   MatFactorError err;
258:   PetscInt       nlocal;

261:   MatInvertBlockDiagonal(A,&jac->diag);
262:   MatFactorGetError(A,&err);
263:   if (err) pc->failedreason = (PCFailedReason)err;

265:   MatGetBlockSize(A,&jac->bs);
266:   MatGetLocalSize(A,&nlocal,NULL);
267:   jac->mbs = nlocal/jac->bs;
268:   switch (jac->bs) {
269:   case 1:
270:     pc->ops->apply = PCApply_PBJacobi_1;
271:     break;
272:   case 2:
273:     pc->ops->apply = PCApply_PBJacobi_2;
274:     break;
275:   case 3:
276:     pc->ops->apply = PCApply_PBJacobi_3;
277:     break;
278:   case 4:
279:     pc->ops->apply = PCApply_PBJacobi_4;
280:     break;
281:   case 5:
282:     pc->ops->apply = PCApply_PBJacobi_5;
283:     break;
284:   case 6:
285:     pc->ops->apply = PCApply_PBJacobi_6;
286:     break;
287:   case 7:
288:     pc->ops->apply = PCApply_PBJacobi_7;
289:     break;
290:   default:
291:     pc->ops->apply = PCApply_PBJacobi_N;
292:     break;
293:   }
294:   pc->ops->applytranspose = PCApplyTranspose_PBJacobi_N;
295:   return(0);
296: }
297: /* -------------------------------------------------------------------------- */
298: static PetscErrorCode PCDestroy_PBJacobi(PC pc)
299: {

303:   /*
304:       Free the private data structure that was hanging off the PC
305:   */
306:   PetscFree(pc->data);
307:   return(0);
308: }

310: static PetscErrorCode PCView_PBJacobi(PC pc,PetscViewer viewer)
311: {
313:   PC_PBJacobi    *jac = (PC_PBJacobi*)pc->data;
314:   PetscBool      iascii;

317:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
318:   if (iascii) {
319:     PetscViewerASCIIPrintf(viewer,"  point-block size %D\n",jac->bs);
320:   }
321:   return(0);
322: }

324: /* -------------------------------------------------------------------------- */
325: /*MC
326:      PCPBJACOBI - Point block Jacobi preconditioner

328:    Notes:
329:     See PCJACOBI for point Jacobi preconditioning

331:    This works for AIJ and BAIJ matrices and uses the blocksize provided to the matrix

333:    Uses dense LU factorization with partial pivoting to invert the blocks; if a zero pivot
334:    is detected a PETSc error is generated.

336:    Developer Notes:
337:     This should support the PCSetErrorIfFailure() flag set to PETSC_TRUE to allow
338:    the factorization to continue even after a zero pivot is found resulting in a Nan and hence
339:    terminating KSP with a KSP_DIVERGED_NANORIF allowing
340:    a nonlinear solver/ODE integrator to recover without stopping the program as currently happens.

342:    Developer Note: Perhaps should provide an option that allows generation of a valid preconditioner
343:    even if a block is singular as the PCJACOBI does.

345:    Level: beginner

347: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC, PCJACOBI

349: M*/

351: PETSC_EXTERN PetscErrorCode PCCreate_PBJacobi(PC pc)
352: {
353:   PC_PBJacobi    *jac;

357:   /*
358:      Creates the private data structure for this preconditioner and
359:      attach it to the PC object.
360:   */
361:   PetscNewLog(pc,&jac);
362:   pc->data = (void*)jac;

364:   /*
365:      Initialize the pointers to vectors to ZERO; these will be used to store
366:      diagonal entries of the matrix for fast preconditioner application.
367:   */
368:   jac->diag = NULL;

370:   /*
371:       Set the pointers for the functions that are provided above.
372:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
373:       are called, they will automatically call these functions.  Note we
374:       choose not to provide a couple of these functions since they are
375:       not needed.
376:   */
377:   pc->ops->apply               = NULL; /*set depending on the block size */
378:   pc->ops->applytranspose      = NULL;
379:   pc->ops->setup               = PCSetUp_PBJacobi;
380:   pc->ops->destroy             = PCDestroy_PBJacobi;
381:   pc->ops->setfromoptions      = NULL;
382:   pc->ops->view                = PCView_PBJacobi;
383:   pc->ops->applyrichardson     = NULL;
384:   pc->ops->applysymmetricleft  = NULL;
385:   pc->ops->applysymmetricright = NULL;
386:   return(0);
387: }