Actual source code: baijfact.c

  1: /*
  2:     Factorization code for BAIJ format.
  3: */
  4: #include <../src/mat/impls/baij/seq/baij.h>
  5: #include <petsc/private/kernels/blockinvert.h>

  7: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat B, Mat A, const MatFactorInfo *info)
  8: {
  9:   Mat             C = B;
 10:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
 11:   IS              isrow = b->row, isicol = b->icol;
 12:   const PetscInt *r, *ic;
 13:   const PetscInt  n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2;
 14:   const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag;
 15:   MatScalar      *rtmp, *pc, *mwork, *pv;
 16:   MatScalar      *aa             = a->a, *v;
 17:   PetscReal       shift          = info->shiftamount;
 18:   const PetscBool allowzeropivot = PetscNot(A->erroriffailure);

 20:   PetscFunctionBegin;
 21:   PetscCall(ISGetIndices(isrow, &r));
 22:   PetscCall(ISGetIndices(isicol, &ic));

 24:   /* generate work space needed by the factorization */
 25:   PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork));
 26:   PetscCall(PetscArrayzero(rtmp, bs2 * n));

 28:   for (PetscInt i = 0; i < n; i++) {
 29:     /* zero rtmp */
 30:     /* L part */
 31:     PetscInt *pj;
 32:     PetscInt  nzL, nz = bi[i + 1] - bi[i];
 33:     bjtmp = bj + bi[i];
 34:     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));

 36:     /* U part */
 37:     nz    = bdiag[i] - bdiag[i + 1];
 38:     bjtmp = bj + bdiag[i + 1] + 1;
 39:     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));

 41:     /* load in initial (unfactored row) */
 42:     nz    = ai[r[i] + 1] - ai[r[i]];
 43:     ajtmp = aj + ai[r[i]];
 44:     v     = aa + bs2 * ai[r[i]];
 45:     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ic[ajtmp[j]], v + bs2 * j, bs2));

 47:     /* elimination */
 48:     bjtmp = bj + bi[i];
 49:     nzL   = bi[i + 1] - bi[i];
 50:     for (PetscInt k = 0; k < nzL; k++) {
 51:       PetscBool flg = PETSC_FALSE;
 52:       PetscInt  row = bjtmp[k];

 54:       pc = rtmp + bs2 * row;
 55:       for (PetscInt j = 0; j < bs2; j++) {
 56:         if (pc[j] != (PetscScalar)0.0) {
 57:           flg = PETSC_TRUE;
 58:           break;
 59:         }
 60:       }
 61:       if (flg) {
 62:         pv = b->a + bs2 * bdiag[row];
 63:         /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
 64:         PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork));

 66:         pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
 67:         pv = b->a + bs2 * (bdiag[row + 1] + 1);
 68:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries inU(row,:), excluding diag */
 69:         for (PetscInt j = 0; j < nz; j++) {
 70:           /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
 71:           /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
 72:           v = rtmp + 4 * pj[j];
 73:           PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv));
 74:           pv += 4;
 75:         }
 76:         PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
 77:       }
 78:     }

 80:     /* finished row so stick it into b->a */
 81:     /* L part */
 82:     pv = b->a + bs2 * bi[i];
 83:     pj = b->j + bi[i];
 84:     nz = bi[i + 1] - bi[i];
 85:     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));

 87:     /* Mark diagonal and invert diagonal for simpler triangular solves */
 88:     pv = b->a + bs2 * bdiag[i];
 89:     pj = b->j + bdiag[i];
 90:     PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2));
 91:     {
 92:       PetscBool zeropivotdetected;

 94:       PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected));
 95:       if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
 96:     }

 98:     /* U part */
 99:     pv = b->a + bs2 * (bdiag[i + 1] + 1);
100:     pj = b->j + bdiag[i + 1] + 1;
101:     nz = bdiag[i] - bdiag[i + 1] - 1;
102:     for (PetscInt j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
103:   }

105:   PetscCall(PetscFree2(rtmp, mwork));
106:   PetscCall(ISRestoreIndices(isicol, &ic));
107:   PetscCall(ISRestoreIndices(isrow, &r));

109:   C->ops->solve          = MatSolve_SeqBAIJ_2;
110:   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2;
111:   C->assembled           = PETSC_TRUE;

113:   PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */
114:   PetscFunctionReturn(PETSC_SUCCESS);
115: }

117: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
118: {
119:   Mat             C = B;
120:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
121:   PetscInt        i, j, k, nz, nzL, row, *pj;
122:   const PetscInt  n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, bs2 = a->bs2;
123:   const PetscInt *ajtmp, *bjtmp, *bdiag = b->diag;
124:   MatScalar      *rtmp, *pc, *mwork, *pv;
125:   MatScalar      *aa = a->a, *v;
126:   PetscInt        flg;
127:   PetscReal       shift = info->shiftamount;
128:   PetscBool       allowzeropivot, zeropivotdetected;

130:   PetscFunctionBegin;
131:   allowzeropivot = PetscNot(A->erroriffailure);

133:   /* generate work space needed by the factorization */
134:   PetscCall(PetscMalloc2(bs2 * n, &rtmp, bs2, &mwork));
135:   PetscCall(PetscArrayzero(rtmp, bs2 * n));

137:   for (i = 0; i < n; i++) {
138:     /* zero rtmp */
139:     /* L part */
140:     nz    = bi[i + 1] - bi[i];
141:     bjtmp = bj + bi[i];
142:     for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));

144:     /* U part */
145:     nz    = bdiag[i] - bdiag[i + 1];
146:     bjtmp = bj + bdiag[i + 1] + 1;
147:     for (j = 0; j < nz; j++) PetscCall(PetscArrayzero(rtmp + bs2 * bjtmp[j], bs2));

149:     /* load in initial (unfactored row) */
150:     nz    = ai[i + 1] - ai[i];
151:     ajtmp = aj + ai[i];
152:     v     = aa + bs2 * ai[i];
153:     for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(rtmp + bs2 * ajtmp[j], v + bs2 * j, bs2));

155:     /* elimination */
156:     bjtmp = bj + bi[i];
157:     nzL   = bi[i + 1] - bi[i];
158:     for (k = 0; k < nzL; k++) {
159:       row = bjtmp[k];
160:       pc  = rtmp + bs2 * row;
161:       for (flg = 0, j = 0; j < bs2; j++) {
162:         if (pc[j] != (PetscScalar)0.0) {
163:           flg = 1;
164:           break;
165:         }
166:       }
167:       if (flg) {
168:         pv = b->a + bs2 * bdiag[row];
169:         /* PetscKernel_A_gets_A_times_B(bs,pc,pv,mwork); *pc = *pc * (*pv); */
170:         PetscCall(PetscKernel_A_gets_A_times_B_2(pc, pv, mwork));

172:         pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
173:         pv = b->a + bs2 * (bdiag[row + 1] + 1);
174:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
175:         for (j = 0; j < nz; j++) {
176:           /* PetscKernel_A_gets_A_minus_B_times_C(bs,rtmp+bs2*pj[j],pc,pv+bs2*j); */
177:           /* rtmp+bs2*pj[j] = rtmp+bs2*pj[j] - (*pc)*(pv+bs2*j) */
178:           v = rtmp + 4 * pj[j];
179:           PetscCall(PetscKernel_A_gets_A_minus_B_times_C_2(v, pc, pv));
180:           pv += 4;
181:         }
182:         PetscCall(PetscLogFlops(16.0 * nz + 12)); /* flops = 2*bs^3*nz + 2*bs^3 - bs2) */
183:       }
184:     }

186:     /* finished row so stick it into b->a */
187:     /* L part */
188:     pv = b->a + bs2 * bi[i];
189:     pj = b->j + bi[i];
190:     nz = bi[i + 1] - bi[i];
191:     for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));

193:     /* Mark diagonal and invert diagonal for simpler triangular solves */
194:     pv = b->a + bs2 * bdiag[i];
195:     pj = b->j + bdiag[i];
196:     PetscCall(PetscArraycpy(pv, rtmp + bs2 * pj[0], bs2));
197:     PetscCall(PetscKernel_A_gets_inverse_A_2(pv, shift, allowzeropivot, &zeropivotdetected));
198:     if (zeropivotdetected) B->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

200:     /* U part */
201:     /*
202:     pv = b->a + bs2*bi[2*n-i];
203:     pj = b->j + bi[2*n-i];
204:     nz = bi[2*n-i+1] - bi[2*n-i] - 1;
205:     */
206:     pv = b->a + bs2 * (bdiag[i + 1] + 1);
207:     pj = b->j + bdiag[i + 1] + 1;
208:     nz = bdiag[i] - bdiag[i + 1] - 1;
209:     for (j = 0; j < nz; j++) PetscCall(PetscArraycpy(pv + bs2 * j, rtmp + bs2 * pj[j], bs2));
210:   }
211:   PetscCall(PetscFree2(rtmp, mwork));

213:   C->ops->solve          = MatSolve_SeqBAIJ_2_NaturalOrdering;
214:   C->ops->forwardsolve   = MatForwardSolve_SeqBAIJ_2_NaturalOrdering;
215:   C->ops->backwardsolve  = MatBackwardSolve_SeqBAIJ_2_NaturalOrdering;
216:   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering;
217:   C->assembled           = PETSC_TRUE;

219:   PetscCall(PetscLogFlops(1.333333333333 * 2 * 2 * 2 * n)); /* from inverting diagonal blocks */
220:   PetscFunctionReturn(PETSC_SUCCESS);
221: }

223: PetscErrorCode MatILUFactorNumeric_SeqBAIJ_2_inplace(Mat B, Mat A, const MatFactorInfo *info)
224: {
225:   Mat             C = B;
226:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
227:   IS              isrow = b->row, isicol = b->icol;
228:   const PetscInt *r, *ic;
229:   PetscInt        i, j, n = a->mbs, *bi = b->i, *bj = b->j;
230:   PetscInt       *ajtmpold, *ajtmp, nz, row;
231:   PetscInt        idx, *ai = a->i, *aj = a->j, *pj;
232:   const PetscInt *diag_offset;
233:   MatScalar      *pv, *v, *rtmp, m1, m2, m3, m4, *pc, *w, *x, x1, x2, x3, x4;
234:   MatScalar       p1, p2, p3, p4;
235:   MatScalar      *ba = b->a, *aa = a->a;
236:   PetscReal       shift = info->shiftamount;
237:   PetscBool       allowzeropivot, zeropivotdetected;

239:   PetscFunctionBegin;
240:   /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */
241:   A->factortype = MAT_FACTOR_NONE;
242:   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL));
243:   A->factortype  = MAT_FACTOR_ILU;
244:   allowzeropivot = PetscNot(A->erroriffailure);
245:   PetscCall(ISGetIndices(isrow, &r));
246:   PetscCall(ISGetIndices(isicol, &ic));
247:   PetscCall(PetscMalloc1(4 * (n + 1), &rtmp));

249:   for (i = 0; i < n; i++) {
250:     nz    = bi[i + 1] - bi[i];
251:     ajtmp = bj + bi[i];
252:     for (j = 0; j < nz; j++) {
253:       x    = rtmp + 4 * ajtmp[j];
254:       x[0] = x[1] = x[2] = x[3] = 0.0;
255:     }
256:     /* load in initial (unfactored row) */
257:     idx      = r[i];
258:     nz       = ai[idx + 1] - ai[idx];
259:     ajtmpold = aj + ai[idx];
260:     v        = aa + 4 * ai[idx];
261:     for (j = 0; j < nz; j++) {
262:       x    = rtmp + 4 * ic[ajtmpold[j]];
263:       x[0] = v[0];
264:       x[1] = v[1];
265:       x[2] = v[2];
266:       x[3] = v[3];
267:       v += 4;
268:     }
269:     row = *ajtmp++;
270:     while (row < i) {
271:       pc = rtmp + 4 * row;
272:       p1 = pc[0];
273:       p2 = pc[1];
274:       p3 = pc[2];
275:       p4 = pc[3];
276:       if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
277:         pv    = ba + 4 * diag_offset[row];
278:         pj    = bj + diag_offset[row] + 1;
279:         x1    = pv[0];
280:         x2    = pv[1];
281:         x3    = pv[2];
282:         x4    = pv[3];
283:         pc[0] = m1 = p1 * x1 + p3 * x2;
284:         pc[1] = m2 = p2 * x1 + p4 * x2;
285:         pc[2] = m3 = p1 * x3 + p3 * x4;
286:         pc[3] = m4 = p2 * x3 + p4 * x4;
287:         nz         = bi[row + 1] - diag_offset[row] - 1;
288:         pv += 4;
289:         for (j = 0; j < nz; j++) {
290:           x1 = pv[0];
291:           x2 = pv[1];
292:           x3 = pv[2];
293:           x4 = pv[3];
294:           x  = rtmp + 4 * pj[j];
295:           x[0] -= m1 * x1 + m3 * x2;
296:           x[1] -= m2 * x1 + m4 * x2;
297:           x[2] -= m1 * x3 + m3 * x4;
298:           x[3] -= m2 * x3 + m4 * x4;
299:           pv += 4;
300:         }
301:         PetscCall(PetscLogFlops(16.0 * nz + 12.0));
302:       }
303:       row = *ajtmp++;
304:     }
305:     /* finished row so stick it into b->a */
306:     pv = ba + 4 * bi[i];
307:     pj = bj + bi[i];
308:     nz = bi[i + 1] - bi[i];
309:     for (j = 0; j < nz; j++) {
310:       x     = rtmp + 4 * pj[j];
311:       pv[0] = x[0];
312:       pv[1] = x[1];
313:       pv[2] = x[2];
314:       pv[3] = x[3];
315:       pv += 4;
316:     }
317:     /* invert diagonal block */
318:     w = ba + 4 * diag_offset[i];
319:     PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected));
320:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
321:   }

323:   PetscCall(PetscFree(rtmp));
324:   PetscCall(ISRestoreIndices(isicol, &ic));
325:   PetscCall(ISRestoreIndices(isrow, &r));

327:   C->ops->solve          = MatSolve_SeqBAIJ_2_inplace;
328:   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_inplace;
329:   C->assembled           = PETSC_TRUE;

331:   PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */
332:   PetscFunctionReturn(PETSC_SUCCESS);
333: }
334: /*
335:       Version for when blocks are 2 by 2 Using natural ordering
336: */
337: PetscErrorCode MatILUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
338: {
339:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
340:   PetscInt        i, j, n = a->mbs, *bi = b->i, *bj = b->j;
341:   PetscInt       *ajtmpold, *ajtmp, nz, row;
342:   PetscInt       *ai = a->i, *aj = a->j, *pj;
343:   const PetscInt *diag_offset;
344:   MatScalar      *pv, *v, *rtmp, *pc, *w, *x;
345:   MatScalar       p1, p2, p3, p4, m1, m2, m3, m4, x1, x2, x3, x4;
346:   MatScalar      *ba = b->a, *aa = a->a;
347:   PetscReal       shift = info->shiftamount;
348:   PetscBool       allowzeropivot, zeropivotdetected;

350:   PetscFunctionBegin;
351:   /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */
352:   A->factortype = MAT_FACTOR_NONE;
353:   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL));
354:   A->factortype  = MAT_FACTOR_ILU;
355:   allowzeropivot = PetscNot(A->erroriffailure);
356:   PetscCall(PetscMalloc1(4 * (n + 1), &rtmp));
357:   for (i = 0; i < n; i++) {
358:     nz    = bi[i + 1] - bi[i];
359:     ajtmp = bj + bi[i];
360:     for (j = 0; j < nz; j++) {
361:       x    = rtmp + 4 * ajtmp[j];
362:       x[0] = x[1] = x[2] = x[3] = 0.0;
363:     }
364:     /* load in initial (unfactored row) */
365:     nz       = ai[i + 1] - ai[i];
366:     ajtmpold = aj + ai[i];
367:     v        = aa + 4 * ai[i];
368:     for (j = 0; j < nz; j++) {
369:       x    = rtmp + 4 * ajtmpold[j];
370:       x[0] = v[0];
371:       x[1] = v[1];
372:       x[2] = v[2];
373:       x[3] = v[3];
374:       v += 4;
375:     }
376:     row = *ajtmp++;
377:     while (row < i) {
378:       pc = rtmp + 4 * row;
379:       p1 = pc[0];
380:       p2 = pc[1];
381:       p3 = pc[2];
382:       p4 = pc[3];
383:       if (p1 != (PetscScalar)0.0 || p2 != (PetscScalar)0.0 || p3 != (PetscScalar)0.0 || p4 != (PetscScalar)0.0) {
384:         pv    = ba + 4 * diag_offset[row];
385:         pj    = bj + diag_offset[row] + 1;
386:         x1    = pv[0];
387:         x2    = pv[1];
388:         x3    = pv[2];
389:         x4    = pv[3];
390:         pc[0] = m1 = p1 * x1 + p3 * x2;
391:         pc[1] = m2 = p2 * x1 + p4 * x2;
392:         pc[2] = m3 = p1 * x3 + p3 * x4;
393:         pc[3] = m4 = p2 * x3 + p4 * x4;
394:         nz         = bi[row + 1] - diag_offset[row] - 1;
395:         pv += 4;
396:         for (j = 0; j < nz; j++) {
397:           x1 = pv[0];
398:           x2 = pv[1];
399:           x3 = pv[2];
400:           x4 = pv[3];
401:           x  = rtmp + 4 * pj[j];
402:           x[0] -= m1 * x1 + m3 * x2;
403:           x[1] -= m2 * x1 + m4 * x2;
404:           x[2] -= m1 * x3 + m3 * x4;
405:           x[3] -= m2 * x3 + m4 * x4;
406:           pv += 4;
407:         }
408:         PetscCall(PetscLogFlops(16.0 * nz + 12.0));
409:       }
410:       row = *ajtmp++;
411:     }
412:     /* finished row so stick it into b->a */
413:     pv = ba + 4 * bi[i];
414:     pj = bj + bi[i];
415:     nz = bi[i + 1] - bi[i];
416:     for (j = 0; j < nz; j++) {
417:       x     = rtmp + 4 * pj[j];
418:       pv[0] = x[0];
419:       pv[1] = x[1];
420:       pv[2] = x[2];
421:       pv[3] = x[3];
422:       /*
423:       printf(" col %d:",pj[j]);
424:       for (PetscInt j1=0; j1<4; j1++) printf(" %g,",*(pv+j1));
425:       printf("\n");
426:       */
427:       pv += 4;
428:     }
429:     /* invert diagonal block */
430:     w = ba + 4 * diag_offset[i];
431:     PetscCall(PetscKernel_A_gets_inverse_A_2(w, shift, allowzeropivot, &zeropivotdetected));
432:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
433:   }

435:   PetscCall(PetscFree(rtmp));

437:   C->ops->solve          = MatSolve_SeqBAIJ_2_NaturalOrdering_inplace;
438:   C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering_inplace;
439:   C->assembled           = PETSC_TRUE;

441:   PetscCall(PetscLogFlops(1.333333333333 * 8 * b->mbs)); /* from inverting diagonal blocks */
442:   PetscFunctionReturn(PETSC_SUCCESS);
443: }

445: /*
446:      Version for when blocks are 1 by 1.
447: */
448: PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat B, Mat A, const MatFactorInfo *info)
449: {
450:   Mat              C = B;
451:   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
452:   IS               isrow = b->row, isicol = b->icol;
453:   const PetscInt  *r, *ic, *ics;
454:   const PetscInt   n = a->mbs, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
455:   PetscInt         i, j, k, nz, nzL, row, *pj;
456:   const PetscInt  *ajtmp, *bjtmp;
457:   MatScalar       *rtmp, *pc, multiplier, *pv;
458:   const MatScalar *aa = a->a, *v;
459:   PetscBool        row_identity, col_identity;
460:   FactorShiftCtx   sctx;
461:   const PetscInt  *ddiag;
462:   PetscReal        rs;
463:   MatScalar        d;

465:   PetscFunctionBegin;
466:   /* MatPivotSetUp(): initialize shift context sctx */
467:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

469:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
470:     PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &ddiag, NULL));
471:     sctx.shift_top = info->zeropivot;
472:     for (i = 0; i < n; i++) {
473:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
474:       d  = (aa)[ddiag[i]];
475:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
476:       v  = aa + ai[i];
477:       nz = ai[i + 1] - ai[i];
478:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
479:       if (rs > sctx.shift_top) sctx.shift_top = rs;
480:     }
481:     sctx.shift_top *= 1.1;
482:     sctx.nshift_max = 5;
483:     sctx.shift_lo   = 0.;
484:     sctx.shift_hi   = 1.;
485:   }

487:   PetscCall(ISGetIndices(isrow, &r));
488:   PetscCall(ISGetIndices(isicol, &ic));
489:   PetscCall(PetscMalloc1(n + 1, &rtmp));
490:   ics = ic;

492:   do {
493:     sctx.newshift = PETSC_FALSE;
494:     for (i = 0; i < n; i++) {
495:       /* zero rtmp */
496:       /* L part */
497:       nz    = bi[i + 1] - bi[i];
498:       bjtmp = bj + bi[i];
499:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

501:       /* U part */
502:       nz    = bdiag[i] - bdiag[i + 1];
503:       bjtmp = bj + bdiag[i + 1] + 1;
504:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

506:       /* load in initial (unfactored row) */
507:       nz    = ai[r[i] + 1] - ai[r[i]];
508:       ajtmp = aj + ai[r[i]];
509:       v     = aa + ai[r[i]];
510:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];

512:       /* ZeropivotApply() */
513:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

515:       /* elimination */
516:       bjtmp = bj + bi[i];
517:       row   = *bjtmp++;
518:       nzL   = bi[i + 1] - bi[i];
519:       for (k = 0; k < nzL; k++) {
520:         pc = rtmp + row;
521:         if (*pc != (PetscScalar)0.0) {
522:           pv         = b->a + bdiag[row];
523:           multiplier = *pc * (*pv);
524:           *pc        = multiplier;

526:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
527:           pv = b->a + bdiag[row + 1] + 1;
528:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
529:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
530:           PetscCall(PetscLogFlops(2.0 * nz));
531:         }
532:         row = *bjtmp++;
533:       }

535:       /* finished row so stick it into b->a */
536:       rs = 0.0;
537:       /* L part */
538:       pv = b->a + bi[i];
539:       pj = b->j + bi[i];
540:       nz = bi[i + 1] - bi[i];
541:       for (j = 0; j < nz; j++) {
542:         pv[j] = rtmp[pj[j]];
543:         rs += PetscAbsScalar(pv[j]);
544:       }

546:       /* U part */
547:       pv = b->a + bdiag[i + 1] + 1;
548:       pj = b->j + bdiag[i + 1] + 1;
549:       nz = bdiag[i] - bdiag[i + 1] - 1;
550:       for (j = 0; j < nz; j++) {
551:         pv[j] = rtmp[pj[j]];
552:         rs += PetscAbsScalar(pv[j]);
553:       }

555:       sctx.rs = rs;
556:       sctx.pv = rtmp[i];
557:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
558:       if (sctx.newshift) break; /* break for-loop */
559:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

561:       /* Mark diagonal and invert diagonal for simpler triangular solves */
562:       pv  = b->a + bdiag[i];
563:       *pv = (PetscScalar)1.0 / rtmp[i];

565:     } /* endof for (i=0; i<n; i++) { */

567:     /* MatPivotRefine() */
568:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
569:       /*
570:        * if no shift in this attempt & shifting & started shifting & can refine,
571:        * then try lower shift
572:        */
573:       sctx.shift_hi       = sctx.shift_fraction;
574:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
575:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
576:       sctx.newshift       = PETSC_TRUE;
577:       sctx.nshift++;
578:     }
579:   } while (sctx.newshift);

581:   PetscCall(PetscFree(rtmp));
582:   PetscCall(ISRestoreIndices(isicol, &ic));
583:   PetscCall(ISRestoreIndices(isrow, &r));

585:   PetscCall(ISIdentity(isrow, &row_identity));
586:   PetscCall(ISIdentity(isicol, &col_identity));
587:   if (row_identity && col_identity) {
588:     C->ops->solve          = MatSolve_SeqBAIJ_1_NaturalOrdering;
589:     C->ops->forwardsolve   = MatForwardSolve_SeqBAIJ_1_NaturalOrdering;
590:     C->ops->backwardsolve  = MatBackwardSolve_SeqBAIJ_1_NaturalOrdering;
591:     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering;
592:   } else {
593:     C->ops->solve          = MatSolve_SeqBAIJ_1;
594:     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1;
595:   }
596:   C->assembled = PETSC_TRUE;
597:   PetscCall(PetscLogFlops(C->cmap->n));

599:   /* MatShiftView(A,info,&sctx) */
600:   if (sctx.nshift) {
601:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
602:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
603:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
604:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
605:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
606:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
607:     }
608:   }
609:   PetscFunctionReturn(PETSC_SUCCESS);
610: }

612: PetscErrorCode MatILUFactorNumeric_SeqBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
613: {
614:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)C->data;
615:   IS              isrow = b->row, isicol = b->icol;
616:   const PetscInt *r, *ic;
617:   PetscInt        i, j, n = a->mbs, *bi = b->i, *bj = b->j;
618:   PetscInt       *ajtmpold, *ajtmp, nz, row, *ai = a->i, *aj = a->j;
619:   PetscInt        diag, *pj;
620:   const PetscInt *diag_offset;
621:   MatScalar      *pv, *v, *rtmp, multiplier, *pc;
622:   MatScalar      *ba = b->a, *aa = a->a;
623:   PetscBool       row_identity, col_identity;

625:   PetscFunctionBegin;
626:   /* Since A is C and C is labeled as a factored matrix we need to lie to MatGetDiagonalMarkers_SeqBAIJ() to get it to compute the diagonals */
627:   A->factortype = MAT_FACTOR_NONE;
628:   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &diag_offset, NULL));
629:   A->factortype = MAT_FACTOR_ILU;
630:   PetscCall(ISGetIndices(isrow, &r));
631:   PetscCall(ISGetIndices(isicol, &ic));
632:   PetscCall(PetscMalloc1(n + 1, &rtmp));

634:   for (i = 0; i < n; i++) {
635:     nz    = bi[i + 1] - bi[i];
636:     ajtmp = bj + bi[i];
637:     for (j = 0; j < nz; j++) rtmp[ajtmp[j]] = 0.0;

639:     /* load in initial (unfactored row) */
640:     nz       = ai[r[i] + 1] - ai[r[i]];
641:     ajtmpold = aj + ai[r[i]];
642:     v        = aa + ai[r[i]];
643:     for (j = 0; j < nz; j++) rtmp[ic[ajtmpold[j]]] = v[j];

645:     row = *ajtmp++;
646:     while (row < i) {
647:       pc = rtmp + row;
648:       if (*pc != 0.0) {
649:         pv         = ba + diag_offset[row];
650:         pj         = bj + diag_offset[row] + 1;
651:         multiplier = *pc * *pv++;
652:         *pc        = multiplier;
653:         nz         = bi[row + 1] - diag_offset[row] - 1;
654:         for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
655:         PetscCall(PetscLogFlops(1.0 + 2.0 * nz));
656:       }
657:       row = *ajtmp++;
658:     }
659:     /* finished row so stick it into b->a */
660:     pv = ba + bi[i];
661:     pj = bj + bi[i];
662:     nz = bi[i + 1] - bi[i];
663:     for (j = 0; j < nz; j++) pv[j] = rtmp[pj[j]];
664:     diag = diag_offset[i] - bi[i];
665:     /* check pivot entry for current row */
666:     PetscCheck(pv[diag] != 0.0, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
667:     pv[diag] = 1.0 / pv[diag];
668:   }

670:   PetscCall(PetscFree(rtmp));
671:   PetscCall(ISRestoreIndices(isicol, &ic));
672:   PetscCall(ISRestoreIndices(isrow, &r));
673:   PetscCall(ISIdentity(isrow, &row_identity));
674:   PetscCall(ISIdentity(isicol, &col_identity));
675:   if (row_identity && col_identity) {
676:     C->ops->solve          = MatSolve_SeqBAIJ_1_NaturalOrdering_inplace;
677:     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering_inplace;
678:   } else {
679:     C->ops->solve          = MatSolve_SeqBAIJ_1_inplace;
680:     C->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_inplace;
681:   }
682:   C->assembled = PETSC_TRUE;
683:   PetscCall(PetscLogFlops(C->cmap->n));
684:   PetscFunctionReturn(PETSC_SUCCESS);
685: }

687: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
688: {
689:   PetscFunctionBegin;
690:   *type = MATSOLVERPETSC;
691:   PetscFunctionReturn(PETSC_SUCCESS);
692: }

694: PETSC_INTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat A, MatFactorType ftype, Mat *B)
695: {
696:   PetscInt n = A->rmap->n;

698:   PetscFunctionBegin;
699:   PetscCheck(!PetscDefined(USE_COMPLEX) || A->hermitian != PETSC_BOOL3_TRUE || !(ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC), PETSC_COMM_SELF, PETSC_ERR_SUP, "Hermitian Factor is not supported");
700:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
701:   PetscCall(MatSetSizes(*B, n, n, n, n));
702:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
703:     PetscCall(MatSetType(*B, MATSEQBAIJ));

705:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqBAIJ;
706:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqBAIJ;
707:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
708:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
709:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
710:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
711:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
712:     PetscCall(MatSeqSBAIJSetPreallocation(*B, A->rmap->bs, MAT_SKIP_ALLOCATION, NULL));

714:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqBAIJ;
715:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqBAIJ;
716:     /*  Future optimization would be direct symbolic and numerical factorization for BAIJ to support orderings and Cholesky, instead of first converting to SBAIJ */
717:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
718:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
719:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
720:   (*B)->factortype     = ftype;
721:   (*B)->canuseordering = PETSC_TRUE;

723:   PetscCall(PetscFree((*B)->solvertype));
724:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
725:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
726:   PetscFunctionReturn(PETSC_SUCCESS);
727: }

729: PetscErrorCode MatLUFactor_SeqBAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
730: {
731:   Mat C;

733:   PetscFunctionBegin;
734:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
735:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
736:   PetscCall(MatLUFactorNumeric(C, A, info));

738:   A->ops->solve          = C->ops->solve;
739:   A->ops->solvetranspose = C->ops->solvetranspose;

741:   PetscCall(MatHeaderMerge(A, &C));
742:   PetscFunctionReturn(PETSC_SUCCESS);
743: }

745: #include <../src/mat/impls/sbaij/seq/sbaij.h>
746: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
747: {
748:   Mat_SeqBAIJ    *a  = (Mat_SeqBAIJ *)A->data;
749:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
750:   IS              ip = b->row;
751:   const PetscInt *rip;
752:   PetscInt        i, j, mbs = a->mbs, bs = A->rmap->bs, *bi = b->i, *bj = b->j, *bcol;
753:   PetscInt       *ai = a->i, *aj = a->j;
754:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
755:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
756:   PetscReal       rs;
757:   FactorShiftCtx  sctx;

759:   PetscFunctionBegin;
760:   if (bs > 1) { /* convert A to a SBAIJ matrix and apply Cholesky factorization from it */
761:     if (!a->sbaijMat) {
762:       A->symmetric = PETSC_BOOL3_TRUE;
763:       if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE;
764:       PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
765:     }
766:     PetscCall(a->sbaijMat->ops->choleskyfactornumeric(C, a->sbaijMat, info));
767:     PetscCall(MatDestroy(&a->sbaijMat));
768:     PetscFunctionReturn(PETSC_SUCCESS);
769:   }

771:   /* MatPivotSetUp(): initialize shift context sctx */
772:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

774:   PetscCall(ISGetIndices(ip, &rip));
775:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

777:   sctx.shift_amount = 0.;
778:   sctx.nshift       = 0;
779:   do {
780:     sctx.newshift = PETSC_FALSE;
781:     for (i = 0; i < mbs; i++) {
782:       rtmp[i] = 0.0;
783:       jl[i]   = mbs;
784:       il[0]   = 0;
785:     }

787:     for (k = 0; k < mbs; k++) {
788:       bval = ba + bi[k];
789:       /* initialize k-th row by the perm[k]-th row of A */
790:       jmin = ai[rip[k]];
791:       jmax = ai[rip[k] + 1];
792:       for (j = jmin; j < jmax; j++) {
793:         col = rip[aj[j]];
794:         if (col >= k) { /* only take upper triangular entry */
795:           rtmp[col] = aa[j];
796:           *bval++   = 0.0; /* for in-place factorization */
797:         }
798:       }

800:       /* shift the diagonal of the matrix */
801:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

803:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
804:       dk = rtmp[k];
805:       i  = jl[k]; /* first row to be added to k_th row  */

807:       while (i < k) {
808:         nexti = jl[i]; /* next row to be added to k_th row */

810:         /* compute multiplier, update diag(k) and U(i,k) */
811:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
812:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
813:         dk += uikdi * ba[ili];
814:         ba[ili] = uikdi; /* -U(i,k) */

816:         /* add multiple of row i to k-th row */
817:         jmin = ili + 1;
818:         jmax = bi[i + 1];
819:         if (jmin < jmax) {
820:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
821:           /* update il and jl for row i */
822:           il[i] = jmin;
823:           j     = bj[jmin];
824:           jl[i] = jl[j];
825:           jl[j] = i;
826:         }
827:         i = nexti;
828:       }

830:       /* shift the diagonals when zero pivot is detected */
831:       /* compute rs=sum of abs(off-diagonal) */
832:       rs   = 0.0;
833:       jmin = bi[k] + 1;
834:       nz   = bi[k + 1] - jmin;
835:       if (nz) {
836:         bcol = bj + jmin;
837:         while (nz--) {
838:           rs += PetscAbsScalar(rtmp[*bcol]);
839:           bcol++;
840:         }
841:       }

843:       sctx.rs = rs;
844:       sctx.pv = dk;
845:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
846:       if (sctx.newshift) break;
847:       dk = sctx.pv;

849:       /* copy data into U(k,:) */
850:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
851:       jmin      = bi[k] + 1;
852:       jmax      = bi[k + 1];
853:       if (jmin < jmax) {
854:         for (j = jmin; j < jmax; j++) {
855:           col       = bj[j];
856:           ba[j]     = rtmp[col];
857:           rtmp[col] = 0.0;
858:         }
859:         /* add the k-th row into il and jl */
860:         il[k] = jmin;
861:         i     = bj[jmin];
862:         jl[k] = jl[i];
863:         jl[i] = k;
864:       }
865:     }
866:   } while (sctx.newshift);
867:   PetscCall(PetscFree3(rtmp, il, jl));

869:   PetscCall(ISRestoreIndices(ip, &rip));

871:   C->assembled    = PETSC_TRUE;
872:   C->preallocated = PETSC_TRUE;

874:   PetscCall(PetscLogFlops(C->rmap->N));
875:   if (sctx.nshift) {
876:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
877:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
878:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
879:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
880:     }
881:   }
882:   PetscFunctionReturn(PETSC_SUCCESS);
883: }

885: PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
886: {
887:   Mat_SeqBAIJ   *a = (Mat_SeqBAIJ *)A->data;
888:   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)C->data;
889:   PetscInt       i, j, am = a->mbs;
890:   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
891:   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
892:   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
893:   PetscReal      rs;
894:   FactorShiftCtx sctx;

896:   PetscFunctionBegin;
897:   /* MatPivotSetUp(): initialize shift context sctx */
898:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

900:   PetscCall(PetscMalloc3(am, &rtmp, am, &il, am, &jl));

902:   do {
903:     sctx.newshift = PETSC_FALSE;
904:     for (i = 0; i < am; i++) {
905:       rtmp[i] = 0.0;
906:       jl[i]   = am;
907:       il[0]   = 0;
908:     }

910:     for (k = 0; k < am; k++) {
911:       /* initialize k-th row with elements nonzero in row perm(k) of A */
912:       nz   = ai[k + 1] - ai[k];
913:       acol = aj + ai[k];
914:       aval = aa + ai[k];
915:       bval = ba + bi[k];
916:       while (nz--) {
917:         if (*acol < k) { /* skip lower triangular entries */
918:           acol++;
919:           aval++;
920:         } else {
921:           rtmp[*acol++] = *aval++;
922:           *bval++       = 0.0; /* for in-place factorization */
923:         }
924:       }

926:       /* shift the diagonal of the matrix */
927:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

929:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
930:       dk = rtmp[k];
931:       i  = jl[k]; /* first row to be added to k_th row  */

933:       while (i < k) {
934:         nexti = jl[i]; /* next row to be added to k_th row */
935:         /* compute multiplier, update D(k) and U(i,k) */
936:         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
937:         uikdi = -ba[ili] * ba[bi[i]];
938:         dk += uikdi * ba[ili];
939:         ba[ili] = uikdi; /* -U(i,k) */

941:         /* add multiple of row i to k-th row ... */
942:         jmin = ili + 1;
943:         nz   = bi[i + 1] - jmin;
944:         if (nz > 0) {
945:           bcol = bj + jmin;
946:           bval = ba + jmin;
947:           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
948:           /* update il and jl for i-th row */
949:           il[i] = jmin;
950:           j     = bj[jmin];
951:           jl[i] = jl[j];
952:           jl[j] = i;
953:         }
954:         i = nexti;
955:       }

957:       /* shift the diagonals when zero pivot is detected */
958:       /* compute rs=sum of abs(off-diagonal) */
959:       rs   = 0.0;
960:       jmin = bi[k] + 1;
961:       nz   = bi[k + 1] - jmin;
962:       if (nz) {
963:         bcol = bj + jmin;
964:         while (nz--) {
965:           rs += PetscAbsScalar(rtmp[*bcol]);
966:           bcol++;
967:         }
968:       }

970:       sctx.rs = rs;
971:       sctx.pv = dk;
972:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
973:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
974:       dk = sctx.pv;

976:       /* copy data into U(k,:) */
977:       ba[bi[k]] = 1.0 / dk;
978:       jmin      = bi[k] + 1;
979:       nz        = bi[k + 1] - jmin;
980:       if (nz) {
981:         bcol = bj + jmin;
982:         bval = ba + jmin;
983:         while (nz--) {
984:           *bval++       = rtmp[*bcol];
985:           rtmp[*bcol++] = 0.0;
986:         }
987:         /* add k-th row into il and jl */
988:         il[k] = jmin;
989:         i     = bj[jmin];
990:         jl[k] = jl[i];
991:         jl[i] = k;
992:       }
993:     }
994:   } while (sctx.newshift);
995:   PetscCall(PetscFree3(rtmp, il, jl));

997:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
998:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
999:   C->assembled           = PETSC_TRUE;
1000:   C->preallocated        = PETSC_TRUE;

1002:   PetscCall(PetscLogFlops(C->rmap->N));
1003:   if (sctx.nshift) {
1004:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1005:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1006:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1007:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1008:     }
1009:   }
1010:   PetscFunctionReturn(PETSC_SUCCESS);
1011: }

1013: #include <petscbt.h>
1014: #include <../src/mat/utils/freespace.h>
1015: PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
1016: {
1017:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1018:   Mat_SeqSBAIJ      *b;
1019:   Mat                B;
1020:   PetscBool          perm_identity;
1021:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = a->mbs, bs = A->rmap->bs, *ui;
1022:   const PetscInt    *rip;
1023:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
1024:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, ncols, ncols_upper, *cols, *cols_lvl, *uj, **uj_ptr, **uj_lvl_ptr;
1025:   PetscReal          fill = info->fill, levels = info->levels;
1026:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1027:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1028:   PetscBT            lnkbt;
1029:   PetscBool          diagDense;
1030:   const PetscInt    *adiag;

1032:   PetscFunctionBegin;
1033:   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, &diagDense));
1034:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");

1036:   if (bs > 1) {
1037:     if (!a->sbaijMat) {
1038:       A->symmetric = PETSC_BOOL3_TRUE;
1039:       if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE;
1040:       PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
1041:     }
1042:     fact->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */

1044:     PetscCall(MatICCFactorSymbolic(fact, a->sbaijMat, perm, info));
1045:     PetscFunctionReturn(PETSC_SUCCESS);
1046:   }

1048:   PetscCall(ISIdentity(perm, &perm_identity));
1049:   PetscCall(ISGetIndices(perm, &rip));

1051:   /* special case that simply copies fill pattern */
1052:   if (!levels && perm_identity) {
1053:     PetscCall(PetscMalloc1(am + 1, &ui));
1054:     for (i = 0; i < am; i++) ui[i] = ai[i + 1] - adiag[i]; /* ui: rowlengths - changes when !perm_identity */
1055:     B = fact;
1056:     PetscCall(MatSeqSBAIJSetPreallocation(B, 1, 0, ui));

1058:     b  = (Mat_SeqSBAIJ *)B->data;
1059:     uj = b->j;
1060:     for (i = 0; i < am; i++) {
1061:       aj = a->j + adiag[i];
1062:       for (j = 0; j < ui[i]; j++) *uj++ = *aj++;
1063:       b->ilen[i] = ui[i];
1064:     }
1065:     PetscCall(PetscFree(ui));

1067:     B->factortype = MAT_FACTOR_NONE;

1069:     PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1070:     PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1071:     B->factortype = MAT_FACTOR_ICC;

1073:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
1074:     PetscFunctionReturn(PETSC_SUCCESS);
1075:   }

1077:   /* initialization */
1078:   PetscCall(PetscMalloc1(am + 1, &ui));
1079:   ui[0] = 0;
1080:   PetscCall(PetscMalloc1(2 * am + 1, &cols_lvl));

1082:   /* jl: linked list for storing indices of the pivot rows
1083:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1084:   PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &il, am, &jl));
1085:   for (i = 0; i < am; i++) {
1086:     jl[i] = am;
1087:     il[i] = 0;
1088:   }

1090:   /* create and initialize a linked list for storing column indices of the active row k */
1091:   nlnk = am + 1;
1092:   PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

1094:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
1095:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space));

1097:   current_space = free_space;

1099:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[am] / 2, am / 2)), &free_space_lvl));
1100:   current_space_lvl = free_space_lvl;

1102:   for (k = 0; k < am; k++) { /* for each active row k */
1103:     /* initialize lnk by the column indices of row rip[k] of A */
1104:     nzk         = 0;
1105:     ncols       = ai[rip[k] + 1] - ai[rip[k]];
1106:     ncols_upper = 0;
1107:     cols        = cols_lvl + am;
1108:     for (j = 0; j < ncols; j++) {
1109:       i = rip[*(aj + ai[rip[k]] + j)];
1110:       if (i >= k) { /* only take upper triangular entry */
1111:         cols[ncols_upper]     = i;
1112:         cols_lvl[ncols_upper] = -1; /* initialize level for nonzero entries */
1113:         ncols_upper++;
1114:       }
1115:     }
1116:     PetscCall(PetscIncompleteLLAdd(ncols_upper, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt));
1117:     nzk += nlnk;

1119:     /* update lnk by computing fill-in for each pivot row to be merged in */
1120:     prow = jl[k]; /* 1st pivot row */

1122:     while (prow < k) {
1123:       nextprow = jl[prow];

1125:       /* merge prow into k-th row */
1126:       jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1127:       jmax  = ui[prow + 1];
1128:       ncols = jmax - jmin;
1129:       i     = jmin - ui[prow];
1130:       cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1131:       for (j = 0; j < ncols; j++) cols_lvl[j] = *(uj_lvl_ptr[prow] + i + j);
1132:       PetscCall(PetscIncompleteLLAddSorted(ncols, cols, levels, cols_lvl, am, &nlnk, lnk, lnk_lvl, lnkbt));
1133:       nzk += nlnk;

1135:       /* update il and jl for prow */
1136:       if (jmin < jmax) {
1137:         il[prow] = jmin;

1139:         j        = *cols;
1140:         jl[prow] = jl[j];
1141:         jl[j]    = prow;
1142:       }
1143:       prow = nextprow;
1144:     }

1146:     /* if free space is not available, make more free space */
1147:     if (current_space->local_remaining < nzk) {
1148:       i = am - k + 1;                                                             /* num of unfactored rows */
1149:       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1150:       PetscCall(PetscFreeSpaceGet(i, &current_space));
1151:       PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
1152:       reallocs++;
1153:     }

1155:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1156:     PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

1158:     /* add the k-th row into il and jl */
1159:     if (nzk - 1 > 0) {
1160:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1161:       jl[k] = jl[i];
1162:       jl[i] = k;
1163:       il[k] = ui[k] + 1;
1164:     }
1165:     uj_ptr[k]     = current_space->array;
1166:     uj_lvl_ptr[k] = current_space_lvl->array;

1168:     current_space->array += nzk;
1169:     current_space->local_used += nzk;
1170:     current_space->local_remaining -= nzk;

1172:     current_space_lvl->array += nzk;
1173:     current_space_lvl->local_used += nzk;
1174:     current_space_lvl->local_remaining -= nzk;

1176:     ui[k + 1] = ui[k] + nzk;
1177:   }

1179:   PetscCall(ISRestoreIndices(perm, &rip));
1180:   PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, il, jl));
1181:   PetscCall(PetscFree(cols_lvl));

1183:   /* copy free_space into uj and free free_space; set uj in new datastructure; */
1184:   PetscCall(PetscMalloc1(ui[am] + 1, &uj));
1185:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
1186:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1187:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

1189:   /* put together the new matrix in MATSEQSBAIJ format */
1190:   B = fact;
1191:   PetscCall(MatSeqSBAIJSetPreallocation(B, 1, MAT_SKIP_ALLOCATION, NULL));

1193:   b          = (Mat_SeqSBAIJ *)B->data;
1194:   b->free_a  = PETSC_TRUE;
1195:   b->free_ij = PETSC_TRUE;

1197:   PetscCall(PetscMalloc1(ui[am] + 1, &b->a));

1199:   b->j             = uj;
1200:   b->i             = ui;
1201:   b->diag          = NULL;
1202:   b->ilen          = NULL;
1203:   b->imax          = NULL;
1204:   b->row           = perm;
1205:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

1207:   PetscCall(PetscObjectReference((PetscObject)perm));

1209:   b->icol = perm;

1211:   PetscCall(PetscObjectReference((PetscObject)perm));
1212:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

1214:   b->maxnz = b->nz = ui[am];

1216:   B->info.factor_mallocs   = reallocs;
1217:   B->info.fill_ratio_given = fill;
1218:   if (ai[am] != 0.) {
1219:     /* nonzeros in lower triangular part of A (including diagonals)= (ai[am]+am)/2 */
1220:     B->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
1221:   } else {
1222:     B->info.fill_ratio_needed = 0.0;
1223:   }
1224: #if defined(PETSC_USE_INFO)
1225:   if (ai[am] != 0) {
1226:     PetscReal af = B->info.fill_ratio_needed;
1227:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
1228:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
1229:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
1230:   } else {
1231:     PetscCall(PetscInfo(A, "Empty matrix\n"));
1232:   }
1233: #endif
1234:   if (perm_identity) {
1235:     B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1236:     B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1237:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
1238:   } else {
1239:     fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
1240:   }
1241:   PetscFunctionReturn(PETSC_SUCCESS);
1242: }

1244: PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
1245: {
1246:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1247:   Mat_SeqSBAIJ      *b;
1248:   Mat                B;
1249:   PetscBool          perm_identity;
1250:   PetscReal          fill = info->fill;
1251:   const PetscInt    *rip;
1252:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, *ai = a->i, *aj = a->j, reallocs = 0, prow;
1253:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
1254:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
1255:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1256:   PetscBT            lnkbt;
1257:   PetscBool          diagDense;

1259:   PetscFunctionBegin;
1260:   if (bs > 1) { /* convert to seqsbaij */
1261:     if (!a->sbaijMat) {
1262:       A->symmetric = PETSC_BOOL3_TRUE;
1263:       if (!PetscDefined(USE_COMPLEX)) A->hermitian = PETSC_BOOL3_TRUE;
1264:       PetscCall(MatConvert(A, MATSEQSBAIJ, MAT_INITIAL_MATRIX, &a->sbaijMat));
1265:     }
1266:     fact->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqSBAIJ; /* undue the change made in MatGetFactor_seqbaij_petsc */

1268:     PetscCall(MatCholeskyFactorSymbolic(fact, a->sbaijMat, perm, info));
1269:     PetscFunctionReturn(PETSC_SUCCESS);
1270:   }
1271:   PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, NULL, &diagDense));
1272:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");

1274:   /* check whether perm is the identity mapping */
1275:   PetscCall(ISIdentity(perm, &perm_identity));
1276:   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported");
1277:   PetscCall(ISGetIndices(perm, &rip));

1279:   /* initialization */
1280:   PetscCall(PetscMalloc1(mbs + 1, &ui));
1281:   ui[0] = 0;

1283:   /* jl: linked list for storing indices of the pivot rows
1284:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
1285:   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
1286:   for (i = 0; i < mbs; i++) {
1287:     jl[i] = mbs;
1288:     il[i] = 0;
1289:   }

1291:   /* create and initialize a linked list for storing column indices of the active row k */
1292:   nlnk = mbs + 1;
1293:   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));

1295:   /* initial FreeSpace size is fill* (ai[mbs]+mbs)/2 */
1296:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, PetscIntSumTruncate(ai[mbs] / 2, mbs / 2)), &free_space));

1298:   current_space = free_space;

1300:   for (k = 0; k < mbs; k++) { /* for each active row k */
1301:     /* initialize lnk by the column indices of row rip[k] of A */
1302:     nzk         = 0;
1303:     ncols       = ai[rip[k] + 1] - ai[rip[k]];
1304:     ncols_upper = 0;
1305:     for (j = 0; j < ncols; j++) {
1306:       i = rip[*(aj + ai[rip[k]] + j)];
1307:       if (i >= k) { /* only take upper triangular entry */
1308:         cols[ncols_upper] = i;
1309:         ncols_upper++;
1310:       }
1311:     }
1312:     PetscCall(PetscLLAdd(ncols_upper, cols, mbs, &nlnk, lnk, lnkbt));
1313:     nzk += nlnk;

1315:     /* update lnk by computing fill-in for each pivot row to be merged in */
1316:     prow = jl[k]; /* 1st pivot row */

1318:     while (prow < k) {
1319:       nextprow = jl[prow];
1320:       /* merge prow into k-th row */
1321:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
1322:       jmax   = ui[prow + 1];
1323:       ncols  = jmax - jmin;
1324:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
1325:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
1326:       nzk += nlnk;

1328:       /* update il and jl for prow */
1329:       if (jmin < jmax) {
1330:         il[prow] = jmin;
1331:         j        = *uj_ptr;
1332:         jl[prow] = jl[j];
1333:         jl[j]    = prow;
1334:       }
1335:       prow = nextprow;
1336:     }

1338:     /* if free space is not available, make more free space */
1339:     if (current_space->local_remaining < nzk) {
1340:       i = mbs - k + 1;                                                            /* num of unfactored rows */
1341:       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1342:       PetscCall(PetscFreeSpaceGet(i, &current_space));
1343:       reallocs++;
1344:     }

1346:     /* copy data into free space, then initialize lnk */
1347:     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));

1349:     /* add the k-th row into il and jl */
1350:     if (nzk - 1 > 0) {
1351:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
1352:       jl[k] = jl[i];
1353:       jl[i] = k;
1354:       il[k] = ui[k] + 1;
1355:     }
1356:     ui_ptr[k] = current_space->array;
1357:     current_space->array += nzk;
1358:     current_space->local_used += nzk;
1359:     current_space->local_remaining -= nzk;

1361:     ui[k + 1] = ui[k] + nzk;
1362:   }

1364:   PetscCall(ISRestoreIndices(perm, &rip));
1365:   PetscCall(PetscFree4(ui_ptr, il, jl, cols));

1367:   /* copy free_space into uj and free free_space; set uj in new datastructure; */
1368:   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
1369:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
1370:   PetscCall(PetscLLDestroy(lnk, lnkbt));

1372:   /* put together the new matrix in MATSEQSBAIJ format */
1373:   B = fact;
1374:   PetscCall(MatSeqSBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));

1376:   b          = (Mat_SeqSBAIJ *)B->data;
1377:   b->free_a  = PETSC_TRUE;
1378:   b->free_ij = PETSC_TRUE;

1380:   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));

1382:   b->j             = uj;
1383:   b->i             = ui;
1384:   b->diag          = NULL;
1385:   b->ilen          = NULL;
1386:   b->imax          = NULL;
1387:   b->row           = perm;
1388:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

1390:   PetscCall(PetscObjectReference((PetscObject)perm));
1391:   b->icol = perm;
1392:   PetscCall(PetscObjectReference((PetscObject)perm));
1393:   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
1394:   b->maxnz = b->nz = ui[mbs];

1396:   B->info.factor_mallocs   = reallocs;
1397:   B->info.fill_ratio_given = fill;
1398:   if (ai[mbs] != 0.) {
1399:     /* nonzeros in lower triangular part of A = (ai[mbs]+mbs)/2 */
1400:     B->info.fill_ratio_needed = ((PetscReal)2 * ui[mbs]) / (ai[mbs] + mbs);
1401:   } else {
1402:     B->info.fill_ratio_needed = 0.0;
1403:   }
1404: #if defined(PETSC_USE_INFO)
1405:   if (ai[mbs] != 0.) {
1406:     PetscReal af = B->info.fill_ratio_needed;
1407:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
1408:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
1409:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
1410:   } else {
1411:     PetscCall(PetscInfo(A, "Empty matrix\n"));
1412:   }
1413: #endif
1414:   if (perm_identity) {
1415:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering;
1416:   } else {
1417:     B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqBAIJ_N;
1418:   }
1419:   PetscFunctionReturn(PETSC_SUCCESS);
1420: }

1422: PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering(Mat A, Vec bb, Vec xx)
1423: {
1424:   Mat_SeqBAIJ       *a  = (Mat_SeqBAIJ *)A->data;
1425:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
1426:   PetscInt           i, k, n                       = a->mbs;
1427:   PetscInt           nz, bs = A->rmap->bs, bs2 = a->bs2;
1428:   const MatScalar   *aa = a->a, *v;
1429:   PetscScalar       *x, *s, *t, *ls;
1430:   const PetscScalar *b;

1432:   PetscFunctionBegin;
1433:   PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs);
1434:   PetscCall(VecGetArrayRead(bb, &b));
1435:   PetscCall(VecGetArray(xx, &x));
1436:   t = a->solve_work;

1438:   /* forward solve the lower triangular */
1439:   PetscCall(PetscArraycpy(t, b, bs)); /* copy 1st block of b to t */

1441:   for (i = 1; i < n; i++) {
1442:     v  = aa + bs2 * ai[i];
1443:     vi = aj + ai[i];
1444:     nz = ai[i + 1] - ai[i];
1445:     s  = t + bs * i;
1446:     PetscCall(PetscArraycpy(s, b + bs * i, bs)); /* copy i_th block of b to t */
1447:     for (k = 0; k < nz; k++) {
1448:       PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[k]);
1449:       v += bs2;
1450:     }
1451:   }

1453:   /* backward solve the upper triangular */
1454:   ls = a->solve_work + A->cmap->n;
1455:   for (i = n - 1; i >= 0; i--) {
1456:     v  = aa + bs2 * (adiag[i + 1] + 1);
1457:     vi = aj + adiag[i + 1] + 1;
1458:     nz = adiag[i] - adiag[i + 1] - 1;
1459:     PetscCall(PetscArraycpy(ls, t + i * bs, bs));
1460:     for (k = 0; k < nz; k++) {
1461:       PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[k]);
1462:       v += bs2;
1463:     }
1464:     PetscKernel_w_gets_A_times_v(bs, ls, aa + bs2 * adiag[i], t + i * bs); /* *inv(diagonal[i]) */
1465:     PetscCall(PetscArraycpy(x + i * bs, t + i * bs, bs));
1466:   }

1468:   PetscCall(VecRestoreArrayRead(bb, &b));
1469:   PetscCall(VecRestoreArray(xx, &x));
1470:   PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n));
1471:   PetscFunctionReturn(PETSC_SUCCESS);
1472: }

1474: PetscErrorCode MatSolve_SeqBAIJ_N(Mat A, Vec bb, Vec xx)
1475: {
1476:   Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ *)A->data;
1477:   IS                 iscol = a->col, isrow = a->row;
1478:   const PetscInt    *r, *c, *rout, *cout, *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
1479:   PetscInt           i, m, n = a->mbs;
1480:   PetscInt           nz, bs = A->rmap->bs, bs2 = a->bs2;
1481:   const MatScalar   *aa = a->a, *v;
1482:   PetscScalar       *x, *s, *t, *ls;
1483:   const PetscScalar *b;

1485:   PetscFunctionBegin;
1486:   PetscCheck(bs > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Expected bs %" PetscInt_FMT " > 0", bs);
1487:   PetscCall(VecGetArrayRead(bb, &b));
1488:   PetscCall(VecGetArray(xx, &x));
1489:   t = a->solve_work;

1491:   PetscCall(ISGetIndices(isrow, &rout));
1492:   r = rout;
1493:   PetscCall(ISGetIndices(iscol, &cout));
1494:   c = cout;

1496:   /* forward solve the lower triangular */
1497:   PetscCall(PetscArraycpy(t, b + bs * r[0], bs));
1498:   for (i = 1; i < n; i++) {
1499:     v  = aa + bs2 * ai[i];
1500:     vi = aj + ai[i];
1501:     nz = ai[i + 1] - ai[i];
1502:     s  = t + bs * i;
1503:     PetscCall(PetscArraycpy(s, b + bs * r[i], bs));
1504:     for (m = 0; m < nz; m++) {
1505:       PetscKernel_v_gets_v_minus_A_times_w(bs, s, v, t + bs * vi[m]);
1506:       v += bs2;
1507:     }
1508:   }

1510:   /* backward solve the upper triangular */
1511:   ls = a->solve_work + A->cmap->n;
1512:   for (i = n - 1; i >= 0; i--) {
1513:     v  = aa + bs2 * (adiag[i + 1] + 1);
1514:     vi = aj + adiag[i + 1] + 1;
1515:     nz = adiag[i] - adiag[i + 1] - 1;
1516:     PetscCall(PetscArraycpy(ls, t + i * bs, bs));
1517:     for (m = 0; m < nz; m++) {
1518:       PetscKernel_v_gets_v_minus_A_times_w(bs, ls, v, t + bs * vi[m]);
1519:       v += bs2;
1520:     }
1521:     PetscKernel_w_gets_A_times_v(bs, ls, v, t + i * bs); /* *inv(diagonal[i]) */
1522:     PetscCall(PetscArraycpy(x + bs * c[i], t + i * bs, bs));
1523:   }
1524:   PetscCall(ISRestoreIndices(isrow, &rout));
1525:   PetscCall(ISRestoreIndices(iscol, &cout));
1526:   PetscCall(VecRestoreArrayRead(bb, &b));
1527:   PetscCall(VecRestoreArray(xx, &x));
1528:   PetscCall(PetscLogFlops(2.0 * (a->bs2) * (a->nz) - A->rmap->bs * A->cmap->n));
1529:   PetscFunctionReturn(PETSC_SUCCESS);
1530: }