Actual source code: sbaijfact.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petsc/private/kernels/blockinvert.h>
  4: #include <petscis.h>

  6: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
  7: {
  8:   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
  9:   MatScalar    *dd   = fact->a;
 10:   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;

 12:   PetscFunctionBegin;
 13:   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);

 15:   nneg_tmp = 0;
 16:   npos_tmp = 0;
 17:   if (fi) {
 18:     for (i = 0; i < mbs; i++) {
 19:       if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
 20:       else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
 21:       fi++;
 22:     }
 23:   } else {
 24:     for (i = 0; i < mbs; i++) {
 25:       if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++;
 26:       else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++;
 27:     }
 28:   }
 29:   if (nneg) *nneg = nneg_tmp;
 30:   if (npos) *npos = npos_tmp;
 31:   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
 32:   PetscFunctionReturn(PETSC_SUCCESS);
 33: }

 35: /*
 36:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 37:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
 38: */
 39: static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
 40: {
 41:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
 42:   const PetscInt *rip, *ai, *aj;
 43:   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
 44:   PetscInt        m, reallocs = 0, prow;
 45:   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
 46:   PetscReal       f = info->fill;
 47:   PetscBool       perm_identity;

 49:   PetscFunctionBegin;
 50:   /* check whether perm is the identity mapping */
 51:   PetscCall(ISIdentity(perm, &perm_identity));
 52:   PetscCall(ISGetIndices(perm, &rip));

 54:   if (perm_identity) { /* without permutation */
 55:     a->permute = PETSC_FALSE;

 57:     ai = a->i;
 58:     aj = a->j;
 59:   } else { /* non-trivial permutation */
 60:     a->permute = PETSC_TRUE;

 62:     PetscCall(MatReorderingSeqSBAIJ(A, perm));

 64:     ai = a->inew;
 65:     aj = a->jnew;
 66:   }

 68:   /* initialization */
 69:   PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&iu));
 70:   umax = (PetscInt)(f * ai[mbs] + 1);
 71:   umax += mbs + 1;
 72:   PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&ju));
 73:   iu[0] = mbs + 1;
 74:   juidx = mbs + 1; /* index for ju */
 75:   /* jl linked list for pivot row -- linked list for col index */
 76:   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
 77:   for (i = 0; i < mbs; i++) {
 78:     jl[i] = mbs;
 79:     q[i]  = 0;
 80:   }

 82:   /* for each row k */
 83:   for (k = 0; k < mbs; k++) {
 84:     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
 85:     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
 86:     q[k] = mbs;
 87:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 88:     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
 89:     jmax = ai[rip[k] + 1];
 90:     for (j = jmin; j < jmax; j++) {
 91:       vj = rip[aj[j]]; /* col. value */
 92:       if (vj > k) {
 93:         qm = k;
 94:         do {
 95:           m  = qm;
 96:           qm = q[m];
 97:         } while (qm < vj);
 98:         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
 99:         nzk++;
100:         q[m]  = vj;
101:         q[vj] = qm;
102:       } /* if (vj > k) */
103:     } /* for (j=jmin; j<jmax; j++) */

105:     /* modify nonzero structure of k-th row by computing fill-in
106:        for each row i to be merged in */
107:     prow = k;
108:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */

110:     while (prow < k) {
111:       /* merge row prow into k-th row */
112:       jmin = iu[prow] + 1;
113:       jmax = iu[prow + 1];
114:       qm   = k;
115:       for (j = jmin; j < jmax; j++) {
116:         vj = ju[j];
117:         do {
118:           m  = qm;
119:           qm = q[m];
120:         } while (qm < vj);
121:         if (qm != vj) {
122:           nzk++;
123:           q[m]  = vj;
124:           q[vj] = qm;
125:           qm    = vj;
126:         }
127:       }
128:       prow = jl[prow]; /* next pivot row */
129:     }

131:     /* add k to row list for first nonzero element in k-th row */
132:     if (nzk > 0) {
133:       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
134:       jl[k] = jl[i];
135:       jl[i] = k;
136:     }
137:     iu[k + 1] = iu[k] + nzk;

139:     /* allocate more space to ju if needed */
140:     if (iu[k + 1] > umax) {
141:       /* estimate how much additional space we will need */
142:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143:       /* just double the memory each time */
144:       maxadd = umax;
145:       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
146:       umax += maxadd;

148:       /* allocate a longer ju */
149:       PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&jutmp));
150:       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
151:       PetscCall(PetscShmgetDeallocateArray((void **)&ju));
152:       ju = jutmp;
153:       reallocs++; /* count how many times we realloc */
154:     }

156:     /* save nonzero structure of k-th row in ju */
157:     i = k;
158:     while (nzk--) {
159:       i           = q[i];
160:       ju[juidx++] = i;
161:     }
162:   }

164: #if defined(PETSC_USE_INFO)
165:   if (ai[mbs] != 0) {
166:     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
167:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
168:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
169:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
170:     PetscCall(PetscInfo(A, "for best performance.\n"));
171:   } else {
172:     PetscCall(PetscInfo(A, "Empty matrix\n"));
173:   }
174: #endif

176:   PetscCall(ISRestoreIndices(perm, &rip));
177:   PetscCall(PetscFree2(jl, q));

179:   /* put together the new matrix */
180:   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));

182:   b          = (Mat_SeqSBAIJ *)F->data;
183:   b->free_ij = PETSC_TRUE;
184:   PetscCall(PetscShmgetAllocateArray((iu[mbs] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
185:   b->free_a = PETSC_TRUE;
186:   b->j      = ju;
187:   b->i      = iu;
188:   b->diag   = NULL;
189:   b->ilen   = NULL;
190:   b->imax   = NULL;
191:   b->row    = perm;

193:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

195:   PetscCall(PetscObjectReference((PetscObject)perm));

197:   b->icol = perm;
198:   PetscCall(PetscObjectReference((PetscObject)perm));
199:   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
200:   /* In b structure:  Free imax, ilen, old a, old j.
201:      Allocate idnew, solve_work, new a, new j */
202:   b->maxnz = b->nz = iu[mbs];

204:   F->info.factor_mallocs   = reallocs;
205:   F->info.fill_ratio_given = f;
206:   if (ai[mbs] != 0) {
207:     F->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
208:   } else {
209:     F->info.fill_ratio_needed = 0.0;
210:   }
211:   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
212:   PetscFunctionReturn(PETSC_SUCCESS);
213: }
214: /*
215:     Symbolic U^T*D*U factorization for SBAIJ format.
216:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
217: */
218: #include <petscbt.h>
219: #include <../src/mat/utils/freespace.h>
220: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
221: {
222:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
223:   Mat_SeqSBAIJ      *b;
224:   PetscBool          perm_identity, missing;
225:   PetscReal          fill = info->fill;
226:   const PetscInt    *rip, *ai = a->i, *aj = a->j;
227:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
228:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
229:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
230:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
231:   PetscBT            lnkbt;

233:   PetscFunctionBegin;
234:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
235:   PetscCall(MatMissingDiagonal(A, &missing, &i));
236:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
237:   if (bs > 1) {
238:     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
239:     PetscFunctionReturn(PETSC_SUCCESS);
240:   }

242:   /* check whether perm is the identity mapping */
243:   PetscCall(ISIdentity(perm, &perm_identity));
244:   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
245:   a->permute = PETSC_FALSE;
246:   PetscCall(ISGetIndices(perm, &rip));

248:   /* initialization */
249:   PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
250:   PetscCall(PetscMalloc1(mbs + 1, &udiag));
251:   ui[0] = 0;

253:   /* jl: linked list for storing indices of the pivot rows
254:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
255:   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
256:   for (i = 0; i < mbs; i++) {
257:     jl[i] = mbs;
258:     il[i] = 0;
259:   }

261:   /* create and initialize a linked list for storing column indices of the active row k */
262:   nlnk = mbs + 1;
263:   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));

265:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
266:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
267:   current_space = free_space;

269:   for (k = 0; k < mbs; k++) { /* for each active row k */
270:     /* initialize lnk by the column indices of row rip[k] of A */
271:     nzk   = 0;
272:     ncols = ai[k + 1] - ai[k];
273:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
274:     for (j = 0; j < ncols; j++) {
275:       i       = *(aj + ai[k] + j);
276:       cols[j] = i;
277:     }
278:     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
279:     nzk += nlnk;

281:     /* update lnk by computing fill-in for each pivot row to be merged in */
282:     prow = jl[k]; /* 1st pivot row */

284:     while (prow < k) {
285:       nextprow = jl[prow];
286:       /* merge prow into k-th row */
287:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
288:       jmax   = ui[prow + 1];
289:       ncols  = jmax - jmin;
290:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
291:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
292:       nzk += nlnk;

294:       /* update il and jl for prow */
295:       if (jmin < jmax) {
296:         il[prow] = jmin;
297:         j        = *uj_ptr;
298:         jl[prow] = jl[j];
299:         jl[j]    = prow;
300:       }
301:       prow = nextprow;
302:     }

304:     /* if free space is not available, make more free space */
305:     if (current_space->local_remaining < nzk) {
306:       i = mbs - k + 1;                                   /* num of unfactored rows */
307:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
308:       PetscCall(PetscFreeSpaceGet(i, &current_space));
309:       reallocs++;
310:     }

312:     /* copy data into free space, then initialize lnk */
313:     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));

315:     /* add the k-th row into il and jl */
316:     if (nzk > 1) {
317:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
318:       jl[k] = jl[i];
319:       jl[i] = k;
320:       il[k] = ui[k] + 1;
321:     }
322:     ui_ptr[k] = current_space->array;

324:     current_space->array += nzk;
325:     current_space->local_used += nzk;
326:     current_space->local_remaining -= nzk;

328:     ui[k + 1] = ui[k] + nzk;
329:   }

331:   PetscCall(ISRestoreIndices(perm, &rip));
332:   PetscCall(PetscFree4(ui_ptr, il, jl, cols));

334:   /* destroy list of free space and other temporary array(s) */
335:   PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj));
336:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
337:   PetscCall(PetscLLDestroy(lnk, lnkbt));

339:   /* put together the new matrix in MATSEQSBAIJ format */
340:   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));

342:   b          = (Mat_SeqSBAIJ *)fact->data;
343:   b->free_ij = PETSC_TRUE;
344:   PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a));
345:   b->free_a    = PETSC_TRUE;
346:   b->j         = uj;
347:   b->i         = ui;
348:   b->diag      = udiag;
349:   b->free_diag = PETSC_TRUE;
350:   b->ilen      = NULL;
351:   b->imax      = NULL;
352:   b->row       = perm;
353:   b->icol      = perm;

355:   PetscCall(PetscObjectReference((PetscObject)perm));
356:   PetscCall(PetscObjectReference((PetscObject)perm));

358:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

360:   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));

362:   b->maxnz = b->nz = ui[mbs];

364:   fact->info.factor_mallocs   = reallocs;
365:   fact->info.fill_ratio_given = fill;
366:   if (ai[mbs] != 0) {
367:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
368:   } else {
369:     fact->info.fill_ratio_needed = 0.0;
370:   }
371: #if defined(PETSC_USE_INFO)
372:   if (ai[mbs] != 0) {
373:     PetscReal af = fact->info.fill_ratio_needed;
374:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
375:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
376:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
377:   } else {
378:     PetscCall(PetscInfo(A, "Empty matrix\n"));
379:   }
380: #endif
381:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
382:   PetscFunctionReturn(PETSC_SUCCESS);
383: }

385: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
386: {
387:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
388:   Mat_SeqSBAIJ      *b;
389:   PetscBool          perm_identity, missing;
390:   PetscReal          fill = info->fill;
391:   const PetscInt    *rip, *ai, *aj;
392:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
393:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
394:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
395:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
396:   PetscBT            lnkbt;

398:   PetscFunctionBegin;
399:   PetscCall(MatMissingDiagonal(A, &missing, &d));
400:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);

402:   /*
403:    This code originally uses Modified Sparse Row (MSR) storage
404:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
405:    Then it is rewritten so the factor B takes seqsbaij format. However the associated
406:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
407:    thus the original code in MSR format is still used for these cases.
408:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
409:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
410:   */
411:   if (bs > 1) {
412:     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
413:     PetscFunctionReturn(PETSC_SUCCESS);
414:   }

416:   /* check whether perm is the identity mapping */
417:   PetscCall(ISIdentity(perm, &perm_identity));
418:   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
419:   a->permute = PETSC_FALSE;
420:   ai         = a->i;
421:   aj         = a->j;
422:   PetscCall(ISGetIndices(perm, &rip));

424:   /* initialization */
425:   PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
426:   ui[0] = 0;

428:   /* jl: linked list for storing indices of the pivot rows
429:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
430:   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
431:   for (i = 0; i < mbs; i++) {
432:     jl[i] = mbs;
433:     il[i] = 0;
434:   }

436:   /* create and initialize a linked list for storing column indices of the active row k */
437:   nlnk = mbs + 1;
438:   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));

440:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
441:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
442:   current_space = free_space;

444:   for (k = 0; k < mbs; k++) { /* for each active row k */
445:     /* initialize lnk by the column indices of row rip[k] of A */
446:     nzk   = 0;
447:     ncols = ai[rip[k] + 1] - ai[rip[k]];
448:     for (j = 0; j < ncols; j++) {
449:       i       = *(aj + ai[rip[k]] + j);
450:       cols[j] = rip[i];
451:     }
452:     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
453:     nzk += nlnk;

455:     /* update lnk by computing fill-in for each pivot row to be merged in */
456:     prow = jl[k]; /* 1st pivot row */

458:     while (prow < k) {
459:       nextprow = jl[prow];
460:       /* merge prow into k-th row */
461:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
462:       jmax   = ui[prow + 1];
463:       ncols  = jmax - jmin;
464:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
465:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
466:       nzk += nlnk;

468:       /* update il and jl for prow */
469:       if (jmin < jmax) {
470:         il[prow] = jmin;

472:         j        = *uj_ptr;
473:         jl[prow] = jl[j];
474:         jl[j]    = prow;
475:       }
476:       prow = nextprow;
477:     }

479:     /* if free space is not available, make more free space */
480:     if (current_space->local_remaining < nzk) {
481:       i = mbs - k + 1;                                                            /* num of unfactored rows */
482:       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
483:       PetscCall(PetscFreeSpaceGet(i, &current_space));
484:       reallocs++;
485:     }

487:     /* copy data into free space, then initialize lnk */
488:     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));

490:     /* add the k-th row into il and jl */
491:     if (nzk - 1 > 0) {
492:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
493:       jl[k] = jl[i];
494:       jl[i] = k;
495:       il[k] = ui[k] + 1;
496:     }
497:     ui_ptr[k] = current_space->array;

499:     current_space->array += nzk;
500:     current_space->local_used += nzk;
501:     current_space->local_remaining -= nzk;

503:     ui[k + 1] = ui[k] + nzk;
504:   }

506:   PetscCall(ISRestoreIndices(perm, &rip));
507:   PetscCall(PetscFree4(ui_ptr, il, jl, cols));

509:   /* destroy list of free space and other temporary array(s) */
510:   PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj));
511:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
512:   PetscCall(PetscLLDestroy(lnk, lnkbt));

514:   /* put together the new matrix in MATSEQSBAIJ format */
515:   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));

517:   b = (Mat_SeqSBAIJ *)fact->data;
518:   PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a));
519:   b->free_a  = PETSC_TRUE;
520:   b->free_ij = PETSC_TRUE;
521:   b->j       = uj;
522:   b->i       = ui;
523:   b->diag    = NULL;
524:   b->ilen    = NULL;
525:   b->imax    = NULL;
526:   b->row     = perm;

528:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

530:   PetscCall(PetscObjectReference((PetscObject)perm));
531:   b->icol = perm;
532:   PetscCall(PetscObjectReference((PetscObject)perm));
533:   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
534:   b->maxnz = b->nz = ui[mbs];

536:   fact->info.factor_mallocs   = reallocs;
537:   fact->info.fill_ratio_given = fill;
538:   if (ai[mbs] != 0) {
539:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
540:   } else {
541:     fact->info.fill_ratio_needed = 0.0;
542:   }
543: #if defined(PETSC_USE_INFO)
544:   if (ai[mbs] != 0) {
545:     PetscReal af = fact->info.fill_ratio_needed;
546:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
547:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
548:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
549:   } else {
550:     PetscCall(PetscInfo(A, "Empty matrix\n"));
551:   }
552: #endif
553:   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
554:   PetscFunctionReturn(PETSC_SUCCESS);
555: }

557: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
558: {
559:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
560:   IS              perm = b->row;
561:   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
562:   PetscInt        i, j;
563:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
564:   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
565:   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
566:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
567:   MatScalar      *work;
568:   PetscInt       *pivots;
569:   PetscBool       allowzeropivot, zeropivotdetected;

571:   PetscFunctionBegin;
572:   /* initialization */
573:   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
574:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
575:   allowzeropivot = PetscNot(A->erroriffailure);

577:   il[0] = 0;
578:   for (i = 0; i < mbs; i++) jl[i] = mbs;

580:   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
581:   PetscCall(PetscMalloc1(bs, &pivots));

583:   PetscCall(ISGetIndices(perm, &perm_ptr));

585:   /* check permutation */
586:   if (!a->permute) {
587:     ai = a->i;
588:     aj = a->j;
589:     aa = a->a;
590:   } else {
591:     ai = a->inew;
592:     aj = a->jnew;
593:     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
594:     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
595:     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
596:     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));

598:     for (i = 0; i < mbs; i++) {
599:       jmin = ai[i];
600:       jmax = ai[i + 1];
601:       for (j = jmin; j < jmax; j++) {
602:         while (a2anew[j] != j) {
603:           k         = a2anew[j];
604:           a2anew[j] = a2anew[k];
605:           a2anew[k] = k;
606:           for (k1 = 0; k1 < bs2; k1++) {
607:             dk[k1]           = aa[k * bs2 + k1];
608:             aa[k * bs2 + k1] = aa[j * bs2 + k1];
609:             aa[j * bs2 + k1] = dk[k1];
610:           }
611:         }
612:         /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
613:         if (i > aj[j]) {
614:           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
615:           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
616:           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
617:             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
618:           }
619:         }
620:       }
621:     }
622:     PetscCall(PetscFree(a2anew));
623:   }

625:   /* for each row k */
626:   for (k = 0; k < mbs; k++) {
627:     /*initialize k-th row with elements nonzero in row perm(k) of A */
628:     jmin = ai[perm_ptr[k]];
629:     jmax = ai[perm_ptr[k] + 1];

631:     ap = aa + jmin * bs2;
632:     for (j = jmin; j < jmax; j++) {
633:       vj       = perm_ptr[aj[j]]; /* block col. index */
634:       rtmp_ptr = rtmp + vj * bs2;
635:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
636:     }

638:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
639:     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
640:     i = jl[k]; /* first row to be added to k_th row  */

642:     while (i < k) {
643:       nexti = jl[i]; /* next row to be added to k_th row */

645:       /* compute multiplier */
646:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

648:       /* uik = -inv(Di)*U_bar(i,k) */
649:       diag = ba + i * bs2;
650:       u    = ba + ili * bs2;
651:       PetscCall(PetscArrayzero(uik, bs2));
652:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

654:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
655:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
656:       PetscCall(PetscLogFlops(4.0 * bs * bs2));

658:       /* update -U(i,k) */
659:       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));

661:       /* add multiple of row i to k-th row ... */
662:       jmin = ili + 1;
663:       jmax = bi[i + 1];
664:       if (jmin < jmax) {
665:         for (j = jmin; j < jmax; j++) {
666:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
667:           rtmp_ptr = rtmp + bj[j] * bs2;
668:           u        = ba + j * bs2;
669:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
670:         }
671:         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));

673:         /* ... add i to row list for next nonzero entry */
674:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
675:         j     = bj[jmin];
676:         jl[i] = jl[j];
677:         jl[j] = i; /* update jl */
678:       }
679:       i = nexti;
680:     }

682:     /* save nonzero entries in k-th row of U ... */

684:     /* invert diagonal block */
685:     diag = ba + k * bs2;
686:     PetscCall(PetscArraycpy(diag, dk, bs2));

688:     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
689:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

691:     jmin = bi[k];
692:     jmax = bi[k + 1];
693:     if (jmin < jmax) {
694:       for (j = jmin; j < jmax; j++) {
695:         vj       = bj[j]; /* block col. index of U */
696:         u        = ba + j * bs2;
697:         rtmp_ptr = rtmp + vj * bs2;
698:         for (k1 = 0; k1 < bs2; k1++) {
699:           *u++        = *rtmp_ptr;
700:           *rtmp_ptr++ = 0.0;
701:         }
702:       }

704:       /* ... add k to row list for first nonzero entry in k-th row */
705:       il[k] = jmin;
706:       i     = bj[jmin];
707:       jl[k] = jl[i];
708:       jl[i] = k;
709:     }
710:   }

712:   PetscCall(PetscFree(rtmp));
713:   PetscCall(PetscFree2(il, jl));
714:   PetscCall(PetscFree3(dk, uik, work));
715:   PetscCall(PetscFree(pivots));
716:   if (a->permute) PetscCall(PetscFree(aa));

718:   PetscCall(ISRestoreIndices(perm, &perm_ptr));

720:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
721:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
722:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
723:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

725:   C->assembled    = PETSC_TRUE;
726:   C->preallocated = PETSC_TRUE;

728:   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
729:   PetscFunctionReturn(PETSC_SUCCESS);
730: }

732: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
733: {
734:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
735:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
736:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
737:   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
738:   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
739:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
740:   MatScalar    *work;
741:   PetscInt     *pivots;
742:   PetscBool     allowzeropivot, zeropivotdetected;

744:   PetscFunctionBegin;
745:   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
746:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
747:   il[0] = 0;
748:   for (i = 0; i < mbs; i++) jl[i] = mbs;

750:   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
751:   PetscCall(PetscMalloc1(bs, &pivots));
752:   allowzeropivot = PetscNot(A->erroriffailure);

754:   ai = a->i;
755:   aj = a->j;
756:   aa = a->a;

758:   /* for each row k */
759:   for (k = 0; k < mbs; k++) {
760:     /*initialize k-th row with elements nonzero in row k of A */
761:     jmin = ai[k];
762:     jmax = ai[k + 1];
763:     ap   = aa + jmin * bs2;
764:     for (j = jmin; j < jmax; j++) {
765:       vj       = aj[j]; /* block col. index */
766:       rtmp_ptr = rtmp + vj * bs2;
767:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
768:     }

770:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
771:     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
772:     i = jl[k]; /* first row to be added to k_th row  */

774:     while (i < k) {
775:       nexti = jl[i]; /* next row to be added to k_th row */

777:       /* compute multiplier */
778:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

780:       /* uik = -inv(Di)*U_bar(i,k) */
781:       diag = ba + i * bs2;
782:       u    = ba + ili * bs2;
783:       PetscCall(PetscArrayzero(uik, bs2));
784:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

786:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
787:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
788:       PetscCall(PetscLogFlops(2.0 * bs * bs2));

790:       /* update -U(i,k) */
791:       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));

793:       /* add multiple of row i to k-th row ... */
794:       jmin = ili + 1;
795:       jmax = bi[i + 1];
796:       if (jmin < jmax) {
797:         for (j = jmin; j < jmax; j++) {
798:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
799:           rtmp_ptr = rtmp + bj[j] * bs2;
800:           u        = ba + j * bs2;
801:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
802:         }
803:         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));

805:         /* ... add i to row list for next nonzero entry */
806:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
807:         j     = bj[jmin];
808:         jl[i] = jl[j];
809:         jl[j] = i; /* update jl */
810:       }
811:       i = nexti;
812:     }

814:     /* save nonzero entries in k-th row of U ... */

816:     /* invert diagonal block */
817:     diag = ba + k * bs2;
818:     PetscCall(PetscArraycpy(diag, dk, bs2));

820:     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
821:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

823:     jmin = bi[k];
824:     jmax = bi[k + 1];
825:     if (jmin < jmax) {
826:       for (j = jmin; j < jmax; j++) {
827:         vj       = bj[j]; /* block col. index of U */
828:         u        = ba + j * bs2;
829:         rtmp_ptr = rtmp + vj * bs2;
830:         for (k1 = 0; k1 < bs2; k1++) {
831:           *u++        = *rtmp_ptr;
832:           *rtmp_ptr++ = 0.0;
833:         }
834:       }

836:       /* ... add k to row list for first nonzero entry in k-th row */
837:       il[k] = jmin;
838:       i     = bj[jmin];
839:       jl[k] = jl[i];
840:       jl[i] = k;
841:     }
842:   }

844:   PetscCall(PetscFree(rtmp));
845:   PetscCall(PetscFree2(il, jl));
846:   PetscCall(PetscFree3(dk, uik, work));
847:   PetscCall(PetscFree(pivots));

849:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
850:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853:   C->assembled           = PETSC_TRUE;
854:   C->preallocated        = PETSC_TRUE;

856:   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
857:   PetscFunctionReturn(PETSC_SUCCESS);
858: }

860: /*
861:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
862:     Version for blocks 2 by 2.
863: */
864: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
865: {
866:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
867:   IS              perm = b->row;
868:   const PetscInt *ai, *aj, *perm_ptr;
869:   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
870:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
871:   MatScalar      *ba = b->a, *aa, *ap;
872:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
873:   PetscReal       shift = info->shiftamount;
874:   PetscBool       allowzeropivot, zeropivotdetected;

876:   PetscFunctionBegin;
877:   allowzeropivot = PetscNot(A->erroriffailure);

879:   /* initialization */
880:   /* il and jl record the first nonzero element in each row of the accessing
881:      window U(0:k, k:mbs-1).
882:      jl:    list of rows to be added to uneliminated rows
883:             i>= k: jl(i) is the first row to be added to row i
884:             i<  k: jl(i) is the row following row i in some list of rows
885:             jl(i) = mbs indicates the end of a list
886:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
887:             row i of U */
888:   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
889:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
890:   il[0] = 0;
891:   for (i = 0; i < mbs; i++) jl[i] = mbs;

893:   PetscCall(ISGetIndices(perm, &perm_ptr));

895:   /* check permutation */
896:   if (!a->permute) {
897:     ai = a->i;
898:     aj = a->j;
899:     aa = a->a;
900:   } else {
901:     ai = a->inew;
902:     aj = a->jnew;
903:     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
904:     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
905:     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
906:     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));

908:     for (i = 0; i < mbs; i++) {
909:       jmin = ai[i];
910:       jmax = ai[i + 1];
911:       for (j = jmin; j < jmax; j++) {
912:         while (a2anew[j] != j) {
913:           k         = a2anew[j];
914:           a2anew[j] = a2anew[k];
915:           a2anew[k] = k;
916:           for (k1 = 0; k1 < 4; k1++) {
917:             dk[k1]         = aa[k * 4 + k1];
918:             aa[k * 4 + k1] = aa[j * 4 + k1];
919:             aa[j * 4 + k1] = dk[k1];
920:           }
921:         }
922:         /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
923:         if (i > aj[j]) {
924:           ap    = aa + j * 4; /* ptr to the beginning of the block */
925:           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
926:           ap[1] = ap[2];
927:           ap[2] = dk[1];
928:         }
929:       }
930:     }
931:     PetscCall(PetscFree(a2anew));
932:   }

934:   /* for each row k */
935:   for (k = 0; k < mbs; k++) {
936:     /*initialize k-th row with elements nonzero in row perm(k) of A */
937:     jmin = ai[perm_ptr[k]];
938:     jmax = ai[perm_ptr[k] + 1];
939:     ap   = aa + jmin * 4;
940:     for (j = jmin; j < jmax; j++) {
941:       vj       = perm_ptr[aj[j]]; /* block col. index */
942:       rtmp_ptr = rtmp + vj * 4;
943:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
944:     }

946:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
947:     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
948:     i = jl[k]; /* first row to be added to k_th row  */

950:     while (i < k) {
951:       nexti = jl[i]; /* next row to be added to k_th row */

953:       /* compute multiplier */
954:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

956:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
957:       diag   = ba + i * 4;
958:       u      = ba + ili * 4;
959:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
960:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
961:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
962:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

964:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
965:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
966:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
967:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
968:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

970:       PetscCall(PetscLogFlops(16.0 * 2.0));

972:       /* update -U(i,k): ba[ili] = uik */
973:       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));

975:       /* add multiple of row i to k-th row ... */
976:       jmin = ili + 1;
977:       jmax = bi[i + 1];
978:       if (jmin < jmax) {
979:         for (j = jmin; j < jmax; j++) {
980:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
981:           rtmp_ptr = rtmp + bj[j] * 4;
982:           u        = ba + j * 4;
983:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
984:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
985:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
986:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
987:         }
988:         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));

990:         /* ... add i to row list for next nonzero entry */
991:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
992:         j     = bj[jmin];
993:         jl[i] = jl[j];
994:         jl[j] = i; /* update jl */
995:       }
996:       i = nexti;
997:     }

999:     /* save nonzero entries in k-th row of U ... */

1001:     /* invert diagonal block */
1002:     diag = ba + k * 4;
1003:     PetscCall(PetscArraycpy(diag, dk, 4));
1004:     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1005:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1007:     jmin = bi[k];
1008:     jmax = bi[k + 1];
1009:     if (jmin < jmax) {
1010:       for (j = jmin; j < jmax; j++) {
1011:         vj       = bj[j]; /* block col. index of U */
1012:         u        = ba + j * 4;
1013:         rtmp_ptr = rtmp + vj * 4;
1014:         for (k1 = 0; k1 < 4; k1++) {
1015:           *u++        = *rtmp_ptr;
1016:           *rtmp_ptr++ = 0.0;
1017:         }
1018:       }

1020:       /* ... add k to row list for first nonzero entry in k-th row */
1021:       il[k] = jmin;
1022:       i     = bj[jmin];
1023:       jl[k] = jl[i];
1024:       jl[i] = k;
1025:     }
1026:   }

1028:   PetscCall(PetscFree(rtmp));
1029:   PetscCall(PetscFree2(il, jl));
1030:   if (a->permute) PetscCall(PetscFree(aa));
1031:   PetscCall(ISRestoreIndices(perm, &perm_ptr));

1033:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1034:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1035:   C->assembled           = PETSC_TRUE;
1036:   C->preallocated        = PETSC_TRUE;

1038:   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1039:   PetscFunctionReturn(PETSC_SUCCESS);
1040: }

1042: /*
1043:       Version for when blocks are 2 by 2 Using natural ordering
1044: */
1045: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1046: {
1047:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1048:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1049:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1050:   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1051:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1052:   PetscReal     shift = info->shiftamount;
1053:   PetscBool     allowzeropivot, zeropivotdetected;

1055:   PetscFunctionBegin;
1056:   allowzeropivot = PetscNot(A->erroriffailure);

1058:   /* initialization */
1059:   /* il and jl record the first nonzero element in each row of the accessing
1060:      window U(0:k, k:mbs-1).
1061:      jl:    list of rows to be added to uneliminated rows
1062:             i>= k: jl(i) is the first row to be added to row i
1063:             i<  k: jl(i) is the row following row i in some list of rows
1064:             jl(i) = mbs indicates the end of a list
1065:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1066:             row i of U */
1067:   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1068:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1069:   il[0] = 0;
1070:   for (i = 0; i < mbs; i++) jl[i] = mbs;

1072:   ai = a->i;
1073:   aj = a->j;
1074:   aa = a->a;

1076:   /* for each row k */
1077:   for (k = 0; k < mbs; k++) {
1078:     /*initialize k-th row with elements nonzero in row k of A */
1079:     jmin = ai[k];
1080:     jmax = ai[k + 1];
1081:     ap   = aa + jmin * 4;
1082:     for (j = jmin; j < jmax; j++) {
1083:       vj       = aj[j]; /* block col. index */
1084:       rtmp_ptr = rtmp + vj * 4;
1085:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1086:     }

1088:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1089:     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1090:     i = jl[k]; /* first row to be added to k_th row  */

1092:     while (i < k) {
1093:       nexti = jl[i]; /* next row to be added to k_th row */

1095:       /* compute multiplier */
1096:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

1098:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1099:       diag   = ba + i * 4;
1100:       u      = ba + ili * 4;
1101:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1102:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1103:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1104:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

1106:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1107:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1108:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1109:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1110:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

1112:       PetscCall(PetscLogFlops(16.0 * 2.0));

1114:       /* update -U(i,k): ba[ili] = uik */
1115:       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));

1117:       /* add multiple of row i to k-th row ... */
1118:       jmin = ili + 1;
1119:       jmax = bi[i + 1];
1120:       if (jmin < jmax) {
1121:         for (j = jmin; j < jmax; j++) {
1122:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1123:           rtmp_ptr = rtmp + bj[j] * 4;
1124:           u        = ba + j * 4;
1125:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1126:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1127:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1128:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1129:         }
1130:         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));

1132:         /* ... add i to row list for next nonzero entry */
1133:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1134:         j     = bj[jmin];
1135:         jl[i] = jl[j];
1136:         jl[j] = i; /* update jl */
1137:       }
1138:       i = nexti;
1139:     }

1141:     /* save nonzero entries in k-th row of U ... */

1143:     /* invert diagonal block */
1144:     diag = ba + k * 4;
1145:     PetscCall(PetscArraycpy(diag, dk, 4));
1146:     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1147:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1149:     jmin = bi[k];
1150:     jmax = bi[k + 1];
1151:     if (jmin < jmax) {
1152:       for (j = jmin; j < jmax; j++) {
1153:         vj       = bj[j]; /* block col. index of U */
1154:         u        = ba + j * 4;
1155:         rtmp_ptr = rtmp + vj * 4;
1156:         for (k1 = 0; k1 < 4; k1++) {
1157:           *u++        = *rtmp_ptr;
1158:           *rtmp_ptr++ = 0.0;
1159:         }
1160:       }

1162:       /* ... add k to row list for first nonzero entry in k-th row */
1163:       il[k] = jmin;
1164:       i     = bj[jmin];
1165:       jl[k] = jl[i];
1166:       jl[i] = k;
1167:     }
1168:   }

1170:   PetscCall(PetscFree(rtmp));
1171:   PetscCall(PetscFree2(il, jl));

1173:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1174:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177:   C->assembled           = PETSC_TRUE;
1178:   C->preallocated        = PETSC_TRUE;

1180:   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1181:   PetscFunctionReturn(PETSC_SUCCESS);
1182: }

1184: /*
1185:     Numeric U^T*D*U factorization for SBAIJ format.
1186:     Version for blocks are 1 by 1.
1187: */
1188: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1189: {
1190:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1191:   IS              ip = b->row;
1192:   const PetscInt *ai, *aj, *rip;
1193:   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1194:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1195:   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1196:   PetscReal       rs;
1197:   FactorShiftCtx  sctx;

1199:   PetscFunctionBegin;
1200:   /* MatPivotSetUp(): initialize shift context sctx */
1201:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1203:   PetscCall(ISGetIndices(ip, &rip));
1204:   if (!a->permute) {
1205:     ai = a->i;
1206:     aj = a->j;
1207:     aa = a->a;
1208:   } else {
1209:     ai = a->inew;
1210:     aj = a->jnew;
1211:     nz = ai[mbs];
1212:     PetscCall(PetscMalloc1(nz, &aa));
1213:     a2anew = a->a2anew;
1214:     bval   = a->a;
1215:     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1216:   }

1218:   /* initialization */
1219:   /* il and jl record the first nonzero element in each row of the accessing
1220:      window U(0:k, k:mbs-1).
1221:      jl:    list of rows to be added to uneliminated rows
1222:             i>= k: jl(i) is the first row to be added to row i
1223:             i<  k: jl(i) is the row following row i in some list of rows
1224:             jl(i) = mbs indicates the end of a list
1225:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1226:             row i of U */
1227:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

1229:   do {
1230:     sctx.newshift = PETSC_FALSE;
1231:     il[0]         = 0;
1232:     for (i = 0; i < mbs; i++) {
1233:       rtmp[i] = 0.0;
1234:       jl[i]   = mbs;
1235:     }

1237:     for (k = 0; k < mbs; k++) {
1238:       /*initialize k-th row by the perm[k]-th row of A */
1239:       jmin = ai[rip[k]];
1240:       jmax = ai[rip[k] + 1];
1241:       bval = ba + bi[k];
1242:       for (j = jmin; j < jmax; j++) {
1243:         col       = rip[aj[j]];
1244:         rtmp[col] = aa[j];
1245:         *bval++   = 0.0; /* for in-place factorization */
1246:       }

1248:       /* shift the diagonal of the matrix */
1249:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1251:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1252:       dk = rtmp[k];
1253:       i  = jl[k]; /* first row to be added to k_th row  */

1255:       while (i < k) {
1256:         nexti = jl[i]; /* next row to be added to k_th row */

1258:         /* compute multiplier, update diag(k) and U(i,k) */
1259:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1260:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1261:         dk += uikdi * ba[ili];
1262:         ba[ili] = uikdi; /* -U(i,k) */

1264:         /* add multiple of row i to k-th row */
1265:         jmin = ili + 1;
1266:         jmax = bi[i + 1];
1267:         if (jmin < jmax) {
1268:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1269:           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));

1271:           /* update il and jl for row i */
1272:           il[i] = jmin;
1273:           j     = bj[jmin];
1274:           jl[i] = jl[j];
1275:           jl[j] = i;
1276:         }
1277:         i = nexti;
1278:       }

1280:       /* shift the diagonals when zero pivot is detected */
1281:       /* compute rs=sum of abs(off-diagonal) */
1282:       rs   = 0.0;
1283:       jmin = bi[k] + 1;
1284:       nz   = bi[k + 1] - jmin;
1285:       if (nz) {
1286:         bcol = bj + jmin;
1287:         while (nz--) {
1288:           rs += PetscAbsScalar(rtmp[*bcol]);
1289:           bcol++;
1290:         }
1291:       }

1293:       sctx.rs = rs;
1294:       sctx.pv = dk;
1295:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1296:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1297:       dk = sctx.pv;

1299:       /* copy data into U(k,:) */
1300:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1301:       jmin      = bi[k] + 1;
1302:       jmax      = bi[k + 1];
1303:       if (jmin < jmax) {
1304:         for (j = jmin; j < jmax; j++) {
1305:           col       = bj[j];
1306:           ba[j]     = rtmp[col];
1307:           rtmp[col] = 0.0;
1308:         }
1309:         /* add the k-th row into il and jl */
1310:         il[k] = jmin;
1311:         i     = bj[jmin];
1312:         jl[k] = jl[i];
1313:         jl[i] = k;
1314:       }
1315:     }
1316:   } while (sctx.newshift);
1317:   PetscCall(PetscFree3(rtmp, il, jl));
1318:   if (a->permute) PetscCall(PetscFree(aa));

1320:   PetscCall(ISRestoreIndices(ip, &rip));

1322:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1323:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1324:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1325:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1326:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1327:   C->assembled           = PETSC_TRUE;
1328:   C->preallocated        = PETSC_TRUE;

1330:   PetscCall(PetscLogFlops(C->rmap->N));
1331:   if (sctx.nshift) {
1332:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1333:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1334:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1335:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1336:     }
1337:   }
1338:   PetscFunctionReturn(PETSC_SUCCESS);
1339: }

1341: /*
1342:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1343:   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1344: */
1345: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1346: {
1347:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1348:   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1349:   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1350:   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1351:   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1352:   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1353:   FactorShiftCtx sctx;
1354:   PetscReal      rs;
1355:   MatScalar      d, *v;

1357:   PetscFunctionBegin;
1358:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

1360:   /* MatPivotSetUp(): initialize shift context sctx */
1361:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1363:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1364:     sctx.shift_top = info->zeropivot;

1366:     PetscCall(PetscArrayzero(rtmp, mbs));

1368:     for (i = 0; i < mbs; i++) {
1369:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1370:       d = (aa)[a->diag[i]];
1371:       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1372:       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1373:       v     = aa + ai[i] + 1;
1374:       nz    = ai[i + 1] - ai[i] - 1;
1375:       for (j = 0; j < nz; j++) {
1376:         rtmp[i] += PetscAbsScalar(v[j]);
1377:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1378:       }
1379:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1380:     }
1381:     sctx.shift_top *= 1.1;
1382:     sctx.nshift_max = 5;
1383:     sctx.shift_lo   = 0.;
1384:     sctx.shift_hi   = 1.;
1385:   }

1387:   /* allocate working arrays
1388:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1389:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1390:   */
1391:   do {
1392:     sctx.newshift = PETSC_FALSE;

1394:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1395:     if (mbs) il[0] = 0;

1397:     for (k = 0; k < mbs; k++) {
1398:       /* zero rtmp */
1399:       nz    = bi[k + 1] - bi[k];
1400:       bjtmp = bj + bi[k];
1401:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1403:       /* load in initial unfactored row */
1404:       bval = ba + bi[k];
1405:       jmin = ai[k];
1406:       jmax = ai[k + 1];
1407:       for (j = jmin; j < jmax; j++) {
1408:         col       = aj[j];
1409:         rtmp[col] = aa[j];
1410:         *bval++   = 0.0; /* for in-place factorization */
1411:       }
1412:       /* shift the diagonal of the matrix: ZeropivotApply() */
1413:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

1415:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1416:       dk = rtmp[k];
1417:       i  = c2r[k]; /* first row to be added to k_th row  */

1419:       while (i < k) {
1420:         nexti = c2r[i]; /* next row to be added to k_th row */

1422:         /* compute multiplier, update diag(k) and U(i,k) */
1423:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1424:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1425:         dk += uikdi * ba[ili];           /* update diag[k] */
1426:         ba[ili] = uikdi;                 /* -U(i,k) */

1428:         /* add multiple of row i to k-th row */
1429:         jmin = ili + 1;
1430:         jmax = bi[i + 1];
1431:         if (jmin < jmax) {
1432:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1433:           /* update il and c2r for row i */
1434:           il[i]  = jmin;
1435:           j      = bj[jmin];
1436:           c2r[i] = c2r[j];
1437:           c2r[j] = i;
1438:         }
1439:         i = nexti;
1440:       }

1442:       /* copy data into U(k,:) */
1443:       rs   = 0.0;
1444:       jmin = bi[k];
1445:       jmax = bi[k + 1] - 1;
1446:       if (jmin < jmax) {
1447:         for (j = jmin; j < jmax; j++) {
1448:           col   = bj[j];
1449:           ba[j] = rtmp[col];
1450:           rs += PetscAbsScalar(ba[j]);
1451:         }
1452:         /* add the k-th row into il and c2r */
1453:         il[k]  = jmin;
1454:         i      = bj[jmin];
1455:         c2r[k] = c2r[i];
1456:         c2r[i] = k;
1457:       }

1459:       sctx.rs = rs;
1460:       sctx.pv = dk;
1461:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1462:       if (sctx.newshift) break;
1463:       dk = sctx.pv;

1465:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1466:     }
1467:   } while (sctx.newshift);

1469:   PetscCall(PetscFree3(rtmp, il, c2r));

1471:   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1472:   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1473:   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1474:   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1475:   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1476:   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1478:   B->assembled    = PETSC_TRUE;
1479:   B->preallocated = PETSC_TRUE;

1481:   PetscCall(PetscLogFlops(B->rmap->n));

1483:   /* MatPivotView() */
1484:   if (sctx.nshift) {
1485:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1486:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1487:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1488:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1489:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1490:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1491:     }
1492:   }
1493:   PetscFunctionReturn(PETSC_SUCCESS);
1494: }

1496: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1497: {
1498:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1499:   PetscInt       i, j, mbs = a->mbs;
1500:   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1501:   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1502:   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1503:   PetscReal      rs;
1504:   FactorShiftCtx sctx;

1506:   PetscFunctionBegin;
1507:   /* MatPivotSetUp(): initialize shift context sctx */
1508:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1510:   /* initialization */
1511:   /* il and jl record the first nonzero element in each row of the accessing
1512:      window U(0:k, k:mbs-1).
1513:      jl:    list of rows to be added to uneliminated rows
1514:             i>= k: jl(i) is the first row to be added to row i
1515:             i<  k: jl(i) is the row following row i in some list of rows
1516:             jl(i) = mbs indicates the end of a list
1517:      il(i): points to the first nonzero element in U(i,k:mbs-1)
1518:   */
1519:   PetscCall(PetscMalloc1(mbs, &rtmp));
1520:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));

1522:   do {
1523:     sctx.newshift = PETSC_FALSE;
1524:     il[0]         = 0;
1525:     for (i = 0; i < mbs; i++) {
1526:       rtmp[i] = 0.0;
1527:       jl[i]   = mbs;
1528:     }

1530:     for (k = 0; k < mbs; k++) {
1531:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1532:       nz   = ai[k + 1] - ai[k];
1533:       acol = aj + ai[k];
1534:       aval = aa + ai[k];
1535:       bval = ba + bi[k];
1536:       while (nz--) {
1537:         rtmp[*acol++] = *aval++;
1538:         *bval++       = 0.0; /* for in-place factorization */
1539:       }

1541:       /* shift the diagonal of the matrix */
1542:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1544:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1545:       dk = rtmp[k];
1546:       i  = jl[k]; /* first row to be added to k_th row  */

1548:       while (i < k) {
1549:         nexti = jl[i]; /* next row to be added to k_th row */
1550:         /* compute multiplier, update D(k) and U(i,k) */
1551:         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1552:         uikdi = -ba[ili] * ba[bi[i]];
1553:         dk += uikdi * ba[ili];
1554:         ba[ili] = uikdi; /* -U(i,k) */

1556:         /* add multiple of row i to k-th row ... */
1557:         jmin = ili + 1;
1558:         nz   = bi[i + 1] - jmin;
1559:         if (nz > 0) {
1560:           bcol = bj + jmin;
1561:           bval = ba + jmin;
1562:           PetscCall(PetscLogFlops(2.0 * nz));
1563:           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);

1565:           /* update il and jl for i-th row */
1566:           il[i] = jmin;
1567:           j     = bj[jmin];
1568:           jl[i] = jl[j];
1569:           jl[j] = i;
1570:         }
1571:         i = nexti;
1572:       }

1574:       /* shift the diagonals when zero pivot is detected */
1575:       /* compute rs=sum of abs(off-diagonal) */
1576:       rs   = 0.0;
1577:       jmin = bi[k] + 1;
1578:       nz   = bi[k + 1] - jmin;
1579:       if (nz) {
1580:         bcol = bj + jmin;
1581:         while (nz--) {
1582:           rs += PetscAbsScalar(rtmp[*bcol]);
1583:           bcol++;
1584:         }
1585:       }

1587:       sctx.rs = rs;
1588:       sctx.pv = dk;
1589:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1590:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1591:       dk = sctx.pv;

1593:       /* copy data into U(k,:) */
1594:       ba[bi[k]] = 1.0 / dk;
1595:       jmin      = bi[k] + 1;
1596:       nz        = bi[k + 1] - jmin;
1597:       if (nz) {
1598:         bcol = bj + jmin;
1599:         bval = ba + jmin;
1600:         while (nz--) {
1601:           *bval++       = rtmp[*bcol];
1602:           rtmp[*bcol++] = 0.0;
1603:         }
1604:         /* add k-th row into il and jl */
1605:         il[k] = jmin;
1606:         i     = bj[jmin];
1607:         jl[k] = jl[i];
1608:         jl[i] = k;
1609:       }
1610:     } /* end of for (k = 0; k<mbs; k++) */
1611:   } while (sctx.newshift);
1612:   PetscCall(PetscFree(rtmp));
1613:   PetscCall(PetscFree2(il, jl));

1615:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1616:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1617:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1618:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1619:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1621:   C->assembled    = PETSC_TRUE;
1622:   C->preallocated = PETSC_TRUE;

1624:   PetscCall(PetscLogFlops(C->rmap->N));
1625:   if (sctx.nshift) {
1626:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1627:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1628:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1629:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1630:     }
1631:   }
1632:   PetscFunctionReturn(PETSC_SUCCESS);
1633: }

1635: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1636: {
1637:   Mat C;

1639:   PetscFunctionBegin;
1640:   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1641:   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1642:   PetscCall(MatCholeskyFactorNumeric(C, A, info));

1644:   A->ops->solve          = C->ops->solve;
1645:   A->ops->solvetranspose = C->ops->solvetranspose;

1647:   PetscCall(MatHeaderMerge(A, &C));
1648:   PetscFunctionReturn(PETSC_SUCCESS);
1649: }