Actual source code: aijfact.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petscbt.h>
  4: #include <../src/mat/utils/freespace.h>

  6: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
  7: {
  8:   PetscFunctionBegin;
  9:   *type = MATSOLVERPETSC;
 10:   PetscFunctionReturn(PETSC_SUCCESS);
 11: }

 13: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
 14: {
 15:   PetscInt n = A->rmap->n;

 17:   PetscFunctionBegin;
 18: #if defined(PETSC_USE_COMPLEX)
 19:   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
 20:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
 21:     *B = NULL;
 22:     PetscFunctionReturn(PETSC_SUCCESS);
 23:   }
 24: #endif

 26:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
 27:   PetscCall(MatSetSizes(*B, n, n, n, n));
 28:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
 29:     PetscCall(MatSetType(*B, MATSEQAIJ));

 31:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
 32:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

 34:     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
 35:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
 36:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
 37:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
 38:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
 39:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
 40:     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));

 42:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
 43:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
 44:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
 45:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
 46:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
 47:   (*B)->factortype = ftype;

 49:   PetscCall(PetscFree((*B)->solvertype));
 50:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
 51:   (*B)->canuseordering = PETSC_TRUE;
 52:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
 53:   PetscFunctionReturn(PETSC_SUCCESS);
 54: }

 56: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
 57: {
 58:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
 59:   IS                 isicol;
 60:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
 61:   PetscInt           i, n = A->rmap->n;
 62:   PetscInt          *bi, *bj;
 63:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
 64:   PetscReal          f;
 65:   PetscInt           nlnk, *lnk, k, **bi_ptr;
 66:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
 67:   PetscBT            lnkbt;
 68:   PetscBool          diagDense;

 70:   PetscFunctionBegin;
 71:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
 72:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
 73:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");

 75:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
 76:   PetscCall(ISGetIndices(isrow, &r));
 77:   PetscCall(ISGetIndices(isicol, &ic));

 79:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
 80:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
 81:   PetscCall(PetscMalloc1(n + 1, &bdiag));
 82:   bi[0] = bdiag[0] = 0;

 84:   /* linked list for storing column indices of the active row */
 85:   nlnk = n + 1;
 86:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

 88:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

 90:   /* initial FreeSpace size is f*(ai[n]+1) */
 91:   f = info->fill;
 92:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
 93:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
 94:   current_space = free_space;

 96:   for (i = 0; i < n; i++) {
 97:     /* copy previous fill into linked list */
 98:     nzi   = 0;
 99:     nnz   = ai[r[i] + 1] - ai[r[i]];
100:     ajtmp = aj + ai[r[i]];
101:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
102:     nzi += nlnk;

104:     /* add pivot rows into linked list */
105:     row = lnk[n];
106:     while (row < i) {
107:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
108:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
109:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
110:       nzi += nlnk;
111:       row = lnk[row];
112:     }
113:     bi[i + 1] = bi[i] + nzi;
114:     im[i]     = nzi;

116:     /* mark bdiag */
117:     nzbd = 0;
118:     nnz  = nzi;
119:     k    = lnk[n];
120:     while (nnz-- && k < i) {
121:       nzbd++;
122:       k = lnk[k];
123:     }
124:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

126:     /* if free space is not available, make more free space */
127:     if (current_space->local_remaining < nzi) {
128:       /* estimated additional space needed */
129:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
130:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
131:       reallocs++;
132:     }

134:     /* copy data into free space, then initialize lnk */
135:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

137:     bi_ptr[i] = current_space->array;
138:     current_space->array += nzi;
139:     current_space->local_used += nzi;
140:     current_space->local_remaining -= nzi;
141:   }

143:   PetscCall(ISRestoreIndices(isrow, &r));
144:   PetscCall(ISRestoreIndices(isicol, &ic));

146:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
147:   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
148:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
149:   PetscCall(PetscLLDestroy(lnk, lnkbt));
150:   PetscCall(PetscFree2(bi_ptr, im));

152:   /* put together the new matrix */
153:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
154:   b          = (Mat_SeqAIJ *)B->data;
155:   b->free_ij = PETSC_TRUE;
156:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
157:   b->free_a = PETSC_TRUE;
158:   b->j      = bj;
159:   b->i      = bi;
160:   b->diag   = bdiag;
161:   b->ilen   = NULL;
162:   b->imax   = NULL;
163:   b->row    = isrow;
164:   b->col    = iscol;
165:   PetscCall(PetscObjectReference((PetscObject)isrow));
166:   PetscCall(PetscObjectReference((PetscObject)iscol));
167:   b->icol = isicol;
168:   PetscCall(PetscMalloc1(n, &b->solve_work));

170:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
171:   b->maxnz = b->nz = bdiag[0] + 1;

173:   B->factortype            = MAT_FACTOR_LU;
174:   B->info.factor_mallocs   = reallocs;
175:   B->info.fill_ratio_given = f;

177:   if (ai[n]) {
178:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
179:   } else {
180:     B->info.fill_ratio_needed = 0.0;
181:   }
182: #if defined(PETSC_USE_INFO)
183:   if (ai[n] != 0) {
184:     PetscReal af = B->info.fill_ratio_needed;
185:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
186:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
187:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
188:     PetscCall(PetscInfo(A, "for best performance.\n"));
189:   } else {
190:     PetscCall(PetscInfo(A, "Empty matrix\n"));
191:   }
192: #endif
193:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
194:   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
195:   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
196:   PetscFunctionReturn(PETSC_SUCCESS);
197: }

199: /*
200:     Trouble in factorization, should we dump the original matrix?
201: */
202: PetscErrorCode MatFactorDumpMatrix(Mat A)
203: {
204:   PetscBool flg = PETSC_FALSE;

206:   PetscFunctionBegin;
207:   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
208:   if (flg) {
209:     PetscViewer viewer;
210:     char        filename[PETSC_MAX_PATH_LEN];

212:     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
213:     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
214:     PetscCall(MatView(A, viewer));
215:     PetscCall(PetscViewerDestroy(&viewer));
216:   }
217:   PetscFunctionReturn(PETSC_SUCCESS);
218: }

220: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
221: {
222:   Mat              C = B;
223:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
224:   IS               isrow = b->row, isicol = b->icol;
225:   const PetscInt  *r, *ic, *ics;
226:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
227:   PetscInt         i, j, k, nz, nzL, row, *pj;
228:   const PetscInt  *ajtmp, *bjtmp;
229:   MatScalar       *rtmp, *pc, multiplier, *pv;
230:   const MatScalar *aa, *v;
231:   MatScalar       *ba;
232:   PetscBool        row_identity, col_identity;
233:   FactorShiftCtx   sctx;
234:   const PetscInt  *ddiag;
235:   PetscReal        rs;
236:   MatScalar        d;

238:   PetscFunctionBegin;
239:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
240:   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
241:   /* MatPivotSetUp(): initialize shift context sctx */
242:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

244:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
245:     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
246:     sctx.shift_top = info->zeropivot;
247:     for (i = 0; i < n; i++) {
248:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
249:       d  = aa[ddiag[i]];
250:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
251:       v  = aa + ai[i];
252:       nz = ai[i + 1] - ai[i];
253:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
254:       if (rs > sctx.shift_top) sctx.shift_top = rs;
255:     }
256:     sctx.shift_top *= 1.1;
257:     sctx.nshift_max = 5;
258:     sctx.shift_lo   = 0.;
259:     sctx.shift_hi   = 1.;
260:   }

262:   PetscCall(ISGetIndices(isrow, &r));
263:   PetscCall(ISGetIndices(isicol, &ic));
264:   PetscCall(PetscMalloc1(n + 1, &rtmp));
265:   ics = ic;

267:   do {
268:     sctx.newshift = PETSC_FALSE;
269:     for (i = 0; i < n; i++) {
270:       /* zero rtmp */
271:       /* L part */
272:       nz    = bi[i + 1] - bi[i];
273:       bjtmp = bj + bi[i];
274:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

276:       /* U part */
277:       nz    = bdiag[i] - bdiag[i + 1];
278:       bjtmp = bj + bdiag[i + 1] + 1;
279:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

281:       /* load in initial (unfactored row) */
282:       nz    = ai[r[i] + 1] - ai[r[i]];
283:       ajtmp = aj + ai[r[i]];
284:       v     = aa + ai[r[i]];
285:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
286:       /* ZeropivotApply() */
287:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

289:       /* elimination */
290:       bjtmp = bj + bi[i];
291:       row   = *bjtmp++;
292:       nzL   = bi[i + 1] - bi[i];
293:       for (k = 0; k < nzL; k++) {
294:         pc = rtmp + row;
295:         if (*pc != 0.0) {
296:           pv         = ba + bdiag[row];
297:           multiplier = *pc * (*pv);
298:           *pc        = multiplier;

300:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
301:           pv = ba + bdiag[row + 1] + 1;
302:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

304:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
305:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
306:         }
307:         row = *bjtmp++;
308:       }

310:       /* finished row so stick it into b->a */
311:       rs = 0.0;
312:       /* L part */
313:       pv = ba + bi[i];
314:       pj = b->j + bi[i];
315:       nz = bi[i + 1] - bi[i];
316:       for (j = 0; j < nz; j++) {
317:         pv[j] = rtmp[pj[j]];
318:         rs += PetscAbsScalar(pv[j]);
319:       }

321:       /* U part */
322:       pv = ba + bdiag[i + 1] + 1;
323:       pj = b->j + bdiag[i + 1] + 1;
324:       nz = bdiag[i] - bdiag[i + 1] - 1;
325:       for (j = 0; j < nz; j++) {
326:         pv[j] = rtmp[pj[j]];
327:         rs += PetscAbsScalar(pv[j]);
328:       }

330:       sctx.rs = rs;
331:       sctx.pv = rtmp[i];
332:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
333:       if (sctx.newshift) break; /* break for-loop */
334:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

336:       /* Mark diagonal and invert diagonal for simpler triangular solves */
337:       pv  = ba + bdiag[i];
338:       *pv = 1.0 / rtmp[i];

340:     } /* endof for (i=0; i<n; i++) { */

342:     /* MatPivotRefine() */
343:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
344:       /*
345:        * if no shift in this attempt & shifting & started shifting & can refine,
346:        * then try lower shift
347:        */
348:       sctx.shift_hi       = sctx.shift_fraction;
349:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
350:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
351:       sctx.newshift       = PETSC_TRUE;
352:       sctx.nshift++;
353:     }
354:   } while (sctx.newshift);

356:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
357:   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));

359:   PetscCall(PetscFree(rtmp));
360:   PetscCall(ISRestoreIndices(isicol, &ic));
361:   PetscCall(ISRestoreIndices(isrow, &r));

363:   PetscCall(ISIdentity(isrow, &row_identity));
364:   PetscCall(ISIdentity(isicol, &col_identity));
365:   if (b->inode.size_csr) {
366:     C->ops->solve = MatSolve_SeqAIJ_Inode;
367:   } else if (row_identity && col_identity) {
368:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
369:   } else {
370:     C->ops->solve = MatSolve_SeqAIJ;
371:   }
372:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
373:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
374:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
375:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
376:   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
377:   C->assembled              = PETSC_TRUE;
378:   C->preallocated           = PETSC_TRUE;

380:   PetscCall(PetscLogFlops(C->cmap->n));

382:   /* MatShiftView(A,info,&sctx) */
383:   if (sctx.nshift) {
384:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
385:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
386:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
387:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
388:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
389:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
390:     }
391:   }
392:   PetscFunctionReturn(PETSC_SUCCESS);
393: }

395: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
396: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
397: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);

399: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
400: {
401:   Mat              C = B;
402:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
403:   IS               isrow = b->row, isicol = b->icol;
404:   const PetscInt  *r, *ic, *ics;
405:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
406:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
407:   const PetscInt  *ajtmp, *bjtmp, *ddiag, *pj;
408:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
409:   const MatScalar *v, *aa;
410:   MatScalar       *ba;
411:   PetscReal        rs = 0.0;
412:   FactorShiftCtx   sctx;
413:   PetscBool        row_identity, col_identity;

415:   PetscFunctionBegin;
416:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));

418:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
419:   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
420:   /* MatPivotSetUp(): initialize shift context sctx */
421:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

423:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
424:     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
425:     sctx.shift_top = info->zeropivot;
426:     for (i = 0; i < n; i++) {
427:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
428:       d  = aa[ddiag[i]];
429:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
430:       v  = aa + ai[i];
431:       nz = ai[i + 1] - ai[i];
432:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
433:       if (rs > sctx.shift_top) sctx.shift_top = rs;
434:     }
435:     sctx.shift_top *= 1.1;
436:     sctx.nshift_max = 5;
437:     sctx.shift_lo   = 0.;
438:     sctx.shift_hi   = 1.;
439:   }

441:   PetscCall(ISGetIndices(isrow, &r));
442:   PetscCall(ISGetIndices(isicol, &ic));
443:   PetscCall(PetscMalloc1(n + 1, &rtmp));
444:   ics = ic;

446:   do {
447:     sctx.newshift = PETSC_FALSE;
448:     for (i = 0; i < n; i++) {
449:       nz    = bi[i + 1] - bi[i];
450:       bjtmp = bj + bi[i];
451:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

453:       /* load in initial (unfactored row) */
454:       nz    = ai[r[i] + 1] - ai[r[i]];
455:       ajtmp = aj + ai[r[i]];
456:       v     = aa + ai[r[i]];
457:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
458:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

460:       row = *bjtmp++;
461:       while (row < i) {
462:         pc = rtmp + row;
463:         if (*pc != 0.0) {
464:           pv         = ba + ddiag[row];
465:           pj         = b->j + ddiag[row] + 1;
466:           multiplier = *pc / *pv++;
467:           *pc        = multiplier;
468:           nz         = bi[row + 1] - ddiag[row] - 1;
469:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
470:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
471:         }
472:         row = *bjtmp++;
473:       }
474:       /* finished row so stick it into b->a */
475:       pv   = ba + bi[i];
476:       pj   = b->j + bi[i];
477:       nz   = bi[i + 1] - bi[i];
478:       diag = ddiag[i] - bi[i];
479:       rs   = 0.0;
480:       for (j = 0; j < nz; j++) {
481:         pv[j] = rtmp[pj[j]];
482:         rs += PetscAbsScalar(pv[j]);
483:       }
484:       rs -= PetscAbsScalar(pv[diag]);

486:       sctx.rs = rs;
487:       sctx.pv = pv[diag];
488:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
489:       if (sctx.newshift) break;
490:       pv[diag] = sctx.pv;
491:     }

493:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
494:       /*
495:        * if no shift in this attempt & shifting & started shifting & can refine,
496:        * then try lower shift
497:        */
498:       sctx.shift_hi       = sctx.shift_fraction;
499:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
500:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
501:       sctx.newshift       = PETSC_TRUE;
502:       sctx.nshift++;
503:     }
504:   } while (sctx.newshift);

506:   /* invert diagonal entries for simpler triangular solves */
507:   for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
508:   PetscCall(PetscFree(rtmp));
509:   PetscCall(ISRestoreIndices(isicol, &ic));
510:   PetscCall(ISRestoreIndices(isrow, &r));
511:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
512:   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));

514:   PetscCall(ISIdentity(isrow, &row_identity));
515:   PetscCall(ISIdentity(isicol, &col_identity));
516:   if (row_identity && col_identity) {
517:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
518:   } else {
519:     C->ops->solve = MatSolve_SeqAIJ_inplace;
520:   }
521:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
522:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
523:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
524:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
525:   C->ops->matsolvetranspose = NULL;

527:   C->assembled    = PETSC_TRUE;
528:   C->preallocated = PETSC_TRUE;

530:   PetscCall(PetscLogFlops(C->cmap->n));
531:   if (sctx.nshift) {
532:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
533:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
534:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
535:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
536:     }
537:   }
538:   C->ops->solve          = MatSolve_SeqAIJ_inplace;
539:   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

541:   PetscCall(MatSeqAIJCheckInode(C));
542:   PetscFunctionReturn(PETSC_SUCCESS);
543: }

545: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);

547: /*
548:    This routine implements inplace ILU(0) with row or/and column permutations.
549:    Input:
550:      A - original matrix
551:    Output;
552:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
553:          a->j (col index) is permuted by the inverse of colperm, then sorted
554:          a->a reordered accordingly with a->j
555:          a->diag (ptr to diagonal elements) is updated.
556: */
557: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
558: {
559:   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
560:   IS              isrow = a->row, isicol = a->icol;
561:   const PetscInt *r, *ic, *ics;
562:   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
563:   PetscInt       *ajtmp, nz, row;
564:   PetscInt        nbdiag, *pj;
565:   PetscScalar    *rtmp, *pc, multiplier, d;
566:   MatScalar      *pv, *v;
567:   PetscReal       rs;
568:   FactorShiftCtx  sctx;
569:   MatScalar      *aa, *vtmp;
570:   PetscInt       *diag;

572:   PetscFunctionBegin;
573:   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");

575:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
576:   PetscCall(MatSeqAIJGetArray(A, &aa));
577:   /* MatPivotSetUp(): initialize shift context sctx */
578:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

580:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
581:     const PetscInt *ddiag;

583:     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
584:     sctx.shift_top = info->zeropivot;
585:     for (i = 0; i < n; i++) {
586:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
587:       d    = aa[ddiag[i]];
588:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
589:       vtmp = aa + ai[i];
590:       nz   = ai[i + 1] - ai[i];
591:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
592:       if (rs > sctx.shift_top) sctx.shift_top = rs;
593:     }
594:     sctx.shift_top *= 1.1;
595:     sctx.nshift_max = 5;
596:     sctx.shift_lo   = 0.;
597:     sctx.shift_hi   = 1.;
598:   }

600:   PetscCall(ISGetIndices(isrow, &r));
601:   PetscCall(ISGetIndices(isicol, &ic));
602:   PetscCall(PetscMalloc1(n + 1, &rtmp));
603:   PetscCall(PetscArrayzero(rtmp, n + 1));
604:   ics = ic;

606: #if defined(MV)
607:   sctx.shift_top      = 0.;
608:   sctx.nshift_max     = 0;
609:   sctx.shift_lo       = 0.;
610:   sctx.shift_hi       = 0.;
611:   sctx.shift_fraction = 0.;

613:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
614:     sctx.shift_top = 0.;
615:     for (i = 0; i < n; i++) {
616:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
617:       d  = aa[diag[i]];
618:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
619:       v  = aa + ai[i];
620:       nz = ai[i + 1] - ai[i];
621:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
622:       if (rs > sctx.shift_top) sctx.shift_top = rs;
623:     }
624:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
625:     sctx.shift_top *= 1.1;
626:     sctx.nshift_max = 5;
627:     sctx.shift_lo   = 0.;
628:     sctx.shift_hi   = 1.;
629:   }

631:   sctx.shift_amount = 0.;
632:   sctx.nshift       = 0;
633: #endif

635:   do {
636:     sctx.newshift = PETSC_FALSE;
637:     for (i = 0; i < n; i++) {
638:       /* load in initial unfactored row */
639:       nz    = ai[r[i] + 1] - ai[r[i]];
640:       ajtmp = aj + ai[r[i]];
641:       v     = aa + ai[r[i]];
642:       /* sort permuted ajtmp and values v accordingly */
643:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
644:       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));

646:       diag[r[i]] = ai[r[i]];
647:       for (j = 0; j < nz; j++) {
648:         rtmp[ajtmp[j]] = v[j];
649:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
650:       }
651:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

653:       row = *ajtmp++;
654:       while (row < i) {
655:         pc = rtmp + row;
656:         if (*pc != 0.0) {
657:           pv = aa + diag[r[row]];
658:           pj = aj + diag[r[row]] + 1;

660:           multiplier = *pc / *pv++;
661:           *pc        = multiplier;
662:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
663:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
664:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
665:         }
666:         row = *ajtmp++;
667:       }
668:       /* finished row so overwrite it onto aa */
669:       pv     = aa + ai[r[i]];
670:       pj     = aj + ai[r[i]];
671:       nz     = ai[r[i] + 1] - ai[r[i]];
672:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

674:       rs = 0.0;
675:       for (j = 0; j < nz; j++) {
676:         pv[j] = rtmp[pj[j]];
677:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
678:       }

680:       sctx.rs = rs;
681:       sctx.pv = pv[nbdiag];
682:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
683:       if (sctx.newshift) break;
684:       pv[nbdiag] = sctx.pv;
685:     }

687:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
688:       /*
689:        * if no shift in this attempt & shifting & started shifting & can refine,
690:        * then try lower shift
691:        */
692:       sctx.shift_hi       = sctx.shift_fraction;
693:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
694:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
695:       sctx.newshift       = PETSC_TRUE;
696:       sctx.nshift++;
697:     }
698:   } while (sctx.newshift);

700:   /* invert diagonal entries for simpler triangular solves */
701:   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];

703:   PetscCall(MatSeqAIJRestoreArray(A, &aa));
704:   PetscCall(PetscFree(rtmp));
705:   PetscCall(ISRestoreIndices(isicol, &ic));
706:   PetscCall(ISRestoreIndices(isrow, &r));

708:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
709:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
710:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
711:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

713:   A->assembled    = PETSC_TRUE;
714:   A->preallocated = PETSC_TRUE;

716:   PetscCall(PetscLogFlops(A->cmap->n));
717:   if (sctx.nshift) {
718:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
719:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
720:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
721:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
722:     }
723:   }
724:   PetscFunctionReturn(PETSC_SUCCESS);
725: }

727: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
728: {
729:   Mat C;

731:   PetscFunctionBegin;
732:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
733:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
734:   PetscCall(MatLUFactorNumeric(C, A, info));

736:   A->ops->solve          = C->ops->solve;
737:   A->ops->solvetranspose = C->ops->solvetranspose;

739:   PetscCall(MatHeaderMerge(A, &C));
740:   PetscFunctionReturn(PETSC_SUCCESS);
741: }

743: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
744: {
745:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
746:   IS                 iscol = a->col, isrow = a->row;
747:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
748:   PetscInt           nz;
749:   const PetscInt    *rout, *cout, *r, *c;
750:   PetscScalar       *x, *tmp, *tmps, sum;
751:   const PetscScalar *b;
752:   const MatScalar   *aa, *v;
753:   const PetscInt    *adiag;

755:   PetscFunctionBegin;
756:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

758:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
759:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
760:   PetscCall(VecGetArrayRead(bb, &b));
761:   PetscCall(VecGetArrayWrite(xx, &x));
762:   tmp = a->solve_work;

764:   PetscCall(ISGetIndices(isrow, &rout));
765:   r = rout;
766:   PetscCall(ISGetIndices(iscol, &cout));
767:   c = cout + (n - 1);

769:   /* forward solve the lower triangular */
770:   tmp[0] = b[*r++];
771:   tmps   = tmp;
772:   for (i = 1; i < n; i++) {
773:     v   = aa + ai[i];
774:     vi  = aj + ai[i];
775:     nz  = adiag[i] - ai[i];
776:     sum = b[*r++];
777:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
778:     tmp[i] = sum;
779:   }

781:   /* backward solve the upper triangular */
782:   for (i = n - 1; i >= 0; i--) {
783:     v   = aa + adiag[i] + 1;
784:     vi  = aj + adiag[i] + 1;
785:     nz  = ai[i + 1] - adiag[i] - 1;
786:     sum = tmp[i];
787:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
788:     x[*c--] = tmp[i] = sum * aa[adiag[i]];
789:   }
790:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
791:   PetscCall(ISRestoreIndices(isrow, &rout));
792:   PetscCall(ISRestoreIndices(iscol, &cout));
793:   PetscCall(VecRestoreArrayRead(bb, &b));
794:   PetscCall(VecRestoreArrayWrite(xx, &x));
795:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
796:   PetscFunctionReturn(PETSC_SUCCESS);
797: }

799: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
800: {
801:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
802:   IS                 iscol = a->col, isrow = a->row;
803:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
804:   PetscInt           nz, neq, ldb, ldx;
805:   const PetscInt    *rout, *cout, *r, *c;
806:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
807:   const PetscScalar *b, *aa, *v;
808:   PetscBool          isdense;
809:   const PetscInt    *adiag;

811:   PetscFunctionBegin;
812:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
813:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
814:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
815:   if (X != B) {
816:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
817:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
818:   }
819:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
820:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
821:   PetscCall(MatDenseGetArrayRead(B, &b));
822:   PetscCall(MatDenseGetLDA(B, &ldb));
823:   PetscCall(MatDenseGetArray(X, &x));
824:   PetscCall(MatDenseGetLDA(X, &ldx));
825:   PetscCall(ISGetIndices(isrow, &rout));
826:   r = rout;
827:   PetscCall(ISGetIndices(iscol, &cout));
828:   c = cout;
829:   for (neq = 0; neq < B->cmap->n; neq++) {
830:     /* forward solve the lower triangular */
831:     tmp[0] = b[r[0]];
832:     tmps   = tmp;
833:     for (i = 1; i < n; i++) {
834:       v   = aa + ai[i];
835:       vi  = aj + ai[i];
836:       nz  = adiag[i] - ai[i];
837:       sum = b[r[i]];
838:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
839:       tmp[i] = sum;
840:     }
841:     /* backward solve the upper triangular */
842:     for (i = n - 1; i >= 0; i--) {
843:       v   = aa + adiag[i] + 1;
844:       vi  = aj + adiag[i] + 1;
845:       nz  = ai[i + 1] - adiag[i] - 1;
846:       sum = tmp[i];
847:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
848:       x[c[i]] = tmp[i] = sum * aa[adiag[i]];
849:     }
850:     b += ldb;
851:     x += ldx;
852:   }
853:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
854:   PetscCall(ISRestoreIndices(isrow, &rout));
855:   PetscCall(ISRestoreIndices(iscol, &cout));
856:   PetscCall(MatDenseRestoreArrayRead(B, &b));
857:   PetscCall(MatDenseRestoreArray(X, &x));
858:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
859:   PetscFunctionReturn(PETSC_SUCCESS);
860: }

862: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
863: {
864:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
865:   IS                 iscol = a->col, isrow = a->row;
866:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
867:   const PetscInt    *adiag;
868:   PetscInt           nz, neq, ldb, ldx;
869:   const PetscInt    *rout, *cout, *r, *c;
870:   PetscScalar       *x, *tmp = a->solve_work, sum;
871:   const PetscScalar *b, *aa, *v;
872:   PetscBool          isdense;

874:   PetscFunctionBegin;
875:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
876:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
877:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
878:   if (X != B) {
879:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
880:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
881:   }
882:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
883:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
884:   PetscCall(MatDenseGetArrayRead(B, &b));
885:   PetscCall(MatDenseGetLDA(B, &ldb));
886:   PetscCall(MatDenseGetArray(X, &x));
887:   PetscCall(MatDenseGetLDA(X, &ldx));
888:   PetscCall(ISGetIndices(isrow, &rout));
889:   r = rout;
890:   PetscCall(ISGetIndices(iscol, &cout));
891:   c = cout;
892:   for (neq = 0; neq < B->cmap->n; neq++) {
893:     /* forward solve the lower triangular */
894:     tmp[0] = b[r[0]];
895:     v      = aa;
896:     vi     = aj;
897:     for (i = 1; i < n; i++) {
898:       nz  = ai[i + 1] - ai[i];
899:       sum = b[r[i]];
900:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
901:       tmp[i] = sum;
902:       v += nz;
903:       vi += nz;
904:     }
905:     /* backward solve the upper triangular */
906:     for (i = n - 1; i >= 0; i--) {
907:       v   = aa + adiag[i + 1] + 1;
908:       vi  = aj + adiag[i + 1] + 1;
909:       nz  = adiag[i] - adiag[i + 1] - 1;
910:       sum = tmp[i];
911:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
912:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
913:     }
914:     b += ldb;
915:     x += ldx;
916:   }
917:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
918:   PetscCall(ISRestoreIndices(isrow, &rout));
919:   PetscCall(ISRestoreIndices(iscol, &cout));
920:   PetscCall(MatDenseRestoreArrayRead(B, &b));
921:   PetscCall(MatDenseRestoreArray(X, &x));
922:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
923:   PetscFunctionReturn(PETSC_SUCCESS);
924: }

926: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
927: {
928:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
929:   IS                 iscol = a->col, isrow = a->row;
930:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
931:   const PetscInt    *adiag = a->diag;
932:   PetscInt           nz, neq, ldb, ldx;
933:   const PetscInt    *rout, *cout, *r, *c;
934:   PetscScalar       *x, *tmp = a->solve_work, s1;
935:   const PetscScalar *b, *aa, *v;
936:   PetscBool          isdense;

938:   PetscFunctionBegin;
939:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
940:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
941:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
942:   if (X != B) {
943:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
944:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
945:   }
946:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
947:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
948:   PetscCall(MatDenseGetArrayRead(B, &b));
949:   PetscCall(MatDenseGetLDA(B, &ldb));
950:   PetscCall(MatDenseGetArray(X, &x));
951:   PetscCall(MatDenseGetLDA(X, &ldx));
952:   PetscCall(ISGetIndices(isrow, &rout));
953:   r = rout;
954:   PetscCall(ISGetIndices(iscol, &cout));
955:   c = cout;
956:   for (neq = 0; neq < B->cmap->n; neq++) {
957:     /* copy the b into temp work space according to permutation */
958:     for (i = 0; i < n; i++) tmp[i] = b[c[i]];

960:     /* forward solve the U^T */
961:     for (i = 0; i < n; i++) {
962:       v  = aa + adiag[i + 1] + 1;
963:       vi = aj + adiag[i + 1] + 1;
964:       nz = adiag[i] - adiag[i + 1] - 1;
965:       s1 = tmp[i];
966:       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
967:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
968:       tmp[i] = s1;
969:     }

971:     /* backward solve the L^T */
972:     for (i = n - 1; i >= 0; i--) {
973:       v  = aa + ai[i];
974:       vi = aj + ai[i];
975:       nz = ai[i + 1] - ai[i];
976:       s1 = tmp[i];
977:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
978:     }

980:     /* copy tmp into x according to permutation */
981:     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
982:     b += ldb;
983:     x += ldx;
984:   }
985:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
986:   PetscCall(ISRestoreIndices(isrow, &rout));
987:   PetscCall(ISRestoreIndices(iscol, &cout));
988:   PetscCall(MatDenseRestoreArrayRead(B, &b));
989:   PetscCall(MatDenseRestoreArray(X, &x));
990:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
991:   PetscFunctionReturn(PETSC_SUCCESS);
992: }

994: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
995: {
996:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
997:   IS                 iscol = a->col, isrow = a->row;
998:   const PetscInt    *r, *c, *rout, *cout, *adiag;
999:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1000:   PetscInt           nz, row;
1001:   PetscScalar       *x, *tmp, *tmps, sum;
1002:   const PetscScalar *b;
1003:   const MatScalar   *aa, *v;

1005:   PetscFunctionBegin;
1006:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1008:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1009:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1010:   PetscCall(VecGetArrayRead(bb, &b));
1011:   PetscCall(VecGetArrayWrite(xx, &x));
1012:   tmp = a->solve_work;

1014:   PetscCall(ISGetIndices(isrow, &rout));
1015:   r = rout;
1016:   PetscCall(ISGetIndices(iscol, &cout));
1017:   c = cout + (n - 1);

1019:   /* forward solve the lower triangular */
1020:   tmp[0] = b[*r++];
1021:   tmps   = tmp;
1022:   for (row = 1; row < n; row++) {
1023:     i   = rout[row]; /* permuted row */
1024:     v   = aa + ai[i];
1025:     vi  = aj + ai[i];
1026:     nz  = adiag[i] - ai[i];
1027:     sum = b[*r++];
1028:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1029:     tmp[row] = sum;
1030:   }

1032:   /* backward solve the upper triangular */
1033:   for (row = n - 1; row >= 0; row--) {
1034:     i   = rout[row]; /* permuted row */
1035:     v   = aa + adiag[i] + 1;
1036:     vi  = aj + adiag[i] + 1;
1037:     nz  = ai[i + 1] - adiag[i] - 1;
1038:     sum = tmp[row];
1039:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1040:     x[*c--] = tmp[row] = sum * aa[adiag[i]];
1041:   }
1042:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1043:   PetscCall(ISRestoreIndices(isrow, &rout));
1044:   PetscCall(ISRestoreIndices(iscol, &cout));
1045:   PetscCall(VecRestoreArrayRead(bb, &b));
1046:   PetscCall(VecRestoreArrayWrite(xx, &x));
1047:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1048:   PetscFunctionReturn(PETSC_SUCCESS);
1049: }

1051: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1052: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1053: {
1054:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1055:   PetscInt           n  = A->rmap->n;
1056:   const PetscInt    *ai = a->i, *aj = a->j, *adiag;
1057:   PetscScalar       *x;
1058:   const PetscScalar *b;
1059:   const MatScalar   *aa;
1060: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1061:   PetscInt         adiag_i, i, nz, ai_i;
1062:   const PetscInt  *vi;
1063:   const MatScalar *v;
1064:   PetscScalar      sum;
1065: #endif

1067:   PetscFunctionBegin;
1068:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1069:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1070:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1071:   PetscCall(VecGetArrayRead(bb, &b));
1072:   PetscCall(VecGetArrayWrite(xx, &x));

1074: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1075:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1076: #else
1077:   /* forward solve the lower triangular */
1078:   x[0] = b[0];
1079:   for (i = 1; i < n; i++) {
1080:     ai_i = ai[i];
1081:     v    = aa + ai_i;
1082:     vi   = aj + ai_i;
1083:     nz   = adiag[i] - ai_i;
1084:     sum  = b[i];
1085:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1086:     x[i] = sum;
1087:   }

1089:   /* backward solve the upper triangular */
1090:   for (i = n - 1; i >= 0; i--) {
1091:     adiag_i = adiag[i];
1092:     v       = aa + adiag_i + 1;
1093:     vi      = aj + adiag_i + 1;
1094:     nz      = ai[i + 1] - adiag_i - 1;
1095:     sum     = x[i];
1096:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1097:     x[i] = sum * aa[adiag_i];
1098:   }
1099: #endif
1100:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1101:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1102:   PetscCall(VecRestoreArrayRead(bb, &b));
1103:   PetscCall(VecRestoreArrayWrite(xx, &x));
1104:   PetscFunctionReturn(PETSC_SUCCESS);
1105: }

1107: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1108: {
1109:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1110:   IS                 iscol = a->col, isrow = a->row;
1111:   PetscInt           i, n                  = A->rmap->n, j;
1112:   PetscInt           nz;
1113:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1114:   PetscScalar       *x, *tmp, sum;
1115:   const PetscScalar *b;
1116:   const MatScalar   *aa, *v;

1118:   PetscFunctionBegin;
1119:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1121:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1122:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1123:   PetscCall(VecGetArrayRead(bb, &b));
1124:   PetscCall(VecGetArray(xx, &x));
1125:   tmp = a->solve_work;

1127:   PetscCall(ISGetIndices(isrow, &rout));
1128:   r = rout;
1129:   PetscCall(ISGetIndices(iscol, &cout));
1130:   c = cout + (n - 1);

1132:   /* forward solve the lower triangular */
1133:   tmp[0] = b[*r++];
1134:   for (i = 1; i < n; i++) {
1135:     v   = aa + ai[i];
1136:     vi  = aj + ai[i];
1137:     nz  = adiag[i] - ai[i];
1138:     sum = b[*r++];
1139:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1140:     tmp[i] = sum;
1141:   }

1143:   /* backward solve the upper triangular */
1144:   for (i = n - 1; i >= 0; i--) {
1145:     v   = aa + adiag[i] + 1;
1146:     vi  = aj + adiag[i] + 1;
1147:     nz  = ai[i + 1] - adiag[i] - 1;
1148:     sum = tmp[i];
1149:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1150:     tmp[i] = sum * aa[adiag[i]];
1151:     x[*c--] += tmp[i];
1152:   }

1154:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1155:   PetscCall(ISRestoreIndices(isrow, &rout));
1156:   PetscCall(ISRestoreIndices(iscol, &cout));
1157:   PetscCall(VecRestoreArrayRead(bb, &b));
1158:   PetscCall(VecRestoreArray(xx, &x));
1159:   PetscCall(PetscLogFlops(2.0 * a->nz));
1160:   PetscFunctionReturn(PETSC_SUCCESS);
1161: }

1163: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1164: {
1165:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1166:   IS                 iscol = a->col, isrow = a->row;
1167:   PetscInt           i, n                  = A->rmap->n, j;
1168:   PetscInt           nz;
1169:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1170:   PetscScalar       *x, *tmp, sum;
1171:   const PetscScalar *b;
1172:   const MatScalar   *aa, *v;

1174:   PetscFunctionBegin;
1175:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1177:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1178:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1179:   PetscCall(VecGetArrayRead(bb, &b));
1180:   PetscCall(VecGetArray(xx, &x));
1181:   tmp = a->solve_work;

1183:   PetscCall(ISGetIndices(isrow, &rout));
1184:   r = rout;
1185:   PetscCall(ISGetIndices(iscol, &cout));
1186:   c = cout;

1188:   /* forward solve the lower triangular */
1189:   tmp[0] = b[r[0]];
1190:   v      = aa;
1191:   vi     = aj;
1192:   for (i = 1; i < n; i++) {
1193:     nz  = ai[i + 1] - ai[i];
1194:     sum = b[r[i]];
1195:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1196:     tmp[i] = sum;
1197:     v += nz;
1198:     vi += nz;
1199:   }

1201:   /* backward solve the upper triangular */
1202:   v  = aa + adiag[n - 1];
1203:   vi = aj + adiag[n - 1];
1204:   for (i = n - 1; i >= 0; i--) {
1205:     nz  = adiag[i] - adiag[i + 1] - 1;
1206:     sum = tmp[i];
1207:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1208:     tmp[i] = sum * v[nz];
1209:     x[c[i]] += tmp[i];
1210:     v += nz + 1;
1211:     vi += nz + 1;
1212:   }

1214:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1215:   PetscCall(ISRestoreIndices(isrow, &rout));
1216:   PetscCall(ISRestoreIndices(iscol, &cout));
1217:   PetscCall(VecRestoreArrayRead(bb, &b));
1218:   PetscCall(VecRestoreArray(xx, &x));
1219:   PetscCall(PetscLogFlops(2.0 * a->nz));
1220:   PetscFunctionReturn(PETSC_SUCCESS);
1221: }

1223: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1224: {
1225:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1226:   IS                 iscol = a->col, isrow = a->row;
1227:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1228:   PetscInt           i, n = A->rmap->n, j;
1229:   PetscInt           nz;
1230:   PetscScalar       *x, *tmp, s1;
1231:   const MatScalar   *aa, *v;
1232:   const PetscScalar *b;

1234:   PetscFunctionBegin;
1235:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1236:   PetscCall(VecGetArrayRead(bb, &b));
1237:   PetscCall(VecGetArrayWrite(xx, &x));
1238:   tmp = a->solve_work;

1240:   PetscCall(ISGetIndices(isrow, &rout));
1241:   r = rout;
1242:   PetscCall(ISGetIndices(iscol, &cout));
1243:   c = cout;

1245:   /* copy the b into temp work space according to permutation */
1246:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1248:   /* forward solve the U^T */
1249:   for (i = 0; i < n; i++) {
1250:     v  = aa + diag[i];
1251:     vi = aj + diag[i] + 1;
1252:     nz = ai[i + 1] - diag[i] - 1;
1253:     s1 = tmp[i];
1254:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1255:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1256:     tmp[i] = s1;
1257:   }

1259:   /* backward solve the L^T */
1260:   for (i = n - 1; i >= 0; i--) {
1261:     v  = aa + diag[i] - 1;
1262:     vi = aj + diag[i] - 1;
1263:     nz = diag[i] - ai[i];
1264:     s1 = tmp[i];
1265:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1266:   }

1268:   /* copy tmp into x according to permutation */
1269:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1271:   PetscCall(ISRestoreIndices(isrow, &rout));
1272:   PetscCall(ISRestoreIndices(iscol, &cout));
1273:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1274:   PetscCall(VecRestoreArrayRead(bb, &b));
1275:   PetscCall(VecRestoreArrayWrite(xx, &x));

1277:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1278:   PetscFunctionReturn(PETSC_SUCCESS);
1279: }

1281: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1282: {
1283:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1284:   IS                 iscol = a->col, isrow = a->row;
1285:   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1286:   PetscInt           i, n = A->rmap->n, j;
1287:   PetscInt           nz;
1288:   PetscScalar       *x, *tmp, s1;
1289:   const MatScalar   *aa, *v;
1290:   const PetscScalar *b;

1292:   PetscFunctionBegin;
1293:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1294:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1295:   PetscCall(VecGetArrayRead(bb, &b));
1296:   PetscCall(VecGetArrayWrite(xx, &x));
1297:   tmp = a->solve_work;

1299:   PetscCall(ISGetIndices(isrow, &rout));
1300:   r = rout;
1301:   PetscCall(ISGetIndices(iscol, &cout));
1302:   c = cout;

1304:   /* copy the b into temp work space according to permutation */
1305:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1307:   /* forward solve the U^T */
1308:   for (i = 0; i < n; i++) {
1309:     v  = aa + adiag[i + 1] + 1;
1310:     vi = aj + adiag[i + 1] + 1;
1311:     nz = adiag[i] - adiag[i + 1] - 1;
1312:     s1 = tmp[i];
1313:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1314:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1315:     tmp[i] = s1;
1316:   }

1318:   /* backward solve the L^T */
1319:   for (i = n - 1; i >= 0; i--) {
1320:     v  = aa + ai[i];
1321:     vi = aj + ai[i];
1322:     nz = ai[i + 1] - ai[i];
1323:     s1 = tmp[i];
1324:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1325:   }

1327:   /* copy tmp into x according to permutation */
1328:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1330:   PetscCall(ISRestoreIndices(isrow, &rout));
1331:   PetscCall(ISRestoreIndices(iscol, &cout));
1332:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1333:   PetscCall(VecRestoreArrayRead(bb, &b));
1334:   PetscCall(VecRestoreArrayWrite(xx, &x));

1336:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1337:   PetscFunctionReturn(PETSC_SUCCESS);
1338: }

1340: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1341: {
1342:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1343:   IS                 iscol = a->col, isrow = a->row;
1344:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1345:   PetscInt           i, n = A->rmap->n, j;
1346:   PetscInt           nz;
1347:   PetscScalar       *x, *tmp, s1;
1348:   const MatScalar   *aa, *v;
1349:   const PetscScalar *b;

1351:   PetscFunctionBegin;
1352:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1353:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1354:   PetscCall(VecGetArrayRead(bb, &b));
1355:   PetscCall(VecGetArray(xx, &x));
1356:   tmp = a->solve_work;

1358:   PetscCall(ISGetIndices(isrow, &rout));
1359:   r = rout;
1360:   PetscCall(ISGetIndices(iscol, &cout));
1361:   c = cout;

1363:   /* copy the b into temp work space according to permutation */
1364:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1366:   /* forward solve the U^T */
1367:   for (i = 0; i < n; i++) {
1368:     v  = aa + diag[i];
1369:     vi = aj + diag[i] + 1;
1370:     nz = ai[i + 1] - diag[i] - 1;
1371:     s1 = tmp[i];
1372:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1373:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1374:     tmp[i] = s1;
1375:   }

1377:   /* backward solve the L^T */
1378:   for (i = n - 1; i >= 0; i--) {
1379:     v  = aa + diag[i] - 1;
1380:     vi = aj + diag[i] - 1;
1381:     nz = diag[i] - ai[i];
1382:     s1 = tmp[i];
1383:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1384:   }

1386:   /* copy tmp into x according to permutation */
1387:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1389:   PetscCall(ISRestoreIndices(isrow, &rout));
1390:   PetscCall(ISRestoreIndices(iscol, &cout));
1391:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1392:   PetscCall(VecRestoreArrayRead(bb, &b));
1393:   PetscCall(VecRestoreArray(xx, &x));

1395:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1396:   PetscFunctionReturn(PETSC_SUCCESS);
1397: }

1399: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1400: {
1401:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1402:   IS                 iscol = a->col, isrow = a->row;
1403:   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1404:   PetscInt           i, n = A->rmap->n, j;
1405:   PetscInt           nz;
1406:   PetscScalar       *x, *tmp, s1;
1407:   const MatScalar   *aa, *v;
1408:   const PetscScalar *b;

1410:   PetscFunctionBegin;
1411:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1412:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1413:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1414:   PetscCall(VecGetArrayRead(bb, &b));
1415:   PetscCall(VecGetArray(xx, &x));
1416:   tmp = a->solve_work;

1418:   PetscCall(ISGetIndices(isrow, &rout));
1419:   r = rout;
1420:   PetscCall(ISGetIndices(iscol, &cout));
1421:   c = cout;

1423:   /* copy the b into temp work space according to permutation */
1424:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1426:   /* forward solve the U^T */
1427:   for (i = 0; i < n; i++) {
1428:     v  = aa + adiag[i + 1] + 1;
1429:     vi = aj + adiag[i + 1] + 1;
1430:     nz = adiag[i] - adiag[i + 1] - 1;
1431:     s1 = tmp[i];
1432:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1433:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1434:     tmp[i] = s1;
1435:   }

1437:   /* backward solve the L^T */
1438:   for (i = n - 1; i >= 0; i--) {
1439:     v  = aa + ai[i];
1440:     vi = aj + ai[i];
1441:     nz = ai[i + 1] - ai[i];
1442:     s1 = tmp[i];
1443:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1444:   }

1446:   /* copy tmp into x according to permutation */
1447:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1449:   PetscCall(ISRestoreIndices(isrow, &rout));
1450:   PetscCall(ISRestoreIndices(iscol, &cout));
1451:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1452:   PetscCall(VecRestoreArrayRead(bb, &b));
1453:   PetscCall(VecRestoreArray(xx, &x));

1455:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1456:   PetscFunctionReturn(PETSC_SUCCESS);
1457: }

1459: /*
1460:    ilu() under revised new data structure.
1461:    Factored arrays bj and ba are stored as
1462:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1464:    bi=fact->i is an array of size n+1, in which
1465:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1466:      bi[n]:  points to L(n-1,n-1)+1

1468:   bdiag=fact->diag is an array of size n+1,in which
1469:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1470:      bdiag[n]: points to entry of U(n-1,0)-1

1472:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1473:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1474: */
1475: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1476: {
1477:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1478:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1479:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1480:   IS             isicol;

1482:   PetscFunctionBegin;
1483:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1484:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1485:   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1486:   b = (Mat_SeqAIJ *)fact->data;

1488:   /* allocate matrix arrays for new data structure */
1489:   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1490:   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1491:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1492:   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1493:   b->free_a  = PETSC_TRUE;
1494:   b->free_ij = PETSC_TRUE;

1496:   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1497:   bdiag = b->diag;

1499:   /* set bi and bj with new data structure */
1500:   bi = b->i;
1501:   bj = b->j;

1503:   /* L part */
1504:   bi[0] = 0;
1505:   for (i = 0; i < n; i++) {
1506:     nz        = adiag[i] - ai[i];
1507:     bi[i + 1] = bi[i] + nz;
1508:     aj        = a->j + ai[i];
1509:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1510:   }

1512:   /* U part */
1513:   bdiag[n] = bi[n] - 1;
1514:   for (i = n - 1; i >= 0; i--) {
1515:     nz = ai[i + 1] - adiag[i] - 1;
1516:     aj = a->j + adiag[i] + 1;
1517:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1518:     /* diag[i] */
1519:     bj[k++]  = i;
1520:     bdiag[i] = bdiag[i + 1] + nz + 1;
1521:   }

1523:   fact->factortype             = MAT_FACTOR_ILU;
1524:   fact->info.factor_mallocs    = 0;
1525:   fact->info.fill_ratio_given  = info->fill;
1526:   fact->info.fill_ratio_needed = 1.0;
1527:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1528:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));

1530:   b       = (Mat_SeqAIJ *)fact->data;
1531:   b->row  = isrow;
1532:   b->col  = iscol;
1533:   b->icol = isicol;
1534:   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1535:   PetscCall(PetscObjectReference((PetscObject)isrow));
1536:   PetscCall(PetscObjectReference((PetscObject)iscol));
1537:   PetscFunctionReturn(PETSC_SUCCESS);
1538: }

1540: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1541: {
1542:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1543:   IS                 isicol;
1544:   const PetscInt    *r, *ic;
1545:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1546:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1547:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1548:   PetscInt           i, levels, diagonal_fill;
1549:   PetscBool          col_identity, row_identity;
1550:   PetscReal          f;
1551:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1552:   PetscBT            lnkbt;
1553:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1554:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1555:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1556:   PetscBool          diagDense;

1558:   PetscFunctionBegin;
1559:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1560:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1561:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");

1563:   levels = (PetscInt)info->levels;
1564:   PetscCall(ISIdentity(isrow, &row_identity));
1565:   PetscCall(ISIdentity(iscol, &col_identity));
1566:   if (!levels && row_identity && col_identity) {
1567:     /* special case: ilu(0) with natural ordering */
1568:     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1569:     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1570:     PetscFunctionReturn(PETSC_SUCCESS);
1571:   }

1573:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1574:   PetscCall(ISGetIndices(isrow, &r));
1575:   PetscCall(ISGetIndices(isicol, &ic));

1577:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1578:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1579:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1580:   bi[0] = bdiag[0] = 0;
1581:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1583:   /* create a linked list for storing column indices of the active row */
1584:   nlnk = n + 1;
1585:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1587:   /* initial FreeSpace size is f*(ai[n]+1) */
1588:   f             = info->fill;
1589:   diagonal_fill = (PetscInt)info->diagonal_fill;
1590:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1591:   current_space = free_space;
1592:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1593:   current_space_lvl = free_space_lvl;
1594:   for (i = 0; i < n; i++) {
1595:     nzi = 0;
1596:     /* copy current row into linked list */
1597:     nnz = ai[r[i] + 1] - ai[r[i]];
1598:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1599:     cols   = aj + ai[r[i]];
1600:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1601:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1602:     nzi += nlnk;

1604:     /* make sure diagonal entry is included */
1605:     if (diagonal_fill && lnk[i] == -1) {
1606:       fm = n;
1607:       while (lnk[fm] < i) fm = lnk[fm];
1608:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1609:       lnk[fm]    = i;
1610:       lnk_lvl[i] = 0;
1611:       nzi++;
1612:       dcount++;
1613:     }

1615:     /* add pivot rows into the active row */
1616:     nzbd = 0;
1617:     prow = lnk[n];
1618:     while (prow < i) {
1619:       nnz      = bdiag[prow];
1620:       cols     = bj_ptr[prow] + nnz + 1;
1621:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1622:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1623:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1624:       nzi += nlnk;
1625:       prow = lnk[prow];
1626:       nzbd++;
1627:     }
1628:     bdiag[i]  = nzbd;
1629:     bi[i + 1] = bi[i] + nzi;
1630:     /* if free space is not available, make more free space */
1631:     if (current_space->local_remaining < nzi) {
1632:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1633:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1634:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1635:       reallocs++;
1636:     }

1638:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1639:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1640:     bj_ptr[i]    = current_space->array;
1641:     bjlvl_ptr[i] = current_space_lvl->array;

1643:     /* make sure the active row i has diagonal entry */
1644:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1646:     current_space->array += nzi;
1647:     current_space->local_used += nzi;
1648:     current_space->local_remaining -= nzi;
1649:     current_space_lvl->array += nzi;
1650:     current_space_lvl->local_used += nzi;
1651:     current_space_lvl->local_remaining -= nzi;
1652:   }

1654:   PetscCall(ISRestoreIndices(isrow, &r));
1655:   PetscCall(ISRestoreIndices(isicol, &ic));
1656:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1657:   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1658:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));

1660:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1661:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1662:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1664: #if defined(PETSC_USE_INFO)
1665:   {
1666:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1667:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1668:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1669:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1670:     PetscCall(PetscInfo(A, "for best performance.\n"));
1671:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1672:   }
1673: #endif
1674:   /* put together the new matrix */
1675:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1676:   b          = (Mat_SeqAIJ *)fact->data;
1677:   b->free_ij = PETSC_TRUE;
1678:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1679:   b->free_a = PETSC_TRUE;
1680:   b->j      = bj;
1681:   b->i      = bi;
1682:   b->diag   = bdiag;
1683:   b->ilen   = NULL;
1684:   b->imax   = NULL;
1685:   b->row    = isrow;
1686:   b->col    = iscol;
1687:   PetscCall(PetscObjectReference((PetscObject)isrow));
1688:   PetscCall(PetscObjectReference((PetscObject)iscol));
1689:   b->icol = isicol;

1691:   PetscCall(PetscMalloc1(n, &b->solve_work));
1692:   /* In b structure:  Free imax, ilen, old a, old j.
1693:      Allocate bdiag, solve_work, new a, new j */
1694:   b->maxnz = b->nz = bdiag[0] + 1;

1696:   fact->info.factor_mallocs    = reallocs;
1697:   fact->info.fill_ratio_given  = f;
1698:   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1699:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1700:   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1701:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1702:   PetscFunctionReturn(PETSC_SUCCESS);
1703: }

1705: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1706: {
1707:   Mat              C  = B;
1708:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1709:   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1710:   IS               ip = b->row, iip = b->icol;
1711:   const PetscInt  *rip, *riip;
1712:   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1713:   PetscInt        *ai = a->i, *aj = a->j;
1714:   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1715:   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1716:   PetscBool        perm_identity;
1717:   FactorShiftCtx   sctx;
1718:   PetscReal        rs;
1719:   const MatScalar *aa, *v;
1720:   MatScalar        d;
1721:   const PetscInt  *adiag;

1723:   PetscFunctionBegin;
1724:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1725:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1726:   /* MatPivotSetUp(): initialize shift context sctx */
1727:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1729:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1730:     sctx.shift_top = info->zeropivot;
1731:     for (i = 0; i < mbs; i++) {
1732:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1733:       d  = aa[adiag[i]];
1734:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1735:       v  = aa + ai[i];
1736:       nz = ai[i + 1] - ai[i];
1737:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1738:       if (rs > sctx.shift_top) sctx.shift_top = rs;
1739:     }
1740:     sctx.shift_top *= 1.1;
1741:     sctx.nshift_max = 5;
1742:     sctx.shift_lo   = 0.;
1743:     sctx.shift_hi   = 1.;
1744:   }

1746:   PetscCall(ISGetIndices(ip, &rip));
1747:   PetscCall(ISGetIndices(iip, &riip));

1749:   /* allocate working arrays
1750:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1751:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1752:   */
1753:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

1755:   do {
1756:     sctx.newshift = PETSC_FALSE;

1758:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1759:     if (mbs) il[0] = 0;

1761:     for (k = 0; k < mbs; k++) {
1762:       /* zero rtmp */
1763:       nz    = bi[k + 1] - bi[k];
1764:       bjtmp = bj + bi[k];
1765:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1767:       /* load in initial unfactored row */
1768:       bval = ba + bi[k];
1769:       jmin = ai[rip[k]];
1770:       jmax = ai[rip[k] + 1];
1771:       for (j = jmin; j < jmax; j++) {
1772:         col = riip[aj[j]];
1773:         if (col >= k) { /* only take upper triangular entry */
1774:           rtmp[col] = aa[j];
1775:           *bval++   = 0.0; /* for in-place factorization */
1776:         }
1777:       }
1778:       /* shift the diagonal of the matrix: ZeropivotApply() */
1779:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

1781:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1782:       dk = rtmp[k];
1783:       i  = c2r[k]; /* first row to be added to k_th row  */

1785:       while (i < k) {
1786:         nexti = c2r[i]; /* next row to be added to k_th row */

1788:         /* compute multiplier, update diag(k) and U(i,k) */
1789:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1790:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1791:         dk += uikdi * ba[ili];           /* update diag[k] */
1792:         ba[ili] = uikdi;                 /* -U(i,k) */

1794:         /* add multiple of row i to k-th row */
1795:         jmin = ili + 1;
1796:         jmax = bi[i + 1];
1797:         if (jmin < jmax) {
1798:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1799:           /* update il and c2r for row i */
1800:           il[i]  = jmin;
1801:           j      = bj[jmin];
1802:           c2r[i] = c2r[j];
1803:           c2r[j] = i;
1804:         }
1805:         i = nexti;
1806:       }

1808:       /* copy data into U(k,:) */
1809:       rs   = 0.0;
1810:       jmin = bi[k];
1811:       jmax = bi[k + 1] - 1;
1812:       if (jmin < jmax) {
1813:         for (j = jmin; j < jmax; j++) {
1814:           col   = bj[j];
1815:           ba[j] = rtmp[col];
1816:           rs += PetscAbsScalar(ba[j]);
1817:         }
1818:         /* add the k-th row into il and c2r */
1819:         il[k]  = jmin;
1820:         i      = bj[jmin];
1821:         c2r[k] = c2r[i];
1822:         c2r[i] = k;
1823:       }

1825:       /* MatPivotCheck() */
1826:       sctx.rs = rs;
1827:       sctx.pv = dk;
1828:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
1829:       if (sctx.newshift) break;
1830:       dk = sctx.pv;

1832:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1833:     }
1834:   } while (sctx.newshift);

1836:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1837:   PetscCall(PetscFree3(rtmp, il, c2r));
1838:   PetscCall(ISRestoreIndices(ip, &rip));
1839:   PetscCall(ISRestoreIndices(iip, &riip));

1841:   PetscCall(ISIdentity(ip, &perm_identity));
1842:   if (perm_identity) {
1843:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1844:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1845:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1846:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1847:   } else {
1848:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
1849:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1850:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
1851:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
1852:   }

1854:   C->assembled    = PETSC_TRUE;
1855:   C->preallocated = PETSC_TRUE;

1857:   PetscCall(PetscLogFlops(C->rmap->n));

1859:   /* MatPivotView() */
1860:   if (sctx.nshift) {
1861:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1862:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1863:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1864:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1865:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1866:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1867:     }
1868:   }
1869:   PetscFunctionReturn(PETSC_SUCCESS);
1870: }

1872: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1873: {
1874:   Mat              C  = B;
1875:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1876:   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1877:   IS               ip = b->row, iip = b->icol;
1878:   const PetscInt  *rip, *riip;
1879:   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
1880:   PetscInt        *ai = a->i, *aj = a->j;
1881:   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1882:   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1883:   const MatScalar *aa, *v;
1884:   PetscBool        perm_identity;
1885:   FactorShiftCtx   sctx;
1886:   PetscReal        rs;
1887:   MatScalar        d;
1888:   const PetscInt  *adiag;

1890:   PetscFunctionBegin;
1891:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1892:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1893:   /* MatPivotSetUp(): initialize shift context sctx */
1894:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1896:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1897:     sctx.shift_top = info->zeropivot;
1898:     for (i = 0; i < mbs; i++) {
1899:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1900:       d  = aa[adiag[i]];
1901:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1902:       v  = aa + ai[i];
1903:       nz = ai[i + 1] - ai[i];
1904:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1905:       if (rs > sctx.shift_top) sctx.shift_top = rs;
1906:     }
1907:     sctx.shift_top *= 1.1;
1908:     sctx.nshift_max = 5;
1909:     sctx.shift_lo   = 0.;
1910:     sctx.shift_hi   = 1.;
1911:   }

1913:   PetscCall(ISGetIndices(ip, &rip));
1914:   PetscCall(ISGetIndices(iip, &riip));

1916:   /* initialization */
1917:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

1919:   do {
1920:     sctx.newshift = PETSC_FALSE;

1922:     for (i = 0; i < mbs; i++) jl[i] = mbs;
1923:     il[0] = 0;

1925:     for (k = 0; k < mbs; k++) {
1926:       /* zero rtmp */
1927:       nz    = bi[k + 1] - bi[k];
1928:       bjtmp = bj + bi[k];
1929:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1931:       bval = ba + bi[k];
1932:       /* initialize k-th row by the perm[k]-th row of A */
1933:       jmin = ai[rip[k]];
1934:       jmax = ai[rip[k] + 1];
1935:       for (j = jmin; j < jmax; j++) {
1936:         col = riip[aj[j]];
1937:         if (col >= k) { /* only take upper triangular entry */
1938:           rtmp[col] = aa[j];
1939:           *bval++   = 0.0; /* for in-place factorization */
1940:         }
1941:       }
1942:       /* shift the diagonal of the matrix */
1943:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1945:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1946:       dk = rtmp[k];
1947:       i  = jl[k]; /* first row to be added to k_th row  */

1949:       while (i < k) {
1950:         nexti = jl[i]; /* next row to be added to k_th row */

1952:         /* compute multiplier, update diag(k) and U(i,k) */
1953:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1954:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1955:         dk += uikdi * ba[ili];
1956:         ba[ili] = uikdi; /* -U(i,k) */

1958:         /* add multiple of row i to k-th row */
1959:         jmin = ili + 1;
1960:         jmax = bi[i + 1];
1961:         if (jmin < jmax) {
1962:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1963:           /* update il and jl for row i */
1964:           il[i] = jmin;
1965:           j     = bj[jmin];
1966:           jl[i] = jl[j];
1967:           jl[j] = i;
1968:         }
1969:         i = nexti;
1970:       }

1972:       /* shift the diagonals when zero pivot is detected */
1973:       /* compute rs=sum of abs(off-diagonal) */
1974:       rs   = 0.0;
1975:       jmin = bi[k] + 1;
1976:       nz   = bi[k + 1] - jmin;
1977:       bcol = bj + jmin;
1978:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

1980:       sctx.rs = rs;
1981:       sctx.pv = dk;
1982:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1983:       if (sctx.newshift) break;
1984:       dk = sctx.pv;

1986:       /* copy data into U(k,:) */
1987:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1988:       jmin      = bi[k] + 1;
1989:       jmax      = bi[k + 1];
1990:       if (jmin < jmax) {
1991:         for (j = jmin; j < jmax; j++) {
1992:           col   = bj[j];
1993:           ba[j] = rtmp[col];
1994:         }
1995:         /* add the k-th row into il and jl */
1996:         il[k] = jmin;
1997:         i     = bj[jmin];
1998:         jl[k] = jl[i];
1999:         jl[i] = k;
2000:       }
2001:     }
2002:   } while (sctx.newshift);

2004:   PetscCall(PetscFree3(rtmp, il, jl));
2005:   PetscCall(ISRestoreIndices(ip, &rip));
2006:   PetscCall(ISRestoreIndices(iip, &riip));
2007:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));

2009:   PetscCall(ISIdentity(ip, &perm_identity));
2010:   if (perm_identity) {
2011:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2012:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2014:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2015:   } else {
2016:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2017:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2018:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2019:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2020:   }

2022:   C->assembled    = PETSC_TRUE;
2023:   C->preallocated = PETSC_TRUE;

2025:   PetscCall(PetscLogFlops(C->rmap->n));
2026:   if (sctx.nshift) {
2027:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2028:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2030:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2031:     }
2032:   }
2033:   PetscFunctionReturn(PETSC_SUCCESS);
2034: }

2036: /*
2037:    icc() under revised new data structure.
2038:    Factored arrays bj and ba are stored as
2039:      U(0,:),...,U(i,:),U(n-1,:)

2041:    ui=fact->i is an array of size n+1, in which
2042:    ui+
2043:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2044:      ui[n]:  points to U(n-1,n-1)+1

2046:   udiag=fact->diag is an array of size n,in which
2047:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2049:    U(i,:) contains udiag[i] as its last entry, i.e.,
2050:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2051: */

2053: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2054: {
2055:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2056:   Mat_SeqSBAIJ      *b;
2057:   PetscBool          perm_identity;
2058:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2059:   const PetscInt    *rip, *riip, *adiag;
2060:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2061:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
2062:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2063:   PetscReal          fill = info->fill, levels = info->levels;
2064:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2065:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2066:   PetscBT            lnkbt;
2067:   IS                 iperm;
2068:   PetscBool          diagDense;

2070:   PetscFunctionBegin;
2071:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2072:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2073:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2074:   PetscCall(ISIdentity(perm, &perm_identity));
2075:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2077:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2078:   PetscCall(PetscMalloc1(am + 1, &udiag));
2079:   ui[0] = 0;

2081:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2082:   if (!levels && perm_identity) {
2083:     for (i = 0; i < am; i++) {
2084:       ncols     = ai[i + 1] - adiag[i];
2085:       ui[i + 1] = ui[i] + ncols;
2086:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2087:     }
2088:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2089:     cols = uj;
2090:     for (i = 0; i < am; i++) {
2091:       aj    = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2092:       ncols = ai[i + 1] - adiag[i] - 1;
2093:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2094:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2095:     }
2096:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2097:     PetscCall(ISGetIndices(iperm, &riip));
2098:     PetscCall(ISGetIndices(perm, &rip));

2100:     /* initialization */
2101:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2103:     /* jl: linked list for storing indices of the pivot rows
2104:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2105:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2106:     for (i = 0; i < am; i++) {
2107:       jl[i] = am;
2108:       il[i] = 0;
2109:     }

2111:     /* create and initialize a linked list for storing column indices of the active row k */
2112:     nlnk = am + 1;
2113:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2115:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2116:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2117:     current_space = free_space;
2118:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2119:     current_space_lvl = free_space_lvl;

2121:     for (k = 0; k < am; k++) { /* for each active row k */
2122:       /* initialize lnk by the column indices of row rip[k] of A */
2123:       nzk   = 0;
2124:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2125:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2126:       ncols_upper = 0;
2127:       for (j = 0; j < ncols; j++) {
2128:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2129:         if (riip[i] >= k) {         /* only take upper triangular entry */
2130:           ajtmp[ncols_upper] = i;
2131:           ncols_upper++;
2132:         }
2133:       }
2134:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2135:       nzk += nlnk;

2137:       /* update lnk by computing fill-in for each pivot row to be merged in */
2138:       prow = jl[k]; /* 1st pivot row */

2140:       while (prow < k) {
2141:         nextprow = jl[prow];

2143:         /* merge prow into k-th row */
2144:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2145:         jmax  = ui[prow + 1];
2146:         ncols = jmax - jmin;
2147:         i     = jmin - ui[prow];
2148:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2149:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2150:         j     = *(uj - 1);
2151:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2152:         nzk += nlnk;

2154:         /* update il and jl for prow */
2155:         if (jmin < jmax) {
2156:           il[prow] = jmin;
2157:           j        = *cols;
2158:           jl[prow] = jl[j];
2159:           jl[j]    = prow;
2160:         }
2161:         prow = nextprow;
2162:       }

2164:       /* if free space is not available, make more free space */
2165:       if (current_space->local_remaining < nzk) {
2166:         i = am - k + 1;                                    /* num of unfactored rows */
2167:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2168:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2169:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2170:         reallocs++;
2171:       }

2173:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2174:       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2175:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2177:       /* add the k-th row into il and jl */
2178:       if (nzk > 1) {
2179:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2180:         jl[k] = jl[i];
2181:         jl[i] = k;
2182:         il[k] = ui[k] + 1;
2183:       }
2184:       uj_ptr[k]     = current_space->array;
2185:       uj_lvl_ptr[k] = current_space_lvl->array;

2187:       current_space->array += nzk;
2188:       current_space->local_used += nzk;
2189:       current_space->local_remaining -= nzk;

2191:       current_space_lvl->array += nzk;
2192:       current_space_lvl->local_used += nzk;
2193:       current_space_lvl->local_remaining -= nzk;

2195:       ui[k + 1] = ui[k] + nzk;
2196:     }

2198:     PetscCall(ISRestoreIndices(perm, &rip));
2199:     PetscCall(ISRestoreIndices(iperm, &riip));
2200:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2201:     PetscCall(PetscFree(ajtmp));

2203:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2204:     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2205:     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2206:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2207:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2209:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2211:   /* put together the new matrix in MATSEQSBAIJ format */
2212:   b          = (Mat_SeqSBAIJ *)fact->data;
2213:   b->free_ij = PETSC_TRUE;
2214:   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2215:   b->free_a = PETSC_TRUE;
2216:   b->j      = uj;
2217:   b->i      = ui;
2218:   b->diag   = udiag;
2219:   b->ilen   = NULL;
2220:   b->imax   = NULL;
2221:   b->row    = perm;
2222:   b->col    = perm;
2223:   PetscCall(PetscObjectReference((PetscObject)perm));
2224:   PetscCall(PetscObjectReference((PetscObject)perm));
2225:   b->icol          = iperm;
2226:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2228:   PetscCall(PetscMalloc1(am, &b->solve_work));

2230:   b->maxnz = b->nz = ui[am];

2232:   fact->info.factor_mallocs   = reallocs;
2233:   fact->info.fill_ratio_given = fill;
2234:   if (ai[am] != 0) {
2235:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2236:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2237:   } else {
2238:     fact->info.fill_ratio_needed = 0.0;
2239:   }
2240: #if defined(PETSC_USE_INFO)
2241:   if (ai[am] != 0) {
2242:     PetscReal af = fact->info.fill_ratio_needed;
2243:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2244:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2245:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2246:   } else {
2247:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2248:   }
2249: #endif
2250:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2251:   PetscFunctionReturn(PETSC_SUCCESS);
2252: }

2254: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2255: {
2256:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2257:   Mat_SeqSBAIJ      *b;
2258:   PetscBool          perm_identity;
2259:   PetscReal          fill = info->fill;
2260:   const PetscInt    *rip, *riip;
2261:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2262:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2263:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2264:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2265:   PetscBT            lnkbt;
2266:   IS                 iperm;
2267:   PetscBool          diagDense;

2269:   PetscFunctionBegin;
2270:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2271:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2272:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");

2274:   /* check whether perm is the identity mapping */
2275:   PetscCall(ISIdentity(perm, &perm_identity));
2276:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2277:   PetscCall(ISGetIndices(iperm, &riip));
2278:   PetscCall(ISGetIndices(perm, &rip));

2280:   /* initialization */
2281:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2282:   PetscCall(PetscMalloc1(am + 1, &udiag));
2283:   ui[0] = 0;

2285:   /* jl: linked list for storing indices of the pivot rows
2286:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2287:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2288:   for (i = 0; i < am; i++) {
2289:     jl[i] = am;
2290:     il[i] = 0;
2291:   }

2293:   /* create and initialize a linked list for storing column indices of the active row k */
2294:   nlnk = am + 1;
2295:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2297:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2298:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2299:   current_space = free_space;

2301:   for (k = 0; k < am; k++) { /* for each active row k */
2302:     /* initialize lnk by the column indices of row rip[k] of A */
2303:     nzk   = 0;
2304:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2305:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2306:     ncols_upper = 0;
2307:     for (j = 0; j < ncols; j++) {
2308:       i = riip[*(aj + ai[rip[k]] + j)];
2309:       if (i >= k) { /* only take upper triangular entry */
2310:         cols[ncols_upper] = i;
2311:         ncols_upper++;
2312:       }
2313:     }
2314:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2315:     nzk += nlnk;

2317:     /* update lnk by computing fill-in for each pivot row to be merged in */
2318:     prow = jl[k]; /* 1st pivot row */

2320:     while (prow < k) {
2321:       nextprow = jl[prow];
2322:       /* merge prow into k-th row */
2323:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2324:       jmax   = ui[prow + 1];
2325:       ncols  = jmax - jmin;
2326:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2327:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2328:       nzk += nlnk;

2330:       /* update il and jl for prow */
2331:       if (jmin < jmax) {
2332:         il[prow] = jmin;
2333:         j        = *uj_ptr;
2334:         jl[prow] = jl[j];
2335:         jl[j]    = prow;
2336:       }
2337:       prow = nextprow;
2338:     }

2340:     /* if free space is not available, make more free space */
2341:     if (current_space->local_remaining < nzk) {
2342:       i = am - k + 1;                                    /* num of unfactored rows */
2343:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2344:       PetscCall(PetscFreeSpaceGet(i, &current_space));
2345:       reallocs++;
2346:     }

2348:     /* copy data into free space, then initialize lnk */
2349:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

2351:     /* add the k-th row into il and jl */
2352:     if (nzk > 1) {
2353:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2354:       jl[k] = jl[i];
2355:       jl[i] = k;
2356:       il[k] = ui[k] + 1;
2357:     }
2358:     ui_ptr[k] = current_space->array;

2360:     current_space->array += nzk;
2361:     current_space->local_used += nzk;
2362:     current_space->local_remaining -= nzk;

2364:     ui[k + 1] = ui[k] + nzk;
2365:   }

2367:   PetscCall(ISRestoreIndices(perm, &rip));
2368:   PetscCall(ISRestoreIndices(iperm, &riip));
2369:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

2371:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2372:   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2373:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2374:   PetscCall(PetscLLDestroy(lnk, lnkbt));

2376:   /* put together the new matrix in MATSEQSBAIJ format */
2377:   b          = (Mat_SeqSBAIJ *)fact->data;
2378:   b->free_ij = PETSC_TRUE;
2379:   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2380:   b->free_a = PETSC_TRUE;
2381:   b->j      = uj;
2382:   b->i      = ui;
2383:   b->diag   = udiag;
2384:   b->ilen   = NULL;
2385:   b->imax   = NULL;
2386:   b->row    = perm;
2387:   b->col    = perm;

2389:   PetscCall(PetscObjectReference((PetscObject)perm));
2390:   PetscCall(PetscObjectReference((PetscObject)perm));

2392:   b->icol          = iperm;
2393:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2395:   PetscCall(PetscMalloc1(am, &b->solve_work));

2397:   b->maxnz = b->nz = ui[am];

2399:   fact->info.factor_mallocs   = reallocs;
2400:   fact->info.fill_ratio_given = fill;
2401:   if (ai[am] != 0) {
2402:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2403:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2404:   } else {
2405:     fact->info.fill_ratio_needed = 0.0;
2406:   }
2407: #if defined(PETSC_USE_INFO)
2408:   if (ai[am] != 0) {
2409:     PetscReal af = fact->info.fill_ratio_needed;
2410:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2411:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2412:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2413:   } else {
2414:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2415:   }
2416: #endif
2417:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2418:   PetscFunctionReturn(PETSC_SUCCESS);
2419: }

2421: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2422: {
2423:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
2424:   PetscInt           n  = A->rmap->n;
2425:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2426:   PetscScalar       *x, sum;
2427:   const PetscScalar *b;
2428:   const MatScalar   *aa, *v;
2429:   PetscInt           i, nz;

2431:   PetscFunctionBegin;
2432:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

2434:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2435:   PetscCall(VecGetArrayRead(bb, &b));
2436:   PetscCall(VecGetArrayWrite(xx, &x));

2438:   /* forward solve the lower triangular */
2439:   x[0] = b[0];
2440:   v    = aa;
2441:   vi   = aj;
2442:   for (i = 1; i < n; i++) {
2443:     nz  = ai[i + 1] - ai[i];
2444:     sum = b[i];
2445:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2446:     v += nz;
2447:     vi += nz;
2448:     x[i] = sum;
2449:   }

2451:   /* backward solve the upper triangular */
2452:   for (i = n - 1; i >= 0; i--) {
2453:     v   = aa + adiag[i + 1] + 1;
2454:     vi  = aj + adiag[i + 1] + 1;
2455:     nz  = adiag[i] - adiag[i + 1] - 1;
2456:     sum = x[i];
2457:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2458:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2459:   }

2461:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2462:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2463:   PetscCall(VecRestoreArrayRead(bb, &b));
2464:   PetscCall(VecRestoreArrayWrite(xx, &x));
2465:   PetscFunctionReturn(PETSC_SUCCESS);
2466: }

2468: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2469: {
2470:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
2471:   IS                 iscol = a->col, isrow = a->row;
2472:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2473:   const PetscInt    *rout, *cout, *r, *c;
2474:   PetscScalar       *x, *tmp, sum;
2475:   const PetscScalar *b;
2476:   const MatScalar   *aa, *v;

2478:   PetscFunctionBegin;
2479:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

2481:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2482:   PetscCall(VecGetArrayRead(bb, &b));
2483:   PetscCall(VecGetArrayWrite(xx, &x));
2484:   tmp = a->solve_work;

2486:   PetscCall(ISGetIndices(isrow, &rout));
2487:   r = rout;
2488:   PetscCall(ISGetIndices(iscol, &cout));
2489:   c = cout;

2491:   /* forward solve the lower triangular */
2492:   tmp[0] = b[r[0]];
2493:   v      = aa;
2494:   vi     = aj;
2495:   for (i = 1; i < n; i++) {
2496:     nz  = ai[i + 1] - ai[i];
2497:     sum = b[r[i]];
2498:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2499:     tmp[i] = sum;
2500:     v += nz;
2501:     vi += nz;
2502:   }

2504:   /* backward solve the upper triangular */
2505:   for (i = n - 1; i >= 0; i--) {
2506:     v   = aa + adiag[i + 1] + 1;
2507:     vi  = aj + adiag[i + 1] + 1;
2508:     nz  = adiag[i] - adiag[i + 1] - 1;
2509:     sum = tmp[i];
2510:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2511:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2512:   }

2514:   PetscCall(ISRestoreIndices(isrow, &rout));
2515:   PetscCall(ISRestoreIndices(iscol, &cout));
2516:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2517:   PetscCall(VecRestoreArrayRead(bb, &b));
2518:   PetscCall(VecRestoreArrayWrite(xx, &x));
2519:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2520:   PetscFunctionReturn(PETSC_SUCCESS);
2521: }