Actual source code: aijfact.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petscbt.h>
  4: #include <../src/mat/utils/freespace.h>

  6: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
  7: {
  8:   PetscFunctionBegin;
  9:   *type = MATSOLVERPETSC;
 10:   PetscFunctionReturn(PETSC_SUCCESS);
 11: }

 13: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
 14: {
 15:   PetscInt n = A->rmap->n;

 17:   PetscFunctionBegin;
 18:   if (PetscDefined(USE_COMPLEX) && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
 19:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
 20:     *B = NULL;
 21:     PetscFunctionReturn(PETSC_SUCCESS);
 22:   }

 24:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
 25:   PetscCall(MatSetSizes(*B, n, n, n, n));
 26:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
 27:     PetscCall(MatSetType(*B, MATSEQAIJ));

 29:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
 30:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

 32:     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
 33:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
 34:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
 35:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
 36:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
 37:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
 38:     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));

 40:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
 41:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
 42:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
 43:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
 44:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
 45:   (*B)->factortype = ftype;

 47:   PetscCall(PetscFree((*B)->solvertype));
 48:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
 49:   (*B)->canuseordering = PETSC_TRUE;
 50:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
 51:   PetscFunctionReturn(PETSC_SUCCESS);
 52: }

 54: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
 55: {
 56:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
 57:   IS                 isicol;
 58:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
 59:   PetscInt           i, n = A->rmap->n;
 60:   PetscInt          *bi, *bj;
 61:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
 62:   PetscReal          f;
 63:   PetscInt           nlnk, *lnk, k, **bi_ptr;
 64:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
 65:   PetscBT            lnkbt;
 66:   PetscBool          diagDense;

 68:   PetscFunctionBegin;
 69:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
 70:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
 71:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");

 73:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
 74:   PetscCall(ISGetIndices(isrow, &r));
 75:   PetscCall(ISGetIndices(isicol, &ic));

 77:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
 78:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
 79:   PetscCall(PetscMalloc1(n + 1, &bdiag));
 80:   bi[0] = bdiag[0] = 0;

 82:   /* linked list for storing column indices of the active row */
 83:   nlnk = n + 1;
 84:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

 86:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

 88:   /* initial FreeSpace size is f*(ai[n]+1) */
 89:   f = info->fill;
 90:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
 91:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
 92:   current_space = free_space;

 94:   for (i = 0; i < n; i++) {
 95:     /* copy previous fill into linked list */
 96:     nzi   = 0;
 97:     nnz   = ai[r[i] + 1] - ai[r[i]];
 98:     ajtmp = aj + ai[r[i]];
 99:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
100:     nzi += nlnk;

102:     /* add pivot rows into linked list */
103:     row = lnk[n];
104:     while (row < i) {
105:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
106:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
107:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
108:       nzi += nlnk;
109:       row = lnk[row];
110:     }
111:     bi[i + 1] = bi[i] + nzi;
112:     im[i]     = nzi;

114:     /* mark bdiag */
115:     nzbd = 0;
116:     nnz  = nzi;
117:     k    = lnk[n];
118:     while (nnz-- && k < i) {
119:       nzbd++;
120:       k = lnk[k];
121:     }
122:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

124:     /* if free space is not available, make more free space */
125:     if (current_space->local_remaining < nzi) {
126:       /* estimated additional space needed */
127:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
128:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
129:       reallocs++;
130:     }

132:     /* copy data into free space, then initialize lnk */
133:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

135:     bi_ptr[i] = current_space->array;
136:     current_space->array += nzi;
137:     current_space->local_used += nzi;
138:     current_space->local_remaining -= nzi;
139:   }

141:   PetscCall(ISRestoreIndices(isrow, &r));
142:   PetscCall(ISRestoreIndices(isicol, &ic));

144:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
145:   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
146:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
147:   PetscCall(PetscLLDestroy(lnk, lnkbt));
148:   PetscCall(PetscFree2(bi_ptr, im));

150:   /* put together the new matrix */
151:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
152:   b          = (Mat_SeqAIJ *)B->data;
153:   b->free_ij = PETSC_TRUE;
154:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
155:   b->free_a = PETSC_TRUE;
156:   b->j      = bj;
157:   b->i      = bi;
158:   b->diag   = bdiag;
159:   b->ilen   = NULL;
160:   b->imax   = NULL;
161:   b->row    = isrow;
162:   b->col    = iscol;
163:   PetscCall(PetscObjectReference((PetscObject)isrow));
164:   PetscCall(PetscObjectReference((PetscObject)iscol));
165:   b->icol = isicol;
166:   PetscCall(PetscMalloc1(n, &b->solve_work));

168:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
169:   b->maxnz = b->nz = bdiag[0] + 1;

171:   B->factortype            = MAT_FACTOR_LU;
172:   B->info.factor_mallocs   = reallocs;
173:   B->info.fill_ratio_given = f;

175:   if (ai[n]) {
176:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
177:   } else {
178:     B->info.fill_ratio_needed = 0.0;
179:   }
180: #if defined(PETSC_USE_INFO)
181:   if (ai[n] != 0) {
182:     PetscReal af = B->info.fill_ratio_needed;
183:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
184:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
185:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
186:     PetscCall(PetscInfo(A, "for best performance.\n"));
187:   } else {
188:     PetscCall(PetscInfo(A, "Empty matrix\n"));
189:   }
190: #endif
191:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
192:   if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
193:   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
194:   PetscFunctionReturn(PETSC_SUCCESS);
195: }

197: /*
198:     Trouble in factorization, should we dump the original matrix?
199: */
200: PetscErrorCode MatFactorDumpMatrix(Mat A)
201: {
202:   PetscBool flg = PETSC_FALSE;

204:   PetscFunctionBegin;
205:   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
206:   if (flg) {
207:     PetscViewer viewer;
208:     char        filename[PETSC_MAX_PATH_LEN];

210:     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
211:     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
212:     PetscCall(MatView(A, viewer));
213:     PetscCall(PetscViewerDestroy(&viewer));
214:   }
215:   PetscFunctionReturn(PETSC_SUCCESS);
216: }

218: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
219: {
220:   Mat              C = B;
221:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
222:   IS               isrow = b->row, isicol = b->icol;
223:   const PetscInt  *r, *ic, *ics;
224:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
225:   PetscInt         i, j, k, nz, nzL, row, *pj;
226:   const PetscInt  *ajtmp, *bjtmp;
227:   MatScalar       *rtmp, *pc, multiplier, *pv;
228:   const MatScalar *aa, *v;
229:   MatScalar       *ba;
230:   PetscBool        row_identity, col_identity;
231:   FactorShiftCtx   sctx;
232:   const PetscInt  *ddiag;
233:   PetscReal        rs;
234:   MatScalar        d;

236:   PetscFunctionBegin;
237:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
238:   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
239:   /* MatPivotSetUp(): initialize shift context sctx */
240:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

242:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
243:     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
244:     sctx.shift_top = info->zeropivot;
245:     for (i = 0; i < n; i++) {
246:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
247:       d  = aa[ddiag[i]];
248:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
249:       v  = aa + ai[i];
250:       nz = ai[i + 1] - ai[i];
251:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
252:       if (rs > sctx.shift_top) sctx.shift_top = rs;
253:     }
254:     sctx.shift_top *= 1.1;
255:     sctx.nshift_max = 5;
256:     sctx.shift_lo   = 0.;
257:     sctx.shift_hi   = 1.;
258:   }

260:   PetscCall(ISGetIndices(isrow, &r));
261:   PetscCall(ISGetIndices(isicol, &ic));
262:   PetscCall(PetscMalloc1(n + 1, &rtmp));
263:   ics = ic;

265:   do {
266:     sctx.newshift = PETSC_FALSE;
267:     for (i = 0; i < n; i++) {
268:       /* zero rtmp */
269:       /* L part */
270:       nz    = bi[i + 1] - bi[i];
271:       bjtmp = bj + bi[i];
272:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

274:       /* U part */
275:       nz    = bdiag[i] - bdiag[i + 1];
276:       bjtmp = bj + bdiag[i + 1] + 1;
277:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

279:       /* load in initial (unfactored row) */
280:       nz    = ai[r[i] + 1] - ai[r[i]];
281:       ajtmp = aj + ai[r[i]];
282:       v     = aa + ai[r[i]];
283:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
284:       /* ZeropivotApply() */
285:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

287:       /* elimination */
288:       bjtmp = bj + bi[i];
289:       row   = *bjtmp++;
290:       nzL   = bi[i + 1] - bi[i];
291:       for (k = 0; k < nzL; k++) {
292:         pc = rtmp + row;
293:         if (*pc != 0.0) {
294:           pv         = ba + bdiag[row];
295:           multiplier = *pc * (*pv);
296:           *pc        = multiplier;

298:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
299:           pv = ba + bdiag[row + 1] + 1;
300:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

302:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
303:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
304:         }
305:         row = *bjtmp++;
306:       }

308:       /* finished row so stick it into b->a */
309:       rs = 0.0;
310:       /* L part */
311:       pv = ba + bi[i];
312:       pj = b->j + bi[i];
313:       nz = bi[i + 1] - bi[i];
314:       for (j = 0; j < nz; j++) {
315:         pv[j] = rtmp[pj[j]];
316:         rs += PetscAbsScalar(pv[j]);
317:       }

319:       /* U part */
320:       pv = ba + bdiag[i + 1] + 1;
321:       pj = b->j + bdiag[i + 1] + 1;
322:       nz = bdiag[i] - bdiag[i + 1] - 1;
323:       for (j = 0; j < nz; j++) {
324:         pv[j] = rtmp[pj[j]];
325:         rs += PetscAbsScalar(pv[j]);
326:       }

328:       sctx.rs = rs;
329:       sctx.pv = rtmp[i];
330:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
331:       if (sctx.newshift) break; /* break for-loop */
332:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

334:       /* Mark diagonal and invert diagonal for simpler triangular solves */
335:       pv  = ba + bdiag[i];
336:       *pv = 1.0 / rtmp[i];

338:     } /* endof for (i=0; i<n; i++) { */

340:     /* MatPivotRefine() */
341:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
342:       /*
343:        * if no shift in this attempt & shifting & started shifting & can refine,
344:        * then try lower shift
345:        */
346:       sctx.shift_hi       = sctx.shift_fraction;
347:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
348:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
349:       sctx.newshift       = PETSC_TRUE;
350:       sctx.nshift++;
351:     }
352:   } while (sctx.newshift);

354:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
355:   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));

357:   PetscCall(PetscFree(rtmp));
358:   PetscCall(ISRestoreIndices(isicol, &ic));
359:   PetscCall(ISRestoreIndices(isrow, &r));

361:   PetscCall(ISIdentity(isrow, &row_identity));
362:   PetscCall(ISIdentity(isicol, &col_identity));
363:   if (b->inode.size_csr) {
364:     C->ops->solve = MatSolve_SeqAIJ_Inode;
365:   } else if (row_identity && col_identity) {
366:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
367:   } else {
368:     C->ops->solve = MatSolve_SeqAIJ;
369:   }
370:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
371:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
372:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
373:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
374:   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
375:   C->assembled              = PETSC_TRUE;
376:   C->preallocated           = PETSC_TRUE;

378:   PetscCall(PetscLogFlops(C->cmap->n));

380:   /* MatShiftView(A,info,&sctx) */
381:   if (sctx.nshift) {
382:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
383:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
384:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
385:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
386:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
387:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
388:     }
389:   }
390:   PetscFunctionReturn(PETSC_SUCCESS);
391: }

393: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
394: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
395: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);

397: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
398: {
399:   Mat              C = B;
400:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
401:   IS               isrow = b->row, isicol = b->icol;
402:   const PetscInt  *r, *ic, *ics;
403:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
404:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
405:   const PetscInt  *ajtmp, *bjtmp, *ddiag, *pj;
406:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
407:   const MatScalar *v, *aa;
408:   MatScalar       *ba;
409:   PetscReal        rs = 0.0;
410:   FactorShiftCtx   sctx;
411:   PetscBool        row_identity, col_identity;

413:   PetscFunctionBegin;
414:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));

416:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
417:   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
418:   /* MatPivotSetUp(): initialize shift context sctx */
419:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

421:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
422:     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
423:     sctx.shift_top = info->zeropivot;
424:     for (i = 0; i < n; i++) {
425:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
426:       d  = aa[ddiag[i]];
427:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
428:       v  = aa + ai[i];
429:       nz = ai[i + 1] - ai[i];
430:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
431:       if (rs > sctx.shift_top) sctx.shift_top = rs;
432:     }
433:     sctx.shift_top *= 1.1;
434:     sctx.nshift_max = 5;
435:     sctx.shift_lo   = 0.;
436:     sctx.shift_hi   = 1.;
437:   }

439:   PetscCall(ISGetIndices(isrow, &r));
440:   PetscCall(ISGetIndices(isicol, &ic));
441:   PetscCall(PetscMalloc1(n + 1, &rtmp));
442:   ics = ic;

444:   do {
445:     sctx.newshift = PETSC_FALSE;
446:     for (i = 0; i < n; i++) {
447:       nz    = bi[i + 1] - bi[i];
448:       bjtmp = bj + bi[i];
449:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

451:       /* load in initial (unfactored row) */
452:       nz    = ai[r[i] + 1] - ai[r[i]];
453:       ajtmp = aj + ai[r[i]];
454:       v     = aa + ai[r[i]];
455:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
456:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

458:       row = *bjtmp++;
459:       while (row < i) {
460:         pc = rtmp + row;
461:         if (*pc != 0.0) {
462:           pv         = ba + ddiag[row];
463:           pj         = b->j + ddiag[row] + 1;
464:           multiplier = *pc / *pv++;
465:           *pc        = multiplier;
466:           nz         = bi[row + 1] - ddiag[row] - 1;
467:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
468:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
469:         }
470:         row = *bjtmp++;
471:       }
472:       /* finished row so stick it into b->a */
473:       pv   = ba + bi[i];
474:       pj   = b->j + bi[i];
475:       nz   = bi[i + 1] - bi[i];
476:       diag = ddiag[i] - bi[i];
477:       rs   = 0.0;
478:       for (j = 0; j < nz; j++) {
479:         pv[j] = rtmp[pj[j]];
480:         rs += PetscAbsScalar(pv[j]);
481:       }
482:       rs -= PetscAbsScalar(pv[diag]);

484:       sctx.rs = rs;
485:       sctx.pv = pv[diag];
486:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
487:       if (sctx.newshift) break;
488:       pv[diag] = sctx.pv;
489:     }

491:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
492:       /*
493:        * if no shift in this attempt & shifting & started shifting & can refine,
494:        * then try lower shift
495:        */
496:       sctx.shift_hi       = sctx.shift_fraction;
497:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
498:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
499:       sctx.newshift       = PETSC_TRUE;
500:       sctx.nshift++;
501:     }
502:   } while (sctx.newshift);

504:   /* invert diagonal entries for simpler triangular solves */
505:   for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
506:   PetscCall(PetscFree(rtmp));
507:   PetscCall(ISRestoreIndices(isicol, &ic));
508:   PetscCall(ISRestoreIndices(isrow, &r));
509:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
510:   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));

512:   PetscCall(ISIdentity(isrow, &row_identity));
513:   PetscCall(ISIdentity(isicol, &col_identity));
514:   if (row_identity && col_identity) {
515:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
516:   } else {
517:     C->ops->solve = MatSolve_SeqAIJ_inplace;
518:   }
519:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
520:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
521:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
522:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
523:   C->ops->matsolvetranspose = NULL;

525:   C->assembled    = PETSC_TRUE;
526:   C->preallocated = PETSC_TRUE;

528:   PetscCall(PetscLogFlops(C->cmap->n));
529:   if (sctx.nshift) {
530:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
531:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
532:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
533:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
534:     }
535:   }
536:   C->ops->solve          = MatSolve_SeqAIJ_inplace;
537:   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

539:   PetscCall(MatSeqAIJCheckInode(C));
540:   PetscFunctionReturn(PETSC_SUCCESS);
541: }

543: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);

545: /*
546:    This routine implements inplace ILU(0) with row or/and column permutations.
547:    Input:
548:      A - original matrix
549:    Output;
550:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
551:          a->j (col index) is permuted by the inverse of colperm, then sorted
552:          a->a reordered accordingly with a->j
553:          a->diag (ptr to diagonal elements) is updated.
554: */
555: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
556: {
557:   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
558:   IS              isrow = a->row, isicol = a->icol;
559:   const PetscInt *r, *ic, *ics;
560:   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
561:   PetscInt       *ajtmp, nz, row;
562:   PetscInt        nbdiag, *pj;
563:   PetscScalar    *rtmp, *pc, multiplier, d;
564:   MatScalar      *pv, *v;
565:   PetscReal       rs;
566:   FactorShiftCtx  sctx;
567:   MatScalar      *aa, *vtmp;
568:   PetscInt       *diag;

570:   PetscFunctionBegin;
571:   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");

573:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
574:   PetscCall(MatSeqAIJGetArray(A, &aa));
575:   /* MatPivotSetUp(): initialize shift context sctx */
576:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

578:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
579:     const PetscInt *ddiag;

581:     PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
582:     sctx.shift_top = info->zeropivot;
583:     for (i = 0; i < n; i++) {
584:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
585:       d    = aa[ddiag[i]];
586:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
587:       vtmp = aa + ai[i];
588:       nz   = ai[i + 1] - ai[i];
589:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
590:       if (rs > sctx.shift_top) sctx.shift_top = rs;
591:     }
592:     sctx.shift_top *= 1.1;
593:     sctx.nshift_max = 5;
594:     sctx.shift_lo   = 0.;
595:     sctx.shift_hi   = 1.;
596:   }

598:   PetscCall(ISGetIndices(isrow, &r));
599:   PetscCall(ISGetIndices(isicol, &ic));
600:   PetscCall(PetscMalloc1(n + 1, &rtmp));
601:   PetscCall(PetscArrayzero(rtmp, n + 1));
602:   ics = ic;

604: #if defined(MV)
605:   sctx.shift_top      = 0.;
606:   sctx.nshift_max     = 0;
607:   sctx.shift_lo       = 0.;
608:   sctx.shift_hi       = 0.;
609:   sctx.shift_fraction = 0.;

611:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
612:     sctx.shift_top = 0.;
613:     for (i = 0; i < n; i++) {
614:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
615:       d  = aa[diag[i]];
616:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
617:       v  = aa + ai[i];
618:       nz = ai[i + 1] - ai[i];
619:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
620:       if (rs > sctx.shift_top) sctx.shift_top = rs;
621:     }
622:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
623:     sctx.shift_top *= 1.1;
624:     sctx.nshift_max = 5;
625:     sctx.shift_lo   = 0.;
626:     sctx.shift_hi   = 1.;
627:   }

629:   sctx.shift_amount = 0.;
630:   sctx.nshift       = 0;
631: #endif

633:   do {
634:     sctx.newshift = PETSC_FALSE;
635:     for (i = 0; i < n; i++) {
636:       /* load in initial unfactored row */
637:       nz    = ai[r[i] + 1] - ai[r[i]];
638:       ajtmp = aj + ai[r[i]];
639:       v     = aa + ai[r[i]];
640:       /* sort permuted ajtmp and values v accordingly */
641:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
642:       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));

644:       diag[r[i]] = ai[r[i]];
645:       for (j = 0; j < nz; j++) {
646:         rtmp[ajtmp[j]] = v[j];
647:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
648:       }
649:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

651:       row = *ajtmp++;
652:       while (row < i) {
653:         pc = rtmp + row;
654:         if (*pc != 0.0) {
655:           pv = aa + diag[r[row]];
656:           pj = aj + diag[r[row]] + 1;

658:           multiplier = *pc / *pv++;
659:           *pc        = multiplier;
660:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
661:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
662:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
663:         }
664:         row = *ajtmp++;
665:       }
666:       /* finished row so overwrite it onto aa */
667:       pv     = aa + ai[r[i]];
668:       pj     = aj + ai[r[i]];
669:       nz     = ai[r[i] + 1] - ai[r[i]];
670:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

672:       rs = 0.0;
673:       for (j = 0; j < nz; j++) {
674:         pv[j] = rtmp[pj[j]];
675:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
676:       }

678:       sctx.rs = rs;
679:       sctx.pv = pv[nbdiag];
680:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
681:       if (sctx.newshift) break;
682:       pv[nbdiag] = sctx.pv;
683:     }

685:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
686:       /*
687:        * if no shift in this attempt & shifting & started shifting & can refine,
688:        * then try lower shift
689:        */
690:       sctx.shift_hi       = sctx.shift_fraction;
691:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
692:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
693:       sctx.newshift       = PETSC_TRUE;
694:       sctx.nshift++;
695:     }
696:   } while (sctx.newshift);

698:   /* invert diagonal entries for simpler triangular solves */
699:   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];

701:   PetscCall(MatSeqAIJRestoreArray(A, &aa));
702:   PetscCall(PetscFree(rtmp));
703:   PetscCall(ISRestoreIndices(isicol, &ic));
704:   PetscCall(ISRestoreIndices(isrow, &r));

706:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
707:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
708:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
709:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

711:   A->assembled    = PETSC_TRUE;
712:   A->preallocated = PETSC_TRUE;

714:   PetscCall(PetscLogFlops(A->cmap->n));
715:   if (sctx.nshift) {
716:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
717:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
718:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
719:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
720:     }
721:   }
722:   PetscFunctionReturn(PETSC_SUCCESS);
723: }

725: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
726: {
727:   Mat C;

729:   PetscFunctionBegin;
730:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
731:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
732:   PetscCall(MatLUFactorNumeric(C, A, info));

734:   A->ops->solve          = C->ops->solve;
735:   A->ops->solvetranspose = C->ops->solvetranspose;

737:   PetscCall(MatHeaderMerge(A, &C));
738:   PetscFunctionReturn(PETSC_SUCCESS);
739: }

741: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
742: {
743:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
744:   IS                 iscol = a->col, isrow = a->row;
745:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
746:   PetscInt           nz;
747:   const PetscInt    *rout, *cout, *r, *c;
748:   PetscScalar       *x, *tmp, *tmps, sum;
749:   const PetscScalar *b;
750:   const MatScalar   *aa, *v;
751:   const PetscInt    *adiag;

753:   PetscFunctionBegin;
754:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

756:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
757:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
758:   PetscCall(VecGetArrayRead(bb, &b));
759:   PetscCall(VecGetArrayWrite(xx, &x));
760:   tmp = a->solve_work;

762:   PetscCall(ISGetIndices(isrow, &rout));
763:   r = rout;
764:   PetscCall(ISGetIndices(iscol, &cout));
765:   c = cout + (n - 1);

767:   /* forward solve the lower triangular */
768:   tmp[0] = b[*r++];
769:   tmps   = tmp;
770:   for (i = 1; i < n; i++) {
771:     v   = aa + ai[i];
772:     vi  = aj + ai[i];
773:     nz  = adiag[i] - ai[i];
774:     sum = b[*r++];
775:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
776:     tmp[i] = sum;
777:   }

779:   /* backward solve the upper triangular */
780:   for (i = n - 1; i >= 0; i--) {
781:     v   = aa + adiag[i] + 1;
782:     vi  = aj + adiag[i] + 1;
783:     nz  = ai[i + 1] - adiag[i] - 1;
784:     sum = tmp[i];
785:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
786:     x[*c--] = tmp[i] = sum * aa[adiag[i]];
787:   }
788:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
789:   PetscCall(ISRestoreIndices(isrow, &rout));
790:   PetscCall(ISRestoreIndices(iscol, &cout));
791:   PetscCall(VecRestoreArrayRead(bb, &b));
792:   PetscCall(VecRestoreArrayWrite(xx, &x));
793:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
794:   PetscFunctionReturn(PETSC_SUCCESS);
795: }

797: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
798: {
799:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
800:   IS                 iscol = a->col, isrow = a->row;
801:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
802:   PetscInt           nz, neq, ldb, ldx;
803:   const PetscInt    *rout, *cout, *r, *c;
804:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
805:   const PetscScalar *b, *aa, *v;
806:   PetscBool          isdense;
807:   const PetscInt    *adiag;

809:   PetscFunctionBegin;
810:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
811:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
812:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
813:   if (X != B) {
814:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
815:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
816:   }
817:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
818:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
819:   PetscCall(MatDenseGetArrayRead(B, &b));
820:   PetscCall(MatDenseGetLDA(B, &ldb));
821:   PetscCall(MatDenseGetArray(X, &x));
822:   PetscCall(MatDenseGetLDA(X, &ldx));
823:   PetscCall(ISGetIndices(isrow, &rout));
824:   r = rout;
825:   PetscCall(ISGetIndices(iscol, &cout));
826:   c = cout;
827:   for (neq = 0; neq < B->cmap->n; neq++) {
828:     /* forward solve the lower triangular */
829:     tmp[0] = b[r[0]];
830:     tmps   = tmp;
831:     for (i = 1; i < n; i++) {
832:       v   = aa + ai[i];
833:       vi  = aj + ai[i];
834:       nz  = adiag[i] - ai[i];
835:       sum = b[r[i]];
836:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
837:       tmp[i] = sum;
838:     }
839:     /* backward solve the upper triangular */
840:     for (i = n - 1; i >= 0; i--) {
841:       v   = aa + adiag[i] + 1;
842:       vi  = aj + adiag[i] + 1;
843:       nz  = ai[i + 1] - adiag[i] - 1;
844:       sum = tmp[i];
845:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
846:       x[c[i]] = tmp[i] = sum * aa[adiag[i]];
847:     }
848:     b += ldb;
849:     x += ldx;
850:   }
851:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
852:   PetscCall(ISRestoreIndices(isrow, &rout));
853:   PetscCall(ISRestoreIndices(iscol, &cout));
854:   PetscCall(MatDenseRestoreArrayRead(B, &b));
855:   PetscCall(MatDenseRestoreArray(X, &x));
856:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
857:   PetscFunctionReturn(PETSC_SUCCESS);
858: }

860: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
861: {
862:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
863:   IS                 iscol = a->col, isrow = a->row;
864:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
865:   const PetscInt    *adiag;
866:   PetscInt           nz, neq, ldb, ldx;
867:   const PetscInt    *rout, *cout, *r, *c;
868:   PetscScalar       *x, *tmp = a->solve_work, sum;
869:   const PetscScalar *b, *aa, *v;
870:   PetscBool          isdense;

872:   PetscFunctionBegin;
873:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
874:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
875:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
876:   if (X != B) {
877:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
878:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
879:   }
880:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
881:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
882:   PetscCall(MatDenseGetArrayRead(B, &b));
883:   PetscCall(MatDenseGetLDA(B, &ldb));
884:   PetscCall(MatDenseGetArray(X, &x));
885:   PetscCall(MatDenseGetLDA(X, &ldx));
886:   PetscCall(ISGetIndices(isrow, &rout));
887:   r = rout;
888:   PetscCall(ISGetIndices(iscol, &cout));
889:   c = cout;
890:   for (neq = 0; neq < B->cmap->n; neq++) {
891:     /* forward solve the lower triangular */
892:     tmp[0] = b[r[0]];
893:     v      = aa;
894:     vi     = aj;
895:     for (i = 1; i < n; i++) {
896:       nz  = ai[i + 1] - ai[i];
897:       sum = b[r[i]];
898:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
899:       tmp[i] = sum;
900:       v += nz;
901:       vi += nz;
902:     }
903:     /* backward solve the upper triangular */
904:     for (i = n - 1; i >= 0; i--) {
905:       v   = aa + adiag[i + 1] + 1;
906:       vi  = aj + adiag[i + 1] + 1;
907:       nz  = adiag[i] - adiag[i + 1] - 1;
908:       sum = tmp[i];
909:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
910:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
911:     }
912:     b += ldb;
913:     x += ldx;
914:   }
915:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
916:   PetscCall(ISRestoreIndices(isrow, &rout));
917:   PetscCall(ISRestoreIndices(iscol, &cout));
918:   PetscCall(MatDenseRestoreArrayRead(B, &b));
919:   PetscCall(MatDenseRestoreArray(X, &x));
920:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
921:   PetscFunctionReturn(PETSC_SUCCESS);
922: }

924: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
925: {
926:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
927:   IS                 iscol = a->col, isrow = a->row;
928:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
929:   const PetscInt    *adiag = a->diag;
930:   PetscInt           nz, neq, ldb, ldx;
931:   const PetscInt    *rout, *cout, *r, *c;
932:   PetscScalar       *x, *tmp = a->solve_work, s1;
933:   const PetscScalar *b, *aa, *v;
934:   PetscBool          isdense;

936:   PetscFunctionBegin;
937:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
938:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
939:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
940:   if (X != B) {
941:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
942:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
943:   }
944:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
945:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
946:   PetscCall(MatDenseGetArrayRead(B, &b));
947:   PetscCall(MatDenseGetLDA(B, &ldb));
948:   PetscCall(MatDenseGetArray(X, &x));
949:   PetscCall(MatDenseGetLDA(X, &ldx));
950:   PetscCall(ISGetIndices(isrow, &rout));
951:   r = rout;
952:   PetscCall(ISGetIndices(iscol, &cout));
953:   c = cout;
954:   for (neq = 0; neq < B->cmap->n; neq++) {
955:     /* copy the b into temp work space according to permutation */
956:     for (i = 0; i < n; i++) tmp[i] = b[c[i]];

958:     /* forward solve the U^T */
959:     for (i = 0; i < n; i++) {
960:       v  = aa + adiag[i + 1] + 1;
961:       vi = aj + adiag[i + 1] + 1;
962:       nz = adiag[i] - adiag[i + 1] - 1;
963:       s1 = tmp[i];
964:       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
965:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
966:       tmp[i] = s1;
967:     }

969:     /* backward solve the L^T */
970:     for (i = n - 1; i >= 0; i--) {
971:       v  = aa + ai[i];
972:       vi = aj + ai[i];
973:       nz = ai[i + 1] - ai[i];
974:       s1 = tmp[i];
975:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
976:     }

978:     /* copy tmp into x according to permutation */
979:     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
980:     b += ldb;
981:     x += ldx;
982:   }
983:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
984:   PetscCall(ISRestoreIndices(isrow, &rout));
985:   PetscCall(ISRestoreIndices(iscol, &cout));
986:   PetscCall(MatDenseRestoreArrayRead(B, &b));
987:   PetscCall(MatDenseRestoreArray(X, &x));
988:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
989:   PetscFunctionReturn(PETSC_SUCCESS);
990: }

992: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
993: {
994:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
995:   IS                 iscol = a->col, isrow = a->row;
996:   const PetscInt    *r, *c, *rout, *cout, *adiag;
997:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
998:   PetscInt           nz, row;
999:   PetscScalar       *x, *tmp, *tmps, sum;
1000:   const PetscScalar *b;
1001:   const MatScalar   *aa, *v;

1003:   PetscFunctionBegin;
1004:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1006:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1007:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1008:   PetscCall(VecGetArrayRead(bb, &b));
1009:   PetscCall(VecGetArrayWrite(xx, &x));
1010:   tmp = a->solve_work;

1012:   PetscCall(ISGetIndices(isrow, &rout));
1013:   r = rout;
1014:   PetscCall(ISGetIndices(iscol, &cout));
1015:   c = cout + (n - 1);

1017:   /* forward solve the lower triangular */
1018:   tmp[0] = b[*r++];
1019:   tmps   = tmp;
1020:   for (row = 1; row < n; row++) {
1021:     i   = rout[row]; /* permuted row */
1022:     v   = aa + ai[i];
1023:     vi  = aj + ai[i];
1024:     nz  = adiag[i] - ai[i];
1025:     sum = b[*r++];
1026:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1027:     tmp[row] = sum;
1028:   }

1030:   /* backward solve the upper triangular */
1031:   for (row = n - 1; row >= 0; row--) {
1032:     i   = rout[row]; /* permuted row */
1033:     v   = aa + adiag[i] + 1;
1034:     vi  = aj + adiag[i] + 1;
1035:     nz  = ai[i + 1] - adiag[i] - 1;
1036:     sum = tmp[row];
1037:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1038:     x[*c--] = tmp[row] = sum * aa[adiag[i]];
1039:   }
1040:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1041:   PetscCall(ISRestoreIndices(isrow, &rout));
1042:   PetscCall(ISRestoreIndices(iscol, &cout));
1043:   PetscCall(VecRestoreArrayRead(bb, &b));
1044:   PetscCall(VecRestoreArrayWrite(xx, &x));
1045:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1046:   PetscFunctionReturn(PETSC_SUCCESS);
1047: }

1049: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1050: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1051: {
1052:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1053:   PetscInt           n  = A->rmap->n;
1054:   const PetscInt    *ai = a->i, *aj = a->j, *adiag;
1055:   PetscScalar       *x;
1056:   const PetscScalar *b;
1057:   const MatScalar   *aa;
1058: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1059:   PetscInt         adiag_i, i, nz, ai_i;
1060:   const PetscInt  *vi;
1061:   const MatScalar *v;
1062:   PetscScalar      sum;
1063: #endif

1065:   PetscFunctionBegin;
1066:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1067:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1068:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1069:   PetscCall(VecGetArrayRead(bb, &b));
1070:   PetscCall(VecGetArrayWrite(xx, &x));

1072: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1073:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1074: #else
1075:   /* forward solve the lower triangular */
1076:   x[0] = b[0];
1077:   for (i = 1; i < n; i++) {
1078:     ai_i = ai[i];
1079:     v    = aa + ai_i;
1080:     vi   = aj + ai_i;
1081:     nz   = adiag[i] - ai_i;
1082:     sum  = b[i];
1083:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1084:     x[i] = sum;
1085:   }

1087:   /* backward solve the upper triangular */
1088:   for (i = n - 1; i >= 0; i--) {
1089:     adiag_i = adiag[i];
1090:     v       = aa + adiag_i + 1;
1091:     vi      = aj + adiag_i + 1;
1092:     nz      = ai[i + 1] - adiag_i - 1;
1093:     sum     = x[i];
1094:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1095:     x[i] = sum * aa[adiag_i];
1096:   }
1097: #endif
1098:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1099:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1100:   PetscCall(VecRestoreArrayRead(bb, &b));
1101:   PetscCall(VecRestoreArrayWrite(xx, &x));
1102:   PetscFunctionReturn(PETSC_SUCCESS);
1103: }

1105: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1106: {
1107:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1108:   IS                 iscol = a->col, isrow = a->row;
1109:   PetscInt           i, n                  = A->rmap->n, j;
1110:   PetscInt           nz;
1111:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1112:   PetscScalar       *x, *tmp, sum;
1113:   const PetscScalar *b;
1114:   const MatScalar   *aa, *v;

1116:   PetscFunctionBegin;
1117:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1119:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1120:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1121:   PetscCall(VecGetArrayRead(bb, &b));
1122:   PetscCall(VecGetArray(xx, &x));
1123:   tmp = a->solve_work;

1125:   PetscCall(ISGetIndices(isrow, &rout));
1126:   r = rout;
1127:   PetscCall(ISGetIndices(iscol, &cout));
1128:   c = cout + (n - 1);

1130:   /* forward solve the lower triangular */
1131:   tmp[0] = b[*r++];
1132:   for (i = 1; i < n; i++) {
1133:     v   = aa + ai[i];
1134:     vi  = aj + ai[i];
1135:     nz  = adiag[i] - ai[i];
1136:     sum = b[*r++];
1137:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1138:     tmp[i] = sum;
1139:   }

1141:   /* backward solve the upper triangular */
1142:   for (i = n - 1; i >= 0; i--) {
1143:     v   = aa + adiag[i] + 1;
1144:     vi  = aj + adiag[i] + 1;
1145:     nz  = ai[i + 1] - adiag[i] - 1;
1146:     sum = tmp[i];
1147:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1148:     tmp[i] = sum * aa[adiag[i]];
1149:     x[*c--] += tmp[i];
1150:   }

1152:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1153:   PetscCall(ISRestoreIndices(isrow, &rout));
1154:   PetscCall(ISRestoreIndices(iscol, &cout));
1155:   PetscCall(VecRestoreArrayRead(bb, &b));
1156:   PetscCall(VecRestoreArray(xx, &x));
1157:   PetscCall(PetscLogFlops(2.0 * a->nz));
1158:   PetscFunctionReturn(PETSC_SUCCESS);
1159: }

1161: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1162: {
1163:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1164:   IS                 iscol = a->col, isrow = a->row;
1165:   PetscInt           i, n                  = A->rmap->n, j;
1166:   PetscInt           nz;
1167:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1168:   PetscScalar       *x, *tmp, sum;
1169:   const PetscScalar *b;
1170:   const MatScalar   *aa, *v;

1172:   PetscFunctionBegin;
1173:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1175:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1176:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1177:   PetscCall(VecGetArrayRead(bb, &b));
1178:   PetscCall(VecGetArray(xx, &x));
1179:   tmp = a->solve_work;

1181:   PetscCall(ISGetIndices(isrow, &rout));
1182:   r = rout;
1183:   PetscCall(ISGetIndices(iscol, &cout));
1184:   c = cout;

1186:   /* forward solve the lower triangular */
1187:   tmp[0] = b[r[0]];
1188:   v      = aa;
1189:   vi     = aj;
1190:   for (i = 1; i < n; i++) {
1191:     nz  = ai[i + 1] - ai[i];
1192:     sum = b[r[i]];
1193:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1194:     tmp[i] = sum;
1195:     v += nz;
1196:     vi += nz;
1197:   }

1199:   /* backward solve the upper triangular */
1200:   v  = aa + adiag[n - 1];
1201:   vi = aj + adiag[n - 1];
1202:   for (i = n - 1; i >= 0; i--) {
1203:     nz  = adiag[i] - adiag[i + 1] - 1;
1204:     sum = tmp[i];
1205:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1206:     tmp[i] = sum * v[nz];
1207:     x[c[i]] += tmp[i];
1208:     v += nz + 1;
1209:     vi += nz + 1;
1210:   }

1212:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1213:   PetscCall(ISRestoreIndices(isrow, &rout));
1214:   PetscCall(ISRestoreIndices(iscol, &cout));
1215:   PetscCall(VecRestoreArrayRead(bb, &b));
1216:   PetscCall(VecRestoreArray(xx, &x));
1217:   PetscCall(PetscLogFlops(2.0 * a->nz));
1218:   PetscFunctionReturn(PETSC_SUCCESS);
1219: }

1221: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1222: {
1223:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1224:   IS                 iscol = a->col, isrow = a->row;
1225:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1226:   PetscInt           i, n = A->rmap->n, j;
1227:   PetscInt           nz;
1228:   PetscScalar       *x, *tmp, s1;
1229:   const MatScalar   *aa, *v;
1230:   const PetscScalar *b;

1232:   PetscFunctionBegin;
1233:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1234:   PetscCall(VecGetArrayRead(bb, &b));
1235:   PetscCall(VecGetArrayWrite(xx, &x));
1236:   tmp = a->solve_work;

1238:   PetscCall(ISGetIndices(isrow, &rout));
1239:   r = rout;
1240:   PetscCall(ISGetIndices(iscol, &cout));
1241:   c = cout;

1243:   /* copy the b into temp work space according to permutation */
1244:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1246:   /* forward solve the U^T */
1247:   for (i = 0; i < n; i++) {
1248:     v  = aa + diag[i];
1249:     vi = aj + diag[i] + 1;
1250:     nz = ai[i + 1] - diag[i] - 1;
1251:     s1 = tmp[i];
1252:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1253:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1254:     tmp[i] = s1;
1255:   }

1257:   /* backward solve the L^T */
1258:   for (i = n - 1; i >= 0; i--) {
1259:     v  = aa + diag[i] - 1;
1260:     vi = aj + diag[i] - 1;
1261:     nz = diag[i] - ai[i];
1262:     s1 = tmp[i];
1263:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1264:   }

1266:   /* copy tmp into x according to permutation */
1267:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1269:   PetscCall(ISRestoreIndices(isrow, &rout));
1270:   PetscCall(ISRestoreIndices(iscol, &cout));
1271:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1272:   PetscCall(VecRestoreArrayRead(bb, &b));
1273:   PetscCall(VecRestoreArrayWrite(xx, &x));

1275:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1276:   PetscFunctionReturn(PETSC_SUCCESS);
1277: }

1279: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1280: {
1281:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1282:   IS                 iscol = a->col, isrow = a->row;
1283:   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1284:   PetscInt           i, n = A->rmap->n, j;
1285:   PetscInt           nz;
1286:   PetscScalar       *x, *tmp, s1;
1287:   const MatScalar   *aa, *v;
1288:   const PetscScalar *b;

1290:   PetscFunctionBegin;
1291:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1292:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1293:   PetscCall(VecGetArrayRead(bb, &b));
1294:   PetscCall(VecGetArrayWrite(xx, &x));
1295:   tmp = a->solve_work;

1297:   PetscCall(ISGetIndices(isrow, &rout));
1298:   r = rout;
1299:   PetscCall(ISGetIndices(iscol, &cout));
1300:   c = cout;

1302:   /* copy the b into temp work space according to permutation */
1303:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1305:   /* forward solve the U^T */
1306:   for (i = 0; i < n; i++) {
1307:     v  = aa + adiag[i + 1] + 1;
1308:     vi = aj + adiag[i + 1] + 1;
1309:     nz = adiag[i] - adiag[i + 1] - 1;
1310:     s1 = tmp[i];
1311:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1312:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1313:     tmp[i] = s1;
1314:   }

1316:   /* backward solve the L^T */
1317:   for (i = n - 1; i >= 0; i--) {
1318:     v  = aa + ai[i];
1319:     vi = aj + ai[i];
1320:     nz = ai[i + 1] - ai[i];
1321:     s1 = tmp[i];
1322:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1323:   }

1325:   /* copy tmp into x according to permutation */
1326:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1328:   PetscCall(ISRestoreIndices(isrow, &rout));
1329:   PetscCall(ISRestoreIndices(iscol, &cout));
1330:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1331:   PetscCall(VecRestoreArrayRead(bb, &b));
1332:   PetscCall(VecRestoreArrayWrite(xx, &x));

1334:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1335:   PetscFunctionReturn(PETSC_SUCCESS);
1336: }

1338: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1339: {
1340:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1341:   IS                 iscol = a->col, isrow = a->row;
1342:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1343:   PetscInt           i, n = A->rmap->n, j;
1344:   PetscInt           nz;
1345:   PetscScalar       *x, *tmp, s1;
1346:   const MatScalar   *aa, *v;
1347:   const PetscScalar *b;

1349:   PetscFunctionBegin;
1350:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1351:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1352:   PetscCall(VecGetArrayRead(bb, &b));
1353:   PetscCall(VecGetArray(xx, &x));
1354:   tmp = a->solve_work;

1356:   PetscCall(ISGetIndices(isrow, &rout));
1357:   r = rout;
1358:   PetscCall(ISGetIndices(iscol, &cout));
1359:   c = cout;

1361:   /* copy the b into temp work space according to permutation */
1362:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1364:   /* forward solve the U^T */
1365:   for (i = 0; i < n; i++) {
1366:     v  = aa + diag[i];
1367:     vi = aj + diag[i] + 1;
1368:     nz = ai[i + 1] - diag[i] - 1;
1369:     s1 = tmp[i];
1370:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1371:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1372:     tmp[i] = s1;
1373:   }

1375:   /* backward solve the L^T */
1376:   for (i = n - 1; i >= 0; i--) {
1377:     v  = aa + diag[i] - 1;
1378:     vi = aj + diag[i] - 1;
1379:     nz = diag[i] - ai[i];
1380:     s1 = tmp[i];
1381:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1382:   }

1384:   /* copy tmp into x according to permutation */
1385:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1387:   PetscCall(ISRestoreIndices(isrow, &rout));
1388:   PetscCall(ISRestoreIndices(iscol, &cout));
1389:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1390:   PetscCall(VecRestoreArrayRead(bb, &b));
1391:   PetscCall(VecRestoreArray(xx, &x));

1393:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1394:   PetscFunctionReturn(PETSC_SUCCESS);
1395: }

1397: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1398: {
1399:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1400:   IS                 iscol = a->col, isrow = a->row;
1401:   const PetscInt    *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1402:   PetscInt           i, n = A->rmap->n, j;
1403:   PetscInt           nz;
1404:   PetscScalar       *x, *tmp, s1;
1405:   const MatScalar   *aa, *v;
1406:   const PetscScalar *b;

1408:   PetscFunctionBegin;
1409:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1410:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1411:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1412:   PetscCall(VecGetArrayRead(bb, &b));
1413:   PetscCall(VecGetArray(xx, &x));
1414:   tmp = a->solve_work;

1416:   PetscCall(ISGetIndices(isrow, &rout));
1417:   r = rout;
1418:   PetscCall(ISGetIndices(iscol, &cout));
1419:   c = cout;

1421:   /* copy the b into temp work space according to permutation */
1422:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1424:   /* forward solve the U^T */
1425:   for (i = 0; i < n; i++) {
1426:     v  = aa + adiag[i + 1] + 1;
1427:     vi = aj + adiag[i + 1] + 1;
1428:     nz = adiag[i] - adiag[i + 1] - 1;
1429:     s1 = tmp[i];
1430:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1431:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1432:     tmp[i] = s1;
1433:   }

1435:   /* backward solve the L^T */
1436:   for (i = n - 1; i >= 0; i--) {
1437:     v  = aa + ai[i];
1438:     vi = aj + ai[i];
1439:     nz = ai[i + 1] - ai[i];
1440:     s1 = tmp[i];
1441:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442:   }

1444:   /* copy tmp into x according to permutation */
1445:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1447:   PetscCall(ISRestoreIndices(isrow, &rout));
1448:   PetscCall(ISRestoreIndices(iscol, &cout));
1449:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1450:   PetscCall(VecRestoreArrayRead(bb, &b));
1451:   PetscCall(VecRestoreArray(xx, &x));

1453:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1454:   PetscFunctionReturn(PETSC_SUCCESS);
1455: }

1457: /*
1458:    ilu() under revised new data structure.
1459:    Factored arrays bj and ba are stored as
1460:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1462:    bi=fact->i is an array of size n+1, in which
1463:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1464:      bi[n]:  points to L(n-1,n-1)+1

1466:   bdiag=fact->diag is an array of size n+1,in which
1467:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1468:      bdiag[n]: points to entry of U(n-1,0)-1

1470:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1471:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1472: */
1473: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1474: {
1475:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1476:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1477:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1478:   IS             isicol;

1480:   PetscFunctionBegin;
1481:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1482:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1483:   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1484:   b = (Mat_SeqAIJ *)fact->data;

1486:   /* allocate matrix arrays for new data structure */
1487:   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1488:   PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1489:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1490:   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1491:   b->free_a  = PETSC_TRUE;
1492:   b->free_ij = PETSC_TRUE;

1494:   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1495:   bdiag = b->diag;

1497:   /* set bi and bj with new data structure */
1498:   bi = b->i;
1499:   bj = b->j;

1501:   /* L part */
1502:   bi[0] = 0;
1503:   for (i = 0; i < n; i++) {
1504:     nz        = adiag[i] - ai[i];
1505:     bi[i + 1] = bi[i] + nz;
1506:     aj        = a->j + ai[i];
1507:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1508:   }

1510:   /* U part */
1511:   bdiag[n] = bi[n] - 1;
1512:   for (i = n - 1; i >= 0; i--) {
1513:     nz = ai[i + 1] - adiag[i] - 1;
1514:     aj = a->j + adiag[i] + 1;
1515:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1516:     /* diag[i] */
1517:     bj[k++]  = i;
1518:     bdiag[i] = bdiag[i + 1] + nz + 1;
1519:   }

1521:   fact->factortype             = MAT_FACTOR_ILU;
1522:   fact->info.factor_mallocs    = 0;
1523:   fact->info.fill_ratio_given  = info->fill;
1524:   fact->info.fill_ratio_needed = 1.0;
1525:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1526:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));

1528:   b       = (Mat_SeqAIJ *)fact->data;
1529:   b->row  = isrow;
1530:   b->col  = iscol;
1531:   b->icol = isicol;
1532:   PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1533:   PetscCall(PetscObjectReference((PetscObject)isrow));
1534:   PetscCall(PetscObjectReference((PetscObject)iscol));
1535:   PetscFunctionReturn(PETSC_SUCCESS);
1536: }

1538: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1539: {
1540:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1541:   IS                 isicol;
1542:   const PetscInt    *r, *ic;
1543:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1544:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1545:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1546:   PetscInt           i, levels, diagonal_fill;
1547:   PetscBool          col_identity, row_identity;
1548:   PetscReal          f;
1549:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1550:   PetscBT            lnkbt;
1551:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1552:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1553:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1554:   PetscBool          diagDense;

1556:   PetscFunctionBegin;
1557:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1558:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1559:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");

1561:   levels = (PetscInt)info->levels;
1562:   PetscCall(ISIdentity(isrow, &row_identity));
1563:   PetscCall(ISIdentity(iscol, &col_identity));
1564:   if (!levels && row_identity && col_identity) {
1565:     /* special case: ilu(0) with natural ordering */
1566:     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1567:     if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1568:     PetscFunctionReturn(PETSC_SUCCESS);
1569:   }

1571:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1572:   PetscCall(ISGetIndices(isrow, &r));
1573:   PetscCall(ISGetIndices(isicol, &ic));

1575:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1576:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1577:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1578:   bi[0] = bdiag[0] = 0;
1579:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1581:   /* create a linked list for storing column indices of the active row */
1582:   nlnk = n + 1;
1583:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1585:   /* initial FreeSpace size is f*(ai[n]+1) */
1586:   f             = info->fill;
1587:   diagonal_fill = (PetscInt)info->diagonal_fill;
1588:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1589:   current_space = free_space;
1590:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1591:   current_space_lvl = free_space_lvl;
1592:   for (i = 0; i < n; i++) {
1593:     nzi = 0;
1594:     /* copy current row into linked list */
1595:     nnz = ai[r[i] + 1] - ai[r[i]];
1596:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1597:     cols   = aj + ai[r[i]];
1598:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1599:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1600:     nzi += nlnk;

1602:     /* make sure diagonal entry is included */
1603:     if (diagonal_fill && lnk[i] == -1) {
1604:       fm = n;
1605:       while (lnk[fm] < i) fm = lnk[fm];
1606:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1607:       lnk[fm]    = i;
1608:       lnk_lvl[i] = 0;
1609:       nzi++;
1610:       dcount++;
1611:     }

1613:     /* add pivot rows into the active row */
1614:     nzbd = 0;
1615:     prow = lnk[n];
1616:     while (prow < i) {
1617:       nnz      = bdiag[prow];
1618:       cols     = bj_ptr[prow] + nnz + 1;
1619:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1620:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1621:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1622:       nzi += nlnk;
1623:       prow = lnk[prow];
1624:       nzbd++;
1625:     }
1626:     bdiag[i]  = nzbd;
1627:     bi[i + 1] = bi[i] + nzi;
1628:     /* if free space is not available, make more free space */
1629:     if (current_space->local_remaining < nzi) {
1630:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1631:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1632:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1633:       reallocs++;
1634:     }

1636:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1637:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1638:     bj_ptr[i]    = current_space->array;
1639:     bjlvl_ptr[i] = current_space_lvl->array;

1641:     /* make sure the active row i has diagonal entry */
1642:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1644:     current_space->array += nzi;
1645:     current_space->local_used += nzi;
1646:     current_space->local_remaining -= nzi;
1647:     current_space_lvl->array += nzi;
1648:     current_space_lvl->local_used += nzi;
1649:     current_space_lvl->local_remaining -= nzi;
1650:   }

1652:   PetscCall(ISRestoreIndices(isrow, &r));
1653:   PetscCall(ISRestoreIndices(isicol, &ic));
1654:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1655:   PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1656:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));

1658:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1659:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1660:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1662: #if defined(PETSC_USE_INFO)
1663:   {
1664:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1665:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1666:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1667:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1668:     PetscCall(PetscInfo(A, "for best performance.\n"));
1669:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1670:   }
1671: #endif
1672:   /* put together the new matrix */
1673:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1674:   b          = (Mat_SeqAIJ *)fact->data;
1675:   b->free_ij = PETSC_TRUE;
1676:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1677:   b->free_a = PETSC_TRUE;
1678:   b->j      = bj;
1679:   b->i      = bi;
1680:   b->diag   = bdiag;
1681:   b->ilen   = NULL;
1682:   b->imax   = NULL;
1683:   b->row    = isrow;
1684:   b->col    = iscol;
1685:   PetscCall(PetscObjectReference((PetscObject)isrow));
1686:   PetscCall(PetscObjectReference((PetscObject)iscol));
1687:   b->icol = isicol;

1689:   PetscCall(PetscMalloc1(n, &b->solve_work));
1690:   /* In b structure:  Free imax, ilen, old a, old j.
1691:      Allocate bdiag, solve_work, new a, new j */
1692:   b->maxnz = b->nz = bdiag[0] + 1;

1694:   fact->info.factor_mallocs    = reallocs;
1695:   fact->info.fill_ratio_given  = f;
1696:   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1697:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1698:   if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1699:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1700:   PetscFunctionReturn(PETSC_SUCCESS);
1701: }

1703: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1704: {
1705:   Mat              C  = B;
1706:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1707:   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1708:   IS               ip = b->row, iip = b->icol;
1709:   const PetscInt  *rip, *riip;
1710:   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1711:   PetscInt        *ai = a->i, *aj = a->j;
1712:   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1713:   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1714:   PetscBool        perm_identity;
1715:   FactorShiftCtx   sctx;
1716:   PetscReal        rs;
1717:   const MatScalar *aa, *v;
1718:   MatScalar        d;
1719:   const PetscInt  *adiag;

1721:   PetscFunctionBegin;
1722:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1723:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1724:   /* MatPivotSetUp(): initialize shift context sctx */
1725:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1727:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1728:     sctx.shift_top = info->zeropivot;
1729:     for (i = 0; i < mbs; i++) {
1730:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1731:       d  = aa[adiag[i]];
1732:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1733:       v  = aa + ai[i];
1734:       nz = ai[i + 1] - ai[i];
1735:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1736:       if (rs > sctx.shift_top) sctx.shift_top = rs;
1737:     }
1738:     sctx.shift_top *= 1.1;
1739:     sctx.nshift_max = 5;
1740:     sctx.shift_lo   = 0.;
1741:     sctx.shift_hi   = 1.;
1742:   }

1744:   PetscCall(ISGetIndices(ip, &rip));
1745:   PetscCall(ISGetIndices(iip, &riip));

1747:   /* allocate working arrays
1748:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1749:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1750:   */
1751:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

1753:   do {
1754:     sctx.newshift = PETSC_FALSE;

1756:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1757:     if (mbs) il[0] = 0;

1759:     for (k = 0; k < mbs; k++) {
1760:       /* zero rtmp */
1761:       nz    = bi[k + 1] - bi[k];
1762:       bjtmp = bj + bi[k];
1763:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1765:       /* load in initial unfactored row */
1766:       bval = ba + bi[k];
1767:       jmin = ai[rip[k]];
1768:       jmax = ai[rip[k] + 1];
1769:       for (j = jmin; j < jmax; j++) {
1770:         col = riip[aj[j]];
1771:         if (col >= k) { /* only take upper triangular entry */
1772:           rtmp[col] = aa[j];
1773:           *bval++   = 0.0; /* for in-place factorization */
1774:         }
1775:       }
1776:       /* shift the diagonal of the matrix: ZeropivotApply() */
1777:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

1779:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1780:       dk = rtmp[k];
1781:       i  = c2r[k]; /* first row to be added to k_th row  */

1783:       while (i < k) {
1784:         nexti = c2r[i]; /* next row to be added to k_th row */

1786:         /* compute multiplier, update diag(k) and U(i,k) */
1787:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1788:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1789:         dk += uikdi * ba[ili];           /* update diag[k] */
1790:         ba[ili] = uikdi;                 /* -U(i,k) */

1792:         /* add multiple of row i to k-th row */
1793:         jmin = ili + 1;
1794:         jmax = bi[i + 1];
1795:         if (jmin < jmax) {
1796:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1797:           /* update il and c2r for row i */
1798:           il[i]  = jmin;
1799:           j      = bj[jmin];
1800:           c2r[i] = c2r[j];
1801:           c2r[j] = i;
1802:         }
1803:         i = nexti;
1804:       }

1806:       /* copy data into U(k,:) */
1807:       rs   = 0.0;
1808:       jmin = bi[k];
1809:       jmax = bi[k + 1] - 1;
1810:       if (jmin < jmax) {
1811:         for (j = jmin; j < jmax; j++) {
1812:           col   = bj[j];
1813:           ba[j] = rtmp[col];
1814:           rs += PetscAbsScalar(ba[j]);
1815:         }
1816:         /* add the k-th row into il and c2r */
1817:         il[k]  = jmin;
1818:         i      = bj[jmin];
1819:         c2r[k] = c2r[i];
1820:         c2r[i] = k;
1821:       }

1823:       /* MatPivotCheck() */
1824:       sctx.rs = rs;
1825:       sctx.pv = dk;
1826:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
1827:       if (sctx.newshift) break;
1828:       dk = sctx.pv;

1830:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1831:     }
1832:   } while (sctx.newshift);

1834:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1835:   PetscCall(PetscFree3(rtmp, il, c2r));
1836:   PetscCall(ISRestoreIndices(ip, &rip));
1837:   PetscCall(ISRestoreIndices(iip, &riip));

1839:   PetscCall(ISIdentity(ip, &perm_identity));
1840:   if (perm_identity) {
1841:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1842:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1843:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1844:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1845:   } else {
1846:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
1847:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1848:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
1849:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
1850:   }

1852:   C->assembled    = PETSC_TRUE;
1853:   C->preallocated = PETSC_TRUE;

1855:   PetscCall(PetscLogFlops(C->rmap->n));

1857:   /* MatPivotView() */
1858:   if (sctx.nshift) {
1859:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1860:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1861:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1862:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1863:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1864:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1865:     }
1866:   }
1867:   PetscFunctionReturn(PETSC_SUCCESS);
1868: }

1870: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1871: {
1872:   Mat              C  = B;
1873:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
1874:   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
1875:   IS               ip = b->row, iip = b->icol;
1876:   const PetscInt  *rip, *riip;
1877:   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
1878:   PetscInt        *ai = a->i, *aj = a->j;
1879:   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1880:   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
1881:   const MatScalar *aa, *v;
1882:   PetscBool        perm_identity;
1883:   FactorShiftCtx   sctx;
1884:   PetscReal        rs;
1885:   MatScalar        d;
1886:   const PetscInt  *adiag;

1888:   PetscFunctionBegin;
1889:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1890:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1891:   /* MatPivotSetUp(): initialize shift context sctx */
1892:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1894:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1895:     sctx.shift_top = info->zeropivot;
1896:     for (i = 0; i < mbs; i++) {
1897:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1898:       d  = aa[adiag[i]];
1899:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
1900:       v  = aa + ai[i];
1901:       nz = ai[i + 1] - ai[i];
1902:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1903:       if (rs > sctx.shift_top) sctx.shift_top = rs;
1904:     }
1905:     sctx.shift_top *= 1.1;
1906:     sctx.nshift_max = 5;
1907:     sctx.shift_lo   = 0.;
1908:     sctx.shift_hi   = 1.;
1909:   }

1911:   PetscCall(ISGetIndices(ip, &rip));
1912:   PetscCall(ISGetIndices(iip, &riip));

1914:   /* initialization */
1915:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

1917:   do {
1918:     sctx.newshift = PETSC_FALSE;

1920:     for (i = 0; i < mbs; i++) jl[i] = mbs;
1921:     il[0] = 0;

1923:     for (k = 0; k < mbs; k++) {
1924:       /* zero rtmp */
1925:       nz    = bi[k + 1] - bi[k];
1926:       bjtmp = bj + bi[k];
1927:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1929:       bval = ba + bi[k];
1930:       /* initialize k-th row by the perm[k]-th row of A */
1931:       jmin = ai[rip[k]];
1932:       jmax = ai[rip[k] + 1];
1933:       for (j = jmin; j < jmax; j++) {
1934:         col = riip[aj[j]];
1935:         if (col >= k) { /* only take upper triangular entry */
1936:           rtmp[col] = aa[j];
1937:           *bval++   = 0.0; /* for in-place factorization */
1938:         }
1939:       }
1940:       /* shift the diagonal of the matrix */
1941:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1943:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1944:       dk = rtmp[k];
1945:       i  = jl[k]; /* first row to be added to k_th row  */

1947:       while (i < k) {
1948:         nexti = jl[i]; /* next row to be added to k_th row */

1950:         /* compute multiplier, update diag(k) and U(i,k) */
1951:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1952:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1953:         dk += uikdi * ba[ili];
1954:         ba[ili] = uikdi; /* -U(i,k) */

1956:         /* add multiple of row i to k-th row */
1957:         jmin = ili + 1;
1958:         jmax = bi[i + 1];
1959:         if (jmin < jmax) {
1960:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1961:           /* update il and jl for row i */
1962:           il[i] = jmin;
1963:           j     = bj[jmin];
1964:           jl[i] = jl[j];
1965:           jl[j] = i;
1966:         }
1967:         i = nexti;
1968:       }

1970:       /* shift the diagonals when zero pivot is detected */
1971:       /* compute rs=sum of abs(off-diagonal) */
1972:       rs   = 0.0;
1973:       jmin = bi[k] + 1;
1974:       nz   = bi[k + 1] - jmin;
1975:       bcol = bj + jmin;
1976:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

1978:       sctx.rs = rs;
1979:       sctx.pv = dk;
1980:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1981:       if (sctx.newshift) break;
1982:       dk = sctx.pv;

1984:       /* copy data into U(k,:) */
1985:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1986:       jmin      = bi[k] + 1;
1987:       jmax      = bi[k + 1];
1988:       if (jmin < jmax) {
1989:         for (j = jmin; j < jmax; j++) {
1990:           col   = bj[j];
1991:           ba[j] = rtmp[col];
1992:         }
1993:         /* add the k-th row into il and jl */
1994:         il[k] = jmin;
1995:         i     = bj[jmin];
1996:         jl[k] = jl[i];
1997:         jl[i] = k;
1998:       }
1999:     }
2000:   } while (sctx.newshift);

2002:   PetscCall(PetscFree3(rtmp, il, jl));
2003:   PetscCall(ISRestoreIndices(ip, &rip));
2004:   PetscCall(ISRestoreIndices(iip, &riip));
2005:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));

2007:   PetscCall(ISIdentity(ip, &perm_identity));
2008:   if (perm_identity) {
2009:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2010:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2011:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2012:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013:   } else {
2014:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2015:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2016:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2017:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2018:   }

2020:   C->assembled    = PETSC_TRUE;
2021:   C->preallocated = PETSC_TRUE;

2023:   PetscCall(PetscLogFlops(C->rmap->n));
2024:   if (sctx.nshift) {
2025:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2026:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2027:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2028:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029:     }
2030:   }
2031:   PetscFunctionReturn(PETSC_SUCCESS);
2032: }

2034: /*
2035:    icc() under revised new data structure.
2036:    Factored arrays bj and ba are stored as
2037:      U(0,:),...,U(i,:),U(n-1,:)

2039:    ui=fact->i is an array of size n+1, in which
2040:    ui+
2041:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2042:      ui[n]:  points to U(n-1,n-1)+1

2044:   udiag=fact->diag is an array of size n,in which
2045:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2047:    U(i,:) contains udiag[i] as its last entry, i.e.,
2048:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2049: */

2051: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2052: {
2053:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2054:   Mat_SeqSBAIJ      *b;
2055:   PetscBool          perm_identity;
2056:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2057:   const PetscInt    *rip, *riip, *adiag;
2058:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2059:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
2060:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2061:   PetscReal          fill = info->fill, levels = info->levels;
2062:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2063:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2064:   PetscBT            lnkbt;
2065:   IS                 iperm;
2066:   PetscBool          diagDense;

2068:   PetscFunctionBegin;
2069:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2070:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2071:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2072:   PetscCall(ISIdentity(perm, &perm_identity));
2073:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2075:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2076:   PetscCall(PetscMalloc1(am + 1, &udiag));
2077:   ui[0] = 0;

2079:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2080:   if (!levels && perm_identity) {
2081:     for (i = 0; i < am; i++) {
2082:       ncols     = ai[i + 1] - adiag[i];
2083:       ui[i + 1] = ui[i] + ncols;
2084:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2085:     }
2086:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2087:     cols = uj;
2088:     for (i = 0; i < am; i++) {
2089:       aj    = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2090:       ncols = ai[i + 1] - adiag[i] - 1;
2091:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2092:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2093:     }
2094:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2095:     PetscCall(ISGetIndices(iperm, &riip));
2096:     PetscCall(ISGetIndices(perm, &rip));

2098:     /* initialization */
2099:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2101:     /* jl: linked list for storing indices of the pivot rows
2102:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2103:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2104:     for (i = 0; i < am; i++) {
2105:       jl[i] = am;
2106:       il[i] = 0;
2107:     }

2109:     /* create and initialize a linked list for storing column indices of the active row k */
2110:     nlnk = am + 1;
2111:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2113:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2114:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2115:     current_space = free_space;
2116:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2117:     current_space_lvl = free_space_lvl;

2119:     for (k = 0; k < am; k++) { /* for each active row k */
2120:       /* initialize lnk by the column indices of row rip[k] of A */
2121:       nzk   = 0;
2122:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2123:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2124:       ncols_upper = 0;
2125:       for (j = 0; j < ncols; j++) {
2126:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2127:         if (riip[i] >= k) {         /* only take upper triangular entry */
2128:           ajtmp[ncols_upper] = i;
2129:           ncols_upper++;
2130:         }
2131:       }
2132:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2133:       nzk += nlnk;

2135:       /* update lnk by computing fill-in for each pivot row to be merged in */
2136:       prow = jl[k]; /* 1st pivot row */

2138:       while (prow < k) {
2139:         nextprow = jl[prow];

2141:         /* merge prow into k-th row */
2142:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2143:         jmax  = ui[prow + 1];
2144:         ncols = jmax - jmin;
2145:         i     = jmin - ui[prow];
2146:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2147:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2148:         j     = *(uj - 1);
2149:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2150:         nzk += nlnk;

2152:         /* update il and jl for prow */
2153:         if (jmin < jmax) {
2154:           il[prow] = jmin;
2155:           j        = *cols;
2156:           jl[prow] = jl[j];
2157:           jl[j]    = prow;
2158:         }
2159:         prow = nextprow;
2160:       }

2162:       /* if free space is not available, make more free space */
2163:       if (current_space->local_remaining < nzk) {
2164:         i = am - k + 1;                                    /* num of unfactored rows */
2165:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2166:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2167:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2168:         reallocs++;
2169:       }

2171:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2172:       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2173:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2175:       /* add the k-th row into il and jl */
2176:       if (nzk > 1) {
2177:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2178:         jl[k] = jl[i];
2179:         jl[i] = k;
2180:         il[k] = ui[k] + 1;
2181:       }
2182:       uj_ptr[k]     = current_space->array;
2183:       uj_lvl_ptr[k] = current_space_lvl->array;

2185:       current_space->array += nzk;
2186:       current_space->local_used += nzk;
2187:       current_space->local_remaining -= nzk;

2189:       current_space_lvl->array += nzk;
2190:       current_space_lvl->local_used += nzk;
2191:       current_space_lvl->local_remaining -= nzk;

2193:       ui[k + 1] = ui[k] + nzk;
2194:     }

2196:     PetscCall(ISRestoreIndices(perm, &rip));
2197:     PetscCall(ISRestoreIndices(iperm, &riip));
2198:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2199:     PetscCall(PetscFree(ajtmp));

2201:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2202:     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2203:     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2204:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2205:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2207:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2209:   /* put together the new matrix in MATSEQSBAIJ format */
2210:   b          = (Mat_SeqSBAIJ *)fact->data;
2211:   b->free_ij = PETSC_TRUE;
2212:   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2213:   b->free_a = PETSC_TRUE;
2214:   b->j      = uj;
2215:   b->i      = ui;
2216:   b->diag   = udiag;
2217:   b->ilen   = NULL;
2218:   b->imax   = NULL;
2219:   b->row    = perm;
2220:   b->col    = perm;
2221:   PetscCall(PetscObjectReference((PetscObject)perm));
2222:   PetscCall(PetscObjectReference((PetscObject)perm));
2223:   b->icol          = iperm;
2224:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2226:   PetscCall(PetscMalloc1(am, &b->solve_work));

2228:   b->maxnz = b->nz = ui[am];

2230:   fact->info.factor_mallocs   = reallocs;
2231:   fact->info.fill_ratio_given = fill;
2232:   if (ai[am] != 0) {
2233:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2234:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2235:   } else {
2236:     fact->info.fill_ratio_needed = 0.0;
2237:   }
2238: #if defined(PETSC_USE_INFO)
2239:   if (ai[am] != 0) {
2240:     PetscReal af = fact->info.fill_ratio_needed;
2241:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2242:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2243:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2244:   } else {
2245:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2246:   }
2247: #endif
2248:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2249:   PetscFunctionReturn(PETSC_SUCCESS);
2250: }

2252: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2253: {
2254:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2255:   Mat_SeqSBAIJ      *b;
2256:   PetscBool          perm_identity;
2257:   PetscReal          fill = info->fill;
2258:   const PetscInt    *rip, *riip;
2259:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2260:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2261:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2262:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2263:   PetscBT            lnkbt;
2264:   IS                 iperm;
2265:   PetscBool          diagDense;

2267:   PetscFunctionBegin;
2268:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2269:   PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2270:   PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");

2272:   /* check whether perm is the identity mapping */
2273:   PetscCall(ISIdentity(perm, &perm_identity));
2274:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2275:   PetscCall(ISGetIndices(iperm, &riip));
2276:   PetscCall(ISGetIndices(perm, &rip));

2278:   /* initialization */
2279:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2280:   PetscCall(PetscMalloc1(am + 1, &udiag));
2281:   ui[0] = 0;

2283:   /* jl: linked list for storing indices of the pivot rows
2284:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2285:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2286:   for (i = 0; i < am; i++) {
2287:     jl[i] = am;
2288:     il[i] = 0;
2289:   }

2291:   /* create and initialize a linked list for storing column indices of the active row k */
2292:   nlnk = am + 1;
2293:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2295:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2296:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2297:   current_space = free_space;

2299:   for (k = 0; k < am; k++) { /* for each active row k */
2300:     /* initialize lnk by the column indices of row rip[k] of A */
2301:     nzk   = 0;
2302:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2303:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2304:     ncols_upper = 0;
2305:     for (j = 0; j < ncols; j++) {
2306:       i = riip[*(aj + ai[rip[k]] + j)];
2307:       if (i >= k) { /* only take upper triangular entry */
2308:         cols[ncols_upper] = i;
2309:         ncols_upper++;
2310:       }
2311:     }
2312:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2313:     nzk += nlnk;

2315:     /* update lnk by computing fill-in for each pivot row to be merged in */
2316:     prow = jl[k]; /* 1st pivot row */

2318:     while (prow < k) {
2319:       nextprow = jl[prow];
2320:       /* merge prow into k-th row */
2321:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2322:       jmax   = ui[prow + 1];
2323:       ncols  = jmax - jmin;
2324:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2325:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2326:       nzk += nlnk;

2328:       /* update il and jl for prow */
2329:       if (jmin < jmax) {
2330:         il[prow] = jmin;
2331:         j        = *uj_ptr;
2332:         jl[prow] = jl[j];
2333:         jl[j]    = prow;
2334:       }
2335:       prow = nextprow;
2336:     }

2338:     /* if free space is not available, make more free space */
2339:     if (current_space->local_remaining < nzk) {
2340:       i = am - k + 1;                                    /* num of unfactored rows */
2341:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2342:       PetscCall(PetscFreeSpaceGet(i, &current_space));
2343:       reallocs++;
2344:     }

2346:     /* copy data into free space, then initialize lnk */
2347:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

2349:     /* add the k-th row into il and jl */
2350:     if (nzk > 1) {
2351:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2352:       jl[k] = jl[i];
2353:       jl[i] = k;
2354:       il[k] = ui[k] + 1;
2355:     }
2356:     ui_ptr[k] = current_space->array;

2358:     current_space->array += nzk;
2359:     current_space->local_used += nzk;
2360:     current_space->local_remaining -= nzk;

2362:     ui[k + 1] = ui[k] + nzk;
2363:   }

2365:   PetscCall(ISRestoreIndices(perm, &rip));
2366:   PetscCall(ISRestoreIndices(iperm, &riip));
2367:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

2369:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2370:   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2371:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2372:   PetscCall(PetscLLDestroy(lnk, lnkbt));

2374:   /* put together the new matrix in MATSEQSBAIJ format */
2375:   b          = (Mat_SeqSBAIJ *)fact->data;
2376:   b->free_ij = PETSC_TRUE;
2377:   PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2378:   b->free_a = PETSC_TRUE;
2379:   b->j      = uj;
2380:   b->i      = ui;
2381:   b->diag   = udiag;
2382:   b->ilen   = NULL;
2383:   b->imax   = NULL;
2384:   b->row    = perm;
2385:   b->col    = perm;

2387:   PetscCall(PetscObjectReference((PetscObject)perm));
2388:   PetscCall(PetscObjectReference((PetscObject)perm));

2390:   b->icol          = iperm;
2391:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2393:   PetscCall(PetscMalloc1(am, &b->solve_work));

2395:   b->maxnz = b->nz = ui[am];

2397:   fact->info.factor_mallocs   = reallocs;
2398:   fact->info.fill_ratio_given = fill;
2399:   if (ai[am] != 0) {
2400:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2401:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2402:   } else {
2403:     fact->info.fill_ratio_needed = 0.0;
2404:   }
2405: #if defined(PETSC_USE_INFO)
2406:   if (ai[am] != 0) {
2407:     PetscReal af = fact->info.fill_ratio_needed;
2408:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2409:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2410:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2411:   } else {
2412:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2413:   }
2414: #endif
2415:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2416:   PetscFunctionReturn(PETSC_SUCCESS);
2417: }

2419: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2420: {
2421:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
2422:   PetscInt           n  = A->rmap->n;
2423:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2424:   PetscScalar       *x, sum;
2425:   const PetscScalar *b;
2426:   const MatScalar   *aa, *v;
2427:   PetscInt           i, nz;

2429:   PetscFunctionBegin;
2430:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

2432:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2433:   PetscCall(VecGetArrayRead(bb, &b));
2434:   PetscCall(VecGetArrayWrite(xx, &x));

2436:   /* forward solve the lower triangular */
2437:   x[0] = b[0];
2438:   v    = aa;
2439:   vi   = aj;
2440:   for (i = 1; i < n; i++) {
2441:     nz  = ai[i + 1] - ai[i];
2442:     sum = b[i];
2443:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2444:     v += nz;
2445:     vi += nz;
2446:     x[i] = sum;
2447:   }

2449:   /* backward solve the upper triangular */
2450:   for (i = n - 1; i >= 0; i--) {
2451:     v   = aa + adiag[i + 1] + 1;
2452:     vi  = aj + adiag[i + 1] + 1;
2453:     nz  = adiag[i] - adiag[i + 1] - 1;
2454:     sum = x[i];
2455:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2456:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2457:   }

2459:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2460:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2461:   PetscCall(VecRestoreArrayRead(bb, &b));
2462:   PetscCall(VecRestoreArrayWrite(xx, &x));
2463:   PetscFunctionReturn(PETSC_SUCCESS);
2464: }

2466: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2467: {
2468:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
2469:   IS                 iscol = a->col, isrow = a->row;
2470:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2471:   const PetscInt    *rout, *cout, *r, *c;
2472:   PetscScalar       *x, *tmp, sum;
2473:   const PetscScalar *b;
2474:   const MatScalar   *aa, *v;

2476:   PetscFunctionBegin;
2477:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

2479:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2480:   PetscCall(VecGetArrayRead(bb, &b));
2481:   PetscCall(VecGetArrayWrite(xx, &x));
2482:   tmp = a->solve_work;

2484:   PetscCall(ISGetIndices(isrow, &rout));
2485:   r = rout;
2486:   PetscCall(ISGetIndices(iscol, &cout));
2487:   c = cout;

2489:   /* forward solve the lower triangular */
2490:   tmp[0] = b[r[0]];
2491:   v      = aa;
2492:   vi     = aj;
2493:   for (i = 1; i < n; i++) {
2494:     nz  = ai[i + 1] - ai[i];
2495:     sum = b[r[i]];
2496:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2497:     tmp[i] = sum;
2498:     v += nz;
2499:     vi += nz;
2500:   }

2502:   /* backward solve the upper triangular */
2503:   for (i = n - 1; i >= 0; i--) {
2504:     v   = aa + adiag[i + 1] + 1;
2505:     vi  = aj + adiag[i + 1] + 1;
2506:     nz  = adiag[i] - adiag[i + 1] - 1;
2507:     sum = tmp[i];
2508:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2509:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2510:   }

2512:   PetscCall(ISRestoreIndices(isrow, &rout));
2513:   PetscCall(ISRestoreIndices(iscol, &cout));
2514:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2515:   PetscCall(VecRestoreArrayRead(bb, &b));
2516:   PetscCall(VecRestoreArrayWrite(xx, &x));
2517:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2518:   PetscFunctionReturn(PETSC_SUCCESS);
2519: }