Actual source code: aijfact.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petscbt.h>
  4: #include <../src/mat/utils/freespace.h>

  6: /*
  7:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

  9:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 10: */
 11: #if 0
 12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
 13: {
 14:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
 15:   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
 16:   const PetscInt    *ai = a->i, *aj = a->j;
 17:   const PetscScalar *aa = a->a;
 18:   PetscBool         *done;
 19:   PetscReal          best, past = 0, future;

 21:   PetscFunctionBegin;
 22:   /* pick initial row */
 23:   best = -1;
 24:   for (i = 0; i < n; i++) {
 25:     future = 0.0;
 26:     for (j = ai[i]; j < ai[i + 1]; j++) {
 27:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 28:       else past = PetscAbsScalar(aa[j]);
 29:     }
 30:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 31:     if (past / future > best) {
 32:       best    = past / future;
 33:       current = i;
 34:     }
 35:   }

 37:   PetscCall(PetscMalloc1(n, &done));
 38:   PetscCall(PetscArrayzero(done, n));
 39:   PetscCall(PetscMalloc1(n, &order));
 40:   order[0] = current;
 41:   for (i = 0; i < n - 1; i++) {
 42:     done[current] = PETSC_TRUE;
 43:     best          = -1;
 44:     /* loop over all neighbors of current pivot */
 45:     for (j = ai[current]; j < ai[current + 1]; j++) {
 46:       jj = aj[j];
 47:       if (done[jj]) continue;
 48:       /* loop over columns of potential next row computing weights for below and above diagonal */
 49:       past = future = 0.0;
 50:       for (k = ai[jj]; k < ai[jj + 1]; k++) {
 51:         kk = aj[k];
 52:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 53:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 54:       }
 55:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 56:       if (past / future > best) {
 57:         best       = past / future;
 58:         newcurrent = jj;
 59:       }
 60:     }
 61:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 62:       best = -1;
 63:       for (k = 0; k < n; k++) {
 64:         if (done[k]) continue;
 65:         future = 0.0;
 66:         past   = 0.0;
 67:         for (j = ai[k]; j < ai[k + 1]; j++) {
 68:           kk = aj[j];
 69:           if (done[kk]) past += PetscAbsScalar(aa[j]);
 70:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 71:         }
 72:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 73:         if (past / future > best) {
 74:           best       = past / future;
 75:           newcurrent = k;
 76:         }
 77:       }
 78:     }
 79:     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
 80:     current      = newcurrent;
 81:     order[i + 1] = current;
 82:   }
 83:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
 84:   *icol = *irow;
 85:   PetscCall(PetscObjectReference((PetscObject)*irow));
 86:   PetscCall(PetscFree(done));
 87:   PetscCall(PetscFree(order));
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }
 90: #endif

 92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
 93: {
 94:   PetscFunctionBegin;
 95:   *type = MATSOLVERPETSC;
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }

 99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101:   PetscInt n = A->rmap->n;

103:   PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105:   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107:     *B = NULL;
108:     PetscFunctionReturn(PETSC_SUCCESS);
109:   }
110: #endif

112:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113:   PetscCall(MatSetSizes(*B, n, n, n, n));
114:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115:     PetscCall(MatSetType(*B, MATSEQAIJ));

117:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

120:     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126:     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));

128:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133:   (*B)->factortype = ftype;

135:   PetscCall(PetscFree((*B)->solvertype));
136:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137:   (*B)->canuseordering = PETSC_TRUE;
138:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }

142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147:   IS                 isicol;
148:   const PetscInt    *r, *ic;
149:   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150:   PetscInt          *bi, *bj, *ajtmp;
151:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152:   PetscReal          f;
153:   PetscInt           nlnk, *lnk, k, **bi_ptr;
154:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155:   PetscBT            lnkbt;
156:   PetscBool          missing;

158:   PetscFunctionBegin;
159:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160:   PetscCall(MatMissingDiagonal(A, &missing, &i));
161:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

163:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164:   PetscCall(ISGetIndices(isrow, &r));
165:   PetscCall(ISGetIndices(isicol, &ic));

167:   /* get new row pointers */
168:   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
169:   bi[0] = 0;

171:   /* bdiag is location of diagonal in factor */
172:   PetscCall(PetscMalloc1(n + 1, &bdiag));
173:   bdiag[0] = 0;

175:   /* linked list for storing column indices of the active row */
176:   nlnk = n + 1;
177:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

180:   /* initial FreeSpace size is f*(ai[n]+1) */
181:   f = info->fill;
182:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
183:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
184:   current_space = free_space;

186:   for (i = 0; i < n; i++) {
187:     /* copy previous fill into linked list */
188:     nzi   = 0;
189:     nnz   = ai[r[i] + 1] - ai[r[i]];
190:     ajtmp = aj + ai[r[i]];
191:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
192:     nzi += nlnk;

194:     /* add pivot rows into linked list */
195:     row = lnk[n];
196:     while (row < i) {
197:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
199:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
200:       nzi += nlnk;
201:       row = lnk[row];
202:     }
203:     bi[i + 1] = bi[i] + nzi;
204:     im[i]     = nzi;

206:     /* mark bdiag */
207:     nzbd = 0;
208:     nnz  = nzi;
209:     k    = lnk[n];
210:     while (nnz-- && k < i) {
211:       nzbd++;
212:       k = lnk[k];
213:     }
214:     bdiag[i] = bi[i] + nzbd;

216:     /* if free space is not available, make more free space */
217:     if (current_space->local_remaining < nzi) {
218:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
219:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
220:       reallocs++;
221:     }

223:     /* copy data into free space, then initialize lnk */
224:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

226:     bi_ptr[i] = current_space->array;
227:     current_space->array += nzi;
228:     current_space->local_used += nzi;
229:     current_space->local_remaining -= nzi;
230:   }
231:   #if defined(PETSC_USE_INFO)
232:   if (ai[n] != 0) {
233:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
234:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
235:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
236:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
237:     PetscCall(PetscInfo(A, "for best performance.\n"));
238:   } else {
239:     PetscCall(PetscInfo(A, "Empty matrix\n"));
240:   }
241:   #endif

243:   PetscCall(ISRestoreIndices(isrow, &r));
244:   PetscCall(ISRestoreIndices(isicol, &ic));

246:   /* destroy list of free space and other temporary array(s) */
247:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
248:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
249:   PetscCall(PetscLLDestroy(lnk, lnkbt));
250:   PetscCall(PetscFree2(bi_ptr, im));

252:   /* put together the new matrix */
253:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
254:   b = (Mat_SeqAIJ *)B->data;
255:   b->free_ij      = PETSC_TRUE;
256:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
257:   b->free_a       = PETSC_TRUE;
258:   b->j    = bj;
259:   b->i    = bi;
260:   b->diag = bdiag;
261:   b->ilen = NULL;
262:   b->imax = NULL;
263:   b->row  = isrow;
264:   b->col  = iscol;
265:   PetscCall(PetscObjectReference((PetscObject)isrow));
266:   PetscCall(PetscObjectReference((PetscObject)iscol));
267:   b->icol = isicol;
268:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

270:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271:   b->maxnz = b->nz = bi[n];

273:   B->factortype            = MAT_FACTOR_LU;
274:   B->info.factor_mallocs   = reallocs;
275:   B->info.fill_ratio_given = f;

277:   if (ai[n]) {
278:     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279:   } else {
280:     B->info.fill_ratio_needed = 0.0;
281:   }
282:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284:   PetscFunctionReturn(PETSC_SUCCESS);
285: }
286: #endif

288: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289: {
290:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
291:   IS                 isicol;
292:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293:   PetscInt           i, n = A->rmap->n;
294:   PetscInt          *bi, *bj;
295:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296:   PetscReal          f;
297:   PetscInt           nlnk, *lnk, k, **bi_ptr;
298:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
299:   PetscBT            lnkbt;
300:   PetscBool          missing;

302:   PetscFunctionBegin;
303:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304:   PetscCall(MatMissingDiagonal(A, &missing, &i));
305:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

307:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308:   PetscCall(ISGetIndices(isrow, &r));
309:   PetscCall(ISGetIndices(isicol, &ic));

311:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
313:   PetscCall(PetscMalloc1(n + 1, &bdiag));
314:   bi[0] = bdiag[0] = 0;

316:   /* linked list for storing column indices of the active row */
317:   nlnk = n + 1;
318:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

320:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

322:   /* initial FreeSpace size is f*(ai[n]+1) */
323:   f = info->fill;
324:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326:   current_space = free_space;

328:   for (i = 0; i < n; i++) {
329:     /* copy previous fill into linked list */
330:     nzi   = 0;
331:     nnz   = ai[r[i] + 1] - ai[r[i]];
332:     ajtmp = aj + ai[r[i]];
333:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334:     nzi += nlnk;

336:     /* add pivot rows into linked list */
337:     row = lnk[n];
338:     while (row < i) {
339:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
340:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342:       nzi += nlnk;
343:       row = lnk[row];
344:     }
345:     bi[i + 1] = bi[i] + nzi;
346:     im[i]     = nzi;

348:     /* mark bdiag */
349:     nzbd = 0;
350:     nnz  = nzi;
351:     k    = lnk[n];
352:     while (nnz-- && k < i) {
353:       nzbd++;
354:       k = lnk[k];
355:     }
356:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

358:     /* if free space is not available, make more free space */
359:     if (current_space->local_remaining < nzi) {
360:       /* estimated additional space needed */
361:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
363:       reallocs++;
364:     }

366:     /* copy data into free space, then initialize lnk */
367:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

369:     bi_ptr[i] = current_space->array;
370:     current_space->array += nzi;
371:     current_space->local_used += nzi;
372:     current_space->local_remaining -= nzi;
373:   }

375:   PetscCall(ISRestoreIndices(isrow, &r));
376:   PetscCall(ISRestoreIndices(isicol, &ic));

378:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
380:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381:   PetscCall(PetscLLDestroy(lnk, lnkbt));
382:   PetscCall(PetscFree2(bi_ptr, im));

384:   /* put together the new matrix */
385:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386:   b          = (Mat_SeqAIJ *)B->data;
387:   b->free_ij = PETSC_TRUE;
388:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
389:   b->free_a = PETSC_TRUE;
390:   b->j      = bj;
391:   b->i      = bi;
392:   b->diag   = bdiag;
393:   b->ilen   = NULL;
394:   b->imax   = NULL;
395:   b->row    = isrow;
396:   b->col    = iscol;
397:   PetscCall(PetscObjectReference((PetscObject)isrow));
398:   PetscCall(PetscObjectReference((PetscObject)iscol));
399:   b->icol = isicol;
400:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

402:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403:   b->maxnz = b->nz = bdiag[0] + 1;

405:   B->factortype            = MAT_FACTOR_LU;
406:   B->info.factor_mallocs   = reallocs;
407:   B->info.fill_ratio_given = f;

409:   if (ai[n]) {
410:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
411:   } else {
412:     B->info.fill_ratio_needed = 0.0;
413:   }
414: #if defined(PETSC_USE_INFO)
415:   if (ai[n] != 0) {
416:     PetscReal af = B->info.fill_ratio_needed;
417:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
418:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
419:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
420:     PetscCall(PetscInfo(A, "for best performance.\n"));
421:   } else {
422:     PetscCall(PetscInfo(A, "Empty matrix\n"));
423:   }
424: #endif
425:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427:   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
428:   PetscFunctionReturn(PETSC_SUCCESS);
429: }

431: /*
432:     Trouble in factorization, should we dump the original matrix?
433: */
434: PetscErrorCode MatFactorDumpMatrix(Mat A)
435: {
436:   PetscBool flg = PETSC_FALSE;

438:   PetscFunctionBegin;
439:   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
440:   if (flg) {
441:     PetscViewer viewer;
442:     char        filename[PETSC_MAX_PATH_LEN];

444:     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
445:     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
446:     PetscCall(MatView(A, viewer));
447:     PetscCall(PetscViewerDestroy(&viewer));
448:   }
449:   PetscFunctionReturn(PETSC_SUCCESS);
450: }

452: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453: {
454:   Mat              C = B;
455:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456:   IS               isrow = b->row, isicol = b->icol;
457:   const PetscInt  *r, *ic, *ics;
458:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459:   PetscInt         i, j, k, nz, nzL, row, *pj;
460:   const PetscInt  *ajtmp, *bjtmp;
461:   MatScalar       *rtmp, *pc, multiplier, *pv;
462:   const MatScalar *aa, *v;
463:   MatScalar       *ba;
464:   PetscBool        row_identity, col_identity;
465:   FactorShiftCtx   sctx;
466:   const PetscInt  *ddiag;
467:   PetscReal        rs;
468:   MatScalar        d;

470:   PetscFunctionBegin;
471:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
472:   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
473:   /* MatPivotSetUp(): initialize shift context sctx */
474:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

476:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
477:     ddiag          = a->diag;
478:     sctx.shift_top = info->zeropivot;
479:     for (i = 0; i < n; i++) {
480:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
481:       d  = (aa)[ddiag[i]];
482:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
483:       v  = aa + ai[i];
484:       nz = ai[i + 1] - ai[i];
485:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
486:       if (rs > sctx.shift_top) sctx.shift_top = rs;
487:     }
488:     sctx.shift_top *= 1.1;
489:     sctx.nshift_max = 5;
490:     sctx.shift_lo   = 0.;
491:     sctx.shift_hi   = 1.;
492:   }

494:   PetscCall(ISGetIndices(isrow, &r));
495:   PetscCall(ISGetIndices(isicol, &ic));
496:   PetscCall(PetscMalloc1(n + 1, &rtmp));
497:   ics = ic;

499:   do {
500:     sctx.newshift = PETSC_FALSE;
501:     for (i = 0; i < n; i++) {
502:       /* zero rtmp */
503:       /* L part */
504:       nz    = bi[i + 1] - bi[i];
505:       bjtmp = bj + bi[i];
506:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

508:       /* U part */
509:       nz    = bdiag[i] - bdiag[i + 1];
510:       bjtmp = bj + bdiag[i + 1] + 1;
511:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

513:       /* load in initial (unfactored row) */
514:       nz    = ai[r[i] + 1] - ai[r[i]];
515:       ajtmp = aj + ai[r[i]];
516:       v     = aa + ai[r[i]];
517:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
518:       /* ZeropivotApply() */
519:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

521:       /* elimination */
522:       bjtmp = bj + bi[i];
523:       row   = *bjtmp++;
524:       nzL   = bi[i + 1] - bi[i];
525:       for (k = 0; k < nzL; k++) {
526:         pc = rtmp + row;
527:         if (*pc != 0.0) {
528:           pv         = ba + bdiag[row];
529:           multiplier = *pc * (*pv);
530:           *pc        = multiplier;

532:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
533:           pv = ba + bdiag[row + 1] + 1;
534:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

536:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
537:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
538:         }
539:         row = *bjtmp++;
540:       }

542:       /* finished row so stick it into b->a */
543:       rs = 0.0;
544:       /* L part */
545:       pv = ba + bi[i];
546:       pj = b->j + bi[i];
547:       nz = bi[i + 1] - bi[i];
548:       for (j = 0; j < nz; j++) {
549:         pv[j] = rtmp[pj[j]];
550:         rs += PetscAbsScalar(pv[j]);
551:       }

553:       /* U part */
554:       pv = ba + bdiag[i + 1] + 1;
555:       pj = b->j + bdiag[i + 1] + 1;
556:       nz = bdiag[i] - bdiag[i + 1] - 1;
557:       for (j = 0; j < nz; j++) {
558:         pv[j] = rtmp[pj[j]];
559:         rs += PetscAbsScalar(pv[j]);
560:       }

562:       sctx.rs = rs;
563:       sctx.pv = rtmp[i];
564:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
565:       if (sctx.newshift) break; /* break for-loop */
566:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

568:       /* Mark diagonal and invert diagonal for simpler triangular solves */
569:       pv  = ba + bdiag[i];
570:       *pv = 1.0 / rtmp[i];

572:     } /* endof for (i=0; i<n; i++) { */

574:     /* MatPivotRefine() */
575:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
576:       /*
577:        * if no shift in this attempt & shifting & started shifting & can refine,
578:        * then try lower shift
579:        */
580:       sctx.shift_hi       = sctx.shift_fraction;
581:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
582:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
583:       sctx.newshift       = PETSC_TRUE;
584:       sctx.nshift++;
585:     }
586:   } while (sctx.newshift);

588:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
589:   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));

591:   PetscCall(PetscFree(rtmp));
592:   PetscCall(ISRestoreIndices(isicol, &ic));
593:   PetscCall(ISRestoreIndices(isrow, &r));

595:   PetscCall(ISIdentity(isrow, &row_identity));
596:   PetscCall(ISIdentity(isicol, &col_identity));
597:   if (b->inode.size) {
598:     C->ops->solve = MatSolve_SeqAIJ_Inode;
599:   } else if (row_identity && col_identity) {
600:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
601:   } else {
602:     C->ops->solve = MatSolve_SeqAIJ;
603:   }
604:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
605:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
606:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
607:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
608:   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
609:   C->assembled              = PETSC_TRUE;
610:   C->preallocated           = PETSC_TRUE;

612:   PetscCall(PetscLogFlops(C->cmap->n));

614:   /* MatShiftView(A,info,&sctx) */
615:   if (sctx.nshift) {
616:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
617:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
618:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
619:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
620:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
621:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
622:     }
623:   }
624:   PetscFunctionReturn(PETSC_SUCCESS);
625: }

627: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
628: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
629: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);

631: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
632: {
633:   Mat              C = B;
634:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
635:   IS               isrow = b->row, isicol = b->icol;
636:   const PetscInt  *r, *ic, *ics;
637:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
638:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
639:   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
640:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
641:   const MatScalar *v, *aa;
642:   MatScalar       *ba;
643:   PetscReal        rs = 0.0;
644:   FactorShiftCtx   sctx;
645:   const PetscInt  *ddiag;
646:   PetscBool        row_identity, col_identity;

648:   PetscFunctionBegin;
649:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
650:   PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
651:   /* MatPivotSetUp(): initialize shift context sctx */
652:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

654:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
655:     ddiag          = a->diag;
656:     sctx.shift_top = info->zeropivot;
657:     for (i = 0; i < n; i++) {
658:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
659:       d  = (aa)[ddiag[i]];
660:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
661:       v  = aa + ai[i];
662:       nz = ai[i + 1] - ai[i];
663:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
664:       if (rs > sctx.shift_top) sctx.shift_top = rs;
665:     }
666:     sctx.shift_top *= 1.1;
667:     sctx.nshift_max = 5;
668:     sctx.shift_lo   = 0.;
669:     sctx.shift_hi   = 1.;
670:   }

672:   PetscCall(ISGetIndices(isrow, &r));
673:   PetscCall(ISGetIndices(isicol, &ic));
674:   PetscCall(PetscMalloc1(n + 1, &rtmp));
675:   ics = ic;

677:   do {
678:     sctx.newshift = PETSC_FALSE;
679:     for (i = 0; i < n; i++) {
680:       nz    = bi[i + 1] - bi[i];
681:       bjtmp = bj + bi[i];
682:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

684:       /* load in initial (unfactored row) */
685:       nz    = ai[r[i] + 1] - ai[r[i]];
686:       ajtmp = aj + ai[r[i]];
687:       v     = aa + ai[r[i]];
688:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
689:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

691:       row = *bjtmp++;
692:       while (row < i) {
693:         pc = rtmp + row;
694:         if (*pc != 0.0) {
695:           pv         = ba + diag_offset[row];
696:           pj         = b->j + diag_offset[row] + 1;
697:           multiplier = *pc / *pv++;
698:           *pc        = multiplier;
699:           nz         = bi[row + 1] - diag_offset[row] - 1;
700:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
702:         }
703:         row = *bjtmp++;
704:       }
705:       /* finished row so stick it into b->a */
706:       pv   = ba + bi[i];
707:       pj   = b->j + bi[i];
708:       nz   = bi[i + 1] - bi[i];
709:       diag = diag_offset[i] - bi[i];
710:       rs   = 0.0;
711:       for (j = 0; j < nz; j++) {
712:         pv[j] = rtmp[pj[j]];
713:         rs += PetscAbsScalar(pv[j]);
714:       }
715:       rs -= PetscAbsScalar(pv[diag]);

717:       sctx.rs = rs;
718:       sctx.pv = pv[diag];
719:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
720:       if (sctx.newshift) break;
721:       pv[diag] = sctx.pv;
722:     }

724:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
725:       /*
726:        * if no shift in this attempt & shifting & started shifting & can refine,
727:        * then try lower shift
728:        */
729:       sctx.shift_hi       = sctx.shift_fraction;
730:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
731:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
732:       sctx.newshift       = PETSC_TRUE;
733:       sctx.nshift++;
734:     }
735:   } while (sctx.newshift);

737:   /* invert diagonal entries for simpler triangular solves */
738:   for (i = 0; i < n; i++) ba[diag_offset[i]] = 1.0 / ba[diag_offset[i]];
739:   PetscCall(PetscFree(rtmp));
740:   PetscCall(ISRestoreIndices(isicol, &ic));
741:   PetscCall(ISRestoreIndices(isrow, &r));
742:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
743:   PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));

745:   PetscCall(ISIdentity(isrow, &row_identity));
746:   PetscCall(ISIdentity(isicol, &col_identity));
747:   if (row_identity && col_identity) {
748:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749:   } else {
750:     C->ops->solve = MatSolve_SeqAIJ_inplace;
751:   }
752:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
753:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
754:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
755:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
756:   C->ops->matsolvetranspose = NULL;

758:   C->assembled    = PETSC_TRUE;
759:   C->preallocated = PETSC_TRUE;

761:   PetscCall(PetscLogFlops(C->cmap->n));
762:   if (sctx.nshift) {
763:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
764:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
765:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
766:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
767:     }
768:   }
769:   C->ops->solve          = MatSolve_SeqAIJ_inplace;
770:   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

772:   PetscCall(MatSeqAIJCheckInode(C));
773:   PetscFunctionReturn(PETSC_SUCCESS);
774: }

776: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);

778: /*
779:    This routine implements inplace ILU(0) with row or/and column permutations.
780:    Input:
781:      A - original matrix
782:    Output;
783:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
784:          a->j (col index) is permuted by the inverse of colperm, then sorted
785:          a->a reordered accordingly with a->j
786:          a->diag (ptr to diagonal elements) is updated.
787: */
788: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
789: {
790:   Mat_SeqAIJ     *a     = (Mat_SeqAIJ *)A->data;
791:   IS              isrow = a->row, isicol = a->icol;
792:   const PetscInt *r, *ic, *ics;
793:   PetscInt        i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
794:   PetscInt       *ajtmp, nz, row;
795:   PetscInt       *diag = a->diag, nbdiag, *pj;
796:   PetscScalar    *rtmp, *pc, multiplier, d;
797:   MatScalar      *pv, *v;
798:   PetscReal       rs;
799:   FactorShiftCtx  sctx;
800:   MatScalar      *aa, *vtmp;

802:   PetscFunctionBegin;
803:   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");

805:   PetscCall(MatSeqAIJGetArray(A, &aa));
806:   /* MatPivotSetUp(): initialize shift context sctx */
807:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

809:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
810:     const PetscInt *ddiag = a->diag;
811:     sctx.shift_top        = info->zeropivot;
812:     for (i = 0; i < n; i++) {
813:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814:       d    = (aa)[ddiag[i]];
815:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
816:       vtmp = aa + ai[i];
817:       nz   = ai[i + 1] - ai[i];
818:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
819:       if (rs > sctx.shift_top) sctx.shift_top = rs;
820:     }
821:     sctx.shift_top *= 1.1;
822:     sctx.nshift_max = 5;
823:     sctx.shift_lo   = 0.;
824:     sctx.shift_hi   = 1.;
825:   }

827:   PetscCall(ISGetIndices(isrow, &r));
828:   PetscCall(ISGetIndices(isicol, &ic));
829:   PetscCall(PetscMalloc1(n + 1, &rtmp));
830:   PetscCall(PetscArrayzero(rtmp, n + 1));
831:   ics = ic;

833: #if defined(MV)
834:   sctx.shift_top      = 0.;
835:   sctx.nshift_max     = 0;
836:   sctx.shift_lo       = 0.;
837:   sctx.shift_hi       = 0.;
838:   sctx.shift_fraction = 0.;

840:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
841:     sctx.shift_top = 0.;
842:     for (i = 0; i < n; i++) {
843:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
844:       d  = aa[diag[i]];
845:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
846:       v  = aa + ai[i];
847:       nz = ai[i + 1] - ai[i];
848:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
849:       if (rs > sctx.shift_top) sctx.shift_top = rs;
850:     }
851:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
852:     sctx.shift_top *= 1.1;
853:     sctx.nshift_max = 5;
854:     sctx.shift_lo   = 0.;
855:     sctx.shift_hi   = 1.;
856:   }

858:   sctx.shift_amount = 0.;
859:   sctx.nshift       = 0;
860: #endif

862:   do {
863:     sctx.newshift = PETSC_FALSE;
864:     for (i = 0; i < n; i++) {
865:       /* load in initial unfactored row */
866:       nz    = ai[r[i] + 1] - ai[r[i]];
867:       ajtmp = aj + ai[r[i]];
868:       v     = aa + ai[r[i]];
869:       /* sort permuted ajtmp and values v accordingly */
870:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
871:       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));

873:       diag[r[i]] = ai[r[i]];
874:       for (j = 0; j < nz; j++) {
875:         rtmp[ajtmp[j]] = v[j];
876:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
877:       }
878:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

880:       row = *ajtmp++;
881:       while (row < i) {
882:         pc = rtmp + row;
883:         if (*pc != 0.0) {
884:           pv = aa + diag[r[row]];
885:           pj = aj + diag[r[row]] + 1;

887:           multiplier = *pc / *pv++;
888:           *pc        = multiplier;
889:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
890:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
891:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
892:         }
893:         row = *ajtmp++;
894:       }
895:       /* finished row so overwrite it onto aa */
896:       pv     = aa + ai[r[i]];
897:       pj     = aj + ai[r[i]];
898:       nz     = ai[r[i] + 1] - ai[r[i]];
899:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

901:       rs = 0.0;
902:       for (j = 0; j < nz; j++) {
903:         pv[j] = rtmp[pj[j]];
904:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
905:       }

907:       sctx.rs = rs;
908:       sctx.pv = pv[nbdiag];
909:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
910:       if (sctx.newshift) break;
911:       pv[nbdiag] = sctx.pv;
912:     }

914:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
915:       /*
916:        * if no shift in this attempt & shifting & started shifting & can refine,
917:        * then try lower shift
918:        */
919:       sctx.shift_hi       = sctx.shift_fraction;
920:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
921:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
922:       sctx.newshift       = PETSC_TRUE;
923:       sctx.nshift++;
924:     }
925:   } while (sctx.newshift);

927:   /* invert diagonal entries for simpler triangular solves */
928:   for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];

930:   PetscCall(MatSeqAIJRestoreArray(A, &aa));
931:   PetscCall(PetscFree(rtmp));
932:   PetscCall(ISRestoreIndices(isicol, &ic));
933:   PetscCall(ISRestoreIndices(isrow, &r));

935:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
936:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
937:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
938:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

940:   A->assembled    = PETSC_TRUE;
941:   A->preallocated = PETSC_TRUE;

943:   PetscCall(PetscLogFlops(A->cmap->n));
944:   if (sctx.nshift) {
945:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
947:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
949:     }
950:   }
951:   PetscFunctionReturn(PETSC_SUCCESS);
952: }

954: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
955: {
956:   Mat C;

958:   PetscFunctionBegin;
959:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
960:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
961:   PetscCall(MatLUFactorNumeric(C, A, info));

963:   A->ops->solve          = C->ops->solve;
964:   A->ops->solvetranspose = C->ops->solvetranspose;

966:   PetscCall(MatHeaderMerge(A, &C));
967:   PetscFunctionReturn(PETSC_SUCCESS);
968: }

970: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
971: {
972:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
973:   IS                 iscol = a->col, isrow = a->row;
974:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
975:   PetscInt           nz;
976:   const PetscInt    *rout, *cout, *r, *c;
977:   PetscScalar       *x, *tmp, *tmps, sum;
978:   const PetscScalar *b;
979:   const MatScalar   *aa, *v;

981:   PetscFunctionBegin;
982:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

984:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
985:   PetscCall(VecGetArrayRead(bb, &b));
986:   PetscCall(VecGetArrayWrite(xx, &x));
987:   tmp = a->solve_work;

989:   PetscCall(ISGetIndices(isrow, &rout));
990:   r = rout;
991:   PetscCall(ISGetIndices(iscol, &cout));
992:   c = cout + (n - 1);

994:   /* forward solve the lower triangular */
995:   tmp[0] = b[*r++];
996:   tmps   = tmp;
997:   for (i = 1; i < n; i++) {
998:     v   = aa + ai[i];
999:     vi  = aj + ai[i];
1000:     nz  = a->diag[i] - ai[i];
1001:     sum = b[*r++];
1002:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1003:     tmp[i] = sum;
1004:   }

1006:   /* backward solve the upper triangular */
1007:   for (i = n - 1; i >= 0; i--) {
1008:     v   = aa + a->diag[i] + 1;
1009:     vi  = aj + a->diag[i] + 1;
1010:     nz  = ai[i + 1] - a->diag[i] - 1;
1011:     sum = tmp[i];
1012:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1013:     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1014:   }
1015:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1016:   PetscCall(ISRestoreIndices(isrow, &rout));
1017:   PetscCall(ISRestoreIndices(iscol, &cout));
1018:   PetscCall(VecRestoreArrayRead(bb, &b));
1019:   PetscCall(VecRestoreArrayWrite(xx, &x));
1020:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1021:   PetscFunctionReturn(PETSC_SUCCESS);
1022: }

1024: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1025: {
1026:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1027:   IS                 iscol = a->col, isrow = a->row;
1028:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1029:   PetscInt           nz, neq, ldb, ldx;
1030:   const PetscInt    *rout, *cout, *r, *c;
1031:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1032:   const PetscScalar *b, *aa, *v;
1033:   PetscBool          isdense;

1035:   PetscFunctionBegin;
1036:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1037:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1038:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1039:   if (X != B) {
1040:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1041:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1042:   }
1043:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1044:   PetscCall(MatDenseGetArrayRead(B, &b));
1045:   PetscCall(MatDenseGetLDA(B, &ldb));
1046:   PetscCall(MatDenseGetArray(X, &x));
1047:   PetscCall(MatDenseGetLDA(X, &ldx));
1048:   PetscCall(ISGetIndices(isrow, &rout));
1049:   r = rout;
1050:   PetscCall(ISGetIndices(iscol, &cout));
1051:   c = cout;
1052:   for (neq = 0; neq < B->cmap->n; neq++) {
1053:     /* forward solve the lower triangular */
1054:     tmp[0] = b[r[0]];
1055:     tmps   = tmp;
1056:     for (i = 1; i < n; i++) {
1057:       v   = aa + ai[i];
1058:       vi  = aj + ai[i];
1059:       nz  = a->diag[i] - ai[i];
1060:       sum = b[r[i]];
1061:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1062:       tmp[i] = sum;
1063:     }
1064:     /* backward solve the upper triangular */
1065:     for (i = n - 1; i >= 0; i--) {
1066:       v   = aa + a->diag[i] + 1;
1067:       vi  = aj + a->diag[i] + 1;
1068:       nz  = ai[i + 1] - a->diag[i] - 1;
1069:       sum = tmp[i];
1070:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1071:       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1072:     }
1073:     b += ldb;
1074:     x += ldx;
1075:   }
1076:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1077:   PetscCall(ISRestoreIndices(isrow, &rout));
1078:   PetscCall(ISRestoreIndices(iscol, &cout));
1079:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1080:   PetscCall(MatDenseRestoreArray(X, &x));
1081:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1082:   PetscFunctionReturn(PETSC_SUCCESS);
1083: }

1085: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1086: {
1087:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1088:   IS                 iscol = a->col, isrow = a->row;
1089:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1090:   PetscInt           nz, neq, ldb, ldx;
1091:   const PetscInt    *rout, *cout, *r, *c;
1092:   PetscScalar       *x, *tmp = a->solve_work, sum;
1093:   const PetscScalar *b, *aa, *v;
1094:   PetscBool          isdense;

1096:   PetscFunctionBegin;
1097:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1098:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1099:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1100:   if (X != B) {
1101:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1102:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1103:   }
1104:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1105:   PetscCall(MatDenseGetArrayRead(B, &b));
1106:   PetscCall(MatDenseGetLDA(B, &ldb));
1107:   PetscCall(MatDenseGetArray(X, &x));
1108:   PetscCall(MatDenseGetLDA(X, &ldx));
1109:   PetscCall(ISGetIndices(isrow, &rout));
1110:   r = rout;
1111:   PetscCall(ISGetIndices(iscol, &cout));
1112:   c = cout;
1113:   for (neq = 0; neq < B->cmap->n; neq++) {
1114:     /* forward solve the lower triangular */
1115:     tmp[0] = b[r[0]];
1116:     v      = aa;
1117:     vi     = aj;
1118:     for (i = 1; i < n; i++) {
1119:       nz  = ai[i + 1] - ai[i];
1120:       sum = b[r[i]];
1121:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1122:       tmp[i] = sum;
1123:       v += nz;
1124:       vi += nz;
1125:     }
1126:     /* backward solve the upper triangular */
1127:     for (i = n - 1; i >= 0; i--) {
1128:       v   = aa + adiag[i + 1] + 1;
1129:       vi  = aj + adiag[i + 1] + 1;
1130:       nz  = adiag[i] - adiag[i + 1] - 1;
1131:       sum = tmp[i];
1132:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1133:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1134:     }
1135:     b += ldb;
1136:     x += ldx;
1137:   }
1138:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1139:   PetscCall(ISRestoreIndices(isrow, &rout));
1140:   PetscCall(ISRestoreIndices(iscol, &cout));
1141:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1142:   PetscCall(MatDenseRestoreArray(X, &x));
1143:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1144:   PetscFunctionReturn(PETSC_SUCCESS);
1145: }

1147: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1148: {
1149:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1150:   IS                 iscol = a->col, isrow = a->row;
1151:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1152:   PetscInt           nz, neq, ldb, ldx;
1153:   const PetscInt    *rout, *cout, *r, *c;
1154:   PetscScalar       *x, *tmp = a->solve_work, s1;
1155:   const PetscScalar *b, *aa, *v;
1156:   PetscBool          isdense;

1158:   PetscFunctionBegin;
1159:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1160:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1161:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1162:   if (X != B) {
1163:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1164:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1165:   }
1166:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1167:   PetscCall(MatDenseGetArrayRead(B, &b));
1168:   PetscCall(MatDenseGetLDA(B, &ldb));
1169:   PetscCall(MatDenseGetArray(X, &x));
1170:   PetscCall(MatDenseGetLDA(X, &ldx));
1171:   PetscCall(ISGetIndices(isrow, &rout));
1172:   r = rout;
1173:   PetscCall(ISGetIndices(iscol, &cout));
1174:   c = cout;
1175:   for (neq = 0; neq < B->cmap->n; neq++) {
1176:     /* copy the b into temp work space according to permutation */
1177:     for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1179:     /* forward solve the U^T */
1180:     for (i = 0; i < n; i++) {
1181:       v  = aa + adiag[i + 1] + 1;
1182:       vi = aj + adiag[i + 1] + 1;
1183:       nz = adiag[i] - adiag[i + 1] - 1;
1184:       s1 = tmp[i];
1185:       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1186:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1187:       tmp[i] = s1;
1188:     }

1190:     /* backward solve the L^T */
1191:     for (i = n - 1; i >= 0; i--) {
1192:       v  = aa + ai[i];
1193:       vi = aj + ai[i];
1194:       nz = ai[i + 1] - ai[i];
1195:       s1 = tmp[i];
1196:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1197:     }

1199:     /* copy tmp into x according to permutation */
1200:     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1201:     b += ldb;
1202:     x += ldx;
1203:   }
1204:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1205:   PetscCall(ISRestoreIndices(isrow, &rout));
1206:   PetscCall(ISRestoreIndices(iscol, &cout));
1207:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1208:   PetscCall(MatDenseRestoreArray(X, &x));
1209:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1210:   PetscFunctionReturn(PETSC_SUCCESS);
1211: }

1213: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1214: {
1215:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1216:   IS                 iscol = a->col, isrow = a->row;
1217:   const PetscInt    *r, *c, *rout, *cout;
1218:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1219:   PetscInt           nz, row;
1220:   PetscScalar       *x, *tmp, *tmps, sum;
1221:   const PetscScalar *b;
1222:   const MatScalar   *aa, *v;

1224:   PetscFunctionBegin;
1225:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1227:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1228:   PetscCall(VecGetArrayRead(bb, &b));
1229:   PetscCall(VecGetArrayWrite(xx, &x));
1230:   tmp = a->solve_work;

1232:   PetscCall(ISGetIndices(isrow, &rout));
1233:   r = rout;
1234:   PetscCall(ISGetIndices(iscol, &cout));
1235:   c = cout + (n - 1);

1237:   /* forward solve the lower triangular */
1238:   tmp[0] = b[*r++];
1239:   tmps   = tmp;
1240:   for (row = 1; row < n; row++) {
1241:     i   = rout[row]; /* permuted row */
1242:     v   = aa + ai[i];
1243:     vi  = aj + ai[i];
1244:     nz  = a->diag[i] - ai[i];
1245:     sum = b[*r++];
1246:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1247:     tmp[row] = sum;
1248:   }

1250:   /* backward solve the upper triangular */
1251:   for (row = n - 1; row >= 0; row--) {
1252:     i   = rout[row]; /* permuted row */
1253:     v   = aa + a->diag[i] + 1;
1254:     vi  = aj + a->diag[i] + 1;
1255:     nz  = ai[i + 1] - a->diag[i] - 1;
1256:     sum = tmp[row];
1257:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1258:     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1259:   }
1260:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1261:   PetscCall(ISRestoreIndices(isrow, &rout));
1262:   PetscCall(ISRestoreIndices(iscol, &cout));
1263:   PetscCall(VecRestoreArrayRead(bb, &b));
1264:   PetscCall(VecRestoreArrayWrite(xx, &x));
1265:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1266:   PetscFunctionReturn(PETSC_SUCCESS);
1267: }

1269: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1270: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1271: {
1272:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1273:   PetscInt           n  = A->rmap->n;
1274:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1275:   PetscScalar       *x;
1276:   const PetscScalar *b;
1277:   const MatScalar   *aa;
1278: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1279:   PetscInt         adiag_i, i, nz, ai_i;
1280:   const PetscInt  *vi;
1281:   const MatScalar *v;
1282:   PetscScalar      sum;
1283: #endif

1285:   PetscFunctionBegin;
1286:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1288:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1289:   PetscCall(VecGetArrayRead(bb, &b));
1290:   PetscCall(VecGetArrayWrite(xx, &x));

1292: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1293:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1294: #else
1295:   /* forward solve the lower triangular */
1296:   x[0] = b[0];
1297:   for (i = 1; i < n; i++) {
1298:     ai_i = ai[i];
1299:     v    = aa + ai_i;
1300:     vi   = aj + ai_i;
1301:     nz   = adiag[i] - ai_i;
1302:     sum  = b[i];
1303:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1304:     x[i] = sum;
1305:   }

1307:   /* backward solve the upper triangular */
1308:   for (i = n - 1; i >= 0; i--) {
1309:     adiag_i = adiag[i];
1310:     v       = aa + adiag_i + 1;
1311:     vi      = aj + adiag_i + 1;
1312:     nz      = ai[i + 1] - adiag_i - 1;
1313:     sum     = x[i];
1314:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1315:     x[i] = sum * aa[adiag_i];
1316:   }
1317: #endif
1318:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1319:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1320:   PetscCall(VecRestoreArrayRead(bb, &b));
1321:   PetscCall(VecRestoreArrayWrite(xx, &x));
1322:   PetscFunctionReturn(PETSC_SUCCESS);
1323: }

1325: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1326: {
1327:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1328:   IS                 iscol = a->col, isrow = a->row;
1329:   PetscInt           i, n                  = A->rmap->n, j;
1330:   PetscInt           nz;
1331:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1332:   PetscScalar       *x, *tmp, sum;
1333:   const PetscScalar *b;
1334:   const MatScalar   *aa, *v;

1336:   PetscFunctionBegin;
1337:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1339:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1340:   PetscCall(VecGetArrayRead(bb, &b));
1341:   PetscCall(VecGetArray(xx, &x));
1342:   tmp = a->solve_work;

1344:   PetscCall(ISGetIndices(isrow, &rout));
1345:   r = rout;
1346:   PetscCall(ISGetIndices(iscol, &cout));
1347:   c = cout + (n - 1);

1349:   /* forward solve the lower triangular */
1350:   tmp[0] = b[*r++];
1351:   for (i = 1; i < n; i++) {
1352:     v   = aa + ai[i];
1353:     vi  = aj + ai[i];
1354:     nz  = a->diag[i] - ai[i];
1355:     sum = b[*r++];
1356:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1357:     tmp[i] = sum;
1358:   }

1360:   /* backward solve the upper triangular */
1361:   for (i = n - 1; i >= 0; i--) {
1362:     v   = aa + a->diag[i] + 1;
1363:     vi  = aj + a->diag[i] + 1;
1364:     nz  = ai[i + 1] - a->diag[i] - 1;
1365:     sum = tmp[i];
1366:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1367:     tmp[i] = sum * aa[a->diag[i]];
1368:     x[*c--] += tmp[i];
1369:   }

1371:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1372:   PetscCall(ISRestoreIndices(isrow, &rout));
1373:   PetscCall(ISRestoreIndices(iscol, &cout));
1374:   PetscCall(VecRestoreArrayRead(bb, &b));
1375:   PetscCall(VecRestoreArray(xx, &x));
1376:   PetscCall(PetscLogFlops(2.0 * a->nz));
1377:   PetscFunctionReturn(PETSC_SUCCESS);
1378: }

1380: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1381: {
1382:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1383:   IS                 iscol = a->col, isrow = a->row;
1384:   PetscInt           i, n                  = A->rmap->n, j;
1385:   PetscInt           nz;
1386:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1387:   PetscScalar       *x, *tmp, sum;
1388:   const PetscScalar *b;
1389:   const MatScalar   *aa, *v;

1391:   PetscFunctionBegin;
1392:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1394:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1395:   PetscCall(VecGetArrayRead(bb, &b));
1396:   PetscCall(VecGetArray(xx, &x));
1397:   tmp = a->solve_work;

1399:   PetscCall(ISGetIndices(isrow, &rout));
1400:   r = rout;
1401:   PetscCall(ISGetIndices(iscol, &cout));
1402:   c = cout;

1404:   /* forward solve the lower triangular */
1405:   tmp[0] = b[r[0]];
1406:   v      = aa;
1407:   vi     = aj;
1408:   for (i = 1; i < n; i++) {
1409:     nz  = ai[i + 1] - ai[i];
1410:     sum = b[r[i]];
1411:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1412:     tmp[i] = sum;
1413:     v += nz;
1414:     vi += nz;
1415:   }

1417:   /* backward solve the upper triangular */
1418:   v  = aa + adiag[n - 1];
1419:   vi = aj + adiag[n - 1];
1420:   for (i = n - 1; i >= 0; i--) {
1421:     nz  = adiag[i] - adiag[i + 1] - 1;
1422:     sum = tmp[i];
1423:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1424:     tmp[i] = sum * v[nz];
1425:     x[c[i]] += tmp[i];
1426:     v += nz + 1;
1427:     vi += nz + 1;
1428:   }

1430:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1431:   PetscCall(ISRestoreIndices(isrow, &rout));
1432:   PetscCall(ISRestoreIndices(iscol, &cout));
1433:   PetscCall(VecRestoreArrayRead(bb, &b));
1434:   PetscCall(VecRestoreArray(xx, &x));
1435:   PetscCall(PetscLogFlops(2.0 * a->nz));
1436:   PetscFunctionReturn(PETSC_SUCCESS);
1437: }

1439: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1440: {
1441:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1442:   IS                 iscol = a->col, isrow = a->row;
1443:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1444:   PetscInt           i, n = A->rmap->n, j;
1445:   PetscInt           nz;
1446:   PetscScalar       *x, *tmp, s1;
1447:   const MatScalar   *aa, *v;
1448:   const PetscScalar *b;

1450:   PetscFunctionBegin;
1451:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1452:   PetscCall(VecGetArrayRead(bb, &b));
1453:   PetscCall(VecGetArrayWrite(xx, &x));
1454:   tmp = a->solve_work;

1456:   PetscCall(ISGetIndices(isrow, &rout));
1457:   r = rout;
1458:   PetscCall(ISGetIndices(iscol, &cout));
1459:   c = cout;

1461:   /* copy the b into temp work space according to permutation */
1462:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1464:   /* forward solve the U^T */
1465:   for (i = 0; i < n; i++) {
1466:     v  = aa + diag[i];
1467:     vi = aj + diag[i] + 1;
1468:     nz = ai[i + 1] - diag[i] - 1;
1469:     s1 = tmp[i];
1470:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1471:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1472:     tmp[i] = s1;
1473:   }

1475:   /* backward solve the L^T */
1476:   for (i = n - 1; i >= 0; i--) {
1477:     v  = aa + diag[i] - 1;
1478:     vi = aj + diag[i] - 1;
1479:     nz = diag[i] - ai[i];
1480:     s1 = tmp[i];
1481:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1482:   }

1484:   /* copy tmp into x according to permutation */
1485:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1487:   PetscCall(ISRestoreIndices(isrow, &rout));
1488:   PetscCall(ISRestoreIndices(iscol, &cout));
1489:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1490:   PetscCall(VecRestoreArrayRead(bb, &b));
1491:   PetscCall(VecRestoreArrayWrite(xx, &x));

1493:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1494:   PetscFunctionReturn(PETSC_SUCCESS);
1495: }

1497: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1498: {
1499:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1500:   IS                 iscol = a->col, isrow = a->row;
1501:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1502:   PetscInt           i, n = A->rmap->n, j;
1503:   PetscInt           nz;
1504:   PetscScalar       *x, *tmp, s1;
1505:   const MatScalar   *aa, *v;
1506:   const PetscScalar *b;

1508:   PetscFunctionBegin;
1509:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1510:   PetscCall(VecGetArrayRead(bb, &b));
1511:   PetscCall(VecGetArrayWrite(xx, &x));
1512:   tmp = a->solve_work;

1514:   PetscCall(ISGetIndices(isrow, &rout));
1515:   r = rout;
1516:   PetscCall(ISGetIndices(iscol, &cout));
1517:   c = cout;

1519:   /* copy the b into temp work space according to permutation */
1520:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1522:   /* forward solve the U^T */
1523:   for (i = 0; i < n; i++) {
1524:     v  = aa + adiag[i + 1] + 1;
1525:     vi = aj + adiag[i + 1] + 1;
1526:     nz = adiag[i] - adiag[i + 1] - 1;
1527:     s1 = tmp[i];
1528:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1529:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1530:     tmp[i] = s1;
1531:   }

1533:   /* backward solve the L^T */
1534:   for (i = n - 1; i >= 0; i--) {
1535:     v  = aa + ai[i];
1536:     vi = aj + ai[i];
1537:     nz = ai[i + 1] - ai[i];
1538:     s1 = tmp[i];
1539:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1540:   }

1542:   /* copy tmp into x according to permutation */
1543:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1545:   PetscCall(ISRestoreIndices(isrow, &rout));
1546:   PetscCall(ISRestoreIndices(iscol, &cout));
1547:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1548:   PetscCall(VecRestoreArrayRead(bb, &b));
1549:   PetscCall(VecRestoreArrayWrite(xx, &x));

1551:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1552:   PetscFunctionReturn(PETSC_SUCCESS);
1553: }

1555: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1556: {
1557:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1558:   IS                 iscol = a->col, isrow = a->row;
1559:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1560:   PetscInt           i, n = A->rmap->n, j;
1561:   PetscInt           nz;
1562:   PetscScalar       *x, *tmp, s1;
1563:   const MatScalar   *aa, *v;
1564:   const PetscScalar *b;

1566:   PetscFunctionBegin;
1567:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1568:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1569:   PetscCall(VecGetArrayRead(bb, &b));
1570:   PetscCall(VecGetArray(xx, &x));
1571:   tmp = a->solve_work;

1573:   PetscCall(ISGetIndices(isrow, &rout));
1574:   r = rout;
1575:   PetscCall(ISGetIndices(iscol, &cout));
1576:   c = cout;

1578:   /* copy the b into temp work space according to permutation */
1579:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1581:   /* forward solve the U^T */
1582:   for (i = 0; i < n; i++) {
1583:     v  = aa + diag[i];
1584:     vi = aj + diag[i] + 1;
1585:     nz = ai[i + 1] - diag[i] - 1;
1586:     s1 = tmp[i];
1587:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1588:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1589:     tmp[i] = s1;
1590:   }

1592:   /* backward solve the L^T */
1593:   for (i = n - 1; i >= 0; i--) {
1594:     v  = aa + diag[i] - 1;
1595:     vi = aj + diag[i] - 1;
1596:     nz = diag[i] - ai[i];
1597:     s1 = tmp[i];
1598:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1599:   }

1601:   /* copy tmp into x according to permutation */
1602:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1604:   PetscCall(ISRestoreIndices(isrow, &rout));
1605:   PetscCall(ISRestoreIndices(iscol, &cout));
1606:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1607:   PetscCall(VecRestoreArrayRead(bb, &b));
1608:   PetscCall(VecRestoreArray(xx, &x));

1610:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1611:   PetscFunctionReturn(PETSC_SUCCESS);
1612: }

1614: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1615: {
1616:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1617:   IS                 iscol = a->col, isrow = a->row;
1618:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1619:   PetscInt           i, n = A->rmap->n, j;
1620:   PetscInt           nz;
1621:   PetscScalar       *x, *tmp, s1;
1622:   const MatScalar   *aa, *v;
1623:   const PetscScalar *b;

1625:   PetscFunctionBegin;
1626:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1627:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1628:   PetscCall(VecGetArrayRead(bb, &b));
1629:   PetscCall(VecGetArray(xx, &x));
1630:   tmp = a->solve_work;

1632:   PetscCall(ISGetIndices(isrow, &rout));
1633:   r = rout;
1634:   PetscCall(ISGetIndices(iscol, &cout));
1635:   c = cout;

1637:   /* copy the b into temp work space according to permutation */
1638:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1640:   /* forward solve the U^T */
1641:   for (i = 0; i < n; i++) {
1642:     v  = aa + adiag[i + 1] + 1;
1643:     vi = aj + adiag[i + 1] + 1;
1644:     nz = adiag[i] - adiag[i + 1] - 1;
1645:     s1 = tmp[i];
1646:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1647:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1648:     tmp[i] = s1;
1649:   }

1651:   /* backward solve the L^T */
1652:   for (i = n - 1; i >= 0; i--) {
1653:     v  = aa + ai[i];
1654:     vi = aj + ai[i];
1655:     nz = ai[i + 1] - ai[i];
1656:     s1 = tmp[i];
1657:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1658:   }

1660:   /* copy tmp into x according to permutation */
1661:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1663:   PetscCall(ISRestoreIndices(isrow, &rout));
1664:   PetscCall(ISRestoreIndices(iscol, &cout));
1665:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1666:   PetscCall(VecRestoreArrayRead(bb, &b));
1667:   PetscCall(VecRestoreArray(xx, &x));

1669:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1670:   PetscFunctionReturn(PETSC_SUCCESS);
1671: }

1673: /*
1674:    ilu() under revised new data structure.
1675:    Factored arrays bj and ba are stored as
1676:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1678:    bi=fact->i is an array of size n+1, in which
1679:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1680:      bi[n]:  points to L(n-1,n-1)+1

1682:   bdiag=fact->diag is an array of size n+1,in which
1683:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1684:      bdiag[n]: points to entry of U(n-1,0)-1

1686:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1687:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1688: */
1689: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1690: {
1691:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1692:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1693:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1694:   IS             isicol;

1696:   PetscFunctionBegin;
1697:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1698:   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1699:   b = (Mat_SeqAIJ *)fact->data;

1701:   /* allocate matrix arrays for new data structure */
1702:   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a));
1703:   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j));
1704:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1705:   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1706:   b->free_a  = PETSC_TRUE;
1707:   b->free_ij = PETSC_TRUE;

1709:   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1710:   bdiag = b->diag;

1712:   /* set bi and bj with new data structure */
1713:   bi = b->i;
1714:   bj = b->j;

1716:   /* L part */
1717:   bi[0] = 0;
1718:   for (i = 0; i < n; i++) {
1719:     nz        = adiag[i] - ai[i];
1720:     bi[i + 1] = bi[i] + nz;
1721:     aj        = a->j + ai[i];
1722:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1723:   }

1725:   /* U part */
1726:   bdiag[n] = bi[n] - 1;
1727:   for (i = n - 1; i >= 0; i--) {
1728:     nz = ai[i + 1] - adiag[i] - 1;
1729:     aj = a->j + adiag[i] + 1;
1730:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1731:     /* diag[i] */
1732:     bj[k++]  = i;
1733:     bdiag[i] = bdiag[i + 1] + nz + 1;
1734:   }

1736:   fact->factortype             = MAT_FACTOR_ILU;
1737:   fact->info.factor_mallocs    = 0;
1738:   fact->info.fill_ratio_given  = info->fill;
1739:   fact->info.fill_ratio_needed = 1.0;
1740:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1741:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));

1743:   b       = (Mat_SeqAIJ *)fact->data;
1744:   b->row  = isrow;
1745:   b->col  = iscol;
1746:   b->icol = isicol;
1747:   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1748:   PetscCall(PetscObjectReference((PetscObject)isrow));
1749:   PetscCall(PetscObjectReference((PetscObject)iscol));
1750:   PetscFunctionReturn(PETSC_SUCCESS);
1751: }

1753: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1754: {
1755:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1756:   IS                 isicol;
1757:   const PetscInt    *r, *ic;
1758:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1759:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1760:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1761:   PetscInt           i, levels, diagonal_fill;
1762:   PetscBool          col_identity, row_identity, missing;
1763:   PetscReal          f;
1764:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1765:   PetscBT            lnkbt;
1766:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1767:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1768:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;

1770:   PetscFunctionBegin;
1771:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1772:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1773:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1775:   levels = (PetscInt)info->levels;
1776:   PetscCall(ISIdentity(isrow, &row_identity));
1777:   PetscCall(ISIdentity(iscol, &col_identity));
1778:   if (!levels && row_identity && col_identity) {
1779:     /* special case: ilu(0) with natural ordering */
1780:     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1781:     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1782:     PetscFunctionReturn(PETSC_SUCCESS);
1783:   }

1785:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1786:   PetscCall(ISGetIndices(isrow, &r));
1787:   PetscCall(ISGetIndices(isicol, &ic));

1789:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1790:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1791:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1792:   bi[0] = bdiag[0] = 0;
1793:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1795:   /* create a linked list for storing column indices of the active row */
1796:   nlnk = n + 1;
1797:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1799:   /* initial FreeSpace size is f*(ai[n]+1) */
1800:   f             = info->fill;
1801:   diagonal_fill = (PetscInt)info->diagonal_fill;
1802:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1803:   current_space = free_space;
1804:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1805:   current_space_lvl = free_space_lvl;
1806:   for (i = 0; i < n; i++) {
1807:     nzi = 0;
1808:     /* copy current row into linked list */
1809:     nnz = ai[r[i] + 1] - ai[r[i]];
1810:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1811:     cols   = aj + ai[r[i]];
1812:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1813:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1814:     nzi += nlnk;

1816:     /* make sure diagonal entry is included */
1817:     if (diagonal_fill && lnk[i] == -1) {
1818:       fm = n;
1819:       while (lnk[fm] < i) fm = lnk[fm];
1820:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1821:       lnk[fm]    = i;
1822:       lnk_lvl[i] = 0;
1823:       nzi++;
1824:       dcount++;
1825:     }

1827:     /* add pivot rows into the active row */
1828:     nzbd = 0;
1829:     prow = lnk[n];
1830:     while (prow < i) {
1831:       nnz      = bdiag[prow];
1832:       cols     = bj_ptr[prow] + nnz + 1;
1833:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1834:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1835:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1836:       nzi += nlnk;
1837:       prow = lnk[prow];
1838:       nzbd++;
1839:     }
1840:     bdiag[i]  = nzbd;
1841:     bi[i + 1] = bi[i] + nzi;
1842:     /* if free space is not available, make more free space */
1843:     if (current_space->local_remaining < nzi) {
1844:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1845:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1846:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1847:       reallocs++;
1848:     }

1850:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1851:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1852:     bj_ptr[i]    = current_space->array;
1853:     bjlvl_ptr[i] = current_space_lvl->array;

1855:     /* make sure the active row i has diagonal entry */
1856:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1858:     current_space->array += nzi;
1859:     current_space->local_used += nzi;
1860:     current_space->local_remaining -= nzi;
1861:     current_space_lvl->array += nzi;
1862:     current_space_lvl->local_used += nzi;
1863:     current_space_lvl->local_remaining -= nzi;
1864:   }

1866:   PetscCall(ISRestoreIndices(isrow, &r));
1867:   PetscCall(ISRestoreIndices(isicol, &ic));
1868:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1869:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
1870:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));

1872:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1873:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1874:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1876: #if defined(PETSC_USE_INFO)
1877:   {
1878:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1879:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1880:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1881:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1882:     PetscCall(PetscInfo(A, "for best performance.\n"));
1883:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1884:   }
1885: #endif
1886:   /* put together the new matrix */
1887:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1888:   b          = (Mat_SeqAIJ *)fact->data;
1889:   b->free_ij = PETSC_TRUE;
1890:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1891:   b->free_a = PETSC_TRUE;
1892:   b->j      = bj;
1893:   b->i      = bi;
1894:   b->diag   = bdiag;
1895:   b->ilen   = NULL;
1896:   b->imax   = NULL;
1897:   b->row    = isrow;
1898:   b->col    = iscol;
1899:   PetscCall(PetscObjectReference((PetscObject)isrow));
1900:   PetscCall(PetscObjectReference((PetscObject)iscol));
1901:   b->icol = isicol;

1903:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1904:   /* In b structure:  Free imax, ilen, old a, old j.
1905:      Allocate bdiag, solve_work, new a, new j */
1906:   b->maxnz = b->nz = bdiag[0] + 1;

1908:   fact->info.factor_mallocs    = reallocs;
1909:   fact->info.fill_ratio_given  = f;
1910:   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1911:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1912:   if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1913:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1914:   PetscFunctionReturn(PETSC_SUCCESS);
1915: }

1917: #if 0
1918: // unused
1919: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1920: {
1921:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1922:   IS                 isicol;
1923:   const PetscInt    *r, *ic;
1924:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1925:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1926:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1927:   PetscInt           i, levels, diagonal_fill;
1928:   PetscBool          col_identity, row_identity;
1929:   PetscReal          f;
1930:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1931:   PetscBT            lnkbt;
1932:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1933:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1934:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1935:   PetscBool          missing;

1937:   PetscFunctionBegin;
1938:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1939:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1940:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1942:   f             = info->fill;
1943:   levels        = (PetscInt)info->levels;
1944:   diagonal_fill = (PetscInt)info->diagonal_fill;

1946:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

1948:   PetscCall(ISIdentity(isrow, &row_identity));
1949:   PetscCall(ISIdentity(iscol, &col_identity));
1950:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1951:     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));

1953:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1954:     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1955:     fact->factortype               = MAT_FACTOR_ILU;
1956:     (fact)->info.factor_mallocs    = 0;
1957:     (fact)->info.fill_ratio_given  = info->fill;
1958:     (fact)->info.fill_ratio_needed = 1.0;

1960:     b       = (Mat_SeqAIJ *)fact->data;
1961:     b->row  = isrow;
1962:     b->col  = iscol;
1963:     b->icol = isicol;
1964:     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1965:     PetscCall(PetscObjectReference((PetscObject)isrow));
1966:     PetscCall(PetscObjectReference((PetscObject)iscol));
1967:     PetscFunctionReturn(PETSC_SUCCESS);
1968:   }

1970:   PetscCall(ISGetIndices(isrow, &r));
1971:   PetscCall(ISGetIndices(isicol, &ic));

1973:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1974:   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1975:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1976:   bi[0] = bdiag[0] = 0;

1978:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1980:   /* create a linked list for storing column indices of the active row */
1981:   nlnk = n + 1;
1982:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1984:   /* initial FreeSpace size is f*(ai[n]+1) */
1985:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1986:   current_space = free_space;
1987:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1988:   current_space_lvl = free_space_lvl;

1990:   for (i = 0; i < n; i++) {
1991:     nzi = 0;
1992:     /* copy current row into linked list */
1993:     nnz = ai[r[i] + 1] - ai[r[i]];
1994:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1995:     cols   = aj + ai[r[i]];
1996:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1997:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1998:     nzi += nlnk;

2000:     /* make sure diagonal entry is included */
2001:     if (diagonal_fill && lnk[i] == -1) {
2002:       fm = n;
2003:       while (lnk[fm] < i) fm = lnk[fm];
2004:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
2005:       lnk[fm]    = i;
2006:       lnk_lvl[i] = 0;
2007:       nzi++;
2008:       dcount++;
2009:     }

2011:     /* add pivot rows into the active row */
2012:     nzbd = 0;
2013:     prow = lnk[n];
2014:     while (prow < i) {
2015:       nnz      = bdiag[prow];
2016:       cols     = bj_ptr[prow] + nnz + 1;
2017:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2018:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
2019:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
2020:       nzi += nlnk;
2021:       prow = lnk[prow];
2022:       nzbd++;
2023:     }
2024:     bdiag[i]  = nzbd;
2025:     bi[i + 1] = bi[i] + nzi;

2027:     /* if free space is not available, make more free space */
2028:     if (current_space->local_remaining < nzi) {
2029:       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
2030:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
2031:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
2032:       reallocs++;
2033:     }

2035:     /* copy data into free_space and free_space_lvl, then initialize lnk */
2036:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2037:     bj_ptr[i]    = current_space->array;
2038:     bjlvl_ptr[i] = current_space_lvl->array;

2040:     /* make sure the active row i has diagonal entry */
2041:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

2043:     current_space->array += nzi;
2044:     current_space->local_used += nzi;
2045:     current_space->local_remaining -= nzi;
2046:     current_space_lvl->array += nzi;
2047:     current_space_lvl->local_used += nzi;
2048:     current_space_lvl->local_remaining -= nzi;
2049:   }

2051:   PetscCall(ISRestoreIndices(isrow, &r));
2052:   PetscCall(ISRestoreIndices(isicol, &ic));

2054:   /* destroy list of free space and other temporary arrays */
2055:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2056:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2057:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2058:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2059:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

2061:   #if defined(PETSC_USE_INFO)
2062:   {
2063:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2064:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2065:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2066:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2067:     PetscCall(PetscInfo(A, "for best performance.\n"));
2068:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2069:   }
2070:   #endif

2072:   /* put together the new matrix */
2073:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2074:   b = (Mat_SeqAIJ *)fact->data;
2075:   b->free_ij      = PETSC_TRUE;
2076:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2077:   b->free_a       = PETSC_TRUE;
2078:   b->j = bj;
2079:   b->i = bi;
2080:   for (i = 0; i < n; i++) bdiag[i] += bi[i];
2081:   b->diag = bdiag;
2082:   b->ilen = NULL;
2083:   b->imax = NULL;
2084:   b->row  = isrow;
2085:   b->col  = iscol;
2086:   PetscCall(PetscObjectReference((PetscObject)isrow));
2087:   PetscCall(PetscObjectReference((PetscObject)iscol));
2088:   b->icol = isicol;
2089:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2090:   /* In b structure:  Free imax, ilen, old a, old j.
2091:      Allocate bdiag, solve_work, new a, new j */
2092:   b->maxnz = b->nz = bi[n];

2094:   fact->info.factor_mallocs    = reallocs;
2095:   fact->info.fill_ratio_given  = f;
2096:   fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2097:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2098:   if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2099:   PetscFunctionReturn(PETSC_SUCCESS);
2100: }
2101: #endif

2103: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2104: {
2105:   Mat              C  = B;
2106:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2107:   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
2108:   IS               ip = b->row, iip = b->icol;
2109:   const PetscInt  *rip, *riip;
2110:   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2111:   PetscInt        *ai = a->i, *aj = a->j;
2112:   PetscInt         k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2113:   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
2114:   PetscBool        perm_identity;
2115:   FactorShiftCtx   sctx;
2116:   PetscReal        rs;
2117:   const MatScalar *aa, *v;
2118:   MatScalar        d;

2120:   PetscFunctionBegin;
2121:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2122:   /* MatPivotSetUp(): initialize shift context sctx */
2123:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2125:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2126:     sctx.shift_top = info->zeropivot;
2127:     for (i = 0; i < mbs; i++) {
2128:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2129:       d  = (aa)[a->diag[i]];
2130:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2131:       v  = aa + ai[i];
2132:       nz = ai[i + 1] - ai[i];
2133:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2134:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2135:     }
2136:     sctx.shift_top *= 1.1;
2137:     sctx.nshift_max = 5;
2138:     sctx.shift_lo   = 0.;
2139:     sctx.shift_hi   = 1.;
2140:   }

2142:   PetscCall(ISGetIndices(ip, &rip));
2143:   PetscCall(ISGetIndices(iip, &riip));

2145:   /* allocate working arrays
2146:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2147:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2148:   */
2149:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

2151:   do {
2152:     sctx.newshift = PETSC_FALSE;

2154:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2155:     if (mbs) il[0] = 0;

2157:     for (k = 0; k < mbs; k++) {
2158:       /* zero rtmp */
2159:       nz    = bi[k + 1] - bi[k];
2160:       bjtmp = bj + bi[k];
2161:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2163:       /* load in initial unfactored row */
2164:       bval = ba + bi[k];
2165:       jmin = ai[rip[k]];
2166:       jmax = ai[rip[k] + 1];
2167:       for (j = jmin; j < jmax; j++) {
2168:         col = riip[aj[j]];
2169:         if (col >= k) { /* only take upper triangular entry */
2170:           rtmp[col] = aa[j];
2171:           *bval++   = 0.0; /* for in-place factorization */
2172:         }
2173:       }
2174:       /* shift the diagonal of the matrix: ZeropivotApply() */
2175:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

2177:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2178:       dk = rtmp[k];
2179:       i  = c2r[k]; /* first row to be added to k_th row  */

2181:       while (i < k) {
2182:         nexti = c2r[i]; /* next row to be added to k_th row */

2184:         /* compute multiplier, update diag(k) and U(i,k) */
2185:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2186:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2187:         dk += uikdi * ba[ili];           /* update diag[k] */
2188:         ba[ili] = uikdi;                 /* -U(i,k) */

2190:         /* add multiple of row i to k-th row */
2191:         jmin = ili + 1;
2192:         jmax = bi[i + 1];
2193:         if (jmin < jmax) {
2194:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2195:           /* update il and c2r for row i */
2196:           il[i]  = jmin;
2197:           j      = bj[jmin];
2198:           c2r[i] = c2r[j];
2199:           c2r[j] = i;
2200:         }
2201:         i = nexti;
2202:       }

2204:       /* copy data into U(k,:) */
2205:       rs   = 0.0;
2206:       jmin = bi[k];
2207:       jmax = bi[k + 1] - 1;
2208:       if (jmin < jmax) {
2209:         for (j = jmin; j < jmax; j++) {
2210:           col   = bj[j];
2211:           ba[j] = rtmp[col];
2212:           rs += PetscAbsScalar(ba[j]);
2213:         }
2214:         /* add the k-th row into il and c2r */
2215:         il[k]  = jmin;
2216:         i      = bj[jmin];
2217:         c2r[k] = c2r[i];
2218:         c2r[i] = k;
2219:       }

2221:       /* MatPivotCheck() */
2222:       sctx.rs = rs;
2223:       sctx.pv = dk;
2224:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2225:       if (sctx.newshift) break;
2226:       dk = sctx.pv;

2228:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2229:     }
2230:   } while (sctx.newshift);

2232:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2233:   PetscCall(PetscFree3(rtmp, il, c2r));
2234:   PetscCall(ISRestoreIndices(ip, &rip));
2235:   PetscCall(ISRestoreIndices(iip, &riip));

2237:   PetscCall(ISIdentity(ip, &perm_identity));
2238:   if (perm_identity) {
2239:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2240:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2241:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2242:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2243:   } else {
2244:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2245:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2246:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2247:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2248:   }

2250:   C->assembled    = PETSC_TRUE;
2251:   C->preallocated = PETSC_TRUE;

2253:   PetscCall(PetscLogFlops(C->rmap->n));

2255:   /* MatPivotView() */
2256:   if (sctx.nshift) {
2257:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2258:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2259:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2260:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2261:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2262:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2263:     }
2264:   }
2265:   PetscFunctionReturn(PETSC_SUCCESS);
2266: }

2268: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2269: {
2270:   Mat              C  = B;
2271:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
2272:   Mat_SeqSBAIJ    *b  = (Mat_SeqSBAIJ *)C->data;
2273:   IS               ip = b->row, iip = b->icol;
2274:   const PetscInt  *rip, *riip;
2275:   PetscInt         i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2276:   PetscInt        *ai = a->i, *aj = a->j;
2277:   PetscInt         k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2278:   MatScalar       *rtmp, *ba = b->a, *bval, dk, uikdi;
2279:   const MatScalar *aa, *v;
2280:   PetscBool        perm_identity;
2281:   FactorShiftCtx   sctx;
2282:   PetscReal        rs;
2283:   MatScalar        d;

2285:   PetscFunctionBegin;
2286:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2287:   /* MatPivotSetUp(): initialize shift context sctx */
2288:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2290:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2291:     sctx.shift_top = info->zeropivot;
2292:     for (i = 0; i < mbs; i++) {
2293:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2294:       d  = (aa)[a->diag[i]];
2295:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2296:       v  = aa + ai[i];
2297:       nz = ai[i + 1] - ai[i];
2298:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2299:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2300:     }
2301:     sctx.shift_top *= 1.1;
2302:     sctx.nshift_max = 5;
2303:     sctx.shift_lo   = 0.;
2304:     sctx.shift_hi   = 1.;
2305:   }

2307:   PetscCall(ISGetIndices(ip, &rip));
2308:   PetscCall(ISGetIndices(iip, &riip));

2310:   /* initialization */
2311:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

2313:   do {
2314:     sctx.newshift = PETSC_FALSE;

2316:     for (i = 0; i < mbs; i++) jl[i] = mbs;
2317:     il[0] = 0;

2319:     for (k = 0; k < mbs; k++) {
2320:       /* zero rtmp */
2321:       nz    = bi[k + 1] - bi[k];
2322:       bjtmp = bj + bi[k];
2323:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2325:       bval = ba + bi[k];
2326:       /* initialize k-th row by the perm[k]-th row of A */
2327:       jmin = ai[rip[k]];
2328:       jmax = ai[rip[k] + 1];
2329:       for (j = jmin; j < jmax; j++) {
2330:         col = riip[aj[j]];
2331:         if (col >= k) { /* only take upper triangular entry */
2332:           rtmp[col] = aa[j];
2333:           *bval++   = 0.0; /* for in-place factorization */
2334:         }
2335:       }
2336:       /* shift the diagonal of the matrix */
2337:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2339:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2340:       dk = rtmp[k];
2341:       i  = jl[k]; /* first row to be added to k_th row  */

2343:       while (i < k) {
2344:         nexti = jl[i]; /* next row to be added to k_th row */

2346:         /* compute multiplier, update diag(k) and U(i,k) */
2347:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2348:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2349:         dk += uikdi * ba[ili];
2350:         ba[ili] = uikdi; /* -U(i,k) */

2352:         /* add multiple of row i to k-th row */
2353:         jmin = ili + 1;
2354:         jmax = bi[i + 1];
2355:         if (jmin < jmax) {
2356:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2357:           /* update il and jl for row i */
2358:           il[i] = jmin;
2359:           j     = bj[jmin];
2360:           jl[i] = jl[j];
2361:           jl[j] = i;
2362:         }
2363:         i = nexti;
2364:       }

2366:       /* shift the diagonals when zero pivot is detected */
2367:       /* compute rs=sum of abs(off-diagonal) */
2368:       rs   = 0.0;
2369:       jmin = bi[k] + 1;
2370:       nz   = bi[k + 1] - jmin;
2371:       bcol = bj + jmin;
2372:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

2374:       sctx.rs = rs;
2375:       sctx.pv = dk;
2376:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2377:       if (sctx.newshift) break;
2378:       dk = sctx.pv;

2380:       /* copy data into U(k,:) */
2381:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2382:       jmin      = bi[k] + 1;
2383:       jmax      = bi[k + 1];
2384:       if (jmin < jmax) {
2385:         for (j = jmin; j < jmax; j++) {
2386:           col   = bj[j];
2387:           ba[j] = rtmp[col];
2388:         }
2389:         /* add the k-th row into il and jl */
2390:         il[k] = jmin;
2391:         i     = bj[jmin];
2392:         jl[k] = jl[i];
2393:         jl[i] = k;
2394:       }
2395:     }
2396:   } while (sctx.newshift);

2398:   PetscCall(PetscFree3(rtmp, il, jl));
2399:   PetscCall(ISRestoreIndices(ip, &rip));
2400:   PetscCall(ISRestoreIndices(iip, &riip));
2401:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));

2403:   PetscCall(ISIdentity(ip, &perm_identity));
2404:   if (perm_identity) {
2405:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2406:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2407:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2408:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2409:   } else {
2410:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2411:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2412:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2413:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2414:   }

2416:   C->assembled    = PETSC_TRUE;
2417:   C->preallocated = PETSC_TRUE;

2419:   PetscCall(PetscLogFlops(C->rmap->n));
2420:   if (sctx.nshift) {
2421:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2422:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2423:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2424:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2425:     }
2426:   }
2427:   PetscFunctionReturn(PETSC_SUCCESS);
2428: }

2430: /*
2431:    icc() under revised new data structure.
2432:    Factored arrays bj and ba are stored as
2433:      U(0,:),...,U(i,:),U(n-1,:)

2435:    ui=fact->i is an array of size n+1, in which
2436:    ui+
2437:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2438:      ui[n]:  points to U(n-1,n-1)+1

2440:   udiag=fact->diag is an array of size n,in which
2441:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2443:    U(i,:) contains udiag[i] as its last entry, i.e.,
2444:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2445: */

2447: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2448: {
2449:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2450:   Mat_SeqSBAIJ      *b;
2451:   PetscBool          perm_identity, missing;
2452:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2453:   const PetscInt    *rip, *riip;
2454:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2455:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2456:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2457:   PetscReal          fill = info->fill, levels = info->levels;
2458:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2459:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2460:   PetscBT            lnkbt;
2461:   IS                 iperm;

2463:   PetscFunctionBegin;
2464:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2465:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2466:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2467:   PetscCall(ISIdentity(perm, &perm_identity));
2468:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2470:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2471:   PetscCall(PetscMalloc1(am + 1, &udiag));
2472:   ui[0] = 0;

2474:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2475:   if (!levels && perm_identity) {
2476:     for (i = 0; i < am; i++) {
2477:       ncols     = ai[i + 1] - a->diag[i];
2478:       ui[i + 1] = ui[i] + ncols;
2479:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2480:     }
2481:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2482:     cols = uj;
2483:     for (i = 0; i < am; i++) {
2484:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2485:       ncols = ai[i + 1] - a->diag[i] - 1;
2486:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2487:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2488:     }
2489:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2490:     PetscCall(ISGetIndices(iperm, &riip));
2491:     PetscCall(ISGetIndices(perm, &rip));

2493:     /* initialization */
2494:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2496:     /* jl: linked list for storing indices of the pivot rows
2497:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2498:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2499:     for (i = 0; i < am; i++) {
2500:       jl[i] = am;
2501:       il[i] = 0;
2502:     }

2504:     /* create and initialize a linked list for storing column indices of the active row k */
2505:     nlnk = am + 1;
2506:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2508:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2509:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2510:     current_space = free_space;
2511:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2512:     current_space_lvl = free_space_lvl;

2514:     for (k = 0; k < am; k++) { /* for each active row k */
2515:       /* initialize lnk by the column indices of row rip[k] of A */
2516:       nzk   = 0;
2517:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2518:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2519:       ncols_upper = 0;
2520:       for (j = 0; j < ncols; j++) {
2521:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2522:         if (riip[i] >= k) {         /* only take upper triangular entry */
2523:           ajtmp[ncols_upper] = i;
2524:           ncols_upper++;
2525:         }
2526:       }
2527:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2528:       nzk += nlnk;

2530:       /* update lnk by computing fill-in for each pivot row to be merged in */
2531:       prow = jl[k]; /* 1st pivot row */

2533:       while (prow < k) {
2534:         nextprow = jl[prow];

2536:         /* merge prow into k-th row */
2537:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2538:         jmax  = ui[prow + 1];
2539:         ncols = jmax - jmin;
2540:         i     = jmin - ui[prow];
2541:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2542:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2543:         j     = *(uj - 1);
2544:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2545:         nzk += nlnk;

2547:         /* update il and jl for prow */
2548:         if (jmin < jmax) {
2549:           il[prow] = jmin;
2550:           j        = *cols;
2551:           jl[prow] = jl[j];
2552:           jl[j]    = prow;
2553:         }
2554:         prow = nextprow;
2555:       }

2557:       /* if free space is not available, make more free space */
2558:       if (current_space->local_remaining < nzk) {
2559:         i = am - k + 1;                                    /* num of unfactored rows */
2560:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2561:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2562:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2563:         reallocs++;
2564:       }

2566:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2567:       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2568:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2570:       /* add the k-th row into il and jl */
2571:       if (nzk > 1) {
2572:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2573:         jl[k] = jl[i];
2574:         jl[i] = k;
2575:         il[k] = ui[k] + 1;
2576:       }
2577:       uj_ptr[k]     = current_space->array;
2578:       uj_lvl_ptr[k] = current_space_lvl->array;

2580:       current_space->array += nzk;
2581:       current_space->local_used += nzk;
2582:       current_space->local_remaining -= nzk;

2584:       current_space_lvl->array += nzk;
2585:       current_space_lvl->local_used += nzk;
2586:       current_space_lvl->local_remaining -= nzk;

2588:       ui[k + 1] = ui[k] + nzk;
2589:     }

2591:     PetscCall(ISRestoreIndices(perm, &rip));
2592:     PetscCall(ISRestoreIndices(iperm, &riip));
2593:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2594:     PetscCall(PetscFree(ajtmp));

2596:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2597:     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2598:     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2599:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2600:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2602:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2604:   /* put together the new matrix in MATSEQSBAIJ format */
2605:   b          = (Mat_SeqSBAIJ *)fact->data;
2606:   b->free_ij = PETSC_TRUE;
2607:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2608:   b->free_a    = PETSC_TRUE;
2609:   b->j         = uj;
2610:   b->i         = ui;
2611:   b->diag      = udiag;
2612:   b->free_diag = PETSC_TRUE;
2613:   b->ilen      = NULL;
2614:   b->imax      = NULL;
2615:   b->row       = perm;
2616:   b->col       = perm;
2617:   PetscCall(PetscObjectReference((PetscObject)perm));
2618:   PetscCall(PetscObjectReference((PetscObject)perm));
2619:   b->icol          = iperm;
2620:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2622:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2624:   b->maxnz = b->nz = ui[am];

2626:   fact->info.factor_mallocs   = reallocs;
2627:   fact->info.fill_ratio_given = fill;
2628:   if (ai[am] != 0) {
2629:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2630:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2631:   } else {
2632:     fact->info.fill_ratio_needed = 0.0;
2633:   }
2634: #if defined(PETSC_USE_INFO)
2635:   if (ai[am] != 0) {
2636:     PetscReal af = fact->info.fill_ratio_needed;
2637:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2638:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2639:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2640:   } else {
2641:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2642:   }
2643: #endif
2644:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2645:   PetscFunctionReturn(PETSC_SUCCESS);
2646: }

2648: #if 0
2649: // unused
2650: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2651: {
2652:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2653:   Mat_SeqSBAIJ      *b;
2654:   PetscBool          perm_identity, missing;
2655:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2656:   const PetscInt    *rip, *riip;
2657:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2658:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2659:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2660:   PetscReal          fill = info->fill, levels = info->levels;
2661:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2662:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2663:   PetscBT            lnkbt;
2664:   IS                 iperm;

2666:   PetscFunctionBegin;
2667:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2668:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2669:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2670:   PetscCall(ISIdentity(perm, &perm_identity));
2671:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2673:   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
2674:   PetscCall(PetscMalloc1(am + 1, &udiag));
2675:   ui[0] = 0;

2677:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2678:   if (!levels && perm_identity) {
2679:     for (i = 0; i < am; i++) {
2680:       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2681:       udiag[i]  = ui[i];
2682:     }
2683:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2684:     cols = uj;
2685:     for (i = 0; i < am; i++) {
2686:       aj    = a->j + a->diag[i];
2687:       ncols = ui[i + 1] - ui[i];
2688:       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2689:     }
2690:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2691:     PetscCall(ISGetIndices(iperm, &riip));
2692:     PetscCall(ISGetIndices(perm, &rip));

2694:     /* initialization */
2695:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2697:     /* jl: linked list for storing indices of the pivot rows
2698:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2699:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2700:     for (i = 0; i < am; i++) {
2701:       jl[i] = am;
2702:       il[i] = 0;
2703:     }

2705:     /* create and initialize a linked list for storing column indices of the active row k */
2706:     nlnk = am + 1;
2707:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2709:     /* initial FreeSpace size is fill*(ai[am]+1) */
2710:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2711:     current_space = free_space;
2712:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2713:     current_space_lvl = free_space_lvl;

2715:     for (k = 0; k < am; k++) { /* for each active row k */
2716:       /* initialize lnk by the column indices of row rip[k] of A */
2717:       nzk   = 0;
2718:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2719:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2720:       ncols_upper = 0;
2721:       for (j = 0; j < ncols; j++) {
2722:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2723:         if (riip[i] >= k) {         /* only take upper triangular entry */
2724:           ajtmp[ncols_upper] = i;
2725:           ncols_upper++;
2726:         }
2727:       }
2728:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2729:       nzk += nlnk;

2731:       /* update lnk by computing fill-in for each pivot row to be merged in */
2732:       prow = jl[k]; /* 1st pivot row */

2734:       while (prow < k) {
2735:         nextprow = jl[prow];

2737:         /* merge prow into k-th row */
2738:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2739:         jmax  = ui[prow + 1];
2740:         ncols = jmax - jmin;
2741:         i     = jmin - ui[prow];
2742:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2743:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2744:         j     = *(uj - 1);
2745:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2746:         nzk += nlnk;

2748:         /* update il and jl for prow */
2749:         if (jmin < jmax) {
2750:           il[prow] = jmin;
2751:           j        = *cols;
2752:           jl[prow] = jl[j];
2753:           jl[j]    = prow;
2754:         }
2755:         prow = nextprow;
2756:       }

2758:       /* if free space is not available, make more free space */
2759:       if (current_space->local_remaining < nzk) {
2760:         i = am - k + 1;                                    /* num of unfactored rows */
2761:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2762:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2763:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2764:         reallocs++;
2765:       }

2767:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2768:       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2769:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2771:       /* add the k-th row into il and jl */
2772:       if (nzk > 1) {
2773:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2774:         jl[k] = jl[i];
2775:         jl[i] = k;
2776:         il[k] = ui[k] + 1;
2777:       }
2778:       uj_ptr[k]     = current_space->array;
2779:       uj_lvl_ptr[k] = current_space_lvl->array;

2781:       current_space->array += nzk;
2782:       current_space->local_used += nzk;
2783:       current_space->local_remaining -= nzk;

2785:       current_space_lvl->array += nzk;
2786:       current_space_lvl->local_used += nzk;
2787:       current_space_lvl->local_remaining -= nzk;

2789:       ui[k + 1] = ui[k] + nzk;
2790:     }

2792:   #if defined(PETSC_USE_INFO)
2793:     if (ai[am] != 0) {
2794:       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2795:       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2796:       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2797:       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2798:     } else {
2799:       PetscCall(PetscInfo(A, "Empty matrix\n"));
2800:     }
2801:   #endif

2803:     PetscCall(ISRestoreIndices(perm, &rip));
2804:     PetscCall(ISRestoreIndices(iperm, &riip));
2805:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2806:     PetscCall(PetscFree(ajtmp));

2808:     /* destroy list of free space and other temporary array(s) */
2809:     PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
2810:     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2811:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2812:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2814:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2816:   /* put together the new matrix in MATSEQSBAIJ format */

2818:   b               = (Mat_SeqSBAIJ *)fact->data;
2819:   b->free_ij       = PETSC_TRUE;
2820:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
2821:   b->free_a        = PETSC_TRUE;
2822:   b->j         = uj;
2823:   b->i         = ui;
2824:   b->diag      = udiag;
2825:   b->free_diag = PETSC_TRUE;
2826:   b->ilen      = NULL;
2827:   b->imax      = NULL;
2828:   b->row       = perm;
2829:   b->col       = perm;

2831:   PetscCall(PetscObjectReference((PetscObject)perm));
2832:   PetscCall(PetscObjectReference((PetscObject)perm));

2834:   b->icol          = iperm;
2835:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2836:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2837:   b->maxnz = b->nz = ui[am];

2839:   fact->info.factor_mallocs   = reallocs;
2840:   fact->info.fill_ratio_given = fill;
2841:   if (ai[am] != 0) {
2842:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2843:   } else {
2844:     fact->info.fill_ratio_needed = 0.0;
2845:   }
2846:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2847:   PetscFunctionReturn(PETSC_SUCCESS);
2848: }
2849: #endif

2851: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2852: {
2853:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2854:   Mat_SeqSBAIJ      *b;
2855:   PetscBool          perm_identity, missing;
2856:   PetscReal          fill = info->fill;
2857:   const PetscInt    *rip, *riip;
2858:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2859:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2860:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2861:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2862:   PetscBT            lnkbt;
2863:   IS                 iperm;

2865:   PetscFunctionBegin;
2866:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2867:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2868:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2870:   /* check whether perm is the identity mapping */
2871:   PetscCall(ISIdentity(perm, &perm_identity));
2872:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2873:   PetscCall(ISGetIndices(iperm, &riip));
2874:   PetscCall(ISGetIndices(perm, &rip));

2876:   /* initialization */
2877:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2878:   PetscCall(PetscMalloc1(am + 1, &udiag));
2879:   ui[0] = 0;

2881:   /* jl: linked list for storing indices of the pivot rows
2882:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2883:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2884:   for (i = 0; i < am; i++) {
2885:     jl[i] = am;
2886:     il[i] = 0;
2887:   }

2889:   /* create and initialize a linked list for storing column indices of the active row k */
2890:   nlnk = am + 1;
2891:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2893:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2894:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2895:   current_space = free_space;

2897:   for (k = 0; k < am; k++) { /* for each active row k */
2898:     /* initialize lnk by the column indices of row rip[k] of A */
2899:     nzk   = 0;
2900:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2901:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2902:     ncols_upper = 0;
2903:     for (j = 0; j < ncols; j++) {
2904:       i = riip[*(aj + ai[rip[k]] + j)];
2905:       if (i >= k) { /* only take upper triangular entry */
2906:         cols[ncols_upper] = i;
2907:         ncols_upper++;
2908:       }
2909:     }
2910:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2911:     nzk += nlnk;

2913:     /* update lnk by computing fill-in for each pivot row to be merged in */
2914:     prow = jl[k]; /* 1st pivot row */

2916:     while (prow < k) {
2917:       nextprow = jl[prow];
2918:       /* merge prow into k-th row */
2919:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2920:       jmax   = ui[prow + 1];
2921:       ncols  = jmax - jmin;
2922:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2923:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2924:       nzk += nlnk;

2926:       /* update il and jl for prow */
2927:       if (jmin < jmax) {
2928:         il[prow] = jmin;
2929:         j        = *uj_ptr;
2930:         jl[prow] = jl[j];
2931:         jl[j]    = prow;
2932:       }
2933:       prow = nextprow;
2934:     }

2936:     /* if free space is not available, make more free space */
2937:     if (current_space->local_remaining < nzk) {
2938:       i = am - k + 1;                                    /* num of unfactored rows */
2939:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2940:       PetscCall(PetscFreeSpaceGet(i, &current_space));
2941:       reallocs++;
2942:     }

2944:     /* copy data into free space, then initialize lnk */
2945:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

2947:     /* add the k-th row into il and jl */
2948:     if (nzk > 1) {
2949:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2950:       jl[k] = jl[i];
2951:       jl[i] = k;
2952:       il[k] = ui[k] + 1;
2953:     }
2954:     ui_ptr[k] = current_space->array;

2956:     current_space->array += nzk;
2957:     current_space->local_used += nzk;
2958:     current_space->local_remaining -= nzk;

2960:     ui[k + 1] = ui[k] + nzk;
2961:   }

2963:   PetscCall(ISRestoreIndices(perm, &rip));
2964:   PetscCall(ISRestoreIndices(iperm, &riip));
2965:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

2967:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2968:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2969:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2970:   PetscCall(PetscLLDestroy(lnk, lnkbt));

2972:   /* put together the new matrix in MATSEQSBAIJ format */
2973:   b          = (Mat_SeqSBAIJ *)fact->data;
2974:   b->free_ij = PETSC_TRUE;
2975:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2976:   b->free_a    = PETSC_TRUE;
2977:   b->j         = uj;
2978:   b->i         = ui;
2979:   b->diag      = udiag;
2980:   b->free_diag = PETSC_TRUE;
2981:   b->ilen      = NULL;
2982:   b->imax      = NULL;
2983:   b->row       = perm;
2984:   b->col       = perm;

2986:   PetscCall(PetscObjectReference((PetscObject)perm));
2987:   PetscCall(PetscObjectReference((PetscObject)perm));

2989:   b->icol          = iperm;
2990:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2992:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2994:   b->maxnz = b->nz = ui[am];

2996:   fact->info.factor_mallocs   = reallocs;
2997:   fact->info.fill_ratio_given = fill;
2998:   if (ai[am] != 0) {
2999:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
3000:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
3001:   } else {
3002:     fact->info.fill_ratio_needed = 0.0;
3003:   }
3004: #if defined(PETSC_USE_INFO)
3005:   if (ai[am] != 0) {
3006:     PetscReal af = fact->info.fill_ratio_needed;
3007:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3008:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3009:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3010:   } else {
3011:     PetscCall(PetscInfo(A, "Empty matrix\n"));
3012:   }
3013: #endif
3014:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3015:   PetscFunctionReturn(PETSC_SUCCESS);
3016: }

3018: #if 0
3019: // unused
3020: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
3021: {
3022:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
3023:   Mat_SeqSBAIJ      *b;
3024:   PetscBool          perm_identity, missing;
3025:   PetscReal          fill = info->fill;
3026:   const PetscInt    *rip, *riip;
3027:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
3028:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
3029:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
3030:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
3031:   PetscBT            lnkbt;
3032:   IS                 iperm;

3034:   PetscFunctionBegin;
3035:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
3036:   PetscCall(MatMissingDiagonal(A, &missing, &i));
3037:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

3039:   /* check whether perm is the identity mapping */
3040:   PetscCall(ISIdentity(perm, &perm_identity));
3041:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3042:   PetscCall(ISGetIndices(iperm, &riip));
3043:   PetscCall(ISGetIndices(perm, &rip));

3045:   /* initialization */
3046:   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
3047:   ui[0] = 0;

3049:   /* jl: linked list for storing indices of the pivot rows
3050:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3051:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3052:   for (i = 0; i < am; i++) {
3053:     jl[i] = am;
3054:     il[i] = 0;
3055:   }

3057:   /* create and initialize a linked list for storing column indices of the active row k */
3058:   nlnk = am + 1;
3059:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

3061:   /* initial FreeSpace size is fill*(ai[am]+1) */
3062:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3063:   current_space = free_space;

3065:   for (k = 0; k < am; k++) { /* for each active row k */
3066:     /* initialize lnk by the column indices of row rip[k] of A */
3067:     nzk   = 0;
3068:     ncols = ai[rip[k] + 1] - ai[rip[k]];
3069:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3070:     ncols_upper = 0;
3071:     for (j = 0; j < ncols; j++) {
3072:       i = riip[*(aj + ai[rip[k]] + j)];
3073:       if (i >= k) { /* only take upper triangular entry */
3074:         cols[ncols_upper] = i;
3075:         ncols_upper++;
3076:       }
3077:     }
3078:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3079:     nzk += nlnk;

3081:     /* update lnk by computing fill-in for each pivot row to be merged in */
3082:     prow = jl[k]; /* 1st pivot row */

3084:     while (prow < k) {
3085:       nextprow = jl[prow];
3086:       /* merge prow into k-th row */
3087:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3088:       jmax   = ui[prow + 1];
3089:       ncols  = jmax - jmin;
3090:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3091:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3092:       nzk += nlnk;

3094:       /* update il and jl for prow */
3095:       if (jmin < jmax) {
3096:         il[prow] = jmin;
3097:         j        = *uj_ptr;
3098:         jl[prow] = jl[j];
3099:         jl[j]    = prow;
3100:       }
3101:       prow = nextprow;
3102:     }

3104:     /* if free space is not available, make more free space */
3105:     if (current_space->local_remaining < nzk) {
3106:       i = am - k + 1;                     /* num of unfactored rows */
3107:       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3108:       PetscCall(PetscFreeSpaceGet(i, &current_space));
3109:       reallocs++;
3110:     }

3112:     /* copy data into free space, then initialize lnk */
3113:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

3115:     /* add the k-th row into il and jl */
3116:     if (nzk - 1 > 0) {
3117:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3118:       jl[k] = jl[i];
3119:       jl[i] = k;
3120:       il[k] = ui[k] + 1;
3121:     }
3122:     ui_ptr[k] = current_space->array;

3124:     current_space->array += nzk;
3125:     current_space->local_used += nzk;
3126:     current_space->local_remaining -= nzk;

3128:     ui[k + 1] = ui[k] + nzk;
3129:   }

3131:   #if defined(PETSC_USE_INFO)
3132:   if (ai[am] != 0) {
3133:     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3134:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3135:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3136:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3137:   } else {
3138:     PetscCall(PetscInfo(A, "Empty matrix\n"));
3139:   }
3140:   #endif

3142:   PetscCall(ISRestoreIndices(perm, &rip));
3143:   PetscCall(ISRestoreIndices(iperm, &riip));
3144:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

3146:   /* destroy list of free space and other temporary array(s) */
3147:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
3148:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3149:   PetscCall(PetscLLDestroy(lnk, lnkbt));

3151:   /* put together the new matrix in MATSEQSBAIJ format */
3152:   b               = (Mat_SeqSBAIJ *)fact->data;
3153:   b->free_ij      = PETSC_TRUE;
3154:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
3155:   b->free_a       = PETSC_TRUE;
3156:   b->j    = uj;
3157:   b->i    = ui;
3158:   b->diag = NULL;
3159:   b->ilen = NULL;
3160:   b->imax = NULL;
3161:   b->row  = perm;
3162:   b->col  = perm;

3164:   PetscCall(PetscObjectReference((PetscObject)perm));
3165:   PetscCall(PetscObjectReference((PetscObject)perm));

3167:   b->icol          = iperm;
3168:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3170:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3171:   b->maxnz = b->nz = ui[am];

3173:   fact->info.factor_mallocs   = reallocs;
3174:   fact->info.fill_ratio_given = fill;
3175:   if (ai[am] != 0) {
3176:     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3177:   } else {
3178:     fact->info.fill_ratio_needed = 0.0;
3179:   }
3180:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3181:   PetscFunctionReturn(PETSC_SUCCESS);
3182: }
3183: #endif

3185: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3186: {
3187:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3188:   PetscInt           n  = A->rmap->n;
3189:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3190:   PetscScalar       *x, sum;
3191:   const PetscScalar *b;
3192:   const MatScalar   *aa, *v;
3193:   PetscInt           i, nz;

3195:   PetscFunctionBegin;
3196:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3198:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
3199:   PetscCall(VecGetArrayRead(bb, &b));
3200:   PetscCall(VecGetArrayWrite(xx, &x));

3202:   /* forward solve the lower triangular */
3203:   x[0] = b[0];
3204:   v    = aa;
3205:   vi   = aj;
3206:   for (i = 1; i < n; i++) {
3207:     nz  = ai[i + 1] - ai[i];
3208:     sum = b[i];
3209:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3210:     v += nz;
3211:     vi += nz;
3212:     x[i] = sum;
3213:   }

3215:   /* backward solve the upper triangular */
3216:   for (i = n - 1; i >= 0; i--) {
3217:     v   = aa + adiag[i + 1] + 1;
3218:     vi  = aj + adiag[i + 1] + 1;
3219:     nz  = adiag[i] - adiag[i + 1] - 1;
3220:     sum = x[i];
3221:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3222:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3223:   }

3225:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3226:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
3227:   PetscCall(VecRestoreArrayRead(bb, &b));
3228:   PetscCall(VecRestoreArrayWrite(xx, &x));
3229:   PetscFunctionReturn(PETSC_SUCCESS);
3230: }

3232: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3233: {
3234:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3235:   IS                 iscol = a->col, isrow = a->row;
3236:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3237:   const PetscInt    *rout, *cout, *r, *c;
3238:   PetscScalar       *x, *tmp, sum;
3239:   const PetscScalar *b;
3240:   const MatScalar   *aa, *v;

3242:   PetscFunctionBegin;
3243:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3245:   PetscCall(MatSeqAIJGetArrayRead(A, &aa));
3246:   PetscCall(VecGetArrayRead(bb, &b));
3247:   PetscCall(VecGetArrayWrite(xx, &x));
3248:   tmp = a->solve_work;

3250:   PetscCall(ISGetIndices(isrow, &rout));
3251:   r = rout;
3252:   PetscCall(ISGetIndices(iscol, &cout));
3253:   c = cout;

3255:   /* forward solve the lower triangular */
3256:   tmp[0] = b[r[0]];
3257:   v      = aa;
3258:   vi     = aj;
3259:   for (i = 1; i < n; i++) {
3260:     nz  = ai[i + 1] - ai[i];
3261:     sum = b[r[i]];
3262:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3263:     tmp[i] = sum;
3264:     v += nz;
3265:     vi += nz;
3266:   }

3268:   /* backward solve the upper triangular */
3269:   for (i = n - 1; i >= 0; i--) {
3270:     v   = aa + adiag[i + 1] + 1;
3271:     vi  = aj + adiag[i + 1] + 1;
3272:     nz  = adiag[i] - adiag[i + 1] - 1;
3273:     sum = tmp[i];
3274:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3275:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3276:   }

3278:   PetscCall(ISRestoreIndices(isrow, &rout));
3279:   PetscCall(ISRestoreIndices(iscol, &cout));
3280:   PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
3281:   PetscCall(VecRestoreArrayRead(bb, &b));
3282:   PetscCall(VecRestoreArrayWrite(xx, &x));
3283:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3284:   PetscFunctionReturn(PETSC_SUCCESS);
3285: }

3287: #if 0
3288: // unused
3289: /*
3290:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3291: */
3292: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3293: {
3294:   Mat             B = *fact;
3295:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3296:   IS              isicol;
3297:   const PetscInt *r, *ic;
3298:   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3299:   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3300:   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3301:   PetscInt        nlnk, *lnk;
3302:   PetscBT         lnkbt;
3303:   PetscBool       row_identity, icol_identity;
3304:   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3305:   const PetscInt *ics;
3306:   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3307:   PetscReal       dt = info->dt, shift = info->shiftamount;
3308:   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3309:   PetscBool       missing;

3311:   PetscFunctionBegin;
3312:   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3313:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);

3315:   /* symbolic factorization, can be reused */
3316:   PetscCall(MatMissingDiagonal(A, &missing, &i));
3317:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3318:   adiag = a->diag;

3320:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

3322:   /* bdiag is location of diagonal in factor */
3323:   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3324:   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */

3326:   /* allocate row pointers bi */
3327:   PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));

3329:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3330:   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3331:   nnz_max = ai[n] + 2 * n * dtcount + 2;

3333:   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
3334:   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));

3336:   /* put together the new matrix */
3337:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3338:   b = (Mat_SeqAIJ *)B->data;
3339:   b->free_a       = PETSC_TRUE;
3340:   b->free_ij      = PETSC_TRUE;
3341:   b->a    = ba;
3342:   b->j    = bj;
3343:   b->i    = bi;
3344:   b->diag = bdiag;
3345:   b->ilen = NULL;
3346:   b->imax = NULL;
3347:   b->row  = isrow;
3348:   b->col  = iscol;
3349:   PetscCall(PetscObjectReference((PetscObject)isrow));
3350:   PetscCall(PetscObjectReference((PetscObject)iscol));
3351:   b->icol = isicol;

3353:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3354:   b->maxnz = nnz_max;

3356:   B->factortype            = MAT_FACTOR_ILUDT;
3357:   B->info.factor_mallocs   = 0;
3358:   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3359:   /*  end of symbolic factorization */

3361:   PetscCall(ISGetIndices(isrow, &r));
3362:   PetscCall(ISGetIndices(isicol, &ic));
3363:   ics = ic;

3365:   /* linked list for storing column indices of the active row */
3366:   nlnk = n + 1;
3367:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

3369:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3370:   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3371:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3372:   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3373:   PetscCall(PetscArrayzero(rtmp, n));

3375:   bi[0]         = 0;
3376:   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3377:   bdiag_rev[n]  = bdiag[0];
3378:   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3379:   for (i = 0; i < n; i++) {
3380:     /* copy initial fill into linked list */
3381:     nzi = ai[r[i] + 1] - ai[r[i]];
3382:     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3383:     nzi_al = adiag[r[i]] - ai[r[i]];
3384:     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3385:     ajtmp  = aj + ai[r[i]];
3386:     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));

3388:     /* load in initial (unfactored row) */
3389:     aatmp = a->a + ai[r[i]];
3390:     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;

3392:     /* add pivot rows into linked list */
3393:     row = lnk[n];
3394:     while (row < i) {
3395:       nzi_bl = bi[row + 1] - bi[row] + 1;
3396:       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3397:       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3398:       nzi += nlnk;
3399:       row = lnk[row];
3400:     }

3402:     /* copy data from lnk into jtmp, then initialize lnk */
3403:     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));

3405:     /* numerical factorization */
3406:     bjtmp = jtmp;
3407:     row   = *bjtmp++; /* 1st pivot row */
3408:     while (row < i) {
3409:       pc         = rtmp + row;
3410:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3411:       multiplier = (*pc) * (*pv);
3412:       *pc        = multiplier;
3413:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3414:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3415:         pv = ba + bdiag[row + 1] + 1;
3416:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3417:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3418:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3419:       }
3420:       row = *bjtmp++;
3421:     }

3423:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3424:     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3425:     nzi_bl   = 0;
3426:     j        = 0;
3427:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3428:       vtmp[j]       = rtmp[jtmp[j]];
3429:       rtmp[jtmp[j]] = 0.0;
3430:       nzi_bl++;
3431:       j++;
3432:     }
3433:     nzi_bu = nzi - nzi_bl - 1;
3434:     while (j < nzi) {
3435:       vtmp[j]       = rtmp[jtmp[j]];
3436:       rtmp[jtmp[j]] = 0.0;
3437:       j++;
3438:     }

3440:     bjtmp = bj + bi[i];
3441:     batmp = ba + bi[i];
3442:     /* apply level dropping rule to L part */
3443:     ncut = nzi_al + dtcount;
3444:     if (ncut < nzi_bl) {
3445:       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3446:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3447:     } else {
3448:       ncut = nzi_bl;
3449:     }
3450:     for (j = 0; j < ncut; j++) {
3451:       bjtmp[j] = jtmp[j];
3452:       batmp[j] = vtmp[j];
3453:     }
3454:     bi[i + 1] = bi[i] + ncut;
3455:     nzi       = ncut + 1;

3457:     /* apply level dropping rule to U part */
3458:     ncut = nzi_au + dtcount;
3459:     if (ncut < nzi_bu) {
3460:       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3461:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3462:     } else {
3463:       ncut = nzi_bu;
3464:     }
3465:     nzi += ncut;

3467:     /* mark bdiagonal */
3468:     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3469:     bdiag_rev[n - i - 1] = bdiag[i + 1];
3470:     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3471:     bjtmp                = bj + bdiag[i];
3472:     batmp                = ba + bdiag[i];
3473:     *bjtmp               = i;
3474:     *batmp               = diag_tmp; /* rtmp[i]; */
3475:     if (*batmp == 0.0) *batmp = dt + shift;
3476:     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */

3478:     bjtmp = bj + bdiag[i + 1] + 1;
3479:     batmp = ba + bdiag[i + 1] + 1;
3480:     for (k = 0; k < ncut; k++) {
3481:       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3482:       batmp[k] = vtmp[nzi_bl + 1 + k];
3483:     }

3485:     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3486:   }              /* for (i=0; i<n; i++) */
3487:   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);

3489:   PetscCall(ISRestoreIndices(isrow, &r));
3490:   PetscCall(ISRestoreIndices(isicol, &ic));

3492:   PetscCall(PetscLLDestroy(lnk, lnkbt));
3493:   PetscCall(PetscFree2(im, jtmp));
3494:   PetscCall(PetscFree2(rtmp, vtmp));
3495:   PetscCall(PetscFree(bdiag_rev));

3497:   PetscCall(PetscLogFlops(B->cmap->n));
3498:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3500:   PetscCall(ISIdentity(isrow, &row_identity));
3501:   PetscCall(ISIdentity(isicol, &icol_identity));
3502:   if (row_identity && icol_identity) {
3503:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3504:   } else {
3505:     B->ops->solve = MatSolve_SeqAIJ;
3506:   }

3508:   B->ops->solveadd          = NULL;
3509:   B->ops->solvetranspose    = NULL;
3510:   B->ops->solvetransposeadd = NULL;
3511:   B->ops->matsolve          = NULL;
3512:   B->assembled              = PETSC_TRUE;
3513:   B->preallocated           = PETSC_TRUE;
3514:   PetscFunctionReturn(PETSC_SUCCESS);
3515: }

3517: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3518: /*
3519:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3520: */

3522: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3523: {
3524:   PetscFunctionBegin;
3525:   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3526:   PetscFunctionReturn(PETSC_SUCCESS);
3527: }

3529: /*
3530:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3531:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3532: */
3533: /*
3534:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3535: */

3537: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3538: {
3539:   Mat             C = fact;
3540:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3541:   IS              isrow = b->row, isicol = b->icol;
3542:   const PetscInt *r, *ic, *ics;
3543:   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3544:   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3545:   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3546:   PetscReal       dt = info->dt, shift = info->shiftamount;
3547:   PetscBool       row_identity, col_identity;

3549:   PetscFunctionBegin;
3550:   PetscCall(ISGetIndices(isrow, &r));
3551:   PetscCall(ISGetIndices(isicol, &ic));
3552:   PetscCall(PetscMalloc1(n + 1, &rtmp));
3553:   ics = ic;

3555:   for (i = 0; i < n; i++) {
3556:     /* initialize rtmp array */
3557:     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3558:     bjtmp = bj + bi[i];
3559:     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3560:     rtmp[i] = 0.0;
3561:     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3562:     bjtmp   = bj + bdiag[i + 1] + 1;
3563:     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;

3565:     /* load in initial unfactored row of A */
3566:     nz    = ai[r[i] + 1] - ai[r[i]];
3567:     ajtmp = aj + ai[r[i]];
3568:     v     = aa + ai[r[i]];
3569:     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];

3571:     /* numerical factorization */
3572:     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3573:     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3574:     k     = 0;
3575:     while (k < nzl) {
3576:       row        = *bjtmp++;
3577:       pc         = rtmp + row;
3578:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3579:       multiplier = (*pc) * (*pv);
3580:       *pc        = multiplier;
3581:       if (PetscAbsScalar(multiplier) > dt) {
3582:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3583:         pv = b->a + bdiag[row + 1] + 1;
3584:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3585:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3586:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3587:       }
3588:       k++;
3589:     }

3591:     /* finished row so stick it into b->a */
3592:     /* L-part */
3593:     pv  = b->a + bi[i];
3594:     pj  = bj + bi[i];
3595:     nzl = bi[i + 1] - bi[i];
3596:     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];

3598:     /* diagonal: invert diagonal entries for simpler triangular solves */
3599:     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3600:     b->a[bdiag[i]] = 1.0 / rtmp[i];

3602:     /* U-part */
3603:     pv  = b->a + bdiag[i + 1] + 1;
3604:     pj  = bj + bdiag[i + 1] + 1;
3605:     nzu = bdiag[i] - bdiag[i + 1] - 1;
3606:     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3607:   }

3609:   PetscCall(PetscFree(rtmp));
3610:   PetscCall(ISRestoreIndices(isicol, &ic));
3611:   PetscCall(ISRestoreIndices(isrow, &r));

3613:   PetscCall(ISIdentity(isrow, &row_identity));
3614:   PetscCall(ISIdentity(isicol, &col_identity));
3615:   if (row_identity && col_identity) {
3616:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3617:   } else {
3618:     C->ops->solve = MatSolve_SeqAIJ;
3619:   }
3620:   C->ops->solveadd          = NULL;
3621:   C->ops->solvetranspose    = NULL;
3622:   C->ops->solvetransposeadd = NULL;
3623:   C->ops->matsolve          = NULL;
3624:   C->assembled              = PETSC_TRUE;
3625:   C->preallocated           = PETSC_TRUE;

3627:   PetscCall(PetscLogFlops(C->cmap->n));
3628:   PetscFunctionReturn(PETSC_SUCCESS);
3629: }
3630: #endif