Actual source code: aijfact.c

  1: #include <../src/mat/impls/aij/seq/aij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petscbt.h>
  4: #include <../src/mat/utils/freespace.h>

  6: /*
  7:       Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

  9:       This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
 10: */
 11: #if 0
 12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
 13: {
 14:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)mat->data;
 15:   PetscInt           i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
 16:   const PetscInt    *ai = a->i, *aj = a->j;
 17:   const PetscScalar *aa = a->a;
 18:   PetscBool         *done;
 19:   PetscReal          best, past = 0, future;

 21:   PetscFunctionBegin;
 22:   /* pick initial row */
 23:   best = -1;
 24:   for (i = 0; i < n; i++) {
 25:     future = 0.0;
 26:     for (j = ai[i]; j < ai[i + 1]; j++) {
 27:       if (aj[j] != i) future += PetscAbsScalar(aa[j]);
 28:       else past = PetscAbsScalar(aa[j]);
 29:     }
 30:     if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 31:     if (past / future > best) {
 32:       best    = past / future;
 33:       current = i;
 34:     }
 35:   }

 37:   PetscCall(PetscMalloc1(n, &done));
 38:   PetscCall(PetscArrayzero(done, n));
 39:   PetscCall(PetscMalloc1(n, &order));
 40:   order[0] = current;
 41:   for (i = 0; i < n - 1; i++) {
 42:     done[current] = PETSC_TRUE;
 43:     best          = -1;
 44:     /* loop over all neighbors of current pivot */
 45:     for (j = ai[current]; j < ai[current + 1]; j++) {
 46:       jj = aj[j];
 47:       if (done[jj]) continue;
 48:       /* loop over columns of potential next row computing weights for below and above diagonal */
 49:       past = future = 0.0;
 50:       for (k = ai[jj]; k < ai[jj + 1]; k++) {
 51:         kk = aj[k];
 52:         if (done[kk]) past += PetscAbsScalar(aa[k]);
 53:         else if (kk != jj) future += PetscAbsScalar(aa[k]);
 54:       }
 55:       if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 56:       if (past / future > best) {
 57:         best       = past / future;
 58:         newcurrent = jj;
 59:       }
 60:     }
 61:     if (best == -1) { /* no neighbors to select from so select best of all that remain */
 62:       best = -1;
 63:       for (k = 0; k < n; k++) {
 64:         if (done[k]) continue;
 65:         future = 0.0;
 66:         past   = 0.0;
 67:         for (j = ai[k]; j < ai[k + 1]; j++) {
 68:           kk = aj[j];
 69:           if (done[kk]) past += PetscAbsScalar(aa[j]);
 70:           else if (kk != k) future += PetscAbsScalar(aa[j]);
 71:         }
 72:         if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
 73:         if (past / future > best) {
 74:           best       = past / future;
 75:           newcurrent = k;
 76:         }
 77:       }
 78:     }
 79:     PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
 80:     current      = newcurrent;
 81:     order[i + 1] = current;
 82:   }
 83:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
 84:   *icol = *irow;
 85:   PetscCall(PetscObjectReference((PetscObject)*irow));
 86:   PetscCall(PetscFree(done));
 87:   PetscCall(PetscFree(order));
 88:   PetscFunctionReturn(PETSC_SUCCESS);
 89: }
 90: #endif

 92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
 93: {
 94:   PetscFunctionBegin;
 95:   *type = MATSOLVERPETSC;
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }

 99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101:   PetscInt n = A->rmap->n;

103:   PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105:   if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106:     PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107:     *B = NULL;
108:     PetscFunctionReturn(PETSC_SUCCESS);
109:   }
110: #endif

112:   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113:   PetscCall(MatSetSizes(*B, n, n, n, n));
114:   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115:     PetscCall(MatSetType(*B, MATSEQAIJ));

117:     (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118:     (*B)->ops->lufactorsymbolic  = MatLUFactorSymbolic_SeqAIJ;

120:     PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124:   } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125:     PetscCall(MatSetType(*B, MATSEQSBAIJ));
126:     PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));

128:     (*B)->ops->iccfactorsymbolic      = MatICCFactorSymbolic_SeqAIJ;
129:     (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130:     PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131:     PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132:   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133:   (*B)->factortype = ftype;

135:   PetscCall(PetscFree((*B)->solvertype));
136:   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137:   (*B)->canuseordering = PETSC_TRUE;
138:   PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }

142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
147:   IS                 isicol;
148:   const PetscInt    *r, *ic;
149:   PetscInt           i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150:   PetscInt          *bi, *bj, *ajtmp;
151:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152:   PetscReal          f;
153:   PetscInt           nlnk, *lnk, k, **bi_ptr;
154:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
155:   PetscBT            lnkbt;
156:   PetscBool          missing;

158:   PetscFunctionBegin;
159:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160:   PetscCall(MatMissingDiagonal(A, &missing, &i));
161:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

163:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164:   PetscCall(ISGetIndices(isrow, &r));
165:   PetscCall(ISGetIndices(isicol, &ic));

167:   /* get new row pointers */
168:   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
169:   bi[0] = 0;

171:   /* bdiag is location of diagonal in factor */
172:   PetscCall(PetscMalloc1(n + 1, &bdiag));
173:   bdiag[0] = 0;

175:   /* linked list for storing column indices of the active row */
176:   nlnk = n + 1;
177:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

180:   /* initial FreeSpace size is f*(ai[n]+1) */
181:   f = info->fill;
182:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
183:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
184:   current_space = free_space;

186:   for (i = 0; i < n; i++) {
187:     /* copy previous fill into linked list */
188:     nzi   = 0;
189:     nnz   = ai[r[i] + 1] - ai[r[i]];
190:     ajtmp = aj + ai[r[i]];
191:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
192:     nzi += nlnk;

194:     /* add pivot rows into linked list */
195:     row = lnk[n];
196:     while (row < i) {
197:       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198:       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
199:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
200:       nzi += nlnk;
201:       row = lnk[row];
202:     }
203:     bi[i + 1] = bi[i] + nzi;
204:     im[i]     = nzi;

206:     /* mark bdiag */
207:     nzbd = 0;
208:     nnz  = nzi;
209:     k    = lnk[n];
210:     while (nnz-- && k < i) {
211:       nzbd++;
212:       k = lnk[k];
213:     }
214:     bdiag[i] = bi[i] + nzbd;

216:     /* if free space is not available, make more free space */
217:     if (current_space->local_remaining < nzi) {
218:       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
219:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
220:       reallocs++;
221:     }

223:     /* copy data into free space, then initialize lnk */
224:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

226:     bi_ptr[i] = current_space->array;
227:     current_space->array += nzi;
228:     current_space->local_used += nzi;
229:     current_space->local_remaining -= nzi;
230:   }
231:   #if defined(PETSC_USE_INFO)
232:   if (ai[n] != 0) {
233:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
234:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
235:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
236:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
237:     PetscCall(PetscInfo(A, "for best performance.\n"));
238:   } else {
239:     PetscCall(PetscInfo(A, "Empty matrix\n"));
240:   }
241:   #endif

243:   PetscCall(ISRestoreIndices(isrow, &r));
244:   PetscCall(ISRestoreIndices(isicol, &ic));

246:   /* destroy list of free space and other temporary array(s) */
247:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
248:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
249:   PetscCall(PetscLLDestroy(lnk, lnkbt));
250:   PetscCall(PetscFree2(bi_ptr, im));

252:   /* put together the new matrix */
253:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
254:   b = (Mat_SeqAIJ *)B->data;
255:   b->free_ij      = PETSC_TRUE;
256:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
257:   b->free_a       = PETSC_TRUE;
258:   b->j    = bj;
259:   b->i    = bi;
260:   b->diag = bdiag;
261:   b->ilen = NULL;
262:   b->imax = NULL;
263:   b->row  = isrow;
264:   b->col  = iscol;
265:   PetscCall(PetscObjectReference((PetscObject)isrow));
266:   PetscCall(PetscObjectReference((PetscObject)iscol));
267:   b->icol = isicol;
268:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

270:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
271:   b->maxnz = b->nz = bi[n];

273:   B->factortype            = MAT_FACTOR_LU;
274:   B->info.factor_mallocs   = reallocs;
275:   B->info.fill_ratio_given = f;

277:   if (ai[n]) {
278:     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279:   } else {
280:     B->info.fill_ratio_needed = 0.0;
281:   }
282:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284:   PetscFunctionReturn(PETSC_SUCCESS);
285: }
286: #endif

288: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289: {
290:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
291:   IS                 isicol;
292:   const PetscInt    *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293:   PetscInt           i, n = A->rmap->n;
294:   PetscInt          *bi, *bj;
295:   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296:   PetscReal          f;
297:   PetscInt           nlnk, *lnk, k, **bi_ptr;
298:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
299:   PetscBT            lnkbt;
300:   PetscBool          missing;

302:   PetscFunctionBegin;
303:   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304:   PetscCall(MatMissingDiagonal(A, &missing, &i));
305:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

307:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308:   PetscCall(ISGetIndices(isrow, &r));
309:   PetscCall(ISGetIndices(isicol, &ic));

311:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
313:   PetscCall(PetscMalloc1(n + 1, &bdiag));
314:   bi[0] = bdiag[0] = 0;

316:   /* linked list for storing column indices of the active row */
317:   nlnk = n + 1;
318:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

320:   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

322:   /* initial FreeSpace size is f*(ai[n]+1) */
323:   f = info->fill;
324:   if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326:   current_space = free_space;

328:   for (i = 0; i < n; i++) {
329:     /* copy previous fill into linked list */
330:     nzi   = 0;
331:     nnz   = ai[r[i] + 1] - ai[r[i]];
332:     ajtmp = aj + ai[r[i]];
333:     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334:     nzi += nlnk;

336:     /* add pivot rows into linked list */
337:     row = lnk[n];
338:     while (row < i) {
339:       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
340:       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341:       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342:       nzi += nlnk;
343:       row = lnk[row];
344:     }
345:     bi[i + 1] = bi[i] + nzi;
346:     im[i]     = nzi;

348:     /* mark bdiag */
349:     nzbd = 0;
350:     nnz  = nzi;
351:     k    = lnk[n];
352:     while (nnz-- && k < i) {
353:       nzbd++;
354:       k = lnk[k];
355:     }
356:     bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

358:     /* if free space is not available, make more free space */
359:     if (current_space->local_remaining < nzi) {
360:       /* estimated additional space needed */
361:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
363:       reallocs++;
364:     }

366:     /* copy data into free space, then initialize lnk */
367:     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

369:     bi_ptr[i] = current_space->array;
370:     current_space->array += nzi;
371:     current_space->local_used += nzi;
372:     current_space->local_remaining -= nzi;
373:   }

375:   PetscCall(ISRestoreIndices(isrow, &r));
376:   PetscCall(ISRestoreIndices(isicol, &ic));

378:   /*   copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
380:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381:   PetscCall(PetscLLDestroy(lnk, lnkbt));
382:   PetscCall(PetscFree2(bi_ptr, im));

384:   /* put together the new matrix */
385:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386:   b          = (Mat_SeqAIJ *)B->data;
387:   b->free_ij = PETSC_TRUE;
388:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
389:   b->free_a = PETSC_TRUE;
390:   b->j      = bj;
391:   b->i      = bi;
392:   b->diag   = bdiag;
393:   b->ilen   = NULL;
394:   b->imax   = NULL;
395:   b->row    = isrow;
396:   b->col    = iscol;
397:   PetscCall(PetscObjectReference((PetscObject)isrow));
398:   PetscCall(PetscObjectReference((PetscObject)iscol));
399:   b->icol = isicol;
400:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));

402:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
403:   b->maxnz = b->nz = bdiag[0] + 1;

405:   B->factortype            = MAT_FACTOR_LU;
406:   B->info.factor_mallocs   = reallocs;
407:   B->info.fill_ratio_given = f;

409:   if (ai[n]) {
410:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
411:   } else {
412:     B->info.fill_ratio_needed = 0.0;
413:   }
414: #if defined(PETSC_USE_INFO)
415:   if (ai[n] != 0) {
416:     PetscReal af = B->info.fill_ratio_needed;
417:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
418:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
419:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
420:     PetscCall(PetscInfo(A, "for best performance.\n"));
421:   } else {
422:     PetscCall(PetscInfo(A, "Empty matrix\n"));
423:   }
424: #endif
425:   B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426:   if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427:   PetscCall(MatSeqAIJCheckInode_FactorLU(B));
428:   PetscFunctionReturn(PETSC_SUCCESS);
429: }

431: /*
432:     Trouble in factorization, should we dump the original matrix?
433: */
434: PetscErrorCode MatFactorDumpMatrix(Mat A)
435: {
436:   PetscBool flg = PETSC_FALSE;

438:   PetscFunctionBegin;
439:   PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
440:   if (flg) {
441:     PetscViewer viewer;
442:     char        filename[PETSC_MAX_PATH_LEN];

444:     PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
445:     PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
446:     PetscCall(MatView(A, viewer));
447:     PetscCall(PetscViewerDestroy(&viewer));
448:   }
449:   PetscFunctionReturn(PETSC_SUCCESS);
450: }

452: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453: {
454:   Mat              C = B;
455:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456:   IS               isrow = b->row, isicol = b->icol;
457:   const PetscInt  *r, *ic, *ics;
458:   const PetscInt   n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459:   PetscInt         i, j, k, nz, nzL, row, *pj;
460:   const PetscInt  *ajtmp, *bjtmp;
461:   MatScalar       *rtmp, *pc, multiplier, *pv;
462:   const MatScalar *aa = a->a, *v;
463:   PetscBool        row_identity, col_identity;
464:   FactorShiftCtx   sctx;
465:   const PetscInt  *ddiag;
466:   PetscReal        rs;
467:   MatScalar        d;

469:   PetscFunctionBegin;
470:   /* MatPivotSetUp(): initialize shift context sctx */
471:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

473:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474:     ddiag          = a->diag;
475:     sctx.shift_top = info->zeropivot;
476:     for (i = 0; i < n; i++) {
477:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478:       d  = (aa)[ddiag[i]];
479:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
480:       v  = aa + ai[i];
481:       nz = ai[i + 1] - ai[i];
482:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
483:       if (rs > sctx.shift_top) sctx.shift_top = rs;
484:     }
485:     sctx.shift_top *= 1.1;
486:     sctx.nshift_max = 5;
487:     sctx.shift_lo   = 0.;
488:     sctx.shift_hi   = 1.;
489:   }

491:   PetscCall(ISGetIndices(isrow, &r));
492:   PetscCall(ISGetIndices(isicol, &ic));
493:   PetscCall(PetscMalloc1(n + 1, &rtmp));
494:   ics = ic;

496:   do {
497:     sctx.newshift = PETSC_FALSE;
498:     for (i = 0; i < n; i++) {
499:       /* zero rtmp */
500:       /* L part */
501:       nz    = bi[i + 1] - bi[i];
502:       bjtmp = bj + bi[i];
503:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

505:       /* U part */
506:       nz    = bdiag[i] - bdiag[i + 1];
507:       bjtmp = bj + bdiag[i + 1] + 1;
508:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

510:       /* load in initial (unfactored row) */
511:       nz    = ai[r[i] + 1] - ai[r[i]];
512:       ajtmp = aj + ai[r[i]];
513:       v     = aa + ai[r[i]];
514:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
515:       /* ZeropivotApply() */
516:       rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */

518:       /* elimination */
519:       bjtmp = bj + bi[i];
520:       row   = *bjtmp++;
521:       nzL   = bi[i + 1] - bi[i];
522:       for (k = 0; k < nzL; k++) {
523:         pc = rtmp + row;
524:         if (*pc != 0.0) {
525:           pv         = b->a + bdiag[row];
526:           multiplier = *pc * (*pv);
527:           *pc        = multiplier;

529:           pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
530:           pv = b->a + bdiag[row + 1] + 1;
531:           nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */

533:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
534:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
535:         }
536:         row = *bjtmp++;
537:       }

539:       /* finished row so stick it into b->a */
540:       rs = 0.0;
541:       /* L part */
542:       pv = b->a + bi[i];
543:       pj = b->j + bi[i];
544:       nz = bi[i + 1] - bi[i];
545:       for (j = 0; j < nz; j++) {
546:         pv[j] = rtmp[pj[j]];
547:         rs += PetscAbsScalar(pv[j]);
548:       }

550:       /* U part */
551:       pv = b->a + bdiag[i + 1] + 1;
552:       pj = b->j + bdiag[i + 1] + 1;
553:       nz = bdiag[i] - bdiag[i + 1] - 1;
554:       for (j = 0; j < nz; j++) {
555:         pv[j] = rtmp[pj[j]];
556:         rs += PetscAbsScalar(pv[j]);
557:       }

559:       sctx.rs = rs;
560:       sctx.pv = rtmp[i];
561:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
562:       if (sctx.newshift) break; /* break for-loop */
563:       rtmp[i] = sctx.pv;        /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */

565:       /* Mark diagonal and invert diagonal for simpler triangular solves */
566:       pv  = b->a + bdiag[i];
567:       *pv = 1.0 / rtmp[i];

569:     } /* endof for (i=0; i<n; i++) { */

571:     /* MatPivotRefine() */
572:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
573:       /*
574:        * if no shift in this attempt & shifting & started shifting & can refine,
575:        * then try lower shift
576:        */
577:       sctx.shift_hi       = sctx.shift_fraction;
578:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
579:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
580:       sctx.newshift       = PETSC_TRUE;
581:       sctx.nshift++;
582:     }
583:   } while (sctx.newshift);

585:   PetscCall(PetscFree(rtmp));
586:   PetscCall(ISRestoreIndices(isicol, &ic));
587:   PetscCall(ISRestoreIndices(isrow, &r));

589:   PetscCall(ISIdentity(isrow, &row_identity));
590:   PetscCall(ISIdentity(isicol, &col_identity));
591:   if (b->inode.size) {
592:     C->ops->solve = MatSolve_SeqAIJ_Inode;
593:   } else if (row_identity && col_identity) {
594:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595:   } else {
596:     C->ops->solve = MatSolve_SeqAIJ;
597:   }
598:   C->ops->solveadd          = MatSolveAdd_SeqAIJ;
599:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ;
600:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601:   C->ops->matsolve          = MatMatSolve_SeqAIJ;
602:   C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
603:   C->assembled              = PETSC_TRUE;
604:   C->preallocated           = PETSC_TRUE;

606:   PetscCall(PetscLogFlops(C->cmap->n));

608:   /* MatShiftView(A,info,&sctx) */
609:   if (sctx.nshift) {
610:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
611:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
612:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
613:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
614:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
615:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
616:     }
617:   }
618:   PetscFunctionReturn(PETSC_SUCCESS);
619: }

621: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
622: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
623: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);

625: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
626: {
627:   Mat              C = B;
628:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
629:   IS               isrow = b->row, isicol = b->icol;
630:   const PetscInt  *r, *ic, *ics;
631:   PetscInt         nz, row, i, j, n = A->rmap->n, diag;
632:   const PetscInt  *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
633:   const PetscInt  *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
634:   MatScalar       *pv, *rtmp, *pc, multiplier, d;
635:   const MatScalar *v, *aa = a->a;
636:   PetscReal        rs = 0.0;
637:   FactorShiftCtx   sctx;
638:   const PetscInt  *ddiag;
639:   PetscBool        row_identity, col_identity;

641:   PetscFunctionBegin;
642:   /* MatPivotSetUp(): initialize shift context sctx */
643:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

645:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
646:     ddiag          = a->diag;
647:     sctx.shift_top = info->zeropivot;
648:     for (i = 0; i < n; i++) {
649:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
650:       d  = (aa)[ddiag[i]];
651:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
652:       v  = aa + ai[i];
653:       nz = ai[i + 1] - ai[i];
654:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
655:       if (rs > sctx.shift_top) sctx.shift_top = rs;
656:     }
657:     sctx.shift_top *= 1.1;
658:     sctx.nshift_max = 5;
659:     sctx.shift_lo   = 0.;
660:     sctx.shift_hi   = 1.;
661:   }

663:   PetscCall(ISGetIndices(isrow, &r));
664:   PetscCall(ISGetIndices(isicol, &ic));
665:   PetscCall(PetscMalloc1(n + 1, &rtmp));
666:   ics = ic;

668:   do {
669:     sctx.newshift = PETSC_FALSE;
670:     for (i = 0; i < n; i++) {
671:       nz    = bi[i + 1] - bi[i];
672:       bjtmp = bj + bi[i];
673:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

675:       /* load in initial (unfactored row) */
676:       nz    = ai[r[i] + 1] - ai[r[i]];
677:       ajtmp = aj + ai[r[i]];
678:       v     = aa + ai[r[i]];
679:       for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
680:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

682:       row = *bjtmp++;
683:       while (row < i) {
684:         pc = rtmp + row;
685:         if (*pc != 0.0) {
686:           pv         = b->a + diag_offset[row];
687:           pj         = b->j + diag_offset[row] + 1;
688:           multiplier = *pc / *pv++;
689:           *pc        = multiplier;
690:           nz         = bi[row + 1] - diag_offset[row] - 1;
691:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
692:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
693:         }
694:         row = *bjtmp++;
695:       }
696:       /* finished row so stick it into b->a */
697:       pv   = b->a + bi[i];
698:       pj   = b->j + bi[i];
699:       nz   = bi[i + 1] - bi[i];
700:       diag = diag_offset[i] - bi[i];
701:       rs   = 0.0;
702:       for (j = 0; j < nz; j++) {
703:         pv[j] = rtmp[pj[j]];
704:         rs += PetscAbsScalar(pv[j]);
705:       }
706:       rs -= PetscAbsScalar(pv[diag]);

708:       sctx.rs = rs;
709:       sctx.pv = pv[diag];
710:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
711:       if (sctx.newshift) break;
712:       pv[diag] = sctx.pv;
713:     }

715:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
716:       /*
717:        * if no shift in this attempt & shifting & started shifting & can refine,
718:        * then try lower shift
719:        */
720:       sctx.shift_hi       = sctx.shift_fraction;
721:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
722:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
723:       sctx.newshift       = PETSC_TRUE;
724:       sctx.nshift++;
725:     }
726:   } while (sctx.newshift);

728:   /* invert diagonal entries for simpler triangular solves */
729:   for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
730:   PetscCall(PetscFree(rtmp));
731:   PetscCall(ISRestoreIndices(isicol, &ic));
732:   PetscCall(ISRestoreIndices(isrow, &r));

734:   PetscCall(ISIdentity(isrow, &row_identity));
735:   PetscCall(ISIdentity(isicol, &col_identity));
736:   if (row_identity && col_identity) {
737:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
738:   } else {
739:     C->ops->solve = MatSolve_SeqAIJ_inplace;
740:   }
741:   C->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
742:   C->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
743:   C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
744:   C->ops->matsolve          = MatMatSolve_SeqAIJ_inplace;
745:   C->ops->matsolvetranspose = NULL;

747:   C->assembled    = PETSC_TRUE;
748:   C->preallocated = PETSC_TRUE;

750:   PetscCall(PetscLogFlops(C->cmap->n));
751:   if (sctx.nshift) {
752:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
753:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
754:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
755:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
756:     }
757:   }
758:   C->ops->solve          = MatSolve_SeqAIJ_inplace;
759:   C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;

761:   PetscCall(MatSeqAIJCheckInode(C));
762:   PetscFunctionReturn(PETSC_SUCCESS);
763: }

765: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);

767: /*
768:    This routine implements inplace ILU(0) with row or/and column permutations.
769:    Input:
770:      A - original matrix
771:    Output;
772:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
773:          a->j (col index) is permuted by the inverse of colperm, then sorted
774:          a->a reordered accordingly with a->j
775:          a->diag (ptr to diagonal elements) is updated.
776: */
777: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
778: {
779:   Mat_SeqAIJ      *a     = (Mat_SeqAIJ *)A->data;
780:   IS               isrow = a->row, isicol = a->icol;
781:   const PetscInt  *r, *ic, *ics;
782:   PetscInt         i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
783:   PetscInt        *ajtmp, nz, row;
784:   PetscInt        *diag = a->diag, nbdiag, *pj;
785:   PetscScalar     *rtmp, *pc, multiplier, d;
786:   MatScalar       *pv, *v;
787:   PetscReal        rs;
788:   FactorShiftCtx   sctx;
789:   const MatScalar *aa = a->a, *vtmp;

791:   PetscFunctionBegin;
792:   PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");

794:   /* MatPivotSetUp(): initialize shift context sctx */
795:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

797:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
798:     const PetscInt *ddiag = a->diag;
799:     sctx.shift_top        = info->zeropivot;
800:     for (i = 0; i < n; i++) {
801:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
802:       d    = (aa)[ddiag[i]];
803:       rs   = -PetscAbsScalar(d) - PetscRealPart(d);
804:       vtmp = aa + ai[i];
805:       nz   = ai[i + 1] - ai[i];
806:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
807:       if (rs > sctx.shift_top) sctx.shift_top = rs;
808:     }
809:     sctx.shift_top *= 1.1;
810:     sctx.nshift_max = 5;
811:     sctx.shift_lo   = 0.;
812:     sctx.shift_hi   = 1.;
813:   }

815:   PetscCall(ISGetIndices(isrow, &r));
816:   PetscCall(ISGetIndices(isicol, &ic));
817:   PetscCall(PetscMalloc1(n + 1, &rtmp));
818:   PetscCall(PetscArrayzero(rtmp, n + 1));
819:   ics = ic;

821: #if defined(MV)
822:   sctx.shift_top      = 0.;
823:   sctx.nshift_max     = 0;
824:   sctx.shift_lo       = 0.;
825:   sctx.shift_hi       = 0.;
826:   sctx.shift_fraction = 0.;

828:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
829:     sctx.shift_top = 0.;
830:     for (i = 0; i < n; i++) {
831:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
832:       d  = (a->a)[diag[i]];
833:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
834:       v  = a->a + ai[i];
835:       nz = ai[i + 1] - ai[i];
836:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
837:       if (rs > sctx.shift_top) sctx.shift_top = rs;
838:     }
839:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
840:     sctx.shift_top *= 1.1;
841:     sctx.nshift_max = 5;
842:     sctx.shift_lo   = 0.;
843:     sctx.shift_hi   = 1.;
844:   }

846:   sctx.shift_amount = 0.;
847:   sctx.nshift       = 0;
848: #endif

850:   do {
851:     sctx.newshift = PETSC_FALSE;
852:     for (i = 0; i < n; i++) {
853:       /* load in initial unfactored row */
854:       nz    = ai[r[i] + 1] - ai[r[i]];
855:       ajtmp = aj + ai[r[i]];
856:       v     = a->a + ai[r[i]];
857:       /* sort permuted ajtmp and values v accordingly */
858:       for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
859:       PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));

861:       diag[r[i]] = ai[r[i]];
862:       for (j = 0; j < nz; j++) {
863:         rtmp[ajtmp[j]] = v[j];
864:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
865:       }
866:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

868:       row = *ajtmp++;
869:       while (row < i) {
870:         pc = rtmp + row;
871:         if (*pc != 0.0) {
872:           pv = a->a + diag[r[row]];
873:           pj = aj + diag[r[row]] + 1;

875:           multiplier = *pc / *pv++;
876:           *pc        = multiplier;
877:           nz         = ai[r[row] + 1] - diag[r[row]] - 1;
878:           for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
879:           PetscCall(PetscLogFlops(1 + 2.0 * nz));
880:         }
881:         row = *ajtmp++;
882:       }
883:       /* finished row so overwrite it onto a->a */
884:       pv     = a->a + ai[r[i]];
885:       pj     = aj + ai[r[i]];
886:       nz     = ai[r[i] + 1] - ai[r[i]];
887:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */

889:       rs = 0.0;
890:       for (j = 0; j < nz; j++) {
891:         pv[j] = rtmp[pj[j]];
892:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
893:       }

895:       sctx.rs = rs;
896:       sctx.pv = pv[nbdiag];
897:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
898:       if (sctx.newshift) break;
899:       pv[nbdiag] = sctx.pv;
900:     }

902:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
903:       /*
904:        * if no shift in this attempt & shifting & started shifting & can refine,
905:        * then try lower shift
906:        */
907:       sctx.shift_hi       = sctx.shift_fraction;
908:       sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
909:       sctx.shift_amount   = sctx.shift_fraction * sctx.shift_top;
910:       sctx.newshift       = PETSC_TRUE;
911:       sctx.nshift++;
912:     }
913:   } while (sctx.newshift);

915:   /* invert diagonal entries for simpler triangular solves */
916:   for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];

918:   PetscCall(PetscFree(rtmp));
919:   PetscCall(ISRestoreIndices(isicol, &ic));
920:   PetscCall(ISRestoreIndices(isrow, &r));

922:   A->ops->solve             = MatSolve_SeqAIJ_InplaceWithPerm;
923:   A->ops->solveadd          = MatSolveAdd_SeqAIJ_inplace;
924:   A->ops->solvetranspose    = MatSolveTranspose_SeqAIJ_inplace;
925:   A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;

927:   A->assembled    = PETSC_TRUE;
928:   A->preallocated = PETSC_TRUE;

930:   PetscCall(PetscLogFlops(A->cmap->n));
931:   if (sctx.nshift) {
932:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
933:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
934:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
935:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
936:     }
937:   }
938:   PetscFunctionReturn(PETSC_SUCCESS);
939: }

941: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
942: {
943:   Mat C;

945:   PetscFunctionBegin;
946:   PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
947:   PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
948:   PetscCall(MatLUFactorNumeric(C, A, info));

950:   A->ops->solve          = C->ops->solve;
951:   A->ops->solvetranspose = C->ops->solvetranspose;

953:   PetscCall(MatHeaderMerge(A, &C));
954:   PetscFunctionReturn(PETSC_SUCCESS);
955: }

957: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
958: {
959:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
960:   IS                 iscol = a->col, isrow = a->row;
961:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
962:   PetscInt           nz;
963:   const PetscInt    *rout, *cout, *r, *c;
964:   PetscScalar       *x, *tmp, *tmps, sum;
965:   const PetscScalar *b;
966:   const MatScalar   *aa = a->a, *v;

968:   PetscFunctionBegin;
969:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

971:   PetscCall(VecGetArrayRead(bb, &b));
972:   PetscCall(VecGetArrayWrite(xx, &x));
973:   tmp = a->solve_work;

975:   PetscCall(ISGetIndices(isrow, &rout));
976:   r = rout;
977:   PetscCall(ISGetIndices(iscol, &cout));
978:   c = cout + (n - 1);

980:   /* forward solve the lower triangular */
981:   tmp[0] = b[*r++];
982:   tmps   = tmp;
983:   for (i = 1; i < n; i++) {
984:     v   = aa + ai[i];
985:     vi  = aj + ai[i];
986:     nz  = a->diag[i] - ai[i];
987:     sum = b[*r++];
988:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
989:     tmp[i] = sum;
990:   }

992:   /* backward solve the upper triangular */
993:   for (i = n - 1; i >= 0; i--) {
994:     v   = aa + a->diag[i] + 1;
995:     vi  = aj + a->diag[i] + 1;
996:     nz  = ai[i + 1] - a->diag[i] - 1;
997:     sum = tmp[i];
998:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
999:     x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1000:   }

1002:   PetscCall(ISRestoreIndices(isrow, &rout));
1003:   PetscCall(ISRestoreIndices(iscol, &cout));
1004:   PetscCall(VecRestoreArrayRead(bb, &b));
1005:   PetscCall(VecRestoreArrayWrite(xx, &x));
1006:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1007:   PetscFunctionReturn(PETSC_SUCCESS);
1008: }

1010: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1011: {
1012:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1013:   IS                 iscol = a->col, isrow = a->row;
1014:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1015:   PetscInt           nz, neq, ldb, ldx;
1016:   const PetscInt    *rout, *cout, *r, *c;
1017:   PetscScalar       *x, *tmp = a->solve_work, *tmps, sum;
1018:   const PetscScalar *b, *aa  = a->a, *v;
1019:   PetscBool          isdense;

1021:   PetscFunctionBegin;
1022:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1023:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1024:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1025:   if (X != B) {
1026:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1027:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1028:   }
1029:   PetscCall(MatDenseGetArrayRead(B, &b));
1030:   PetscCall(MatDenseGetLDA(B, &ldb));
1031:   PetscCall(MatDenseGetArray(X, &x));
1032:   PetscCall(MatDenseGetLDA(X, &ldx));
1033:   PetscCall(ISGetIndices(isrow, &rout));
1034:   r = rout;
1035:   PetscCall(ISGetIndices(iscol, &cout));
1036:   c = cout;
1037:   for (neq = 0; neq < B->cmap->n; neq++) {
1038:     /* forward solve the lower triangular */
1039:     tmp[0] = b[r[0]];
1040:     tmps   = tmp;
1041:     for (i = 1; i < n; i++) {
1042:       v   = aa + ai[i];
1043:       vi  = aj + ai[i];
1044:       nz  = a->diag[i] - ai[i];
1045:       sum = b[r[i]];
1046:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1047:       tmp[i] = sum;
1048:     }
1049:     /* backward solve the upper triangular */
1050:     for (i = n - 1; i >= 0; i--) {
1051:       v   = aa + a->diag[i] + 1;
1052:       vi  = aj + a->diag[i] + 1;
1053:       nz  = ai[i + 1] - a->diag[i] - 1;
1054:       sum = tmp[i];
1055:       PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1056:       x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1057:     }
1058:     b += ldb;
1059:     x += ldx;
1060:   }
1061:   PetscCall(ISRestoreIndices(isrow, &rout));
1062:   PetscCall(ISRestoreIndices(iscol, &cout));
1063:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1064:   PetscCall(MatDenseRestoreArray(X, &x));
1065:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1066:   PetscFunctionReturn(PETSC_SUCCESS);
1067: }

1069: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1070: {
1071:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1072:   IS                 iscol = a->col, isrow = a->row;
1073:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1074:   PetscInt           nz, neq, ldb, ldx;
1075:   const PetscInt    *rout, *cout, *r, *c;
1076:   PetscScalar       *x, *tmp = a->solve_work, sum;
1077:   const PetscScalar *b, *aa  = a->a, *v;
1078:   PetscBool          isdense;

1080:   PetscFunctionBegin;
1081:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1082:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1083:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1084:   if (X != B) {
1085:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1086:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1087:   }
1088:   PetscCall(MatDenseGetArrayRead(B, &b));
1089:   PetscCall(MatDenseGetLDA(B, &ldb));
1090:   PetscCall(MatDenseGetArray(X, &x));
1091:   PetscCall(MatDenseGetLDA(X, &ldx));
1092:   PetscCall(ISGetIndices(isrow, &rout));
1093:   r = rout;
1094:   PetscCall(ISGetIndices(iscol, &cout));
1095:   c = cout;
1096:   for (neq = 0; neq < B->cmap->n; neq++) {
1097:     /* forward solve the lower triangular */
1098:     tmp[0] = b[r[0]];
1099:     v      = aa;
1100:     vi     = aj;
1101:     for (i = 1; i < n; i++) {
1102:       nz  = ai[i + 1] - ai[i];
1103:       sum = b[r[i]];
1104:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1105:       tmp[i] = sum;
1106:       v += nz;
1107:       vi += nz;
1108:     }
1109:     /* backward solve the upper triangular */
1110:     for (i = n - 1; i >= 0; i--) {
1111:       v   = aa + adiag[i + 1] + 1;
1112:       vi  = aj + adiag[i + 1] + 1;
1113:       nz  = adiag[i] - adiag[i + 1] - 1;
1114:       sum = tmp[i];
1115:       PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1116:       x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1117:     }
1118:     b += ldb;
1119:     x += ldx;
1120:   }
1121:   PetscCall(ISRestoreIndices(isrow, &rout));
1122:   PetscCall(ISRestoreIndices(iscol, &cout));
1123:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1124:   PetscCall(MatDenseRestoreArray(X, &x));
1125:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1126:   PetscFunctionReturn(PETSC_SUCCESS);
1127: }

1129: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1130: {
1131:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1132:   IS                 iscol = a->col, isrow = a->row;
1133:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1134:   PetscInt           nz, neq, ldb, ldx;
1135:   const PetscInt    *rout, *cout, *r, *c;
1136:   PetscScalar       *x, *tmp = a->solve_work, s1;
1137:   const PetscScalar *b, *aa  = a->a, *v;
1138:   PetscBool          isdense;

1140:   PetscFunctionBegin;
1141:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1142:   PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1143:   PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1144:   if (X != B) {
1145:     PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1146:     PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1147:   }
1148:   PetscCall(MatDenseGetArrayRead(B, &b));
1149:   PetscCall(MatDenseGetLDA(B, &ldb));
1150:   PetscCall(MatDenseGetArray(X, &x));
1151:   PetscCall(MatDenseGetLDA(X, &ldx));
1152:   PetscCall(ISGetIndices(isrow, &rout));
1153:   r = rout;
1154:   PetscCall(ISGetIndices(iscol, &cout));
1155:   c = cout;
1156:   for (neq = 0; neq < B->cmap->n; neq++) {
1157:     /* copy the b into temp work space according to permutation */
1158:     for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1160:     /* forward solve the U^T */
1161:     for (i = 0; i < n; i++) {
1162:       v  = aa + adiag[i + 1] + 1;
1163:       vi = aj + adiag[i + 1] + 1;
1164:       nz = adiag[i] - adiag[i + 1] - 1;
1165:       s1 = tmp[i];
1166:       s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1167:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1168:       tmp[i] = s1;
1169:     }

1171:     /* backward solve the L^T */
1172:     for (i = n - 1; i >= 0; i--) {
1173:       v  = aa + ai[i];
1174:       vi = aj + ai[i];
1175:       nz = ai[i + 1] - ai[i];
1176:       s1 = tmp[i];
1177:       for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1178:     }

1180:     /* copy tmp into x according to permutation */
1181:     for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1182:     b += ldb;
1183:     x += ldx;
1184:   }
1185:   PetscCall(ISRestoreIndices(isrow, &rout));
1186:   PetscCall(ISRestoreIndices(iscol, &cout));
1187:   PetscCall(MatDenseRestoreArrayRead(B, &b));
1188:   PetscCall(MatDenseRestoreArray(X, &x));
1189:   PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1190:   PetscFunctionReturn(PETSC_SUCCESS);
1191: }

1193: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1194: {
1195:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1196:   IS                 iscol = a->col, isrow = a->row;
1197:   const PetscInt    *r, *c, *rout, *cout;
1198:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1199:   PetscInt           nz, row;
1200:   PetscScalar       *x, *tmp, *tmps, sum;
1201:   const PetscScalar *b;
1202:   const MatScalar   *aa = a->a, *v;

1204:   PetscFunctionBegin;
1205:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1207:   PetscCall(VecGetArrayRead(bb, &b));
1208:   PetscCall(VecGetArrayWrite(xx, &x));
1209:   tmp = a->solve_work;

1211:   PetscCall(ISGetIndices(isrow, &rout));
1212:   r = rout;
1213:   PetscCall(ISGetIndices(iscol, &cout));
1214:   c = cout + (n - 1);

1216:   /* forward solve the lower triangular */
1217:   tmp[0] = b[*r++];
1218:   tmps   = tmp;
1219:   for (row = 1; row < n; row++) {
1220:     i   = rout[row]; /* permuted row */
1221:     v   = aa + ai[i];
1222:     vi  = aj + ai[i];
1223:     nz  = a->diag[i] - ai[i];
1224:     sum = b[*r++];
1225:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1226:     tmp[row] = sum;
1227:   }

1229:   /* backward solve the upper triangular */
1230:   for (row = n - 1; row >= 0; row--) {
1231:     i   = rout[row]; /* permuted row */
1232:     v   = aa + a->diag[i] + 1;
1233:     vi  = aj + a->diag[i] + 1;
1234:     nz  = ai[i + 1] - a->diag[i] - 1;
1235:     sum = tmp[row];
1236:     PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1237:     x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1238:   }

1240:   PetscCall(ISRestoreIndices(isrow, &rout));
1241:   PetscCall(ISRestoreIndices(iscol, &cout));
1242:   PetscCall(VecRestoreArrayRead(bb, &b));
1243:   PetscCall(VecRestoreArrayWrite(xx, &x));
1244:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1245:   PetscFunctionReturn(PETSC_SUCCESS);
1246: }

1248: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1249: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1250: {
1251:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
1252:   PetscInt           n  = A->rmap->n;
1253:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag;
1254:   PetscScalar       *x;
1255:   const PetscScalar *b;
1256:   const MatScalar   *aa = a->a;
1257: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1258:   PetscInt         adiag_i, i, nz, ai_i;
1259:   const PetscInt  *vi;
1260:   const MatScalar *v;
1261:   PetscScalar      sum;
1262: #endif

1264:   PetscFunctionBegin;
1265:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

1267:   PetscCall(VecGetArrayRead(bb, &b));
1268:   PetscCall(VecGetArrayWrite(xx, &x));

1270: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1271:   fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1272: #else
1273:   /* forward solve the lower triangular */
1274:   x[0] = b[0];
1275:   for (i = 1; i < n; i++) {
1276:     ai_i = ai[i];
1277:     v    = aa + ai_i;
1278:     vi   = aj + ai_i;
1279:     nz   = adiag[i] - ai_i;
1280:     sum  = b[i];
1281:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1282:     x[i] = sum;
1283:   }

1285:   /* backward solve the upper triangular */
1286:   for (i = n - 1; i >= 0; i--) {
1287:     adiag_i = adiag[i];
1288:     v       = aa + adiag_i + 1;
1289:     vi      = aj + adiag_i + 1;
1290:     nz      = ai[i + 1] - adiag_i - 1;
1291:     sum     = x[i];
1292:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1293:     x[i] = sum * aa[adiag_i];
1294:   }
1295: #endif
1296:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1297:   PetscCall(VecRestoreArrayRead(bb, &b));
1298:   PetscCall(VecRestoreArrayWrite(xx, &x));
1299:   PetscFunctionReturn(PETSC_SUCCESS);
1300: }

1302: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1303: {
1304:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1305:   IS                 iscol = a->col, isrow = a->row;
1306:   PetscInt           i, n                  = A->rmap->n, j;
1307:   PetscInt           nz;
1308:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1309:   PetscScalar       *x, *tmp, sum;
1310:   const PetscScalar *b;
1311:   const MatScalar   *aa = a->a, *v;

1313:   PetscFunctionBegin;
1314:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1316:   PetscCall(VecGetArrayRead(bb, &b));
1317:   PetscCall(VecGetArray(xx, &x));
1318:   tmp = a->solve_work;

1320:   PetscCall(ISGetIndices(isrow, &rout));
1321:   r = rout;
1322:   PetscCall(ISGetIndices(iscol, &cout));
1323:   c = cout + (n - 1);

1325:   /* forward solve the lower triangular */
1326:   tmp[0] = b[*r++];
1327:   for (i = 1; i < n; i++) {
1328:     v   = aa + ai[i];
1329:     vi  = aj + ai[i];
1330:     nz  = a->diag[i] - ai[i];
1331:     sum = b[*r++];
1332:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1333:     tmp[i] = sum;
1334:   }

1336:   /* backward solve the upper triangular */
1337:   for (i = n - 1; i >= 0; i--) {
1338:     v   = aa + a->diag[i] + 1;
1339:     vi  = aj + a->diag[i] + 1;
1340:     nz  = ai[i + 1] - a->diag[i] - 1;
1341:     sum = tmp[i];
1342:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1343:     tmp[i] = sum * aa[a->diag[i]];
1344:     x[*c--] += tmp[i];
1345:   }

1347:   PetscCall(ISRestoreIndices(isrow, &rout));
1348:   PetscCall(ISRestoreIndices(iscol, &cout));
1349:   PetscCall(VecRestoreArrayRead(bb, &b));
1350:   PetscCall(VecRestoreArray(xx, &x));
1351:   PetscCall(PetscLogFlops(2.0 * a->nz));
1352:   PetscFunctionReturn(PETSC_SUCCESS);
1353: }

1355: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1356: {
1357:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1358:   IS                 iscol = a->col, isrow = a->row;
1359:   PetscInt           i, n                  = A->rmap->n, j;
1360:   PetscInt           nz;
1361:   const PetscInt    *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1362:   PetscScalar       *x, *tmp, sum;
1363:   const PetscScalar *b;
1364:   const MatScalar   *aa = a->a, *v;

1366:   PetscFunctionBegin;
1367:   if (yy != xx) PetscCall(VecCopy(yy, xx));

1369:   PetscCall(VecGetArrayRead(bb, &b));
1370:   PetscCall(VecGetArray(xx, &x));
1371:   tmp = a->solve_work;

1373:   PetscCall(ISGetIndices(isrow, &rout));
1374:   r = rout;
1375:   PetscCall(ISGetIndices(iscol, &cout));
1376:   c = cout;

1378:   /* forward solve the lower triangular */
1379:   tmp[0] = b[r[0]];
1380:   v      = aa;
1381:   vi     = aj;
1382:   for (i = 1; i < n; i++) {
1383:     nz  = ai[i + 1] - ai[i];
1384:     sum = b[r[i]];
1385:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1386:     tmp[i] = sum;
1387:     v += nz;
1388:     vi += nz;
1389:   }

1391:   /* backward solve the upper triangular */
1392:   v  = aa + adiag[n - 1];
1393:   vi = aj + adiag[n - 1];
1394:   for (i = n - 1; i >= 0; i--) {
1395:     nz  = adiag[i] - adiag[i + 1] - 1;
1396:     sum = tmp[i];
1397:     for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1398:     tmp[i] = sum * v[nz];
1399:     x[c[i]] += tmp[i];
1400:     v += nz + 1;
1401:     vi += nz + 1;
1402:   }

1404:   PetscCall(ISRestoreIndices(isrow, &rout));
1405:   PetscCall(ISRestoreIndices(iscol, &cout));
1406:   PetscCall(VecRestoreArrayRead(bb, &b));
1407:   PetscCall(VecRestoreArray(xx, &x));
1408:   PetscCall(PetscLogFlops(2.0 * a->nz));
1409:   PetscFunctionReturn(PETSC_SUCCESS);
1410: }

1412: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1413: {
1414:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1415:   IS                 iscol = a->col, isrow = a->row;
1416:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1417:   PetscInt           i, n = A->rmap->n, j;
1418:   PetscInt           nz;
1419:   PetscScalar       *x, *tmp, s1;
1420:   const MatScalar   *aa = a->a, *v;
1421:   const PetscScalar *b;

1423:   PetscFunctionBegin;
1424:   PetscCall(VecGetArrayRead(bb, &b));
1425:   PetscCall(VecGetArrayWrite(xx, &x));
1426:   tmp = a->solve_work;

1428:   PetscCall(ISGetIndices(isrow, &rout));
1429:   r = rout;
1430:   PetscCall(ISGetIndices(iscol, &cout));
1431:   c = cout;

1433:   /* copy the b into temp work space according to permutation */
1434:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1436:   /* forward solve the U^T */
1437:   for (i = 0; i < n; i++) {
1438:     v  = aa + diag[i];
1439:     vi = aj + diag[i] + 1;
1440:     nz = ai[i + 1] - diag[i] - 1;
1441:     s1 = tmp[i];
1442:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1443:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1444:     tmp[i] = s1;
1445:   }

1447:   /* backward solve the L^T */
1448:   for (i = n - 1; i >= 0; i--) {
1449:     v  = aa + diag[i] - 1;
1450:     vi = aj + diag[i] - 1;
1451:     nz = diag[i] - ai[i];
1452:     s1 = tmp[i];
1453:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1454:   }

1456:   /* copy tmp into x according to permutation */
1457:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1459:   PetscCall(ISRestoreIndices(isrow, &rout));
1460:   PetscCall(ISRestoreIndices(iscol, &cout));
1461:   PetscCall(VecRestoreArrayRead(bb, &b));
1462:   PetscCall(VecRestoreArrayWrite(xx, &x));

1464:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1465:   PetscFunctionReturn(PETSC_SUCCESS);
1466: }

1468: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1469: {
1470:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1471:   IS                 iscol = a->col, isrow = a->row;
1472:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1473:   PetscInt           i, n = A->rmap->n, j;
1474:   PetscInt           nz;
1475:   PetscScalar       *x, *tmp, s1;
1476:   const MatScalar   *aa = a->a, *v;
1477:   const PetscScalar *b;

1479:   PetscFunctionBegin;
1480:   PetscCall(VecGetArrayRead(bb, &b));
1481:   PetscCall(VecGetArrayWrite(xx, &x));
1482:   tmp = a->solve_work;

1484:   PetscCall(ISGetIndices(isrow, &rout));
1485:   r = rout;
1486:   PetscCall(ISGetIndices(iscol, &cout));
1487:   c = cout;

1489:   /* copy the b into temp work space according to permutation */
1490:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1492:   /* forward solve the U^T */
1493:   for (i = 0; i < n; i++) {
1494:     v  = aa + adiag[i + 1] + 1;
1495:     vi = aj + adiag[i + 1] + 1;
1496:     nz = adiag[i] - adiag[i + 1] - 1;
1497:     s1 = tmp[i];
1498:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1499:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1500:     tmp[i] = s1;
1501:   }

1503:   /* backward solve the L^T */
1504:   for (i = n - 1; i >= 0; i--) {
1505:     v  = aa + ai[i];
1506:     vi = aj + ai[i];
1507:     nz = ai[i + 1] - ai[i];
1508:     s1 = tmp[i];
1509:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1510:   }

1512:   /* copy tmp into x according to permutation */
1513:   for (i = 0; i < n; i++) x[r[i]] = tmp[i];

1515:   PetscCall(ISRestoreIndices(isrow, &rout));
1516:   PetscCall(ISRestoreIndices(iscol, &cout));
1517:   PetscCall(VecRestoreArrayRead(bb, &b));
1518:   PetscCall(VecRestoreArrayWrite(xx, &x));

1520:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1521:   PetscFunctionReturn(PETSC_SUCCESS);
1522: }

1524: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1525: {
1526:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1527:   IS                 iscol = a->col, isrow = a->row;
1528:   const PetscInt    *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1529:   PetscInt           i, n = A->rmap->n, j;
1530:   PetscInt           nz;
1531:   PetscScalar       *x, *tmp, s1;
1532:   const MatScalar   *aa = a->a, *v;
1533:   const PetscScalar *b;

1535:   PetscFunctionBegin;
1536:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1537:   PetscCall(VecGetArrayRead(bb, &b));
1538:   PetscCall(VecGetArray(xx, &x));
1539:   tmp = a->solve_work;

1541:   PetscCall(ISGetIndices(isrow, &rout));
1542:   r = rout;
1543:   PetscCall(ISGetIndices(iscol, &cout));
1544:   c = cout;

1546:   /* copy the b into temp work space according to permutation */
1547:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1549:   /* forward solve the U^T */
1550:   for (i = 0; i < n; i++) {
1551:     v  = aa + diag[i];
1552:     vi = aj + diag[i] + 1;
1553:     nz = ai[i + 1] - diag[i] - 1;
1554:     s1 = tmp[i];
1555:     s1 *= (*v++); /* multiply by inverse of diagonal entry */
1556:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1557:     tmp[i] = s1;
1558:   }

1560:   /* backward solve the L^T */
1561:   for (i = n - 1; i >= 0; i--) {
1562:     v  = aa + diag[i] - 1;
1563:     vi = aj + diag[i] - 1;
1564:     nz = diag[i] - ai[i];
1565:     s1 = tmp[i];
1566:     for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1567:   }

1569:   /* copy tmp into x according to permutation */
1570:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1572:   PetscCall(ISRestoreIndices(isrow, &rout));
1573:   PetscCall(ISRestoreIndices(iscol, &cout));
1574:   PetscCall(VecRestoreArrayRead(bb, &b));
1575:   PetscCall(VecRestoreArray(xx, &x));

1577:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1578:   PetscFunctionReturn(PETSC_SUCCESS);
1579: }

1581: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1582: {
1583:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
1584:   IS                 iscol = a->col, isrow = a->row;
1585:   const PetscInt    *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1586:   PetscInt           i, n = A->rmap->n, j;
1587:   PetscInt           nz;
1588:   PetscScalar       *x, *tmp, s1;
1589:   const MatScalar   *aa = a->a, *v;
1590:   const PetscScalar *b;

1592:   PetscFunctionBegin;
1593:   if (zz != xx) PetscCall(VecCopy(zz, xx));
1594:   PetscCall(VecGetArrayRead(bb, &b));
1595:   PetscCall(VecGetArray(xx, &x));
1596:   tmp = a->solve_work;

1598:   PetscCall(ISGetIndices(isrow, &rout));
1599:   r = rout;
1600:   PetscCall(ISGetIndices(iscol, &cout));
1601:   c = cout;

1603:   /* copy the b into temp work space according to permutation */
1604:   for (i = 0; i < n; i++) tmp[i] = b[c[i]];

1606:   /* forward solve the U^T */
1607:   for (i = 0; i < n; i++) {
1608:     v  = aa + adiag[i + 1] + 1;
1609:     vi = aj + adiag[i + 1] + 1;
1610:     nz = adiag[i] - adiag[i + 1] - 1;
1611:     s1 = tmp[i];
1612:     s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1613:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1614:     tmp[i] = s1;
1615:   }

1617:   /* backward solve the L^T */
1618:   for (i = n - 1; i >= 0; i--) {
1619:     v  = aa + ai[i];
1620:     vi = aj + ai[i];
1621:     nz = ai[i + 1] - ai[i];
1622:     s1 = tmp[i];
1623:     for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1624:   }

1626:   /* copy tmp into x according to permutation */
1627:   for (i = 0; i < n; i++) x[r[i]] += tmp[i];

1629:   PetscCall(ISRestoreIndices(isrow, &rout));
1630:   PetscCall(ISRestoreIndices(iscol, &cout));
1631:   PetscCall(VecRestoreArrayRead(bb, &b));
1632:   PetscCall(VecRestoreArray(xx, &x));

1634:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1635:   PetscFunctionReturn(PETSC_SUCCESS);
1636: }

1638: /*
1639:    ilu() under revised new data structure.
1640:    Factored arrays bj and ba are stored as
1641:      L(0,:), L(1,:), ...,L(n-1,:),  U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

1643:    bi=fact->i is an array of size n+1, in which
1644:      bi[i]:  points to 1st entry of L(i,:),i=0,...,n-1
1645:      bi[n]:  points to L(n-1,n-1)+1

1647:   bdiag=fact->diag is an array of size n+1,in which
1648:      bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1649:      bdiag[n]: points to entry of U(n-1,0)-1

1651:    U(i,:) contains bdiag[i] as its last entry, i.e.,
1652:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1653: */
1654: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1655: {
1656:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data, *b;
1657:   const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1658:   PetscInt       i, j, k = 0, nz, *bi, *bj, *bdiag;
1659:   IS             isicol;

1661:   PetscFunctionBegin;
1662:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1663:   PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1664:   b = (Mat_SeqAIJ *)fact->data;

1666:   /* allocate matrix arrays for new data structure */
1667:   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a));
1668:   PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j));
1669:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1670:   if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1671:   b->free_a  = PETSC_TRUE;
1672:   b->free_ij = PETSC_TRUE;

1674:   if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1675:   bdiag = b->diag;

1677:   /* set bi and bj with new data structure */
1678:   bi = b->i;
1679:   bj = b->j;

1681:   /* L part */
1682:   bi[0] = 0;
1683:   for (i = 0; i < n; i++) {
1684:     nz        = adiag[i] - ai[i];
1685:     bi[i + 1] = bi[i] + nz;
1686:     aj        = a->j + ai[i];
1687:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1688:   }

1690:   /* U part */
1691:   bdiag[n] = bi[n] - 1;
1692:   for (i = n - 1; i >= 0; i--) {
1693:     nz = ai[i + 1] - adiag[i] - 1;
1694:     aj = a->j + adiag[i] + 1;
1695:     for (j = 0; j < nz; j++) bj[k++] = aj[j];
1696:     /* diag[i] */
1697:     bj[k++]  = i;
1698:     bdiag[i] = bdiag[i + 1] + nz + 1;
1699:   }

1701:   fact->factortype             = MAT_FACTOR_ILU;
1702:   fact->info.factor_mallocs    = 0;
1703:   fact->info.fill_ratio_given  = info->fill;
1704:   fact->info.fill_ratio_needed = 1.0;
1705:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1706:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));

1708:   b       = (Mat_SeqAIJ *)fact->data;
1709:   b->row  = isrow;
1710:   b->col  = iscol;
1711:   b->icol = isicol;
1712:   PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1713:   PetscCall(PetscObjectReference((PetscObject)isrow));
1714:   PetscCall(PetscObjectReference((PetscObject)iscol));
1715:   PetscFunctionReturn(PETSC_SUCCESS);
1716: }

1718: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1719: {
1720:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1721:   IS                 isicol;
1722:   const PetscInt    *r, *ic;
1723:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1724:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1725:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1726:   PetscInt           i, levels, diagonal_fill;
1727:   PetscBool          col_identity, row_identity, missing;
1728:   PetscReal          f;
1729:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1730:   PetscBT            lnkbt;
1731:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1732:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1733:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;

1735:   PetscFunctionBegin;
1736:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1737:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1738:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1740:   levels = (PetscInt)info->levels;
1741:   PetscCall(ISIdentity(isrow, &row_identity));
1742:   PetscCall(ISIdentity(iscol, &col_identity));
1743:   if (!levels && row_identity && col_identity) {
1744:     /* special case: ilu(0) with natural ordering */
1745:     PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1746:     if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1747:     PetscFunctionReturn(PETSC_SUCCESS);
1748:   }

1750:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1751:   PetscCall(ISGetIndices(isrow, &r));
1752:   PetscCall(ISGetIndices(isicol, &ic));

1754:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755:   PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1756:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1757:   bi[0] = bdiag[0] = 0;
1758:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1760:   /* create a linked list for storing column indices of the active row */
1761:   nlnk = n + 1;
1762:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1764:   /* initial FreeSpace size is f*(ai[n]+1) */
1765:   f             = info->fill;
1766:   diagonal_fill = (PetscInt)info->diagonal_fill;
1767:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1768:   current_space = free_space;
1769:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1770:   current_space_lvl = free_space_lvl;
1771:   for (i = 0; i < n; i++) {
1772:     nzi = 0;
1773:     /* copy current row into linked list */
1774:     nnz = ai[r[i] + 1] - ai[r[i]];
1775:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1776:     cols   = aj + ai[r[i]];
1777:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1778:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1779:     nzi += nlnk;

1781:     /* make sure diagonal entry is included */
1782:     if (diagonal_fill && lnk[i] == -1) {
1783:       fm = n;
1784:       while (lnk[fm] < i) fm = lnk[fm];
1785:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1786:       lnk[fm]    = i;
1787:       lnk_lvl[i] = 0;
1788:       nzi++;
1789:       dcount++;
1790:     }

1792:     /* add pivot rows into the active row */
1793:     nzbd = 0;
1794:     prow = lnk[n];
1795:     while (prow < i) {
1796:       nnz      = bdiag[prow];
1797:       cols     = bj_ptr[prow] + nnz + 1;
1798:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1799:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1800:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1801:       nzi += nlnk;
1802:       prow = lnk[prow];
1803:       nzbd++;
1804:     }
1805:     bdiag[i]  = nzbd;
1806:     bi[i + 1] = bi[i] + nzi;
1807:     /* if free space is not available, make more free space */
1808:     if (current_space->local_remaining < nzi) {
1809:       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1810:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1811:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1812:       reallocs++;
1813:     }

1815:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1816:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1817:     bj_ptr[i]    = current_space->array;
1818:     bjlvl_ptr[i] = current_space_lvl->array;

1820:     /* make sure the active row i has diagonal entry */
1821:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

1823:     current_space->array += nzi;
1824:     current_space->local_used += nzi;
1825:     current_space->local_remaining -= nzi;
1826:     current_space_lvl->array += nzi;
1827:     current_space_lvl->local_used += nzi;
1828:     current_space_lvl->local_remaining -= nzi;
1829:   }

1831:   PetscCall(ISRestoreIndices(isrow, &r));
1832:   PetscCall(ISRestoreIndices(isicol, &ic));
1833:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1834:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
1835:   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));

1837:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1838:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1839:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

1841: #if defined(PETSC_USE_INFO)
1842:   {
1843:     PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1844:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1845:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1846:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1847:     PetscCall(PetscInfo(A, "for best performance.\n"));
1848:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1849:   }
1850: #endif
1851:   /* put together the new matrix */
1852:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1853:   b          = (Mat_SeqAIJ *)fact->data;
1854:   b->free_ij = PETSC_TRUE;
1855:   PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1856:   b->free_a = PETSC_TRUE;
1857:   b->j      = bj;
1858:   b->i      = bi;
1859:   b->diag   = bdiag;
1860:   b->ilen   = NULL;
1861:   b->imax   = NULL;
1862:   b->row    = isrow;
1863:   b->col    = iscol;
1864:   PetscCall(PetscObjectReference((PetscObject)isrow));
1865:   PetscCall(PetscObjectReference((PetscObject)iscol));
1866:   b->icol = isicol;

1868:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1869:   /* In b structure:  Free imax, ilen, old a, old j.
1870:      Allocate bdiag, solve_work, new a, new j */
1871:   b->maxnz = b->nz = bdiag[0] + 1;

1873:   fact->info.factor_mallocs    = reallocs;
1874:   fact->info.fill_ratio_given  = f;
1875:   fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1876:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ;
1877:   if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1878:   PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1879:   PetscFunctionReturn(PETSC_SUCCESS);
1880: }

1882: #if 0
1883: // unused
1884: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1885: {
1886:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *b;
1887:   IS                 isicol;
1888:   const PetscInt    *r, *ic;
1889:   PetscInt           n = A->rmap->n, *ai = a->i, *aj = a->j;
1890:   PetscInt          *bi, *cols, nnz, *cols_lvl;
1891:   PetscInt          *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1892:   PetscInt           i, levels, diagonal_fill;
1893:   PetscBool          col_identity, row_identity;
1894:   PetscReal          f;
1895:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL;
1896:   PetscBT            lnkbt;
1897:   PetscInt           nzi, *bj, **bj_ptr, **bjlvl_ptr;
1898:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
1899:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1900:   PetscBool          missing;

1902:   PetscFunctionBegin;
1903:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1904:   PetscCall(MatMissingDiagonal(A, &missing, &i));
1905:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

1907:   f             = info->fill;
1908:   levels        = (PetscInt)info->levels;
1909:   diagonal_fill = (PetscInt)info->diagonal_fill;

1911:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

1913:   PetscCall(ISIdentity(isrow, &row_identity));
1914:   PetscCall(ISIdentity(iscol, &col_identity));
1915:   if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1916:     PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));

1918:     (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1919:     if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1920:     fact->factortype               = MAT_FACTOR_ILU;
1921:     (fact)->info.factor_mallocs    = 0;
1922:     (fact)->info.fill_ratio_given  = info->fill;
1923:     (fact)->info.fill_ratio_needed = 1.0;

1925:     b       = (Mat_SeqAIJ *)fact->data;
1926:     b->row  = isrow;
1927:     b->col  = iscol;
1928:     b->icol = isicol;
1929:     PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1930:     PetscCall(PetscObjectReference((PetscObject)isrow));
1931:     PetscCall(PetscObjectReference((PetscObject)iscol));
1932:     PetscFunctionReturn(PETSC_SUCCESS);
1933:   }

1935:   PetscCall(ISGetIndices(isrow, &r));
1936:   PetscCall(ISGetIndices(isicol, &ic));

1938:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1939:   PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1940:   PetscCall(PetscMalloc1(n + 1, &bdiag));
1941:   bi[0] = bdiag[0] = 0;

1943:   PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));

1945:   /* create a linked list for storing column indices of the active row */
1946:   nlnk = n + 1;
1947:   PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));

1949:   /* initial FreeSpace size is f*(ai[n]+1) */
1950:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1951:   current_space = free_space;
1952:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1953:   current_space_lvl = free_space_lvl;

1955:   for (i = 0; i < n; i++) {
1956:     nzi = 0;
1957:     /* copy current row into linked list */
1958:     nnz = ai[r[i] + 1] - ai[r[i]];
1959:     PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1960:     cols   = aj + ai[r[i]];
1961:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1962:     PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1963:     nzi += nlnk;

1965:     /* make sure diagonal entry is included */
1966:     if (diagonal_fill && lnk[i] == -1) {
1967:       fm = n;
1968:       while (lnk[fm] < i) fm = lnk[fm];
1969:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1970:       lnk[fm]    = i;
1971:       lnk_lvl[i] = 0;
1972:       nzi++;
1973:       dcount++;
1974:     }

1976:     /* add pivot rows into the active row */
1977:     nzbd = 0;
1978:     prow = lnk[n];
1979:     while (prow < i) {
1980:       nnz      = bdiag[prow];
1981:       cols     = bj_ptr[prow] + nnz + 1;
1982:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1983:       nnz      = bi[prow + 1] - bi[prow] - nnz - 1;
1984:       PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1985:       nzi += nlnk;
1986:       prow = lnk[prow];
1987:       nzbd++;
1988:     }
1989:     bdiag[i]  = nzbd;
1990:     bi[i + 1] = bi[i] + nzi;

1992:     /* if free space is not available, make more free space */
1993:     if (current_space->local_remaining < nzi) {
1994:       nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1995:       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
1996:       PetscCall(PetscFreeSpaceGet(nnz, &current_space_lvl));
1997:       reallocs++;
1998:     }

2000:     /* copy data into free_space and free_space_lvl, then initialize lnk */
2001:     PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2002:     bj_ptr[i]    = current_space->array;
2003:     bjlvl_ptr[i] = current_space_lvl->array;

2005:     /* make sure the active row i has diagonal entry */
2006:     PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);

2008:     current_space->array += nzi;
2009:     current_space->local_used += nzi;
2010:     current_space->local_remaining -= nzi;
2011:     current_space_lvl->array += nzi;
2012:     current_space_lvl->local_used += nzi;
2013:     current_space_lvl->local_remaining -= nzi;
2014:   }

2016:   PetscCall(ISRestoreIndices(isrow, &r));
2017:   PetscCall(ISRestoreIndices(isicol, &ic));

2019:   /* destroy list of free space and other temporary arrays */
2020:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2021:   PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2022:   PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2023:   PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2024:   PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));

2026:   #if defined(PETSC_USE_INFO)
2027:   {
2028:     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2029:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2030:     PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2031:     PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2032:     PetscCall(PetscInfo(A, "for best performance.\n"));
2033:     if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2034:   }
2035:   #endif

2037:   /* put together the new matrix */
2038:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2039:   b = (Mat_SeqAIJ *)fact->data;
2040:   b->free_ij      = PETSC_TRUE;
2041:   PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2042:   b->free_a       = PETSC_TRUE;
2043:   b->j = bj;
2044:   b->i = bi;
2045:   for (i = 0; i < n; i++) bdiag[i] += bi[i];
2046:   b->diag = bdiag;
2047:   b->ilen = NULL;
2048:   b->imax = NULL;
2049:   b->row  = isrow;
2050:   b->col  = iscol;
2051:   PetscCall(PetscObjectReference((PetscObject)isrow));
2052:   PetscCall(PetscObjectReference((PetscObject)iscol));
2053:   b->icol = isicol;
2054:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2055:   /* In b structure:  Free imax, ilen, old a, old j.
2056:      Allocate bdiag, solve_work, new a, new j */
2057:   b->maxnz = b->nz = bi[n];

2059:   fact->info.factor_mallocs    = reallocs;
2060:   fact->info.fill_ratio_given  = f;
2061:   fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2062:   fact->ops->lufactornumeric   = MatLUFactorNumeric_SeqAIJ_inplace;
2063:   if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2064:   PetscFunctionReturn(PETSC_SUCCESS);
2065: }
2066: #endif

2068: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2069: {
2070:   Mat             C  = B;
2071:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2072:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2073:   IS              ip = b->row, iip = b->icol;
2074:   const PetscInt *rip, *riip;
2075:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2076:   PetscInt       *ai = a->i, *aj = a->j;
2077:   PetscInt        k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2078:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2079:   PetscBool       perm_identity;
2080:   FactorShiftCtx  sctx;
2081:   PetscReal       rs;
2082:   MatScalar       d, *v;

2084:   PetscFunctionBegin;
2085:   /* MatPivotSetUp(): initialize shift context sctx */
2086:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2088:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2089:     sctx.shift_top = info->zeropivot;
2090:     for (i = 0; i < mbs; i++) {
2091:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2092:       d  = (aa)[a->diag[i]];
2093:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2094:       v  = aa + ai[i];
2095:       nz = ai[i + 1] - ai[i];
2096:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2097:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2098:     }
2099:     sctx.shift_top *= 1.1;
2100:     sctx.nshift_max = 5;
2101:     sctx.shift_lo   = 0.;
2102:     sctx.shift_hi   = 1.;
2103:   }

2105:   PetscCall(ISGetIndices(ip, &rip));
2106:   PetscCall(ISGetIndices(iip, &riip));

2108:   /* allocate working arrays
2109:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2110:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2111:   */
2112:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

2114:   do {
2115:     sctx.newshift = PETSC_FALSE;

2117:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
2118:     if (mbs) il[0] = 0;

2120:     for (k = 0; k < mbs; k++) {
2121:       /* zero rtmp */
2122:       nz    = bi[k + 1] - bi[k];
2123:       bjtmp = bj + bi[k];
2124:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2126:       /* load in initial unfactored row */
2127:       bval = ba + bi[k];
2128:       jmin = ai[rip[k]];
2129:       jmax = ai[rip[k] + 1];
2130:       for (j = jmin; j < jmax; j++) {
2131:         col = riip[aj[j]];
2132:         if (col >= k) { /* only take upper triangular entry */
2133:           rtmp[col] = aa[j];
2134:           *bval++   = 0.0; /* for in-place factorization */
2135:         }
2136:       }
2137:       /* shift the diagonal of the matrix: ZeropivotApply() */
2138:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

2140:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2141:       dk = rtmp[k];
2142:       i  = c2r[k]; /* first row to be added to k_th row  */

2144:       while (i < k) {
2145:         nexti = c2r[i]; /* next row to be added to k_th row */

2147:         /* compute multiplier, update diag(k) and U(i,k) */
2148:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
2149:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2150:         dk += uikdi * ba[ili];           /* update diag[k] */
2151:         ba[ili] = uikdi;                 /* -U(i,k) */

2153:         /* add multiple of row i to k-th row */
2154:         jmin = ili + 1;
2155:         jmax = bi[i + 1];
2156:         if (jmin < jmax) {
2157:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2158:           /* update il and c2r for row i */
2159:           il[i]  = jmin;
2160:           j      = bj[jmin];
2161:           c2r[i] = c2r[j];
2162:           c2r[j] = i;
2163:         }
2164:         i = nexti;
2165:       }

2167:       /* copy data into U(k,:) */
2168:       rs   = 0.0;
2169:       jmin = bi[k];
2170:       jmax = bi[k + 1] - 1;
2171:       if (jmin < jmax) {
2172:         for (j = jmin; j < jmax; j++) {
2173:           col   = bj[j];
2174:           ba[j] = rtmp[col];
2175:           rs += PetscAbsScalar(ba[j]);
2176:         }
2177:         /* add the k-th row into il and c2r */
2178:         il[k]  = jmin;
2179:         i      = bj[jmin];
2180:         c2r[k] = c2r[i];
2181:         c2r[i] = k;
2182:       }

2184:       /* MatPivotCheck() */
2185:       sctx.rs = rs;
2186:       sctx.pv = dk;
2187:       PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2188:       if (sctx.newshift) break;
2189:       dk = sctx.pv;

2191:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2192:     }
2193:   } while (sctx.newshift);

2195:   PetscCall(PetscFree3(rtmp, il, c2r));
2196:   PetscCall(ISRestoreIndices(ip, &rip));
2197:   PetscCall(ISRestoreIndices(iip, &riip));

2199:   PetscCall(ISIdentity(ip, &perm_identity));
2200:   if (perm_identity) {
2201:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2202:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2203:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2204:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2205:   } else {
2206:     B->ops->solve          = MatSolve_SeqSBAIJ_1;
2207:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2208:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1;
2209:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1;
2210:   }

2212:   C->assembled    = PETSC_TRUE;
2213:   C->preallocated = PETSC_TRUE;

2215:   PetscCall(PetscLogFlops(C->rmap->n));

2217:   /* MatPivotView() */
2218:   if (sctx.nshift) {
2219:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2220:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2221:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2222:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2223:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2224:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2225:     }
2226:   }
2227:   PetscFunctionReturn(PETSC_SUCCESS);
2228: }

2230: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2231: {
2232:   Mat             C  = B;
2233:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
2234:   Mat_SeqSBAIJ   *b  = (Mat_SeqSBAIJ *)C->data;
2235:   IS              ip = b->row, iip = b->icol;
2236:   const PetscInt *rip, *riip;
2237:   PetscInt        i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2238:   PetscInt       *ai = a->i, *aj = a->j;
2239:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2240:   MatScalar      *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2241:   PetscBool       perm_identity;
2242:   FactorShiftCtx  sctx;
2243:   PetscReal       rs;
2244:   MatScalar       d, *v;

2246:   PetscFunctionBegin;
2247:   /* MatPivotSetUp(): initialize shift context sctx */
2248:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

2250:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2251:     sctx.shift_top = info->zeropivot;
2252:     for (i = 0; i < mbs; i++) {
2253:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2254:       d  = (aa)[a->diag[i]];
2255:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
2256:       v  = aa + ai[i];
2257:       nz = ai[i + 1] - ai[i];
2258:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2259:       if (rs > sctx.shift_top) sctx.shift_top = rs;
2260:     }
2261:     sctx.shift_top *= 1.1;
2262:     sctx.nshift_max = 5;
2263:     sctx.shift_lo   = 0.;
2264:     sctx.shift_hi   = 1.;
2265:   }

2267:   PetscCall(ISGetIndices(ip, &rip));
2268:   PetscCall(ISGetIndices(iip, &riip));

2270:   /* initialization */
2271:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

2273:   do {
2274:     sctx.newshift = PETSC_FALSE;

2276:     for (i = 0; i < mbs; i++) jl[i] = mbs;
2277:     il[0] = 0;

2279:     for (k = 0; k < mbs; k++) {
2280:       /* zero rtmp */
2281:       nz    = bi[k + 1] - bi[k];
2282:       bjtmp = bj + bi[k];
2283:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

2285:       bval = ba + bi[k];
2286:       /* initialize k-th row by the perm[k]-th row of A */
2287:       jmin = ai[rip[k]];
2288:       jmax = ai[rip[k] + 1];
2289:       for (j = jmin; j < jmax; j++) {
2290:         col = riip[aj[j]];
2291:         if (col >= k) { /* only take upper triangular entry */
2292:           rtmp[col] = aa[j];
2293:           *bval++   = 0.0; /* for in-place factorization */
2294:         }
2295:       }
2296:       /* shift the diagonal of the matrix */
2297:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

2299:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2300:       dk = rtmp[k];
2301:       i  = jl[k]; /* first row to be added to k_th row  */

2303:       while (i < k) {
2304:         nexti = jl[i]; /* next row to be added to k_th row */

2306:         /* compute multiplier, update diag(k) and U(i,k) */
2307:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
2308:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2309:         dk += uikdi * ba[ili];
2310:         ba[ili] = uikdi; /* -U(i,k) */

2312:         /* add multiple of row i to k-th row */
2313:         jmin = ili + 1;
2314:         jmax = bi[i + 1];
2315:         if (jmin < jmax) {
2316:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2317:           /* update il and jl for row i */
2318:           il[i] = jmin;
2319:           j     = bj[jmin];
2320:           jl[i] = jl[j];
2321:           jl[j] = i;
2322:         }
2323:         i = nexti;
2324:       }

2326:       /* shift the diagonals when zero pivot is detected */
2327:       /* compute rs=sum of abs(off-diagonal) */
2328:       rs   = 0.0;
2329:       jmin = bi[k] + 1;
2330:       nz   = bi[k + 1] - jmin;
2331:       bcol = bj + jmin;
2332:       for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);

2334:       sctx.rs = rs;
2335:       sctx.pv = dk;
2336:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2337:       if (sctx.newshift) break;
2338:       dk = sctx.pv;

2340:       /* copy data into U(k,:) */
2341:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2342:       jmin      = bi[k] + 1;
2343:       jmax      = bi[k + 1];
2344:       if (jmin < jmax) {
2345:         for (j = jmin; j < jmax; j++) {
2346:           col   = bj[j];
2347:           ba[j] = rtmp[col];
2348:         }
2349:         /* add the k-th row into il and jl */
2350:         il[k] = jmin;
2351:         i     = bj[jmin];
2352:         jl[k] = jl[i];
2353:         jl[i] = k;
2354:       }
2355:     }
2356:   } while (sctx.newshift);

2358:   PetscCall(PetscFree3(rtmp, il, jl));
2359:   PetscCall(ISRestoreIndices(ip, &rip));
2360:   PetscCall(ISRestoreIndices(iip, &riip));

2362:   PetscCall(ISIdentity(ip, &perm_identity));
2363:   if (perm_identity) {
2364:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2365:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368:   } else {
2369:     B->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
2370:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2371:     B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
2372:     B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
2373:   }

2375:   C->assembled    = PETSC_TRUE;
2376:   C->preallocated = PETSC_TRUE;

2378:   PetscCall(PetscLogFlops(C->rmap->n));
2379:   if (sctx.nshift) {
2380:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2381:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2382:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2383:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2384:     }
2385:   }
2386:   PetscFunctionReturn(PETSC_SUCCESS);
2387: }

2389: /*
2390:    icc() under revised new data structure.
2391:    Factored arrays bj and ba are stored as
2392:      U(0,:),...,U(i,:),U(n-1,:)

2394:    ui=fact->i is an array of size n+1, in which
2395:    ui+
2396:      ui[i]:  points to 1st entry of U(i,:),i=0,...,n-1
2397:      ui[n]:  points to U(n-1,n-1)+1

2399:   udiag=fact->diag is an array of size n,in which
2400:      udiag[i]: points to diagonal of U(i,:), i=0,...,n-1

2402:    U(i,:) contains udiag[i] as its last entry, i.e.,
2403:     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2404: */

2406: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2407: {
2408:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2409:   Mat_SeqSBAIJ      *b;
2410:   PetscBool          perm_identity, missing;
2411:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2412:   const PetscInt    *rip, *riip;
2413:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2414:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2415:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2416:   PetscReal          fill = info->fill, levels = info->levels;
2417:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2418:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2419:   PetscBT            lnkbt;
2420:   IS                 iperm;

2422:   PetscFunctionBegin;
2423:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2424:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2425:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2426:   PetscCall(ISIdentity(perm, &perm_identity));
2427:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2429:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2430:   PetscCall(PetscMalloc1(am + 1, &udiag));
2431:   ui[0] = 0;

2433:   /* ICC(0) without matrix ordering: simply rearrange column indices */
2434:   if (!levels && perm_identity) {
2435:     for (i = 0; i < am; i++) {
2436:       ncols     = ai[i + 1] - a->diag[i];
2437:       ui[i + 1] = ui[i] + ncols;
2438:       udiag[i]  = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2439:     }
2440:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2441:     cols = uj;
2442:     for (i = 0; i < am; i++) {
2443:       aj    = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2444:       ncols = ai[i + 1] - a->diag[i] - 1;
2445:       for (j = 0; j < ncols; j++) *cols++ = aj[j];
2446:       *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2447:     }
2448:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2449:     PetscCall(ISGetIndices(iperm, &riip));
2450:     PetscCall(ISGetIndices(perm, &rip));

2452:     /* initialization */
2453:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2455:     /* jl: linked list for storing indices of the pivot rows
2456:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2457:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2458:     for (i = 0; i < am; i++) {
2459:       jl[i] = am;
2460:       il[i] = 0;
2461:     }

2463:     /* create and initialize a linked list for storing column indices of the active row k */
2464:     nlnk = am + 1;
2465:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2467:     /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2468:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2469:     current_space = free_space;
2470:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2471:     current_space_lvl = free_space_lvl;

2473:     for (k = 0; k < am; k++) { /* for each active row k */
2474:       /* initialize lnk by the column indices of row rip[k] of A */
2475:       nzk   = 0;
2476:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2477:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2478:       ncols_upper = 0;
2479:       for (j = 0; j < ncols; j++) {
2480:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2481:         if (riip[i] >= k) {         /* only take upper triangular entry */
2482:           ajtmp[ncols_upper] = i;
2483:           ncols_upper++;
2484:         }
2485:       }
2486:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2487:       nzk += nlnk;

2489:       /* update lnk by computing fill-in for each pivot row to be merged in */
2490:       prow = jl[k]; /* 1st pivot row */

2492:       while (prow < k) {
2493:         nextprow = jl[prow];

2495:         /* merge prow into k-th row */
2496:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2497:         jmax  = ui[prow + 1];
2498:         ncols = jmax - jmin;
2499:         i     = jmin - ui[prow];
2500:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2501:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2502:         j     = *(uj - 1);
2503:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2504:         nzk += nlnk;

2506:         /* update il and jl for prow */
2507:         if (jmin < jmax) {
2508:           il[prow] = jmin;
2509:           j        = *cols;
2510:           jl[prow] = jl[j];
2511:           jl[j]    = prow;
2512:         }
2513:         prow = nextprow;
2514:       }

2516:       /* if free space is not available, make more free space */
2517:       if (current_space->local_remaining < nzk) {
2518:         i = am - k + 1;                                    /* num of unfactored rows */
2519:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2520:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2521:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2522:         reallocs++;
2523:       }

2525:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2526:       PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2527:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2529:       /* add the k-th row into il and jl */
2530:       if (nzk > 1) {
2531:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2532:         jl[k] = jl[i];
2533:         jl[i] = k;
2534:         il[k] = ui[k] + 1;
2535:       }
2536:       uj_ptr[k]     = current_space->array;
2537:       uj_lvl_ptr[k] = current_space_lvl->array;

2539:       current_space->array += nzk;
2540:       current_space->local_used += nzk;
2541:       current_space->local_remaining -= nzk;

2543:       current_space_lvl->array += nzk;
2544:       current_space_lvl->local_used += nzk;
2545:       current_space_lvl->local_remaining -= nzk;

2547:       ui[k + 1] = ui[k] + nzk;
2548:     }

2550:     PetscCall(ISRestoreIndices(perm, &rip));
2551:     PetscCall(ISRestoreIndices(iperm, &riip));
2552:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2553:     PetscCall(PetscFree(ajtmp));

2555:     /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2556:     PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2557:     PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor  */
2558:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2559:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2561:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2563:   /* put together the new matrix in MATSEQSBAIJ format */
2564:   b          = (Mat_SeqSBAIJ *)fact->data;
2565:   b->free_ij = PETSC_TRUE;
2566:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2567:   b->free_a    = PETSC_TRUE;
2568:   b->j         = uj;
2569:   b->i         = ui;
2570:   b->diag      = udiag;
2571:   b->free_diag = PETSC_TRUE;
2572:   b->ilen      = NULL;
2573:   b->imax      = NULL;
2574:   b->row       = perm;
2575:   b->col       = perm;
2576:   PetscCall(PetscObjectReference((PetscObject)perm));
2577:   PetscCall(PetscObjectReference((PetscObject)perm));
2578:   b->icol          = iperm;
2579:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2581:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2583:   b->maxnz = b->nz = ui[am];

2585:   fact->info.factor_mallocs   = reallocs;
2586:   fact->info.fill_ratio_given = fill;
2587:   if (ai[am] != 0) {
2588:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2589:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2590:   } else {
2591:     fact->info.fill_ratio_needed = 0.0;
2592:   }
2593: #if defined(PETSC_USE_INFO)
2594:   if (ai[am] != 0) {
2595:     PetscReal af = fact->info.fill_ratio_needed;
2596:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2597:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2598:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2599:   } else {
2600:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2601:   }
2602: #endif
2603:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2604:   PetscFunctionReturn(PETSC_SUCCESS);
2605: }

2607: #if 0
2608: // unused
2609: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2610: {
2611:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2612:   Mat_SeqSBAIJ      *b;
2613:   PetscBool          perm_identity, missing;
2614:   PetscInt           reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2615:   const PetscInt    *rip, *riip;
2616:   PetscInt           jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2617:   PetscInt           nlnk, *lnk, *lnk_lvl = NULL, d;
2618:   PetscInt           ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2619:   PetscReal          fill = info->fill, levels = info->levels;
2620:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2621:   PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2622:   PetscBT            lnkbt;
2623:   IS                 iperm;

2625:   PetscFunctionBegin;
2626:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2627:   PetscCall(MatMissingDiagonal(A, &missing, &d));
2628:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2629:   PetscCall(ISIdentity(perm, &perm_identity));
2630:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));

2632:   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
2633:   PetscCall(PetscMalloc1(am + 1, &udiag));
2634:   ui[0] = 0;

2636:   /* ICC(0) without matrix ordering: simply copies fill pattern */
2637:   if (!levels && perm_identity) {
2638:     for (i = 0; i < am; i++) {
2639:       ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2640:       udiag[i]  = ui[i];
2641:     }
2642:     PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2643:     cols = uj;
2644:     for (i = 0; i < am; i++) {
2645:       aj    = a->j + a->diag[i];
2646:       ncols = ui[i + 1] - ui[i];
2647:       for (j = 0; j < ncols; j++) *cols++ = *aj++;
2648:     }
2649:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2650:     PetscCall(ISGetIndices(iperm, &riip));
2651:     PetscCall(ISGetIndices(perm, &rip));

2653:     /* initialization */
2654:     PetscCall(PetscMalloc1(am + 1, &ajtmp));

2656:     /* jl: linked list for storing indices of the pivot rows
2657:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2658:     PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2659:     for (i = 0; i < am; i++) {
2660:       jl[i] = am;
2661:       il[i] = 0;
2662:     }

2664:     /* create and initialize a linked list for storing column indices of the active row k */
2665:     nlnk = am + 1;
2666:     PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));

2668:     /* initial FreeSpace size is fill*(ai[am]+1) */
2669:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2670:     current_space = free_space;
2671:     PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2672:     current_space_lvl = free_space_lvl;

2674:     for (k = 0; k < am; k++) { /* for each active row k */
2675:       /* initialize lnk by the column indices of row rip[k] of A */
2676:       nzk   = 0;
2677:       ncols = ai[rip[k] + 1] - ai[rip[k]];
2678:       PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2679:       ncols_upper = 0;
2680:       for (j = 0; j < ncols; j++) {
2681:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2682:         if (riip[i] >= k) {         /* only take upper triangular entry */
2683:           ajtmp[ncols_upper] = i;
2684:           ncols_upper++;
2685:         }
2686:       }
2687:       PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2688:       nzk += nlnk;

2690:       /* update lnk by computing fill-in for each pivot row to be merged in */
2691:       prow = jl[k]; /* 1st pivot row */

2693:       while (prow < k) {
2694:         nextprow = jl[prow];

2696:         /* merge prow into k-th row */
2697:         jmin  = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2698:         jmax  = ui[prow + 1];
2699:         ncols = jmax - jmin;
2700:         i     = jmin - ui[prow];
2701:         cols  = uj_ptr[prow] + i;     /* points to the 2nd nzero entry in U(prow,k:am-1) */
2702:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
2703:         j     = *(uj - 1);
2704:         PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2705:         nzk += nlnk;

2707:         /* update il and jl for prow */
2708:         if (jmin < jmax) {
2709:           il[prow] = jmin;
2710:           j        = *cols;
2711:           jl[prow] = jl[j];
2712:           jl[j]    = prow;
2713:         }
2714:         prow = nextprow;
2715:       }

2717:       /* if free space is not available, make more free space */
2718:       if (current_space->local_remaining < nzk) {
2719:         i = am - k + 1;                                    /* num of unfactored rows */
2720:         i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2721:         PetscCall(PetscFreeSpaceGet(i, &current_space));
2722:         PetscCall(PetscFreeSpaceGet(i, &current_space_lvl));
2723:         reallocs++;
2724:       }

2726:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2727:       PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2728:       PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));

2730:       /* add the k-th row into il and jl */
2731:       if (nzk > 1) {
2732:         i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2733:         jl[k] = jl[i];
2734:         jl[i] = k;
2735:         il[k] = ui[k] + 1;
2736:       }
2737:       uj_ptr[k]     = current_space->array;
2738:       uj_lvl_ptr[k] = current_space_lvl->array;

2740:       current_space->array += nzk;
2741:       current_space->local_used += nzk;
2742:       current_space->local_remaining -= nzk;

2744:       current_space_lvl->array += nzk;
2745:       current_space_lvl->local_used += nzk;
2746:       current_space_lvl->local_remaining -= nzk;

2748:       ui[k + 1] = ui[k] + nzk;
2749:     }

2751:   #if defined(PETSC_USE_INFO)
2752:     if (ai[am] != 0) {
2753:       PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2754:       PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2755:       PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2756:       PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2757:     } else {
2758:       PetscCall(PetscInfo(A, "Empty matrix\n"));
2759:     }
2760:   #endif

2762:     PetscCall(ISRestoreIndices(perm, &rip));
2763:     PetscCall(ISRestoreIndices(iperm, &riip));
2764:     PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2765:     PetscCall(PetscFree(ajtmp));

2767:     /* destroy list of free space and other temporary array(s) */
2768:     PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
2769:     PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2770:     PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2771:     PetscCall(PetscFreeSpaceDestroy(free_space_lvl));

2773:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2775:   /* put together the new matrix in MATSEQSBAIJ format */

2777:   b               = (Mat_SeqSBAIJ *)fact->data;
2778:   b->free_ij       = PETSC_TRUE;
2779:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
2780:   b->free_a        = PETSC_TRUE;
2781:   b->j         = uj;
2782:   b->i         = ui;
2783:   b->diag      = udiag;
2784:   b->free_diag = PETSC_TRUE;
2785:   b->ilen      = NULL;
2786:   b->imax      = NULL;
2787:   b->row       = perm;
2788:   b->col       = perm;

2790:   PetscCall(PetscObjectReference((PetscObject)perm));
2791:   PetscCall(PetscObjectReference((PetscObject)perm));

2793:   b->icol          = iperm;
2794:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2795:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2796:   b->maxnz = b->nz = ui[am];

2798:   fact->info.factor_mallocs   = reallocs;
2799:   fact->info.fill_ratio_given = fill;
2800:   if (ai[am] != 0) {
2801:     fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2802:   } else {
2803:     fact->info.fill_ratio_needed = 0.0;
2804:   }
2805:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2806:   PetscFunctionReturn(PETSC_SUCCESS);
2807: }
2808: #endif

2810: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2811: {
2812:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2813:   Mat_SeqSBAIJ      *b;
2814:   PetscBool          perm_identity, missing;
2815:   PetscReal          fill = info->fill;
2816:   const PetscInt    *rip, *riip;
2817:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2818:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2819:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2820:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2821:   PetscBT            lnkbt;
2822:   IS                 iperm;

2824:   PetscFunctionBegin;
2825:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2826:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2827:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2829:   /* check whether perm is the identity mapping */
2830:   PetscCall(ISIdentity(perm, &perm_identity));
2831:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2832:   PetscCall(ISGetIndices(iperm, &riip));
2833:   PetscCall(ISGetIndices(perm, &rip));

2835:   /* initialization */
2836:   PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2837:   PetscCall(PetscMalloc1(am + 1, &udiag));
2838:   ui[0] = 0;

2840:   /* jl: linked list for storing indices of the pivot rows
2841:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2842:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2843:   for (i = 0; i < am; i++) {
2844:     jl[i] = am;
2845:     il[i] = 0;
2846:   }

2848:   /* create and initialize a linked list for storing column indices of the active row k */
2849:   nlnk = am + 1;
2850:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

2852:   /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2853:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2854:   current_space = free_space;

2856:   for (k = 0; k < am; k++) { /* for each active row k */
2857:     /* initialize lnk by the column indices of row rip[k] of A */
2858:     nzk   = 0;
2859:     ncols = ai[rip[k] + 1] - ai[rip[k]];
2860:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2861:     ncols_upper = 0;
2862:     for (j = 0; j < ncols; j++) {
2863:       i = riip[*(aj + ai[rip[k]] + j)];
2864:       if (i >= k) { /* only take upper triangular entry */
2865:         cols[ncols_upper] = i;
2866:         ncols_upper++;
2867:       }
2868:     }
2869:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2870:     nzk += nlnk;

2872:     /* update lnk by computing fill-in for each pivot row to be merged in */
2873:     prow = jl[k]; /* 1st pivot row */

2875:     while (prow < k) {
2876:       nextprow = jl[prow];
2877:       /* merge prow into k-th row */
2878:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2879:       jmax   = ui[prow + 1];
2880:       ncols  = jmax - jmin;
2881:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2882:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2883:       nzk += nlnk;

2885:       /* update il and jl for prow */
2886:       if (jmin < jmax) {
2887:         il[prow] = jmin;
2888:         j        = *uj_ptr;
2889:         jl[prow] = jl[j];
2890:         jl[j]    = prow;
2891:       }
2892:       prow = nextprow;
2893:     }

2895:     /* if free space is not available, make more free space */
2896:     if (current_space->local_remaining < nzk) {
2897:       i = am - k + 1;                                    /* num of unfactored rows */
2898:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2899:       PetscCall(PetscFreeSpaceGet(i, &current_space));
2900:       reallocs++;
2901:     }

2903:     /* copy data into free space, then initialize lnk */
2904:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

2906:     /* add the k-th row into il and jl */
2907:     if (nzk > 1) {
2908:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2909:       jl[k] = jl[i];
2910:       jl[i] = k;
2911:       il[k] = ui[k] + 1;
2912:     }
2913:     ui_ptr[k] = current_space->array;

2915:     current_space->array += nzk;
2916:     current_space->local_used += nzk;
2917:     current_space->local_remaining -= nzk;

2919:     ui[k + 1] = ui[k] + nzk;
2920:   }

2922:   PetscCall(ISRestoreIndices(perm, &rip));
2923:   PetscCall(ISRestoreIndices(iperm, &riip));
2924:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

2926:   /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2927:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2928:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2929:   PetscCall(PetscLLDestroy(lnk, lnkbt));

2931:   /* put together the new matrix in MATSEQSBAIJ format */
2932:   b          = (Mat_SeqSBAIJ *)fact->data;
2933:   b->free_ij = PETSC_TRUE;
2934:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2935:   b->free_a    = PETSC_TRUE;
2936:   b->j         = uj;
2937:   b->i         = ui;
2938:   b->diag      = udiag;
2939:   b->free_diag = PETSC_TRUE;
2940:   b->ilen      = NULL;
2941:   b->imax      = NULL;
2942:   b->row       = perm;
2943:   b->col       = perm;

2945:   PetscCall(PetscObjectReference((PetscObject)perm));
2946:   PetscCall(PetscObjectReference((PetscObject)perm));

2948:   b->icol          = iperm;
2949:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

2951:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));

2953:   b->maxnz = b->nz = ui[am];

2955:   fact->info.factor_mallocs   = reallocs;
2956:   fact->info.fill_ratio_given = fill;
2957:   if (ai[am] != 0) {
2958:     /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2959:     fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2960:   } else {
2961:     fact->info.fill_ratio_needed = 0.0;
2962:   }
2963: #if defined(PETSC_USE_INFO)
2964:   if (ai[am] != 0) {
2965:     PetscReal af = fact->info.fill_ratio_needed;
2966:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2967:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2968:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2969:   } else {
2970:     PetscCall(PetscInfo(A, "Empty matrix\n"));
2971:   }
2972: #endif
2973:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2974:   PetscFunctionReturn(PETSC_SUCCESS);
2975: }

2977: #if 0
2978: // unused
2979: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2980: {
2981:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
2982:   Mat_SeqSBAIJ      *b;
2983:   PetscBool          perm_identity, missing;
2984:   PetscReal          fill = info->fill;
2985:   const PetscInt    *rip, *riip;
2986:   PetscInt           i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2987:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2988:   PetscInt           nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2989:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
2990:   PetscBT            lnkbt;
2991:   IS                 iperm;

2993:   PetscFunctionBegin;
2994:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2995:   PetscCall(MatMissingDiagonal(A, &missing, &i));
2996:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

2998:   /* check whether perm is the identity mapping */
2999:   PetscCall(ISIdentity(perm, &perm_identity));
3000:   PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3001:   PetscCall(ISGetIndices(iperm, &riip));
3002:   PetscCall(ISGetIndices(perm, &rip));

3004:   /* initialization */
3005:   PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
3006:   ui[0] = 0;

3008:   /* jl: linked list for storing indices of the pivot rows
3009:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3010:   PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3011:   for (i = 0; i < am; i++) {
3012:     jl[i] = am;
3013:     il[i] = 0;
3014:   }

3016:   /* create and initialize a linked list for storing column indices of the active row k */
3017:   nlnk = am + 1;
3018:   PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));

3020:   /* initial FreeSpace size is fill*(ai[am]+1) */
3021:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3022:   current_space = free_space;

3024:   for (k = 0; k < am; k++) { /* for each active row k */
3025:     /* initialize lnk by the column indices of row rip[k] of A */
3026:     nzk   = 0;
3027:     ncols = ai[rip[k] + 1] - ai[rip[k]];
3028:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3029:     ncols_upper = 0;
3030:     for (j = 0; j < ncols; j++) {
3031:       i = riip[*(aj + ai[rip[k]] + j)];
3032:       if (i >= k) { /* only take upper triangular entry */
3033:         cols[ncols_upper] = i;
3034:         ncols_upper++;
3035:       }
3036:     }
3037:     PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3038:     nzk += nlnk;

3040:     /* update lnk by computing fill-in for each pivot row to be merged in */
3041:     prow = jl[k]; /* 1st pivot row */

3043:     while (prow < k) {
3044:       nextprow = jl[prow];
3045:       /* merge prow into k-th row */
3046:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3047:       jmax   = ui[prow + 1];
3048:       ncols  = jmax - jmin;
3049:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3050:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3051:       nzk += nlnk;

3053:       /* update il and jl for prow */
3054:       if (jmin < jmax) {
3055:         il[prow] = jmin;
3056:         j        = *uj_ptr;
3057:         jl[prow] = jl[j];
3058:         jl[j]    = prow;
3059:       }
3060:       prow = nextprow;
3061:     }

3063:     /* if free space is not available, make more free space */
3064:     if (current_space->local_remaining < nzk) {
3065:       i = am - k + 1;                     /* num of unfactored rows */
3066:       i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3067:       PetscCall(PetscFreeSpaceGet(i, &current_space));
3068:       reallocs++;
3069:     }

3071:     /* copy data into free space, then initialize lnk */
3072:     PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));

3074:     /* add the k-th row into il and jl */
3075:     if (nzk - 1 > 0) {
3076:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3077:       jl[k] = jl[i];
3078:       jl[i] = k;
3079:       il[k] = ui[k] + 1;
3080:     }
3081:     ui_ptr[k] = current_space->array;

3083:     current_space->array += nzk;
3084:     current_space->local_used += nzk;
3085:     current_space->local_remaining -= nzk;

3087:     ui[k + 1] = ui[k] + nzk;
3088:   }

3090:   #if defined(PETSC_USE_INFO)
3091:   if (ai[am] != 0) {
3092:     PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3093:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3094:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3095:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3096:   } else {
3097:     PetscCall(PetscInfo(A, "Empty matrix\n"));
3098:   }
3099:   #endif

3101:   PetscCall(ISRestoreIndices(perm, &rip));
3102:   PetscCall(ISRestoreIndices(iperm, &riip));
3103:   PetscCall(PetscFree4(ui_ptr, jl, il, cols));

3105:   /* destroy list of free space and other temporary array(s) */
3106:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
3107:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3108:   PetscCall(PetscLLDestroy(lnk, lnkbt));

3110:   /* put together the new matrix in MATSEQSBAIJ format */
3111:   b               = (Mat_SeqSBAIJ *)fact->data;
3112:   b->free_ij      = PETSC_TRUE;
3113:   PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
3114:   b->free_a       = PETSC_TRUE;
3115:   b->j    = uj;
3116:   b->i    = ui;
3117:   b->diag = NULL;
3118:   b->ilen = NULL;
3119:   b->imax = NULL;
3120:   b->row  = perm;
3121:   b->col  = perm;

3123:   PetscCall(PetscObjectReference((PetscObject)perm));
3124:   PetscCall(PetscObjectReference((PetscObject)perm));

3126:   b->icol          = iperm;
3127:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

3129:   PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3130:   b->maxnz = b->nz = ui[am];

3132:   fact->info.factor_mallocs   = reallocs;
3133:   fact->info.fill_ratio_given = fill;
3134:   if (ai[am] != 0) {
3135:     fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3136:   } else {
3137:     fact->info.fill_ratio_needed = 0.0;
3138:   }
3139:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3140:   PetscFunctionReturn(PETSC_SUCCESS);
3141: }
3142: #endif

3144: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3145: {
3146:   Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data;
3147:   PetscInt           n  = A->rmap->n;
3148:   const PetscInt    *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3149:   PetscScalar       *x, sum;
3150:   const PetscScalar *b;
3151:   const MatScalar   *aa = a->a, *v;
3152:   PetscInt           i, nz;

3154:   PetscFunctionBegin;
3155:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3157:   PetscCall(VecGetArrayRead(bb, &b));
3158:   PetscCall(VecGetArrayWrite(xx, &x));

3160:   /* forward solve the lower triangular */
3161:   x[0] = b[0];
3162:   v    = aa;
3163:   vi   = aj;
3164:   for (i = 1; i < n; i++) {
3165:     nz  = ai[i + 1] - ai[i];
3166:     sum = b[i];
3167:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3168:     v += nz;
3169:     vi += nz;
3170:     x[i] = sum;
3171:   }

3173:   /* backward solve the upper triangular */
3174:   for (i = n - 1; i >= 0; i--) {
3175:     v   = aa + adiag[i + 1] + 1;
3176:     vi  = aj + adiag[i + 1] + 1;
3177:     nz  = adiag[i] - adiag[i + 1] - 1;
3178:     sum = x[i];
3179:     PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3180:     x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3181:   }

3183:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3184:   PetscCall(VecRestoreArrayRead(bb, &b));
3185:   PetscCall(VecRestoreArrayWrite(xx, &x));
3186:   PetscFunctionReturn(PETSC_SUCCESS);
3187: }

3189: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3190: {
3191:   Mat_SeqAIJ        *a     = (Mat_SeqAIJ *)A->data;
3192:   IS                 iscol = a->col, isrow = a->row;
3193:   PetscInt           i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3194:   const PetscInt    *rout, *cout, *r, *c;
3195:   PetscScalar       *x, *tmp, sum;
3196:   const PetscScalar *b;
3197:   const MatScalar   *aa = a->a, *v;

3199:   PetscFunctionBegin;
3200:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);

3202:   PetscCall(VecGetArrayRead(bb, &b));
3203:   PetscCall(VecGetArrayWrite(xx, &x));
3204:   tmp = a->solve_work;

3206:   PetscCall(ISGetIndices(isrow, &rout));
3207:   r = rout;
3208:   PetscCall(ISGetIndices(iscol, &cout));
3209:   c = cout;

3211:   /* forward solve the lower triangular */
3212:   tmp[0] = b[r[0]];
3213:   v      = aa;
3214:   vi     = aj;
3215:   for (i = 1; i < n; i++) {
3216:     nz  = ai[i + 1] - ai[i];
3217:     sum = b[r[i]];
3218:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3219:     tmp[i] = sum;
3220:     v += nz;
3221:     vi += nz;
3222:   }

3224:   /* backward solve the upper triangular */
3225:   for (i = n - 1; i >= 0; i--) {
3226:     v   = aa + adiag[i + 1] + 1;
3227:     vi  = aj + adiag[i + 1] + 1;
3228:     nz  = adiag[i] - adiag[i + 1] - 1;
3229:     sum = tmp[i];
3230:     PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3231:     x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3232:   }

3234:   PetscCall(ISRestoreIndices(isrow, &rout));
3235:   PetscCall(ISRestoreIndices(iscol, &cout));
3236:   PetscCall(VecRestoreArrayRead(bb, &b));
3237:   PetscCall(VecRestoreArrayWrite(xx, &x));
3238:   PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3239:   PetscFunctionReturn(PETSC_SUCCESS);
3240: }

3242: #if 0
3243: // unused
3244: /*
3245:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3246: */
3247: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3248: {
3249:   Mat             B = *fact;
3250:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b;
3251:   IS              isicol;
3252:   const PetscInt *r, *ic;
3253:   PetscInt        i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3254:   PetscInt       *bi, *bj, *bdiag, *bdiag_rev;
3255:   PetscInt        row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3256:   PetscInt        nlnk, *lnk;
3257:   PetscBT         lnkbt;
3258:   PetscBool       row_identity, icol_identity;
3259:   MatScalar      *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3260:   const PetscInt *ics;
3261:   PetscInt        j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3262:   PetscReal       dt = info->dt, shift = info->shiftamount;
3263:   PetscInt        dtcount = (PetscInt)info->dtcount, nnz_max;
3264:   PetscBool       missing;

3266:   PetscFunctionBegin;
3267:   if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3268:   if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);

3270:   /* symbolic factorization, can be reused */
3271:   PetscCall(MatMissingDiagonal(A, &missing, &i));
3272:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3273:   adiag = a->diag;

3275:   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));

3277:   /* bdiag is location of diagonal in factor */
3278:   PetscCall(PetscMalloc1(n + 1, &bdiag));     /* becomes b->diag */
3279:   PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */

3281:   /* allocate row pointers bi */
3282:   PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));

3284:   /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3285:   if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3286:   nnz_max = ai[n] + 2 * n * dtcount + 2;

3288:   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
3289:   PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));

3291:   /* put together the new matrix */
3292:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3293:   b = (Mat_SeqAIJ *)B->data;
3294:   b->free_a       = PETSC_TRUE;
3295:   b->free_ij      = PETSC_TRUE;
3296:   b->a    = ba;
3297:   b->j    = bj;
3298:   b->i    = bi;
3299:   b->diag = bdiag;
3300:   b->ilen = NULL;
3301:   b->imax = NULL;
3302:   b->row  = isrow;
3303:   b->col  = iscol;
3304:   PetscCall(PetscObjectReference((PetscObject)isrow));
3305:   PetscCall(PetscObjectReference((PetscObject)iscol));
3306:   b->icol = isicol;

3308:   PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3309:   b->maxnz = nnz_max;

3311:   B->factortype            = MAT_FACTOR_ILUDT;
3312:   B->info.factor_mallocs   = 0;
3313:   B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3314:   /*  end of symbolic factorization */

3316:   PetscCall(ISGetIndices(isrow, &r));
3317:   PetscCall(ISGetIndices(isicol, &ic));
3318:   ics = ic;

3320:   /* linked list for storing column indices of the active row */
3321:   nlnk = n + 1;
3322:   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

3324:   /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3325:   PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3326:   /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3327:   PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3328:   PetscCall(PetscArrayzero(rtmp, n));

3330:   bi[0]         = 0;
3331:   bdiag[0]      = nnz_max - 1; /* location of diag[0] in factor B */
3332:   bdiag_rev[n]  = bdiag[0];
3333:   bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3334:   for (i = 0; i < n; i++) {
3335:     /* copy initial fill into linked list */
3336:     nzi = ai[r[i] + 1] - ai[r[i]];
3337:     PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3338:     nzi_al = adiag[r[i]] - ai[r[i]];
3339:     nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3340:     ajtmp  = aj + ai[r[i]];
3341:     PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));

3343:     /* load in initial (unfactored row) */
3344:     aatmp = a->a + ai[r[i]];
3345:     for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;

3347:     /* add pivot rows into linked list */
3348:     row = lnk[n];
3349:     while (row < i) {
3350:       nzi_bl = bi[row + 1] - bi[row] + 1;
3351:       bjtmp  = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3352:       PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3353:       nzi += nlnk;
3354:       row = lnk[row];
3355:     }

3357:     /* copy data from lnk into jtmp, then initialize lnk */
3358:     PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));

3360:     /* numerical factorization */
3361:     bjtmp = jtmp;
3362:     row   = *bjtmp++; /* 1st pivot row */
3363:     while (row < i) {
3364:       pc         = rtmp + row;
3365:       pv         = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3366:       multiplier = (*pc) * (*pv);
3367:       *pc        = multiplier;
3368:       if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3369:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3370:         pv = ba + bdiag[row + 1] + 1;
3371:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3372:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3373:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3374:       }
3375:       row = *bjtmp++;
3376:     }

3378:     /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3379:     diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3380:     nzi_bl   = 0;
3381:     j        = 0;
3382:     while (jtmp[j] < i) { /* Note: jtmp is sorted */
3383:       vtmp[j]       = rtmp[jtmp[j]];
3384:       rtmp[jtmp[j]] = 0.0;
3385:       nzi_bl++;
3386:       j++;
3387:     }
3388:     nzi_bu = nzi - nzi_bl - 1;
3389:     while (j < nzi) {
3390:       vtmp[j]       = rtmp[jtmp[j]];
3391:       rtmp[jtmp[j]] = 0.0;
3392:       j++;
3393:     }

3395:     bjtmp = bj + bi[i];
3396:     batmp = ba + bi[i];
3397:     /* apply level dropping rule to L part */
3398:     ncut = nzi_al + dtcount;
3399:     if (ncut < nzi_bl) {
3400:       PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3401:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3402:     } else {
3403:       ncut = nzi_bl;
3404:     }
3405:     for (j = 0; j < ncut; j++) {
3406:       bjtmp[j] = jtmp[j];
3407:       batmp[j] = vtmp[j];
3408:     }
3409:     bi[i + 1] = bi[i] + ncut;
3410:     nzi       = ncut + 1;

3412:     /* apply level dropping rule to U part */
3413:     ncut = nzi_au + dtcount;
3414:     if (ncut < nzi_bu) {
3415:       PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3416:       PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3417:     } else {
3418:       ncut = nzi_bu;
3419:     }
3420:     nzi += ncut;

3422:     /* mark bdiagonal */
3423:     bdiag[i + 1]         = bdiag[i] - (ncut + 1);
3424:     bdiag_rev[n - i - 1] = bdiag[i + 1];
3425:     bi[2 * n - i]        = bi[2 * n - i + 1] - (ncut + 1);
3426:     bjtmp                = bj + bdiag[i];
3427:     batmp                = ba + bdiag[i];
3428:     *bjtmp               = i;
3429:     *batmp               = diag_tmp; /* rtmp[i]; */
3430:     if (*batmp == 0.0) *batmp = dt + shift;
3431:     *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */

3433:     bjtmp = bj + bdiag[i + 1] + 1;
3434:     batmp = ba + bdiag[i + 1] + 1;
3435:     for (k = 0; k < ncut; k++) {
3436:       bjtmp[k] = jtmp[nzi_bl + 1 + k];
3437:       batmp[k] = vtmp[nzi_bl + 1 + k];
3438:     }

3440:     im[i] = nzi; /* used by PetscLLAddSortedLU() */
3441:   }              /* for (i=0; i<n; i++) */
3442:   PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);

3444:   PetscCall(ISRestoreIndices(isrow, &r));
3445:   PetscCall(ISRestoreIndices(isicol, &ic));

3447:   PetscCall(PetscLLDestroy(lnk, lnkbt));
3448:   PetscCall(PetscFree2(im, jtmp));
3449:   PetscCall(PetscFree2(rtmp, vtmp));
3450:   PetscCall(PetscFree(bdiag_rev));

3452:   PetscCall(PetscLogFlops(B->cmap->n));
3453:   b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];

3455:   PetscCall(ISIdentity(isrow, &row_identity));
3456:   PetscCall(ISIdentity(isicol, &icol_identity));
3457:   if (row_identity && icol_identity) {
3458:     B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3459:   } else {
3460:     B->ops->solve = MatSolve_SeqAIJ;
3461:   }

3463:   B->ops->solveadd          = NULL;
3464:   B->ops->solvetranspose    = NULL;
3465:   B->ops->solvetransposeadd = NULL;
3466:   B->ops->matsolve          = NULL;
3467:   B->assembled              = PETSC_TRUE;
3468:   B->preallocated           = PETSC_TRUE;
3469:   PetscFunctionReturn(PETSC_SUCCESS);
3470: }

3472: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3473: /*
3474:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3475: */

3477: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3478: {
3479:   PetscFunctionBegin;
3480:   PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3481:   PetscFunctionReturn(PETSC_SUCCESS);
3482: }

3484: /*
3485:    same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3486:    - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3487: */
3488: /*
3489:     This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3490: */

3492: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3493: {
3494:   Mat             C = fact;
3495:   Mat_SeqAIJ     *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3496:   IS              isrow = b->row, isicol = b->icol;
3497:   const PetscInt *r, *ic, *ics;
3498:   PetscInt        i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3499:   PetscInt       *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3500:   MatScalar      *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3501:   PetscReal       dt = info->dt, shift = info->shiftamount;
3502:   PetscBool       row_identity, col_identity;

3504:   PetscFunctionBegin;
3505:   PetscCall(ISGetIndices(isrow, &r));
3506:   PetscCall(ISGetIndices(isicol, &ic));
3507:   PetscCall(PetscMalloc1(n + 1, &rtmp));
3508:   ics = ic;

3510:   for (i = 0; i < n; i++) {
3511:     /* initialize rtmp array */
3512:     nzl   = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3513:     bjtmp = bj + bi[i];
3514:     for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3515:     rtmp[i] = 0.0;
3516:     nzu     = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3517:     bjtmp   = bj + bdiag[i + 1] + 1;
3518:     for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;

3520:     /* load in initial unfactored row of A */
3521:     nz    = ai[r[i] + 1] - ai[r[i]];
3522:     ajtmp = aj + ai[r[i]];
3523:     v     = aa + ai[r[i]];
3524:     for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];

3526:     /* numerical factorization */
3527:     bjtmp = bj + bi[i];        /* point to 1st entry of L(i,:) */
3528:     nzl   = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3529:     k     = 0;
3530:     while (k < nzl) {
3531:       row        = *bjtmp++;
3532:       pc         = rtmp + row;
3533:       pv         = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3534:       multiplier = (*pc) * (*pv);
3535:       *pc        = multiplier;
3536:       if (PetscAbsScalar(multiplier) > dt) {
3537:         pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3538:         pv = b->a + bdiag[row + 1] + 1;
3539:         nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3540:         for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3541:         PetscCall(PetscLogFlops(1 + 2.0 * nz));
3542:       }
3543:       k++;
3544:     }

3546:     /* finished row so stick it into b->a */
3547:     /* L-part */
3548:     pv  = b->a + bi[i];
3549:     pj  = bj + bi[i];
3550:     nzl = bi[i + 1] - bi[i];
3551:     for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];

3553:     /* diagonal: invert diagonal entries for simpler triangular solves */
3554:     if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3555:     b->a[bdiag[i]] = 1.0 / rtmp[i];

3557:     /* U-part */
3558:     pv  = b->a + bdiag[i + 1] + 1;
3559:     pj  = bj + bdiag[i + 1] + 1;
3560:     nzu = bdiag[i] - bdiag[i + 1] - 1;
3561:     for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3562:   }

3564:   PetscCall(PetscFree(rtmp));
3565:   PetscCall(ISRestoreIndices(isicol, &ic));
3566:   PetscCall(ISRestoreIndices(isrow, &r));

3568:   PetscCall(ISIdentity(isrow, &row_identity));
3569:   PetscCall(ISIdentity(isicol, &col_identity));
3570:   if (row_identity && col_identity) {
3571:     C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3572:   } else {
3573:     C->ops->solve = MatSolve_SeqAIJ;
3574:   }
3575:   C->ops->solveadd          = NULL;
3576:   C->ops->solvetranspose    = NULL;
3577:   C->ops->solvetransposeadd = NULL;
3578:   C->ops->matsolve          = NULL;
3579:   C->assembled              = PETSC_TRUE;
3580:   C->preallocated           = PETSC_TRUE;

3582:   PetscCall(PetscLogFlops(C->cmap->n));
3583:   PetscFunctionReturn(PETSC_SUCCESS);
3584: }
3585: #endif