Actual source code: aijfact.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petscbt.h>
4: #include <../src/mat/utils/freespace.h>
6: /*
7: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10: */
11: #if 0
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data;
15: PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16: const PetscInt *ai = a->i, *aj = a->j;
17: const PetscScalar *aa = a->a;
18: PetscBool *done;
19: PetscReal best, past = 0, future;
21: PetscFunctionBegin;
22: /* pick initial row */
23: best = -1;
24: for (i = 0; i < n; i++) {
25: future = 0.0;
26: for (j = ai[i]; j < ai[i + 1]; j++) {
27: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28: else past = PetscAbsScalar(aa[j]);
29: }
30: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31: if (past / future > best) {
32: best = past / future;
33: current = i;
34: }
35: }
37: PetscCall(PetscMalloc1(n, &done));
38: PetscCall(PetscArrayzero(done, n));
39: PetscCall(PetscMalloc1(n, &order));
40: order[0] = current;
41: for (i = 0; i < n - 1; i++) {
42: done[current] = PETSC_TRUE;
43: best = -1;
44: /* loop over all neighbors of current pivot */
45: for (j = ai[current]; j < ai[current + 1]; j++) {
46: jj = aj[j];
47: if (done[jj]) continue;
48: /* loop over columns of potential next row computing weights for below and above diagonal */
49: past = future = 0.0;
50: for (k = ai[jj]; k < ai[jj + 1]; k++) {
51: kk = aj[k];
52: if (done[kk]) past += PetscAbsScalar(aa[k]);
53: else if (kk != jj) future += PetscAbsScalar(aa[k]);
54: }
55: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56: if (past / future > best) {
57: best = past / future;
58: newcurrent = jj;
59: }
60: }
61: if (best == -1) { /* no neighbors to select from so select best of all that remain */
62: best = -1;
63: for (k = 0; k < n; k++) {
64: if (done[k]) continue;
65: future = 0.0;
66: past = 0.0;
67: for (j = ai[k]; j < ai[k + 1]; j++) {
68: kk = aj[j];
69: if (done[kk]) past += PetscAbsScalar(aa[j]);
70: else if (kk != k) future += PetscAbsScalar(aa[j]);
71: }
72: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73: if (past / future > best) {
74: best = past / future;
75: newcurrent = k;
76: }
77: }
78: }
79: PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80: current = newcurrent;
81: order[i + 1] = current;
82: }
83: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84: *icol = *irow;
85: PetscCall(PetscObjectReference((PetscObject)*irow));
86: PetscCall(PetscFree(done));
87: PetscCall(PetscFree(order));
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
90: #endif
92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93: {
94: PetscFunctionBegin;
95: *type = MATSOLVERPETSC;
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101: PetscInt n = A->rmap->n;
103: PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105: if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107: *B = NULL;
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
110: #endif
112: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113: PetscCall(MatSetSizes(*B, n, n, n, n));
114: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115: PetscCall(MatSetType(*B, MATSEQAIJ));
117: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
120: PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125: PetscCall(MatSetType(*B, MATSEQSBAIJ));
126: PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
128: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
129: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133: (*B)->factortype = ftype;
135: PetscCall(PetscFree((*B)->solvertype));
136: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137: (*B)->canuseordering = PETSC_TRUE;
138: PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
147: IS isicol;
148: const PetscInt *r, *ic;
149: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150: PetscInt *bi, *bj, *ajtmp;
151: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152: PetscReal f;
153: PetscInt nlnk, *lnk, k, **bi_ptr;
154: PetscFreeSpaceList free_space = NULL, current_space = NULL;
155: PetscBT lnkbt;
156: PetscBool missing;
158: PetscFunctionBegin;
159: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160: PetscCall(MatMissingDiagonal(A, &missing, &i));
161: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
163: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164: PetscCall(ISGetIndices(isrow, &r));
165: PetscCall(ISGetIndices(isicol, &ic));
167: /* get new row pointers */
168: PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
169: bi[0] = 0;
171: /* bdiag is location of diagonal in factor */
172: PetscCall(PetscMalloc1(n + 1, &bdiag));
173: bdiag[0] = 0;
175: /* linked list for storing column indices of the active row */
176: nlnk = n + 1;
177: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
180: /* initial FreeSpace size is f*(ai[n]+1) */
181: f = info->fill;
182: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
183: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
184: current_space = free_space;
186: for (i = 0; i < n; i++) {
187: /* copy previous fill into linked list */
188: nzi = 0;
189: nnz = ai[r[i] + 1] - ai[r[i]];
190: ajtmp = aj + ai[r[i]];
191: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
192: nzi += nlnk;
194: /* add pivot rows into linked list */
195: row = lnk[n];
196: while (row < i) {
197: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
199: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
200: nzi += nlnk;
201: row = lnk[row];
202: }
203: bi[i + 1] = bi[i] + nzi;
204: im[i] = nzi;
206: /* mark bdiag */
207: nzbd = 0;
208: nnz = nzi;
209: k = lnk[n];
210: while (nnz-- && k < i) {
211: nzbd++;
212: k = lnk[k];
213: }
214: bdiag[i] = bi[i] + nzbd;
216: /* if free space is not available, make more free space */
217: if (current_space->local_remaining < nzi) {
218: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
219: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
220: reallocs++;
221: }
223: /* copy data into free space, then initialize lnk */
224: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
226: bi_ptr[i] = current_space->array;
227: current_space->array += nzi;
228: current_space->local_used += nzi;
229: current_space->local_remaining -= nzi;
230: }
231: #if defined(PETSC_USE_INFO)
232: if (ai[n] != 0) {
233: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
234: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
235: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
236: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
237: PetscCall(PetscInfo(A, "for best performance.\n"));
238: } else {
239: PetscCall(PetscInfo(A, "Empty matrix\n"));
240: }
241: #endif
243: PetscCall(ISRestoreIndices(isrow, &r));
244: PetscCall(ISRestoreIndices(isicol, &ic));
246: /* destroy list of free space and other temporary array(s) */
247: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
248: PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
249: PetscCall(PetscLLDestroy(lnk, lnkbt));
250: PetscCall(PetscFree2(bi_ptr, im));
252: /* put together the new matrix */
253: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
254: b = (Mat_SeqAIJ *)B->data;
255: b->free_ij = PETSC_TRUE;
256: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
257: b->free_a = PETSC_TRUE;
258: b->j = bj;
259: b->i = bi;
260: b->diag = bdiag;
261: b->ilen = NULL;
262: b->imax = NULL;
263: b->row = isrow;
264: b->col = iscol;
265: PetscCall(PetscObjectReference((PetscObject)isrow));
266: PetscCall(PetscObjectReference((PetscObject)iscol));
267: b->icol = isicol;
268: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
270: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
271: b->maxnz = b->nz = bi[n];
273: B->factortype = MAT_FACTOR_LU;
274: B->info.factor_mallocs = reallocs;
275: B->info.fill_ratio_given = f;
277: if (ai[n]) {
278: B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279: } else {
280: B->info.fill_ratio_needed = 0.0;
281: }
282: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284: PetscFunctionReturn(PETSC_SUCCESS);
285: }
286: #endif
288: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289: {
290: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
291: IS isicol;
292: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293: PetscInt i, n = A->rmap->n;
294: PetscInt *bi, *bj;
295: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296: PetscReal f;
297: PetscInt nlnk, *lnk, k, **bi_ptr;
298: PetscFreeSpaceList free_space = NULL, current_space = NULL;
299: PetscBT lnkbt;
300: PetscBool missing;
302: PetscFunctionBegin;
303: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304: PetscCall(MatMissingDiagonal(A, &missing, &i));
305: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
307: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308: PetscCall(ISGetIndices(isrow, &r));
309: PetscCall(ISGetIndices(isicol, &ic));
311: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
313: PetscCall(PetscMalloc1(n + 1, &bdiag));
314: bi[0] = bdiag[0] = 0;
316: /* linked list for storing column indices of the active row */
317: nlnk = n + 1;
318: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
320: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
322: /* initial FreeSpace size is f*(ai[n]+1) */
323: f = info->fill;
324: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326: current_space = free_space;
328: for (i = 0; i < n; i++) {
329: /* copy previous fill into linked list */
330: nzi = 0;
331: nnz = ai[r[i] + 1] - ai[r[i]];
332: ajtmp = aj + ai[r[i]];
333: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334: nzi += nlnk;
336: /* add pivot rows into linked list */
337: row = lnk[n];
338: while (row < i) {
339: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
340: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342: nzi += nlnk;
343: row = lnk[row];
344: }
345: bi[i + 1] = bi[i] + nzi;
346: im[i] = nzi;
348: /* mark bdiag */
349: nzbd = 0;
350: nnz = nzi;
351: k = lnk[n];
352: while (nnz-- && k < i) {
353: nzbd++;
354: k = lnk[k];
355: }
356: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
358: /* if free space is not available, make more free space */
359: if (current_space->local_remaining < nzi) {
360: /* estimated additional space needed */
361: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
363: reallocs++;
364: }
366: /* copy data into free space, then initialize lnk */
367: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
369: bi_ptr[i] = current_space->array;
370: current_space->array += nzi;
371: current_space->local_used += nzi;
372: current_space->local_remaining -= nzi;
373: }
375: PetscCall(ISRestoreIndices(isrow, &r));
376: PetscCall(ISRestoreIndices(isicol, &ic));
378: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379: PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
380: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381: PetscCall(PetscLLDestroy(lnk, lnkbt));
382: PetscCall(PetscFree2(bi_ptr, im));
384: /* put together the new matrix */
385: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386: b = (Mat_SeqAIJ *)B->data;
387: b->free_ij = PETSC_TRUE;
388: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
389: b->free_a = PETSC_TRUE;
390: b->j = bj;
391: b->i = bi;
392: b->diag = bdiag;
393: b->ilen = NULL;
394: b->imax = NULL;
395: b->row = isrow;
396: b->col = iscol;
397: PetscCall(PetscObjectReference((PetscObject)isrow));
398: PetscCall(PetscObjectReference((PetscObject)iscol));
399: b->icol = isicol;
400: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
402: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
403: b->maxnz = b->nz = bdiag[0] + 1;
405: B->factortype = MAT_FACTOR_LU;
406: B->info.factor_mallocs = reallocs;
407: B->info.fill_ratio_given = f;
409: if (ai[n]) {
410: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
411: } else {
412: B->info.fill_ratio_needed = 0.0;
413: }
414: #if defined(PETSC_USE_INFO)
415: if (ai[n] != 0) {
416: PetscReal af = B->info.fill_ratio_needed;
417: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
418: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
419: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
420: PetscCall(PetscInfo(A, "for best performance.\n"));
421: } else {
422: PetscCall(PetscInfo(A, "Empty matrix\n"));
423: }
424: #endif
425: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427: PetscCall(MatSeqAIJCheckInode_FactorLU(B));
428: PetscFunctionReturn(PETSC_SUCCESS);
429: }
431: /*
432: Trouble in factorization, should we dump the original matrix?
433: */
434: PetscErrorCode MatFactorDumpMatrix(Mat A)
435: {
436: PetscBool flg = PETSC_FALSE;
438: PetscFunctionBegin;
439: PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
440: if (flg) {
441: PetscViewer viewer;
442: char filename[PETSC_MAX_PATH_LEN];
444: PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
445: PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
446: PetscCall(MatView(A, viewer));
447: PetscCall(PetscViewerDestroy(&viewer));
448: }
449: PetscFunctionReturn(PETSC_SUCCESS);
450: }
452: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453: {
454: Mat C = B;
455: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456: IS isrow = b->row, isicol = b->icol;
457: const PetscInt *r, *ic, *ics;
458: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459: PetscInt i, j, k, nz, nzL, row, *pj;
460: const PetscInt *ajtmp, *bjtmp;
461: MatScalar *rtmp, *pc, multiplier, *pv;
462: const MatScalar *aa = a->a, *v;
463: PetscBool row_identity, col_identity;
464: FactorShiftCtx sctx;
465: const PetscInt *ddiag;
466: PetscReal rs;
467: MatScalar d;
469: PetscFunctionBegin;
470: /* MatPivotSetUp(): initialize shift context sctx */
471: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
473: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
474: ddiag = a->diag;
475: sctx.shift_top = info->zeropivot;
476: for (i = 0; i < n; i++) {
477: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
478: d = (aa)[ddiag[i]];
479: rs = -PetscAbsScalar(d) - PetscRealPart(d);
480: v = aa + ai[i];
481: nz = ai[i + 1] - ai[i];
482: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
483: if (rs > sctx.shift_top) sctx.shift_top = rs;
484: }
485: sctx.shift_top *= 1.1;
486: sctx.nshift_max = 5;
487: sctx.shift_lo = 0.;
488: sctx.shift_hi = 1.;
489: }
491: PetscCall(ISGetIndices(isrow, &r));
492: PetscCall(ISGetIndices(isicol, &ic));
493: PetscCall(PetscMalloc1(n + 1, &rtmp));
494: ics = ic;
496: do {
497: sctx.newshift = PETSC_FALSE;
498: for (i = 0; i < n; i++) {
499: /* zero rtmp */
500: /* L part */
501: nz = bi[i + 1] - bi[i];
502: bjtmp = bj + bi[i];
503: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
505: /* U part */
506: nz = bdiag[i] - bdiag[i + 1];
507: bjtmp = bj + bdiag[i + 1] + 1;
508: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
510: /* load in initial (unfactored row) */
511: nz = ai[r[i] + 1] - ai[r[i]];
512: ajtmp = aj + ai[r[i]];
513: v = aa + ai[r[i]];
514: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
515: /* ZeropivotApply() */
516: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
518: /* elimination */
519: bjtmp = bj + bi[i];
520: row = *bjtmp++;
521: nzL = bi[i + 1] - bi[i];
522: for (k = 0; k < nzL; k++) {
523: pc = rtmp + row;
524: if (*pc != 0.0) {
525: pv = b->a + bdiag[row];
526: multiplier = *pc * (*pv);
527: *pc = multiplier;
529: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
530: pv = b->a + bdiag[row + 1] + 1;
531: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
533: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
534: PetscCall(PetscLogFlops(1 + 2.0 * nz));
535: }
536: row = *bjtmp++;
537: }
539: /* finished row so stick it into b->a */
540: rs = 0.0;
541: /* L part */
542: pv = b->a + bi[i];
543: pj = b->j + bi[i];
544: nz = bi[i + 1] - bi[i];
545: for (j = 0; j < nz; j++) {
546: pv[j] = rtmp[pj[j]];
547: rs += PetscAbsScalar(pv[j]);
548: }
550: /* U part */
551: pv = b->a + bdiag[i + 1] + 1;
552: pj = b->j + bdiag[i + 1] + 1;
553: nz = bdiag[i] - bdiag[i + 1] - 1;
554: for (j = 0; j < nz; j++) {
555: pv[j] = rtmp[pj[j]];
556: rs += PetscAbsScalar(pv[j]);
557: }
559: sctx.rs = rs;
560: sctx.pv = rtmp[i];
561: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
562: if (sctx.newshift) break; /* break for-loop */
563: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
565: /* Mark diagonal and invert diagonal for simpler triangular solves */
566: pv = b->a + bdiag[i];
567: *pv = 1.0 / rtmp[i];
569: } /* endof for (i=0; i<n; i++) { */
571: /* MatPivotRefine() */
572: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
573: /*
574: * if no shift in this attempt & shifting & started shifting & can refine,
575: * then try lower shift
576: */
577: sctx.shift_hi = sctx.shift_fraction;
578: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
579: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
580: sctx.newshift = PETSC_TRUE;
581: sctx.nshift++;
582: }
583: } while (sctx.newshift);
585: PetscCall(PetscFree(rtmp));
586: PetscCall(ISRestoreIndices(isicol, &ic));
587: PetscCall(ISRestoreIndices(isrow, &r));
589: PetscCall(ISIdentity(isrow, &row_identity));
590: PetscCall(ISIdentity(isicol, &col_identity));
591: if (b->inode.size) {
592: C->ops->solve = MatSolve_SeqAIJ_Inode;
593: } else if (row_identity && col_identity) {
594: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
595: } else {
596: C->ops->solve = MatSolve_SeqAIJ;
597: }
598: C->ops->solveadd = MatSolveAdd_SeqAIJ;
599: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
600: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
601: C->ops->matsolve = MatMatSolve_SeqAIJ;
602: C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
603: C->assembled = PETSC_TRUE;
604: C->preallocated = PETSC_TRUE;
606: PetscCall(PetscLogFlops(C->cmap->n));
608: /* MatShiftView(A,info,&sctx) */
609: if (sctx.nshift) {
610: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
611: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
612: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
613: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
614: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
615: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
616: }
617: }
618: PetscFunctionReturn(PETSC_SUCCESS);
619: }
621: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
622: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
623: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
625: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
626: {
627: Mat C = B;
628: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
629: IS isrow = b->row, isicol = b->icol;
630: const PetscInt *r, *ic, *ics;
631: PetscInt nz, row, i, j, n = A->rmap->n, diag;
632: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
633: const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
634: MatScalar *pv, *rtmp, *pc, multiplier, d;
635: const MatScalar *v, *aa = a->a;
636: PetscReal rs = 0.0;
637: FactorShiftCtx sctx;
638: const PetscInt *ddiag;
639: PetscBool row_identity, col_identity;
641: PetscFunctionBegin;
642: /* MatPivotSetUp(): initialize shift context sctx */
643: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
645: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
646: ddiag = a->diag;
647: sctx.shift_top = info->zeropivot;
648: for (i = 0; i < n; i++) {
649: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
650: d = (aa)[ddiag[i]];
651: rs = -PetscAbsScalar(d) - PetscRealPart(d);
652: v = aa + ai[i];
653: nz = ai[i + 1] - ai[i];
654: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
655: if (rs > sctx.shift_top) sctx.shift_top = rs;
656: }
657: sctx.shift_top *= 1.1;
658: sctx.nshift_max = 5;
659: sctx.shift_lo = 0.;
660: sctx.shift_hi = 1.;
661: }
663: PetscCall(ISGetIndices(isrow, &r));
664: PetscCall(ISGetIndices(isicol, &ic));
665: PetscCall(PetscMalloc1(n + 1, &rtmp));
666: ics = ic;
668: do {
669: sctx.newshift = PETSC_FALSE;
670: for (i = 0; i < n; i++) {
671: nz = bi[i + 1] - bi[i];
672: bjtmp = bj + bi[i];
673: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
675: /* load in initial (unfactored row) */
676: nz = ai[r[i] + 1] - ai[r[i]];
677: ajtmp = aj + ai[r[i]];
678: v = aa + ai[r[i]];
679: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
680: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
682: row = *bjtmp++;
683: while (row < i) {
684: pc = rtmp + row;
685: if (*pc != 0.0) {
686: pv = b->a + diag_offset[row];
687: pj = b->j + diag_offset[row] + 1;
688: multiplier = *pc / *pv++;
689: *pc = multiplier;
690: nz = bi[row + 1] - diag_offset[row] - 1;
691: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
692: PetscCall(PetscLogFlops(1 + 2.0 * nz));
693: }
694: row = *bjtmp++;
695: }
696: /* finished row so stick it into b->a */
697: pv = b->a + bi[i];
698: pj = b->j + bi[i];
699: nz = bi[i + 1] - bi[i];
700: diag = diag_offset[i] - bi[i];
701: rs = 0.0;
702: for (j = 0; j < nz; j++) {
703: pv[j] = rtmp[pj[j]];
704: rs += PetscAbsScalar(pv[j]);
705: }
706: rs -= PetscAbsScalar(pv[diag]);
708: sctx.rs = rs;
709: sctx.pv = pv[diag];
710: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
711: if (sctx.newshift) break;
712: pv[diag] = sctx.pv;
713: }
715: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
716: /*
717: * if no shift in this attempt & shifting & started shifting & can refine,
718: * then try lower shift
719: */
720: sctx.shift_hi = sctx.shift_fraction;
721: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
722: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
723: sctx.newshift = PETSC_TRUE;
724: sctx.nshift++;
725: }
726: } while (sctx.newshift);
728: /* invert diagonal entries for simpler triangular solves */
729: for (i = 0; i < n; i++) b->a[diag_offset[i]] = 1.0 / b->a[diag_offset[i]];
730: PetscCall(PetscFree(rtmp));
731: PetscCall(ISRestoreIndices(isicol, &ic));
732: PetscCall(ISRestoreIndices(isrow, &r));
734: PetscCall(ISIdentity(isrow, &row_identity));
735: PetscCall(ISIdentity(isicol, &col_identity));
736: if (row_identity && col_identity) {
737: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
738: } else {
739: C->ops->solve = MatSolve_SeqAIJ_inplace;
740: }
741: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
742: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
743: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
744: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
745: C->ops->matsolvetranspose = NULL;
747: C->assembled = PETSC_TRUE;
748: C->preallocated = PETSC_TRUE;
750: PetscCall(PetscLogFlops(C->cmap->n));
751: if (sctx.nshift) {
752: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
753: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
754: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
755: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
756: }
757: }
758: C->ops->solve = MatSolve_SeqAIJ_inplace;
759: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
761: PetscCall(MatSeqAIJCheckInode(C));
762: PetscFunctionReturn(PETSC_SUCCESS);
763: }
765: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
767: /*
768: This routine implements inplace ILU(0) with row or/and column permutations.
769: Input:
770: A - original matrix
771: Output;
772: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
773: a->j (col index) is permuted by the inverse of colperm, then sorted
774: a->a reordered accordingly with a->j
775: a->diag (ptr to diagonal elements) is updated.
776: */
777: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
778: {
779: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
780: IS isrow = a->row, isicol = a->icol;
781: const PetscInt *r, *ic, *ics;
782: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
783: PetscInt *ajtmp, nz, row;
784: PetscInt *diag = a->diag, nbdiag, *pj;
785: PetscScalar *rtmp, *pc, multiplier, d;
786: MatScalar *pv, *v;
787: PetscReal rs;
788: FactorShiftCtx sctx;
789: const MatScalar *aa = a->a, *vtmp;
791: PetscFunctionBegin;
792: PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
794: /* MatPivotSetUp(): initialize shift context sctx */
795: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
797: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
798: const PetscInt *ddiag = a->diag;
799: sctx.shift_top = info->zeropivot;
800: for (i = 0; i < n; i++) {
801: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
802: d = (aa)[ddiag[i]];
803: rs = -PetscAbsScalar(d) - PetscRealPart(d);
804: vtmp = aa + ai[i];
805: nz = ai[i + 1] - ai[i];
806: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
807: if (rs > sctx.shift_top) sctx.shift_top = rs;
808: }
809: sctx.shift_top *= 1.1;
810: sctx.nshift_max = 5;
811: sctx.shift_lo = 0.;
812: sctx.shift_hi = 1.;
813: }
815: PetscCall(ISGetIndices(isrow, &r));
816: PetscCall(ISGetIndices(isicol, &ic));
817: PetscCall(PetscMalloc1(n + 1, &rtmp));
818: PetscCall(PetscArrayzero(rtmp, n + 1));
819: ics = ic;
821: #if defined(MV)
822: sctx.shift_top = 0.;
823: sctx.nshift_max = 0;
824: sctx.shift_lo = 0.;
825: sctx.shift_hi = 0.;
826: sctx.shift_fraction = 0.;
828: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
829: sctx.shift_top = 0.;
830: for (i = 0; i < n; i++) {
831: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
832: d = (a->a)[diag[i]];
833: rs = -PetscAbsScalar(d) - PetscRealPart(d);
834: v = a->a + ai[i];
835: nz = ai[i + 1] - ai[i];
836: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
837: if (rs > sctx.shift_top) sctx.shift_top = rs;
838: }
839: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
840: sctx.shift_top *= 1.1;
841: sctx.nshift_max = 5;
842: sctx.shift_lo = 0.;
843: sctx.shift_hi = 1.;
844: }
846: sctx.shift_amount = 0.;
847: sctx.nshift = 0;
848: #endif
850: do {
851: sctx.newshift = PETSC_FALSE;
852: for (i = 0; i < n; i++) {
853: /* load in initial unfactored row */
854: nz = ai[r[i] + 1] - ai[r[i]];
855: ajtmp = aj + ai[r[i]];
856: v = a->a + ai[r[i]];
857: /* sort permuted ajtmp and values v accordingly */
858: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
859: PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
861: diag[r[i]] = ai[r[i]];
862: for (j = 0; j < nz; j++) {
863: rtmp[ajtmp[j]] = v[j];
864: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
865: }
866: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
868: row = *ajtmp++;
869: while (row < i) {
870: pc = rtmp + row;
871: if (*pc != 0.0) {
872: pv = a->a + diag[r[row]];
873: pj = aj + diag[r[row]] + 1;
875: multiplier = *pc / *pv++;
876: *pc = multiplier;
877: nz = ai[r[row] + 1] - diag[r[row]] - 1;
878: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
879: PetscCall(PetscLogFlops(1 + 2.0 * nz));
880: }
881: row = *ajtmp++;
882: }
883: /* finished row so overwrite it onto a->a */
884: pv = a->a + ai[r[i]];
885: pj = aj + ai[r[i]];
886: nz = ai[r[i] + 1] - ai[r[i]];
887: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
889: rs = 0.0;
890: for (j = 0; j < nz; j++) {
891: pv[j] = rtmp[pj[j]];
892: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
893: }
895: sctx.rs = rs;
896: sctx.pv = pv[nbdiag];
897: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
898: if (sctx.newshift) break;
899: pv[nbdiag] = sctx.pv;
900: }
902: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
903: /*
904: * if no shift in this attempt & shifting & started shifting & can refine,
905: * then try lower shift
906: */
907: sctx.shift_hi = sctx.shift_fraction;
908: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
909: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
910: sctx.newshift = PETSC_TRUE;
911: sctx.nshift++;
912: }
913: } while (sctx.newshift);
915: /* invert diagonal entries for simpler triangular solves */
916: for (i = 0; i < n; i++) a->a[diag[r[i]]] = 1.0 / a->a[diag[r[i]]];
918: PetscCall(PetscFree(rtmp));
919: PetscCall(ISRestoreIndices(isicol, &ic));
920: PetscCall(ISRestoreIndices(isrow, &r));
922: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
923: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
924: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
925: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
927: A->assembled = PETSC_TRUE;
928: A->preallocated = PETSC_TRUE;
930: PetscCall(PetscLogFlops(A->cmap->n));
931: if (sctx.nshift) {
932: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
933: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
934: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
935: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
936: }
937: }
938: PetscFunctionReturn(PETSC_SUCCESS);
939: }
941: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
942: {
943: Mat C;
945: PetscFunctionBegin;
946: PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
947: PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
948: PetscCall(MatLUFactorNumeric(C, A, info));
950: A->ops->solve = C->ops->solve;
951: A->ops->solvetranspose = C->ops->solvetranspose;
953: PetscCall(MatHeaderMerge(A, &C));
954: PetscFunctionReturn(PETSC_SUCCESS);
955: }
957: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
958: {
959: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
960: IS iscol = a->col, isrow = a->row;
961: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
962: PetscInt nz;
963: const PetscInt *rout, *cout, *r, *c;
964: PetscScalar *x, *tmp, *tmps, sum;
965: const PetscScalar *b;
966: const MatScalar *aa = a->a, *v;
968: PetscFunctionBegin;
969: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
971: PetscCall(VecGetArrayRead(bb, &b));
972: PetscCall(VecGetArrayWrite(xx, &x));
973: tmp = a->solve_work;
975: PetscCall(ISGetIndices(isrow, &rout));
976: r = rout;
977: PetscCall(ISGetIndices(iscol, &cout));
978: c = cout + (n - 1);
980: /* forward solve the lower triangular */
981: tmp[0] = b[*r++];
982: tmps = tmp;
983: for (i = 1; i < n; i++) {
984: v = aa + ai[i];
985: vi = aj + ai[i];
986: nz = a->diag[i] - ai[i];
987: sum = b[*r++];
988: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
989: tmp[i] = sum;
990: }
992: /* backward solve the upper triangular */
993: for (i = n - 1; i >= 0; i--) {
994: v = aa + a->diag[i] + 1;
995: vi = aj + a->diag[i] + 1;
996: nz = ai[i + 1] - a->diag[i] - 1;
997: sum = tmp[i];
998: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
999: x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1000: }
1002: PetscCall(ISRestoreIndices(isrow, &rout));
1003: PetscCall(ISRestoreIndices(iscol, &cout));
1004: PetscCall(VecRestoreArrayRead(bb, &b));
1005: PetscCall(VecRestoreArrayWrite(xx, &x));
1006: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1007: PetscFunctionReturn(PETSC_SUCCESS);
1008: }
1010: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1011: {
1012: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1013: IS iscol = a->col, isrow = a->row;
1014: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1015: PetscInt nz, neq, ldb, ldx;
1016: const PetscInt *rout, *cout, *r, *c;
1017: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
1018: const PetscScalar *b, *aa = a->a, *v;
1019: PetscBool isdense;
1021: PetscFunctionBegin;
1022: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1023: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1024: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1025: if (X != B) {
1026: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1027: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1028: }
1029: PetscCall(MatDenseGetArrayRead(B, &b));
1030: PetscCall(MatDenseGetLDA(B, &ldb));
1031: PetscCall(MatDenseGetArray(X, &x));
1032: PetscCall(MatDenseGetLDA(X, &ldx));
1033: PetscCall(ISGetIndices(isrow, &rout));
1034: r = rout;
1035: PetscCall(ISGetIndices(iscol, &cout));
1036: c = cout;
1037: for (neq = 0; neq < B->cmap->n; neq++) {
1038: /* forward solve the lower triangular */
1039: tmp[0] = b[r[0]];
1040: tmps = tmp;
1041: for (i = 1; i < n; i++) {
1042: v = aa + ai[i];
1043: vi = aj + ai[i];
1044: nz = a->diag[i] - ai[i];
1045: sum = b[r[i]];
1046: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1047: tmp[i] = sum;
1048: }
1049: /* backward solve the upper triangular */
1050: for (i = n - 1; i >= 0; i--) {
1051: v = aa + a->diag[i] + 1;
1052: vi = aj + a->diag[i] + 1;
1053: nz = ai[i + 1] - a->diag[i] - 1;
1054: sum = tmp[i];
1055: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1056: x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1057: }
1058: b += ldb;
1059: x += ldx;
1060: }
1061: PetscCall(ISRestoreIndices(isrow, &rout));
1062: PetscCall(ISRestoreIndices(iscol, &cout));
1063: PetscCall(MatDenseRestoreArrayRead(B, &b));
1064: PetscCall(MatDenseRestoreArray(X, &x));
1065: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1066: PetscFunctionReturn(PETSC_SUCCESS);
1067: }
1069: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1070: {
1071: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1072: IS iscol = a->col, isrow = a->row;
1073: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1074: PetscInt nz, neq, ldb, ldx;
1075: const PetscInt *rout, *cout, *r, *c;
1076: PetscScalar *x, *tmp = a->solve_work, sum;
1077: const PetscScalar *b, *aa = a->a, *v;
1078: PetscBool isdense;
1080: PetscFunctionBegin;
1081: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1082: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1083: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1084: if (X != B) {
1085: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1086: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1087: }
1088: PetscCall(MatDenseGetArrayRead(B, &b));
1089: PetscCall(MatDenseGetLDA(B, &ldb));
1090: PetscCall(MatDenseGetArray(X, &x));
1091: PetscCall(MatDenseGetLDA(X, &ldx));
1092: PetscCall(ISGetIndices(isrow, &rout));
1093: r = rout;
1094: PetscCall(ISGetIndices(iscol, &cout));
1095: c = cout;
1096: for (neq = 0; neq < B->cmap->n; neq++) {
1097: /* forward solve the lower triangular */
1098: tmp[0] = b[r[0]];
1099: v = aa;
1100: vi = aj;
1101: for (i = 1; i < n; i++) {
1102: nz = ai[i + 1] - ai[i];
1103: sum = b[r[i]];
1104: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1105: tmp[i] = sum;
1106: v += nz;
1107: vi += nz;
1108: }
1109: /* backward solve the upper triangular */
1110: for (i = n - 1; i >= 0; i--) {
1111: v = aa + adiag[i + 1] + 1;
1112: vi = aj + adiag[i + 1] + 1;
1113: nz = adiag[i] - adiag[i + 1] - 1;
1114: sum = tmp[i];
1115: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1116: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1117: }
1118: b += ldb;
1119: x += ldx;
1120: }
1121: PetscCall(ISRestoreIndices(isrow, &rout));
1122: PetscCall(ISRestoreIndices(iscol, &cout));
1123: PetscCall(MatDenseRestoreArrayRead(B, &b));
1124: PetscCall(MatDenseRestoreArray(X, &x));
1125: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1126: PetscFunctionReturn(PETSC_SUCCESS);
1127: }
1129: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1130: {
1131: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1132: IS iscol = a->col, isrow = a->row;
1133: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1134: PetscInt nz, neq, ldb, ldx;
1135: const PetscInt *rout, *cout, *r, *c;
1136: PetscScalar *x, *tmp = a->solve_work, s1;
1137: const PetscScalar *b, *aa = a->a, *v;
1138: PetscBool isdense;
1140: PetscFunctionBegin;
1141: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1142: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1143: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1144: if (X != B) {
1145: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1146: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1147: }
1148: PetscCall(MatDenseGetArrayRead(B, &b));
1149: PetscCall(MatDenseGetLDA(B, &ldb));
1150: PetscCall(MatDenseGetArray(X, &x));
1151: PetscCall(MatDenseGetLDA(X, &ldx));
1152: PetscCall(ISGetIndices(isrow, &rout));
1153: r = rout;
1154: PetscCall(ISGetIndices(iscol, &cout));
1155: c = cout;
1156: for (neq = 0; neq < B->cmap->n; neq++) {
1157: /* copy the b into temp work space according to permutation */
1158: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1160: /* forward solve the U^T */
1161: for (i = 0; i < n; i++) {
1162: v = aa + adiag[i + 1] + 1;
1163: vi = aj + adiag[i + 1] + 1;
1164: nz = adiag[i] - adiag[i + 1] - 1;
1165: s1 = tmp[i];
1166: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1167: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1168: tmp[i] = s1;
1169: }
1171: /* backward solve the L^T */
1172: for (i = n - 1; i >= 0; i--) {
1173: v = aa + ai[i];
1174: vi = aj + ai[i];
1175: nz = ai[i + 1] - ai[i];
1176: s1 = tmp[i];
1177: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1178: }
1180: /* copy tmp into x according to permutation */
1181: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1182: b += ldb;
1183: x += ldx;
1184: }
1185: PetscCall(ISRestoreIndices(isrow, &rout));
1186: PetscCall(ISRestoreIndices(iscol, &cout));
1187: PetscCall(MatDenseRestoreArrayRead(B, &b));
1188: PetscCall(MatDenseRestoreArray(X, &x));
1189: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1190: PetscFunctionReturn(PETSC_SUCCESS);
1191: }
1193: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1194: {
1195: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1196: IS iscol = a->col, isrow = a->row;
1197: const PetscInt *r, *c, *rout, *cout;
1198: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1199: PetscInt nz, row;
1200: PetscScalar *x, *tmp, *tmps, sum;
1201: const PetscScalar *b;
1202: const MatScalar *aa = a->a, *v;
1204: PetscFunctionBegin;
1205: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1207: PetscCall(VecGetArrayRead(bb, &b));
1208: PetscCall(VecGetArrayWrite(xx, &x));
1209: tmp = a->solve_work;
1211: PetscCall(ISGetIndices(isrow, &rout));
1212: r = rout;
1213: PetscCall(ISGetIndices(iscol, &cout));
1214: c = cout + (n - 1);
1216: /* forward solve the lower triangular */
1217: tmp[0] = b[*r++];
1218: tmps = tmp;
1219: for (row = 1; row < n; row++) {
1220: i = rout[row]; /* permuted row */
1221: v = aa + ai[i];
1222: vi = aj + ai[i];
1223: nz = a->diag[i] - ai[i];
1224: sum = b[*r++];
1225: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1226: tmp[row] = sum;
1227: }
1229: /* backward solve the upper triangular */
1230: for (row = n - 1; row >= 0; row--) {
1231: i = rout[row]; /* permuted row */
1232: v = aa + a->diag[i] + 1;
1233: vi = aj + a->diag[i] + 1;
1234: nz = ai[i + 1] - a->diag[i] - 1;
1235: sum = tmp[row];
1236: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1237: x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1238: }
1240: PetscCall(ISRestoreIndices(isrow, &rout));
1241: PetscCall(ISRestoreIndices(iscol, &cout));
1242: PetscCall(VecRestoreArrayRead(bb, &b));
1243: PetscCall(VecRestoreArrayWrite(xx, &x));
1244: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1245: PetscFunctionReturn(PETSC_SUCCESS);
1246: }
1248: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1249: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1250: {
1251: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1252: PetscInt n = A->rmap->n;
1253: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag;
1254: PetscScalar *x;
1255: const PetscScalar *b;
1256: const MatScalar *aa = a->a;
1257: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1258: PetscInt adiag_i, i, nz, ai_i;
1259: const PetscInt *vi;
1260: const MatScalar *v;
1261: PetscScalar sum;
1262: #endif
1264: PetscFunctionBegin;
1265: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1267: PetscCall(VecGetArrayRead(bb, &b));
1268: PetscCall(VecGetArrayWrite(xx, &x));
1270: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1271: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1272: #else
1273: /* forward solve the lower triangular */
1274: x[0] = b[0];
1275: for (i = 1; i < n; i++) {
1276: ai_i = ai[i];
1277: v = aa + ai_i;
1278: vi = aj + ai_i;
1279: nz = adiag[i] - ai_i;
1280: sum = b[i];
1281: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1282: x[i] = sum;
1283: }
1285: /* backward solve the upper triangular */
1286: for (i = n - 1; i >= 0; i--) {
1287: adiag_i = adiag[i];
1288: v = aa + adiag_i + 1;
1289: vi = aj + adiag_i + 1;
1290: nz = ai[i + 1] - adiag_i - 1;
1291: sum = x[i];
1292: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1293: x[i] = sum * aa[adiag_i];
1294: }
1295: #endif
1296: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1297: PetscCall(VecRestoreArrayRead(bb, &b));
1298: PetscCall(VecRestoreArrayWrite(xx, &x));
1299: PetscFunctionReturn(PETSC_SUCCESS);
1300: }
1302: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1303: {
1304: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1305: IS iscol = a->col, isrow = a->row;
1306: PetscInt i, n = A->rmap->n, j;
1307: PetscInt nz;
1308: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1309: PetscScalar *x, *tmp, sum;
1310: const PetscScalar *b;
1311: const MatScalar *aa = a->a, *v;
1313: PetscFunctionBegin;
1314: if (yy != xx) PetscCall(VecCopy(yy, xx));
1316: PetscCall(VecGetArrayRead(bb, &b));
1317: PetscCall(VecGetArray(xx, &x));
1318: tmp = a->solve_work;
1320: PetscCall(ISGetIndices(isrow, &rout));
1321: r = rout;
1322: PetscCall(ISGetIndices(iscol, &cout));
1323: c = cout + (n - 1);
1325: /* forward solve the lower triangular */
1326: tmp[0] = b[*r++];
1327: for (i = 1; i < n; i++) {
1328: v = aa + ai[i];
1329: vi = aj + ai[i];
1330: nz = a->diag[i] - ai[i];
1331: sum = b[*r++];
1332: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1333: tmp[i] = sum;
1334: }
1336: /* backward solve the upper triangular */
1337: for (i = n - 1; i >= 0; i--) {
1338: v = aa + a->diag[i] + 1;
1339: vi = aj + a->diag[i] + 1;
1340: nz = ai[i + 1] - a->diag[i] - 1;
1341: sum = tmp[i];
1342: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1343: tmp[i] = sum * aa[a->diag[i]];
1344: x[*c--] += tmp[i];
1345: }
1347: PetscCall(ISRestoreIndices(isrow, &rout));
1348: PetscCall(ISRestoreIndices(iscol, &cout));
1349: PetscCall(VecRestoreArrayRead(bb, &b));
1350: PetscCall(VecRestoreArray(xx, &x));
1351: PetscCall(PetscLogFlops(2.0 * a->nz));
1352: PetscFunctionReturn(PETSC_SUCCESS);
1353: }
1355: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1356: {
1357: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1358: IS iscol = a->col, isrow = a->row;
1359: PetscInt i, n = A->rmap->n, j;
1360: PetscInt nz;
1361: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1362: PetscScalar *x, *tmp, sum;
1363: const PetscScalar *b;
1364: const MatScalar *aa = a->a, *v;
1366: PetscFunctionBegin;
1367: if (yy != xx) PetscCall(VecCopy(yy, xx));
1369: PetscCall(VecGetArrayRead(bb, &b));
1370: PetscCall(VecGetArray(xx, &x));
1371: tmp = a->solve_work;
1373: PetscCall(ISGetIndices(isrow, &rout));
1374: r = rout;
1375: PetscCall(ISGetIndices(iscol, &cout));
1376: c = cout;
1378: /* forward solve the lower triangular */
1379: tmp[0] = b[r[0]];
1380: v = aa;
1381: vi = aj;
1382: for (i = 1; i < n; i++) {
1383: nz = ai[i + 1] - ai[i];
1384: sum = b[r[i]];
1385: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1386: tmp[i] = sum;
1387: v += nz;
1388: vi += nz;
1389: }
1391: /* backward solve the upper triangular */
1392: v = aa + adiag[n - 1];
1393: vi = aj + adiag[n - 1];
1394: for (i = n - 1; i >= 0; i--) {
1395: nz = adiag[i] - adiag[i + 1] - 1;
1396: sum = tmp[i];
1397: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1398: tmp[i] = sum * v[nz];
1399: x[c[i]] += tmp[i];
1400: v += nz + 1;
1401: vi += nz + 1;
1402: }
1404: PetscCall(ISRestoreIndices(isrow, &rout));
1405: PetscCall(ISRestoreIndices(iscol, &cout));
1406: PetscCall(VecRestoreArrayRead(bb, &b));
1407: PetscCall(VecRestoreArray(xx, &x));
1408: PetscCall(PetscLogFlops(2.0 * a->nz));
1409: PetscFunctionReturn(PETSC_SUCCESS);
1410: }
1412: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1413: {
1414: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1415: IS iscol = a->col, isrow = a->row;
1416: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1417: PetscInt i, n = A->rmap->n, j;
1418: PetscInt nz;
1419: PetscScalar *x, *tmp, s1;
1420: const MatScalar *aa = a->a, *v;
1421: const PetscScalar *b;
1423: PetscFunctionBegin;
1424: PetscCall(VecGetArrayRead(bb, &b));
1425: PetscCall(VecGetArrayWrite(xx, &x));
1426: tmp = a->solve_work;
1428: PetscCall(ISGetIndices(isrow, &rout));
1429: r = rout;
1430: PetscCall(ISGetIndices(iscol, &cout));
1431: c = cout;
1433: /* copy the b into temp work space according to permutation */
1434: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1436: /* forward solve the U^T */
1437: for (i = 0; i < n; i++) {
1438: v = aa + diag[i];
1439: vi = aj + diag[i] + 1;
1440: nz = ai[i + 1] - diag[i] - 1;
1441: s1 = tmp[i];
1442: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1443: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1444: tmp[i] = s1;
1445: }
1447: /* backward solve the L^T */
1448: for (i = n - 1; i >= 0; i--) {
1449: v = aa + diag[i] - 1;
1450: vi = aj + diag[i] - 1;
1451: nz = diag[i] - ai[i];
1452: s1 = tmp[i];
1453: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1454: }
1456: /* copy tmp into x according to permutation */
1457: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1459: PetscCall(ISRestoreIndices(isrow, &rout));
1460: PetscCall(ISRestoreIndices(iscol, &cout));
1461: PetscCall(VecRestoreArrayRead(bb, &b));
1462: PetscCall(VecRestoreArrayWrite(xx, &x));
1464: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1465: PetscFunctionReturn(PETSC_SUCCESS);
1466: }
1468: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1469: {
1470: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1471: IS iscol = a->col, isrow = a->row;
1472: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1473: PetscInt i, n = A->rmap->n, j;
1474: PetscInt nz;
1475: PetscScalar *x, *tmp, s1;
1476: const MatScalar *aa = a->a, *v;
1477: const PetscScalar *b;
1479: PetscFunctionBegin;
1480: PetscCall(VecGetArrayRead(bb, &b));
1481: PetscCall(VecGetArrayWrite(xx, &x));
1482: tmp = a->solve_work;
1484: PetscCall(ISGetIndices(isrow, &rout));
1485: r = rout;
1486: PetscCall(ISGetIndices(iscol, &cout));
1487: c = cout;
1489: /* copy the b into temp work space according to permutation */
1490: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1492: /* forward solve the U^T */
1493: for (i = 0; i < n; i++) {
1494: v = aa + adiag[i + 1] + 1;
1495: vi = aj + adiag[i + 1] + 1;
1496: nz = adiag[i] - adiag[i + 1] - 1;
1497: s1 = tmp[i];
1498: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1499: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1500: tmp[i] = s1;
1501: }
1503: /* backward solve the L^T */
1504: for (i = n - 1; i >= 0; i--) {
1505: v = aa + ai[i];
1506: vi = aj + ai[i];
1507: nz = ai[i + 1] - ai[i];
1508: s1 = tmp[i];
1509: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1510: }
1512: /* copy tmp into x according to permutation */
1513: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1515: PetscCall(ISRestoreIndices(isrow, &rout));
1516: PetscCall(ISRestoreIndices(iscol, &cout));
1517: PetscCall(VecRestoreArrayRead(bb, &b));
1518: PetscCall(VecRestoreArrayWrite(xx, &x));
1520: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1521: PetscFunctionReturn(PETSC_SUCCESS);
1522: }
1524: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1525: {
1526: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1527: IS iscol = a->col, isrow = a->row;
1528: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1529: PetscInt i, n = A->rmap->n, j;
1530: PetscInt nz;
1531: PetscScalar *x, *tmp, s1;
1532: const MatScalar *aa = a->a, *v;
1533: const PetscScalar *b;
1535: PetscFunctionBegin;
1536: if (zz != xx) PetscCall(VecCopy(zz, xx));
1537: PetscCall(VecGetArrayRead(bb, &b));
1538: PetscCall(VecGetArray(xx, &x));
1539: tmp = a->solve_work;
1541: PetscCall(ISGetIndices(isrow, &rout));
1542: r = rout;
1543: PetscCall(ISGetIndices(iscol, &cout));
1544: c = cout;
1546: /* copy the b into temp work space according to permutation */
1547: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1549: /* forward solve the U^T */
1550: for (i = 0; i < n; i++) {
1551: v = aa + diag[i];
1552: vi = aj + diag[i] + 1;
1553: nz = ai[i + 1] - diag[i] - 1;
1554: s1 = tmp[i];
1555: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1556: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1557: tmp[i] = s1;
1558: }
1560: /* backward solve the L^T */
1561: for (i = n - 1; i >= 0; i--) {
1562: v = aa + diag[i] - 1;
1563: vi = aj + diag[i] - 1;
1564: nz = diag[i] - ai[i];
1565: s1 = tmp[i];
1566: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1567: }
1569: /* copy tmp into x according to permutation */
1570: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1572: PetscCall(ISRestoreIndices(isrow, &rout));
1573: PetscCall(ISRestoreIndices(iscol, &cout));
1574: PetscCall(VecRestoreArrayRead(bb, &b));
1575: PetscCall(VecRestoreArray(xx, &x));
1577: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1578: PetscFunctionReturn(PETSC_SUCCESS);
1579: }
1581: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1582: {
1583: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1584: IS iscol = a->col, isrow = a->row;
1585: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1586: PetscInt i, n = A->rmap->n, j;
1587: PetscInt nz;
1588: PetscScalar *x, *tmp, s1;
1589: const MatScalar *aa = a->a, *v;
1590: const PetscScalar *b;
1592: PetscFunctionBegin;
1593: if (zz != xx) PetscCall(VecCopy(zz, xx));
1594: PetscCall(VecGetArrayRead(bb, &b));
1595: PetscCall(VecGetArray(xx, &x));
1596: tmp = a->solve_work;
1598: PetscCall(ISGetIndices(isrow, &rout));
1599: r = rout;
1600: PetscCall(ISGetIndices(iscol, &cout));
1601: c = cout;
1603: /* copy the b into temp work space according to permutation */
1604: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1606: /* forward solve the U^T */
1607: for (i = 0; i < n; i++) {
1608: v = aa + adiag[i + 1] + 1;
1609: vi = aj + adiag[i + 1] + 1;
1610: nz = adiag[i] - adiag[i + 1] - 1;
1611: s1 = tmp[i];
1612: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1613: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1614: tmp[i] = s1;
1615: }
1617: /* backward solve the L^T */
1618: for (i = n - 1; i >= 0; i--) {
1619: v = aa + ai[i];
1620: vi = aj + ai[i];
1621: nz = ai[i + 1] - ai[i];
1622: s1 = tmp[i];
1623: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1624: }
1626: /* copy tmp into x according to permutation */
1627: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1629: PetscCall(ISRestoreIndices(isrow, &rout));
1630: PetscCall(ISRestoreIndices(iscol, &cout));
1631: PetscCall(VecRestoreArrayRead(bb, &b));
1632: PetscCall(VecRestoreArray(xx, &x));
1634: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1635: PetscFunctionReturn(PETSC_SUCCESS);
1636: }
1638: /*
1639: ilu() under revised new data structure.
1640: Factored arrays bj and ba are stored as
1641: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1643: bi=fact->i is an array of size n+1, in which
1644: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1645: bi[n]: points to L(n-1,n-1)+1
1647: bdiag=fact->diag is an array of size n+1,in which
1648: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1649: bdiag[n]: points to entry of U(n-1,0)-1
1651: U(i,:) contains bdiag[i] as its last entry, i.e.,
1652: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1653: */
1654: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1655: {
1656: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1657: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1658: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1659: IS isicol;
1661: PetscFunctionBegin;
1662: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1663: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1664: b = (Mat_SeqAIJ *)fact->data;
1666: /* allocate matrix arrays for new data structure */
1667: PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a));
1668: PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j));
1669: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1670: if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1671: b->free_a = PETSC_TRUE;
1672: b->free_ij = PETSC_TRUE;
1674: if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1675: bdiag = b->diag;
1677: /* set bi and bj with new data structure */
1678: bi = b->i;
1679: bj = b->j;
1681: /* L part */
1682: bi[0] = 0;
1683: for (i = 0; i < n; i++) {
1684: nz = adiag[i] - ai[i];
1685: bi[i + 1] = bi[i] + nz;
1686: aj = a->j + ai[i];
1687: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1688: }
1690: /* U part */
1691: bdiag[n] = bi[n] - 1;
1692: for (i = n - 1; i >= 0; i--) {
1693: nz = ai[i + 1] - adiag[i] - 1;
1694: aj = a->j + adiag[i] + 1;
1695: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1696: /* diag[i] */
1697: bj[k++] = i;
1698: bdiag[i] = bdiag[i + 1] + nz + 1;
1699: }
1701: fact->factortype = MAT_FACTOR_ILU;
1702: fact->info.factor_mallocs = 0;
1703: fact->info.fill_ratio_given = info->fill;
1704: fact->info.fill_ratio_needed = 1.0;
1705: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1706: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1708: b = (Mat_SeqAIJ *)fact->data;
1709: b->row = isrow;
1710: b->col = iscol;
1711: b->icol = isicol;
1712: PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1713: PetscCall(PetscObjectReference((PetscObject)isrow));
1714: PetscCall(PetscObjectReference((PetscObject)iscol));
1715: PetscFunctionReturn(PETSC_SUCCESS);
1716: }
1718: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1719: {
1720: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1721: IS isicol;
1722: const PetscInt *r, *ic;
1723: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1724: PetscInt *bi, *cols, nnz, *cols_lvl;
1725: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1726: PetscInt i, levels, diagonal_fill;
1727: PetscBool col_identity, row_identity, missing;
1728: PetscReal f;
1729: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1730: PetscBT lnkbt;
1731: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1732: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1733: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1735: PetscFunctionBegin;
1736: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1737: PetscCall(MatMissingDiagonal(A, &missing, &i));
1738: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1740: levels = (PetscInt)info->levels;
1741: PetscCall(ISIdentity(isrow, &row_identity));
1742: PetscCall(ISIdentity(iscol, &col_identity));
1743: if (!levels && row_identity && col_identity) {
1744: /* special case: ilu(0) with natural ordering */
1745: PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1746: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1747: PetscFunctionReturn(PETSC_SUCCESS);
1748: }
1750: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1751: PetscCall(ISGetIndices(isrow, &r));
1752: PetscCall(ISGetIndices(isicol, &ic));
1754: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1755: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1756: PetscCall(PetscMalloc1(n + 1, &bdiag));
1757: bi[0] = bdiag[0] = 0;
1758: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1760: /* create a linked list for storing column indices of the active row */
1761: nlnk = n + 1;
1762: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1764: /* initial FreeSpace size is f*(ai[n]+1) */
1765: f = info->fill;
1766: diagonal_fill = (PetscInt)info->diagonal_fill;
1767: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1768: current_space = free_space;
1769: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1770: current_space_lvl = free_space_lvl;
1771: for (i = 0; i < n; i++) {
1772: nzi = 0;
1773: /* copy current row into linked list */
1774: nnz = ai[r[i] + 1] - ai[r[i]];
1775: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1776: cols = aj + ai[r[i]];
1777: lnk[i] = -1; /* marker to indicate if diagonal exists */
1778: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1779: nzi += nlnk;
1781: /* make sure diagonal entry is included */
1782: if (diagonal_fill && lnk[i] == -1) {
1783: fm = n;
1784: while (lnk[fm] < i) fm = lnk[fm];
1785: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1786: lnk[fm] = i;
1787: lnk_lvl[i] = 0;
1788: nzi++;
1789: dcount++;
1790: }
1792: /* add pivot rows into the active row */
1793: nzbd = 0;
1794: prow = lnk[n];
1795: while (prow < i) {
1796: nnz = bdiag[prow];
1797: cols = bj_ptr[prow] + nnz + 1;
1798: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1799: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1800: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1801: nzi += nlnk;
1802: prow = lnk[prow];
1803: nzbd++;
1804: }
1805: bdiag[i] = nzbd;
1806: bi[i + 1] = bi[i] + nzi;
1807: /* if free space is not available, make more free space */
1808: if (current_space->local_remaining < nzi) {
1809: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1810: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1811: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1812: reallocs++;
1813: }
1815: /* copy data into free_space and free_space_lvl, then initialize lnk */
1816: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1817: bj_ptr[i] = current_space->array;
1818: bjlvl_ptr[i] = current_space_lvl->array;
1820: /* make sure the active row i has diagonal entry */
1821: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1823: current_space->array += nzi;
1824: current_space->local_used += nzi;
1825: current_space->local_remaining -= nzi;
1826: current_space_lvl->array += nzi;
1827: current_space_lvl->local_used += nzi;
1828: current_space_lvl->local_remaining -= nzi;
1829: }
1831: PetscCall(ISRestoreIndices(isrow, &r));
1832: PetscCall(ISRestoreIndices(isicol, &ic));
1833: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1834: PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
1835: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1837: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1838: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1839: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1841: #if defined(PETSC_USE_INFO)
1842: {
1843: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1844: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1845: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1846: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1847: PetscCall(PetscInfo(A, "for best performance.\n"));
1848: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1849: }
1850: #endif
1851: /* put together the new matrix */
1852: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1853: b = (Mat_SeqAIJ *)fact->data;
1854: b->free_ij = PETSC_TRUE;
1855: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1856: b->free_a = PETSC_TRUE;
1857: b->j = bj;
1858: b->i = bi;
1859: b->diag = bdiag;
1860: b->ilen = NULL;
1861: b->imax = NULL;
1862: b->row = isrow;
1863: b->col = iscol;
1864: PetscCall(PetscObjectReference((PetscObject)isrow));
1865: PetscCall(PetscObjectReference((PetscObject)iscol));
1866: b->icol = isicol;
1868: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1869: /* In b structure: Free imax, ilen, old a, old j.
1870: Allocate bdiag, solve_work, new a, new j */
1871: b->maxnz = b->nz = bdiag[0] + 1;
1873: fact->info.factor_mallocs = reallocs;
1874: fact->info.fill_ratio_given = f;
1875: fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1876: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1877: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1878: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1879: PetscFunctionReturn(PETSC_SUCCESS);
1880: }
1882: #if 0
1883: // unused
1884: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1885: {
1886: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1887: IS isicol;
1888: const PetscInt *r, *ic;
1889: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1890: PetscInt *bi, *cols, nnz, *cols_lvl;
1891: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1892: PetscInt i, levels, diagonal_fill;
1893: PetscBool col_identity, row_identity;
1894: PetscReal f;
1895: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1896: PetscBT lnkbt;
1897: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1898: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1899: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1900: PetscBool missing;
1902: PetscFunctionBegin;
1903: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1904: PetscCall(MatMissingDiagonal(A, &missing, &i));
1905: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1907: f = info->fill;
1908: levels = (PetscInt)info->levels;
1909: diagonal_fill = (PetscInt)info->diagonal_fill;
1911: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1913: PetscCall(ISIdentity(isrow, &row_identity));
1914: PetscCall(ISIdentity(iscol, &col_identity));
1915: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1916: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1918: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1919: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1920: fact->factortype = MAT_FACTOR_ILU;
1921: (fact)->info.factor_mallocs = 0;
1922: (fact)->info.fill_ratio_given = info->fill;
1923: (fact)->info.fill_ratio_needed = 1.0;
1925: b = (Mat_SeqAIJ *)fact->data;
1926: b->row = isrow;
1927: b->col = iscol;
1928: b->icol = isicol;
1929: PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1930: PetscCall(PetscObjectReference((PetscObject)isrow));
1931: PetscCall(PetscObjectReference((PetscObject)iscol));
1932: PetscFunctionReturn(PETSC_SUCCESS);
1933: }
1935: PetscCall(ISGetIndices(isrow, &r));
1936: PetscCall(ISGetIndices(isicol, &ic));
1938: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1939: PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1940: PetscCall(PetscMalloc1(n + 1, &bdiag));
1941: bi[0] = bdiag[0] = 0;
1943: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1945: /* create a linked list for storing column indices of the active row */
1946: nlnk = n + 1;
1947: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1949: /* initial FreeSpace size is f*(ai[n]+1) */
1950: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1951: current_space = free_space;
1952: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1953: current_space_lvl = free_space_lvl;
1955: for (i = 0; i < n; i++) {
1956: nzi = 0;
1957: /* copy current row into linked list */
1958: nnz = ai[r[i] + 1] - ai[r[i]];
1959: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1960: cols = aj + ai[r[i]];
1961: lnk[i] = -1; /* marker to indicate if diagonal exists */
1962: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1963: nzi += nlnk;
1965: /* make sure diagonal entry is included */
1966: if (diagonal_fill && lnk[i] == -1) {
1967: fm = n;
1968: while (lnk[fm] < i) fm = lnk[fm];
1969: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1970: lnk[fm] = i;
1971: lnk_lvl[i] = 0;
1972: nzi++;
1973: dcount++;
1974: }
1976: /* add pivot rows into the active row */
1977: nzbd = 0;
1978: prow = lnk[n];
1979: while (prow < i) {
1980: nnz = bdiag[prow];
1981: cols = bj_ptr[prow] + nnz + 1;
1982: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1983: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1984: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1985: nzi += nlnk;
1986: prow = lnk[prow];
1987: nzbd++;
1988: }
1989: bdiag[i] = nzbd;
1990: bi[i + 1] = bi[i] + nzi;
1992: /* if free space is not available, make more free space */
1993: if (current_space->local_remaining < nzi) {
1994: nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
1995: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1996: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1997: reallocs++;
1998: }
2000: /* copy data into free_space and free_space_lvl, then initialize lnk */
2001: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2002: bj_ptr[i] = current_space->array;
2003: bjlvl_ptr[i] = current_space_lvl->array;
2005: /* make sure the active row i has diagonal entry */
2006: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
2008: current_space->array += nzi;
2009: current_space->local_used += nzi;
2010: current_space->local_remaining -= nzi;
2011: current_space_lvl->array += nzi;
2012: current_space_lvl->local_used += nzi;
2013: current_space_lvl->local_remaining -= nzi;
2014: }
2016: PetscCall(ISRestoreIndices(isrow, &r));
2017: PetscCall(ISRestoreIndices(isicol, &ic));
2019: /* destroy list of free space and other temporary arrays */
2020: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2021: PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2022: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2023: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2024: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
2026: #if defined(PETSC_USE_INFO)
2027: {
2028: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2029: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2030: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2031: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2032: PetscCall(PetscInfo(A, "for best performance.\n"));
2033: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2034: }
2035: #endif
2037: /* put together the new matrix */
2038: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2039: b = (Mat_SeqAIJ *)fact->data;
2040: b->free_ij = PETSC_TRUE;
2041: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2042: b->free_a = PETSC_TRUE;
2043: b->j = bj;
2044: b->i = bi;
2045: for (i = 0; i < n; i++) bdiag[i] += bi[i];
2046: b->diag = bdiag;
2047: b->ilen = NULL;
2048: b->imax = NULL;
2049: b->row = isrow;
2050: b->col = iscol;
2051: PetscCall(PetscObjectReference((PetscObject)isrow));
2052: PetscCall(PetscObjectReference((PetscObject)iscol));
2053: b->icol = isicol;
2054: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2055: /* In b structure: Free imax, ilen, old a, old j.
2056: Allocate bdiag, solve_work, new a, new j */
2057: b->maxnz = b->nz = bi[n];
2059: fact->info.factor_mallocs = reallocs;
2060: fact->info.fill_ratio_given = f;
2061: fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2062: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2063: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2064: PetscFunctionReturn(PETSC_SUCCESS);
2065: }
2066: #endif
2068: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2069: {
2070: Mat C = B;
2071: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2072: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2073: IS ip = b->row, iip = b->icol;
2074: const PetscInt *rip, *riip;
2075: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2076: PetscInt *ai = a->i, *aj = a->j;
2077: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2078: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2079: PetscBool perm_identity;
2080: FactorShiftCtx sctx;
2081: PetscReal rs;
2082: MatScalar d, *v;
2084: PetscFunctionBegin;
2085: /* MatPivotSetUp(): initialize shift context sctx */
2086: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2088: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2089: sctx.shift_top = info->zeropivot;
2090: for (i = 0; i < mbs; i++) {
2091: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2092: d = (aa)[a->diag[i]];
2093: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2094: v = aa + ai[i];
2095: nz = ai[i + 1] - ai[i];
2096: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2097: if (rs > sctx.shift_top) sctx.shift_top = rs;
2098: }
2099: sctx.shift_top *= 1.1;
2100: sctx.nshift_max = 5;
2101: sctx.shift_lo = 0.;
2102: sctx.shift_hi = 1.;
2103: }
2105: PetscCall(ISGetIndices(ip, &rip));
2106: PetscCall(ISGetIndices(iip, &riip));
2108: /* allocate working arrays
2109: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2110: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2111: */
2112: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2114: do {
2115: sctx.newshift = PETSC_FALSE;
2117: for (i = 0; i < mbs; i++) c2r[i] = mbs;
2118: if (mbs) il[0] = 0;
2120: for (k = 0; k < mbs; k++) {
2121: /* zero rtmp */
2122: nz = bi[k + 1] - bi[k];
2123: bjtmp = bj + bi[k];
2124: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2126: /* load in initial unfactored row */
2127: bval = ba + bi[k];
2128: jmin = ai[rip[k]];
2129: jmax = ai[rip[k] + 1];
2130: for (j = jmin; j < jmax; j++) {
2131: col = riip[aj[j]];
2132: if (col >= k) { /* only take upper triangular entry */
2133: rtmp[col] = aa[j];
2134: *bval++ = 0.0; /* for in-place factorization */
2135: }
2136: }
2137: /* shift the diagonal of the matrix: ZeropivotApply() */
2138: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2140: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2141: dk = rtmp[k];
2142: i = c2r[k]; /* first row to be added to k_th row */
2144: while (i < k) {
2145: nexti = c2r[i]; /* next row to be added to k_th row */
2147: /* compute multiplier, update diag(k) and U(i,k) */
2148: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2149: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2150: dk += uikdi * ba[ili]; /* update diag[k] */
2151: ba[ili] = uikdi; /* -U(i,k) */
2153: /* add multiple of row i to k-th row */
2154: jmin = ili + 1;
2155: jmax = bi[i + 1];
2156: if (jmin < jmax) {
2157: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2158: /* update il and c2r for row i */
2159: il[i] = jmin;
2160: j = bj[jmin];
2161: c2r[i] = c2r[j];
2162: c2r[j] = i;
2163: }
2164: i = nexti;
2165: }
2167: /* copy data into U(k,:) */
2168: rs = 0.0;
2169: jmin = bi[k];
2170: jmax = bi[k + 1] - 1;
2171: if (jmin < jmax) {
2172: for (j = jmin; j < jmax; j++) {
2173: col = bj[j];
2174: ba[j] = rtmp[col];
2175: rs += PetscAbsScalar(ba[j]);
2176: }
2177: /* add the k-th row into il and c2r */
2178: il[k] = jmin;
2179: i = bj[jmin];
2180: c2r[k] = c2r[i];
2181: c2r[i] = k;
2182: }
2184: /* MatPivotCheck() */
2185: sctx.rs = rs;
2186: sctx.pv = dk;
2187: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2188: if (sctx.newshift) break;
2189: dk = sctx.pv;
2191: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2192: }
2193: } while (sctx.newshift);
2195: PetscCall(PetscFree3(rtmp, il, c2r));
2196: PetscCall(ISRestoreIndices(ip, &rip));
2197: PetscCall(ISRestoreIndices(iip, &riip));
2199: PetscCall(ISIdentity(ip, &perm_identity));
2200: if (perm_identity) {
2201: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2202: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2203: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2204: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2205: } else {
2206: B->ops->solve = MatSolve_SeqSBAIJ_1;
2207: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2208: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2209: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2210: }
2212: C->assembled = PETSC_TRUE;
2213: C->preallocated = PETSC_TRUE;
2215: PetscCall(PetscLogFlops(C->rmap->n));
2217: /* MatPivotView() */
2218: if (sctx.nshift) {
2219: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2220: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2221: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2222: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2223: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2224: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2225: }
2226: }
2227: PetscFunctionReturn(PETSC_SUCCESS);
2228: }
2230: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2231: {
2232: Mat C = B;
2233: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2234: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2235: IS ip = b->row, iip = b->icol;
2236: const PetscInt *rip, *riip;
2237: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2238: PetscInt *ai = a->i, *aj = a->j;
2239: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2240: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
2241: PetscBool perm_identity;
2242: FactorShiftCtx sctx;
2243: PetscReal rs;
2244: MatScalar d, *v;
2246: PetscFunctionBegin;
2247: /* MatPivotSetUp(): initialize shift context sctx */
2248: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2250: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2251: sctx.shift_top = info->zeropivot;
2252: for (i = 0; i < mbs; i++) {
2253: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2254: d = (aa)[a->diag[i]];
2255: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2256: v = aa + ai[i];
2257: nz = ai[i + 1] - ai[i];
2258: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2259: if (rs > sctx.shift_top) sctx.shift_top = rs;
2260: }
2261: sctx.shift_top *= 1.1;
2262: sctx.nshift_max = 5;
2263: sctx.shift_lo = 0.;
2264: sctx.shift_hi = 1.;
2265: }
2267: PetscCall(ISGetIndices(ip, &rip));
2268: PetscCall(ISGetIndices(iip, &riip));
2270: /* initialization */
2271: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2273: do {
2274: sctx.newshift = PETSC_FALSE;
2276: for (i = 0; i < mbs; i++) jl[i] = mbs;
2277: il[0] = 0;
2279: for (k = 0; k < mbs; k++) {
2280: /* zero rtmp */
2281: nz = bi[k + 1] - bi[k];
2282: bjtmp = bj + bi[k];
2283: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2285: bval = ba + bi[k];
2286: /* initialize k-th row by the perm[k]-th row of A */
2287: jmin = ai[rip[k]];
2288: jmax = ai[rip[k] + 1];
2289: for (j = jmin; j < jmax; j++) {
2290: col = riip[aj[j]];
2291: if (col >= k) { /* only take upper triangular entry */
2292: rtmp[col] = aa[j];
2293: *bval++ = 0.0; /* for in-place factorization */
2294: }
2295: }
2296: /* shift the diagonal of the matrix */
2297: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2299: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2300: dk = rtmp[k];
2301: i = jl[k]; /* first row to be added to k_th row */
2303: while (i < k) {
2304: nexti = jl[i]; /* next row to be added to k_th row */
2306: /* compute multiplier, update diag(k) and U(i,k) */
2307: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2308: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2309: dk += uikdi * ba[ili];
2310: ba[ili] = uikdi; /* -U(i,k) */
2312: /* add multiple of row i to k-th row */
2313: jmin = ili + 1;
2314: jmax = bi[i + 1];
2315: if (jmin < jmax) {
2316: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2317: /* update il and jl for row i */
2318: il[i] = jmin;
2319: j = bj[jmin];
2320: jl[i] = jl[j];
2321: jl[j] = i;
2322: }
2323: i = nexti;
2324: }
2326: /* shift the diagonals when zero pivot is detected */
2327: /* compute rs=sum of abs(off-diagonal) */
2328: rs = 0.0;
2329: jmin = bi[k] + 1;
2330: nz = bi[k + 1] - jmin;
2331: bcol = bj + jmin;
2332: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2334: sctx.rs = rs;
2335: sctx.pv = dk;
2336: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2337: if (sctx.newshift) break;
2338: dk = sctx.pv;
2340: /* copy data into U(k,:) */
2341: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2342: jmin = bi[k] + 1;
2343: jmax = bi[k + 1];
2344: if (jmin < jmax) {
2345: for (j = jmin; j < jmax; j++) {
2346: col = bj[j];
2347: ba[j] = rtmp[col];
2348: }
2349: /* add the k-th row into il and jl */
2350: il[k] = jmin;
2351: i = bj[jmin];
2352: jl[k] = jl[i];
2353: jl[i] = k;
2354: }
2355: }
2356: } while (sctx.newshift);
2358: PetscCall(PetscFree3(rtmp, il, jl));
2359: PetscCall(ISRestoreIndices(ip, &rip));
2360: PetscCall(ISRestoreIndices(iip, &riip));
2362: PetscCall(ISIdentity(ip, &perm_identity));
2363: if (perm_identity) {
2364: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2365: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2366: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2367: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2368: } else {
2369: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2370: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2371: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2372: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2373: }
2375: C->assembled = PETSC_TRUE;
2376: C->preallocated = PETSC_TRUE;
2378: PetscCall(PetscLogFlops(C->rmap->n));
2379: if (sctx.nshift) {
2380: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2381: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2382: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2383: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2384: }
2385: }
2386: PetscFunctionReturn(PETSC_SUCCESS);
2387: }
2389: /*
2390: icc() under revised new data structure.
2391: Factored arrays bj and ba are stored as
2392: U(0,:),...,U(i,:),U(n-1,:)
2394: ui=fact->i is an array of size n+1, in which
2395: ui+
2396: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2397: ui[n]: points to U(n-1,n-1)+1
2399: udiag=fact->diag is an array of size n,in which
2400: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2402: U(i,:) contains udiag[i] as its last entry, i.e.,
2403: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2404: */
2406: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2407: {
2408: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2409: Mat_SeqSBAIJ *b;
2410: PetscBool perm_identity, missing;
2411: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2412: const PetscInt *rip, *riip;
2413: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2414: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2415: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2416: PetscReal fill = info->fill, levels = info->levels;
2417: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2418: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2419: PetscBT lnkbt;
2420: IS iperm;
2422: PetscFunctionBegin;
2423: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2424: PetscCall(MatMissingDiagonal(A, &missing, &d));
2425: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2426: PetscCall(ISIdentity(perm, &perm_identity));
2427: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2429: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2430: PetscCall(PetscMalloc1(am + 1, &udiag));
2431: ui[0] = 0;
2433: /* ICC(0) without matrix ordering: simply rearrange column indices */
2434: if (!levels && perm_identity) {
2435: for (i = 0; i < am; i++) {
2436: ncols = ai[i + 1] - a->diag[i];
2437: ui[i + 1] = ui[i] + ncols;
2438: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2439: }
2440: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2441: cols = uj;
2442: for (i = 0; i < am; i++) {
2443: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2444: ncols = ai[i + 1] - a->diag[i] - 1;
2445: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2446: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2447: }
2448: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2449: PetscCall(ISGetIndices(iperm, &riip));
2450: PetscCall(ISGetIndices(perm, &rip));
2452: /* initialization */
2453: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2455: /* jl: linked list for storing indices of the pivot rows
2456: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2457: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2458: for (i = 0; i < am; i++) {
2459: jl[i] = am;
2460: il[i] = 0;
2461: }
2463: /* create and initialize a linked list for storing column indices of the active row k */
2464: nlnk = am + 1;
2465: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2467: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2468: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2469: current_space = free_space;
2470: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2471: current_space_lvl = free_space_lvl;
2473: for (k = 0; k < am; k++) { /* for each active row k */
2474: /* initialize lnk by the column indices of row rip[k] of A */
2475: nzk = 0;
2476: ncols = ai[rip[k] + 1] - ai[rip[k]];
2477: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2478: ncols_upper = 0;
2479: for (j = 0; j < ncols; j++) {
2480: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2481: if (riip[i] >= k) { /* only take upper triangular entry */
2482: ajtmp[ncols_upper] = i;
2483: ncols_upper++;
2484: }
2485: }
2486: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2487: nzk += nlnk;
2489: /* update lnk by computing fill-in for each pivot row to be merged in */
2490: prow = jl[k]; /* 1st pivot row */
2492: while (prow < k) {
2493: nextprow = jl[prow];
2495: /* merge prow into k-th row */
2496: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2497: jmax = ui[prow + 1];
2498: ncols = jmax - jmin;
2499: i = jmin - ui[prow];
2500: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2501: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2502: j = *(uj - 1);
2503: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2504: nzk += nlnk;
2506: /* update il and jl for prow */
2507: if (jmin < jmax) {
2508: il[prow] = jmin;
2509: j = *cols;
2510: jl[prow] = jl[j];
2511: jl[j] = prow;
2512: }
2513: prow = nextprow;
2514: }
2516: /* if free space is not available, make more free space */
2517: if (current_space->local_remaining < nzk) {
2518: i = am - k + 1; /* num of unfactored rows */
2519: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2520: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2521: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2522: reallocs++;
2523: }
2525: /* copy data into free_space and free_space_lvl, then initialize lnk */
2526: PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2527: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2529: /* add the k-th row into il and jl */
2530: if (nzk > 1) {
2531: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2532: jl[k] = jl[i];
2533: jl[i] = k;
2534: il[k] = ui[k] + 1;
2535: }
2536: uj_ptr[k] = current_space->array;
2537: uj_lvl_ptr[k] = current_space_lvl->array;
2539: current_space->array += nzk;
2540: current_space->local_used += nzk;
2541: current_space->local_remaining -= nzk;
2543: current_space_lvl->array += nzk;
2544: current_space_lvl->local_used += nzk;
2545: current_space_lvl->local_remaining -= nzk;
2547: ui[k + 1] = ui[k] + nzk;
2548: }
2550: PetscCall(ISRestoreIndices(perm, &rip));
2551: PetscCall(ISRestoreIndices(iperm, &riip));
2552: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2553: PetscCall(PetscFree(ajtmp));
2555: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2556: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2557: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2558: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2559: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2561: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2563: /* put together the new matrix in MATSEQSBAIJ format */
2564: b = (Mat_SeqSBAIJ *)fact->data;
2565: b->free_ij = PETSC_TRUE;
2566: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2567: b->free_a = PETSC_TRUE;
2568: b->j = uj;
2569: b->i = ui;
2570: b->diag = udiag;
2571: b->free_diag = PETSC_TRUE;
2572: b->ilen = NULL;
2573: b->imax = NULL;
2574: b->row = perm;
2575: b->col = perm;
2576: PetscCall(PetscObjectReference((PetscObject)perm));
2577: PetscCall(PetscObjectReference((PetscObject)perm));
2578: b->icol = iperm;
2579: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2581: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2583: b->maxnz = b->nz = ui[am];
2585: fact->info.factor_mallocs = reallocs;
2586: fact->info.fill_ratio_given = fill;
2587: if (ai[am] != 0) {
2588: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2589: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2590: } else {
2591: fact->info.fill_ratio_needed = 0.0;
2592: }
2593: #if defined(PETSC_USE_INFO)
2594: if (ai[am] != 0) {
2595: PetscReal af = fact->info.fill_ratio_needed;
2596: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2597: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2598: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2599: } else {
2600: PetscCall(PetscInfo(A, "Empty matrix\n"));
2601: }
2602: #endif
2603: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2604: PetscFunctionReturn(PETSC_SUCCESS);
2605: }
2607: #if 0
2608: // unused
2609: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2610: {
2611: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2612: Mat_SeqSBAIJ *b;
2613: PetscBool perm_identity, missing;
2614: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2615: const PetscInt *rip, *riip;
2616: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2617: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2618: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2619: PetscReal fill = info->fill, levels = info->levels;
2620: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2621: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2622: PetscBT lnkbt;
2623: IS iperm;
2625: PetscFunctionBegin;
2626: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2627: PetscCall(MatMissingDiagonal(A, &missing, &d));
2628: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2629: PetscCall(ISIdentity(perm, &perm_identity));
2630: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2632: PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
2633: PetscCall(PetscMalloc1(am + 1, &udiag));
2634: ui[0] = 0;
2636: /* ICC(0) without matrix ordering: simply copies fill pattern */
2637: if (!levels && perm_identity) {
2638: for (i = 0; i < am; i++) {
2639: ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2640: udiag[i] = ui[i];
2641: }
2642: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2643: cols = uj;
2644: for (i = 0; i < am; i++) {
2645: aj = a->j + a->diag[i];
2646: ncols = ui[i + 1] - ui[i];
2647: for (j = 0; j < ncols; j++) *cols++ = *aj++;
2648: }
2649: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2650: PetscCall(ISGetIndices(iperm, &riip));
2651: PetscCall(ISGetIndices(perm, &rip));
2653: /* initialization */
2654: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2656: /* jl: linked list for storing indices of the pivot rows
2657: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2658: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2659: for (i = 0; i < am; i++) {
2660: jl[i] = am;
2661: il[i] = 0;
2662: }
2664: /* create and initialize a linked list for storing column indices of the active row k */
2665: nlnk = am + 1;
2666: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2668: /* initial FreeSpace size is fill*(ai[am]+1) */
2669: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2670: current_space = free_space;
2671: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2672: current_space_lvl = free_space_lvl;
2674: for (k = 0; k < am; k++) { /* for each active row k */
2675: /* initialize lnk by the column indices of row rip[k] of A */
2676: nzk = 0;
2677: ncols = ai[rip[k] + 1] - ai[rip[k]];
2678: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2679: ncols_upper = 0;
2680: for (j = 0; j < ncols; j++) {
2681: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2682: if (riip[i] >= k) { /* only take upper triangular entry */
2683: ajtmp[ncols_upper] = i;
2684: ncols_upper++;
2685: }
2686: }
2687: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2688: nzk += nlnk;
2690: /* update lnk by computing fill-in for each pivot row to be merged in */
2691: prow = jl[k]; /* 1st pivot row */
2693: while (prow < k) {
2694: nextprow = jl[prow];
2696: /* merge prow into k-th row */
2697: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2698: jmax = ui[prow + 1];
2699: ncols = jmax - jmin;
2700: i = jmin - ui[prow];
2701: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2702: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2703: j = *(uj - 1);
2704: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2705: nzk += nlnk;
2707: /* update il and jl for prow */
2708: if (jmin < jmax) {
2709: il[prow] = jmin;
2710: j = *cols;
2711: jl[prow] = jl[j];
2712: jl[j] = prow;
2713: }
2714: prow = nextprow;
2715: }
2717: /* if free space is not available, make more free space */
2718: if (current_space->local_remaining < nzk) {
2719: i = am - k + 1; /* num of unfactored rows */
2720: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2721: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2722: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2723: reallocs++;
2724: }
2726: /* copy data into free_space and free_space_lvl, then initialize lnk */
2727: PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2728: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2730: /* add the k-th row into il and jl */
2731: if (nzk > 1) {
2732: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2733: jl[k] = jl[i];
2734: jl[i] = k;
2735: il[k] = ui[k] + 1;
2736: }
2737: uj_ptr[k] = current_space->array;
2738: uj_lvl_ptr[k] = current_space_lvl->array;
2740: current_space->array += nzk;
2741: current_space->local_used += nzk;
2742: current_space->local_remaining -= nzk;
2744: current_space_lvl->array += nzk;
2745: current_space_lvl->local_used += nzk;
2746: current_space_lvl->local_remaining -= nzk;
2748: ui[k + 1] = ui[k] + nzk;
2749: }
2751: #if defined(PETSC_USE_INFO)
2752: if (ai[am] != 0) {
2753: PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2754: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2755: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2756: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2757: } else {
2758: PetscCall(PetscInfo(A, "Empty matrix\n"));
2759: }
2760: #endif
2762: PetscCall(ISRestoreIndices(perm, &rip));
2763: PetscCall(ISRestoreIndices(iperm, &riip));
2764: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2765: PetscCall(PetscFree(ajtmp));
2767: /* destroy list of free space and other temporary array(s) */
2768: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
2769: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2770: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2771: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2773: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2775: /* put together the new matrix in MATSEQSBAIJ format */
2777: b = (Mat_SeqSBAIJ *)fact->data;
2778: b->free_ij = PETSC_TRUE;
2779: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
2780: b->free_a = PETSC_TRUE;
2781: b->j = uj;
2782: b->i = ui;
2783: b->diag = udiag;
2784: b->free_diag = PETSC_TRUE;
2785: b->ilen = NULL;
2786: b->imax = NULL;
2787: b->row = perm;
2788: b->col = perm;
2790: PetscCall(PetscObjectReference((PetscObject)perm));
2791: PetscCall(PetscObjectReference((PetscObject)perm));
2793: b->icol = iperm;
2794: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2795: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2796: b->maxnz = b->nz = ui[am];
2798: fact->info.factor_mallocs = reallocs;
2799: fact->info.fill_ratio_given = fill;
2800: if (ai[am] != 0) {
2801: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2802: } else {
2803: fact->info.fill_ratio_needed = 0.0;
2804: }
2805: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2806: PetscFunctionReturn(PETSC_SUCCESS);
2807: }
2808: #endif
2810: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2811: {
2812: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2813: Mat_SeqSBAIJ *b;
2814: PetscBool perm_identity, missing;
2815: PetscReal fill = info->fill;
2816: const PetscInt *rip, *riip;
2817: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2818: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2819: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2820: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2821: PetscBT lnkbt;
2822: IS iperm;
2824: PetscFunctionBegin;
2825: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2826: PetscCall(MatMissingDiagonal(A, &missing, &i));
2827: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2829: /* check whether perm is the identity mapping */
2830: PetscCall(ISIdentity(perm, &perm_identity));
2831: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2832: PetscCall(ISGetIndices(iperm, &riip));
2833: PetscCall(ISGetIndices(perm, &rip));
2835: /* initialization */
2836: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2837: PetscCall(PetscMalloc1(am + 1, &udiag));
2838: ui[0] = 0;
2840: /* jl: linked list for storing indices of the pivot rows
2841: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2842: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2843: for (i = 0; i < am; i++) {
2844: jl[i] = am;
2845: il[i] = 0;
2846: }
2848: /* create and initialize a linked list for storing column indices of the active row k */
2849: nlnk = am + 1;
2850: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2852: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2853: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2854: current_space = free_space;
2856: for (k = 0; k < am; k++) { /* for each active row k */
2857: /* initialize lnk by the column indices of row rip[k] of A */
2858: nzk = 0;
2859: ncols = ai[rip[k] + 1] - ai[rip[k]];
2860: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2861: ncols_upper = 0;
2862: for (j = 0; j < ncols; j++) {
2863: i = riip[*(aj + ai[rip[k]] + j)];
2864: if (i >= k) { /* only take upper triangular entry */
2865: cols[ncols_upper] = i;
2866: ncols_upper++;
2867: }
2868: }
2869: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2870: nzk += nlnk;
2872: /* update lnk by computing fill-in for each pivot row to be merged in */
2873: prow = jl[k]; /* 1st pivot row */
2875: while (prow < k) {
2876: nextprow = jl[prow];
2877: /* merge prow into k-th row */
2878: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2879: jmax = ui[prow + 1];
2880: ncols = jmax - jmin;
2881: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2882: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2883: nzk += nlnk;
2885: /* update il and jl for prow */
2886: if (jmin < jmax) {
2887: il[prow] = jmin;
2888: j = *uj_ptr;
2889: jl[prow] = jl[j];
2890: jl[j] = prow;
2891: }
2892: prow = nextprow;
2893: }
2895: /* if free space is not available, make more free space */
2896: if (current_space->local_remaining < nzk) {
2897: i = am - k + 1; /* num of unfactored rows */
2898: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2899: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2900: reallocs++;
2901: }
2903: /* copy data into free space, then initialize lnk */
2904: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2906: /* add the k-th row into il and jl */
2907: if (nzk > 1) {
2908: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2909: jl[k] = jl[i];
2910: jl[i] = k;
2911: il[k] = ui[k] + 1;
2912: }
2913: ui_ptr[k] = current_space->array;
2915: current_space->array += nzk;
2916: current_space->local_used += nzk;
2917: current_space->local_remaining -= nzk;
2919: ui[k + 1] = ui[k] + nzk;
2920: }
2922: PetscCall(ISRestoreIndices(perm, &rip));
2923: PetscCall(ISRestoreIndices(iperm, &riip));
2924: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2926: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2927: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2928: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2929: PetscCall(PetscLLDestroy(lnk, lnkbt));
2931: /* put together the new matrix in MATSEQSBAIJ format */
2932: b = (Mat_SeqSBAIJ *)fact->data;
2933: b->free_ij = PETSC_TRUE;
2934: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2935: b->free_a = PETSC_TRUE;
2936: b->j = uj;
2937: b->i = ui;
2938: b->diag = udiag;
2939: b->free_diag = PETSC_TRUE;
2940: b->ilen = NULL;
2941: b->imax = NULL;
2942: b->row = perm;
2943: b->col = perm;
2945: PetscCall(PetscObjectReference((PetscObject)perm));
2946: PetscCall(PetscObjectReference((PetscObject)perm));
2948: b->icol = iperm;
2949: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2951: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2953: b->maxnz = b->nz = ui[am];
2955: fact->info.factor_mallocs = reallocs;
2956: fact->info.fill_ratio_given = fill;
2957: if (ai[am] != 0) {
2958: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2959: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2960: } else {
2961: fact->info.fill_ratio_needed = 0.0;
2962: }
2963: #if defined(PETSC_USE_INFO)
2964: if (ai[am] != 0) {
2965: PetscReal af = fact->info.fill_ratio_needed;
2966: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2967: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2968: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2969: } else {
2970: PetscCall(PetscInfo(A, "Empty matrix\n"));
2971: }
2972: #endif
2973: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2974: PetscFunctionReturn(PETSC_SUCCESS);
2975: }
2977: #if 0
2978: // unused
2979: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2980: {
2981: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2982: Mat_SeqSBAIJ *b;
2983: PetscBool perm_identity, missing;
2984: PetscReal fill = info->fill;
2985: const PetscInt *rip, *riip;
2986: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2987: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2988: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
2989: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2990: PetscBT lnkbt;
2991: IS iperm;
2993: PetscFunctionBegin;
2994: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2995: PetscCall(MatMissingDiagonal(A, &missing, &i));
2996: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2998: /* check whether perm is the identity mapping */
2999: PetscCall(ISIdentity(perm, &perm_identity));
3000: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3001: PetscCall(ISGetIndices(iperm, &riip));
3002: PetscCall(ISGetIndices(perm, &rip));
3004: /* initialization */
3005: PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
3006: ui[0] = 0;
3008: /* jl: linked list for storing indices of the pivot rows
3009: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3010: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3011: for (i = 0; i < am; i++) {
3012: jl[i] = am;
3013: il[i] = 0;
3014: }
3016: /* create and initialize a linked list for storing column indices of the active row k */
3017: nlnk = am + 1;
3018: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
3020: /* initial FreeSpace size is fill*(ai[am]+1) */
3021: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3022: current_space = free_space;
3024: for (k = 0; k < am; k++) { /* for each active row k */
3025: /* initialize lnk by the column indices of row rip[k] of A */
3026: nzk = 0;
3027: ncols = ai[rip[k] + 1] - ai[rip[k]];
3028: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3029: ncols_upper = 0;
3030: for (j = 0; j < ncols; j++) {
3031: i = riip[*(aj + ai[rip[k]] + j)];
3032: if (i >= k) { /* only take upper triangular entry */
3033: cols[ncols_upper] = i;
3034: ncols_upper++;
3035: }
3036: }
3037: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3038: nzk += nlnk;
3040: /* update lnk by computing fill-in for each pivot row to be merged in */
3041: prow = jl[k]; /* 1st pivot row */
3043: while (prow < k) {
3044: nextprow = jl[prow];
3045: /* merge prow into k-th row */
3046: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3047: jmax = ui[prow + 1];
3048: ncols = jmax - jmin;
3049: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3050: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3051: nzk += nlnk;
3053: /* update il and jl for prow */
3054: if (jmin < jmax) {
3055: il[prow] = jmin;
3056: j = *uj_ptr;
3057: jl[prow] = jl[j];
3058: jl[j] = prow;
3059: }
3060: prow = nextprow;
3061: }
3063: /* if free space is not available, make more free space */
3064: if (current_space->local_remaining < nzk) {
3065: i = am - k + 1; /* num of unfactored rows */
3066: i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3067: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
3068: reallocs++;
3069: }
3071: /* copy data into free space, then initialize lnk */
3072: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3074: /* add the k-th row into il and jl */
3075: if (nzk - 1 > 0) {
3076: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3077: jl[k] = jl[i];
3078: jl[i] = k;
3079: il[k] = ui[k] + 1;
3080: }
3081: ui_ptr[k] = current_space->array;
3083: current_space->array += nzk;
3084: current_space->local_used += nzk;
3085: current_space->local_remaining -= nzk;
3087: ui[k + 1] = ui[k] + nzk;
3088: }
3090: #if defined(PETSC_USE_INFO)
3091: if (ai[am] != 0) {
3092: PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3093: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3094: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3095: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3096: } else {
3097: PetscCall(PetscInfo(A, "Empty matrix\n"));
3098: }
3099: #endif
3101: PetscCall(ISRestoreIndices(perm, &rip));
3102: PetscCall(ISRestoreIndices(iperm, &riip));
3103: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3105: /* destroy list of free space and other temporary array(s) */
3106: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
3107: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3108: PetscCall(PetscLLDestroy(lnk, lnkbt));
3110: /* put together the new matrix in MATSEQSBAIJ format */
3111: b = (Mat_SeqSBAIJ *)fact->data;
3112: b->free_ij = PETSC_TRUE;
3113: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
3114: b->free_a = PETSC_TRUE;
3115: b->j = uj;
3116: b->i = ui;
3117: b->diag = NULL;
3118: b->ilen = NULL;
3119: b->imax = NULL;
3120: b->row = perm;
3121: b->col = perm;
3123: PetscCall(PetscObjectReference((PetscObject)perm));
3124: PetscCall(PetscObjectReference((PetscObject)perm));
3126: b->icol = iperm;
3127: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3129: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3130: b->maxnz = b->nz = ui[am];
3132: fact->info.factor_mallocs = reallocs;
3133: fact->info.fill_ratio_given = fill;
3134: if (ai[am] != 0) {
3135: fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3136: } else {
3137: fact->info.fill_ratio_needed = 0.0;
3138: }
3139: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3140: PetscFunctionReturn(PETSC_SUCCESS);
3141: }
3142: #endif
3144: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3145: {
3146: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3147: PetscInt n = A->rmap->n;
3148: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3149: PetscScalar *x, sum;
3150: const PetscScalar *b;
3151: const MatScalar *aa = a->a, *v;
3152: PetscInt i, nz;
3154: PetscFunctionBegin;
3155: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3157: PetscCall(VecGetArrayRead(bb, &b));
3158: PetscCall(VecGetArrayWrite(xx, &x));
3160: /* forward solve the lower triangular */
3161: x[0] = b[0];
3162: v = aa;
3163: vi = aj;
3164: for (i = 1; i < n; i++) {
3165: nz = ai[i + 1] - ai[i];
3166: sum = b[i];
3167: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3168: v += nz;
3169: vi += nz;
3170: x[i] = sum;
3171: }
3173: /* backward solve the upper triangular */
3174: for (i = n - 1; i >= 0; i--) {
3175: v = aa + adiag[i + 1] + 1;
3176: vi = aj + adiag[i + 1] + 1;
3177: nz = adiag[i] - adiag[i + 1] - 1;
3178: sum = x[i];
3179: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3180: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3181: }
3183: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3184: PetscCall(VecRestoreArrayRead(bb, &b));
3185: PetscCall(VecRestoreArrayWrite(xx, &x));
3186: PetscFunctionReturn(PETSC_SUCCESS);
3187: }
3189: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3190: {
3191: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3192: IS iscol = a->col, isrow = a->row;
3193: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3194: const PetscInt *rout, *cout, *r, *c;
3195: PetscScalar *x, *tmp, sum;
3196: const PetscScalar *b;
3197: const MatScalar *aa = a->a, *v;
3199: PetscFunctionBegin;
3200: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3202: PetscCall(VecGetArrayRead(bb, &b));
3203: PetscCall(VecGetArrayWrite(xx, &x));
3204: tmp = a->solve_work;
3206: PetscCall(ISGetIndices(isrow, &rout));
3207: r = rout;
3208: PetscCall(ISGetIndices(iscol, &cout));
3209: c = cout;
3211: /* forward solve the lower triangular */
3212: tmp[0] = b[r[0]];
3213: v = aa;
3214: vi = aj;
3215: for (i = 1; i < n; i++) {
3216: nz = ai[i + 1] - ai[i];
3217: sum = b[r[i]];
3218: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3219: tmp[i] = sum;
3220: v += nz;
3221: vi += nz;
3222: }
3224: /* backward solve the upper triangular */
3225: for (i = n - 1; i >= 0; i--) {
3226: v = aa + adiag[i + 1] + 1;
3227: vi = aj + adiag[i + 1] + 1;
3228: nz = adiag[i] - adiag[i + 1] - 1;
3229: sum = tmp[i];
3230: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3231: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3232: }
3234: PetscCall(ISRestoreIndices(isrow, &rout));
3235: PetscCall(ISRestoreIndices(iscol, &cout));
3236: PetscCall(VecRestoreArrayRead(bb, &b));
3237: PetscCall(VecRestoreArrayWrite(xx, &x));
3238: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3239: PetscFunctionReturn(PETSC_SUCCESS);
3240: }
3242: #if 0
3243: // unused
3244: /*
3245: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3246: */
3247: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3248: {
3249: Mat B = *fact;
3250: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
3251: IS isicol;
3252: const PetscInt *r, *ic;
3253: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3254: PetscInt *bi, *bj, *bdiag, *bdiag_rev;
3255: PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3256: PetscInt nlnk, *lnk;
3257: PetscBT lnkbt;
3258: PetscBool row_identity, icol_identity;
3259: MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3260: const PetscInt *ics;
3261: PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3262: PetscReal dt = info->dt, shift = info->shiftamount;
3263: PetscInt dtcount = (PetscInt)info->dtcount, nnz_max;
3264: PetscBool missing;
3266: PetscFunctionBegin;
3267: if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3268: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3270: /* symbolic factorization, can be reused */
3271: PetscCall(MatMissingDiagonal(A, &missing, &i));
3272: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3273: adiag = a->diag;
3275: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3277: /* bdiag is location of diagonal in factor */
3278: PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */
3279: PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3281: /* allocate row pointers bi */
3282: PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));
3284: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3285: if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3286: nnz_max = ai[n] + 2 * n * dtcount + 2;
3288: PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
3289: PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));
3291: /* put together the new matrix */
3292: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3293: b = (Mat_SeqAIJ *)B->data;
3294: b->free_a = PETSC_TRUE;
3295: b->free_ij = PETSC_TRUE;
3296: b->a = ba;
3297: b->j = bj;
3298: b->i = bi;
3299: b->diag = bdiag;
3300: b->ilen = NULL;
3301: b->imax = NULL;
3302: b->row = isrow;
3303: b->col = iscol;
3304: PetscCall(PetscObjectReference((PetscObject)isrow));
3305: PetscCall(PetscObjectReference((PetscObject)iscol));
3306: b->icol = isicol;
3308: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3309: b->maxnz = nnz_max;
3311: B->factortype = MAT_FACTOR_ILUDT;
3312: B->info.factor_mallocs = 0;
3313: B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3314: /* end of symbolic factorization */
3316: PetscCall(ISGetIndices(isrow, &r));
3317: PetscCall(ISGetIndices(isicol, &ic));
3318: ics = ic;
3320: /* linked list for storing column indices of the active row */
3321: nlnk = n + 1;
3322: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3324: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3325: PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3326: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3327: PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3328: PetscCall(PetscArrayzero(rtmp, n));
3330: bi[0] = 0;
3331: bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */
3332: bdiag_rev[n] = bdiag[0];
3333: bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3334: for (i = 0; i < n; i++) {
3335: /* copy initial fill into linked list */
3336: nzi = ai[r[i] + 1] - ai[r[i]];
3337: PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3338: nzi_al = adiag[r[i]] - ai[r[i]];
3339: nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3340: ajtmp = aj + ai[r[i]];
3341: PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3343: /* load in initial (unfactored row) */
3344: aatmp = a->a + ai[r[i]];
3345: for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3347: /* add pivot rows into linked list */
3348: row = lnk[n];
3349: while (row < i) {
3350: nzi_bl = bi[row + 1] - bi[row] + 1;
3351: bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3352: PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3353: nzi += nlnk;
3354: row = lnk[row];
3355: }
3357: /* copy data from lnk into jtmp, then initialize lnk */
3358: PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3360: /* numerical factorization */
3361: bjtmp = jtmp;
3362: row = *bjtmp++; /* 1st pivot row */
3363: while (row < i) {
3364: pc = rtmp + row;
3365: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3366: multiplier = (*pc) * (*pv);
3367: *pc = multiplier;
3368: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3369: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3370: pv = ba + bdiag[row + 1] + 1;
3371: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3372: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3373: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3374: }
3375: row = *bjtmp++;
3376: }
3378: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3379: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3380: nzi_bl = 0;
3381: j = 0;
3382: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3383: vtmp[j] = rtmp[jtmp[j]];
3384: rtmp[jtmp[j]] = 0.0;
3385: nzi_bl++;
3386: j++;
3387: }
3388: nzi_bu = nzi - nzi_bl - 1;
3389: while (j < nzi) {
3390: vtmp[j] = rtmp[jtmp[j]];
3391: rtmp[jtmp[j]] = 0.0;
3392: j++;
3393: }
3395: bjtmp = bj + bi[i];
3396: batmp = ba + bi[i];
3397: /* apply level dropping rule to L part */
3398: ncut = nzi_al + dtcount;
3399: if (ncut < nzi_bl) {
3400: PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3401: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3402: } else {
3403: ncut = nzi_bl;
3404: }
3405: for (j = 0; j < ncut; j++) {
3406: bjtmp[j] = jtmp[j];
3407: batmp[j] = vtmp[j];
3408: }
3409: bi[i + 1] = bi[i] + ncut;
3410: nzi = ncut + 1;
3412: /* apply level dropping rule to U part */
3413: ncut = nzi_au + dtcount;
3414: if (ncut < nzi_bu) {
3415: PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3416: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3417: } else {
3418: ncut = nzi_bu;
3419: }
3420: nzi += ncut;
3422: /* mark bdiagonal */
3423: bdiag[i + 1] = bdiag[i] - (ncut + 1);
3424: bdiag_rev[n - i - 1] = bdiag[i + 1];
3425: bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1);
3426: bjtmp = bj + bdiag[i];
3427: batmp = ba + bdiag[i];
3428: *bjtmp = i;
3429: *batmp = diag_tmp; /* rtmp[i]; */
3430: if (*batmp == 0.0) *batmp = dt + shift;
3431: *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3433: bjtmp = bj + bdiag[i + 1] + 1;
3434: batmp = ba + bdiag[i + 1] + 1;
3435: for (k = 0; k < ncut; k++) {
3436: bjtmp[k] = jtmp[nzi_bl + 1 + k];
3437: batmp[k] = vtmp[nzi_bl + 1 + k];
3438: }
3440: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3441: } /* for (i=0; i<n; i++) */
3442: PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3444: PetscCall(ISRestoreIndices(isrow, &r));
3445: PetscCall(ISRestoreIndices(isicol, &ic));
3447: PetscCall(PetscLLDestroy(lnk, lnkbt));
3448: PetscCall(PetscFree2(im, jtmp));
3449: PetscCall(PetscFree2(rtmp, vtmp));
3450: PetscCall(PetscFree(bdiag_rev));
3452: PetscCall(PetscLogFlops(B->cmap->n));
3453: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3455: PetscCall(ISIdentity(isrow, &row_identity));
3456: PetscCall(ISIdentity(isicol, &icol_identity));
3457: if (row_identity && icol_identity) {
3458: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3459: } else {
3460: B->ops->solve = MatSolve_SeqAIJ;
3461: }
3463: B->ops->solveadd = NULL;
3464: B->ops->solvetranspose = NULL;
3465: B->ops->solvetransposeadd = NULL;
3466: B->ops->matsolve = NULL;
3467: B->assembled = PETSC_TRUE;
3468: B->preallocated = PETSC_TRUE;
3469: PetscFunctionReturn(PETSC_SUCCESS);
3470: }
3472: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3473: /*
3474: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3475: */
3477: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3478: {
3479: PetscFunctionBegin;
3480: PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3481: PetscFunctionReturn(PETSC_SUCCESS);
3482: }
3484: /*
3485: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3486: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3487: */
3488: /*
3489: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3490: */
3492: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3493: {
3494: Mat C = fact;
3495: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3496: IS isrow = b->row, isicol = b->icol;
3497: const PetscInt *r, *ic, *ics;
3498: PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3499: PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3500: MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3501: PetscReal dt = info->dt, shift = info->shiftamount;
3502: PetscBool row_identity, col_identity;
3504: PetscFunctionBegin;
3505: PetscCall(ISGetIndices(isrow, &r));
3506: PetscCall(ISGetIndices(isicol, &ic));
3507: PetscCall(PetscMalloc1(n + 1, &rtmp));
3508: ics = ic;
3510: for (i = 0; i < n; i++) {
3511: /* initialize rtmp array */
3512: nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3513: bjtmp = bj + bi[i];
3514: for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3515: rtmp[i] = 0.0;
3516: nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3517: bjtmp = bj + bdiag[i + 1] + 1;
3518: for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3520: /* load in initial unfactored row of A */
3521: nz = ai[r[i] + 1] - ai[r[i]];
3522: ajtmp = aj + ai[r[i]];
3523: v = aa + ai[r[i]];
3524: for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3526: /* numerical factorization */
3527: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3528: nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3529: k = 0;
3530: while (k < nzl) {
3531: row = *bjtmp++;
3532: pc = rtmp + row;
3533: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3534: multiplier = (*pc) * (*pv);
3535: *pc = multiplier;
3536: if (PetscAbsScalar(multiplier) > dt) {
3537: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3538: pv = b->a + bdiag[row + 1] + 1;
3539: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3540: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3541: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3542: }
3543: k++;
3544: }
3546: /* finished row so stick it into b->a */
3547: /* L-part */
3548: pv = b->a + bi[i];
3549: pj = bj + bi[i];
3550: nzl = bi[i + 1] - bi[i];
3551: for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3553: /* diagonal: invert diagonal entries for simpler triangular solves */
3554: if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3555: b->a[bdiag[i]] = 1.0 / rtmp[i];
3557: /* U-part */
3558: pv = b->a + bdiag[i + 1] + 1;
3559: pj = bj + bdiag[i + 1] + 1;
3560: nzu = bdiag[i] - bdiag[i + 1] - 1;
3561: for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3562: }
3564: PetscCall(PetscFree(rtmp));
3565: PetscCall(ISRestoreIndices(isicol, &ic));
3566: PetscCall(ISRestoreIndices(isrow, &r));
3568: PetscCall(ISIdentity(isrow, &row_identity));
3569: PetscCall(ISIdentity(isicol, &col_identity));
3570: if (row_identity && col_identity) {
3571: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3572: } else {
3573: C->ops->solve = MatSolve_SeqAIJ;
3574: }
3575: C->ops->solveadd = NULL;
3576: C->ops->solvetranspose = NULL;
3577: C->ops->solvetransposeadd = NULL;
3578: C->ops->matsolve = NULL;
3579: C->assembled = PETSC_TRUE;
3580: C->preallocated = PETSC_TRUE;
3582: PetscCall(PetscLogFlops(C->cmap->n));
3583: PetscFunctionReturn(PETSC_SUCCESS);
3584: }
3585: #endif