Actual source code: aijfact.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petscbt.h>
4: #include <../src/mat/utils/freespace.h>
6: /*
7: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix
9: This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll()
10: */
11: #if 0
12: PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat, MatOrderingType type, IS *irow, IS *icol)
13: {
14: Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->data;
15: PetscInt i, j, jj, k, kk, n = mat->rmap->n, current = 0, newcurrent = 0, *order;
16: const PetscInt *ai = a->i, *aj = a->j;
17: const PetscScalar *aa = a->a;
18: PetscBool *done;
19: PetscReal best, past = 0, future;
21: PetscFunctionBegin;
22: /* pick initial row */
23: best = -1;
24: for (i = 0; i < n; i++) {
25: future = 0.0;
26: for (j = ai[i]; j < ai[i + 1]; j++) {
27: if (aj[j] != i) future += PetscAbsScalar(aa[j]);
28: else past = PetscAbsScalar(aa[j]);
29: }
30: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
31: if (past / future > best) {
32: best = past / future;
33: current = i;
34: }
35: }
37: PetscCall(PetscMalloc1(n, &done));
38: PetscCall(PetscArrayzero(done, n));
39: PetscCall(PetscMalloc1(n, &order));
40: order[0] = current;
41: for (i = 0; i < n - 1; i++) {
42: done[current] = PETSC_TRUE;
43: best = -1;
44: /* loop over all neighbors of current pivot */
45: for (j = ai[current]; j < ai[current + 1]; j++) {
46: jj = aj[j];
47: if (done[jj]) continue;
48: /* loop over columns of potential next row computing weights for below and above diagonal */
49: past = future = 0.0;
50: for (k = ai[jj]; k < ai[jj + 1]; k++) {
51: kk = aj[k];
52: if (done[kk]) past += PetscAbsScalar(aa[k]);
53: else if (kk != jj) future += PetscAbsScalar(aa[k]);
54: }
55: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
56: if (past / future > best) {
57: best = past / future;
58: newcurrent = jj;
59: }
60: }
61: if (best == -1) { /* no neighbors to select from so select best of all that remain */
62: best = -1;
63: for (k = 0; k < n; k++) {
64: if (done[k]) continue;
65: future = 0.0;
66: past = 0.0;
67: for (j = ai[k]; j < ai[k + 1]; j++) {
68: kk = aj[j];
69: if (done[kk]) past += PetscAbsScalar(aa[j]);
70: else if (kk != k) future += PetscAbsScalar(aa[j]);
71: }
72: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
73: if (past / future > best) {
74: best = past / future;
75: newcurrent = k;
76: }
77: }
78: }
79: PetscCheck(current != newcurrent, PETSC_COMM_SELF, PETSC_ERR_PLIB, "newcurrent cannot be current");
80: current = newcurrent;
81: order[i + 1] = current;
82: }
83: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, order, PETSC_COPY_VALUES, irow));
84: *icol = *irow;
85: PetscCall(PetscObjectReference((PetscObject)*irow));
86: PetscCall(PetscFree(done));
87: PetscCall(PetscFree(order));
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
90: #endif
92: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
93: {
94: PetscFunctionBegin;
95: *type = MATSOLVERPETSC;
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
99: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
100: {
101: PetscInt n = A->rmap->n;
103: PetscFunctionBegin;
104: #if defined(PETSC_USE_COMPLEX)
105: if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
106: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
107: *B = NULL;
108: PetscFunctionReturn(PETSC_SUCCESS);
109: }
110: #endif
112: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
113: PetscCall(MatSetSizes(*B, n, n, n, n));
114: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
115: PetscCall(MatSetType(*B, MATSEQAIJ));
117: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
118: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
120: PetscCall(MatSetBlockSizesFromMats(*B, A, A));
121: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
122: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
123: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
124: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
125: PetscCall(MatSetType(*B, MATSEQSBAIJ));
126: PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
128: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
129: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
130: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
131: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
132: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
133: (*B)->factortype = ftype;
135: PetscCall(PetscFree((*B)->solvertype));
136: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
137: (*B)->canuseordering = PETSC_TRUE;
138: PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: #if 0
143: // currently unused
144: static PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
145: {
146: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
147: IS isicol;
148: const PetscInt *r, *ic;
149: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j;
150: PetscInt *bi, *bj, *ajtmp;
151: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
152: PetscReal f;
153: PetscInt nlnk, *lnk, k, **bi_ptr;
154: PetscFreeSpaceList free_space = NULL, current_space = NULL;
155: PetscBT lnkbt;
156: PetscBool missing;
158: PetscFunctionBegin;
159: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
160: PetscCall(MatMissingDiagonal(A, &missing, &i));
161: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
163: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
164: PetscCall(ISGetIndices(isrow, &r));
165: PetscCall(ISGetIndices(isicol, &ic));
167: /* get new row pointers */
168: PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
169: bi[0] = 0;
171: /* bdiag is location of diagonal in factor */
172: PetscCall(PetscMalloc1(n + 1, &bdiag));
173: bdiag[0] = 0;
175: /* linked list for storing column indices of the active row */
176: nlnk = n + 1;
177: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
178: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
180: /* initial FreeSpace size is f*(ai[n]+1) */
181: f = info->fill;
182: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
183: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
184: current_space = free_space;
186: for (i = 0; i < n; i++) {
187: /* copy previous fill into linked list */
188: nzi = 0;
189: nnz = ai[r[i] + 1] - ai[r[i]];
190: ajtmp = aj + ai[r[i]];
191: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
192: nzi += nlnk;
194: /* add pivot rows into linked list */
195: row = lnk[n];
196: while (row < i) {
197: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
198: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
199: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
200: nzi += nlnk;
201: row = lnk[row];
202: }
203: bi[i + 1] = bi[i] + nzi;
204: im[i] = nzi;
206: /* mark bdiag */
207: nzbd = 0;
208: nnz = nzi;
209: k = lnk[n];
210: while (nnz-- && k < i) {
211: nzbd++;
212: k = lnk[k];
213: }
214: bdiag[i] = bi[i] + nzbd;
216: /* if free space is not available, make more free space */
217: if (current_space->local_remaining < nzi) {
218: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
219: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
220: reallocs++;
221: }
223: /* copy data into free space, then initialize lnk */
224: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
226: bi_ptr[i] = current_space->array;
227: current_space->array += nzi;
228: current_space->local_used += nzi;
229: current_space->local_remaining -= nzi;
230: }
231: #if defined(PETSC_USE_INFO)
232: if (ai[n] != 0) {
233: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
234: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
235: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
236: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
237: PetscCall(PetscInfo(A, "for best performance.\n"));
238: } else {
239: PetscCall(PetscInfo(A, "Empty matrix\n"));
240: }
241: #endif
243: PetscCall(ISRestoreIndices(isrow, &r));
244: PetscCall(ISRestoreIndices(isicol, &ic));
246: /* destroy list of free space and other temporary array(s) */
247: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
248: PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
249: PetscCall(PetscLLDestroy(lnk, lnkbt));
250: PetscCall(PetscFree2(bi_ptr, im));
252: /* put together the new matrix */
253: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
254: b = (Mat_SeqAIJ *)B->data;
255: b->free_ij = PETSC_TRUE;
256: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
257: b->free_a = PETSC_TRUE;
258: b->j = bj;
259: b->i = bi;
260: b->diag = bdiag;
261: b->ilen = NULL;
262: b->imax = NULL;
263: b->row = isrow;
264: b->col = iscol;
265: PetscCall(PetscObjectReference((PetscObject)isrow));
266: PetscCall(PetscObjectReference((PetscObject)iscol));
267: b->icol = isicol;
268: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
270: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
271: b->maxnz = b->nz = bi[n];
273: B->factortype = MAT_FACTOR_LU;
274: B->info.factor_mallocs = reallocs;
275: B->info.fill_ratio_given = f;
277: if (ai[n]) {
278: B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
279: } else {
280: B->info.fill_ratio_needed = 0.0;
281: }
282: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
283: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
284: PetscFunctionReturn(PETSC_SUCCESS);
285: }
286: #endif
288: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
289: {
290: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
291: IS isicol;
292: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
293: PetscInt i, n = A->rmap->n;
294: PetscInt *bi, *bj;
295: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
296: PetscReal f;
297: PetscInt nlnk, *lnk, k, **bi_ptr;
298: PetscFreeSpaceList free_space = NULL, current_space = NULL;
299: PetscBT lnkbt;
300: PetscBool missing;
302: PetscFunctionBegin;
303: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
304: PetscCall(MatMissingDiagonal(A, &missing, &i));
305: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
307: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
308: PetscCall(ISGetIndices(isrow, &r));
309: PetscCall(ISGetIndices(isicol, &ic));
311: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
312: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
313: PetscCall(PetscMalloc1(n + 1, &bdiag));
314: bi[0] = bdiag[0] = 0;
316: /* linked list for storing column indices of the active row */
317: nlnk = n + 1;
318: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
320: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
322: /* initial FreeSpace size is f*(ai[n]+1) */
323: f = info->fill;
324: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
325: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
326: current_space = free_space;
328: for (i = 0; i < n; i++) {
329: /* copy previous fill into linked list */
330: nzi = 0;
331: nnz = ai[r[i] + 1] - ai[r[i]];
332: ajtmp = aj + ai[r[i]];
333: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
334: nzi += nlnk;
336: /* add pivot rows into linked list */
337: row = lnk[n];
338: while (row < i) {
339: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
340: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
341: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
342: nzi += nlnk;
343: row = lnk[row];
344: }
345: bi[i + 1] = bi[i] + nzi;
346: im[i] = nzi;
348: /* mark bdiag */
349: nzbd = 0;
350: nnz = nzi;
351: k = lnk[n];
352: while (nnz-- && k < i) {
353: nzbd++;
354: k = lnk[k];
355: }
356: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
358: /* if free space is not available, make more free space */
359: if (current_space->local_remaining < nzi) {
360: /* estimated additional space needed */
361: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
362: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
363: reallocs++;
364: }
366: /* copy data into free space, then initialize lnk */
367: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
369: bi_ptr[i] = current_space->array;
370: current_space->array += nzi;
371: current_space->local_used += nzi;
372: current_space->local_remaining -= nzi;
373: }
375: PetscCall(ISRestoreIndices(isrow, &r));
376: PetscCall(ISRestoreIndices(isicol, &ic));
378: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
379: PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
380: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
381: PetscCall(PetscLLDestroy(lnk, lnkbt));
382: PetscCall(PetscFree2(bi_ptr, im));
384: /* put together the new matrix */
385: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
386: b = (Mat_SeqAIJ *)B->data;
387: b->free_ij = PETSC_TRUE;
388: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
389: b->free_a = PETSC_TRUE;
390: b->j = bj;
391: b->i = bi;
392: b->diag = bdiag;
393: b->ilen = NULL;
394: b->imax = NULL;
395: b->row = isrow;
396: b->col = iscol;
397: PetscCall(PetscObjectReference((PetscObject)isrow));
398: PetscCall(PetscObjectReference((PetscObject)iscol));
399: b->icol = isicol;
400: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
402: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
403: b->maxnz = b->nz = bdiag[0] + 1;
405: B->factortype = MAT_FACTOR_LU;
406: B->info.factor_mallocs = reallocs;
407: B->info.fill_ratio_given = f;
409: if (ai[n]) {
410: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
411: } else {
412: B->info.fill_ratio_needed = 0.0;
413: }
414: #if defined(PETSC_USE_INFO)
415: if (ai[n] != 0) {
416: PetscReal af = B->info.fill_ratio_needed;
417: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
418: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
419: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
420: PetscCall(PetscInfo(A, "for best performance.\n"));
421: } else {
422: PetscCall(PetscInfo(A, "Empty matrix\n"));
423: }
424: #endif
425: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
426: if (a->inode.size) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427: PetscCall(MatSeqAIJCheckInode_FactorLU(B));
428: PetscFunctionReturn(PETSC_SUCCESS);
429: }
431: /*
432: Trouble in factorization, should we dump the original matrix?
433: */
434: PetscErrorCode MatFactorDumpMatrix(Mat A)
435: {
436: PetscBool flg = PETSC_FALSE;
438: PetscFunctionBegin;
439: PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
440: if (flg) {
441: PetscViewer viewer;
442: char filename[PETSC_MAX_PATH_LEN];
444: PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
445: PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
446: PetscCall(MatView(A, viewer));
447: PetscCall(PetscViewerDestroy(&viewer));
448: }
449: PetscFunctionReturn(PETSC_SUCCESS);
450: }
452: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
453: {
454: Mat C = B;
455: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
456: IS isrow = b->row, isicol = b->icol;
457: const PetscInt *r, *ic, *ics;
458: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
459: PetscInt i, j, k, nz, nzL, row, *pj;
460: const PetscInt *ajtmp, *bjtmp;
461: MatScalar *rtmp, *pc, multiplier, *pv;
462: const MatScalar *aa, *v;
463: MatScalar *ba;
464: PetscBool row_identity, col_identity;
465: FactorShiftCtx sctx;
466: const PetscInt *ddiag;
467: PetscReal rs;
468: MatScalar d;
470: PetscFunctionBegin;
471: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
472: PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
473: /* MatPivotSetUp(): initialize shift context sctx */
474: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
476: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
477: ddiag = a->diag;
478: sctx.shift_top = info->zeropivot;
479: for (i = 0; i < n; i++) {
480: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
481: d = (aa)[ddiag[i]];
482: rs = -PetscAbsScalar(d) - PetscRealPart(d);
483: v = aa + ai[i];
484: nz = ai[i + 1] - ai[i];
485: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
486: if (rs > sctx.shift_top) sctx.shift_top = rs;
487: }
488: sctx.shift_top *= 1.1;
489: sctx.nshift_max = 5;
490: sctx.shift_lo = 0.;
491: sctx.shift_hi = 1.;
492: }
494: PetscCall(ISGetIndices(isrow, &r));
495: PetscCall(ISGetIndices(isicol, &ic));
496: PetscCall(PetscMalloc1(n + 1, &rtmp));
497: ics = ic;
499: do {
500: sctx.newshift = PETSC_FALSE;
501: for (i = 0; i < n; i++) {
502: /* zero rtmp */
503: /* L part */
504: nz = bi[i + 1] - bi[i];
505: bjtmp = bj + bi[i];
506: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
508: /* U part */
509: nz = bdiag[i] - bdiag[i + 1];
510: bjtmp = bj + bdiag[i + 1] + 1;
511: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
513: /* load in initial (unfactored row) */
514: nz = ai[r[i] + 1] - ai[r[i]];
515: ajtmp = aj + ai[r[i]];
516: v = aa + ai[r[i]];
517: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
518: /* ZeropivotApply() */
519: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
521: /* elimination */
522: bjtmp = bj + bi[i];
523: row = *bjtmp++;
524: nzL = bi[i + 1] - bi[i];
525: for (k = 0; k < nzL; k++) {
526: pc = rtmp + row;
527: if (*pc != 0.0) {
528: pv = ba + bdiag[row];
529: multiplier = *pc * (*pv);
530: *pc = multiplier;
532: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
533: pv = ba + bdiag[row + 1] + 1;
534: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
536: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
537: PetscCall(PetscLogFlops(1 + 2.0 * nz));
538: }
539: row = *bjtmp++;
540: }
542: /* finished row so stick it into b->a */
543: rs = 0.0;
544: /* L part */
545: pv = ba + bi[i];
546: pj = b->j + bi[i];
547: nz = bi[i + 1] - bi[i];
548: for (j = 0; j < nz; j++) {
549: pv[j] = rtmp[pj[j]];
550: rs += PetscAbsScalar(pv[j]);
551: }
553: /* U part */
554: pv = ba + bdiag[i + 1] + 1;
555: pj = b->j + bdiag[i + 1] + 1;
556: nz = bdiag[i] - bdiag[i + 1] - 1;
557: for (j = 0; j < nz; j++) {
558: pv[j] = rtmp[pj[j]];
559: rs += PetscAbsScalar(pv[j]);
560: }
562: sctx.rs = rs;
563: sctx.pv = rtmp[i];
564: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
565: if (sctx.newshift) break; /* break for-loop */
566: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
568: /* Mark diagonal and invert diagonal for simpler triangular solves */
569: pv = ba + bdiag[i];
570: *pv = 1.0 / rtmp[i];
572: } /* endof for (i=0; i<n; i++) { */
574: /* MatPivotRefine() */
575: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
576: /*
577: * if no shift in this attempt & shifting & started shifting & can refine,
578: * then try lower shift
579: */
580: sctx.shift_hi = sctx.shift_fraction;
581: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
582: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
583: sctx.newshift = PETSC_TRUE;
584: sctx.nshift++;
585: }
586: } while (sctx.newshift);
588: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
589: PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
591: PetscCall(PetscFree(rtmp));
592: PetscCall(ISRestoreIndices(isicol, &ic));
593: PetscCall(ISRestoreIndices(isrow, &r));
595: PetscCall(ISIdentity(isrow, &row_identity));
596: PetscCall(ISIdentity(isicol, &col_identity));
597: if (b->inode.size) {
598: C->ops->solve = MatSolve_SeqAIJ_Inode;
599: } else if (row_identity && col_identity) {
600: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
601: } else {
602: C->ops->solve = MatSolve_SeqAIJ;
603: }
604: C->ops->solveadd = MatSolveAdd_SeqAIJ;
605: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
606: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
607: C->ops->matsolve = MatMatSolve_SeqAIJ;
608: C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
609: C->assembled = PETSC_TRUE;
610: C->preallocated = PETSC_TRUE;
612: PetscCall(PetscLogFlops(C->cmap->n));
614: /* MatShiftView(A,info,&sctx) */
615: if (sctx.nshift) {
616: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
617: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
618: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
619: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
620: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
621: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
622: }
623: }
624: PetscFunctionReturn(PETSC_SUCCESS);
625: }
627: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
628: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
629: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
631: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
632: {
633: Mat C = B;
634: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
635: IS isrow = b->row, isicol = b->icol;
636: const PetscInt *r, *ic, *ics;
637: PetscInt nz, row, i, j, n = A->rmap->n, diag;
638: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
639: const PetscInt *ajtmp, *bjtmp, *diag_offset = b->diag, *pj;
640: MatScalar *pv, *rtmp, *pc, multiplier, d;
641: const MatScalar *v, *aa;
642: MatScalar *ba;
643: PetscReal rs = 0.0;
644: FactorShiftCtx sctx;
645: const PetscInt *ddiag;
646: PetscBool row_identity, col_identity;
648: PetscFunctionBegin;
649: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
650: PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
651: /* MatPivotSetUp(): initialize shift context sctx */
652: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
654: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
655: ddiag = a->diag;
656: sctx.shift_top = info->zeropivot;
657: for (i = 0; i < n; i++) {
658: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
659: d = (aa)[ddiag[i]];
660: rs = -PetscAbsScalar(d) - PetscRealPart(d);
661: v = aa + ai[i];
662: nz = ai[i + 1] - ai[i];
663: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
664: if (rs > sctx.shift_top) sctx.shift_top = rs;
665: }
666: sctx.shift_top *= 1.1;
667: sctx.nshift_max = 5;
668: sctx.shift_lo = 0.;
669: sctx.shift_hi = 1.;
670: }
672: PetscCall(ISGetIndices(isrow, &r));
673: PetscCall(ISGetIndices(isicol, &ic));
674: PetscCall(PetscMalloc1(n + 1, &rtmp));
675: ics = ic;
677: do {
678: sctx.newshift = PETSC_FALSE;
679: for (i = 0; i < n; i++) {
680: nz = bi[i + 1] - bi[i];
681: bjtmp = bj + bi[i];
682: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
684: /* load in initial (unfactored row) */
685: nz = ai[r[i] + 1] - ai[r[i]];
686: ajtmp = aj + ai[r[i]];
687: v = aa + ai[r[i]];
688: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
689: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
691: row = *bjtmp++;
692: while (row < i) {
693: pc = rtmp + row;
694: if (*pc != 0.0) {
695: pv = ba + diag_offset[row];
696: pj = b->j + diag_offset[row] + 1;
697: multiplier = *pc / *pv++;
698: *pc = multiplier;
699: nz = bi[row + 1] - diag_offset[row] - 1;
700: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
701: PetscCall(PetscLogFlops(1 + 2.0 * nz));
702: }
703: row = *bjtmp++;
704: }
705: /* finished row so stick it into b->a */
706: pv = ba + bi[i];
707: pj = b->j + bi[i];
708: nz = bi[i + 1] - bi[i];
709: diag = diag_offset[i] - bi[i];
710: rs = 0.0;
711: for (j = 0; j < nz; j++) {
712: pv[j] = rtmp[pj[j]];
713: rs += PetscAbsScalar(pv[j]);
714: }
715: rs -= PetscAbsScalar(pv[diag]);
717: sctx.rs = rs;
718: sctx.pv = pv[diag];
719: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
720: if (sctx.newshift) break;
721: pv[diag] = sctx.pv;
722: }
724: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
725: /*
726: * if no shift in this attempt & shifting & started shifting & can refine,
727: * then try lower shift
728: */
729: sctx.shift_hi = sctx.shift_fraction;
730: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
731: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
732: sctx.newshift = PETSC_TRUE;
733: sctx.nshift++;
734: }
735: } while (sctx.newshift);
737: /* invert diagonal entries for simpler triangular solves */
738: for (i = 0; i < n; i++) ba[diag_offset[i]] = 1.0 / ba[diag_offset[i]];
739: PetscCall(PetscFree(rtmp));
740: PetscCall(ISRestoreIndices(isicol, &ic));
741: PetscCall(ISRestoreIndices(isrow, &r));
742: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
743: PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
745: PetscCall(ISIdentity(isrow, &row_identity));
746: PetscCall(ISIdentity(isicol, &col_identity));
747: if (row_identity && col_identity) {
748: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
749: } else {
750: C->ops->solve = MatSolve_SeqAIJ_inplace;
751: }
752: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
753: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
754: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
755: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
756: C->ops->matsolvetranspose = NULL;
758: C->assembled = PETSC_TRUE;
759: C->preallocated = PETSC_TRUE;
761: PetscCall(PetscLogFlops(C->cmap->n));
762: if (sctx.nshift) {
763: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
764: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
765: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
766: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
767: }
768: }
769: C->ops->solve = MatSolve_SeqAIJ_inplace;
770: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
772: PetscCall(MatSeqAIJCheckInode(C));
773: PetscFunctionReturn(PETSC_SUCCESS);
774: }
776: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
778: /*
779: This routine implements inplace ILU(0) with row or/and column permutations.
780: Input:
781: A - original matrix
782: Output;
783: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
784: a->j (col index) is permuted by the inverse of colperm, then sorted
785: a->a reordered accordingly with a->j
786: a->diag (ptr to diagonal elements) is updated.
787: */
788: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
789: {
790: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
791: IS isrow = a->row, isicol = a->icol;
792: const PetscInt *r, *ic, *ics;
793: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
794: PetscInt *ajtmp, nz, row;
795: PetscInt *diag = a->diag, nbdiag, *pj;
796: PetscScalar *rtmp, *pc, multiplier, d;
797: MatScalar *pv, *v;
798: PetscReal rs;
799: FactorShiftCtx sctx;
800: MatScalar *aa, *vtmp;
802: PetscFunctionBegin;
803: PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
805: PetscCall(MatSeqAIJGetArray(A, &aa));
806: /* MatPivotSetUp(): initialize shift context sctx */
807: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
809: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
810: const PetscInt *ddiag = a->diag;
811: sctx.shift_top = info->zeropivot;
812: for (i = 0; i < n; i++) {
813: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
814: d = (aa)[ddiag[i]];
815: rs = -PetscAbsScalar(d) - PetscRealPart(d);
816: vtmp = aa + ai[i];
817: nz = ai[i + 1] - ai[i];
818: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
819: if (rs > sctx.shift_top) sctx.shift_top = rs;
820: }
821: sctx.shift_top *= 1.1;
822: sctx.nshift_max = 5;
823: sctx.shift_lo = 0.;
824: sctx.shift_hi = 1.;
825: }
827: PetscCall(ISGetIndices(isrow, &r));
828: PetscCall(ISGetIndices(isicol, &ic));
829: PetscCall(PetscMalloc1(n + 1, &rtmp));
830: PetscCall(PetscArrayzero(rtmp, n + 1));
831: ics = ic;
833: #if defined(MV)
834: sctx.shift_top = 0.;
835: sctx.nshift_max = 0;
836: sctx.shift_lo = 0.;
837: sctx.shift_hi = 0.;
838: sctx.shift_fraction = 0.;
840: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
841: sctx.shift_top = 0.;
842: for (i = 0; i < n; i++) {
843: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
844: d = aa[diag[i]];
845: rs = -PetscAbsScalar(d) - PetscRealPart(d);
846: v = aa + ai[i];
847: nz = ai[i + 1] - ai[i];
848: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
849: if (rs > sctx.shift_top) sctx.shift_top = rs;
850: }
851: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
852: sctx.shift_top *= 1.1;
853: sctx.nshift_max = 5;
854: sctx.shift_lo = 0.;
855: sctx.shift_hi = 1.;
856: }
858: sctx.shift_amount = 0.;
859: sctx.nshift = 0;
860: #endif
862: do {
863: sctx.newshift = PETSC_FALSE;
864: for (i = 0; i < n; i++) {
865: /* load in initial unfactored row */
866: nz = ai[r[i] + 1] - ai[r[i]];
867: ajtmp = aj + ai[r[i]];
868: v = aa + ai[r[i]];
869: /* sort permuted ajtmp and values v accordingly */
870: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
871: PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
873: diag[r[i]] = ai[r[i]];
874: for (j = 0; j < nz; j++) {
875: rtmp[ajtmp[j]] = v[j];
876: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
877: }
878: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
880: row = *ajtmp++;
881: while (row < i) {
882: pc = rtmp + row;
883: if (*pc != 0.0) {
884: pv = aa + diag[r[row]];
885: pj = aj + diag[r[row]] + 1;
887: multiplier = *pc / *pv++;
888: *pc = multiplier;
889: nz = ai[r[row] + 1] - diag[r[row]] - 1;
890: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
891: PetscCall(PetscLogFlops(1 + 2.0 * nz));
892: }
893: row = *ajtmp++;
894: }
895: /* finished row so overwrite it onto aa */
896: pv = aa + ai[r[i]];
897: pj = aj + ai[r[i]];
898: nz = ai[r[i] + 1] - ai[r[i]];
899: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
901: rs = 0.0;
902: for (j = 0; j < nz; j++) {
903: pv[j] = rtmp[pj[j]];
904: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
905: }
907: sctx.rs = rs;
908: sctx.pv = pv[nbdiag];
909: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
910: if (sctx.newshift) break;
911: pv[nbdiag] = sctx.pv;
912: }
914: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
915: /*
916: * if no shift in this attempt & shifting & started shifting & can refine,
917: * then try lower shift
918: */
919: sctx.shift_hi = sctx.shift_fraction;
920: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
921: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
922: sctx.newshift = PETSC_TRUE;
923: sctx.nshift++;
924: }
925: } while (sctx.newshift);
927: /* invert diagonal entries for simpler triangular solves */
928: for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
930: PetscCall(MatSeqAIJRestoreArray(A, &aa));
931: PetscCall(PetscFree(rtmp));
932: PetscCall(ISRestoreIndices(isicol, &ic));
933: PetscCall(ISRestoreIndices(isrow, &r));
935: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
936: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
937: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
938: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
940: A->assembled = PETSC_TRUE;
941: A->preallocated = PETSC_TRUE;
943: PetscCall(PetscLogFlops(A->cmap->n));
944: if (sctx.nshift) {
945: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
946: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
947: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
948: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
949: }
950: }
951: PetscFunctionReturn(PETSC_SUCCESS);
952: }
954: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
955: {
956: Mat C;
958: PetscFunctionBegin;
959: PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
960: PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
961: PetscCall(MatLUFactorNumeric(C, A, info));
963: A->ops->solve = C->ops->solve;
964: A->ops->solvetranspose = C->ops->solvetranspose;
966: PetscCall(MatHeaderMerge(A, &C));
967: PetscFunctionReturn(PETSC_SUCCESS);
968: }
970: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
971: {
972: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
973: IS iscol = a->col, isrow = a->row;
974: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
975: PetscInt nz;
976: const PetscInt *rout, *cout, *r, *c;
977: PetscScalar *x, *tmp, *tmps, sum;
978: const PetscScalar *b;
979: const MatScalar *aa, *v;
981: PetscFunctionBegin;
982: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
984: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
985: PetscCall(VecGetArrayRead(bb, &b));
986: PetscCall(VecGetArrayWrite(xx, &x));
987: tmp = a->solve_work;
989: PetscCall(ISGetIndices(isrow, &rout));
990: r = rout;
991: PetscCall(ISGetIndices(iscol, &cout));
992: c = cout + (n - 1);
994: /* forward solve the lower triangular */
995: tmp[0] = b[*r++];
996: tmps = tmp;
997: for (i = 1; i < n; i++) {
998: v = aa + ai[i];
999: vi = aj + ai[i];
1000: nz = a->diag[i] - ai[i];
1001: sum = b[*r++];
1002: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1003: tmp[i] = sum;
1004: }
1006: /* backward solve the upper triangular */
1007: for (i = n - 1; i >= 0; i--) {
1008: v = aa + a->diag[i] + 1;
1009: vi = aj + a->diag[i] + 1;
1010: nz = ai[i + 1] - a->diag[i] - 1;
1011: sum = tmp[i];
1012: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1013: x[*c--] = tmp[i] = sum * aa[a->diag[i]];
1014: }
1015: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1016: PetscCall(ISRestoreIndices(isrow, &rout));
1017: PetscCall(ISRestoreIndices(iscol, &cout));
1018: PetscCall(VecRestoreArrayRead(bb, &b));
1019: PetscCall(VecRestoreArrayWrite(xx, &x));
1020: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1021: PetscFunctionReturn(PETSC_SUCCESS);
1022: }
1024: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
1025: {
1026: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1027: IS iscol = a->col, isrow = a->row;
1028: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1029: PetscInt nz, neq, ldb, ldx;
1030: const PetscInt *rout, *cout, *r, *c;
1031: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
1032: const PetscScalar *b, *aa, *v;
1033: PetscBool isdense;
1035: PetscFunctionBegin;
1036: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1037: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1038: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1039: if (X != B) {
1040: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1041: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1042: }
1043: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1044: PetscCall(MatDenseGetArrayRead(B, &b));
1045: PetscCall(MatDenseGetLDA(B, &ldb));
1046: PetscCall(MatDenseGetArray(X, &x));
1047: PetscCall(MatDenseGetLDA(X, &ldx));
1048: PetscCall(ISGetIndices(isrow, &rout));
1049: r = rout;
1050: PetscCall(ISGetIndices(iscol, &cout));
1051: c = cout;
1052: for (neq = 0; neq < B->cmap->n; neq++) {
1053: /* forward solve the lower triangular */
1054: tmp[0] = b[r[0]];
1055: tmps = tmp;
1056: for (i = 1; i < n; i++) {
1057: v = aa + ai[i];
1058: vi = aj + ai[i];
1059: nz = a->diag[i] - ai[i];
1060: sum = b[r[i]];
1061: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1062: tmp[i] = sum;
1063: }
1064: /* backward solve the upper triangular */
1065: for (i = n - 1; i >= 0; i--) {
1066: v = aa + a->diag[i] + 1;
1067: vi = aj + a->diag[i] + 1;
1068: nz = ai[i + 1] - a->diag[i] - 1;
1069: sum = tmp[i];
1070: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1071: x[c[i]] = tmp[i] = sum * aa[a->diag[i]];
1072: }
1073: b += ldb;
1074: x += ldx;
1075: }
1076: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1077: PetscCall(ISRestoreIndices(isrow, &rout));
1078: PetscCall(ISRestoreIndices(iscol, &cout));
1079: PetscCall(MatDenseRestoreArrayRead(B, &b));
1080: PetscCall(MatDenseRestoreArray(X, &x));
1081: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1082: PetscFunctionReturn(PETSC_SUCCESS);
1083: }
1085: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
1086: {
1087: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1088: IS iscol = a->col, isrow = a->row;
1089: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1090: PetscInt nz, neq, ldb, ldx;
1091: const PetscInt *rout, *cout, *r, *c;
1092: PetscScalar *x, *tmp = a->solve_work, sum;
1093: const PetscScalar *b, *aa, *v;
1094: PetscBool isdense;
1096: PetscFunctionBegin;
1097: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1098: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1099: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1100: if (X != B) {
1101: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1102: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1103: }
1104: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1105: PetscCall(MatDenseGetArrayRead(B, &b));
1106: PetscCall(MatDenseGetLDA(B, &ldb));
1107: PetscCall(MatDenseGetArray(X, &x));
1108: PetscCall(MatDenseGetLDA(X, &ldx));
1109: PetscCall(ISGetIndices(isrow, &rout));
1110: r = rout;
1111: PetscCall(ISGetIndices(iscol, &cout));
1112: c = cout;
1113: for (neq = 0; neq < B->cmap->n; neq++) {
1114: /* forward solve the lower triangular */
1115: tmp[0] = b[r[0]];
1116: v = aa;
1117: vi = aj;
1118: for (i = 1; i < n; i++) {
1119: nz = ai[i + 1] - ai[i];
1120: sum = b[r[i]];
1121: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1122: tmp[i] = sum;
1123: v += nz;
1124: vi += nz;
1125: }
1126: /* backward solve the upper triangular */
1127: for (i = n - 1; i >= 0; i--) {
1128: v = aa + adiag[i + 1] + 1;
1129: vi = aj + adiag[i + 1] + 1;
1130: nz = adiag[i] - adiag[i + 1] - 1;
1131: sum = tmp[i];
1132: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
1133: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
1134: }
1135: b += ldb;
1136: x += ldx;
1137: }
1138: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1139: PetscCall(ISRestoreIndices(isrow, &rout));
1140: PetscCall(ISRestoreIndices(iscol, &cout));
1141: PetscCall(MatDenseRestoreArrayRead(B, &b));
1142: PetscCall(MatDenseRestoreArray(X, &x));
1143: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1144: PetscFunctionReturn(PETSC_SUCCESS);
1145: }
1147: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
1148: {
1149: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1150: IS iscol = a->col, isrow = a->row;
1151: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, j;
1152: PetscInt nz, neq, ldb, ldx;
1153: const PetscInt *rout, *cout, *r, *c;
1154: PetscScalar *x, *tmp = a->solve_work, s1;
1155: const PetscScalar *b, *aa, *v;
1156: PetscBool isdense;
1158: PetscFunctionBegin;
1159: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1160: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
1161: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
1162: if (X != B) {
1163: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
1164: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
1165: }
1166: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1167: PetscCall(MatDenseGetArrayRead(B, &b));
1168: PetscCall(MatDenseGetLDA(B, &ldb));
1169: PetscCall(MatDenseGetArray(X, &x));
1170: PetscCall(MatDenseGetLDA(X, &ldx));
1171: PetscCall(ISGetIndices(isrow, &rout));
1172: r = rout;
1173: PetscCall(ISGetIndices(iscol, &cout));
1174: c = cout;
1175: for (neq = 0; neq < B->cmap->n; neq++) {
1176: /* copy the b into temp work space according to permutation */
1177: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1179: /* forward solve the U^T */
1180: for (i = 0; i < n; i++) {
1181: v = aa + adiag[i + 1] + 1;
1182: vi = aj + adiag[i + 1] + 1;
1183: nz = adiag[i] - adiag[i + 1] - 1;
1184: s1 = tmp[i];
1185: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1186: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1187: tmp[i] = s1;
1188: }
1190: /* backward solve the L^T */
1191: for (i = n - 1; i >= 0; i--) {
1192: v = aa + ai[i];
1193: vi = aj + ai[i];
1194: nz = ai[i + 1] - ai[i];
1195: s1 = tmp[i];
1196: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1197: }
1199: /* copy tmp into x according to permutation */
1200: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1201: b += ldb;
1202: x += ldx;
1203: }
1204: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1205: PetscCall(ISRestoreIndices(isrow, &rout));
1206: PetscCall(ISRestoreIndices(iscol, &cout));
1207: PetscCall(MatDenseRestoreArrayRead(B, &b));
1208: PetscCall(MatDenseRestoreArray(X, &x));
1209: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
1210: PetscFunctionReturn(PETSC_SUCCESS);
1211: }
1213: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
1214: {
1215: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1216: IS iscol = a->col, isrow = a->row;
1217: const PetscInt *r, *c, *rout, *cout;
1218: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1219: PetscInt nz, row;
1220: PetscScalar *x, *tmp, *tmps, sum;
1221: const PetscScalar *b;
1222: const MatScalar *aa, *v;
1224: PetscFunctionBegin;
1225: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1227: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1228: PetscCall(VecGetArrayRead(bb, &b));
1229: PetscCall(VecGetArrayWrite(xx, &x));
1230: tmp = a->solve_work;
1232: PetscCall(ISGetIndices(isrow, &rout));
1233: r = rout;
1234: PetscCall(ISGetIndices(iscol, &cout));
1235: c = cout + (n - 1);
1237: /* forward solve the lower triangular */
1238: tmp[0] = b[*r++];
1239: tmps = tmp;
1240: for (row = 1; row < n; row++) {
1241: i = rout[row]; /* permuted row */
1242: v = aa + ai[i];
1243: vi = aj + ai[i];
1244: nz = a->diag[i] - ai[i];
1245: sum = b[*r++];
1246: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1247: tmp[row] = sum;
1248: }
1250: /* backward solve the upper triangular */
1251: for (row = n - 1; row >= 0; row--) {
1252: i = rout[row]; /* permuted row */
1253: v = aa + a->diag[i] + 1;
1254: vi = aj + a->diag[i] + 1;
1255: nz = ai[i + 1] - a->diag[i] - 1;
1256: sum = tmp[row];
1257: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1258: x[*c--] = tmp[row] = sum * aa[a->diag[i]];
1259: }
1260: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1261: PetscCall(ISRestoreIndices(isrow, &rout));
1262: PetscCall(ISRestoreIndices(iscol, &cout));
1263: PetscCall(VecRestoreArrayRead(bb, &b));
1264: PetscCall(VecRestoreArrayWrite(xx, &x));
1265: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1266: PetscFunctionReturn(PETSC_SUCCESS);
1267: }
1269: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1270: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1271: {
1272: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1273: PetscInt n = A->rmap->n;
1274: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag;
1275: PetscScalar *x;
1276: const PetscScalar *b;
1277: const MatScalar *aa;
1278: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1279: PetscInt adiag_i, i, nz, ai_i;
1280: const PetscInt *vi;
1281: const MatScalar *v;
1282: PetscScalar sum;
1283: #endif
1285: PetscFunctionBegin;
1286: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1288: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1289: PetscCall(VecGetArrayRead(bb, &b));
1290: PetscCall(VecGetArrayWrite(xx, &x));
1292: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1293: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1294: #else
1295: /* forward solve the lower triangular */
1296: x[0] = b[0];
1297: for (i = 1; i < n; i++) {
1298: ai_i = ai[i];
1299: v = aa + ai_i;
1300: vi = aj + ai_i;
1301: nz = adiag[i] - ai_i;
1302: sum = b[i];
1303: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1304: x[i] = sum;
1305: }
1307: /* backward solve the upper triangular */
1308: for (i = n - 1; i >= 0; i--) {
1309: adiag_i = adiag[i];
1310: v = aa + adiag_i + 1;
1311: vi = aj + adiag_i + 1;
1312: nz = ai[i + 1] - adiag_i - 1;
1313: sum = x[i];
1314: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1315: x[i] = sum * aa[adiag_i];
1316: }
1317: #endif
1318: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1319: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1320: PetscCall(VecRestoreArrayRead(bb, &b));
1321: PetscCall(VecRestoreArrayWrite(xx, &x));
1322: PetscFunctionReturn(PETSC_SUCCESS);
1323: }
1325: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1326: {
1327: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1328: IS iscol = a->col, isrow = a->row;
1329: PetscInt i, n = A->rmap->n, j;
1330: PetscInt nz;
1331: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j;
1332: PetscScalar *x, *tmp, sum;
1333: const PetscScalar *b;
1334: const MatScalar *aa, *v;
1336: PetscFunctionBegin;
1337: if (yy != xx) PetscCall(VecCopy(yy, xx));
1339: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1340: PetscCall(VecGetArrayRead(bb, &b));
1341: PetscCall(VecGetArray(xx, &x));
1342: tmp = a->solve_work;
1344: PetscCall(ISGetIndices(isrow, &rout));
1345: r = rout;
1346: PetscCall(ISGetIndices(iscol, &cout));
1347: c = cout + (n - 1);
1349: /* forward solve the lower triangular */
1350: tmp[0] = b[*r++];
1351: for (i = 1; i < n; i++) {
1352: v = aa + ai[i];
1353: vi = aj + ai[i];
1354: nz = a->diag[i] - ai[i];
1355: sum = b[*r++];
1356: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1357: tmp[i] = sum;
1358: }
1360: /* backward solve the upper triangular */
1361: for (i = n - 1; i >= 0; i--) {
1362: v = aa + a->diag[i] + 1;
1363: vi = aj + a->diag[i] + 1;
1364: nz = ai[i + 1] - a->diag[i] - 1;
1365: sum = tmp[i];
1366: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1367: tmp[i] = sum * aa[a->diag[i]];
1368: x[*c--] += tmp[i];
1369: }
1371: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1372: PetscCall(ISRestoreIndices(isrow, &rout));
1373: PetscCall(ISRestoreIndices(iscol, &cout));
1374: PetscCall(VecRestoreArrayRead(bb, &b));
1375: PetscCall(VecRestoreArray(xx, &x));
1376: PetscCall(PetscLogFlops(2.0 * a->nz));
1377: PetscFunctionReturn(PETSC_SUCCESS);
1378: }
1380: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1381: {
1382: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1383: IS iscol = a->col, isrow = a->row;
1384: PetscInt i, n = A->rmap->n, j;
1385: PetscInt nz;
1386: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
1387: PetscScalar *x, *tmp, sum;
1388: const PetscScalar *b;
1389: const MatScalar *aa, *v;
1391: PetscFunctionBegin;
1392: if (yy != xx) PetscCall(VecCopy(yy, xx));
1394: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1395: PetscCall(VecGetArrayRead(bb, &b));
1396: PetscCall(VecGetArray(xx, &x));
1397: tmp = a->solve_work;
1399: PetscCall(ISGetIndices(isrow, &rout));
1400: r = rout;
1401: PetscCall(ISGetIndices(iscol, &cout));
1402: c = cout;
1404: /* forward solve the lower triangular */
1405: tmp[0] = b[r[0]];
1406: v = aa;
1407: vi = aj;
1408: for (i = 1; i < n; i++) {
1409: nz = ai[i + 1] - ai[i];
1410: sum = b[r[i]];
1411: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1412: tmp[i] = sum;
1413: v += nz;
1414: vi += nz;
1415: }
1417: /* backward solve the upper triangular */
1418: v = aa + adiag[n - 1];
1419: vi = aj + adiag[n - 1];
1420: for (i = n - 1; i >= 0; i--) {
1421: nz = adiag[i] - adiag[i + 1] - 1;
1422: sum = tmp[i];
1423: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1424: tmp[i] = sum * v[nz];
1425: x[c[i]] += tmp[i];
1426: v += nz + 1;
1427: vi += nz + 1;
1428: }
1430: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1431: PetscCall(ISRestoreIndices(isrow, &rout));
1432: PetscCall(ISRestoreIndices(iscol, &cout));
1433: PetscCall(VecRestoreArrayRead(bb, &b));
1434: PetscCall(VecRestoreArray(xx, &x));
1435: PetscCall(PetscLogFlops(2.0 * a->nz));
1436: PetscFunctionReturn(PETSC_SUCCESS);
1437: }
1439: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1440: {
1441: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1442: IS iscol = a->col, isrow = a->row;
1443: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1444: PetscInt i, n = A->rmap->n, j;
1445: PetscInt nz;
1446: PetscScalar *x, *tmp, s1;
1447: const MatScalar *aa, *v;
1448: const PetscScalar *b;
1450: PetscFunctionBegin;
1451: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1452: PetscCall(VecGetArrayRead(bb, &b));
1453: PetscCall(VecGetArrayWrite(xx, &x));
1454: tmp = a->solve_work;
1456: PetscCall(ISGetIndices(isrow, &rout));
1457: r = rout;
1458: PetscCall(ISGetIndices(iscol, &cout));
1459: c = cout;
1461: /* copy the b into temp work space according to permutation */
1462: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1464: /* forward solve the U^T */
1465: for (i = 0; i < n; i++) {
1466: v = aa + diag[i];
1467: vi = aj + diag[i] + 1;
1468: nz = ai[i + 1] - diag[i] - 1;
1469: s1 = tmp[i];
1470: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1471: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1472: tmp[i] = s1;
1473: }
1475: /* backward solve the L^T */
1476: for (i = n - 1; i >= 0; i--) {
1477: v = aa + diag[i] - 1;
1478: vi = aj + diag[i] - 1;
1479: nz = diag[i] - ai[i];
1480: s1 = tmp[i];
1481: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1482: }
1484: /* copy tmp into x according to permutation */
1485: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1487: PetscCall(ISRestoreIndices(isrow, &rout));
1488: PetscCall(ISRestoreIndices(iscol, &cout));
1489: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1490: PetscCall(VecRestoreArrayRead(bb, &b));
1491: PetscCall(VecRestoreArrayWrite(xx, &x));
1493: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1494: PetscFunctionReturn(PETSC_SUCCESS);
1495: }
1497: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1498: {
1499: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1500: IS iscol = a->col, isrow = a->row;
1501: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1502: PetscInt i, n = A->rmap->n, j;
1503: PetscInt nz;
1504: PetscScalar *x, *tmp, s1;
1505: const MatScalar *aa, *v;
1506: const PetscScalar *b;
1508: PetscFunctionBegin;
1509: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1510: PetscCall(VecGetArrayRead(bb, &b));
1511: PetscCall(VecGetArrayWrite(xx, &x));
1512: tmp = a->solve_work;
1514: PetscCall(ISGetIndices(isrow, &rout));
1515: r = rout;
1516: PetscCall(ISGetIndices(iscol, &cout));
1517: c = cout;
1519: /* copy the b into temp work space according to permutation */
1520: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1522: /* forward solve the U^T */
1523: for (i = 0; i < n; i++) {
1524: v = aa + adiag[i + 1] + 1;
1525: vi = aj + adiag[i + 1] + 1;
1526: nz = adiag[i] - adiag[i + 1] - 1;
1527: s1 = tmp[i];
1528: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1529: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1530: tmp[i] = s1;
1531: }
1533: /* backward solve the L^T */
1534: for (i = n - 1; i >= 0; i--) {
1535: v = aa + ai[i];
1536: vi = aj + ai[i];
1537: nz = ai[i + 1] - ai[i];
1538: s1 = tmp[i];
1539: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1540: }
1542: /* copy tmp into x according to permutation */
1543: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1545: PetscCall(ISRestoreIndices(isrow, &rout));
1546: PetscCall(ISRestoreIndices(iscol, &cout));
1547: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1548: PetscCall(VecRestoreArrayRead(bb, &b));
1549: PetscCall(VecRestoreArrayWrite(xx, &x));
1551: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1552: PetscFunctionReturn(PETSC_SUCCESS);
1553: }
1555: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1556: {
1557: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1558: IS iscol = a->col, isrow = a->row;
1559: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1560: PetscInt i, n = A->rmap->n, j;
1561: PetscInt nz;
1562: PetscScalar *x, *tmp, s1;
1563: const MatScalar *aa, *v;
1564: const PetscScalar *b;
1566: PetscFunctionBegin;
1567: if (zz != xx) PetscCall(VecCopy(zz, xx));
1568: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1569: PetscCall(VecGetArrayRead(bb, &b));
1570: PetscCall(VecGetArray(xx, &x));
1571: tmp = a->solve_work;
1573: PetscCall(ISGetIndices(isrow, &rout));
1574: r = rout;
1575: PetscCall(ISGetIndices(iscol, &cout));
1576: c = cout;
1578: /* copy the b into temp work space according to permutation */
1579: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1581: /* forward solve the U^T */
1582: for (i = 0; i < n; i++) {
1583: v = aa + diag[i];
1584: vi = aj + diag[i] + 1;
1585: nz = ai[i + 1] - diag[i] - 1;
1586: s1 = tmp[i];
1587: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1588: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1589: tmp[i] = s1;
1590: }
1592: /* backward solve the L^T */
1593: for (i = n - 1; i >= 0; i--) {
1594: v = aa + diag[i] - 1;
1595: vi = aj + diag[i] - 1;
1596: nz = diag[i] - ai[i];
1597: s1 = tmp[i];
1598: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1599: }
1601: /* copy tmp into x according to permutation */
1602: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1604: PetscCall(ISRestoreIndices(isrow, &rout));
1605: PetscCall(ISRestoreIndices(iscol, &cout));
1606: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1607: PetscCall(VecRestoreArrayRead(bb, &b));
1608: PetscCall(VecRestoreArray(xx, &x));
1610: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1611: PetscFunctionReturn(PETSC_SUCCESS);
1612: }
1614: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1615: {
1616: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1617: IS iscol = a->col, isrow = a->row;
1618: const PetscInt *rout, *cout, *r, *c, *adiag = a->diag, *ai = a->i, *aj = a->j, *vi;
1619: PetscInt i, n = A->rmap->n, j;
1620: PetscInt nz;
1621: PetscScalar *x, *tmp, s1;
1622: const MatScalar *aa, *v;
1623: const PetscScalar *b;
1625: PetscFunctionBegin;
1626: if (zz != xx) PetscCall(VecCopy(zz, xx));
1627: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1628: PetscCall(VecGetArrayRead(bb, &b));
1629: PetscCall(VecGetArray(xx, &x));
1630: tmp = a->solve_work;
1632: PetscCall(ISGetIndices(isrow, &rout));
1633: r = rout;
1634: PetscCall(ISGetIndices(iscol, &cout));
1635: c = cout;
1637: /* copy the b into temp work space according to permutation */
1638: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1640: /* forward solve the U^T */
1641: for (i = 0; i < n; i++) {
1642: v = aa + adiag[i + 1] + 1;
1643: vi = aj + adiag[i + 1] + 1;
1644: nz = adiag[i] - adiag[i + 1] - 1;
1645: s1 = tmp[i];
1646: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1647: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1648: tmp[i] = s1;
1649: }
1651: /* backward solve the L^T */
1652: for (i = n - 1; i >= 0; i--) {
1653: v = aa + ai[i];
1654: vi = aj + ai[i];
1655: nz = ai[i + 1] - ai[i];
1656: s1 = tmp[i];
1657: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1658: }
1660: /* copy tmp into x according to permutation */
1661: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1663: PetscCall(ISRestoreIndices(isrow, &rout));
1664: PetscCall(ISRestoreIndices(iscol, &cout));
1665: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1666: PetscCall(VecRestoreArrayRead(bb, &b));
1667: PetscCall(VecRestoreArray(xx, &x));
1669: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1670: PetscFunctionReturn(PETSC_SUCCESS);
1671: }
1673: /*
1674: ilu() under revised new data structure.
1675: Factored arrays bj and ba are stored as
1676: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1678: bi=fact->i is an array of size n+1, in which
1679: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1680: bi[n]: points to L(n-1,n-1)+1
1682: bdiag=fact->diag is an array of size n+1,in which
1683: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1684: bdiag[n]: points to entry of U(n-1,0)-1
1686: U(i,:) contains bdiag[i] as its last entry, i.e.,
1687: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1688: */
1689: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1690: {
1691: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1692: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag = a->diag;
1693: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1694: IS isicol;
1696: PetscFunctionBegin;
1697: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1698: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1699: b = (Mat_SeqAIJ *)fact->data;
1701: /* allocate matrix arrays for new data structure */
1702: PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscScalar), (void **)&b->a));
1703: PetscCall(PetscShmgetAllocateArray(ai[n] + 1, sizeof(PetscInt), (void **)&b->j));
1704: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1705: if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1706: b->free_a = PETSC_TRUE;
1707: b->free_ij = PETSC_TRUE;
1709: if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1710: bdiag = b->diag;
1712: /* set bi and bj with new data structure */
1713: bi = b->i;
1714: bj = b->j;
1716: /* L part */
1717: bi[0] = 0;
1718: for (i = 0; i < n; i++) {
1719: nz = adiag[i] - ai[i];
1720: bi[i + 1] = bi[i] + nz;
1721: aj = a->j + ai[i];
1722: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1723: }
1725: /* U part */
1726: bdiag[n] = bi[n] - 1;
1727: for (i = n - 1; i >= 0; i--) {
1728: nz = ai[i + 1] - adiag[i] - 1;
1729: aj = a->j + adiag[i] + 1;
1730: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1731: /* diag[i] */
1732: bj[k++] = i;
1733: bdiag[i] = bdiag[i + 1] + nz + 1;
1734: }
1736: fact->factortype = MAT_FACTOR_ILU;
1737: fact->info.factor_mallocs = 0;
1738: fact->info.fill_ratio_given = info->fill;
1739: fact->info.fill_ratio_needed = 1.0;
1740: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1741: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1743: b = (Mat_SeqAIJ *)fact->data;
1744: b->row = isrow;
1745: b->col = iscol;
1746: b->icol = isicol;
1747: PetscCall(PetscMalloc1(fact->rmap->n + 1, &b->solve_work));
1748: PetscCall(PetscObjectReference((PetscObject)isrow));
1749: PetscCall(PetscObjectReference((PetscObject)iscol));
1750: PetscFunctionReturn(PETSC_SUCCESS);
1751: }
1753: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1754: {
1755: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1756: IS isicol;
1757: const PetscInt *r, *ic;
1758: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1759: PetscInt *bi, *cols, nnz, *cols_lvl;
1760: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1761: PetscInt i, levels, diagonal_fill;
1762: PetscBool col_identity, row_identity, missing;
1763: PetscReal f;
1764: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1765: PetscBT lnkbt;
1766: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1767: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1768: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1770: PetscFunctionBegin;
1771: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1772: PetscCall(MatMissingDiagonal(A, &missing, &i));
1773: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1775: levels = (PetscInt)info->levels;
1776: PetscCall(ISIdentity(isrow, &row_identity));
1777: PetscCall(ISIdentity(iscol, &col_identity));
1778: if (!levels && row_identity && col_identity) {
1779: /* special case: ilu(0) with natural ordering */
1780: PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1781: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1782: PetscFunctionReturn(PETSC_SUCCESS);
1783: }
1785: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1786: PetscCall(ISGetIndices(isrow, &r));
1787: PetscCall(ISGetIndices(isicol, &ic));
1789: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1790: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1791: PetscCall(PetscMalloc1(n + 1, &bdiag));
1792: bi[0] = bdiag[0] = 0;
1793: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1795: /* create a linked list for storing column indices of the active row */
1796: nlnk = n + 1;
1797: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1799: /* initial FreeSpace size is f*(ai[n]+1) */
1800: f = info->fill;
1801: diagonal_fill = (PetscInt)info->diagonal_fill;
1802: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1803: current_space = free_space;
1804: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1805: current_space_lvl = free_space_lvl;
1806: for (i = 0; i < n; i++) {
1807: nzi = 0;
1808: /* copy current row into linked list */
1809: nnz = ai[r[i] + 1] - ai[r[i]];
1810: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1811: cols = aj + ai[r[i]];
1812: lnk[i] = -1; /* marker to indicate if diagonal exists */
1813: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1814: nzi += nlnk;
1816: /* make sure diagonal entry is included */
1817: if (diagonal_fill && lnk[i] == -1) {
1818: fm = n;
1819: while (lnk[fm] < i) fm = lnk[fm];
1820: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1821: lnk[fm] = i;
1822: lnk_lvl[i] = 0;
1823: nzi++;
1824: dcount++;
1825: }
1827: /* add pivot rows into the active row */
1828: nzbd = 0;
1829: prow = lnk[n];
1830: while (prow < i) {
1831: nnz = bdiag[prow];
1832: cols = bj_ptr[prow] + nnz + 1;
1833: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1834: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1835: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1836: nzi += nlnk;
1837: prow = lnk[prow];
1838: nzbd++;
1839: }
1840: bdiag[i] = nzbd;
1841: bi[i + 1] = bi[i] + nzi;
1842: /* if free space is not available, make more free space */
1843: if (current_space->local_remaining < nzi) {
1844: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1845: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1846: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1847: reallocs++;
1848: }
1850: /* copy data into free_space and free_space_lvl, then initialize lnk */
1851: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1852: bj_ptr[i] = current_space->array;
1853: bjlvl_ptr[i] = current_space_lvl->array;
1855: /* make sure the active row i has diagonal entry */
1856: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1858: current_space->array += nzi;
1859: current_space->local_used += nzi;
1860: current_space->local_remaining -= nzi;
1861: current_space_lvl->array += nzi;
1862: current_space_lvl->local_used += nzi;
1863: current_space_lvl->local_remaining -= nzi;
1864: }
1866: PetscCall(ISRestoreIndices(isrow, &r));
1867: PetscCall(ISRestoreIndices(isicol, &ic));
1868: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1869: PetscCall(PetscShmgetAllocateArray(bi[n] + 1, sizeof(PetscInt), (void **)&bj));
1870: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1872: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1873: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1874: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1876: #if defined(PETSC_USE_INFO)
1877: {
1878: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1879: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1880: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1881: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1882: PetscCall(PetscInfo(A, "for best performance.\n"));
1883: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1884: }
1885: #endif
1886: /* put together the new matrix */
1887: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1888: b = (Mat_SeqAIJ *)fact->data;
1889: b->free_ij = PETSC_TRUE;
1890: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1891: b->free_a = PETSC_TRUE;
1892: b->j = bj;
1893: b->i = bi;
1894: b->diag = bdiag;
1895: b->ilen = NULL;
1896: b->imax = NULL;
1897: b->row = isrow;
1898: b->col = iscol;
1899: PetscCall(PetscObjectReference((PetscObject)isrow));
1900: PetscCall(PetscObjectReference((PetscObject)iscol));
1901: b->icol = isicol;
1903: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
1904: /* In b structure: Free imax, ilen, old a, old j.
1905: Allocate bdiag, solve_work, new a, new j */
1906: b->maxnz = b->nz = bdiag[0] + 1;
1908: fact->info.factor_mallocs = reallocs;
1909: fact->info.fill_ratio_given = f;
1910: fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1911: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1912: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1913: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1914: PetscFunctionReturn(PETSC_SUCCESS);
1915: }
1917: #if 0
1918: // unused
1919: static PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1920: {
1921: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1922: IS isicol;
1923: const PetscInt *r, *ic;
1924: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1925: PetscInt *bi, *cols, nnz, *cols_lvl;
1926: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1927: PetscInt i, levels, diagonal_fill;
1928: PetscBool col_identity, row_identity;
1929: PetscReal f;
1930: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1931: PetscBT lnkbt;
1932: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1933: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1934: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1935: PetscBool missing;
1937: PetscFunctionBegin;
1938: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1939: PetscCall(MatMissingDiagonal(A, &missing, &i));
1940: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
1942: f = info->fill;
1943: levels = (PetscInt)info->levels;
1944: diagonal_fill = (PetscInt)info->diagonal_fill;
1946: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1948: PetscCall(ISIdentity(isrow, &row_identity));
1949: PetscCall(ISIdentity(iscol, &col_identity));
1950: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1951: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_TRUE));
1953: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1954: if (a->inode.size) (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1955: fact->factortype = MAT_FACTOR_ILU;
1956: (fact)->info.factor_mallocs = 0;
1957: (fact)->info.fill_ratio_given = info->fill;
1958: (fact)->info.fill_ratio_needed = 1.0;
1960: b = (Mat_SeqAIJ *)fact->data;
1961: b->row = isrow;
1962: b->col = iscol;
1963: b->icol = isicol;
1964: PetscCall(PetscMalloc1((fact)->rmap->n + 1, &b->solve_work));
1965: PetscCall(PetscObjectReference((PetscObject)isrow));
1966: PetscCall(PetscObjectReference((PetscObject)iscol));
1967: PetscFunctionReturn(PETSC_SUCCESS);
1968: }
1970: PetscCall(ISGetIndices(isrow, &r));
1971: PetscCall(ISGetIndices(isicol, &ic));
1973: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1974: PetscCall(PetscShmgetAllocateArray(n + 1,sizeof(PetscInt),(void **)&bi));
1975: PetscCall(PetscMalloc1(n + 1, &bdiag));
1976: bi[0] = bdiag[0] = 0;
1978: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1980: /* create a linked list for storing column indices of the active row */
1981: nlnk = n + 1;
1982: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1984: /* initial FreeSpace size is f*(ai[n]+1) */
1985: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1986: current_space = free_space;
1987: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1988: current_space_lvl = free_space_lvl;
1990: for (i = 0; i < n; i++) {
1991: nzi = 0;
1992: /* copy current row into linked list */
1993: nnz = ai[r[i] + 1] - ai[r[i]];
1994: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1995: cols = aj + ai[r[i]];
1996: lnk[i] = -1; /* marker to indicate if diagonal exists */
1997: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1998: nzi += nlnk;
2000: /* make sure diagonal entry is included */
2001: if (diagonal_fill && lnk[i] == -1) {
2002: fm = n;
2003: while (lnk[fm] < i) fm = lnk[fm];
2004: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
2005: lnk[fm] = i;
2006: lnk_lvl[i] = 0;
2007: nzi++;
2008: dcount++;
2009: }
2011: /* add pivot rows into the active row */
2012: nzbd = 0;
2013: prow = lnk[n];
2014: while (prow < i) {
2015: nnz = bdiag[prow];
2016: cols = bj_ptr[prow] + nnz + 1;
2017: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
2018: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
2019: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
2020: nzi += nlnk;
2021: prow = lnk[prow];
2022: nzbd++;
2023: }
2024: bdiag[i] = nzbd;
2025: bi[i + 1] = bi[i] + nzi;
2027: /* if free space is not available, make more free space */
2028: if (current_space->local_remaining < nzi) {
2029: nnz = PetscIntMultTruncate(nzi, n - i); /* estimated and max additional space needed */
2030: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
2031: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
2032: reallocs++;
2033: }
2035: /* copy data into free_space and free_space_lvl, then initialize lnk */
2036: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2037: bj_ptr[i] = current_space->array;
2038: bjlvl_ptr[i] = current_space_lvl->array;
2040: /* make sure the active row i has diagonal entry */
2041: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
2043: current_space->array += nzi;
2044: current_space->local_used += nzi;
2045: current_space->local_remaining -= nzi;
2046: current_space_lvl->array += nzi;
2047: current_space_lvl->local_used += nzi;
2048: current_space_lvl->local_remaining -= nzi;
2049: }
2051: PetscCall(ISRestoreIndices(isrow, &r));
2052: PetscCall(ISRestoreIndices(isicol, &ic));
2054: /* destroy list of free space and other temporary arrays */
2055: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscInt),(void **)&bj));
2056: PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); /* copy free_space -> bj */
2057: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2058: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2059: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
2061: #if defined(PETSC_USE_INFO)
2062: {
2063: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2064: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
2065: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
2066: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
2067: PetscCall(PetscInfo(A, "for best performance.\n"));
2068: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
2069: }
2070: #endif
2072: /* put together the new matrix */
2073: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
2074: b = (Mat_SeqAIJ *)fact->data;
2075: b->free_ij = PETSC_TRUE;
2076: PetscCall(PetscShmgetAllocateArray(bi[n] + 1,sizeof(PetscScalar),(void **)&b->a));
2077: b->free_a = PETSC_TRUE;
2078: b->j = bj;
2079: b->i = bi;
2080: for (i = 0; i < n; i++) bdiag[i] += bi[i];
2081: b->diag = bdiag;
2082: b->ilen = NULL;
2083: b->imax = NULL;
2084: b->row = isrow;
2085: b->col = iscol;
2086: PetscCall(PetscObjectReference((PetscObject)isrow));
2087: PetscCall(PetscObjectReference((PetscObject)iscol));
2088: b->icol = isicol;
2089: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
2090: /* In b structure: Free imax, ilen, old a, old j.
2091: Allocate bdiag, solve_work, new a, new j */
2092: b->maxnz = b->nz = bi[n];
2094: fact->info.factor_mallocs = reallocs;
2095: fact->info.fill_ratio_given = f;
2096: fact->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
2097: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2098: if (a->inode.size) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2099: PetscFunctionReturn(PETSC_SUCCESS);
2100: }
2101: #endif
2103: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
2104: {
2105: Mat C = B;
2106: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2107: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2108: IS ip = b->row, iip = b->icol;
2109: const PetscInt *rip, *riip;
2110: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
2111: PetscInt *ai = a->i, *aj = a->j;
2112: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
2113: MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
2114: PetscBool perm_identity;
2115: FactorShiftCtx sctx;
2116: PetscReal rs;
2117: const MatScalar *aa, *v;
2118: MatScalar d;
2120: PetscFunctionBegin;
2121: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2122: /* MatPivotSetUp(): initialize shift context sctx */
2123: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2125: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2126: sctx.shift_top = info->zeropivot;
2127: for (i = 0; i < mbs; i++) {
2128: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2129: d = (aa)[a->diag[i]];
2130: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2131: v = aa + ai[i];
2132: nz = ai[i + 1] - ai[i];
2133: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2134: if (rs > sctx.shift_top) sctx.shift_top = rs;
2135: }
2136: sctx.shift_top *= 1.1;
2137: sctx.nshift_max = 5;
2138: sctx.shift_lo = 0.;
2139: sctx.shift_hi = 1.;
2140: }
2142: PetscCall(ISGetIndices(ip, &rip));
2143: PetscCall(ISGetIndices(iip, &riip));
2145: /* allocate working arrays
2146: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2147: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2148: */
2149: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
2151: do {
2152: sctx.newshift = PETSC_FALSE;
2154: for (i = 0; i < mbs; i++) c2r[i] = mbs;
2155: if (mbs) il[0] = 0;
2157: for (k = 0; k < mbs; k++) {
2158: /* zero rtmp */
2159: nz = bi[k + 1] - bi[k];
2160: bjtmp = bj + bi[k];
2161: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2163: /* load in initial unfactored row */
2164: bval = ba + bi[k];
2165: jmin = ai[rip[k]];
2166: jmax = ai[rip[k] + 1];
2167: for (j = jmin; j < jmax; j++) {
2168: col = riip[aj[j]];
2169: if (col >= k) { /* only take upper triangular entry */
2170: rtmp[col] = aa[j];
2171: *bval++ = 0.0; /* for in-place factorization */
2172: }
2173: }
2174: /* shift the diagonal of the matrix: ZeropivotApply() */
2175: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2177: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2178: dk = rtmp[k];
2179: i = c2r[k]; /* first row to be added to k_th row */
2181: while (i < k) {
2182: nexti = c2r[i]; /* next row to be added to k_th row */
2184: /* compute multiplier, update diag(k) and U(i,k) */
2185: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2186: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
2187: dk += uikdi * ba[ili]; /* update diag[k] */
2188: ba[ili] = uikdi; /* -U(i,k) */
2190: /* add multiple of row i to k-th row */
2191: jmin = ili + 1;
2192: jmax = bi[i + 1];
2193: if (jmin < jmax) {
2194: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2195: /* update il and c2r for row i */
2196: il[i] = jmin;
2197: j = bj[jmin];
2198: c2r[i] = c2r[j];
2199: c2r[j] = i;
2200: }
2201: i = nexti;
2202: }
2204: /* copy data into U(k,:) */
2205: rs = 0.0;
2206: jmin = bi[k];
2207: jmax = bi[k + 1] - 1;
2208: if (jmin < jmax) {
2209: for (j = jmin; j < jmax; j++) {
2210: col = bj[j];
2211: ba[j] = rtmp[col];
2212: rs += PetscAbsScalar(ba[j]);
2213: }
2214: /* add the k-th row into il and c2r */
2215: il[k] = jmin;
2216: i = bj[jmin];
2217: c2r[k] = c2r[i];
2218: c2r[i] = k;
2219: }
2221: /* MatPivotCheck() */
2222: sctx.rs = rs;
2223: sctx.pv = dk;
2224: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
2225: if (sctx.newshift) break;
2226: dk = sctx.pv;
2228: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
2229: }
2230: } while (sctx.newshift);
2232: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2233: PetscCall(PetscFree3(rtmp, il, c2r));
2234: PetscCall(ISRestoreIndices(ip, &rip));
2235: PetscCall(ISRestoreIndices(iip, &riip));
2237: PetscCall(ISIdentity(ip, &perm_identity));
2238: if (perm_identity) {
2239: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2240: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2241: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2242: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2243: } else {
2244: B->ops->solve = MatSolve_SeqSBAIJ_1;
2245: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2246: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2247: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2248: }
2250: C->assembled = PETSC_TRUE;
2251: C->preallocated = PETSC_TRUE;
2253: PetscCall(PetscLogFlops(C->rmap->n));
2255: /* MatPivotView() */
2256: if (sctx.nshift) {
2257: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2258: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
2259: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2260: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2261: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
2262: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
2263: }
2264: }
2265: PetscFunctionReturn(PETSC_SUCCESS);
2266: }
2268: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
2269: {
2270: Mat C = B;
2271: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2272: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
2273: IS ip = b->row, iip = b->icol;
2274: const PetscInt *rip, *riip;
2275: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
2276: PetscInt *ai = a->i, *aj = a->j;
2277: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
2278: MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
2279: const MatScalar *aa, *v;
2280: PetscBool perm_identity;
2281: FactorShiftCtx sctx;
2282: PetscReal rs;
2283: MatScalar d;
2285: PetscFunctionBegin;
2286: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2287: /* MatPivotSetUp(): initialize shift context sctx */
2288: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
2290: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2291: sctx.shift_top = info->zeropivot;
2292: for (i = 0; i < mbs; i++) {
2293: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2294: d = (aa)[a->diag[i]];
2295: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2296: v = aa + ai[i];
2297: nz = ai[i + 1] - ai[i];
2298: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
2299: if (rs > sctx.shift_top) sctx.shift_top = rs;
2300: }
2301: sctx.shift_top *= 1.1;
2302: sctx.nshift_max = 5;
2303: sctx.shift_lo = 0.;
2304: sctx.shift_hi = 1.;
2305: }
2307: PetscCall(ISGetIndices(ip, &rip));
2308: PetscCall(ISGetIndices(iip, &riip));
2310: /* initialization */
2311: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
2313: do {
2314: sctx.newshift = PETSC_FALSE;
2316: for (i = 0; i < mbs; i++) jl[i] = mbs;
2317: il[0] = 0;
2319: for (k = 0; k < mbs; k++) {
2320: /* zero rtmp */
2321: nz = bi[k + 1] - bi[k];
2322: bjtmp = bj + bi[k];
2323: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
2325: bval = ba + bi[k];
2326: /* initialize k-th row by the perm[k]-th row of A */
2327: jmin = ai[rip[k]];
2328: jmax = ai[rip[k] + 1];
2329: for (j = jmin; j < jmax; j++) {
2330: col = riip[aj[j]];
2331: if (col >= k) { /* only take upper triangular entry */
2332: rtmp[col] = aa[j];
2333: *bval++ = 0.0; /* for in-place factorization */
2334: }
2335: }
2336: /* shift the diagonal of the matrix */
2337: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2339: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2340: dk = rtmp[k];
2341: i = jl[k]; /* first row to be added to k_th row */
2343: while (i < k) {
2344: nexti = jl[i]; /* next row to be added to k_th row */
2346: /* compute multiplier, update diag(k) and U(i,k) */
2347: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
2348: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
2349: dk += uikdi * ba[ili];
2350: ba[ili] = uikdi; /* -U(i,k) */
2352: /* add multiple of row i to k-th row */
2353: jmin = ili + 1;
2354: jmax = bi[i + 1];
2355: if (jmin < jmax) {
2356: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
2357: /* update il and jl for row i */
2358: il[i] = jmin;
2359: j = bj[jmin];
2360: jl[i] = jl[j];
2361: jl[j] = i;
2362: }
2363: i = nexti;
2364: }
2366: /* shift the diagonals when zero pivot is detected */
2367: /* compute rs=sum of abs(off-diagonal) */
2368: rs = 0.0;
2369: jmin = bi[k] + 1;
2370: nz = bi[k + 1] - jmin;
2371: bcol = bj + jmin;
2372: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
2374: sctx.rs = rs;
2375: sctx.pv = dk;
2376: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
2377: if (sctx.newshift) break;
2378: dk = sctx.pv;
2380: /* copy data into U(k,:) */
2381: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
2382: jmin = bi[k] + 1;
2383: jmax = bi[k + 1];
2384: if (jmin < jmax) {
2385: for (j = jmin; j < jmax; j++) {
2386: col = bj[j];
2387: ba[j] = rtmp[col];
2388: }
2389: /* add the k-th row into il and jl */
2390: il[k] = jmin;
2391: i = bj[jmin];
2392: jl[k] = jl[i];
2393: jl[i] = k;
2394: }
2395: }
2396: } while (sctx.newshift);
2398: PetscCall(PetscFree3(rtmp, il, jl));
2399: PetscCall(ISRestoreIndices(ip, &rip));
2400: PetscCall(ISRestoreIndices(iip, &riip));
2401: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2403: PetscCall(ISIdentity(ip, &perm_identity));
2404: if (perm_identity) {
2405: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2406: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2407: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2408: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2409: } else {
2410: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2411: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2412: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2413: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2414: }
2416: C->assembled = PETSC_TRUE;
2417: C->preallocated = PETSC_TRUE;
2419: PetscCall(PetscLogFlops(C->rmap->n));
2420: if (sctx.nshift) {
2421: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2422: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2423: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2424: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2425: }
2426: }
2427: PetscFunctionReturn(PETSC_SUCCESS);
2428: }
2430: /*
2431: icc() under revised new data structure.
2432: Factored arrays bj and ba are stored as
2433: U(0,:),...,U(i,:),U(n-1,:)
2435: ui=fact->i is an array of size n+1, in which
2436: ui+
2437: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2438: ui[n]: points to U(n-1,n-1)+1
2440: udiag=fact->diag is an array of size n,in which
2441: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2443: U(i,:) contains udiag[i] as its last entry, i.e.,
2444: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2445: */
2447: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2448: {
2449: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2450: Mat_SeqSBAIJ *b;
2451: PetscBool perm_identity, missing;
2452: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2453: const PetscInt *rip, *riip;
2454: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2455: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2456: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2457: PetscReal fill = info->fill, levels = info->levels;
2458: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2459: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2460: PetscBT lnkbt;
2461: IS iperm;
2463: PetscFunctionBegin;
2464: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2465: PetscCall(MatMissingDiagonal(A, &missing, &d));
2466: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2467: PetscCall(ISIdentity(perm, &perm_identity));
2468: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2470: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2471: PetscCall(PetscMalloc1(am + 1, &udiag));
2472: ui[0] = 0;
2474: /* ICC(0) without matrix ordering: simply rearrange column indices */
2475: if (!levels && perm_identity) {
2476: for (i = 0; i < am; i++) {
2477: ncols = ai[i + 1] - a->diag[i];
2478: ui[i + 1] = ui[i] + ncols;
2479: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2480: }
2481: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2482: cols = uj;
2483: for (i = 0; i < am; i++) {
2484: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2485: ncols = ai[i + 1] - a->diag[i] - 1;
2486: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2487: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2488: }
2489: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2490: PetscCall(ISGetIndices(iperm, &riip));
2491: PetscCall(ISGetIndices(perm, &rip));
2493: /* initialization */
2494: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2496: /* jl: linked list for storing indices of the pivot rows
2497: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2498: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2499: for (i = 0; i < am; i++) {
2500: jl[i] = am;
2501: il[i] = 0;
2502: }
2504: /* create and initialize a linked list for storing column indices of the active row k */
2505: nlnk = am + 1;
2506: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2508: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2509: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2510: current_space = free_space;
2511: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2512: current_space_lvl = free_space_lvl;
2514: for (k = 0; k < am; k++) { /* for each active row k */
2515: /* initialize lnk by the column indices of row rip[k] of A */
2516: nzk = 0;
2517: ncols = ai[rip[k] + 1] - ai[rip[k]];
2518: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2519: ncols_upper = 0;
2520: for (j = 0; j < ncols; j++) {
2521: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2522: if (riip[i] >= k) { /* only take upper triangular entry */
2523: ajtmp[ncols_upper] = i;
2524: ncols_upper++;
2525: }
2526: }
2527: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2528: nzk += nlnk;
2530: /* update lnk by computing fill-in for each pivot row to be merged in */
2531: prow = jl[k]; /* 1st pivot row */
2533: while (prow < k) {
2534: nextprow = jl[prow];
2536: /* merge prow into k-th row */
2537: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2538: jmax = ui[prow + 1];
2539: ncols = jmax - jmin;
2540: i = jmin - ui[prow];
2541: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2542: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2543: j = *(uj - 1);
2544: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2545: nzk += nlnk;
2547: /* update il and jl for prow */
2548: if (jmin < jmax) {
2549: il[prow] = jmin;
2550: j = *cols;
2551: jl[prow] = jl[j];
2552: jl[j] = prow;
2553: }
2554: prow = nextprow;
2555: }
2557: /* if free space is not available, make more free space */
2558: if (current_space->local_remaining < nzk) {
2559: i = am - k + 1; /* num of unfactored rows */
2560: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2561: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2562: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2563: reallocs++;
2564: }
2566: /* copy data into free_space and free_space_lvl, then initialize lnk */
2567: PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2568: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2570: /* add the k-th row into il and jl */
2571: if (nzk > 1) {
2572: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2573: jl[k] = jl[i];
2574: jl[i] = k;
2575: il[k] = ui[k] + 1;
2576: }
2577: uj_ptr[k] = current_space->array;
2578: uj_lvl_ptr[k] = current_space_lvl->array;
2580: current_space->array += nzk;
2581: current_space->local_used += nzk;
2582: current_space->local_remaining -= nzk;
2584: current_space_lvl->array += nzk;
2585: current_space_lvl->local_used += nzk;
2586: current_space_lvl->local_remaining -= nzk;
2588: ui[k + 1] = ui[k] + nzk;
2589: }
2591: PetscCall(ISRestoreIndices(perm, &rip));
2592: PetscCall(ISRestoreIndices(iperm, &riip));
2593: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2594: PetscCall(PetscFree(ajtmp));
2596: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2597: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2598: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2599: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2600: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2602: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2604: /* put together the new matrix in MATSEQSBAIJ format */
2605: b = (Mat_SeqSBAIJ *)fact->data;
2606: b->free_ij = PETSC_TRUE;
2607: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2608: b->free_a = PETSC_TRUE;
2609: b->j = uj;
2610: b->i = ui;
2611: b->diag = udiag;
2612: b->free_diag = PETSC_TRUE;
2613: b->ilen = NULL;
2614: b->imax = NULL;
2615: b->row = perm;
2616: b->col = perm;
2617: PetscCall(PetscObjectReference((PetscObject)perm));
2618: PetscCall(PetscObjectReference((PetscObject)perm));
2619: b->icol = iperm;
2620: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2622: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2624: b->maxnz = b->nz = ui[am];
2626: fact->info.factor_mallocs = reallocs;
2627: fact->info.fill_ratio_given = fill;
2628: if (ai[am] != 0) {
2629: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2630: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2631: } else {
2632: fact->info.fill_ratio_needed = 0.0;
2633: }
2634: #if defined(PETSC_USE_INFO)
2635: if (ai[am] != 0) {
2636: PetscReal af = fact->info.fill_ratio_needed;
2637: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2638: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2639: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2640: } else {
2641: PetscCall(PetscInfo(A, "Empty matrix\n"));
2642: }
2643: #endif
2644: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2645: PetscFunctionReturn(PETSC_SUCCESS);
2646: }
2648: #if 0
2649: // unused
2650: static PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2651: {
2652: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2653: Mat_SeqSBAIJ *b;
2654: PetscBool perm_identity, missing;
2655: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2656: const PetscInt *rip, *riip;
2657: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2658: PetscInt nlnk, *lnk, *lnk_lvl = NULL, d;
2659: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2660: PetscReal fill = info->fill, levels = info->levels;
2661: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2662: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2663: PetscBT lnkbt;
2664: IS iperm;
2666: PetscFunctionBegin;
2667: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2668: PetscCall(MatMissingDiagonal(A, &missing, &d));
2669: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
2670: PetscCall(ISIdentity(perm, &perm_identity));
2671: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2673: PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
2674: PetscCall(PetscMalloc1(am + 1, &udiag));
2675: ui[0] = 0;
2677: /* ICC(0) without matrix ordering: simply copies fill pattern */
2678: if (!levels && perm_identity) {
2679: for (i = 0; i < am; i++) {
2680: ui[i + 1] = ui[i] + ai[i + 1] - a->diag[i];
2681: udiag[i] = ui[i];
2682: }
2683: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2684: cols = uj;
2685: for (i = 0; i < am; i++) {
2686: aj = a->j + a->diag[i];
2687: ncols = ui[i + 1] - ui[i];
2688: for (j = 0; j < ncols; j++) *cols++ = *aj++;
2689: }
2690: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2691: PetscCall(ISGetIndices(iperm, &riip));
2692: PetscCall(ISGetIndices(perm, &rip));
2694: /* initialization */
2695: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2697: /* jl: linked list for storing indices of the pivot rows
2698: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2699: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2700: for (i = 0; i < am; i++) {
2701: jl[i] = am;
2702: il[i] = 0;
2703: }
2705: /* create and initialize a linked list for storing column indices of the active row k */
2706: nlnk = am + 1;
2707: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2709: /* initial FreeSpace size is fill*(ai[am]+1) */
2710: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
2711: current_space = free_space;
2712: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space_lvl));
2713: current_space_lvl = free_space_lvl;
2715: for (k = 0; k < am; k++) { /* for each active row k */
2716: /* initialize lnk by the column indices of row rip[k] of A */
2717: nzk = 0;
2718: ncols = ai[rip[k] + 1] - ai[rip[k]];
2719: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2720: ncols_upper = 0;
2721: for (j = 0; j < ncols; j++) {
2722: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2723: if (riip[i] >= k) { /* only take upper triangular entry */
2724: ajtmp[ncols_upper] = i;
2725: ncols_upper++;
2726: }
2727: }
2728: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2729: nzk += nlnk;
2731: /* update lnk by computing fill-in for each pivot row to be merged in */
2732: prow = jl[k]; /* 1st pivot row */
2734: while (prow < k) {
2735: nextprow = jl[prow];
2737: /* merge prow into k-th row */
2738: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2739: jmax = ui[prow + 1];
2740: ncols = jmax - jmin;
2741: i = jmin - ui[prow];
2742: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2743: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2744: j = *(uj - 1);
2745: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2746: nzk += nlnk;
2748: /* update il and jl for prow */
2749: if (jmin < jmax) {
2750: il[prow] = jmin;
2751: j = *cols;
2752: jl[prow] = jl[j];
2753: jl[j] = prow;
2754: }
2755: prow = nextprow;
2756: }
2758: /* if free space is not available, make more free space */
2759: if (current_space->local_remaining < nzk) {
2760: i = am - k + 1; /* num of unfactored rows */
2761: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2762: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2763: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2764: reallocs++;
2765: }
2767: /* copy data into free_space and free_space_lvl, then initialize lnk */
2768: PetscCheck(nzk, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2769: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2771: /* add the k-th row into il and jl */
2772: if (nzk > 1) {
2773: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2774: jl[k] = jl[i];
2775: jl[i] = k;
2776: il[k] = ui[k] + 1;
2777: }
2778: uj_ptr[k] = current_space->array;
2779: uj_lvl_ptr[k] = current_space_lvl->array;
2781: current_space->array += nzk;
2782: current_space->local_used += nzk;
2783: current_space->local_remaining -= nzk;
2785: current_space_lvl->array += nzk;
2786: current_space_lvl->local_used += nzk;
2787: current_space_lvl->local_remaining -= nzk;
2789: ui[k + 1] = ui[k] + nzk;
2790: }
2792: #if defined(PETSC_USE_INFO)
2793: if (ai[am] != 0) {
2794: PetscReal af = (PetscReal)ui[am] / ((PetscReal)ai[am]);
2795: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2796: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2797: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2798: } else {
2799: PetscCall(PetscInfo(A, "Empty matrix\n"));
2800: }
2801: #endif
2803: PetscCall(ISRestoreIndices(perm, &rip));
2804: PetscCall(ISRestoreIndices(iperm, &riip));
2805: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2806: PetscCall(PetscFree(ajtmp));
2808: /* destroy list of free space and other temporary array(s) */
2809: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
2810: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
2811: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2812: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2814: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2816: /* put together the new matrix in MATSEQSBAIJ format */
2818: b = (Mat_SeqSBAIJ *)fact->data;
2819: b->free_ij = PETSC_TRUE;
2820: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
2821: b->free_a = PETSC_TRUE;
2822: b->j = uj;
2823: b->i = ui;
2824: b->diag = udiag;
2825: b->free_diag = PETSC_TRUE;
2826: b->ilen = NULL;
2827: b->imax = NULL;
2828: b->row = perm;
2829: b->col = perm;
2831: PetscCall(PetscObjectReference((PetscObject)perm));
2832: PetscCall(PetscObjectReference((PetscObject)perm));
2834: b->icol = iperm;
2835: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2836: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2837: b->maxnz = b->nz = ui[am];
2839: fact->info.factor_mallocs = reallocs;
2840: fact->info.fill_ratio_given = fill;
2841: if (ai[am] != 0) {
2842: fact->info.fill_ratio_needed = ((PetscReal)ui[am]) / ((PetscReal)ai[am]);
2843: } else {
2844: fact->info.fill_ratio_needed = 0.0;
2845: }
2846: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2847: PetscFunctionReturn(PETSC_SUCCESS);
2848: }
2849: #endif
2851: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2852: {
2853: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2854: Mat_SeqSBAIJ *b;
2855: PetscBool perm_identity, missing;
2856: PetscReal fill = info->fill;
2857: const PetscInt *rip, *riip;
2858: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2859: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2860: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2861: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2862: PetscBT lnkbt;
2863: IS iperm;
2865: PetscFunctionBegin;
2866: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2867: PetscCall(MatMissingDiagonal(A, &missing, &i));
2868: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
2870: /* check whether perm is the identity mapping */
2871: PetscCall(ISIdentity(perm, &perm_identity));
2872: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2873: PetscCall(ISGetIndices(iperm, &riip));
2874: PetscCall(ISGetIndices(perm, &rip));
2876: /* initialization */
2877: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2878: PetscCall(PetscMalloc1(am + 1, &udiag));
2879: ui[0] = 0;
2881: /* jl: linked list for storing indices of the pivot rows
2882: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2883: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2884: for (i = 0; i < am; i++) {
2885: jl[i] = am;
2886: il[i] = 0;
2887: }
2889: /* create and initialize a linked list for storing column indices of the active row k */
2890: nlnk = am + 1;
2891: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2893: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2894: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2895: current_space = free_space;
2897: for (k = 0; k < am; k++) { /* for each active row k */
2898: /* initialize lnk by the column indices of row rip[k] of A */
2899: nzk = 0;
2900: ncols = ai[rip[k] + 1] - ai[rip[k]];
2901: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2902: ncols_upper = 0;
2903: for (j = 0; j < ncols; j++) {
2904: i = riip[*(aj + ai[rip[k]] + j)];
2905: if (i >= k) { /* only take upper triangular entry */
2906: cols[ncols_upper] = i;
2907: ncols_upper++;
2908: }
2909: }
2910: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2911: nzk += nlnk;
2913: /* update lnk by computing fill-in for each pivot row to be merged in */
2914: prow = jl[k]; /* 1st pivot row */
2916: while (prow < k) {
2917: nextprow = jl[prow];
2918: /* merge prow into k-th row */
2919: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2920: jmax = ui[prow + 1];
2921: ncols = jmax - jmin;
2922: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2923: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2924: nzk += nlnk;
2926: /* update il and jl for prow */
2927: if (jmin < jmax) {
2928: il[prow] = jmin;
2929: j = *uj_ptr;
2930: jl[prow] = jl[j];
2931: jl[j] = prow;
2932: }
2933: prow = nextprow;
2934: }
2936: /* if free space is not available, make more free space */
2937: if (current_space->local_remaining < nzk) {
2938: i = am - k + 1; /* num of unfactored rows */
2939: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2940: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2941: reallocs++;
2942: }
2944: /* copy data into free space, then initialize lnk */
2945: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2947: /* add the k-th row into il and jl */
2948: if (nzk > 1) {
2949: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2950: jl[k] = jl[i];
2951: jl[i] = k;
2952: il[k] = ui[k] + 1;
2953: }
2954: ui_ptr[k] = current_space->array;
2956: current_space->array += nzk;
2957: current_space->local_used += nzk;
2958: current_space->local_remaining -= nzk;
2960: ui[k + 1] = ui[k] + nzk;
2961: }
2963: PetscCall(ISRestoreIndices(perm, &rip));
2964: PetscCall(ISRestoreIndices(iperm, &riip));
2965: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2967: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2968: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2969: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2970: PetscCall(PetscLLDestroy(lnk, lnkbt));
2972: /* put together the new matrix in MATSEQSBAIJ format */
2973: b = (Mat_SeqSBAIJ *)fact->data;
2974: b->free_ij = PETSC_TRUE;
2975: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscScalar), (void **)&b->a));
2976: b->free_a = PETSC_TRUE;
2977: b->j = uj;
2978: b->i = ui;
2979: b->diag = udiag;
2980: b->free_diag = PETSC_TRUE;
2981: b->ilen = NULL;
2982: b->imax = NULL;
2983: b->row = perm;
2984: b->col = perm;
2986: PetscCall(PetscObjectReference((PetscObject)perm));
2987: PetscCall(PetscObjectReference((PetscObject)perm));
2989: b->icol = iperm;
2990: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2992: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
2994: b->maxnz = b->nz = ui[am];
2996: fact->info.factor_mallocs = reallocs;
2997: fact->info.fill_ratio_given = fill;
2998: if (ai[am] != 0) {
2999: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
3000: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
3001: } else {
3002: fact->info.fill_ratio_needed = 0.0;
3003: }
3004: #if defined(PETSC_USE_INFO)
3005: if (ai[am] != 0) {
3006: PetscReal af = fact->info.fill_ratio_needed;
3007: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3008: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3009: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3010: } else {
3011: PetscCall(PetscInfo(A, "Empty matrix\n"));
3012: }
3013: #endif
3014: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
3015: PetscFunctionReturn(PETSC_SUCCESS);
3016: }
3018: #if 0
3019: // unused
3020: static PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
3021: {
3022: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3023: Mat_SeqSBAIJ *b;
3024: PetscBool perm_identity, missing;
3025: PetscReal fill = info->fill;
3026: const PetscInt *rip, *riip;
3027: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
3028: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
3029: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr;
3030: PetscFreeSpaceList free_space = NULL, current_space = NULL;
3031: PetscBT lnkbt;
3032: IS iperm;
3034: PetscFunctionBegin;
3035: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
3036: PetscCall(MatMissingDiagonal(A, &missing, &i));
3037: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3039: /* check whether perm is the identity mapping */
3040: PetscCall(ISIdentity(perm, &perm_identity));
3041: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
3042: PetscCall(ISGetIndices(iperm, &riip));
3043: PetscCall(ISGetIndices(perm, &rip));
3045: /* initialization */
3046: PetscCall(PetscShmgetAllocateArray(am + 1,sizeof(PetscInt),(void **)&ui));
3047: ui[0] = 0;
3049: /* jl: linked list for storing indices of the pivot rows
3050: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
3051: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
3052: for (i = 0; i < am; i++) {
3053: jl[i] = am;
3054: il[i] = 0;
3055: }
3057: /* create and initialize a linked list for storing column indices of the active row k */
3058: nlnk = am + 1;
3059: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
3061: /* initial FreeSpace size is fill*(ai[am]+1) */
3062: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[am] + 1), &free_space));
3063: current_space = free_space;
3065: for (k = 0; k < am; k++) { /* for each active row k */
3066: /* initialize lnk by the column indices of row rip[k] of A */
3067: nzk = 0;
3068: ncols = ai[rip[k] + 1] - ai[rip[k]];
3069: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
3070: ncols_upper = 0;
3071: for (j = 0; j < ncols; j++) {
3072: i = riip[*(aj + ai[rip[k]] + j)];
3073: if (i >= k) { /* only take upper triangular entry */
3074: cols[ncols_upper] = i;
3075: ncols_upper++;
3076: }
3077: }
3078: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
3079: nzk += nlnk;
3081: /* update lnk by computing fill-in for each pivot row to be merged in */
3082: prow = jl[k]; /* 1st pivot row */
3084: while (prow < k) {
3085: nextprow = jl[prow];
3086: /* merge prow into k-th row */
3087: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3088: jmax = ui[prow + 1];
3089: ncols = jmax - jmin;
3090: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3091: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
3092: nzk += nlnk;
3094: /* update il and jl for prow */
3095: if (jmin < jmax) {
3096: il[prow] = jmin;
3097: j = *uj_ptr;
3098: jl[prow] = jl[j];
3099: jl[j] = prow;
3100: }
3101: prow = nextprow;
3102: }
3104: /* if free space is not available, make more free space */
3105: if (current_space->local_remaining < nzk) {
3106: i = am - k + 1; /* num of unfactored rows */
3107: i = PetscMin(i * nzk, i * (i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
3108: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
3109: reallocs++;
3110: }
3112: /* copy data into free space, then initialize lnk */
3113: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
3115: /* add the k-th row into il and jl */
3116: if (nzk - 1 > 0) {
3117: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3118: jl[k] = jl[i];
3119: jl[i] = k;
3120: il[k] = ui[k] + 1;
3121: }
3122: ui_ptr[k] = current_space->array;
3124: current_space->array += nzk;
3125: current_space->local_used += nzk;
3126: current_space->local_remaining -= nzk;
3128: ui[k + 1] = ui[k] + nzk;
3129: }
3131: #if defined(PETSC_USE_INFO)
3132: if (ai[am] != 0) {
3133: PetscReal af = (PetscReal)ui[am] / (PetscReal)ai[am];
3134: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
3135: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3136: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
3137: } else {
3138: PetscCall(PetscInfo(A, "Empty matrix\n"));
3139: }
3140: #endif
3142: PetscCall(ISRestoreIndices(perm, &rip));
3143: PetscCall(ISRestoreIndices(iperm, &riip));
3144: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
3146: /* destroy list of free space and other temporary array(s) */
3147: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscInt),(void **)&uj));
3148: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
3149: PetscCall(PetscLLDestroy(lnk, lnkbt));
3151: /* put together the new matrix in MATSEQSBAIJ format */
3152: b = (Mat_SeqSBAIJ *)fact->data;
3153: b->free_ij = PETSC_TRUE;
3154: PetscCall(PetscShmgetAllocateArray(ui[am] + 1,sizeof(PetscScalar),(void **)&b->a));
3155: b->free_a = PETSC_TRUE;
3156: b->j = uj;
3157: b->i = ui;
3158: b->diag = NULL;
3159: b->ilen = NULL;
3160: b->imax = NULL;
3161: b->row = perm;
3162: b->col = perm;
3164: PetscCall(PetscObjectReference((PetscObject)perm));
3165: PetscCall(PetscObjectReference((PetscObject)perm));
3167: b->icol = iperm;
3168: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3170: PetscCall(PetscMalloc1(am + 1, &b->solve_work));
3171: b->maxnz = b->nz = ui[am];
3173: fact->info.factor_mallocs = reallocs;
3174: fact->info.fill_ratio_given = fill;
3175: if (ai[am] != 0) {
3176: fact->info.fill_ratio_needed = (PetscReal)ui[am] / (PetscReal)ai[am];
3177: } else {
3178: fact->info.fill_ratio_needed = 0.0;
3179: }
3180: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3181: PetscFunctionReturn(PETSC_SUCCESS);
3182: }
3183: #endif
3185: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
3186: {
3187: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3188: PetscInt n = A->rmap->n;
3189: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
3190: PetscScalar *x, sum;
3191: const PetscScalar *b;
3192: const MatScalar *aa, *v;
3193: PetscInt i, nz;
3195: PetscFunctionBegin;
3196: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3198: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
3199: PetscCall(VecGetArrayRead(bb, &b));
3200: PetscCall(VecGetArrayWrite(xx, &x));
3202: /* forward solve the lower triangular */
3203: x[0] = b[0];
3204: v = aa;
3205: vi = aj;
3206: for (i = 1; i < n; i++) {
3207: nz = ai[i + 1] - ai[i];
3208: sum = b[i];
3209: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3210: v += nz;
3211: vi += nz;
3212: x[i] = sum;
3213: }
3215: /* backward solve the upper triangular */
3216: for (i = n - 1; i >= 0; i--) {
3217: v = aa + adiag[i + 1] + 1;
3218: vi = aj + adiag[i + 1] + 1;
3219: nz = adiag[i] - adiag[i + 1] - 1;
3220: sum = x[i];
3221: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
3222: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3223: }
3225: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3226: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
3227: PetscCall(VecRestoreArrayRead(bb, &b));
3228: PetscCall(VecRestoreArrayWrite(xx, &x));
3229: PetscFunctionReturn(PETSC_SUCCESS);
3230: }
3232: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
3233: {
3234: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
3235: IS iscol = a->col, isrow = a->row;
3236: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
3237: const PetscInt *rout, *cout, *r, *c;
3238: PetscScalar *x, *tmp, sum;
3239: const PetscScalar *b;
3240: const MatScalar *aa, *v;
3242: PetscFunctionBegin;
3243: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
3245: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
3246: PetscCall(VecGetArrayRead(bb, &b));
3247: PetscCall(VecGetArrayWrite(xx, &x));
3248: tmp = a->solve_work;
3250: PetscCall(ISGetIndices(isrow, &rout));
3251: r = rout;
3252: PetscCall(ISGetIndices(iscol, &cout));
3253: c = cout;
3255: /* forward solve the lower triangular */
3256: tmp[0] = b[r[0]];
3257: v = aa;
3258: vi = aj;
3259: for (i = 1; i < n; i++) {
3260: nz = ai[i + 1] - ai[i];
3261: sum = b[r[i]];
3262: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3263: tmp[i] = sum;
3264: v += nz;
3265: vi += nz;
3266: }
3268: /* backward solve the upper triangular */
3269: for (i = n - 1; i >= 0; i--) {
3270: v = aa + adiag[i + 1] + 1;
3271: vi = aj + adiag[i + 1] + 1;
3272: nz = adiag[i] - adiag[i + 1] - 1;
3273: sum = tmp[i];
3274: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
3275: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
3276: }
3278: PetscCall(ISRestoreIndices(isrow, &rout));
3279: PetscCall(ISRestoreIndices(iscol, &cout));
3280: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
3281: PetscCall(VecRestoreArrayRead(bb, &b));
3282: PetscCall(VecRestoreArrayWrite(xx, &x));
3283: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
3284: PetscFunctionReturn(PETSC_SUCCESS);
3285: }
3287: #if 0
3288: // unused
3289: /*
3290: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3291: */
3292: static PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A, IS isrow, IS iscol, const MatFactorInfo *info, Mat *fact)
3293: {
3294: Mat B = *fact;
3295: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
3296: IS isicol;
3297: const PetscInt *r, *ic;
3298: PetscInt i, n = A->rmap->n, *ai = a->i, *aj = a->j, *ajtmp, *adiag;
3299: PetscInt *bi, *bj, *bdiag, *bdiag_rev;
3300: PetscInt row, nzi, nzi_bl, nzi_bu, *im, nzi_al, nzi_au;
3301: PetscInt nlnk, *lnk;
3302: PetscBT lnkbt;
3303: PetscBool row_identity, icol_identity;
3304: MatScalar *aatmp, *pv, *batmp, *ba, *rtmp, *pc, multiplier, *vtmp, diag_tmp;
3305: const PetscInt *ics;
3306: PetscInt j, nz, *pj, *bjtmp, k, ncut, *jtmp;
3307: PetscReal dt = info->dt, shift = info->shiftamount;
3308: PetscInt dtcount = (PetscInt)info->dtcount, nnz_max;
3309: PetscBool missing;
3311: PetscFunctionBegin;
3312: if (dt == (PetscReal)PETSC_DEFAULT) dt = 0.005;
3313: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5 * a->rmax);
3315: /* symbolic factorization, can be reused */
3316: PetscCall(MatMissingDiagonal(A, &missing, &i));
3317: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
3318: adiag = a->diag;
3320: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3322: /* bdiag is location of diagonal in factor */
3323: PetscCall(PetscMalloc1(n + 1, &bdiag)); /* becomes b->diag */
3324: PetscCall(PetscMalloc1(n + 1, &bdiag_rev)); /* temporary */
3326: /* allocate row pointers bi */
3327: PetscCall(PetscShmgetAllocateArray(2 * n + 2,sizeof(PetscInt),(void **)&bi));
3329: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3330: if (dtcount > n - 1) dtcount = n - 1; /* diagonal is excluded */
3331: nnz_max = ai[n] + 2 * n * dtcount + 2;
3333: PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscInt),(void **)&bj));
3334: PetscCall(PetscShmgetAllocateArray(nnz_max + 1,sizeof(PetscScalar),(void **)&ba));
3336: /* put together the new matrix */
3337: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
3338: b = (Mat_SeqAIJ *)B->data;
3339: b->free_a = PETSC_TRUE;
3340: b->free_ij = PETSC_TRUE;
3341: b->a = ba;
3342: b->j = bj;
3343: b->i = bi;
3344: b->diag = bdiag;
3345: b->ilen = NULL;
3346: b->imax = NULL;
3347: b->row = isrow;
3348: b->col = iscol;
3349: PetscCall(PetscObjectReference((PetscObject)isrow));
3350: PetscCall(PetscObjectReference((PetscObject)iscol));
3351: b->icol = isicol;
3353: PetscCall(PetscMalloc1(n + 1, &b->solve_work));
3354: b->maxnz = nnz_max;
3356: B->factortype = MAT_FACTOR_ILUDT;
3357: B->info.factor_mallocs = 0;
3358: B->info.fill_ratio_given = ((PetscReal)nnz_max) / ((PetscReal)ai[n]);
3359: /* end of symbolic factorization */
3361: PetscCall(ISGetIndices(isrow, &r));
3362: PetscCall(ISGetIndices(isicol, &ic));
3363: ics = ic;
3365: /* linked list for storing column indices of the active row */
3366: nlnk = n + 1;
3367: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
3369: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3370: PetscCall(PetscMalloc2(n, &im, n, &jtmp));
3371: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3372: PetscCall(PetscMalloc2(n, &rtmp, n, &vtmp));
3373: PetscCall(PetscArrayzero(rtmp, n));
3375: bi[0] = 0;
3376: bdiag[0] = nnz_max - 1; /* location of diag[0] in factor B */
3377: bdiag_rev[n] = bdiag[0];
3378: bi[2 * n + 1] = bdiag[0] + 1; /* endof bj and ba array */
3379: for (i = 0; i < n; i++) {
3380: /* copy initial fill into linked list */
3381: nzi = ai[r[i] + 1] - ai[r[i]];
3382: PetscCheck(nzi, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
3383: nzi_al = adiag[r[i]] - ai[r[i]];
3384: nzi_au = ai[r[i] + 1] - adiag[r[i]] - 1;
3385: ajtmp = aj + ai[r[i]];
3386: PetscCall(PetscLLAddPerm(nzi, ajtmp, ic, n, &nlnk, lnk, lnkbt));
3388: /* load in initial (unfactored row) */
3389: aatmp = a->a + ai[r[i]];
3390: for (j = 0; j < nzi; j++) rtmp[ics[*ajtmp++]] = *aatmp++;
3392: /* add pivot rows into linked list */
3393: row = lnk[n];
3394: while (row < i) {
3395: nzi_bl = bi[row + 1] - bi[row] + 1;
3396: bjtmp = bj + bdiag[row + 1] + 1; /* points to 1st column next to the diagonal in U */
3397: PetscCall(PetscLLAddSortedLU(bjtmp, row, &nlnk, lnk, lnkbt, i, nzi_bl, im));
3398: nzi += nlnk;
3399: row = lnk[row];
3400: }
3402: /* copy data from lnk into jtmp, then initialize lnk */
3403: PetscCall(PetscLLClean(n, n, nzi, lnk, jtmp, lnkbt));
3405: /* numerical factorization */
3406: bjtmp = jtmp;
3407: row = *bjtmp++; /* 1st pivot row */
3408: while (row < i) {
3409: pc = rtmp + row;
3410: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3411: multiplier = (*pc) * (*pv);
3412: *pc = multiplier;
3413: if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */
3414: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3415: pv = ba + bdiag[row + 1] + 1;
3416: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3417: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3418: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3419: }
3420: row = *bjtmp++;
3421: }
3423: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3424: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3425: nzi_bl = 0;
3426: j = 0;
3427: while (jtmp[j] < i) { /* Note: jtmp is sorted */
3428: vtmp[j] = rtmp[jtmp[j]];
3429: rtmp[jtmp[j]] = 0.0;
3430: nzi_bl++;
3431: j++;
3432: }
3433: nzi_bu = nzi - nzi_bl - 1;
3434: while (j < nzi) {
3435: vtmp[j] = rtmp[jtmp[j]];
3436: rtmp[jtmp[j]] = 0.0;
3437: j++;
3438: }
3440: bjtmp = bj + bi[i];
3441: batmp = ba + bi[i];
3442: /* apply level dropping rule to L part */
3443: ncut = nzi_al + dtcount;
3444: if (ncut < nzi_bl) {
3445: PetscCall(PetscSortSplit(ncut, nzi_bl, vtmp, jtmp));
3446: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp, vtmp));
3447: } else {
3448: ncut = nzi_bl;
3449: }
3450: for (j = 0; j < ncut; j++) {
3451: bjtmp[j] = jtmp[j];
3452: batmp[j] = vtmp[j];
3453: }
3454: bi[i + 1] = bi[i] + ncut;
3455: nzi = ncut + 1;
3457: /* apply level dropping rule to U part */
3458: ncut = nzi_au + dtcount;
3459: if (ncut < nzi_bu) {
3460: PetscCall(PetscSortSplit(ncut, nzi_bu, vtmp + nzi_bl + 1, jtmp + nzi_bl + 1));
3461: PetscCall(PetscSortIntWithScalarArray(ncut, jtmp + nzi_bl + 1, vtmp + nzi_bl + 1));
3462: } else {
3463: ncut = nzi_bu;
3464: }
3465: nzi += ncut;
3467: /* mark bdiagonal */
3468: bdiag[i + 1] = bdiag[i] - (ncut + 1);
3469: bdiag_rev[n - i - 1] = bdiag[i + 1];
3470: bi[2 * n - i] = bi[2 * n - i + 1] - (ncut + 1);
3471: bjtmp = bj + bdiag[i];
3472: batmp = ba + bdiag[i];
3473: *bjtmp = i;
3474: *batmp = diag_tmp; /* rtmp[i]; */
3475: if (*batmp == 0.0) *batmp = dt + shift;
3476: *batmp = 1.0 / (*batmp); /* invert diagonal entries for simpler triangular solves */
3478: bjtmp = bj + bdiag[i + 1] + 1;
3479: batmp = ba + bdiag[i + 1] + 1;
3480: for (k = 0; k < ncut; k++) {
3481: bjtmp[k] = jtmp[nzi_bl + 1 + k];
3482: batmp[k] = vtmp[nzi_bl + 1 + k];
3483: }
3485: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3486: } /* for (i=0; i<n; i++) */
3487: PetscCheck(bi[n] < bdiag[n], PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "end of L array %" PetscInt_FMT " cannot >= the beginning of U array %" PetscInt_FMT, bi[n], bdiag[n]);
3489: PetscCall(ISRestoreIndices(isrow, &r));
3490: PetscCall(ISRestoreIndices(isicol, &ic));
3492: PetscCall(PetscLLDestroy(lnk, lnkbt));
3493: PetscCall(PetscFree2(im, jtmp));
3494: PetscCall(PetscFree2(rtmp, vtmp));
3495: PetscCall(PetscFree(bdiag_rev));
3497: PetscCall(PetscLogFlops(B->cmap->n));
3498: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
3500: PetscCall(ISIdentity(isrow, &row_identity));
3501: PetscCall(ISIdentity(isicol, &icol_identity));
3502: if (row_identity && icol_identity) {
3503: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3504: } else {
3505: B->ops->solve = MatSolve_SeqAIJ;
3506: }
3508: B->ops->solveadd = NULL;
3509: B->ops->solvetranspose = NULL;
3510: B->ops->solvetransposeadd = NULL;
3511: B->ops->matsolve = NULL;
3512: B->assembled = PETSC_TRUE;
3513: B->preallocated = PETSC_TRUE;
3514: PetscFunctionReturn(PETSC_SUCCESS);
3515: }
3517: /* a wrapper of MatILUDTFactor_SeqAIJ() */
3518: /*
3519: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3520: */
3522: static PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS row, IS col, const MatFactorInfo *info)
3523: {
3524: PetscFunctionBegin;
3525: PetscCall(MatILUDTFactor_SeqAIJ(A, row, col, info, &fact));
3526: PetscFunctionReturn(PETSC_SUCCESS);
3527: }
3529: /*
3530: same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors
3531: - intend to replace existing MatLUFactorNumeric_SeqAIJ()
3532: */
3533: /*
3534: This will get a new name and become a variant of MatILUFactor_SeqAIJ() there is no longer separate functions in the matrix function table for dt factors
3535: */
3537: static PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact, Mat A, const MatFactorInfo *info)
3538: {
3539: Mat C = fact;
3540: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
3541: IS isrow = b->row, isicol = b->icol;
3542: const PetscInt *r, *ic, *ics;
3543: PetscInt i, j, k, n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
3544: PetscInt *ajtmp, *bjtmp, nz, nzl, nzu, row, *bdiag = b->diag, *pj;
3545: MatScalar *rtmp, *pc, multiplier, *v, *pv, *aa = a->a;
3546: PetscReal dt = info->dt, shift = info->shiftamount;
3547: PetscBool row_identity, col_identity;
3549: PetscFunctionBegin;
3550: PetscCall(ISGetIndices(isrow, &r));
3551: PetscCall(ISGetIndices(isicol, &ic));
3552: PetscCall(PetscMalloc1(n + 1, &rtmp));
3553: ics = ic;
3555: for (i = 0; i < n; i++) {
3556: /* initialize rtmp array */
3557: nzl = bi[i + 1] - bi[i]; /* num of nonzeros in L(i,:) */
3558: bjtmp = bj + bi[i];
3559: for (j = 0; j < nzl; j++) rtmp[*bjtmp++] = 0.0;
3560: rtmp[i] = 0.0;
3561: nzu = bdiag[i] - bdiag[i + 1]; /* num of nonzeros in U(i,:) */
3562: bjtmp = bj + bdiag[i + 1] + 1;
3563: for (j = 0; j < nzu; j++) rtmp[*bjtmp++] = 0.0;
3565: /* load in initial unfactored row of A */
3566: nz = ai[r[i] + 1] - ai[r[i]];
3567: ajtmp = aj + ai[r[i]];
3568: v = aa + ai[r[i]];
3569: for (j = 0; j < nz; j++) rtmp[ics[*ajtmp++]] = v[j];
3571: /* numerical factorization */
3572: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3573: nzl = bi[i + 1] - bi[i]; /* num of entries in L(i,:) */
3574: k = 0;
3575: while (k < nzl) {
3576: row = *bjtmp++;
3577: pc = rtmp + row;
3578: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3579: multiplier = (*pc) * (*pv);
3580: *pc = multiplier;
3581: if (PetscAbsScalar(multiplier) > dt) {
3582: pj = bj + bdiag[row + 1] + 1; /* point to 1st entry of U(row,:) */
3583: pv = b->a + bdiag[row + 1] + 1;
3584: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:), excluding diagonal */
3585: for (j = 0; j < nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3586: PetscCall(PetscLogFlops(1 + 2.0 * nz));
3587: }
3588: k++;
3589: }
3591: /* finished row so stick it into b->a */
3592: /* L-part */
3593: pv = b->a + bi[i];
3594: pj = bj + bi[i];
3595: nzl = bi[i + 1] - bi[i];
3596: for (j = 0; j < nzl; j++) pv[j] = rtmp[pj[j]];
3598: /* diagonal: invert diagonal entries for simpler triangular solves */
3599: if (rtmp[i] == 0.0) rtmp[i] = dt + shift;
3600: b->a[bdiag[i]] = 1.0 / rtmp[i];
3602: /* U-part */
3603: pv = b->a + bdiag[i + 1] + 1;
3604: pj = bj + bdiag[i + 1] + 1;
3605: nzu = bdiag[i] - bdiag[i + 1] - 1;
3606: for (j = 0; j < nzu; j++) pv[j] = rtmp[pj[j]];
3607: }
3609: PetscCall(PetscFree(rtmp));
3610: PetscCall(ISRestoreIndices(isicol, &ic));
3611: PetscCall(ISRestoreIndices(isrow, &r));
3613: PetscCall(ISIdentity(isrow, &row_identity));
3614: PetscCall(ISIdentity(isicol, &col_identity));
3615: if (row_identity && col_identity) {
3616: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3617: } else {
3618: C->ops->solve = MatSolve_SeqAIJ;
3619: }
3620: C->ops->solveadd = NULL;
3621: C->ops->solvetranspose = NULL;
3622: C->ops->solvetransposeadd = NULL;
3623: C->ops->matsolve = NULL;
3624: C->assembled = PETSC_TRUE;
3625: C->preallocated = PETSC_TRUE;
3627: PetscCall(PetscLogFlops(C->cmap->n));
3628: PetscFunctionReturn(PETSC_SUCCESS);
3629: }
3630: #endif