Actual source code: aijfact.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petscbt.h>
4: #include <../src/mat/utils/freespace.h>
6: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
7: {
8: PetscFunctionBegin;
9: *type = MATSOLVERPETSC;
10: PetscFunctionReturn(PETSC_SUCCESS);
11: }
13: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
14: {
15: PetscInt n = A->rmap->n;
17: PetscFunctionBegin;
18: if (PetscDefined(USE_COMPLEX) && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
19: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
20: *B = NULL;
21: PetscFunctionReturn(PETSC_SUCCESS);
22: }
24: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
25: PetscCall(MatSetSizes(*B, n, n, n, n));
26: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
27: PetscCall(MatSetType(*B, MATSEQAIJ));
29: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
30: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
32: PetscCall(MatSetBlockSizesFromMats(*B, A, A));
33: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
34: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
35: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
36: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
37: PetscCall(MatSetType(*B, MATSEQSBAIJ));
38: PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
40: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
41: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
42: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
43: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
44: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
45: (*B)->factortype = ftype;
47: PetscCall(PetscFree((*B)->solvertype));
48: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
49: (*B)->canuseordering = PETSC_TRUE;
50: PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
51: PetscFunctionReturn(PETSC_SUCCESS);
52: }
54: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
55: {
56: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
57: IS isicol;
58: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
59: PetscInt i, n = A->rmap->n;
60: PetscInt *bi, *bj;
61: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
62: PetscReal f;
63: PetscInt nlnk, *lnk, k, **bi_ptr;
64: PetscFreeSpaceList free_space = NULL, current_space = NULL;
65: PetscBT lnkbt;
66: PetscBool diagDense;
68: PetscFunctionBegin;
69: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
70: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
71: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
73: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
74: PetscCall(ISGetIndices(isrow, &r));
75: PetscCall(ISGetIndices(isicol, &ic));
77: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
78: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
79: PetscCall(PetscMalloc1(n + 1, &bdiag));
80: bi[0] = bdiag[0] = 0;
82: /* linked list for storing column indices of the active row */
83: nlnk = n + 1;
84: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
86: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
88: /* initial FreeSpace size is f*(ai[n]+1) */
89: f = info->fill;
90: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
91: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
92: current_space = free_space;
94: for (i = 0; i < n; i++) {
95: /* copy previous fill into linked list */
96: nzi = 0;
97: nnz = ai[r[i] + 1] - ai[r[i]];
98: ajtmp = aj + ai[r[i]];
99: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
100: nzi += nlnk;
102: /* add pivot rows into linked list */
103: row = lnk[n];
104: while (row < i) {
105: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
106: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
107: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
108: nzi += nlnk;
109: row = lnk[row];
110: }
111: bi[i + 1] = bi[i] + nzi;
112: im[i] = nzi;
114: /* mark bdiag */
115: nzbd = 0;
116: nnz = nzi;
117: k = lnk[n];
118: while (nnz-- && k < i) {
119: nzbd++;
120: k = lnk[k];
121: }
122: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
124: /* if free space is not available, make more free space */
125: if (current_space->local_remaining < nzi) {
126: /* estimated additional space needed */
127: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
128: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
129: reallocs++;
130: }
132: /* copy data into free space, then initialize lnk */
133: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
135: bi_ptr[i] = current_space->array;
136: current_space->array += nzi;
137: current_space->local_used += nzi;
138: current_space->local_remaining -= nzi;
139: }
141: PetscCall(ISRestoreIndices(isrow, &r));
142: PetscCall(ISRestoreIndices(isicol, &ic));
144: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
145: PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
146: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
147: PetscCall(PetscLLDestroy(lnk, lnkbt));
148: PetscCall(PetscFree2(bi_ptr, im));
150: /* put together the new matrix */
151: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
152: b = (Mat_SeqAIJ *)B->data;
153: b->free_ij = PETSC_TRUE;
154: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
155: b->free_a = PETSC_TRUE;
156: b->j = bj;
157: b->i = bi;
158: b->diag = bdiag;
159: b->ilen = NULL;
160: b->imax = NULL;
161: b->row = isrow;
162: b->col = iscol;
163: PetscCall(PetscObjectReference((PetscObject)isrow));
164: PetscCall(PetscObjectReference((PetscObject)iscol));
165: b->icol = isicol;
166: PetscCall(PetscMalloc1(n, &b->solve_work));
168: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
169: b->maxnz = b->nz = bdiag[0] + 1;
171: B->factortype = MAT_FACTOR_LU;
172: B->info.factor_mallocs = reallocs;
173: B->info.fill_ratio_given = f;
175: if (ai[n]) {
176: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
177: } else {
178: B->info.fill_ratio_needed = 0.0;
179: }
180: #if defined(PETSC_USE_INFO)
181: if (ai[n] != 0) {
182: PetscReal af = B->info.fill_ratio_needed;
183: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
184: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
185: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
186: PetscCall(PetscInfo(A, "for best performance.\n"));
187: } else {
188: PetscCall(PetscInfo(A, "Empty matrix\n"));
189: }
190: #endif
191: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
192: if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
193: PetscCall(MatSeqAIJCheckInode_FactorLU(B));
194: PetscFunctionReturn(PETSC_SUCCESS);
195: }
197: /*
198: Trouble in factorization, should we dump the original matrix?
199: */
200: PetscErrorCode MatFactorDumpMatrix(Mat A)
201: {
202: PetscBool flg = PETSC_FALSE;
204: PetscFunctionBegin;
205: PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
206: if (flg) {
207: PetscViewer viewer;
208: char filename[PETSC_MAX_PATH_LEN];
210: PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
211: PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
212: PetscCall(MatView(A, viewer));
213: PetscCall(PetscViewerDestroy(&viewer));
214: }
215: PetscFunctionReturn(PETSC_SUCCESS);
216: }
218: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
219: {
220: Mat C = B;
221: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
222: IS isrow = b->row, isicol = b->icol;
223: const PetscInt *r, *ic, *ics;
224: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
225: PetscInt i, j, k, nz, nzL, row, *pj;
226: const PetscInt *ajtmp, *bjtmp;
227: MatScalar *rtmp, *pc, multiplier, *pv;
228: const MatScalar *aa, *v;
229: MatScalar *ba;
230: PetscBool row_identity, col_identity;
231: FactorShiftCtx sctx;
232: const PetscInt *ddiag;
233: PetscReal rs;
234: MatScalar d;
236: PetscFunctionBegin;
237: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
238: PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
239: /* MatPivotSetUp(): initialize shift context sctx */
240: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
242: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
243: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
244: sctx.shift_top = info->zeropivot;
245: for (i = 0; i < n; i++) {
246: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
247: d = aa[ddiag[i]];
248: rs = -PetscAbsScalar(d) - PetscRealPart(d);
249: v = aa + ai[i];
250: nz = ai[i + 1] - ai[i];
251: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
252: if (rs > sctx.shift_top) sctx.shift_top = rs;
253: }
254: sctx.shift_top *= 1.1;
255: sctx.nshift_max = 5;
256: sctx.shift_lo = 0.;
257: sctx.shift_hi = 1.;
258: }
260: PetscCall(ISGetIndices(isrow, &r));
261: PetscCall(ISGetIndices(isicol, &ic));
262: PetscCall(PetscMalloc1(n + 1, &rtmp));
263: ics = ic;
265: do {
266: sctx.newshift = PETSC_FALSE;
267: for (i = 0; i < n; i++) {
268: /* zero rtmp */
269: /* L part */
270: nz = bi[i + 1] - bi[i];
271: bjtmp = bj + bi[i];
272: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
274: /* U part */
275: nz = bdiag[i] - bdiag[i + 1];
276: bjtmp = bj + bdiag[i + 1] + 1;
277: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
279: /* load in initial (unfactored row) */
280: nz = ai[r[i] + 1] - ai[r[i]];
281: ajtmp = aj + ai[r[i]];
282: v = aa + ai[r[i]];
283: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
284: /* ZeropivotApply() */
285: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
287: /* elimination */
288: bjtmp = bj + bi[i];
289: row = *bjtmp++;
290: nzL = bi[i + 1] - bi[i];
291: for (k = 0; k < nzL; k++) {
292: pc = rtmp + row;
293: if (*pc != 0.0) {
294: pv = ba + bdiag[row];
295: multiplier = *pc * (*pv);
296: *pc = multiplier;
298: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
299: pv = ba + bdiag[row + 1] + 1;
300: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
302: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
303: PetscCall(PetscLogFlops(1 + 2.0 * nz));
304: }
305: row = *bjtmp++;
306: }
308: /* finished row so stick it into b->a */
309: rs = 0.0;
310: /* L part */
311: pv = ba + bi[i];
312: pj = b->j + bi[i];
313: nz = bi[i + 1] - bi[i];
314: for (j = 0; j < nz; j++) {
315: pv[j] = rtmp[pj[j]];
316: rs += PetscAbsScalar(pv[j]);
317: }
319: /* U part */
320: pv = ba + bdiag[i + 1] + 1;
321: pj = b->j + bdiag[i + 1] + 1;
322: nz = bdiag[i] - bdiag[i + 1] - 1;
323: for (j = 0; j < nz; j++) {
324: pv[j] = rtmp[pj[j]];
325: rs += PetscAbsScalar(pv[j]);
326: }
328: sctx.rs = rs;
329: sctx.pv = rtmp[i];
330: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
331: if (sctx.newshift) break; /* break for-loop */
332: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
334: /* Mark diagonal and invert diagonal for simpler triangular solves */
335: pv = ba + bdiag[i];
336: *pv = 1.0 / rtmp[i];
338: } /* endof for (i=0; i<n; i++) { */
340: /* MatPivotRefine() */
341: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
342: /*
343: * if no shift in this attempt & shifting & started shifting & can refine,
344: * then try lower shift
345: */
346: sctx.shift_hi = sctx.shift_fraction;
347: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
348: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
349: sctx.newshift = PETSC_TRUE;
350: sctx.nshift++;
351: }
352: } while (sctx.newshift);
354: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
355: PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
357: PetscCall(PetscFree(rtmp));
358: PetscCall(ISRestoreIndices(isicol, &ic));
359: PetscCall(ISRestoreIndices(isrow, &r));
361: PetscCall(ISIdentity(isrow, &row_identity));
362: PetscCall(ISIdentity(isicol, &col_identity));
363: if (b->inode.size_csr) {
364: C->ops->solve = MatSolve_SeqAIJ_Inode;
365: } else if (row_identity && col_identity) {
366: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
367: } else {
368: C->ops->solve = MatSolve_SeqAIJ;
369: }
370: C->ops->solveadd = MatSolveAdd_SeqAIJ;
371: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
372: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
373: C->ops->matsolve = MatMatSolve_SeqAIJ;
374: C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
375: C->assembled = PETSC_TRUE;
376: C->preallocated = PETSC_TRUE;
378: PetscCall(PetscLogFlops(C->cmap->n));
380: /* MatShiftView(A,info,&sctx) */
381: if (sctx.nshift) {
382: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
383: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
384: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
385: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
386: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
387: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
388: }
389: }
390: PetscFunctionReturn(PETSC_SUCCESS);
391: }
393: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
394: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
395: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
397: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
398: {
399: Mat C = B;
400: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
401: IS isrow = b->row, isicol = b->icol;
402: const PetscInt *r, *ic, *ics;
403: PetscInt nz, row, i, j, n = A->rmap->n, diag;
404: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
405: const PetscInt *ajtmp, *bjtmp, *ddiag, *pj;
406: MatScalar *pv, *rtmp, *pc, multiplier, d;
407: const MatScalar *v, *aa;
408: MatScalar *ba;
409: PetscReal rs = 0.0;
410: FactorShiftCtx sctx;
411: PetscBool row_identity, col_identity;
413: PetscFunctionBegin;
414: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
416: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
417: PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
418: /* MatPivotSetUp(): initialize shift context sctx */
419: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
421: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
422: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
423: sctx.shift_top = info->zeropivot;
424: for (i = 0; i < n; i++) {
425: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
426: d = aa[ddiag[i]];
427: rs = -PetscAbsScalar(d) - PetscRealPart(d);
428: v = aa + ai[i];
429: nz = ai[i + 1] - ai[i];
430: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
431: if (rs > sctx.shift_top) sctx.shift_top = rs;
432: }
433: sctx.shift_top *= 1.1;
434: sctx.nshift_max = 5;
435: sctx.shift_lo = 0.;
436: sctx.shift_hi = 1.;
437: }
439: PetscCall(ISGetIndices(isrow, &r));
440: PetscCall(ISGetIndices(isicol, &ic));
441: PetscCall(PetscMalloc1(n + 1, &rtmp));
442: ics = ic;
444: do {
445: sctx.newshift = PETSC_FALSE;
446: for (i = 0; i < n; i++) {
447: nz = bi[i + 1] - bi[i];
448: bjtmp = bj + bi[i];
449: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
451: /* load in initial (unfactored row) */
452: nz = ai[r[i] + 1] - ai[r[i]];
453: ajtmp = aj + ai[r[i]];
454: v = aa + ai[r[i]];
455: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
456: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
458: row = *bjtmp++;
459: while (row < i) {
460: pc = rtmp + row;
461: if (*pc != 0.0) {
462: pv = ba + ddiag[row];
463: pj = b->j + ddiag[row] + 1;
464: multiplier = *pc / *pv++;
465: *pc = multiplier;
466: nz = bi[row + 1] - ddiag[row] - 1;
467: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
468: PetscCall(PetscLogFlops(1 + 2.0 * nz));
469: }
470: row = *bjtmp++;
471: }
472: /* finished row so stick it into b->a */
473: pv = ba + bi[i];
474: pj = b->j + bi[i];
475: nz = bi[i + 1] - bi[i];
476: diag = ddiag[i] - bi[i];
477: rs = 0.0;
478: for (j = 0; j < nz; j++) {
479: pv[j] = rtmp[pj[j]];
480: rs += PetscAbsScalar(pv[j]);
481: }
482: rs -= PetscAbsScalar(pv[diag]);
484: sctx.rs = rs;
485: sctx.pv = pv[diag];
486: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
487: if (sctx.newshift) break;
488: pv[diag] = sctx.pv;
489: }
491: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
492: /*
493: * if no shift in this attempt & shifting & started shifting & can refine,
494: * then try lower shift
495: */
496: sctx.shift_hi = sctx.shift_fraction;
497: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
498: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
499: sctx.newshift = PETSC_TRUE;
500: sctx.nshift++;
501: }
502: } while (sctx.newshift);
504: /* invert diagonal entries for simpler triangular solves */
505: for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
506: PetscCall(PetscFree(rtmp));
507: PetscCall(ISRestoreIndices(isicol, &ic));
508: PetscCall(ISRestoreIndices(isrow, &r));
509: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
510: PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
512: PetscCall(ISIdentity(isrow, &row_identity));
513: PetscCall(ISIdentity(isicol, &col_identity));
514: if (row_identity && col_identity) {
515: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
516: } else {
517: C->ops->solve = MatSolve_SeqAIJ_inplace;
518: }
519: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
520: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
521: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
522: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
523: C->ops->matsolvetranspose = NULL;
525: C->assembled = PETSC_TRUE;
526: C->preallocated = PETSC_TRUE;
528: PetscCall(PetscLogFlops(C->cmap->n));
529: if (sctx.nshift) {
530: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
531: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
532: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
533: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
534: }
535: }
536: C->ops->solve = MatSolve_SeqAIJ_inplace;
537: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
539: PetscCall(MatSeqAIJCheckInode(C));
540: PetscFunctionReturn(PETSC_SUCCESS);
541: }
543: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
545: /*
546: This routine implements inplace ILU(0) with row or/and column permutations.
547: Input:
548: A - original matrix
549: Output;
550: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
551: a->j (col index) is permuted by the inverse of colperm, then sorted
552: a->a reordered accordingly with a->j
553: a->diag (ptr to diagonal elements) is updated.
554: */
555: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
556: {
557: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
558: IS isrow = a->row, isicol = a->icol;
559: const PetscInt *r, *ic, *ics;
560: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
561: PetscInt *ajtmp, nz, row;
562: PetscInt nbdiag, *pj;
563: PetscScalar *rtmp, *pc, multiplier, d;
564: MatScalar *pv, *v;
565: PetscReal rs;
566: FactorShiftCtx sctx;
567: MatScalar *aa, *vtmp;
568: PetscInt *diag;
570: PetscFunctionBegin;
571: PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
573: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
574: PetscCall(MatSeqAIJGetArray(A, &aa));
575: /* MatPivotSetUp(): initialize shift context sctx */
576: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
578: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
579: const PetscInt *ddiag;
581: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
582: sctx.shift_top = info->zeropivot;
583: for (i = 0; i < n; i++) {
584: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
585: d = aa[ddiag[i]];
586: rs = -PetscAbsScalar(d) - PetscRealPart(d);
587: vtmp = aa + ai[i];
588: nz = ai[i + 1] - ai[i];
589: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
590: if (rs > sctx.shift_top) sctx.shift_top = rs;
591: }
592: sctx.shift_top *= 1.1;
593: sctx.nshift_max = 5;
594: sctx.shift_lo = 0.;
595: sctx.shift_hi = 1.;
596: }
598: PetscCall(ISGetIndices(isrow, &r));
599: PetscCall(ISGetIndices(isicol, &ic));
600: PetscCall(PetscMalloc1(n + 1, &rtmp));
601: PetscCall(PetscArrayzero(rtmp, n + 1));
602: ics = ic;
604: #if defined(MV)
605: sctx.shift_top = 0.;
606: sctx.nshift_max = 0;
607: sctx.shift_lo = 0.;
608: sctx.shift_hi = 0.;
609: sctx.shift_fraction = 0.;
611: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
612: sctx.shift_top = 0.;
613: for (i = 0; i < n; i++) {
614: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
615: d = aa[diag[i]];
616: rs = -PetscAbsScalar(d) - PetscRealPart(d);
617: v = aa + ai[i];
618: nz = ai[i + 1] - ai[i];
619: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
620: if (rs > sctx.shift_top) sctx.shift_top = rs;
621: }
622: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
623: sctx.shift_top *= 1.1;
624: sctx.nshift_max = 5;
625: sctx.shift_lo = 0.;
626: sctx.shift_hi = 1.;
627: }
629: sctx.shift_amount = 0.;
630: sctx.nshift = 0;
631: #endif
633: do {
634: sctx.newshift = PETSC_FALSE;
635: for (i = 0; i < n; i++) {
636: /* load in initial unfactored row */
637: nz = ai[r[i] + 1] - ai[r[i]];
638: ajtmp = aj + ai[r[i]];
639: v = aa + ai[r[i]];
640: /* sort permuted ajtmp and values v accordingly */
641: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
642: PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
644: diag[r[i]] = ai[r[i]];
645: for (j = 0; j < nz; j++) {
646: rtmp[ajtmp[j]] = v[j];
647: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
648: }
649: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
651: row = *ajtmp++;
652: while (row < i) {
653: pc = rtmp + row;
654: if (*pc != 0.0) {
655: pv = aa + diag[r[row]];
656: pj = aj + diag[r[row]] + 1;
658: multiplier = *pc / *pv++;
659: *pc = multiplier;
660: nz = ai[r[row] + 1] - diag[r[row]] - 1;
661: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
662: PetscCall(PetscLogFlops(1 + 2.0 * nz));
663: }
664: row = *ajtmp++;
665: }
666: /* finished row so overwrite it onto aa */
667: pv = aa + ai[r[i]];
668: pj = aj + ai[r[i]];
669: nz = ai[r[i] + 1] - ai[r[i]];
670: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
672: rs = 0.0;
673: for (j = 0; j < nz; j++) {
674: pv[j] = rtmp[pj[j]];
675: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
676: }
678: sctx.rs = rs;
679: sctx.pv = pv[nbdiag];
680: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
681: if (sctx.newshift) break;
682: pv[nbdiag] = sctx.pv;
683: }
685: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
686: /*
687: * if no shift in this attempt & shifting & started shifting & can refine,
688: * then try lower shift
689: */
690: sctx.shift_hi = sctx.shift_fraction;
691: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
692: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
693: sctx.newshift = PETSC_TRUE;
694: sctx.nshift++;
695: }
696: } while (sctx.newshift);
698: /* invert diagonal entries for simpler triangular solves */
699: for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
701: PetscCall(MatSeqAIJRestoreArray(A, &aa));
702: PetscCall(PetscFree(rtmp));
703: PetscCall(ISRestoreIndices(isicol, &ic));
704: PetscCall(ISRestoreIndices(isrow, &r));
706: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
707: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
708: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
709: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
711: A->assembled = PETSC_TRUE;
712: A->preallocated = PETSC_TRUE;
714: PetscCall(PetscLogFlops(A->cmap->n));
715: if (sctx.nshift) {
716: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
717: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
718: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
719: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
720: }
721: }
722: PetscFunctionReturn(PETSC_SUCCESS);
723: }
725: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
726: {
727: Mat C;
729: PetscFunctionBegin;
730: PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
731: PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
732: PetscCall(MatLUFactorNumeric(C, A, info));
734: A->ops->solve = C->ops->solve;
735: A->ops->solvetranspose = C->ops->solvetranspose;
737: PetscCall(MatHeaderMerge(A, &C));
738: PetscFunctionReturn(PETSC_SUCCESS);
739: }
741: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
742: {
743: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
744: IS iscol = a->col, isrow = a->row;
745: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
746: PetscInt nz;
747: const PetscInt *rout, *cout, *r, *c;
748: PetscScalar *x, *tmp, *tmps, sum;
749: const PetscScalar *b;
750: const MatScalar *aa, *v;
751: const PetscInt *adiag;
753: PetscFunctionBegin;
754: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
756: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
757: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
758: PetscCall(VecGetArrayRead(bb, &b));
759: PetscCall(VecGetArrayWrite(xx, &x));
760: tmp = a->solve_work;
762: PetscCall(ISGetIndices(isrow, &rout));
763: r = rout;
764: PetscCall(ISGetIndices(iscol, &cout));
765: c = cout + (n - 1);
767: /* forward solve the lower triangular */
768: tmp[0] = b[*r++];
769: tmps = tmp;
770: for (i = 1; i < n; i++) {
771: v = aa + ai[i];
772: vi = aj + ai[i];
773: nz = adiag[i] - ai[i];
774: sum = b[*r++];
775: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
776: tmp[i] = sum;
777: }
779: /* backward solve the upper triangular */
780: for (i = n - 1; i >= 0; i--) {
781: v = aa + adiag[i] + 1;
782: vi = aj + adiag[i] + 1;
783: nz = ai[i + 1] - adiag[i] - 1;
784: sum = tmp[i];
785: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
786: x[*c--] = tmp[i] = sum * aa[adiag[i]];
787: }
788: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
789: PetscCall(ISRestoreIndices(isrow, &rout));
790: PetscCall(ISRestoreIndices(iscol, &cout));
791: PetscCall(VecRestoreArrayRead(bb, &b));
792: PetscCall(VecRestoreArrayWrite(xx, &x));
793: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
794: PetscFunctionReturn(PETSC_SUCCESS);
795: }
797: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
798: {
799: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
800: IS iscol = a->col, isrow = a->row;
801: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
802: PetscInt nz, neq, ldb, ldx;
803: const PetscInt *rout, *cout, *r, *c;
804: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
805: const PetscScalar *b, *aa, *v;
806: PetscBool isdense;
807: const PetscInt *adiag;
809: PetscFunctionBegin;
810: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
811: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
812: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
813: if (X != B) {
814: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
815: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
816: }
817: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
818: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
819: PetscCall(MatDenseGetArrayRead(B, &b));
820: PetscCall(MatDenseGetLDA(B, &ldb));
821: PetscCall(MatDenseGetArray(X, &x));
822: PetscCall(MatDenseGetLDA(X, &ldx));
823: PetscCall(ISGetIndices(isrow, &rout));
824: r = rout;
825: PetscCall(ISGetIndices(iscol, &cout));
826: c = cout;
827: for (neq = 0; neq < B->cmap->n; neq++) {
828: /* forward solve the lower triangular */
829: tmp[0] = b[r[0]];
830: tmps = tmp;
831: for (i = 1; i < n; i++) {
832: v = aa + ai[i];
833: vi = aj + ai[i];
834: nz = adiag[i] - ai[i];
835: sum = b[r[i]];
836: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
837: tmp[i] = sum;
838: }
839: /* backward solve the upper triangular */
840: for (i = n - 1; i >= 0; i--) {
841: v = aa + adiag[i] + 1;
842: vi = aj + adiag[i] + 1;
843: nz = ai[i + 1] - adiag[i] - 1;
844: sum = tmp[i];
845: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
846: x[c[i]] = tmp[i] = sum * aa[adiag[i]];
847: }
848: b += ldb;
849: x += ldx;
850: }
851: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
852: PetscCall(ISRestoreIndices(isrow, &rout));
853: PetscCall(ISRestoreIndices(iscol, &cout));
854: PetscCall(MatDenseRestoreArrayRead(B, &b));
855: PetscCall(MatDenseRestoreArray(X, &x));
856: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
857: PetscFunctionReturn(PETSC_SUCCESS);
858: }
860: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
861: {
862: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
863: IS iscol = a->col, isrow = a->row;
864: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
865: const PetscInt *adiag;
866: PetscInt nz, neq, ldb, ldx;
867: const PetscInt *rout, *cout, *r, *c;
868: PetscScalar *x, *tmp = a->solve_work, sum;
869: const PetscScalar *b, *aa, *v;
870: PetscBool isdense;
872: PetscFunctionBegin;
873: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
874: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
875: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
876: if (X != B) {
877: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
878: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
879: }
880: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
881: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
882: PetscCall(MatDenseGetArrayRead(B, &b));
883: PetscCall(MatDenseGetLDA(B, &ldb));
884: PetscCall(MatDenseGetArray(X, &x));
885: PetscCall(MatDenseGetLDA(X, &ldx));
886: PetscCall(ISGetIndices(isrow, &rout));
887: r = rout;
888: PetscCall(ISGetIndices(iscol, &cout));
889: c = cout;
890: for (neq = 0; neq < B->cmap->n; neq++) {
891: /* forward solve the lower triangular */
892: tmp[0] = b[r[0]];
893: v = aa;
894: vi = aj;
895: for (i = 1; i < n; i++) {
896: nz = ai[i + 1] - ai[i];
897: sum = b[r[i]];
898: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
899: tmp[i] = sum;
900: v += nz;
901: vi += nz;
902: }
903: /* backward solve the upper triangular */
904: for (i = n - 1; i >= 0; i--) {
905: v = aa + adiag[i + 1] + 1;
906: vi = aj + adiag[i + 1] + 1;
907: nz = adiag[i] - adiag[i + 1] - 1;
908: sum = tmp[i];
909: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
910: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
911: }
912: b += ldb;
913: x += ldx;
914: }
915: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
916: PetscCall(ISRestoreIndices(isrow, &rout));
917: PetscCall(ISRestoreIndices(iscol, &cout));
918: PetscCall(MatDenseRestoreArrayRead(B, &b));
919: PetscCall(MatDenseRestoreArray(X, &x));
920: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
921: PetscFunctionReturn(PETSC_SUCCESS);
922: }
924: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
925: {
926: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
927: IS iscol = a->col, isrow = a->row;
928: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
929: const PetscInt *adiag = a->diag;
930: PetscInt nz, neq, ldb, ldx;
931: const PetscInt *rout, *cout, *r, *c;
932: PetscScalar *x, *tmp = a->solve_work, s1;
933: const PetscScalar *b, *aa, *v;
934: PetscBool isdense;
936: PetscFunctionBegin;
937: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
938: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
939: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
940: if (X != B) {
941: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
942: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
943: }
944: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
945: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
946: PetscCall(MatDenseGetArrayRead(B, &b));
947: PetscCall(MatDenseGetLDA(B, &ldb));
948: PetscCall(MatDenseGetArray(X, &x));
949: PetscCall(MatDenseGetLDA(X, &ldx));
950: PetscCall(ISGetIndices(isrow, &rout));
951: r = rout;
952: PetscCall(ISGetIndices(iscol, &cout));
953: c = cout;
954: for (neq = 0; neq < B->cmap->n; neq++) {
955: /* copy the b into temp work space according to permutation */
956: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
958: /* forward solve the U^T */
959: for (i = 0; i < n; i++) {
960: v = aa + adiag[i + 1] + 1;
961: vi = aj + adiag[i + 1] + 1;
962: nz = adiag[i] - adiag[i + 1] - 1;
963: s1 = tmp[i];
964: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
965: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
966: tmp[i] = s1;
967: }
969: /* backward solve the L^T */
970: for (i = n - 1; i >= 0; i--) {
971: v = aa + ai[i];
972: vi = aj + ai[i];
973: nz = ai[i + 1] - ai[i];
974: s1 = tmp[i];
975: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
976: }
978: /* copy tmp into x according to permutation */
979: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
980: b += ldb;
981: x += ldx;
982: }
983: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
984: PetscCall(ISRestoreIndices(isrow, &rout));
985: PetscCall(ISRestoreIndices(iscol, &cout));
986: PetscCall(MatDenseRestoreArrayRead(B, &b));
987: PetscCall(MatDenseRestoreArray(X, &x));
988: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
989: PetscFunctionReturn(PETSC_SUCCESS);
990: }
992: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
993: {
994: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
995: IS iscol = a->col, isrow = a->row;
996: const PetscInt *r, *c, *rout, *cout, *adiag;
997: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
998: PetscInt nz, row;
999: PetscScalar *x, *tmp, *tmps, sum;
1000: const PetscScalar *b;
1001: const MatScalar *aa, *v;
1003: PetscFunctionBegin;
1004: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1006: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1007: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1008: PetscCall(VecGetArrayRead(bb, &b));
1009: PetscCall(VecGetArrayWrite(xx, &x));
1010: tmp = a->solve_work;
1012: PetscCall(ISGetIndices(isrow, &rout));
1013: r = rout;
1014: PetscCall(ISGetIndices(iscol, &cout));
1015: c = cout + (n - 1);
1017: /* forward solve the lower triangular */
1018: tmp[0] = b[*r++];
1019: tmps = tmp;
1020: for (row = 1; row < n; row++) {
1021: i = rout[row]; /* permuted row */
1022: v = aa + ai[i];
1023: vi = aj + ai[i];
1024: nz = adiag[i] - ai[i];
1025: sum = b[*r++];
1026: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1027: tmp[row] = sum;
1028: }
1030: /* backward solve the upper triangular */
1031: for (row = n - 1; row >= 0; row--) {
1032: i = rout[row]; /* permuted row */
1033: v = aa + adiag[i] + 1;
1034: vi = aj + adiag[i] + 1;
1035: nz = ai[i + 1] - adiag[i] - 1;
1036: sum = tmp[row];
1037: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1038: x[*c--] = tmp[row] = sum * aa[adiag[i]];
1039: }
1040: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1041: PetscCall(ISRestoreIndices(isrow, &rout));
1042: PetscCall(ISRestoreIndices(iscol, &cout));
1043: PetscCall(VecRestoreArrayRead(bb, &b));
1044: PetscCall(VecRestoreArrayWrite(xx, &x));
1045: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1046: PetscFunctionReturn(PETSC_SUCCESS);
1047: }
1049: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1050: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1051: {
1052: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1053: PetscInt n = A->rmap->n;
1054: const PetscInt *ai = a->i, *aj = a->j, *adiag;
1055: PetscScalar *x;
1056: const PetscScalar *b;
1057: const MatScalar *aa;
1058: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1059: PetscInt adiag_i, i, nz, ai_i;
1060: const PetscInt *vi;
1061: const MatScalar *v;
1062: PetscScalar sum;
1063: #endif
1065: PetscFunctionBegin;
1066: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1067: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1068: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1069: PetscCall(VecGetArrayRead(bb, &b));
1070: PetscCall(VecGetArrayWrite(xx, &x));
1072: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1073: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1074: #else
1075: /* forward solve the lower triangular */
1076: x[0] = b[0];
1077: for (i = 1; i < n; i++) {
1078: ai_i = ai[i];
1079: v = aa + ai_i;
1080: vi = aj + ai_i;
1081: nz = adiag[i] - ai_i;
1082: sum = b[i];
1083: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1084: x[i] = sum;
1085: }
1087: /* backward solve the upper triangular */
1088: for (i = n - 1; i >= 0; i--) {
1089: adiag_i = adiag[i];
1090: v = aa + adiag_i + 1;
1091: vi = aj + adiag_i + 1;
1092: nz = ai[i + 1] - adiag_i - 1;
1093: sum = x[i];
1094: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1095: x[i] = sum * aa[adiag_i];
1096: }
1097: #endif
1098: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1099: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1100: PetscCall(VecRestoreArrayRead(bb, &b));
1101: PetscCall(VecRestoreArrayWrite(xx, &x));
1102: PetscFunctionReturn(PETSC_SUCCESS);
1103: }
1105: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1106: {
1107: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1108: IS iscol = a->col, isrow = a->row;
1109: PetscInt i, n = A->rmap->n, j;
1110: PetscInt nz;
1111: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1112: PetscScalar *x, *tmp, sum;
1113: const PetscScalar *b;
1114: const MatScalar *aa, *v;
1116: PetscFunctionBegin;
1117: if (yy != xx) PetscCall(VecCopy(yy, xx));
1119: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1120: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1121: PetscCall(VecGetArrayRead(bb, &b));
1122: PetscCall(VecGetArray(xx, &x));
1123: tmp = a->solve_work;
1125: PetscCall(ISGetIndices(isrow, &rout));
1126: r = rout;
1127: PetscCall(ISGetIndices(iscol, &cout));
1128: c = cout + (n - 1);
1130: /* forward solve the lower triangular */
1131: tmp[0] = b[*r++];
1132: for (i = 1; i < n; i++) {
1133: v = aa + ai[i];
1134: vi = aj + ai[i];
1135: nz = adiag[i] - ai[i];
1136: sum = b[*r++];
1137: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1138: tmp[i] = sum;
1139: }
1141: /* backward solve the upper triangular */
1142: for (i = n - 1; i >= 0; i--) {
1143: v = aa + adiag[i] + 1;
1144: vi = aj + adiag[i] + 1;
1145: nz = ai[i + 1] - adiag[i] - 1;
1146: sum = tmp[i];
1147: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1148: tmp[i] = sum * aa[adiag[i]];
1149: x[*c--] += tmp[i];
1150: }
1152: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1153: PetscCall(ISRestoreIndices(isrow, &rout));
1154: PetscCall(ISRestoreIndices(iscol, &cout));
1155: PetscCall(VecRestoreArrayRead(bb, &b));
1156: PetscCall(VecRestoreArray(xx, &x));
1157: PetscCall(PetscLogFlops(2.0 * a->nz));
1158: PetscFunctionReturn(PETSC_SUCCESS);
1159: }
1161: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1162: {
1163: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1164: IS iscol = a->col, isrow = a->row;
1165: PetscInt i, n = A->rmap->n, j;
1166: PetscInt nz;
1167: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1168: PetscScalar *x, *tmp, sum;
1169: const PetscScalar *b;
1170: const MatScalar *aa, *v;
1172: PetscFunctionBegin;
1173: if (yy != xx) PetscCall(VecCopy(yy, xx));
1175: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1176: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1177: PetscCall(VecGetArrayRead(bb, &b));
1178: PetscCall(VecGetArray(xx, &x));
1179: tmp = a->solve_work;
1181: PetscCall(ISGetIndices(isrow, &rout));
1182: r = rout;
1183: PetscCall(ISGetIndices(iscol, &cout));
1184: c = cout;
1186: /* forward solve the lower triangular */
1187: tmp[0] = b[r[0]];
1188: v = aa;
1189: vi = aj;
1190: for (i = 1; i < n; i++) {
1191: nz = ai[i + 1] - ai[i];
1192: sum = b[r[i]];
1193: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1194: tmp[i] = sum;
1195: v += nz;
1196: vi += nz;
1197: }
1199: /* backward solve the upper triangular */
1200: v = aa + adiag[n - 1];
1201: vi = aj + adiag[n - 1];
1202: for (i = n - 1; i >= 0; i--) {
1203: nz = adiag[i] - adiag[i + 1] - 1;
1204: sum = tmp[i];
1205: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1206: tmp[i] = sum * v[nz];
1207: x[c[i]] += tmp[i];
1208: v += nz + 1;
1209: vi += nz + 1;
1210: }
1212: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1213: PetscCall(ISRestoreIndices(isrow, &rout));
1214: PetscCall(ISRestoreIndices(iscol, &cout));
1215: PetscCall(VecRestoreArrayRead(bb, &b));
1216: PetscCall(VecRestoreArray(xx, &x));
1217: PetscCall(PetscLogFlops(2.0 * a->nz));
1218: PetscFunctionReturn(PETSC_SUCCESS);
1219: }
1221: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1222: {
1223: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1224: IS iscol = a->col, isrow = a->row;
1225: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1226: PetscInt i, n = A->rmap->n, j;
1227: PetscInt nz;
1228: PetscScalar *x, *tmp, s1;
1229: const MatScalar *aa, *v;
1230: const PetscScalar *b;
1232: PetscFunctionBegin;
1233: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1234: PetscCall(VecGetArrayRead(bb, &b));
1235: PetscCall(VecGetArrayWrite(xx, &x));
1236: tmp = a->solve_work;
1238: PetscCall(ISGetIndices(isrow, &rout));
1239: r = rout;
1240: PetscCall(ISGetIndices(iscol, &cout));
1241: c = cout;
1243: /* copy the b into temp work space according to permutation */
1244: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1246: /* forward solve the U^T */
1247: for (i = 0; i < n; i++) {
1248: v = aa + diag[i];
1249: vi = aj + diag[i] + 1;
1250: nz = ai[i + 1] - diag[i] - 1;
1251: s1 = tmp[i];
1252: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1253: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1254: tmp[i] = s1;
1255: }
1257: /* backward solve the L^T */
1258: for (i = n - 1; i >= 0; i--) {
1259: v = aa + diag[i] - 1;
1260: vi = aj + diag[i] - 1;
1261: nz = diag[i] - ai[i];
1262: s1 = tmp[i];
1263: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1264: }
1266: /* copy tmp into x according to permutation */
1267: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1269: PetscCall(ISRestoreIndices(isrow, &rout));
1270: PetscCall(ISRestoreIndices(iscol, &cout));
1271: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1272: PetscCall(VecRestoreArrayRead(bb, &b));
1273: PetscCall(VecRestoreArrayWrite(xx, &x));
1275: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1276: PetscFunctionReturn(PETSC_SUCCESS);
1277: }
1279: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1280: {
1281: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1282: IS iscol = a->col, isrow = a->row;
1283: const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1284: PetscInt i, n = A->rmap->n, j;
1285: PetscInt nz;
1286: PetscScalar *x, *tmp, s1;
1287: const MatScalar *aa, *v;
1288: const PetscScalar *b;
1290: PetscFunctionBegin;
1291: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1292: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1293: PetscCall(VecGetArrayRead(bb, &b));
1294: PetscCall(VecGetArrayWrite(xx, &x));
1295: tmp = a->solve_work;
1297: PetscCall(ISGetIndices(isrow, &rout));
1298: r = rout;
1299: PetscCall(ISGetIndices(iscol, &cout));
1300: c = cout;
1302: /* copy the b into temp work space according to permutation */
1303: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1305: /* forward solve the U^T */
1306: for (i = 0; i < n; i++) {
1307: v = aa + adiag[i + 1] + 1;
1308: vi = aj + adiag[i + 1] + 1;
1309: nz = adiag[i] - adiag[i + 1] - 1;
1310: s1 = tmp[i];
1311: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1312: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1313: tmp[i] = s1;
1314: }
1316: /* backward solve the L^T */
1317: for (i = n - 1; i >= 0; i--) {
1318: v = aa + ai[i];
1319: vi = aj + ai[i];
1320: nz = ai[i + 1] - ai[i];
1321: s1 = tmp[i];
1322: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1323: }
1325: /* copy tmp into x according to permutation */
1326: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1328: PetscCall(ISRestoreIndices(isrow, &rout));
1329: PetscCall(ISRestoreIndices(iscol, &cout));
1330: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1331: PetscCall(VecRestoreArrayRead(bb, &b));
1332: PetscCall(VecRestoreArrayWrite(xx, &x));
1334: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1335: PetscFunctionReturn(PETSC_SUCCESS);
1336: }
1338: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1339: {
1340: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1341: IS iscol = a->col, isrow = a->row;
1342: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1343: PetscInt i, n = A->rmap->n, j;
1344: PetscInt nz;
1345: PetscScalar *x, *tmp, s1;
1346: const MatScalar *aa, *v;
1347: const PetscScalar *b;
1349: PetscFunctionBegin;
1350: if (zz != xx) PetscCall(VecCopy(zz, xx));
1351: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1352: PetscCall(VecGetArrayRead(bb, &b));
1353: PetscCall(VecGetArray(xx, &x));
1354: tmp = a->solve_work;
1356: PetscCall(ISGetIndices(isrow, &rout));
1357: r = rout;
1358: PetscCall(ISGetIndices(iscol, &cout));
1359: c = cout;
1361: /* copy the b into temp work space according to permutation */
1362: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1364: /* forward solve the U^T */
1365: for (i = 0; i < n; i++) {
1366: v = aa + diag[i];
1367: vi = aj + diag[i] + 1;
1368: nz = ai[i + 1] - diag[i] - 1;
1369: s1 = tmp[i];
1370: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1371: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1372: tmp[i] = s1;
1373: }
1375: /* backward solve the L^T */
1376: for (i = n - 1; i >= 0; i--) {
1377: v = aa + diag[i] - 1;
1378: vi = aj + diag[i] - 1;
1379: nz = diag[i] - ai[i];
1380: s1 = tmp[i];
1381: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1382: }
1384: /* copy tmp into x according to permutation */
1385: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1387: PetscCall(ISRestoreIndices(isrow, &rout));
1388: PetscCall(ISRestoreIndices(iscol, &cout));
1389: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1390: PetscCall(VecRestoreArrayRead(bb, &b));
1391: PetscCall(VecRestoreArray(xx, &x));
1393: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1394: PetscFunctionReturn(PETSC_SUCCESS);
1395: }
1397: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1398: {
1399: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1400: IS iscol = a->col, isrow = a->row;
1401: const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1402: PetscInt i, n = A->rmap->n, j;
1403: PetscInt nz;
1404: PetscScalar *x, *tmp, s1;
1405: const MatScalar *aa, *v;
1406: const PetscScalar *b;
1408: PetscFunctionBegin;
1409: if (zz != xx) PetscCall(VecCopy(zz, xx));
1410: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1411: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1412: PetscCall(VecGetArrayRead(bb, &b));
1413: PetscCall(VecGetArray(xx, &x));
1414: tmp = a->solve_work;
1416: PetscCall(ISGetIndices(isrow, &rout));
1417: r = rout;
1418: PetscCall(ISGetIndices(iscol, &cout));
1419: c = cout;
1421: /* copy the b into temp work space according to permutation */
1422: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1424: /* forward solve the U^T */
1425: for (i = 0; i < n; i++) {
1426: v = aa + adiag[i + 1] + 1;
1427: vi = aj + adiag[i + 1] + 1;
1428: nz = adiag[i] - adiag[i + 1] - 1;
1429: s1 = tmp[i];
1430: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1431: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1432: tmp[i] = s1;
1433: }
1435: /* backward solve the L^T */
1436: for (i = n - 1; i >= 0; i--) {
1437: v = aa + ai[i];
1438: vi = aj + ai[i];
1439: nz = ai[i + 1] - ai[i];
1440: s1 = tmp[i];
1441: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1442: }
1444: /* copy tmp into x according to permutation */
1445: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1447: PetscCall(ISRestoreIndices(isrow, &rout));
1448: PetscCall(ISRestoreIndices(iscol, &cout));
1449: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1450: PetscCall(VecRestoreArrayRead(bb, &b));
1451: PetscCall(VecRestoreArray(xx, &x));
1453: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1454: PetscFunctionReturn(PETSC_SUCCESS);
1455: }
1457: /*
1458: ilu() under revised new data structure.
1459: Factored arrays bj and ba are stored as
1460: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1462: bi=fact->i is an array of size n+1, in which
1463: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1464: bi[n]: points to L(n-1,n-1)+1
1466: bdiag=fact->diag is an array of size n+1,in which
1467: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1468: bdiag[n]: points to entry of U(n-1,0)-1
1470: U(i,:) contains bdiag[i] as its last entry, i.e.,
1471: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1472: */
1473: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1474: {
1475: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1476: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1477: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1478: IS isicol;
1480: PetscFunctionBegin;
1481: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1482: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1483: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1484: b = (Mat_SeqAIJ *)fact->data;
1486: /* allocate matrix arrays for new data structure */
1487: PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1488: PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1489: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1490: if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1491: b->free_a = PETSC_TRUE;
1492: b->free_ij = PETSC_TRUE;
1494: if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1495: bdiag = b->diag;
1497: /* set bi and bj with new data structure */
1498: bi = b->i;
1499: bj = b->j;
1501: /* L part */
1502: bi[0] = 0;
1503: for (i = 0; i < n; i++) {
1504: nz = adiag[i] - ai[i];
1505: bi[i + 1] = bi[i] + nz;
1506: aj = a->j + ai[i];
1507: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1508: }
1510: /* U part */
1511: bdiag[n] = bi[n] - 1;
1512: for (i = n - 1; i >= 0; i--) {
1513: nz = ai[i + 1] - adiag[i] - 1;
1514: aj = a->j + adiag[i] + 1;
1515: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1516: /* diag[i] */
1517: bj[k++] = i;
1518: bdiag[i] = bdiag[i + 1] + nz + 1;
1519: }
1521: fact->factortype = MAT_FACTOR_ILU;
1522: fact->info.factor_mallocs = 0;
1523: fact->info.fill_ratio_given = info->fill;
1524: fact->info.fill_ratio_needed = 1.0;
1525: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1526: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1528: b = (Mat_SeqAIJ *)fact->data;
1529: b->row = isrow;
1530: b->col = iscol;
1531: b->icol = isicol;
1532: PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1533: PetscCall(PetscObjectReference((PetscObject)isrow));
1534: PetscCall(PetscObjectReference((PetscObject)iscol));
1535: PetscFunctionReturn(PETSC_SUCCESS);
1536: }
1538: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1539: {
1540: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1541: IS isicol;
1542: const PetscInt *r, *ic;
1543: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1544: PetscInt *bi, *cols, nnz, *cols_lvl;
1545: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1546: PetscInt i, levels, diagonal_fill;
1547: PetscBool col_identity, row_identity;
1548: PetscReal f;
1549: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1550: PetscBT lnkbt;
1551: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1552: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1553: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1554: PetscBool diagDense;
1556: PetscFunctionBegin;
1557: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1558: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1559: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
1561: levels = (PetscInt)info->levels;
1562: PetscCall(ISIdentity(isrow, &row_identity));
1563: PetscCall(ISIdentity(iscol, &col_identity));
1564: if (!levels && row_identity && col_identity) {
1565: /* special case: ilu(0) with natural ordering */
1566: PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1567: if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1568: PetscFunctionReturn(PETSC_SUCCESS);
1569: }
1571: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1572: PetscCall(ISGetIndices(isrow, &r));
1573: PetscCall(ISGetIndices(isicol, &ic));
1575: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1576: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1577: PetscCall(PetscMalloc1(n + 1, &bdiag));
1578: bi[0] = bdiag[0] = 0;
1579: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1581: /* create a linked list for storing column indices of the active row */
1582: nlnk = n + 1;
1583: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1585: /* initial FreeSpace size is f*(ai[n]+1) */
1586: f = info->fill;
1587: diagonal_fill = (PetscInt)info->diagonal_fill;
1588: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1589: current_space = free_space;
1590: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1591: current_space_lvl = free_space_lvl;
1592: for (i = 0; i < n; i++) {
1593: nzi = 0;
1594: /* copy current row into linked list */
1595: nnz = ai[r[i] + 1] - ai[r[i]];
1596: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1597: cols = aj + ai[r[i]];
1598: lnk[i] = -1; /* marker to indicate if diagonal exists */
1599: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1600: nzi += nlnk;
1602: /* make sure diagonal entry is included */
1603: if (diagonal_fill && lnk[i] == -1) {
1604: fm = n;
1605: while (lnk[fm] < i) fm = lnk[fm];
1606: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1607: lnk[fm] = i;
1608: lnk_lvl[i] = 0;
1609: nzi++;
1610: dcount++;
1611: }
1613: /* add pivot rows into the active row */
1614: nzbd = 0;
1615: prow = lnk[n];
1616: while (prow < i) {
1617: nnz = bdiag[prow];
1618: cols = bj_ptr[prow] + nnz + 1;
1619: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1620: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1621: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1622: nzi += nlnk;
1623: prow = lnk[prow];
1624: nzbd++;
1625: }
1626: bdiag[i] = nzbd;
1627: bi[i + 1] = bi[i] + nzi;
1628: /* if free space is not available, make more free space */
1629: if (current_space->local_remaining < nzi) {
1630: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1631: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1632: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1633: reallocs++;
1634: }
1636: /* copy data into free_space and free_space_lvl, then initialize lnk */
1637: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1638: bj_ptr[i] = current_space->array;
1639: bjlvl_ptr[i] = current_space_lvl->array;
1641: /* make sure the active row i has diagonal entry */
1642: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1644: current_space->array += nzi;
1645: current_space->local_used += nzi;
1646: current_space->local_remaining -= nzi;
1647: current_space_lvl->array += nzi;
1648: current_space_lvl->local_used += nzi;
1649: current_space_lvl->local_remaining -= nzi;
1650: }
1652: PetscCall(ISRestoreIndices(isrow, &r));
1653: PetscCall(ISRestoreIndices(isicol, &ic));
1654: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1655: PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1656: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1658: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1659: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1660: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1662: #if defined(PETSC_USE_INFO)
1663: {
1664: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1665: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1666: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1667: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1668: PetscCall(PetscInfo(A, "for best performance.\n"));
1669: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1670: }
1671: #endif
1672: /* put together the new matrix */
1673: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1674: b = (Mat_SeqAIJ *)fact->data;
1675: b->free_ij = PETSC_TRUE;
1676: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1677: b->free_a = PETSC_TRUE;
1678: b->j = bj;
1679: b->i = bi;
1680: b->diag = bdiag;
1681: b->ilen = NULL;
1682: b->imax = NULL;
1683: b->row = isrow;
1684: b->col = iscol;
1685: PetscCall(PetscObjectReference((PetscObject)isrow));
1686: PetscCall(PetscObjectReference((PetscObject)iscol));
1687: b->icol = isicol;
1689: PetscCall(PetscMalloc1(n, &b->solve_work));
1690: /* In b structure: Free imax, ilen, old a, old j.
1691: Allocate bdiag, solve_work, new a, new j */
1692: b->maxnz = b->nz = bdiag[0] + 1;
1694: fact->info.factor_mallocs = reallocs;
1695: fact->info.fill_ratio_given = f;
1696: fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1697: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1698: if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1699: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1700: PetscFunctionReturn(PETSC_SUCCESS);
1701: }
1703: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1704: {
1705: Mat C = B;
1706: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1707: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
1708: IS ip = b->row, iip = b->icol;
1709: const PetscInt *rip, *riip;
1710: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1711: PetscInt *ai = a->i, *aj = a->j;
1712: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1713: MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
1714: PetscBool perm_identity;
1715: FactorShiftCtx sctx;
1716: PetscReal rs;
1717: const MatScalar *aa, *v;
1718: MatScalar d;
1719: const PetscInt *adiag;
1721: PetscFunctionBegin;
1722: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1723: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1724: /* MatPivotSetUp(): initialize shift context sctx */
1725: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1727: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1728: sctx.shift_top = info->zeropivot;
1729: for (i = 0; i < mbs; i++) {
1730: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1731: d = aa[adiag[i]];
1732: rs = -PetscAbsScalar(d) - PetscRealPart(d);
1733: v = aa + ai[i];
1734: nz = ai[i + 1] - ai[i];
1735: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1736: if (rs > sctx.shift_top) sctx.shift_top = rs;
1737: }
1738: sctx.shift_top *= 1.1;
1739: sctx.nshift_max = 5;
1740: sctx.shift_lo = 0.;
1741: sctx.shift_hi = 1.;
1742: }
1744: PetscCall(ISGetIndices(ip, &rip));
1745: PetscCall(ISGetIndices(iip, &riip));
1747: /* allocate working arrays
1748: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1749: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1750: */
1751: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1753: do {
1754: sctx.newshift = PETSC_FALSE;
1756: for (i = 0; i < mbs; i++) c2r[i] = mbs;
1757: if (mbs) il[0] = 0;
1759: for (k = 0; k < mbs; k++) {
1760: /* zero rtmp */
1761: nz = bi[k + 1] - bi[k];
1762: bjtmp = bj + bi[k];
1763: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1765: /* load in initial unfactored row */
1766: bval = ba + bi[k];
1767: jmin = ai[rip[k]];
1768: jmax = ai[rip[k] + 1];
1769: for (j = jmin; j < jmax; j++) {
1770: col = riip[aj[j]];
1771: if (col >= k) { /* only take upper triangular entry */
1772: rtmp[col] = aa[j];
1773: *bval++ = 0.0; /* for in-place factorization */
1774: }
1775: }
1776: /* shift the diagonal of the matrix: ZeropivotApply() */
1777: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1779: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1780: dk = rtmp[k];
1781: i = c2r[k]; /* first row to be added to k_th row */
1783: while (i < k) {
1784: nexti = c2r[i]; /* next row to be added to k_th row */
1786: /* compute multiplier, update diag(k) and U(i,k) */
1787: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1788: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1789: dk += uikdi * ba[ili]; /* update diag[k] */
1790: ba[ili] = uikdi; /* -U(i,k) */
1792: /* add multiple of row i to k-th row */
1793: jmin = ili + 1;
1794: jmax = bi[i + 1];
1795: if (jmin < jmax) {
1796: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1797: /* update il and c2r for row i */
1798: il[i] = jmin;
1799: j = bj[jmin];
1800: c2r[i] = c2r[j];
1801: c2r[j] = i;
1802: }
1803: i = nexti;
1804: }
1806: /* copy data into U(k,:) */
1807: rs = 0.0;
1808: jmin = bi[k];
1809: jmax = bi[k + 1] - 1;
1810: if (jmin < jmax) {
1811: for (j = jmin; j < jmax; j++) {
1812: col = bj[j];
1813: ba[j] = rtmp[col];
1814: rs += PetscAbsScalar(ba[j]);
1815: }
1816: /* add the k-th row into il and c2r */
1817: il[k] = jmin;
1818: i = bj[jmin];
1819: c2r[k] = c2r[i];
1820: c2r[i] = k;
1821: }
1823: /* MatPivotCheck() */
1824: sctx.rs = rs;
1825: sctx.pv = dk;
1826: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
1827: if (sctx.newshift) break;
1828: dk = sctx.pv;
1830: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1831: }
1832: } while (sctx.newshift);
1834: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1835: PetscCall(PetscFree3(rtmp, il, c2r));
1836: PetscCall(ISRestoreIndices(ip, &rip));
1837: PetscCall(ISRestoreIndices(iip, &riip));
1839: PetscCall(ISIdentity(ip, &perm_identity));
1840: if (perm_identity) {
1841: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1842: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1843: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1844: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1845: } else {
1846: B->ops->solve = MatSolve_SeqSBAIJ_1;
1847: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1848: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
1849: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
1850: }
1852: C->assembled = PETSC_TRUE;
1853: C->preallocated = PETSC_TRUE;
1855: PetscCall(PetscLogFlops(C->rmap->n));
1857: /* MatPivotView() */
1858: if (sctx.nshift) {
1859: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1860: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1861: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1862: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1863: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1864: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1865: }
1866: }
1867: PetscFunctionReturn(PETSC_SUCCESS);
1868: }
1870: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1871: {
1872: Mat C = B;
1873: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1874: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
1875: IS ip = b->row, iip = b->icol;
1876: const PetscInt *rip, *riip;
1877: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
1878: PetscInt *ai = a->i, *aj = a->j;
1879: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1880: MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
1881: const MatScalar *aa, *v;
1882: PetscBool perm_identity;
1883: FactorShiftCtx sctx;
1884: PetscReal rs;
1885: MatScalar d;
1886: const PetscInt *adiag;
1888: PetscFunctionBegin;
1889: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1890: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1891: /* MatPivotSetUp(): initialize shift context sctx */
1892: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1894: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1895: sctx.shift_top = info->zeropivot;
1896: for (i = 0; i < mbs; i++) {
1897: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1898: d = aa[adiag[i]];
1899: rs = -PetscAbsScalar(d) - PetscRealPart(d);
1900: v = aa + ai[i];
1901: nz = ai[i + 1] - ai[i];
1902: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1903: if (rs > sctx.shift_top) sctx.shift_top = rs;
1904: }
1905: sctx.shift_top *= 1.1;
1906: sctx.nshift_max = 5;
1907: sctx.shift_lo = 0.;
1908: sctx.shift_hi = 1.;
1909: }
1911: PetscCall(ISGetIndices(ip, &rip));
1912: PetscCall(ISGetIndices(iip, &riip));
1914: /* initialization */
1915: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1917: do {
1918: sctx.newshift = PETSC_FALSE;
1920: for (i = 0; i < mbs; i++) jl[i] = mbs;
1921: il[0] = 0;
1923: for (k = 0; k < mbs; k++) {
1924: /* zero rtmp */
1925: nz = bi[k + 1] - bi[k];
1926: bjtmp = bj + bi[k];
1927: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1929: bval = ba + bi[k];
1930: /* initialize k-th row by the perm[k]-th row of A */
1931: jmin = ai[rip[k]];
1932: jmax = ai[rip[k] + 1];
1933: for (j = jmin; j < jmax; j++) {
1934: col = riip[aj[j]];
1935: if (col >= k) { /* only take upper triangular entry */
1936: rtmp[col] = aa[j];
1937: *bval++ = 0.0; /* for in-place factorization */
1938: }
1939: }
1940: /* shift the diagonal of the matrix */
1941: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1943: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1944: dk = rtmp[k];
1945: i = jl[k]; /* first row to be added to k_th row */
1947: while (i < k) {
1948: nexti = jl[i]; /* next row to be added to k_th row */
1950: /* compute multiplier, update diag(k) and U(i,k) */
1951: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1952: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1953: dk += uikdi * ba[ili];
1954: ba[ili] = uikdi; /* -U(i,k) */
1956: /* add multiple of row i to k-th row */
1957: jmin = ili + 1;
1958: jmax = bi[i + 1];
1959: if (jmin < jmax) {
1960: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1961: /* update il and jl for row i */
1962: il[i] = jmin;
1963: j = bj[jmin];
1964: jl[i] = jl[j];
1965: jl[j] = i;
1966: }
1967: i = nexti;
1968: }
1970: /* shift the diagonals when zero pivot is detected */
1971: /* compute rs=sum of abs(off-diagonal) */
1972: rs = 0.0;
1973: jmin = bi[k] + 1;
1974: nz = bi[k + 1] - jmin;
1975: bcol = bj + jmin;
1976: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
1978: sctx.rs = rs;
1979: sctx.pv = dk;
1980: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1981: if (sctx.newshift) break;
1982: dk = sctx.pv;
1984: /* copy data into U(k,:) */
1985: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1986: jmin = bi[k] + 1;
1987: jmax = bi[k + 1];
1988: if (jmin < jmax) {
1989: for (j = jmin; j < jmax; j++) {
1990: col = bj[j];
1991: ba[j] = rtmp[col];
1992: }
1993: /* add the k-th row into il and jl */
1994: il[k] = jmin;
1995: i = bj[jmin];
1996: jl[k] = jl[i];
1997: jl[i] = k;
1998: }
1999: }
2000: } while (sctx.newshift);
2002: PetscCall(PetscFree3(rtmp, il, jl));
2003: PetscCall(ISRestoreIndices(ip, &rip));
2004: PetscCall(ISRestoreIndices(iip, &riip));
2005: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2007: PetscCall(ISIdentity(ip, &perm_identity));
2008: if (perm_identity) {
2009: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2010: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2011: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2012: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013: } else {
2014: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2015: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2016: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2017: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2018: }
2020: C->assembled = PETSC_TRUE;
2021: C->preallocated = PETSC_TRUE;
2023: PetscCall(PetscLogFlops(C->rmap->n));
2024: if (sctx.nshift) {
2025: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2026: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2027: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2028: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029: }
2030: }
2031: PetscFunctionReturn(PETSC_SUCCESS);
2032: }
2034: /*
2035: icc() under revised new data structure.
2036: Factored arrays bj and ba are stored as
2037: U(0,:),...,U(i,:),U(n-1,:)
2039: ui=fact->i is an array of size n+1, in which
2040: ui+
2041: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2042: ui[n]: points to U(n-1,n-1)+1
2044: udiag=fact->diag is an array of size n,in which
2045: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2047: U(i,:) contains udiag[i] as its last entry, i.e.,
2048: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2049: */
2051: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2052: {
2053: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2054: Mat_SeqSBAIJ *b;
2055: PetscBool perm_identity;
2056: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2057: const PetscInt *rip, *riip, *adiag;
2058: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2059: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
2060: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2061: PetscReal fill = info->fill, levels = info->levels;
2062: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2063: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2064: PetscBT lnkbt;
2065: IS iperm;
2066: PetscBool diagDense;
2068: PetscFunctionBegin;
2069: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2070: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2071: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2072: PetscCall(ISIdentity(perm, &perm_identity));
2073: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2075: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2076: PetscCall(PetscMalloc1(am + 1, &udiag));
2077: ui[0] = 0;
2079: /* ICC(0) without matrix ordering: simply rearrange column indices */
2080: if (!levels && perm_identity) {
2081: for (i = 0; i < am; i++) {
2082: ncols = ai[i + 1] - adiag[i];
2083: ui[i + 1] = ui[i] + ncols;
2084: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2085: }
2086: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2087: cols = uj;
2088: for (i = 0; i < am; i++) {
2089: aj = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2090: ncols = ai[i + 1] - adiag[i] - 1;
2091: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2092: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2093: }
2094: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2095: PetscCall(ISGetIndices(iperm, &riip));
2096: PetscCall(ISGetIndices(perm, &rip));
2098: /* initialization */
2099: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2101: /* jl: linked list for storing indices of the pivot rows
2102: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2103: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2104: for (i = 0; i < am; i++) {
2105: jl[i] = am;
2106: il[i] = 0;
2107: }
2109: /* create and initialize a linked list for storing column indices of the active row k */
2110: nlnk = am + 1;
2111: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2113: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2114: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2115: current_space = free_space;
2116: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2117: current_space_lvl = free_space_lvl;
2119: for (k = 0; k < am; k++) { /* for each active row k */
2120: /* initialize lnk by the column indices of row rip[k] of A */
2121: nzk = 0;
2122: ncols = ai[rip[k] + 1] - ai[rip[k]];
2123: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2124: ncols_upper = 0;
2125: for (j = 0; j < ncols; j++) {
2126: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2127: if (riip[i] >= k) { /* only take upper triangular entry */
2128: ajtmp[ncols_upper] = i;
2129: ncols_upper++;
2130: }
2131: }
2132: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2133: nzk += nlnk;
2135: /* update lnk by computing fill-in for each pivot row to be merged in */
2136: prow = jl[k]; /* 1st pivot row */
2138: while (prow < k) {
2139: nextprow = jl[prow];
2141: /* merge prow into k-th row */
2142: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2143: jmax = ui[prow + 1];
2144: ncols = jmax - jmin;
2145: i = jmin - ui[prow];
2146: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2147: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2148: j = *(uj - 1);
2149: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2150: nzk += nlnk;
2152: /* update il and jl for prow */
2153: if (jmin < jmax) {
2154: il[prow] = jmin;
2155: j = *cols;
2156: jl[prow] = jl[j];
2157: jl[j] = prow;
2158: }
2159: prow = nextprow;
2160: }
2162: /* if free space is not available, make more free space */
2163: if (current_space->local_remaining < nzk) {
2164: i = am - k + 1; /* num of unfactored rows */
2165: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2166: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2167: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2168: reallocs++;
2169: }
2171: /* copy data into free_space and free_space_lvl, then initialize lnk */
2172: PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2173: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2175: /* add the k-th row into il and jl */
2176: if (nzk > 1) {
2177: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2178: jl[k] = jl[i];
2179: jl[i] = k;
2180: il[k] = ui[k] + 1;
2181: }
2182: uj_ptr[k] = current_space->array;
2183: uj_lvl_ptr[k] = current_space_lvl->array;
2185: current_space->array += nzk;
2186: current_space->local_used += nzk;
2187: current_space->local_remaining -= nzk;
2189: current_space_lvl->array += nzk;
2190: current_space_lvl->local_used += nzk;
2191: current_space_lvl->local_remaining -= nzk;
2193: ui[k + 1] = ui[k] + nzk;
2194: }
2196: PetscCall(ISRestoreIndices(perm, &rip));
2197: PetscCall(ISRestoreIndices(iperm, &riip));
2198: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2199: PetscCall(PetscFree(ajtmp));
2201: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2202: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2203: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2204: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2205: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2207: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2209: /* put together the new matrix in MATSEQSBAIJ format */
2210: b = (Mat_SeqSBAIJ *)fact->data;
2211: b->free_ij = PETSC_TRUE;
2212: PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2213: b->free_a = PETSC_TRUE;
2214: b->j = uj;
2215: b->i = ui;
2216: b->diag = udiag;
2217: b->ilen = NULL;
2218: b->imax = NULL;
2219: b->row = perm;
2220: b->col = perm;
2221: PetscCall(PetscObjectReference((PetscObject)perm));
2222: PetscCall(PetscObjectReference((PetscObject)perm));
2223: b->icol = iperm;
2224: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2226: PetscCall(PetscMalloc1(am, &b->solve_work));
2228: b->maxnz = b->nz = ui[am];
2230: fact->info.factor_mallocs = reallocs;
2231: fact->info.fill_ratio_given = fill;
2232: if (ai[am] != 0) {
2233: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2234: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2235: } else {
2236: fact->info.fill_ratio_needed = 0.0;
2237: }
2238: #if defined(PETSC_USE_INFO)
2239: if (ai[am] != 0) {
2240: PetscReal af = fact->info.fill_ratio_needed;
2241: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2242: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2243: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2244: } else {
2245: PetscCall(PetscInfo(A, "Empty matrix\n"));
2246: }
2247: #endif
2248: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2249: PetscFunctionReturn(PETSC_SUCCESS);
2250: }
2252: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2253: {
2254: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2255: Mat_SeqSBAIJ *b;
2256: PetscBool perm_identity;
2257: PetscReal fill = info->fill;
2258: const PetscInt *rip, *riip;
2259: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2260: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2261: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2262: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2263: PetscBT lnkbt;
2264: IS iperm;
2265: PetscBool diagDense;
2267: PetscFunctionBegin;
2268: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2269: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2270: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2272: /* check whether perm is the identity mapping */
2273: PetscCall(ISIdentity(perm, &perm_identity));
2274: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2275: PetscCall(ISGetIndices(iperm, &riip));
2276: PetscCall(ISGetIndices(perm, &rip));
2278: /* initialization */
2279: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2280: PetscCall(PetscMalloc1(am + 1, &udiag));
2281: ui[0] = 0;
2283: /* jl: linked list for storing indices of the pivot rows
2284: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2285: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2286: for (i = 0; i < am; i++) {
2287: jl[i] = am;
2288: il[i] = 0;
2289: }
2291: /* create and initialize a linked list for storing column indices of the active row k */
2292: nlnk = am + 1;
2293: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2295: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2296: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2297: current_space = free_space;
2299: for (k = 0; k < am; k++) { /* for each active row k */
2300: /* initialize lnk by the column indices of row rip[k] of A */
2301: nzk = 0;
2302: ncols = ai[rip[k] + 1] - ai[rip[k]];
2303: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2304: ncols_upper = 0;
2305: for (j = 0; j < ncols; j++) {
2306: i = riip[*(aj + ai[rip[k]] + j)];
2307: if (i >= k) { /* only take upper triangular entry */
2308: cols[ncols_upper] = i;
2309: ncols_upper++;
2310: }
2311: }
2312: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2313: nzk += nlnk;
2315: /* update lnk by computing fill-in for each pivot row to be merged in */
2316: prow = jl[k]; /* 1st pivot row */
2318: while (prow < k) {
2319: nextprow = jl[prow];
2320: /* merge prow into k-th row */
2321: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2322: jmax = ui[prow + 1];
2323: ncols = jmax - jmin;
2324: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2325: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2326: nzk += nlnk;
2328: /* update il and jl for prow */
2329: if (jmin < jmax) {
2330: il[prow] = jmin;
2331: j = *uj_ptr;
2332: jl[prow] = jl[j];
2333: jl[j] = prow;
2334: }
2335: prow = nextprow;
2336: }
2338: /* if free space is not available, make more free space */
2339: if (current_space->local_remaining < nzk) {
2340: i = am - k + 1; /* num of unfactored rows */
2341: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2342: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2343: reallocs++;
2344: }
2346: /* copy data into free space, then initialize lnk */
2347: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2349: /* add the k-th row into il and jl */
2350: if (nzk > 1) {
2351: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2352: jl[k] = jl[i];
2353: jl[i] = k;
2354: il[k] = ui[k] + 1;
2355: }
2356: ui_ptr[k] = current_space->array;
2358: current_space->array += nzk;
2359: current_space->local_used += nzk;
2360: current_space->local_remaining -= nzk;
2362: ui[k + 1] = ui[k] + nzk;
2363: }
2365: PetscCall(ISRestoreIndices(perm, &rip));
2366: PetscCall(ISRestoreIndices(iperm, &riip));
2367: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2369: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2370: PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2371: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2372: PetscCall(PetscLLDestroy(lnk, lnkbt));
2374: /* put together the new matrix in MATSEQSBAIJ format */
2375: b = (Mat_SeqSBAIJ *)fact->data;
2376: b->free_ij = PETSC_TRUE;
2377: PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2378: b->free_a = PETSC_TRUE;
2379: b->j = uj;
2380: b->i = ui;
2381: b->diag = udiag;
2382: b->ilen = NULL;
2383: b->imax = NULL;
2384: b->row = perm;
2385: b->col = perm;
2387: PetscCall(PetscObjectReference((PetscObject)perm));
2388: PetscCall(PetscObjectReference((PetscObject)perm));
2390: b->icol = iperm;
2391: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2393: PetscCall(PetscMalloc1(am, &b->solve_work));
2395: b->maxnz = b->nz = ui[am];
2397: fact->info.factor_mallocs = reallocs;
2398: fact->info.fill_ratio_given = fill;
2399: if (ai[am] != 0) {
2400: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2401: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2402: } else {
2403: fact->info.fill_ratio_needed = 0.0;
2404: }
2405: #if defined(PETSC_USE_INFO)
2406: if (ai[am] != 0) {
2407: PetscReal af = fact->info.fill_ratio_needed;
2408: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2409: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2410: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2411: } else {
2412: PetscCall(PetscInfo(A, "Empty matrix\n"));
2413: }
2414: #endif
2415: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2416: PetscFunctionReturn(PETSC_SUCCESS);
2417: }
2419: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2420: {
2421: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2422: PetscInt n = A->rmap->n;
2423: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2424: PetscScalar *x, sum;
2425: const PetscScalar *b;
2426: const MatScalar *aa, *v;
2427: PetscInt i, nz;
2429: PetscFunctionBegin;
2430: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2432: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2433: PetscCall(VecGetArrayRead(bb, &b));
2434: PetscCall(VecGetArrayWrite(xx, &x));
2436: /* forward solve the lower triangular */
2437: x[0] = b[0];
2438: v = aa;
2439: vi = aj;
2440: for (i = 1; i < n; i++) {
2441: nz = ai[i + 1] - ai[i];
2442: sum = b[i];
2443: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2444: v += nz;
2445: vi += nz;
2446: x[i] = sum;
2447: }
2449: /* backward solve the upper triangular */
2450: for (i = n - 1; i >= 0; i--) {
2451: v = aa + adiag[i + 1] + 1;
2452: vi = aj + adiag[i + 1] + 1;
2453: nz = adiag[i] - adiag[i + 1] - 1;
2454: sum = x[i];
2455: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2456: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2457: }
2459: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2460: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2461: PetscCall(VecRestoreArrayRead(bb, &b));
2462: PetscCall(VecRestoreArrayWrite(xx, &x));
2463: PetscFunctionReturn(PETSC_SUCCESS);
2464: }
2466: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2467: {
2468: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2469: IS iscol = a->col, isrow = a->row;
2470: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2471: const PetscInt *rout, *cout, *r, *c;
2472: PetscScalar *x, *tmp, sum;
2473: const PetscScalar *b;
2474: const MatScalar *aa, *v;
2476: PetscFunctionBegin;
2477: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2479: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2480: PetscCall(VecGetArrayRead(bb, &b));
2481: PetscCall(VecGetArrayWrite(xx, &x));
2482: tmp = a->solve_work;
2484: PetscCall(ISGetIndices(isrow, &rout));
2485: r = rout;
2486: PetscCall(ISGetIndices(iscol, &cout));
2487: c = cout;
2489: /* forward solve the lower triangular */
2490: tmp[0] = b[r[0]];
2491: v = aa;
2492: vi = aj;
2493: for (i = 1; i < n; i++) {
2494: nz = ai[i + 1] - ai[i];
2495: sum = b[r[i]];
2496: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2497: tmp[i] = sum;
2498: v += nz;
2499: vi += nz;
2500: }
2502: /* backward solve the upper triangular */
2503: for (i = n - 1; i >= 0; i--) {
2504: v = aa + adiag[i + 1] + 1;
2505: vi = aj + adiag[i + 1] + 1;
2506: nz = adiag[i] - adiag[i + 1] - 1;
2507: sum = tmp[i];
2508: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2509: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2510: }
2512: PetscCall(ISRestoreIndices(isrow, &rout));
2513: PetscCall(ISRestoreIndices(iscol, &cout));
2514: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2515: PetscCall(VecRestoreArrayRead(bb, &b));
2516: PetscCall(VecRestoreArrayWrite(xx, &x));
2517: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2518: PetscFunctionReturn(PETSC_SUCCESS);
2519: }