Actual source code: aijfact.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petscbt.h>
4: #include <../src/mat/utils/freespace.h>
6: static PetscErrorCode MatFactorGetSolverType_petsc(Mat A, MatSolverType *type)
7: {
8: PetscFunctionBegin;
9: *type = MATSOLVERPETSC;
10: PetscFunctionReturn(PETSC_SUCCESS);
11: }
13: PETSC_INTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A, MatFactorType ftype, Mat *B)
14: {
15: PetscInt n = A->rmap->n;
17: PetscFunctionBegin;
18: #if defined(PETSC_USE_COMPLEX)
19: if ((ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) {
20: PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY or MAT_FACTOR_ICC are not supported. Use MAT_FACTOR_LU instead.\n"));
21: *B = NULL;
22: PetscFunctionReturn(PETSC_SUCCESS);
23: }
24: #endif
26: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
27: PetscCall(MatSetSizes(*B, n, n, n, n));
28: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) {
29: PetscCall(MatSetType(*B, MATSEQAIJ));
31: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
32: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
34: PetscCall(MatSetBlockSizesFromMats(*B, A, A));
35: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_LU]));
36: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILU]));
37: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ILUDT]));
38: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
39: PetscCall(MatSetType(*B, MATSEQSBAIJ));
40: PetscCall(MatSeqSBAIJSetPreallocation(*B, 1, MAT_SKIP_ALLOCATION, NULL));
42: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
43: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
44: PetscCall(PetscStrallocpy(MATORDERINGND, (char **)&(*B)->preferredordering[MAT_FACTOR_CHOLESKY]));
45: PetscCall(PetscStrallocpy(MATORDERINGNATURAL, (char **)&(*B)->preferredordering[MAT_FACTOR_ICC]));
46: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
47: (*B)->factortype = ftype;
49: PetscCall(PetscFree((*B)->solvertype));
50: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &(*B)->solvertype));
51: (*B)->canuseordering = PETSC_TRUE;
52: PetscCall(PetscObjectComposeFunction((PetscObject)*B, "MatFactorGetSolverType_C", MatFactorGetSolverType_petsc));
53: PetscFunctionReturn(PETSC_SUCCESS);
54: }
56: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
57: {
58: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
59: IS isicol;
60: const PetscInt *r, *ic, *ai = a->i, *aj = a->j, *ajtmp;
61: PetscInt i, n = A->rmap->n;
62: PetscInt *bi, *bj;
63: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
64: PetscReal f;
65: PetscInt nlnk, *lnk, k, **bi_ptr;
66: PetscFreeSpaceList free_space = NULL, current_space = NULL;
67: PetscBT lnkbt;
68: PetscBool diagDense;
70: PetscFunctionBegin;
71: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
72: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
73: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
75: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
76: PetscCall(ISGetIndices(isrow, &r));
77: PetscCall(ISGetIndices(isicol, &ic));
79: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
80: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
81: PetscCall(PetscMalloc1(n + 1, &bdiag));
82: bi[0] = bdiag[0] = 0;
84: /* linked list for storing column indices of the active row */
85: nlnk = n + 1;
86: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
88: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
90: /* initial FreeSpace size is f*(ai[n]+1) */
91: f = info->fill;
92: if (n == 1) f = 1; /* prevent failure in corner case of 1x1 matrix with fill < 0.5 */
93: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
94: current_space = free_space;
96: for (i = 0; i < n; i++) {
97: /* copy previous fill into linked list */
98: nzi = 0;
99: nnz = ai[r[i] + 1] - ai[r[i]];
100: ajtmp = aj + ai[r[i]];
101: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
102: nzi += nlnk;
104: /* add pivot rows into linked list */
105: row = lnk[n];
106: while (row < i) {
107: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
108: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
109: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
110: nzi += nlnk;
111: row = lnk[row];
112: }
113: bi[i + 1] = bi[i] + nzi;
114: im[i] = nzi;
116: /* mark bdiag */
117: nzbd = 0;
118: nnz = nzi;
119: k = lnk[n];
120: while (nnz-- && k < i) {
121: nzbd++;
122: k = lnk[k];
123: }
124: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
126: /* if free space is not available, make more free space */
127: if (current_space->local_remaining < nzi) {
128: /* estimated additional space needed */
129: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - 1, nzi));
130: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
131: reallocs++;
132: }
134: /* copy data into free space, then initialize lnk */
135: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
137: bi_ptr[i] = current_space->array;
138: current_space->array += nzi;
139: current_space->local_used += nzi;
140: current_space->local_remaining -= nzi;
141: }
143: PetscCall(ISRestoreIndices(isrow, &r));
144: PetscCall(ISRestoreIndices(isicol, &ic));
146: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
147: PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
148: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
149: PetscCall(PetscLLDestroy(lnk, lnkbt));
150: PetscCall(PetscFree2(bi_ptr, im));
152: /* put together the new matrix */
153: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(B, MAT_SKIP_ALLOCATION, NULL));
154: b = (Mat_SeqAIJ *)B->data;
155: b->free_ij = PETSC_TRUE;
156: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
157: b->free_a = PETSC_TRUE;
158: b->j = bj;
159: b->i = bi;
160: b->diag = bdiag;
161: b->ilen = NULL;
162: b->imax = NULL;
163: b->row = isrow;
164: b->col = iscol;
165: PetscCall(PetscObjectReference((PetscObject)isrow));
166: PetscCall(PetscObjectReference((PetscObject)iscol));
167: b->icol = isicol;
168: PetscCall(PetscMalloc1(n, &b->solve_work));
170: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
171: b->maxnz = b->nz = bdiag[0] + 1;
173: B->factortype = MAT_FACTOR_LU;
174: B->info.factor_mallocs = reallocs;
175: B->info.fill_ratio_given = f;
177: if (ai[n]) {
178: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
179: } else {
180: B->info.fill_ratio_needed = 0.0;
181: }
182: #if defined(PETSC_USE_INFO)
183: if (ai[n] != 0) {
184: PetscReal af = B->info.fill_ratio_needed;
185: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
186: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
187: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
188: PetscCall(PetscInfo(A, "for best performance.\n"));
189: } else {
190: PetscCall(PetscInfo(A, "Empty matrix\n"));
191: }
192: #endif
193: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
194: if (a->inode.size_csr) B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
195: PetscCall(MatSeqAIJCheckInode_FactorLU(B));
196: PetscFunctionReturn(PETSC_SUCCESS);
197: }
199: /*
200: Trouble in factorization, should we dump the original matrix?
201: */
202: PetscErrorCode MatFactorDumpMatrix(Mat A)
203: {
204: PetscBool flg = PETSC_FALSE;
206: PetscFunctionBegin;
207: PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, NULL, "-mat_factor_dump_on_error", &flg, NULL));
208: if (flg) {
209: PetscViewer viewer;
210: char filename[PETSC_MAX_PATH_LEN];
212: PetscCall(PetscSNPrintf(filename, PETSC_MAX_PATH_LEN, "matrix_factor_error.%d", PetscGlobalRank));
213: PetscCall(PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A), filename, FILE_MODE_WRITE, &viewer));
214: PetscCall(MatView(A, viewer));
215: PetscCall(PetscViewerDestroy(&viewer));
216: }
217: PetscFunctionReturn(PETSC_SUCCESS);
218: }
220: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
221: {
222: Mat C = B;
223: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
224: IS isrow = b->row, isicol = b->icol;
225: const PetscInt *r, *ic, *ics;
226: const PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j, *bdiag = b->diag;
227: PetscInt i, j, k, nz, nzL, row, *pj;
228: const PetscInt *ajtmp, *bjtmp;
229: MatScalar *rtmp, *pc, multiplier, *pv;
230: const MatScalar *aa, *v;
231: MatScalar *ba;
232: PetscBool row_identity, col_identity;
233: FactorShiftCtx sctx;
234: const PetscInt *ddiag;
235: PetscReal rs;
236: MatScalar d;
238: PetscFunctionBegin;
239: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
240: PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
241: /* MatPivotSetUp(): initialize shift context sctx */
242: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
244: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
245: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
246: sctx.shift_top = info->zeropivot;
247: for (i = 0; i < n; i++) {
248: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
249: d = aa[ddiag[i]];
250: rs = -PetscAbsScalar(d) - PetscRealPart(d);
251: v = aa + ai[i];
252: nz = ai[i + 1] - ai[i];
253: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
254: if (rs > sctx.shift_top) sctx.shift_top = rs;
255: }
256: sctx.shift_top *= 1.1;
257: sctx.nshift_max = 5;
258: sctx.shift_lo = 0.;
259: sctx.shift_hi = 1.;
260: }
262: PetscCall(ISGetIndices(isrow, &r));
263: PetscCall(ISGetIndices(isicol, &ic));
264: PetscCall(PetscMalloc1(n + 1, &rtmp));
265: ics = ic;
267: do {
268: sctx.newshift = PETSC_FALSE;
269: for (i = 0; i < n; i++) {
270: /* zero rtmp */
271: /* L part */
272: nz = bi[i + 1] - bi[i];
273: bjtmp = bj + bi[i];
274: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
276: /* U part */
277: nz = bdiag[i] - bdiag[i + 1];
278: bjtmp = bj + bdiag[i + 1] + 1;
279: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
281: /* load in initial (unfactored row) */
282: nz = ai[r[i] + 1] - ai[r[i]];
283: ajtmp = aj + ai[r[i]];
284: v = aa + ai[r[i]];
285: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
286: /* ZeropivotApply() */
287: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
289: /* elimination */
290: bjtmp = bj + bi[i];
291: row = *bjtmp++;
292: nzL = bi[i + 1] - bi[i];
293: for (k = 0; k < nzL; k++) {
294: pc = rtmp + row;
295: if (*pc != 0.0) {
296: pv = ba + bdiag[row];
297: multiplier = *pc * (*pv);
298: *pc = multiplier;
300: pj = b->j + bdiag[row + 1] + 1; /* beginning of U(row,:) */
301: pv = ba + bdiag[row + 1] + 1;
302: nz = bdiag[row] - bdiag[row + 1] - 1; /* num of entries in U(row,:) excluding diag */
304: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
305: PetscCall(PetscLogFlops(1 + 2.0 * nz));
306: }
307: row = *bjtmp++;
308: }
310: /* finished row so stick it into b->a */
311: rs = 0.0;
312: /* L part */
313: pv = ba + bi[i];
314: pj = b->j + bi[i];
315: nz = bi[i + 1] - bi[i];
316: for (j = 0; j < nz; j++) {
317: pv[j] = rtmp[pj[j]];
318: rs += PetscAbsScalar(pv[j]);
319: }
321: /* U part */
322: pv = ba + bdiag[i + 1] + 1;
323: pj = b->j + bdiag[i + 1] + 1;
324: nz = bdiag[i] - bdiag[i + 1] - 1;
325: for (j = 0; j < nz; j++) {
326: pv[j] = rtmp[pj[j]];
327: rs += PetscAbsScalar(pv[j]);
328: }
330: sctx.rs = rs;
331: sctx.pv = rtmp[i];
332: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
333: if (sctx.newshift) break; /* break for-loop */
334: rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */
336: /* Mark diagonal and invert diagonal for simpler triangular solves */
337: pv = ba + bdiag[i];
338: *pv = 1.0 / rtmp[i];
340: } /* endof for (i=0; i<n; i++) { */
342: /* MatPivotRefine() */
343: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
344: /*
345: * if no shift in this attempt & shifting & started shifting & can refine,
346: * then try lower shift
347: */
348: sctx.shift_hi = sctx.shift_fraction;
349: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
350: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
351: sctx.newshift = PETSC_TRUE;
352: sctx.nshift++;
353: }
354: } while (sctx.newshift);
356: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
357: PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
359: PetscCall(PetscFree(rtmp));
360: PetscCall(ISRestoreIndices(isicol, &ic));
361: PetscCall(ISRestoreIndices(isrow, &r));
363: PetscCall(ISIdentity(isrow, &row_identity));
364: PetscCall(ISIdentity(isicol, &col_identity));
365: if (b->inode.size_csr) {
366: C->ops->solve = MatSolve_SeqAIJ_Inode;
367: } else if (row_identity && col_identity) {
368: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
369: } else {
370: C->ops->solve = MatSolve_SeqAIJ;
371: }
372: C->ops->solveadd = MatSolveAdd_SeqAIJ;
373: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
374: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
375: C->ops->matsolve = MatMatSolve_SeqAIJ;
376: C->ops->matsolvetranspose = MatMatSolveTranspose_SeqAIJ;
377: C->assembled = PETSC_TRUE;
378: C->preallocated = PETSC_TRUE;
380: PetscCall(PetscLogFlops(C->cmap->n));
382: /* MatShiftView(A,info,&sctx) */
383: if (sctx.nshift) {
384: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
385: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
386: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
387: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
388: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
389: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
390: }
391: }
392: PetscFunctionReturn(PETSC_SUCCESS);
393: }
395: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat, Mat, Mat);
396: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat, Vec, Vec);
397: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat, Vec, Vec, Vec);
399: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
400: {
401: Mat C = B;
402: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b = (Mat_SeqAIJ *)C->data;
403: IS isrow = b->row, isicol = b->icol;
404: const PetscInt *r, *ic, *ics;
405: PetscInt nz, row, i, j, n = A->rmap->n, diag;
406: const PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
407: const PetscInt *ajtmp, *bjtmp, *ddiag, *pj;
408: MatScalar *pv, *rtmp, *pc, multiplier, d;
409: const MatScalar *v, *aa;
410: MatScalar *ba;
411: PetscReal rs = 0.0;
412: FactorShiftCtx sctx;
413: PetscBool row_identity, col_identity;
415: PetscFunctionBegin;
416: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
418: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
419: PetscCall(MatSeqAIJGetArrayWrite(B, &ba));
420: /* MatPivotSetUp(): initialize shift context sctx */
421: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
423: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
424: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
425: sctx.shift_top = info->zeropivot;
426: for (i = 0; i < n; i++) {
427: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
428: d = aa[ddiag[i]];
429: rs = -PetscAbsScalar(d) - PetscRealPart(d);
430: v = aa + ai[i];
431: nz = ai[i + 1] - ai[i];
432: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
433: if (rs > sctx.shift_top) sctx.shift_top = rs;
434: }
435: sctx.shift_top *= 1.1;
436: sctx.nshift_max = 5;
437: sctx.shift_lo = 0.;
438: sctx.shift_hi = 1.;
439: }
441: PetscCall(ISGetIndices(isrow, &r));
442: PetscCall(ISGetIndices(isicol, &ic));
443: PetscCall(PetscMalloc1(n + 1, &rtmp));
444: ics = ic;
446: do {
447: sctx.newshift = PETSC_FALSE;
448: for (i = 0; i < n; i++) {
449: nz = bi[i + 1] - bi[i];
450: bjtmp = bj + bi[i];
451: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
453: /* load in initial (unfactored row) */
454: nz = ai[r[i] + 1] - ai[r[i]];
455: ajtmp = aj + ai[r[i]];
456: v = aa + ai[r[i]];
457: for (j = 0; j < nz; j++) rtmp[ics[ajtmp[j]]] = v[j];
458: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
460: row = *bjtmp++;
461: while (row < i) {
462: pc = rtmp + row;
463: if (*pc != 0.0) {
464: pv = ba + ddiag[row];
465: pj = b->j + ddiag[row] + 1;
466: multiplier = *pc / *pv++;
467: *pc = multiplier;
468: nz = bi[row + 1] - ddiag[row] - 1;
469: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
470: PetscCall(PetscLogFlops(1 + 2.0 * nz));
471: }
472: row = *bjtmp++;
473: }
474: /* finished row so stick it into b->a */
475: pv = ba + bi[i];
476: pj = b->j + bi[i];
477: nz = bi[i + 1] - bi[i];
478: diag = ddiag[i] - bi[i];
479: rs = 0.0;
480: for (j = 0; j < nz; j++) {
481: pv[j] = rtmp[pj[j]];
482: rs += PetscAbsScalar(pv[j]);
483: }
484: rs -= PetscAbsScalar(pv[diag]);
486: sctx.rs = rs;
487: sctx.pv = pv[diag];
488: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
489: if (sctx.newshift) break;
490: pv[diag] = sctx.pv;
491: }
493: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
494: /*
495: * if no shift in this attempt & shifting & started shifting & can refine,
496: * then try lower shift
497: */
498: sctx.shift_hi = sctx.shift_fraction;
499: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
500: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
501: sctx.newshift = PETSC_TRUE;
502: sctx.nshift++;
503: }
504: } while (sctx.newshift);
506: /* invert diagonal entries for simpler triangular solves */
507: for (i = 0; i < n; i++) ba[ddiag[i]] = 1.0 / ba[ddiag[i]];
508: PetscCall(PetscFree(rtmp));
509: PetscCall(ISRestoreIndices(isicol, &ic));
510: PetscCall(ISRestoreIndices(isrow, &r));
511: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
512: PetscCall(MatSeqAIJRestoreArrayWrite(B, &ba));
514: PetscCall(ISIdentity(isrow, &row_identity));
515: PetscCall(ISIdentity(isicol, &col_identity));
516: if (row_identity && col_identity) {
517: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
518: } else {
519: C->ops->solve = MatSolve_SeqAIJ_inplace;
520: }
521: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
522: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
523: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
524: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
525: C->ops->matsolvetranspose = NULL;
527: C->assembled = PETSC_TRUE;
528: C->preallocated = PETSC_TRUE;
530: PetscCall(PetscLogFlops(C->cmap->n));
531: if (sctx.nshift) {
532: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
533: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
534: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
535: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
536: }
537: }
538: C->ops->solve = MatSolve_SeqAIJ_inplace;
539: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
541: PetscCall(MatSeqAIJCheckInode(C));
542: PetscFunctionReturn(PETSC_SUCCESS);
543: }
545: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat, Vec, Vec);
547: /*
548: This routine implements inplace ILU(0) with row or/and column permutations.
549: Input:
550: A - original matrix
551: Output;
552: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
553: a->j (col index) is permuted by the inverse of colperm, then sorted
554: a->a reordered accordingly with a->j
555: a->diag (ptr to diagonal elements) is updated.
556: */
557: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B, Mat A, const MatFactorInfo *info)
558: {
559: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
560: IS isrow = a->row, isicol = a->icol;
561: const PetscInt *r, *ic, *ics;
562: PetscInt i, j, n = A->rmap->n, *ai = a->i, *aj = a->j;
563: PetscInt *ajtmp, nz, row;
564: PetscInt nbdiag, *pj;
565: PetscScalar *rtmp, *pc, multiplier, d;
566: MatScalar *pv, *v;
567: PetscReal rs;
568: FactorShiftCtx sctx;
569: MatScalar *aa, *vtmp;
570: PetscInt *diag;
572: PetscFunctionBegin;
573: PetscCheck(A == B, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "input and output matrix must have same address");
575: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, (const PetscInt **)&diag, NULL));
576: PetscCall(MatSeqAIJGetArray(A, &aa));
577: /* MatPivotSetUp(): initialize shift context sctx */
578: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
580: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
581: const PetscInt *ddiag;
583: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &ddiag, NULL));
584: sctx.shift_top = info->zeropivot;
585: for (i = 0; i < n; i++) {
586: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
587: d = aa[ddiag[i]];
588: rs = -PetscAbsScalar(d) - PetscRealPart(d);
589: vtmp = aa + ai[i];
590: nz = ai[i + 1] - ai[i];
591: for (j = 0; j < nz; j++) rs += PetscAbsScalar(vtmp[j]);
592: if (rs > sctx.shift_top) sctx.shift_top = rs;
593: }
594: sctx.shift_top *= 1.1;
595: sctx.nshift_max = 5;
596: sctx.shift_lo = 0.;
597: sctx.shift_hi = 1.;
598: }
600: PetscCall(ISGetIndices(isrow, &r));
601: PetscCall(ISGetIndices(isicol, &ic));
602: PetscCall(PetscMalloc1(n + 1, &rtmp));
603: PetscCall(PetscArrayzero(rtmp, n + 1));
604: ics = ic;
606: #if defined(MV)
607: sctx.shift_top = 0.;
608: sctx.nshift_max = 0;
609: sctx.shift_lo = 0.;
610: sctx.shift_hi = 0.;
611: sctx.shift_fraction = 0.;
613: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
614: sctx.shift_top = 0.;
615: for (i = 0; i < n; i++) {
616: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
617: d = aa[diag[i]];
618: rs = -PetscAbsScalar(d) - PetscRealPart(d);
619: v = aa + ai[i];
620: nz = ai[i + 1] - ai[i];
621: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
622: if (rs > sctx.shift_top) sctx.shift_top = rs;
623: }
624: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
625: sctx.shift_top *= 1.1;
626: sctx.nshift_max = 5;
627: sctx.shift_lo = 0.;
628: sctx.shift_hi = 1.;
629: }
631: sctx.shift_amount = 0.;
632: sctx.nshift = 0;
633: #endif
635: do {
636: sctx.newshift = PETSC_FALSE;
637: for (i = 0; i < n; i++) {
638: /* load in initial unfactored row */
639: nz = ai[r[i] + 1] - ai[r[i]];
640: ajtmp = aj + ai[r[i]];
641: v = aa + ai[r[i]];
642: /* sort permuted ajtmp and values v accordingly */
643: for (j = 0; j < nz; j++) ajtmp[j] = ics[ajtmp[j]];
644: PetscCall(PetscSortIntWithScalarArray(nz, ajtmp, v));
646: diag[r[i]] = ai[r[i]];
647: for (j = 0; j < nz; j++) {
648: rtmp[ajtmp[j]] = v[j];
649: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
650: }
651: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
653: row = *ajtmp++;
654: while (row < i) {
655: pc = rtmp + row;
656: if (*pc != 0.0) {
657: pv = aa + diag[r[row]];
658: pj = aj + diag[r[row]] + 1;
660: multiplier = *pc / *pv++;
661: *pc = multiplier;
662: nz = ai[r[row] + 1] - diag[r[row]] - 1;
663: for (j = 0; j < nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
664: PetscCall(PetscLogFlops(1 + 2.0 * nz));
665: }
666: row = *ajtmp++;
667: }
668: /* finished row so overwrite it onto aa */
669: pv = aa + ai[r[i]];
670: pj = aj + ai[r[i]];
671: nz = ai[r[i] + 1] - ai[r[i]];
672: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
674: rs = 0.0;
675: for (j = 0; j < nz; j++) {
676: pv[j] = rtmp[pj[j]];
677: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
678: }
680: sctx.rs = rs;
681: sctx.pv = pv[nbdiag];
682: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
683: if (sctx.newshift) break;
684: pv[nbdiag] = sctx.pv;
685: }
687: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction > 0 && sctx.nshift < sctx.nshift_max) {
688: /*
689: * if no shift in this attempt & shifting & started shifting & can refine,
690: * then try lower shift
691: */
692: sctx.shift_hi = sctx.shift_fraction;
693: sctx.shift_fraction = (sctx.shift_hi + sctx.shift_lo) / 2.;
694: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
695: sctx.newshift = PETSC_TRUE;
696: sctx.nshift++;
697: }
698: } while (sctx.newshift);
700: /* invert diagonal entries for simpler triangular solves */
701: for (i = 0; i < n; i++) aa[diag[r[i]]] = 1.0 / aa[diag[r[i]]];
703: PetscCall(MatSeqAIJRestoreArray(A, &aa));
704: PetscCall(PetscFree(rtmp));
705: PetscCall(ISRestoreIndices(isicol, &ic));
706: PetscCall(ISRestoreIndices(isrow, &r));
708: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
709: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
710: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
711: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
713: A->assembled = PETSC_TRUE;
714: A->preallocated = PETSC_TRUE;
716: PetscCall(PetscLogFlops(A->cmap->n));
717: if (sctx.nshift) {
718: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
719: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
720: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
721: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
722: }
723: }
724: PetscFunctionReturn(PETSC_SUCCESS);
725: }
727: PetscErrorCode MatLUFactor_SeqAIJ(Mat A, IS row, IS col, const MatFactorInfo *info)
728: {
729: Mat C;
731: PetscFunctionBegin;
732: PetscCall(MatGetFactor(A, MATSOLVERPETSC, MAT_FACTOR_LU, &C));
733: PetscCall(MatLUFactorSymbolic(C, A, row, col, info));
734: PetscCall(MatLUFactorNumeric(C, A, info));
736: A->ops->solve = C->ops->solve;
737: A->ops->solvetranspose = C->ops->solvetranspose;
739: PetscCall(MatHeaderMerge(A, &C));
740: PetscFunctionReturn(PETSC_SUCCESS);
741: }
743: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
744: {
745: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
746: IS iscol = a->col, isrow = a->row;
747: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
748: PetscInt nz;
749: const PetscInt *rout, *cout, *r, *c;
750: PetscScalar *x, *tmp, *tmps, sum;
751: const PetscScalar *b;
752: const MatScalar *aa, *v;
753: const PetscInt *adiag;
755: PetscFunctionBegin;
756: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
758: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
759: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
760: PetscCall(VecGetArrayRead(bb, &b));
761: PetscCall(VecGetArrayWrite(xx, &x));
762: tmp = a->solve_work;
764: PetscCall(ISGetIndices(isrow, &rout));
765: r = rout;
766: PetscCall(ISGetIndices(iscol, &cout));
767: c = cout + (n - 1);
769: /* forward solve the lower triangular */
770: tmp[0] = b[*r++];
771: tmps = tmp;
772: for (i = 1; i < n; i++) {
773: v = aa + ai[i];
774: vi = aj + ai[i];
775: nz = adiag[i] - ai[i];
776: sum = b[*r++];
777: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
778: tmp[i] = sum;
779: }
781: /* backward solve the upper triangular */
782: for (i = n - 1; i >= 0; i--) {
783: v = aa + adiag[i] + 1;
784: vi = aj + adiag[i] + 1;
785: nz = ai[i + 1] - adiag[i] - 1;
786: sum = tmp[i];
787: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
788: x[*c--] = tmp[i] = sum * aa[adiag[i]];
789: }
790: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
791: PetscCall(ISRestoreIndices(isrow, &rout));
792: PetscCall(ISRestoreIndices(iscol, &cout));
793: PetscCall(VecRestoreArrayRead(bb, &b));
794: PetscCall(VecRestoreArrayWrite(xx, &x));
795: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
796: PetscFunctionReturn(PETSC_SUCCESS);
797: }
799: static PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A, Mat B, Mat X)
800: {
801: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
802: IS iscol = a->col, isrow = a->row;
803: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
804: PetscInt nz, neq, ldb, ldx;
805: const PetscInt *rout, *cout, *r, *c;
806: PetscScalar *x, *tmp = a->solve_work, *tmps, sum;
807: const PetscScalar *b, *aa, *v;
808: PetscBool isdense;
809: const PetscInt *adiag;
811: PetscFunctionBegin;
812: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
813: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
814: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
815: if (X != B) {
816: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
817: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
818: }
819: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
820: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
821: PetscCall(MatDenseGetArrayRead(B, &b));
822: PetscCall(MatDenseGetLDA(B, &ldb));
823: PetscCall(MatDenseGetArray(X, &x));
824: PetscCall(MatDenseGetLDA(X, &ldx));
825: PetscCall(ISGetIndices(isrow, &rout));
826: r = rout;
827: PetscCall(ISGetIndices(iscol, &cout));
828: c = cout;
829: for (neq = 0; neq < B->cmap->n; neq++) {
830: /* forward solve the lower triangular */
831: tmp[0] = b[r[0]];
832: tmps = tmp;
833: for (i = 1; i < n; i++) {
834: v = aa + ai[i];
835: vi = aj + ai[i];
836: nz = adiag[i] - ai[i];
837: sum = b[r[i]];
838: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
839: tmp[i] = sum;
840: }
841: /* backward solve the upper triangular */
842: for (i = n - 1; i >= 0; i--) {
843: v = aa + adiag[i] + 1;
844: vi = aj + adiag[i] + 1;
845: nz = ai[i + 1] - adiag[i] - 1;
846: sum = tmp[i];
847: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
848: x[c[i]] = tmp[i] = sum * aa[adiag[i]];
849: }
850: b += ldb;
851: x += ldx;
852: }
853: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
854: PetscCall(ISRestoreIndices(isrow, &rout));
855: PetscCall(ISRestoreIndices(iscol, &cout));
856: PetscCall(MatDenseRestoreArrayRead(B, &b));
857: PetscCall(MatDenseRestoreArray(X, &x));
858: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
859: PetscFunctionReturn(PETSC_SUCCESS);
860: }
862: PetscErrorCode MatMatSolve_SeqAIJ(Mat A, Mat B, Mat X)
863: {
864: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
865: IS iscol = a->col, isrow = a->row;
866: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
867: const PetscInt *adiag;
868: PetscInt nz, neq, ldb, ldx;
869: const PetscInt *rout, *cout, *r, *c;
870: PetscScalar *x, *tmp = a->solve_work, sum;
871: const PetscScalar *b, *aa, *v;
872: PetscBool isdense;
874: PetscFunctionBegin;
875: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
876: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
877: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
878: if (X != B) {
879: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
880: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
881: }
882: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
883: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
884: PetscCall(MatDenseGetArrayRead(B, &b));
885: PetscCall(MatDenseGetLDA(B, &ldb));
886: PetscCall(MatDenseGetArray(X, &x));
887: PetscCall(MatDenseGetLDA(X, &ldx));
888: PetscCall(ISGetIndices(isrow, &rout));
889: r = rout;
890: PetscCall(ISGetIndices(iscol, &cout));
891: c = cout;
892: for (neq = 0; neq < B->cmap->n; neq++) {
893: /* forward solve the lower triangular */
894: tmp[0] = b[r[0]];
895: v = aa;
896: vi = aj;
897: for (i = 1; i < n; i++) {
898: nz = ai[i + 1] - ai[i];
899: sum = b[r[i]];
900: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
901: tmp[i] = sum;
902: v += nz;
903: vi += nz;
904: }
905: /* backward solve the upper triangular */
906: for (i = n - 1; i >= 0; i--) {
907: v = aa + adiag[i + 1] + 1;
908: vi = aj + adiag[i + 1] + 1;
909: nz = adiag[i] - adiag[i + 1] - 1;
910: sum = tmp[i];
911: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
912: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
913: }
914: b += ldb;
915: x += ldx;
916: }
917: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
918: PetscCall(ISRestoreIndices(isrow, &rout));
919: PetscCall(ISRestoreIndices(iscol, &cout));
920: PetscCall(MatDenseRestoreArrayRead(B, &b));
921: PetscCall(MatDenseRestoreArray(X, &x));
922: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
923: PetscFunctionReturn(PETSC_SUCCESS);
924: }
926: PetscErrorCode MatMatSolveTranspose_SeqAIJ(Mat A, Mat B, Mat X)
927: {
928: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
929: IS iscol = a->col, isrow = a->row;
930: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, j;
931: const PetscInt *adiag = a->diag;
932: PetscInt nz, neq, ldb, ldx;
933: const PetscInt *rout, *cout, *r, *c;
934: PetscScalar *x, *tmp = a->solve_work, s1;
935: const PetscScalar *b, *aa, *v;
936: PetscBool isdense;
938: PetscFunctionBegin;
939: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
940: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &isdense));
941: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "B matrix must be a SeqDense matrix");
942: if (X != B) {
943: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSEQDENSE, &isdense));
944: PetscCheck(isdense, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "X matrix must be a SeqDense matrix");
945: }
946: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
947: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
948: PetscCall(MatDenseGetArrayRead(B, &b));
949: PetscCall(MatDenseGetLDA(B, &ldb));
950: PetscCall(MatDenseGetArray(X, &x));
951: PetscCall(MatDenseGetLDA(X, &ldx));
952: PetscCall(ISGetIndices(isrow, &rout));
953: r = rout;
954: PetscCall(ISGetIndices(iscol, &cout));
955: c = cout;
956: for (neq = 0; neq < B->cmap->n; neq++) {
957: /* copy the b into temp work space according to permutation */
958: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
960: /* forward solve the U^T */
961: for (i = 0; i < n; i++) {
962: v = aa + adiag[i + 1] + 1;
963: vi = aj + adiag[i + 1] + 1;
964: nz = adiag[i] - adiag[i + 1] - 1;
965: s1 = tmp[i];
966: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
967: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
968: tmp[i] = s1;
969: }
971: /* backward solve the L^T */
972: for (i = n - 1; i >= 0; i--) {
973: v = aa + ai[i];
974: vi = aj + ai[i];
975: nz = ai[i + 1] - ai[i];
976: s1 = tmp[i];
977: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
978: }
980: /* copy tmp into x according to permutation */
981: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
982: b += ldb;
983: x += ldx;
984: }
985: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
986: PetscCall(ISRestoreIndices(isrow, &rout));
987: PetscCall(ISRestoreIndices(iscol, &cout));
988: PetscCall(MatDenseRestoreArrayRead(B, &b));
989: PetscCall(MatDenseRestoreArray(X, &x));
990: PetscCall(PetscLogFlops(B->cmap->n * (2.0 * a->nz - n)));
991: PetscFunctionReturn(PETSC_SUCCESS);
992: }
994: static PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A, Vec bb, Vec xx)
995: {
996: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
997: IS iscol = a->col, isrow = a->row;
998: const PetscInt *r, *c, *rout, *cout, *adiag;
999: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j;
1000: PetscInt nz, row;
1001: PetscScalar *x, *tmp, *tmps, sum;
1002: const PetscScalar *b;
1003: const MatScalar *aa, *v;
1005: PetscFunctionBegin;
1006: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1008: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1009: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1010: PetscCall(VecGetArrayRead(bb, &b));
1011: PetscCall(VecGetArrayWrite(xx, &x));
1012: tmp = a->solve_work;
1014: PetscCall(ISGetIndices(isrow, &rout));
1015: r = rout;
1016: PetscCall(ISGetIndices(iscol, &cout));
1017: c = cout + (n - 1);
1019: /* forward solve the lower triangular */
1020: tmp[0] = b[*r++];
1021: tmps = tmp;
1022: for (row = 1; row < n; row++) {
1023: i = rout[row]; /* permuted row */
1024: v = aa + ai[i];
1025: vi = aj + ai[i];
1026: nz = adiag[i] - ai[i];
1027: sum = b[*r++];
1028: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1029: tmp[row] = sum;
1030: }
1032: /* backward solve the upper triangular */
1033: for (row = n - 1; row >= 0; row--) {
1034: i = rout[row]; /* permuted row */
1035: v = aa + adiag[i] + 1;
1036: vi = aj + adiag[i] + 1;
1037: nz = ai[i + 1] - adiag[i] - 1;
1038: sum = tmp[row];
1039: PetscSparseDenseMinusDot(sum, tmps, v, vi, nz);
1040: x[*c--] = tmp[row] = sum * aa[adiag[i]];
1041: }
1042: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1043: PetscCall(ISRestoreIndices(isrow, &rout));
1044: PetscCall(ISRestoreIndices(iscol, &cout));
1045: PetscCall(VecRestoreArrayRead(bb, &b));
1046: PetscCall(VecRestoreArrayWrite(xx, &x));
1047: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1048: PetscFunctionReturn(PETSC_SUCCESS);
1049: }
1051: #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h>
1052: static PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
1053: {
1054: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1055: PetscInt n = A->rmap->n;
1056: const PetscInt *ai = a->i, *aj = a->j, *adiag;
1057: PetscScalar *x;
1058: const PetscScalar *b;
1059: const MatScalar *aa;
1060: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1061: PetscInt adiag_i, i, nz, ai_i;
1062: const PetscInt *vi;
1063: const MatScalar *v;
1064: PetscScalar sum;
1065: #endif
1067: PetscFunctionBegin;
1068: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
1069: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1070: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1071: PetscCall(VecGetArrayRead(bb, &b));
1072: PetscCall(VecGetArrayWrite(xx, &x));
1074: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1075: fortransolveaij_(&n, x, ai, aj, adiag, aa, b);
1076: #else
1077: /* forward solve the lower triangular */
1078: x[0] = b[0];
1079: for (i = 1; i < n; i++) {
1080: ai_i = ai[i];
1081: v = aa + ai_i;
1082: vi = aj + ai_i;
1083: nz = adiag[i] - ai_i;
1084: sum = b[i];
1085: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1086: x[i] = sum;
1087: }
1089: /* backward solve the upper triangular */
1090: for (i = n - 1; i >= 0; i--) {
1091: adiag_i = adiag[i];
1092: v = aa + adiag_i + 1;
1093: vi = aj + adiag_i + 1;
1094: nz = ai[i + 1] - adiag_i - 1;
1095: sum = x[i];
1096: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
1097: x[i] = sum * aa[adiag_i];
1098: }
1099: #endif
1100: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1101: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1102: PetscCall(VecRestoreArrayRead(bb, &b));
1103: PetscCall(VecRestoreArrayWrite(xx, &x));
1104: PetscFunctionReturn(PETSC_SUCCESS);
1105: }
1107: static PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec yy, Vec xx)
1108: {
1109: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1110: IS iscol = a->col, isrow = a->row;
1111: PetscInt i, n = A->rmap->n, j;
1112: PetscInt nz;
1113: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1114: PetscScalar *x, *tmp, sum;
1115: const PetscScalar *b;
1116: const MatScalar *aa, *v;
1118: PetscFunctionBegin;
1119: if (yy != xx) PetscCall(VecCopy(yy, xx));
1121: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1122: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1123: PetscCall(VecGetArrayRead(bb, &b));
1124: PetscCall(VecGetArray(xx, &x));
1125: tmp = a->solve_work;
1127: PetscCall(ISGetIndices(isrow, &rout));
1128: r = rout;
1129: PetscCall(ISGetIndices(iscol, &cout));
1130: c = cout + (n - 1);
1132: /* forward solve the lower triangular */
1133: tmp[0] = b[*r++];
1134: for (i = 1; i < n; i++) {
1135: v = aa + ai[i];
1136: vi = aj + ai[i];
1137: nz = adiag[i] - ai[i];
1138: sum = b[*r++];
1139: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1140: tmp[i] = sum;
1141: }
1143: /* backward solve the upper triangular */
1144: for (i = n - 1; i >= 0; i--) {
1145: v = aa + adiag[i] + 1;
1146: vi = aj + adiag[i] + 1;
1147: nz = ai[i + 1] - adiag[i] - 1;
1148: sum = tmp[i];
1149: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1150: tmp[i] = sum * aa[adiag[i]];
1151: x[*c--] += tmp[i];
1152: }
1154: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1155: PetscCall(ISRestoreIndices(isrow, &rout));
1156: PetscCall(ISRestoreIndices(iscol, &cout));
1157: PetscCall(VecRestoreArrayRead(bb, &b));
1158: PetscCall(VecRestoreArray(xx, &x));
1159: PetscCall(PetscLogFlops(2.0 * a->nz));
1160: PetscFunctionReturn(PETSC_SUCCESS);
1161: }
1163: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A, Vec bb, Vec yy, Vec xx)
1164: {
1165: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1166: IS iscol = a->col, isrow = a->row;
1167: PetscInt i, n = A->rmap->n, j;
1168: PetscInt nz;
1169: const PetscInt *rout, *cout, *r, *c, *vi, *ai = a->i, *aj = a->j, *adiag;
1170: PetscScalar *x, *tmp, sum;
1171: const PetscScalar *b;
1172: const MatScalar *aa, *v;
1174: PetscFunctionBegin;
1175: if (yy != xx) PetscCall(VecCopy(yy, xx));
1177: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1178: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1179: PetscCall(VecGetArrayRead(bb, &b));
1180: PetscCall(VecGetArray(xx, &x));
1181: tmp = a->solve_work;
1183: PetscCall(ISGetIndices(isrow, &rout));
1184: r = rout;
1185: PetscCall(ISGetIndices(iscol, &cout));
1186: c = cout;
1188: /* forward solve the lower triangular */
1189: tmp[0] = b[r[0]];
1190: v = aa;
1191: vi = aj;
1192: for (i = 1; i < n; i++) {
1193: nz = ai[i + 1] - ai[i];
1194: sum = b[r[i]];
1195: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1196: tmp[i] = sum;
1197: v += nz;
1198: vi += nz;
1199: }
1201: /* backward solve the upper triangular */
1202: v = aa + adiag[n - 1];
1203: vi = aj + adiag[n - 1];
1204: for (i = n - 1; i >= 0; i--) {
1205: nz = adiag[i] - adiag[i + 1] - 1;
1206: sum = tmp[i];
1207: for (j = 0; j < nz; j++) sum -= v[j] * tmp[vi[j]];
1208: tmp[i] = sum * v[nz];
1209: x[c[i]] += tmp[i];
1210: v += nz + 1;
1211: vi += nz + 1;
1212: }
1214: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1215: PetscCall(ISRestoreIndices(isrow, &rout));
1216: PetscCall(ISRestoreIndices(iscol, &cout));
1217: PetscCall(VecRestoreArrayRead(bb, &b));
1218: PetscCall(VecRestoreArray(xx, &x));
1219: PetscCall(PetscLogFlops(2.0 * a->nz));
1220: PetscFunctionReturn(PETSC_SUCCESS);
1221: }
1223: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A, Vec bb, Vec xx)
1224: {
1225: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1226: IS iscol = a->col, isrow = a->row;
1227: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1228: PetscInt i, n = A->rmap->n, j;
1229: PetscInt nz;
1230: PetscScalar *x, *tmp, s1;
1231: const MatScalar *aa, *v;
1232: const PetscScalar *b;
1234: PetscFunctionBegin;
1235: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1236: PetscCall(VecGetArrayRead(bb, &b));
1237: PetscCall(VecGetArrayWrite(xx, &x));
1238: tmp = a->solve_work;
1240: PetscCall(ISGetIndices(isrow, &rout));
1241: r = rout;
1242: PetscCall(ISGetIndices(iscol, &cout));
1243: c = cout;
1245: /* copy the b into temp work space according to permutation */
1246: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1248: /* forward solve the U^T */
1249: for (i = 0; i < n; i++) {
1250: v = aa + diag[i];
1251: vi = aj + diag[i] + 1;
1252: nz = ai[i + 1] - diag[i] - 1;
1253: s1 = tmp[i];
1254: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1255: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1256: tmp[i] = s1;
1257: }
1259: /* backward solve the L^T */
1260: for (i = n - 1; i >= 0; i--) {
1261: v = aa + diag[i] - 1;
1262: vi = aj + diag[i] - 1;
1263: nz = diag[i] - ai[i];
1264: s1 = tmp[i];
1265: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1266: }
1268: /* copy tmp into x according to permutation */
1269: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1271: PetscCall(ISRestoreIndices(isrow, &rout));
1272: PetscCall(ISRestoreIndices(iscol, &cout));
1273: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1274: PetscCall(VecRestoreArrayRead(bb, &b));
1275: PetscCall(VecRestoreArrayWrite(xx, &x));
1277: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1278: PetscFunctionReturn(PETSC_SUCCESS);
1279: }
1281: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A, Vec bb, Vec xx)
1282: {
1283: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1284: IS iscol = a->col, isrow = a->row;
1285: const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1286: PetscInt i, n = A->rmap->n, j;
1287: PetscInt nz;
1288: PetscScalar *x, *tmp, s1;
1289: const MatScalar *aa, *v;
1290: const PetscScalar *b;
1292: PetscFunctionBegin;
1293: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1294: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1295: PetscCall(VecGetArrayRead(bb, &b));
1296: PetscCall(VecGetArrayWrite(xx, &x));
1297: tmp = a->solve_work;
1299: PetscCall(ISGetIndices(isrow, &rout));
1300: r = rout;
1301: PetscCall(ISGetIndices(iscol, &cout));
1302: c = cout;
1304: /* copy the b into temp work space according to permutation */
1305: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1307: /* forward solve the U^T */
1308: for (i = 0; i < n; i++) {
1309: v = aa + adiag[i + 1] + 1;
1310: vi = aj + adiag[i + 1] + 1;
1311: nz = adiag[i] - adiag[i + 1] - 1;
1312: s1 = tmp[i];
1313: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1314: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1315: tmp[i] = s1;
1316: }
1318: /* backward solve the L^T */
1319: for (i = n - 1; i >= 0; i--) {
1320: v = aa + ai[i];
1321: vi = aj + ai[i];
1322: nz = ai[i + 1] - ai[i];
1323: s1 = tmp[i];
1324: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1325: }
1327: /* copy tmp into x according to permutation */
1328: for (i = 0; i < n; i++) x[r[i]] = tmp[i];
1330: PetscCall(ISRestoreIndices(isrow, &rout));
1331: PetscCall(ISRestoreIndices(iscol, &cout));
1332: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1333: PetscCall(VecRestoreArrayRead(bb, &b));
1334: PetscCall(VecRestoreArrayWrite(xx, &x));
1336: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1337: PetscFunctionReturn(PETSC_SUCCESS);
1338: }
1340: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A, Vec bb, Vec zz, Vec xx)
1341: {
1342: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1343: IS iscol = a->col, isrow = a->row;
1344: const PetscInt *rout, *cout, *r, *c, *diag = a->diag, *ai = a->i, *aj = a->j, *vi;
1345: PetscInt i, n = A->rmap->n, j;
1346: PetscInt nz;
1347: PetscScalar *x, *tmp, s1;
1348: const MatScalar *aa, *v;
1349: const PetscScalar *b;
1351: PetscFunctionBegin;
1352: if (zz != xx) PetscCall(VecCopy(zz, xx));
1353: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1354: PetscCall(VecGetArrayRead(bb, &b));
1355: PetscCall(VecGetArray(xx, &x));
1356: tmp = a->solve_work;
1358: PetscCall(ISGetIndices(isrow, &rout));
1359: r = rout;
1360: PetscCall(ISGetIndices(iscol, &cout));
1361: c = cout;
1363: /* copy the b into temp work space according to permutation */
1364: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1366: /* forward solve the U^T */
1367: for (i = 0; i < n; i++) {
1368: v = aa + diag[i];
1369: vi = aj + diag[i] + 1;
1370: nz = ai[i + 1] - diag[i] - 1;
1371: s1 = tmp[i];
1372: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1373: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1374: tmp[i] = s1;
1375: }
1377: /* backward solve the L^T */
1378: for (i = n - 1; i >= 0; i--) {
1379: v = aa + diag[i] - 1;
1380: vi = aj + diag[i] - 1;
1381: nz = diag[i] - ai[i];
1382: s1 = tmp[i];
1383: for (j = 0; j > -nz; j--) tmp[vi[j]] -= s1 * v[j];
1384: }
1386: /* copy tmp into x according to permutation */
1387: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1389: PetscCall(ISRestoreIndices(isrow, &rout));
1390: PetscCall(ISRestoreIndices(iscol, &cout));
1391: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1392: PetscCall(VecRestoreArrayRead(bb, &b));
1393: PetscCall(VecRestoreArray(xx, &x));
1395: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1396: PetscFunctionReturn(PETSC_SUCCESS);
1397: }
1399: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A, Vec bb, Vec zz, Vec xx)
1400: {
1401: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1402: IS iscol = a->col, isrow = a->row;
1403: const PetscInt *rout, *cout, *r, *c, *adiag, *ai = a->i, *aj = a->j, *vi;
1404: PetscInt i, n = A->rmap->n, j;
1405: PetscInt nz;
1406: PetscScalar *x, *tmp, s1;
1407: const MatScalar *aa, *v;
1408: const PetscScalar *b;
1410: PetscFunctionBegin;
1411: if (zz != xx) PetscCall(VecCopy(zz, xx));
1412: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1413: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1414: PetscCall(VecGetArrayRead(bb, &b));
1415: PetscCall(VecGetArray(xx, &x));
1416: tmp = a->solve_work;
1418: PetscCall(ISGetIndices(isrow, &rout));
1419: r = rout;
1420: PetscCall(ISGetIndices(iscol, &cout));
1421: c = cout;
1423: /* copy the b into temp work space according to permutation */
1424: for (i = 0; i < n; i++) tmp[i] = b[c[i]];
1426: /* forward solve the U^T */
1427: for (i = 0; i < n; i++) {
1428: v = aa + adiag[i + 1] + 1;
1429: vi = aj + adiag[i + 1] + 1;
1430: nz = adiag[i] - adiag[i + 1] - 1;
1431: s1 = tmp[i];
1432: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1433: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1434: tmp[i] = s1;
1435: }
1437: /* backward solve the L^T */
1438: for (i = n - 1; i >= 0; i--) {
1439: v = aa + ai[i];
1440: vi = aj + ai[i];
1441: nz = ai[i + 1] - ai[i];
1442: s1 = tmp[i];
1443: for (j = 0; j < nz; j++) tmp[vi[j]] -= s1 * v[j];
1444: }
1446: /* copy tmp into x according to permutation */
1447: for (i = 0; i < n; i++) x[r[i]] += tmp[i];
1449: PetscCall(ISRestoreIndices(isrow, &rout));
1450: PetscCall(ISRestoreIndices(iscol, &cout));
1451: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1452: PetscCall(VecRestoreArrayRead(bb, &b));
1453: PetscCall(VecRestoreArray(xx, &x));
1455: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
1456: PetscFunctionReturn(PETSC_SUCCESS);
1457: }
1459: /*
1460: ilu() under revised new data structure.
1461: Factored arrays bj and ba are stored as
1462: L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)
1464: bi=fact->i is an array of size n+1, in which
1465: bi[i]: points to 1st entry of L(i,:),i=0,...,n-1
1466: bi[n]: points to L(n-1,n-1)+1
1468: bdiag=fact->diag is an array of size n+1,in which
1469: bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1
1470: bdiag[n]: points to entry of U(n-1,0)-1
1472: U(i,:) contains bdiag[i] as its last entry, i.e.,
1473: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
1474: */
1475: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1476: {
1477: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1478: const PetscInt n = A->rmap->n, *ai = a->i, *aj, *adiag;
1479: PetscInt i, j, k = 0, nz, *bi, *bj, *bdiag;
1480: IS isicol;
1482: PetscFunctionBegin;
1483: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1484: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1485: PetscCall(MatDuplicateNoCreate_SeqAIJ(fact, A, MAT_DO_NOT_COPY_VALUES, PETSC_FALSE));
1486: b = (Mat_SeqAIJ *)fact->data;
1488: /* allocate matrix arrays for new data structure */
1489: PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscScalar), (void **)&b->a));
1490: PetscCall(PetscShmgetAllocateArray(ai[n], sizeof(PetscInt), (void **)&b->j));
1491: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&b->i));
1492: if (n > 0) PetscCall(PetscArrayzero(b->a, ai[n]));
1493: b->free_a = PETSC_TRUE;
1494: b->free_ij = PETSC_TRUE;
1496: if (!b->diag) PetscCall(PetscMalloc1(n + 1, &b->diag));
1497: bdiag = b->diag;
1499: /* set bi and bj with new data structure */
1500: bi = b->i;
1501: bj = b->j;
1503: /* L part */
1504: bi[0] = 0;
1505: for (i = 0; i < n; i++) {
1506: nz = adiag[i] - ai[i];
1507: bi[i + 1] = bi[i] + nz;
1508: aj = a->j + ai[i];
1509: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1510: }
1512: /* U part */
1513: bdiag[n] = bi[n] - 1;
1514: for (i = n - 1; i >= 0; i--) {
1515: nz = ai[i + 1] - adiag[i] - 1;
1516: aj = a->j + adiag[i] + 1;
1517: for (j = 0; j < nz; j++) bj[k++] = aj[j];
1518: /* diag[i] */
1519: bj[k++] = i;
1520: bdiag[i] = bdiag[i + 1] + nz + 1;
1521: }
1523: fact->factortype = MAT_FACTOR_ILU;
1524: fact->info.factor_mallocs = 0;
1525: fact->info.fill_ratio_given = info->fill;
1526: fact->info.fill_ratio_needed = 1.0;
1527: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1528: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1530: b = (Mat_SeqAIJ *)fact->data;
1531: b->row = isrow;
1532: b->col = iscol;
1533: b->icol = isicol;
1534: PetscCall(PetscMalloc1(fact->rmap->n, &b->solve_work));
1535: PetscCall(PetscObjectReference((PetscObject)isrow));
1536: PetscCall(PetscObjectReference((PetscObject)iscol));
1537: PetscFunctionReturn(PETSC_SUCCESS);
1538: }
1540: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1541: {
1542: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *b;
1543: IS isicol;
1544: const PetscInt *r, *ic;
1545: PetscInt n = A->rmap->n, *ai = a->i, *aj = a->j;
1546: PetscInt *bi, *cols, nnz, *cols_lvl;
1547: PetscInt *bdiag, prow, fm, nzbd, reallocs = 0, dcount = 0;
1548: PetscInt i, levels, diagonal_fill;
1549: PetscBool col_identity, row_identity;
1550: PetscReal f;
1551: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
1552: PetscBT lnkbt;
1553: PetscInt nzi, *bj, **bj_ptr, **bjlvl_ptr;
1554: PetscFreeSpaceList free_space = NULL, current_space = NULL;
1555: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
1556: PetscBool diagDense;
1558: PetscFunctionBegin;
1559: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
1560: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
1561: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
1563: levels = (PetscInt)info->levels;
1564: PetscCall(ISIdentity(isrow, &row_identity));
1565: PetscCall(ISIdentity(iscol, &col_identity));
1566: if (!levels && row_identity && col_identity) {
1567: /* special case: ilu(0) with natural ordering */
1568: PetscCall(MatILUFactorSymbolic_SeqAIJ_ilu0(fact, A, isrow, iscol, info));
1569: if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1570: PetscFunctionReturn(PETSC_SUCCESS);
1571: }
1573: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
1574: PetscCall(ISGetIndices(isrow, &r));
1575: PetscCall(ISGetIndices(isicol, &ic));
1577: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1578: PetscCall(PetscShmgetAllocateArray(n + 1, sizeof(PetscInt), (void **)&bi));
1579: PetscCall(PetscMalloc1(n + 1, &bdiag));
1580: bi[0] = bdiag[0] = 0;
1581: PetscCall(PetscMalloc2(n, &bj_ptr, n, &bjlvl_ptr));
1583: /* create a linked list for storing column indices of the active row */
1584: nlnk = n + 1;
1585: PetscCall(PetscIncompleteLLCreate(n, n, nlnk, lnk, lnk_lvl, lnkbt));
1587: /* initial FreeSpace size is f*(ai[n]+1) */
1588: f = info->fill;
1589: diagonal_fill = (PetscInt)info->diagonal_fill;
1590: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
1591: current_space = free_space;
1592: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space_lvl));
1593: current_space_lvl = free_space_lvl;
1594: for (i = 0; i < n; i++) {
1595: nzi = 0;
1596: /* copy current row into linked list */
1597: nnz = ai[r[i] + 1] - ai[r[i]];
1598: PetscCheck(nnz, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, r[i], i);
1599: cols = aj + ai[r[i]];
1600: lnk[i] = -1; /* marker to indicate if diagonal exists */
1601: PetscCall(PetscIncompleteLLInit(nnz, cols, n, ic, &nlnk, lnk, lnk_lvl, lnkbt));
1602: nzi += nlnk;
1604: /* make sure diagonal entry is included */
1605: if (diagonal_fill && lnk[i] == -1) {
1606: fm = n;
1607: while (lnk[fm] < i) fm = lnk[fm];
1608: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1609: lnk[fm] = i;
1610: lnk_lvl[i] = 0;
1611: nzi++;
1612: dcount++;
1613: }
1615: /* add pivot rows into the active row */
1616: nzbd = 0;
1617: prow = lnk[n];
1618: while (prow < i) {
1619: nnz = bdiag[prow];
1620: cols = bj_ptr[prow] + nnz + 1;
1621: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1622: nnz = bi[prow + 1] - bi[prow] - nnz - 1;
1623: PetscCall(PetscILULLAddSorted(nnz, cols, levels, cols_lvl, prow, &nlnk, lnk, lnk_lvl, lnkbt, prow));
1624: nzi += nlnk;
1625: prow = lnk[prow];
1626: nzbd++;
1627: }
1628: bdiag[i] = nzbd;
1629: bi[i + 1] = bi[i] + nzi;
1630: /* if free space is not available, make more free space */
1631: if (current_space->local_remaining < nzi) {
1632: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(nzi, n - i)); /* estimated and max additional space needed */
1633: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
1634: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space_lvl));
1635: reallocs++;
1636: }
1638: /* copy data into free_space and free_space_lvl, then initialize lnk */
1639: PetscCall(PetscIncompleteLLClean(n, n, nzi, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
1640: bj_ptr[i] = current_space->array;
1641: bjlvl_ptr[i] = current_space_lvl->array;
1643: /* make sure the active row i has diagonal entry */
1644: PetscCheck(*(bj_ptr[i] + bdiag[i]) == i, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Row %" PetscInt_FMT " has missing diagonal in factored matrix, try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill", i);
1646: current_space->array += nzi;
1647: current_space->local_used += nzi;
1648: current_space->local_remaining -= nzi;
1649: current_space_lvl->array += nzi;
1650: current_space_lvl->local_used += nzi;
1651: current_space_lvl->local_remaining -= nzi;
1652: }
1654: PetscCall(ISRestoreIndices(isrow, &r));
1655: PetscCall(ISRestoreIndices(isicol, &ic));
1656: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1657: PetscCall(PetscShmgetAllocateArray(bi[n], sizeof(PetscInt), (void **)&bj));
1658: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
1660: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
1661: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
1662: PetscCall(PetscFree2(bj_ptr, bjlvl_ptr));
1664: #if defined(PETSC_USE_INFO)
1665: {
1666: PetscReal af = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1667: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
1668: PetscCall(PetscInfo(A, "Run with -[sub_]pc_factor_fill %g or use \n", (double)af));
1669: PetscCall(PetscInfo(A, "PCFactorSetFill([sub]pc,%g);\n", (double)af));
1670: PetscCall(PetscInfo(A, "for best performance.\n"));
1671: if (diagonal_fill) PetscCall(PetscInfo(A, "Detected and replaced %" PetscInt_FMT " missing diagonals\n", dcount));
1672: }
1673: #endif
1674: /* put together the new matrix */
1675: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(fact, MAT_SKIP_ALLOCATION, NULL));
1676: b = (Mat_SeqAIJ *)fact->data;
1677: b->free_ij = PETSC_TRUE;
1678: PetscCall(PetscShmgetAllocateArray(bdiag[0] + 1, sizeof(PetscScalar), (void **)&b->a));
1679: b->free_a = PETSC_TRUE;
1680: b->j = bj;
1681: b->i = bi;
1682: b->diag = bdiag;
1683: b->ilen = NULL;
1684: b->imax = NULL;
1685: b->row = isrow;
1686: b->col = iscol;
1687: PetscCall(PetscObjectReference((PetscObject)isrow));
1688: PetscCall(PetscObjectReference((PetscObject)iscol));
1689: b->icol = isicol;
1691: PetscCall(PetscMalloc1(n, &b->solve_work));
1692: /* In b structure: Free imax, ilen, old a, old j.
1693: Allocate bdiag, solve_work, new a, new j */
1694: b->maxnz = b->nz = bdiag[0] + 1;
1696: fact->info.factor_mallocs = reallocs;
1697: fact->info.fill_ratio_given = f;
1698: fact->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
1699: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1700: if (a->inode.size_csr) fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1701: PetscCall(MatSeqAIJCheckInode_FactorLU(fact));
1702: PetscFunctionReturn(PETSC_SUCCESS);
1703: }
1705: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B, Mat A, const MatFactorInfo *info)
1706: {
1707: Mat C = B;
1708: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1709: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
1710: IS ip = b->row, iip = b->icol;
1711: const PetscInt *rip, *riip;
1712: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1713: PetscInt *ai = a->i, *aj = a->j;
1714: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1715: MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
1716: PetscBool perm_identity;
1717: FactorShiftCtx sctx;
1718: PetscReal rs;
1719: const MatScalar *aa, *v;
1720: MatScalar d;
1721: const PetscInt *adiag;
1723: PetscFunctionBegin;
1724: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1725: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1726: /* MatPivotSetUp(): initialize shift context sctx */
1727: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1729: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1730: sctx.shift_top = info->zeropivot;
1731: for (i = 0; i < mbs; i++) {
1732: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1733: d = aa[adiag[i]];
1734: rs = -PetscAbsScalar(d) - PetscRealPart(d);
1735: v = aa + ai[i];
1736: nz = ai[i + 1] - ai[i];
1737: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1738: if (rs > sctx.shift_top) sctx.shift_top = rs;
1739: }
1740: sctx.shift_top *= 1.1;
1741: sctx.nshift_max = 5;
1742: sctx.shift_lo = 0.;
1743: sctx.shift_hi = 1.;
1744: }
1746: PetscCall(ISGetIndices(ip, &rip));
1747: PetscCall(ISGetIndices(iip, &riip));
1749: /* allocate working arrays
1750: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1751: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1752: */
1753: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1755: do {
1756: sctx.newshift = PETSC_FALSE;
1758: for (i = 0; i < mbs; i++) c2r[i] = mbs;
1759: if (mbs) il[0] = 0;
1761: for (k = 0; k < mbs; k++) {
1762: /* zero rtmp */
1763: nz = bi[k + 1] - bi[k];
1764: bjtmp = bj + bi[k];
1765: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1767: /* load in initial unfactored row */
1768: bval = ba + bi[k];
1769: jmin = ai[rip[k]];
1770: jmax = ai[rip[k] + 1];
1771: for (j = jmin; j < jmax; j++) {
1772: col = riip[aj[j]];
1773: if (col >= k) { /* only take upper triangular entry */
1774: rtmp[col] = aa[j];
1775: *bval++ = 0.0; /* for in-place factorization */
1776: }
1777: }
1778: /* shift the diagonal of the matrix: ZeropivotApply() */
1779: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1781: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1782: dk = rtmp[k];
1783: i = c2r[k]; /* first row to be added to k_th row */
1785: while (i < k) {
1786: nexti = c2r[i]; /* next row to be added to k_th row */
1788: /* compute multiplier, update diag(k) and U(i,k) */
1789: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1790: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1791: dk += uikdi * ba[ili]; /* update diag[k] */
1792: ba[ili] = uikdi; /* -U(i,k) */
1794: /* add multiple of row i to k-th row */
1795: jmin = ili + 1;
1796: jmax = bi[i + 1];
1797: if (jmin < jmax) {
1798: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1799: /* update il and c2r for row i */
1800: il[i] = jmin;
1801: j = bj[jmin];
1802: c2r[i] = c2r[j];
1803: c2r[j] = i;
1804: }
1805: i = nexti;
1806: }
1808: /* copy data into U(k,:) */
1809: rs = 0.0;
1810: jmin = bi[k];
1811: jmax = bi[k + 1] - 1;
1812: if (jmin < jmax) {
1813: for (j = jmin; j < jmax; j++) {
1814: col = bj[j];
1815: ba[j] = rtmp[col];
1816: rs += PetscAbsScalar(ba[j]);
1817: }
1818: /* add the k-th row into il and c2r */
1819: il[k] = jmin;
1820: i = bj[jmin];
1821: c2r[k] = c2r[i];
1822: c2r[i] = k;
1823: }
1825: /* MatPivotCheck() */
1826: sctx.rs = rs;
1827: sctx.pv = dk;
1828: PetscCall(MatPivotCheck(B, A, info, &sctx, i));
1829: if (sctx.newshift) break;
1830: dk = sctx.pv;
1832: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1833: }
1834: } while (sctx.newshift);
1836: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
1837: PetscCall(PetscFree3(rtmp, il, c2r));
1838: PetscCall(ISRestoreIndices(ip, &rip));
1839: PetscCall(ISRestoreIndices(iip, &riip));
1841: PetscCall(ISIdentity(ip, &perm_identity));
1842: if (perm_identity) {
1843: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1844: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1845: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1846: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1847: } else {
1848: B->ops->solve = MatSolve_SeqSBAIJ_1;
1849: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1850: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
1851: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
1852: }
1854: C->assembled = PETSC_TRUE;
1855: C->preallocated = PETSC_TRUE;
1857: PetscCall(PetscLogFlops(C->rmap->n));
1859: /* MatPivotView() */
1860: if (sctx.nshift) {
1861: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1862: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1863: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1864: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1865: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1866: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1867: }
1868: }
1869: PetscFunctionReturn(PETSC_SUCCESS);
1870: }
1872: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B, Mat A, const MatFactorInfo *info)
1873: {
1874: Mat C = B;
1875: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
1876: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)C->data;
1877: IS ip = b->row, iip = b->icol;
1878: const PetscInt *rip, *riip;
1879: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bcol, *bjtmp;
1880: PetscInt *ai = a->i, *aj = a->j;
1881: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1882: MatScalar *rtmp, *ba = b->a, *bval, dk, uikdi;
1883: const MatScalar *aa, *v;
1884: PetscBool perm_identity;
1885: FactorShiftCtx sctx;
1886: PetscReal rs;
1887: MatScalar d;
1888: const PetscInt *adiag;
1890: PetscFunctionBegin;
1891: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, NULL));
1892: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
1893: /* MatPivotSetUp(): initialize shift context sctx */
1894: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1896: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1897: sctx.shift_top = info->zeropivot;
1898: for (i = 0; i < mbs; i++) {
1899: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1900: d = aa[adiag[i]];
1901: rs = -PetscAbsScalar(d) - PetscRealPart(d);
1902: v = aa + ai[i];
1903: nz = ai[i + 1] - ai[i];
1904: for (j = 0; j < nz; j++) rs += PetscAbsScalar(v[j]);
1905: if (rs > sctx.shift_top) sctx.shift_top = rs;
1906: }
1907: sctx.shift_top *= 1.1;
1908: sctx.nshift_max = 5;
1909: sctx.shift_lo = 0.;
1910: sctx.shift_hi = 1.;
1911: }
1913: PetscCall(ISGetIndices(ip, &rip));
1914: PetscCall(ISGetIndices(iip, &riip));
1916: /* initialization */
1917: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1919: do {
1920: sctx.newshift = PETSC_FALSE;
1922: for (i = 0; i < mbs; i++) jl[i] = mbs;
1923: il[0] = 0;
1925: for (k = 0; k < mbs; k++) {
1926: /* zero rtmp */
1927: nz = bi[k + 1] - bi[k];
1928: bjtmp = bj + bi[k];
1929: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1931: bval = ba + bi[k];
1932: /* initialize k-th row by the perm[k]-th row of A */
1933: jmin = ai[rip[k]];
1934: jmax = ai[rip[k] + 1];
1935: for (j = jmin; j < jmax; j++) {
1936: col = riip[aj[j]];
1937: if (col >= k) { /* only take upper triangular entry */
1938: rtmp[col] = aa[j];
1939: *bval++ = 0.0; /* for in-place factorization */
1940: }
1941: }
1942: /* shift the diagonal of the matrix */
1943: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1945: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1946: dk = rtmp[k];
1947: i = jl[k]; /* first row to be added to k_th row */
1949: while (i < k) {
1950: nexti = jl[i]; /* next row to be added to k_th row */
1952: /* compute multiplier, update diag(k) and U(i,k) */
1953: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1954: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1955: dk += uikdi * ba[ili];
1956: ba[ili] = uikdi; /* -U(i,k) */
1958: /* add multiple of row i to k-th row */
1959: jmin = ili + 1;
1960: jmax = bi[i + 1];
1961: if (jmin < jmax) {
1962: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1963: /* update il and jl for row i */
1964: il[i] = jmin;
1965: j = bj[jmin];
1966: jl[i] = jl[j];
1967: jl[j] = i;
1968: }
1969: i = nexti;
1970: }
1972: /* shift the diagonals when zero pivot is detected */
1973: /* compute rs=sum of abs(off-diagonal) */
1974: rs = 0.0;
1975: jmin = bi[k] + 1;
1976: nz = bi[k + 1] - jmin;
1977: bcol = bj + jmin;
1978: for (j = 0; j < nz; j++) rs += PetscAbsScalar(rtmp[bcol[j]]);
1980: sctx.rs = rs;
1981: sctx.pv = dk;
1982: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1983: if (sctx.newshift) break;
1984: dk = sctx.pv;
1986: /* copy data into U(k,:) */
1987: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1988: jmin = bi[k] + 1;
1989: jmax = bi[k + 1];
1990: if (jmin < jmax) {
1991: for (j = jmin; j < jmax; j++) {
1992: col = bj[j];
1993: ba[j] = rtmp[col];
1994: }
1995: /* add the k-th row into il and jl */
1996: il[k] = jmin;
1997: i = bj[jmin];
1998: jl[k] = jl[i];
1999: jl[i] = k;
2000: }
2001: }
2002: } while (sctx.newshift);
2004: PetscCall(PetscFree3(rtmp, il, jl));
2005: PetscCall(ISRestoreIndices(ip, &rip));
2006: PetscCall(ISRestoreIndices(iip, &riip));
2007: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2009: PetscCall(ISIdentity(ip, &perm_identity));
2010: if (perm_identity) {
2011: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2012: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2013: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2014: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2015: } else {
2016: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2017: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2018: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2019: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2020: }
2022: C->assembled = PETSC_TRUE;
2023: C->preallocated = PETSC_TRUE;
2025: PetscCall(PetscLogFlops(C->rmap->n));
2026: if (sctx.nshift) {
2027: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2028: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2029: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2030: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
2031: }
2032: }
2033: PetscFunctionReturn(PETSC_SUCCESS);
2034: }
2036: /*
2037: icc() under revised new data structure.
2038: Factored arrays bj and ba are stored as
2039: U(0,:),...,U(i,:),U(n-1,:)
2041: ui=fact->i is an array of size n+1, in which
2042: ui+
2043: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2044: ui[n]: points to U(n-1,n-1)+1
2046: udiag=fact->diag is an array of size n,in which
2047: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2049: U(i,:) contains udiag[i] as its last entry, i.e.,
2050: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2051: */
2053: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2054: {
2055: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2056: Mat_SeqSBAIJ *b;
2057: PetscBool perm_identity;
2058: PetscInt reallocs = 0, i, *ai = a->i, *aj = a->j, am = A->rmap->n, *ui, *udiag;
2059: const PetscInt *rip, *riip, *adiag;
2060: PetscInt jmin, jmax, nzk, k, j, *jl, prow, *il, nextprow;
2061: PetscInt nlnk, *lnk, *lnk_lvl = NULL;
2062: PetscInt ncols, ncols_upper, *cols, *ajtmp, *uj, **uj_ptr, **uj_lvl_ptr;
2063: PetscReal fill = info->fill, levels = info->levels;
2064: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2065: PetscFreeSpaceList free_space_lvl = NULL, current_space_lvl = NULL;
2066: PetscBT lnkbt;
2067: IS iperm;
2068: PetscBool diagDense;
2070: PetscFunctionBegin;
2071: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2072: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
2073: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2074: PetscCall(ISIdentity(perm, &perm_identity));
2075: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2077: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2078: PetscCall(PetscMalloc1(am + 1, &udiag));
2079: ui[0] = 0;
2081: /* ICC(0) without matrix ordering: simply rearrange column indices */
2082: if (!levels && perm_identity) {
2083: for (i = 0; i < am; i++) {
2084: ncols = ai[i + 1] - adiag[i];
2085: ui[i + 1] = ui[i] + ncols;
2086: udiag[i] = ui[i + 1] - 1; /* points to the last entry of U(i,:) */
2087: }
2088: PetscCall(PetscMalloc1(ui[am] + 1, &uj));
2089: cols = uj;
2090: for (i = 0; i < am; i++) {
2091: aj = a->j + adiag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2092: ncols = ai[i + 1] - adiag[i] - 1;
2093: for (j = 0; j < ncols; j++) *cols++ = aj[j];
2094: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2095: }
2096: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2097: PetscCall(ISGetIndices(iperm, &riip));
2098: PetscCall(ISGetIndices(perm, &rip));
2100: /* initialization */
2101: PetscCall(PetscMalloc1(am + 1, &ajtmp));
2103: /* jl: linked list for storing indices of the pivot rows
2104: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2105: PetscCall(PetscMalloc4(am, &uj_ptr, am, &uj_lvl_ptr, am, &jl, am, &il));
2106: for (i = 0; i < am; i++) {
2107: jl[i] = am;
2108: il[i] = 0;
2109: }
2111: /* create and initialize a linked list for storing column indices of the active row k */
2112: nlnk = am + 1;
2113: PetscCall(PetscIncompleteLLCreate(am, am, nlnk, lnk, lnk_lvl, lnkbt));
2115: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2116: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2117: current_space = free_space;
2118: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space_lvl));
2119: current_space_lvl = free_space_lvl;
2121: for (k = 0; k < am; k++) { /* for each active row k */
2122: /* initialize lnk by the column indices of row rip[k] of A */
2123: nzk = 0;
2124: ncols = ai[rip[k] + 1] - ai[rip[k]];
2125: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2126: ncols_upper = 0;
2127: for (j = 0; j < ncols; j++) {
2128: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2129: if (riip[i] >= k) { /* only take upper triangular entry */
2130: ajtmp[ncols_upper] = i;
2131: ncols_upper++;
2132: }
2133: }
2134: PetscCall(PetscIncompleteLLInit(ncols_upper, ajtmp, am, riip, &nlnk, lnk, lnk_lvl, lnkbt));
2135: nzk += nlnk;
2137: /* update lnk by computing fill-in for each pivot row to be merged in */
2138: prow = jl[k]; /* 1st pivot row */
2140: while (prow < k) {
2141: nextprow = jl[prow];
2143: /* merge prow into k-th row */
2144: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2145: jmax = ui[prow + 1];
2146: ncols = jmax - jmin;
2147: i = jmin - ui[prow];
2148: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2149: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
2150: j = *(uj - 1);
2151: PetscCall(PetscICCLLAddSorted(ncols, cols, levels, uj, am, &nlnk, lnk, lnk_lvl, lnkbt, j));
2152: nzk += nlnk;
2154: /* update il and jl for prow */
2155: if (jmin < jmax) {
2156: il[prow] = jmin;
2157: j = *cols;
2158: jl[prow] = jl[j];
2159: jl[j] = prow;
2160: }
2161: prow = nextprow;
2162: }
2164: /* if free space is not available, make more free space */
2165: if (current_space->local_remaining < nzk) {
2166: i = am - k + 1; /* num of unfactored rows */
2167: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2168: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2169: PetscCall(PetscFreeSpaceGet(i, ¤t_space_lvl));
2170: reallocs++;
2171: }
2173: /* copy data into free_space and free_space_lvl, then initialize lnk */
2174: PetscCheck(nzk != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Empty row %" PetscInt_FMT " in ICC matrix factor", k);
2175: PetscCall(PetscIncompleteLLClean(am, am, nzk, lnk, lnk_lvl, current_space->array, current_space_lvl->array, lnkbt));
2177: /* add the k-th row into il and jl */
2178: if (nzk > 1) {
2179: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2180: jl[k] = jl[i];
2181: jl[i] = k;
2182: il[k] = ui[k] + 1;
2183: }
2184: uj_ptr[k] = current_space->array;
2185: uj_lvl_ptr[k] = current_space_lvl->array;
2187: current_space->array += nzk;
2188: current_space->local_used += nzk;
2189: current_space->local_remaining -= nzk;
2191: current_space_lvl->array += nzk;
2192: current_space_lvl->local_used += nzk;
2193: current_space_lvl->local_remaining -= nzk;
2195: ui[k + 1] = ui[k] + nzk;
2196: }
2198: PetscCall(ISRestoreIndices(perm, &rip));
2199: PetscCall(ISRestoreIndices(iperm, &riip));
2200: PetscCall(PetscFree4(uj_ptr, uj_lvl_ptr, jl, il));
2201: PetscCall(PetscFree(ajtmp));
2203: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2204: PetscCall(PetscShmgetAllocateArray(ui[am] + 1, sizeof(PetscInt), (void **)&uj));
2205: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2206: PetscCall(PetscIncompleteLLDestroy(lnk, lnkbt));
2207: PetscCall(PetscFreeSpaceDestroy(free_space_lvl));
2209: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2211: /* put together the new matrix in MATSEQSBAIJ format */
2212: b = (Mat_SeqSBAIJ *)fact->data;
2213: b->free_ij = PETSC_TRUE;
2214: PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2215: b->free_a = PETSC_TRUE;
2216: b->j = uj;
2217: b->i = ui;
2218: b->diag = udiag;
2219: b->ilen = NULL;
2220: b->imax = NULL;
2221: b->row = perm;
2222: b->col = perm;
2223: PetscCall(PetscObjectReference((PetscObject)perm));
2224: PetscCall(PetscObjectReference((PetscObject)perm));
2225: b->icol = iperm;
2226: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2228: PetscCall(PetscMalloc1(am, &b->solve_work));
2230: b->maxnz = b->nz = ui[am];
2232: fact->info.factor_mallocs = reallocs;
2233: fact->info.fill_ratio_given = fill;
2234: if (ai[am] != 0) {
2235: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2236: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2237: } else {
2238: fact->info.fill_ratio_needed = 0.0;
2239: }
2240: #if defined(PETSC_USE_INFO)
2241: if (ai[am] != 0) {
2242: PetscReal af = fact->info.fill_ratio_needed;
2243: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2244: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2245: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2246: } else {
2247: PetscCall(PetscInfo(A, "Empty matrix\n"));
2248: }
2249: #endif
2250: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2251: PetscFunctionReturn(PETSC_SUCCESS);
2252: }
2254: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
2255: {
2256: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2257: Mat_SeqSBAIJ *b;
2258: PetscBool perm_identity;
2259: PetscReal fill = info->fill;
2260: const PetscInt *rip, *riip;
2261: PetscInt i, am = A->rmap->n, *ai = a->i, *aj = a->j, reallocs = 0, prow;
2262: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
2263: PetscInt nlnk, *lnk, ncols, ncols_upper, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
2264: PetscFreeSpaceList free_space = NULL, current_space = NULL;
2265: PetscBT lnkbt;
2266: IS iperm;
2267: PetscBool diagDense;
2269: PetscFunctionBegin;
2270: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
2271: PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, NULL, &diagDense));
2272: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entries");
2274: /* check whether perm is the identity mapping */
2275: PetscCall(ISIdentity(perm, &perm_identity));
2276: PetscCall(ISInvertPermutation(perm, PETSC_DECIDE, &iperm));
2277: PetscCall(ISGetIndices(iperm, &riip));
2278: PetscCall(ISGetIndices(perm, &rip));
2280: /* initialization */
2281: PetscCall(PetscShmgetAllocateArray(am + 1, sizeof(PetscInt), (void **)&ui));
2282: PetscCall(PetscMalloc1(am + 1, &udiag));
2283: ui[0] = 0;
2285: /* jl: linked list for storing indices of the pivot rows
2286: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2287: PetscCall(PetscMalloc4(am, &ui_ptr, am, &jl, am, &il, am, &cols));
2288: for (i = 0; i < am; i++) {
2289: jl[i] = am;
2290: il[i] = 0;
2291: }
2293: /* create and initialize a linked list for storing column indices of the active row k */
2294: nlnk = am + 1;
2295: PetscCall(PetscLLCreate(am, am, nlnk, lnk, lnkbt));
2297: /* initial FreeSpace size is fill*(ai[am]+am)/2 */
2298: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, (ai[am] + am) / 2), &free_space));
2299: current_space = free_space;
2301: for (k = 0; k < am; k++) { /* for each active row k */
2302: /* initialize lnk by the column indices of row rip[k] of A */
2303: nzk = 0;
2304: ncols = ai[rip[k] + 1] - ai[rip[k]];
2305: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row in matrix: row in original ordering %" PetscInt_FMT " in permuted ordering %" PetscInt_FMT, rip[k], k);
2306: ncols_upper = 0;
2307: for (j = 0; j < ncols; j++) {
2308: i = riip[*(aj + ai[rip[k]] + j)];
2309: if (i >= k) { /* only take upper triangular entry */
2310: cols[ncols_upper] = i;
2311: ncols_upper++;
2312: }
2313: }
2314: PetscCall(PetscLLAdd(ncols_upper, cols, am, &nlnk, lnk, lnkbt));
2315: nzk += nlnk;
2317: /* update lnk by computing fill-in for each pivot row to be merged in */
2318: prow = jl[k]; /* 1st pivot row */
2320: while (prow < k) {
2321: nextprow = jl[prow];
2322: /* merge prow into k-th row */
2323: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2324: jmax = ui[prow + 1];
2325: ncols = jmax - jmin;
2326: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2327: PetscCall(PetscLLAddSorted(ncols, uj_ptr, am, &nlnk, lnk, lnkbt));
2328: nzk += nlnk;
2330: /* update il and jl for prow */
2331: if (jmin < jmax) {
2332: il[prow] = jmin;
2333: j = *uj_ptr;
2334: jl[prow] = jl[j];
2335: jl[j] = prow;
2336: }
2337: prow = nextprow;
2338: }
2340: /* if free space is not available, make more free space */
2341: if (current_space->local_remaining < nzk) {
2342: i = am - k + 1; /* num of unfactored rows */
2343: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2344: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
2345: reallocs++;
2346: }
2348: /* copy data into free space, then initialize lnk */
2349: PetscCall(PetscLLClean(am, am, nzk, lnk, current_space->array, lnkbt));
2351: /* add the k-th row into il and jl */
2352: if (nzk > 1) {
2353: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2354: jl[k] = jl[i];
2355: jl[i] = k;
2356: il[k] = ui[k] + 1;
2357: }
2358: ui_ptr[k] = current_space->array;
2360: current_space->array += nzk;
2361: current_space->local_used += nzk;
2362: current_space->local_remaining -= nzk;
2364: ui[k + 1] = ui[k] + nzk;
2365: }
2367: PetscCall(ISRestoreIndices(perm, &rip));
2368: PetscCall(ISRestoreIndices(iperm, &riip));
2369: PetscCall(PetscFree4(ui_ptr, jl, il, cols));
2371: /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */
2372: PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscInt), (void **)&uj));
2373: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, am, ui, udiag)); /* store matrix factor */
2374: PetscCall(PetscLLDestroy(lnk, lnkbt));
2376: /* put together the new matrix in MATSEQSBAIJ format */
2377: b = (Mat_SeqSBAIJ *)fact->data;
2378: b->free_ij = PETSC_TRUE;
2379: PetscCall(PetscShmgetAllocateArray(ui[am], sizeof(PetscScalar), (void **)&b->a));
2380: b->free_a = PETSC_TRUE;
2381: b->j = uj;
2382: b->i = ui;
2383: b->diag = udiag;
2384: b->ilen = NULL;
2385: b->imax = NULL;
2386: b->row = perm;
2387: b->col = perm;
2389: PetscCall(PetscObjectReference((PetscObject)perm));
2390: PetscCall(PetscObjectReference((PetscObject)perm));
2392: b->icol = iperm;
2393: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2395: PetscCall(PetscMalloc1(am, &b->solve_work));
2397: b->maxnz = b->nz = ui[am];
2399: fact->info.factor_mallocs = reallocs;
2400: fact->info.fill_ratio_given = fill;
2401: if (ai[am] != 0) {
2402: /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */
2403: fact->info.fill_ratio_needed = ((PetscReal)2 * ui[am]) / (ai[am] + am);
2404: } else {
2405: fact->info.fill_ratio_needed = 0.0;
2406: }
2407: #if defined(PETSC_USE_INFO)
2408: if (ai[am] != 0) {
2409: PetscReal af = fact->info.fill_ratio_needed;
2410: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
2411: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
2412: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
2413: } else {
2414: PetscCall(PetscInfo(A, "Empty matrix\n"));
2415: }
2416: #endif
2417: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2418: PetscFunctionReturn(PETSC_SUCCESS);
2419: }
2421: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A, Vec bb, Vec xx)
2422: {
2423: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2424: PetscInt n = A->rmap->n;
2425: const PetscInt *ai = a->i, *aj = a->j, *adiag = a->diag, *vi;
2426: PetscScalar *x, sum;
2427: const PetscScalar *b;
2428: const MatScalar *aa, *v;
2429: PetscInt i, nz;
2431: PetscFunctionBegin;
2432: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2434: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2435: PetscCall(VecGetArrayRead(bb, &b));
2436: PetscCall(VecGetArrayWrite(xx, &x));
2438: /* forward solve the lower triangular */
2439: x[0] = b[0];
2440: v = aa;
2441: vi = aj;
2442: for (i = 1; i < n; i++) {
2443: nz = ai[i + 1] - ai[i];
2444: sum = b[i];
2445: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2446: v += nz;
2447: vi += nz;
2448: x[i] = sum;
2449: }
2451: /* backward solve the upper triangular */
2452: for (i = n - 1; i >= 0; i--) {
2453: v = aa + adiag[i + 1] + 1;
2454: vi = aj + adiag[i + 1] + 1;
2455: nz = adiag[i] - adiag[i + 1] - 1;
2456: sum = x[i];
2457: PetscSparseDenseMinusDot(sum, x, v, vi, nz);
2458: x[i] = sum * v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
2459: }
2461: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2462: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2463: PetscCall(VecRestoreArrayRead(bb, &b));
2464: PetscCall(VecRestoreArrayWrite(xx, &x));
2465: PetscFunctionReturn(PETSC_SUCCESS);
2466: }
2468: PetscErrorCode MatSolve_SeqAIJ(Mat A, Vec bb, Vec xx)
2469: {
2470: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
2471: IS iscol = a->col, isrow = a->row;
2472: PetscInt i, n = A->rmap->n, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag, nz;
2473: const PetscInt *rout, *cout, *r, *c;
2474: PetscScalar *x, *tmp, sum;
2475: const PetscScalar *b;
2476: const MatScalar *aa, *v;
2478: PetscFunctionBegin;
2479: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
2481: PetscCall(MatSeqAIJGetArrayRead(A, &aa));
2482: PetscCall(VecGetArrayRead(bb, &b));
2483: PetscCall(VecGetArrayWrite(xx, &x));
2484: tmp = a->solve_work;
2486: PetscCall(ISGetIndices(isrow, &rout));
2487: r = rout;
2488: PetscCall(ISGetIndices(iscol, &cout));
2489: c = cout;
2491: /* forward solve the lower triangular */
2492: tmp[0] = b[r[0]];
2493: v = aa;
2494: vi = aj;
2495: for (i = 1; i < n; i++) {
2496: nz = ai[i + 1] - ai[i];
2497: sum = b[r[i]];
2498: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2499: tmp[i] = sum;
2500: v += nz;
2501: vi += nz;
2502: }
2504: /* backward solve the upper triangular */
2505: for (i = n - 1; i >= 0; i--) {
2506: v = aa + adiag[i + 1] + 1;
2507: vi = aj + adiag[i + 1] + 1;
2508: nz = adiag[i] - adiag[i + 1] - 1;
2509: sum = tmp[i];
2510: PetscSparseDenseMinusDot(sum, tmp, v, vi, nz);
2511: x[c[i]] = tmp[i] = sum * v[nz]; /* v[nz] = aa[adiag[i]] */
2512: }
2514: PetscCall(ISRestoreIndices(isrow, &rout));
2515: PetscCall(ISRestoreIndices(iscol, &cout));
2516: PetscCall(MatSeqAIJRestoreArrayRead(A, &aa));
2517: PetscCall(VecRestoreArrayRead(bb, &b));
2518: PetscCall(VecRestoreArrayWrite(xx, &x));
2519: PetscCall(PetscLogFlops(2.0 * a->nz - A->cmap->n));
2520: PetscFunctionReturn(PETSC_SUCCESS);
2521: }