Actual source code: sbaijfact.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petsc/private/kernels/blockinvert.h>
4: #include <petscis.h>
6: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
7: {
8: Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9: MatScalar *dd = fact->a;
10: PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
12: PetscFunctionBegin;
13: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
15: nneg_tmp = 0;
16: npos_tmp = 0;
17: if (fi) {
18: for (i = 0; i < mbs; i++) {
19: if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
20: else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21: fi++;
22: }
23: } else {
24: for (i = 0; i < mbs; i++) {
25: if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++;
26: else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++;
27: }
28: }
29: if (nneg) *nneg = nneg_tmp;
30: if (npos) *npos = npos_tmp;
31: if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
32: PetscFunctionReturn(PETSC_SUCCESS);
33: }
35: /*
36: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
37: Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
38: */
39: static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
40: {
41: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b;
42: const PetscInt *rip, *ai, *aj;
43: PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
44: PetscInt m, reallocs = 0, prow;
45: PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
46: PetscReal f = info->fill;
47: PetscBool perm_identity;
49: PetscFunctionBegin;
50: /* check whether perm is the identity mapping */
51: PetscCall(ISIdentity(perm, &perm_identity));
52: PetscCall(ISGetIndices(perm, &rip));
54: if (perm_identity) { /* without permutation */
55: a->permute = PETSC_FALSE;
57: ai = a->i;
58: aj = a->j;
59: } else { /* non-trivial permutation */
60: a->permute = PETSC_TRUE;
62: PetscCall(MatReorderingSeqSBAIJ(A, perm));
64: ai = a->inew;
65: aj = a->jnew;
66: }
68: /* initialization */
69: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&iu));
70: umax = (PetscInt)(f * ai[mbs] + 1);
71: umax += mbs + 1;
72: PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&ju));
73: iu[0] = mbs + 1;
74: juidx = mbs + 1; /* index for ju */
75: /* jl linked list for pivot row -- linked list for col index */
76: PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
77: for (i = 0; i < mbs; i++) {
78: jl[i] = mbs;
79: q[i] = 0;
80: }
82: /* for each row k */
83: for (k = 0; k < mbs; k++) {
84: for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
85: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
86: q[k] = mbs;
87: /* initialize nonzero structure of k-th row to row rip[k] of A */
88: jmin = ai[rip[k]] + 1; /* exclude diag[k] */
89: jmax = ai[rip[k] + 1];
90: for (j = jmin; j < jmax; j++) {
91: vj = rip[aj[j]]; /* col. value */
92: if (vj > k) {
93: qm = k;
94: do {
95: m = qm;
96: qm = q[m];
97: } while (qm < vj);
98: PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
99: nzk++;
100: q[m] = vj;
101: q[vj] = qm;
102: } /* if (vj > k) */
103: } /* for (j=jmin; j<jmax; j++) */
105: /* modify nonzero structure of k-th row by computing fill-in
106: for each row i to be merged in */
107: prow = k;
108: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
110: while (prow < k) {
111: /* merge row prow into k-th row */
112: jmin = iu[prow] + 1;
113: jmax = iu[prow + 1];
114: qm = k;
115: for (j = jmin; j < jmax; j++) {
116: vj = ju[j];
117: do {
118: m = qm;
119: qm = q[m];
120: } while (qm < vj);
121: if (qm != vj) {
122: nzk++;
123: q[m] = vj;
124: q[vj] = qm;
125: qm = vj;
126: }
127: }
128: prow = jl[prow]; /* next pivot row */
129: }
131: /* add k to row list for first nonzero element in k-th row */
132: if (nzk > 0) {
133: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
134: jl[k] = jl[i];
135: jl[i] = k;
136: }
137: iu[k + 1] = iu[k] + nzk;
139: /* allocate more space to ju if needed */
140: if (iu[k + 1] > umax) {
141: /* estimate how much additional space we will need */
142: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143: /* just double the memory each time */
144: maxadd = umax;
145: if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
146: umax += maxadd;
148: /* allocate a longer ju */
149: PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&jutmp));
150: PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
151: PetscCall(PetscShmgetDeallocateArray((void **)&ju));
152: ju = jutmp;
153: reallocs++; /* count how many times we realloc */
154: }
156: /* save nonzero structure of k-th row in ju */
157: i = k;
158: while (nzk--) {
159: i = q[i];
160: ju[juidx++] = i;
161: }
162: }
164: #if defined(PETSC_USE_INFO)
165: if (ai[mbs] != 0) {
166: PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
167: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
168: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
169: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
170: PetscCall(PetscInfo(A, "for best performance.\n"));
171: } else {
172: PetscCall(PetscInfo(A, "Empty matrix\n"));
173: }
174: #endif
176: PetscCall(ISRestoreIndices(perm, &rip));
177: PetscCall(PetscFree2(jl, q));
179: /* put together the new matrix */
180: PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
182: b = (Mat_SeqSBAIJ *)F->data;
183: b->free_ij = PETSC_TRUE;
184: PetscCall(PetscShmgetAllocateArray((iu[mbs] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
185: b->free_a = PETSC_TRUE;
186: b->j = ju;
187: b->i = iu;
188: b->diag = NULL;
189: b->ilen = NULL;
190: b->imax = NULL;
191: b->row = perm;
193: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
195: PetscCall(PetscObjectReference((PetscObject)perm));
197: b->icol = perm;
198: PetscCall(PetscObjectReference((PetscObject)perm));
199: PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
200: /* In b structure: Free imax, ilen, old a, old j.
201: Allocate idnew, solve_work, new a, new j */
202: b->maxnz = b->nz = iu[mbs];
204: F->info.factor_mallocs = reallocs;
205: F->info.fill_ratio_given = f;
206: if (ai[mbs] != 0) {
207: F->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
208: } else {
209: F->info.fill_ratio_needed = 0.0;
210: }
211: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
214: /*
215: Symbolic U^T*D*U factorization for SBAIJ format.
216: See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
217: */
218: #include <petscbt.h>
219: #include <../src/mat/utils/freespace.h>
220: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
221: {
222: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
223: Mat_SeqSBAIJ *b;
224: PetscBool perm_identity, missing;
225: PetscReal fill = info->fill;
226: const PetscInt *rip, *ai = a->i, *aj = a->j;
227: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
228: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
229: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
230: PetscFreeSpaceList free_space = NULL, current_space = NULL;
231: PetscBT lnkbt;
233: PetscFunctionBegin;
234: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
235: PetscCall(MatMissingDiagonal(A, &missing, &i));
236: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
237: if (bs > 1) {
238: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
239: PetscFunctionReturn(PETSC_SUCCESS);
240: }
242: /* check whether perm is the identity mapping */
243: PetscCall(ISIdentity(perm, &perm_identity));
244: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
245: a->permute = PETSC_FALSE;
246: PetscCall(ISGetIndices(perm, &rip));
248: /* initialization */
249: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
250: PetscCall(PetscMalloc1(mbs + 1, &udiag));
251: ui[0] = 0;
253: /* jl: linked list for storing indices of the pivot rows
254: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
255: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
256: for (i = 0; i < mbs; i++) {
257: jl[i] = mbs;
258: il[i] = 0;
259: }
261: /* create and initialize a linked list for storing column indices of the active row k */
262: nlnk = mbs + 1;
263: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
265: /* initial FreeSpace size is fill*(ai[mbs]+1) */
266: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
267: current_space = free_space;
269: for (k = 0; k < mbs; k++) { /* for each active row k */
270: /* initialize lnk by the column indices of row rip[k] of A */
271: nzk = 0;
272: ncols = ai[k + 1] - ai[k];
273: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
274: for (j = 0; j < ncols; j++) {
275: i = *(aj + ai[k] + j);
276: cols[j] = i;
277: }
278: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
279: nzk += nlnk;
281: /* update lnk by computing fill-in for each pivot row to be merged in */
282: prow = jl[k]; /* 1st pivot row */
284: while (prow < k) {
285: nextprow = jl[prow];
286: /* merge prow into k-th row */
287: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
288: jmax = ui[prow + 1];
289: ncols = jmax - jmin;
290: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
291: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
292: nzk += nlnk;
294: /* update il and jl for prow */
295: if (jmin < jmax) {
296: il[prow] = jmin;
297: j = *uj_ptr;
298: jl[prow] = jl[j];
299: jl[j] = prow;
300: }
301: prow = nextprow;
302: }
304: /* if free space is not available, make more free space */
305: if (current_space->local_remaining < nzk) {
306: i = mbs - k + 1; /* num of unfactored rows */
307: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
308: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
309: reallocs++;
310: }
312: /* copy data into free space, then initialize lnk */
313: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
315: /* add the k-th row into il and jl */
316: if (nzk > 1) {
317: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
318: jl[k] = jl[i];
319: jl[i] = k;
320: il[k] = ui[k] + 1;
321: }
322: ui_ptr[k] = current_space->array;
324: current_space->array += nzk;
325: current_space->local_used += nzk;
326: current_space->local_remaining -= nzk;
328: ui[k + 1] = ui[k] + nzk;
329: }
331: PetscCall(ISRestoreIndices(perm, &rip));
332: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
334: /* destroy list of free space and other temporary array(s) */
335: PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj));
336: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
337: PetscCall(PetscLLDestroy(lnk, lnkbt));
339: /* put together the new matrix in MATSEQSBAIJ format */
340: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
342: b = (Mat_SeqSBAIJ *)fact->data;
343: b->free_ij = PETSC_TRUE;
344: PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a));
345: b->free_a = PETSC_TRUE;
346: b->j = uj;
347: b->i = ui;
348: b->diag = udiag;
349: b->free_diag = PETSC_TRUE;
350: b->ilen = NULL;
351: b->imax = NULL;
352: b->row = perm;
353: b->icol = perm;
355: PetscCall(PetscObjectReference((PetscObject)perm));
356: PetscCall(PetscObjectReference((PetscObject)perm));
358: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
360: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
362: b->maxnz = b->nz = ui[mbs];
364: fact->info.factor_mallocs = reallocs;
365: fact->info.fill_ratio_given = fill;
366: if (ai[mbs] != 0) {
367: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
368: } else {
369: fact->info.fill_ratio_needed = 0.0;
370: }
371: #if defined(PETSC_USE_INFO)
372: if (ai[mbs] != 0) {
373: PetscReal af = fact->info.fill_ratio_needed;
374: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
375: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
376: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
377: } else {
378: PetscCall(PetscInfo(A, "Empty matrix\n"));
379: }
380: #endif
381: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
382: PetscFunctionReturn(PETSC_SUCCESS);
383: }
385: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
386: {
387: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
388: Mat_SeqSBAIJ *b;
389: PetscBool perm_identity, missing;
390: PetscReal fill = info->fill;
391: const PetscInt *rip, *ai, *aj;
392: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
393: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
394: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
395: PetscFreeSpaceList free_space = NULL, current_space = NULL;
396: PetscBT lnkbt;
398: PetscFunctionBegin;
399: PetscCall(MatMissingDiagonal(A, &missing, &d));
400: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);
402: /*
403: This code originally uses Modified Sparse Row (MSR) storage
404: (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
405: Then it is rewritten so the factor B takes seqsbaij format. However the associated
406: MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
407: thus the original code in MSR format is still used for these cases.
408: The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
409: MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
410: */
411: if (bs > 1) {
412: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
413: PetscFunctionReturn(PETSC_SUCCESS);
414: }
416: /* check whether perm is the identity mapping */
417: PetscCall(ISIdentity(perm, &perm_identity));
418: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
419: a->permute = PETSC_FALSE;
420: ai = a->i;
421: aj = a->j;
422: PetscCall(ISGetIndices(perm, &rip));
424: /* initialization */
425: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
426: ui[0] = 0;
428: /* jl: linked list for storing indices of the pivot rows
429: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
430: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
431: for (i = 0; i < mbs; i++) {
432: jl[i] = mbs;
433: il[i] = 0;
434: }
436: /* create and initialize a linked list for storing column indices of the active row k */
437: nlnk = mbs + 1;
438: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
440: /* initial FreeSpace size is fill*(ai[mbs]+1) */
441: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
442: current_space = free_space;
444: for (k = 0; k < mbs; k++) { /* for each active row k */
445: /* initialize lnk by the column indices of row rip[k] of A */
446: nzk = 0;
447: ncols = ai[rip[k] + 1] - ai[rip[k]];
448: for (j = 0; j < ncols; j++) {
449: i = *(aj + ai[rip[k]] + j);
450: cols[j] = rip[i];
451: }
452: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
453: nzk += nlnk;
455: /* update lnk by computing fill-in for each pivot row to be merged in */
456: prow = jl[k]; /* 1st pivot row */
458: while (prow < k) {
459: nextprow = jl[prow];
460: /* merge prow into k-th row */
461: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
462: jmax = ui[prow + 1];
463: ncols = jmax - jmin;
464: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
465: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
466: nzk += nlnk;
468: /* update il and jl for prow */
469: if (jmin < jmax) {
470: il[prow] = jmin;
472: j = *uj_ptr;
473: jl[prow] = jl[j];
474: jl[j] = prow;
475: }
476: prow = nextprow;
477: }
479: /* if free space is not available, make more free space */
480: if (current_space->local_remaining < nzk) {
481: i = mbs - k + 1; /* num of unfactored rows */
482: i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
483: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
484: reallocs++;
485: }
487: /* copy data into free space, then initialize lnk */
488: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
490: /* add the k-th row into il and jl */
491: if (nzk - 1 > 0) {
492: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
493: jl[k] = jl[i];
494: jl[i] = k;
495: il[k] = ui[k] + 1;
496: }
497: ui_ptr[k] = current_space->array;
499: current_space->array += nzk;
500: current_space->local_used += nzk;
501: current_space->local_remaining -= nzk;
503: ui[k + 1] = ui[k] + nzk;
504: }
506: PetscCall(ISRestoreIndices(perm, &rip));
507: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
509: /* destroy list of free space and other temporary array(s) */
510: PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj));
511: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
512: PetscCall(PetscLLDestroy(lnk, lnkbt));
514: /* put together the new matrix in MATSEQSBAIJ format */
515: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
517: b = (Mat_SeqSBAIJ *)fact->data;
518: PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a));
519: b->free_a = PETSC_TRUE;
520: b->free_ij = PETSC_TRUE;
521: b->j = uj;
522: b->i = ui;
523: b->diag = NULL;
524: b->ilen = NULL;
525: b->imax = NULL;
526: b->row = perm;
528: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
530: PetscCall(PetscObjectReference((PetscObject)perm));
531: b->icol = perm;
532: PetscCall(PetscObjectReference((PetscObject)perm));
533: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
534: b->maxnz = b->nz = ui[mbs];
536: fact->info.factor_mallocs = reallocs;
537: fact->info.fill_ratio_given = fill;
538: if (ai[mbs] != 0) {
539: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
540: } else {
541: fact->info.fill_ratio_needed = 0.0;
542: }
543: #if defined(PETSC_USE_INFO)
544: if (ai[mbs] != 0) {
545: PetscReal af = fact->info.fill_ratio_needed;
546: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
547: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
548: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
549: } else {
550: PetscCall(PetscInfo(A, "Empty matrix\n"));
551: }
552: #endif
553: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
554: PetscFunctionReturn(PETSC_SUCCESS);
555: }
557: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
558: {
559: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
560: IS perm = b->row;
561: const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
562: PetscInt i, j;
563: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
564: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
565: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
566: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
567: MatScalar *work;
568: PetscInt *pivots;
569: PetscBool allowzeropivot, zeropivotdetected;
571: PetscFunctionBegin;
572: /* initialization */
573: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
574: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
575: allowzeropivot = PetscNot(A->erroriffailure);
577: il[0] = 0;
578: for (i = 0; i < mbs; i++) jl[i] = mbs;
580: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
581: PetscCall(PetscMalloc1(bs, &pivots));
583: PetscCall(ISGetIndices(perm, &perm_ptr));
585: /* check permutation */
586: if (!a->permute) {
587: ai = a->i;
588: aj = a->j;
589: aa = a->a;
590: } else {
591: ai = a->inew;
592: aj = a->jnew;
593: PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
594: PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
595: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
596: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
598: for (i = 0; i < mbs; i++) {
599: jmin = ai[i];
600: jmax = ai[i + 1];
601: for (j = jmin; j < jmax; j++) {
602: while (a2anew[j] != j) {
603: k = a2anew[j];
604: a2anew[j] = a2anew[k];
605: a2anew[k] = k;
606: for (k1 = 0; k1 < bs2; k1++) {
607: dk[k1] = aa[k * bs2 + k1];
608: aa[k * bs2 + k1] = aa[j * bs2 + k1];
609: aa[j * bs2 + k1] = dk[k1];
610: }
611: }
612: /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
613: if (i > aj[j]) {
614: ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */
615: for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
616: for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */
617: for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
618: }
619: }
620: }
621: }
622: PetscCall(PetscFree(a2anew));
623: }
625: /* for each row k */
626: for (k = 0; k < mbs; k++) {
627: /*initialize k-th row with elements nonzero in row perm(k) of A */
628: jmin = ai[perm_ptr[k]];
629: jmax = ai[perm_ptr[k] + 1];
631: ap = aa + jmin * bs2;
632: for (j = jmin; j < jmax; j++) {
633: vj = perm_ptr[aj[j]]; /* block col. index */
634: rtmp_ptr = rtmp + vj * bs2;
635: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
636: }
638: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
639: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
640: i = jl[k]; /* first row to be added to k_th row */
642: while (i < k) {
643: nexti = jl[i]; /* next row to be added to k_th row */
645: /* compute multiplier */
646: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
648: /* uik = -inv(Di)*U_bar(i,k) */
649: diag = ba + i * bs2;
650: u = ba + ili * bs2;
651: PetscCall(PetscArrayzero(uik, bs2));
652: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
654: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
655: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
656: PetscCall(PetscLogFlops(4.0 * bs * bs2));
658: /* update -U(i,k) */
659: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
661: /* add multiple of row i to k-th row ... */
662: jmin = ili + 1;
663: jmax = bi[i + 1];
664: if (jmin < jmax) {
665: for (j = jmin; j < jmax; j++) {
666: /* rtmp += -U(i,k)^T * U_bar(i,j) */
667: rtmp_ptr = rtmp + bj[j] * bs2;
668: u = ba + j * bs2;
669: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
670: }
671: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
673: /* ... add i to row list for next nonzero entry */
674: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
675: j = bj[jmin];
676: jl[i] = jl[j];
677: jl[j] = i; /* update jl */
678: }
679: i = nexti;
680: }
682: /* save nonzero entries in k-th row of U ... */
684: /* invert diagonal block */
685: diag = ba + k * bs2;
686: PetscCall(PetscArraycpy(diag, dk, bs2));
688: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
689: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
691: jmin = bi[k];
692: jmax = bi[k + 1];
693: if (jmin < jmax) {
694: for (j = jmin; j < jmax; j++) {
695: vj = bj[j]; /* block col. index of U */
696: u = ba + j * bs2;
697: rtmp_ptr = rtmp + vj * bs2;
698: for (k1 = 0; k1 < bs2; k1++) {
699: *u++ = *rtmp_ptr;
700: *rtmp_ptr++ = 0.0;
701: }
702: }
704: /* ... add k to row list for first nonzero entry in k-th row */
705: il[k] = jmin;
706: i = bj[jmin];
707: jl[k] = jl[i];
708: jl[i] = k;
709: }
710: }
712: PetscCall(PetscFree(rtmp));
713: PetscCall(PetscFree2(il, jl));
714: PetscCall(PetscFree3(dk, uik, work));
715: PetscCall(PetscFree(pivots));
716: if (a->permute) PetscCall(PetscFree(aa));
718: PetscCall(ISRestoreIndices(perm, &perm_ptr));
720: C->ops->solve = MatSolve_SeqSBAIJ_N_inplace;
721: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
722: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace;
723: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace;
725: C->assembled = PETSC_TRUE;
726: C->preallocated = PETSC_TRUE;
728: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
729: PetscFunctionReturn(PETSC_SUCCESS);
730: }
732: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
733: {
734: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
735: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
736: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
737: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
738: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
739: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
740: MatScalar *work;
741: PetscInt *pivots;
742: PetscBool allowzeropivot, zeropivotdetected;
744: PetscFunctionBegin;
745: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
746: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
747: il[0] = 0;
748: for (i = 0; i < mbs; i++) jl[i] = mbs;
750: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
751: PetscCall(PetscMalloc1(bs, &pivots));
752: allowzeropivot = PetscNot(A->erroriffailure);
754: ai = a->i;
755: aj = a->j;
756: aa = a->a;
758: /* for each row k */
759: for (k = 0; k < mbs; k++) {
760: /*initialize k-th row with elements nonzero in row k of A */
761: jmin = ai[k];
762: jmax = ai[k + 1];
763: ap = aa + jmin * bs2;
764: for (j = jmin; j < jmax; j++) {
765: vj = aj[j]; /* block col. index */
766: rtmp_ptr = rtmp + vj * bs2;
767: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
768: }
770: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
771: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
772: i = jl[k]; /* first row to be added to k_th row */
774: while (i < k) {
775: nexti = jl[i]; /* next row to be added to k_th row */
777: /* compute multiplier */
778: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
780: /* uik = -inv(Di)*U_bar(i,k) */
781: diag = ba + i * bs2;
782: u = ba + ili * bs2;
783: PetscCall(PetscArrayzero(uik, bs2));
784: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
786: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
787: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
788: PetscCall(PetscLogFlops(2.0 * bs * bs2));
790: /* update -U(i,k) */
791: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
793: /* add multiple of row i to k-th row ... */
794: jmin = ili + 1;
795: jmax = bi[i + 1];
796: if (jmin < jmax) {
797: for (j = jmin; j < jmax; j++) {
798: /* rtmp += -U(i,k)^T * U_bar(i,j) */
799: rtmp_ptr = rtmp + bj[j] * bs2;
800: u = ba + j * bs2;
801: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
802: }
803: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
805: /* ... add i to row list for next nonzero entry */
806: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
807: j = bj[jmin];
808: jl[i] = jl[j];
809: jl[j] = i; /* update jl */
810: }
811: i = nexti;
812: }
814: /* save nonzero entries in k-th row of U ... */
816: /* invert diagonal block */
817: diag = ba + k * bs2;
818: PetscCall(PetscArraycpy(diag, dk, bs2));
820: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
821: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
823: jmin = bi[k];
824: jmax = bi[k + 1];
825: if (jmin < jmax) {
826: for (j = jmin; j < jmax; j++) {
827: vj = bj[j]; /* block col. index of U */
828: u = ba + j * bs2;
829: rtmp_ptr = rtmp + vj * bs2;
830: for (k1 = 0; k1 < bs2; k1++) {
831: *u++ = *rtmp_ptr;
832: *rtmp_ptr++ = 0.0;
833: }
834: }
836: /* ... add k to row list for first nonzero entry in k-th row */
837: il[k] = jmin;
838: i = bj[jmin];
839: jl[k] = jl[i];
840: jl[i] = k;
841: }
842: }
844: PetscCall(PetscFree(rtmp));
845: PetscCall(PetscFree2(il, jl));
846: PetscCall(PetscFree3(dk, uik, work));
847: PetscCall(PetscFree(pivots));
849: C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
850: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853: C->assembled = PETSC_TRUE;
854: C->preallocated = PETSC_TRUE;
856: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
857: PetscFunctionReturn(PETSC_SUCCESS);
858: }
860: /*
861: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
862: Version for blocks 2 by 2.
863: */
864: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
865: {
866: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
867: IS perm = b->row;
868: const PetscInt *ai, *aj, *perm_ptr;
869: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
870: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
871: MatScalar *ba = b->a, *aa, *ap;
872: MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
873: PetscReal shift = info->shiftamount;
874: PetscBool allowzeropivot, zeropivotdetected;
876: PetscFunctionBegin;
877: allowzeropivot = PetscNot(A->erroriffailure);
879: /* initialization */
880: /* il and jl record the first nonzero element in each row of the accessing
881: window U(0:k, k:mbs-1).
882: jl: list of rows to be added to uneliminated rows
883: i>= k: jl(i) is the first row to be added to row i
884: i< k: jl(i) is the row following row i in some list of rows
885: jl(i) = mbs indicates the end of a list
886: il(i): points to the first nonzero element in columns k,...,mbs-1 of
887: row i of U */
888: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
889: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
890: il[0] = 0;
891: for (i = 0; i < mbs; i++) jl[i] = mbs;
893: PetscCall(ISGetIndices(perm, &perm_ptr));
895: /* check permutation */
896: if (!a->permute) {
897: ai = a->i;
898: aj = a->j;
899: aa = a->a;
900: } else {
901: ai = a->inew;
902: aj = a->jnew;
903: PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
904: PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
905: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
906: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
908: for (i = 0; i < mbs; i++) {
909: jmin = ai[i];
910: jmax = ai[i + 1];
911: for (j = jmin; j < jmax; j++) {
912: while (a2anew[j] != j) {
913: k = a2anew[j];
914: a2anew[j] = a2anew[k];
915: a2anew[k] = k;
916: for (k1 = 0; k1 < 4; k1++) {
917: dk[k1] = aa[k * 4 + k1];
918: aa[k * 4 + k1] = aa[j * 4 + k1];
919: aa[j * 4 + k1] = dk[k1];
920: }
921: }
922: /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
923: if (i > aj[j]) {
924: ap = aa + j * 4; /* ptr to the beginning of the block */
925: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
926: ap[1] = ap[2];
927: ap[2] = dk[1];
928: }
929: }
930: }
931: PetscCall(PetscFree(a2anew));
932: }
934: /* for each row k */
935: for (k = 0; k < mbs; k++) {
936: /*initialize k-th row with elements nonzero in row perm(k) of A */
937: jmin = ai[perm_ptr[k]];
938: jmax = ai[perm_ptr[k] + 1];
939: ap = aa + jmin * 4;
940: for (j = jmin; j < jmax; j++) {
941: vj = perm_ptr[aj[j]]; /* block col. index */
942: rtmp_ptr = rtmp + vj * 4;
943: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
944: }
946: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
947: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
948: i = jl[k]; /* first row to be added to k_th row */
950: while (i < k) {
951: nexti = jl[i]; /* next row to be added to k_th row */
953: /* compute multiplier */
954: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
956: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
957: diag = ba + i * 4;
958: u = ba + ili * 4;
959: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
960: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
961: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
962: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
964: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
965: dk[0] += uik[0] * u[0] + uik[1] * u[1];
966: dk[1] += uik[2] * u[0] + uik[3] * u[1];
967: dk[2] += uik[0] * u[2] + uik[1] * u[3];
968: dk[3] += uik[2] * u[2] + uik[3] * u[3];
970: PetscCall(PetscLogFlops(16.0 * 2.0));
972: /* update -U(i,k): ba[ili] = uik */
973: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
975: /* add multiple of row i to k-th row ... */
976: jmin = ili + 1;
977: jmax = bi[i + 1];
978: if (jmin < jmax) {
979: for (j = jmin; j < jmax; j++) {
980: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
981: rtmp_ptr = rtmp + bj[j] * 4;
982: u = ba + j * 4;
983: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
984: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
985: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
986: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
987: }
988: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
990: /* ... add i to row list for next nonzero entry */
991: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
992: j = bj[jmin];
993: jl[i] = jl[j];
994: jl[j] = i; /* update jl */
995: }
996: i = nexti;
997: }
999: /* save nonzero entries in k-th row of U ... */
1001: /* invert diagonal block */
1002: diag = ba + k * 4;
1003: PetscCall(PetscArraycpy(diag, dk, 4));
1004: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1005: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1007: jmin = bi[k];
1008: jmax = bi[k + 1];
1009: if (jmin < jmax) {
1010: for (j = jmin; j < jmax; j++) {
1011: vj = bj[j]; /* block col. index of U */
1012: u = ba + j * 4;
1013: rtmp_ptr = rtmp + vj * 4;
1014: for (k1 = 0; k1 < 4; k1++) {
1015: *u++ = *rtmp_ptr;
1016: *rtmp_ptr++ = 0.0;
1017: }
1018: }
1020: /* ... add k to row list for first nonzero entry in k-th row */
1021: il[k] = jmin;
1022: i = bj[jmin];
1023: jl[k] = jl[i];
1024: jl[i] = k;
1025: }
1026: }
1028: PetscCall(PetscFree(rtmp));
1029: PetscCall(PetscFree2(il, jl));
1030: if (a->permute) PetscCall(PetscFree(aa));
1031: PetscCall(ISRestoreIndices(perm, &perm_ptr));
1033: C->ops->solve = MatSolve_SeqSBAIJ_2_inplace;
1034: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1035: C->assembled = PETSC_TRUE;
1036: C->preallocated = PETSC_TRUE;
1038: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1039: PetscFunctionReturn(PETSC_SUCCESS);
1040: }
1042: /*
1043: Version for when blocks are 2 by 2 Using natural ordering
1044: */
1045: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1046: {
1047: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1048: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1049: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1050: MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8];
1051: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
1052: PetscReal shift = info->shiftamount;
1053: PetscBool allowzeropivot, zeropivotdetected;
1055: PetscFunctionBegin;
1056: allowzeropivot = PetscNot(A->erroriffailure);
1058: /* initialization */
1059: /* il and jl record the first nonzero element in each row of the accessing
1060: window U(0:k, k:mbs-1).
1061: jl: list of rows to be added to uneliminated rows
1062: i>= k: jl(i) is the first row to be added to row i
1063: i< k: jl(i) is the row following row i in some list of rows
1064: jl(i) = mbs indicates the end of a list
1065: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1066: row i of U */
1067: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1068: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1069: il[0] = 0;
1070: for (i = 0; i < mbs; i++) jl[i] = mbs;
1072: ai = a->i;
1073: aj = a->j;
1074: aa = a->a;
1076: /* for each row k */
1077: for (k = 0; k < mbs; k++) {
1078: /*initialize k-th row with elements nonzero in row k of A */
1079: jmin = ai[k];
1080: jmax = ai[k + 1];
1081: ap = aa + jmin * 4;
1082: for (j = jmin; j < jmax; j++) {
1083: vj = aj[j]; /* block col. index */
1084: rtmp_ptr = rtmp + vj * 4;
1085: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1086: }
1088: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1089: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1090: i = jl[k]; /* first row to be added to k_th row */
1092: while (i < k) {
1093: nexti = jl[i]; /* next row to be added to k_th row */
1095: /* compute multiplier */
1096: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1098: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1099: diag = ba + i * 4;
1100: u = ba + ili * 4;
1101: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1102: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1103: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1104: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1106: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1107: dk[0] += uik[0] * u[0] + uik[1] * u[1];
1108: dk[1] += uik[2] * u[0] + uik[3] * u[1];
1109: dk[2] += uik[0] * u[2] + uik[1] * u[3];
1110: dk[3] += uik[2] * u[2] + uik[3] * u[3];
1112: PetscCall(PetscLogFlops(16.0 * 2.0));
1114: /* update -U(i,k): ba[ili] = uik */
1115: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1117: /* add multiple of row i to k-th row ... */
1118: jmin = ili + 1;
1119: jmax = bi[i + 1];
1120: if (jmin < jmax) {
1121: for (j = jmin; j < jmax; j++) {
1122: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1123: rtmp_ptr = rtmp + bj[j] * 4;
1124: u = ba + j * 4;
1125: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1126: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1127: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1128: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1129: }
1130: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1132: /* ... add i to row list for next nonzero entry */
1133: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1134: j = bj[jmin];
1135: jl[i] = jl[j];
1136: jl[j] = i; /* update jl */
1137: }
1138: i = nexti;
1139: }
1141: /* save nonzero entries in k-th row of U ... */
1143: /* invert diagonal block */
1144: diag = ba + k * 4;
1145: PetscCall(PetscArraycpy(diag, dk, 4));
1146: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1147: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1149: jmin = bi[k];
1150: jmax = bi[k + 1];
1151: if (jmin < jmax) {
1152: for (j = jmin; j < jmax; j++) {
1153: vj = bj[j]; /* block col. index of U */
1154: u = ba + j * 4;
1155: rtmp_ptr = rtmp + vj * 4;
1156: for (k1 = 0; k1 < 4; k1++) {
1157: *u++ = *rtmp_ptr;
1158: *rtmp_ptr++ = 0.0;
1159: }
1160: }
1162: /* ... add k to row list for first nonzero entry in k-th row */
1163: il[k] = jmin;
1164: i = bj[jmin];
1165: jl[k] = jl[i];
1166: jl[i] = k;
1167: }
1168: }
1170: PetscCall(PetscFree(rtmp));
1171: PetscCall(PetscFree2(il, jl));
1173: C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1174: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177: C->assembled = PETSC_TRUE;
1178: C->preallocated = PETSC_TRUE;
1180: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1181: PetscFunctionReturn(PETSC_SUCCESS);
1182: }
1184: /*
1185: Numeric U^T*D*U factorization for SBAIJ format.
1186: Version for blocks are 1 by 1.
1187: */
1188: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1189: {
1190: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1191: IS ip = b->row;
1192: const PetscInt *ai, *aj, *rip;
1193: PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1194: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1195: MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1196: PetscReal rs;
1197: FactorShiftCtx sctx;
1199: PetscFunctionBegin;
1200: /* MatPivotSetUp(): initialize shift context sctx */
1201: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1203: PetscCall(ISGetIndices(ip, &rip));
1204: if (!a->permute) {
1205: ai = a->i;
1206: aj = a->j;
1207: aa = a->a;
1208: } else {
1209: ai = a->inew;
1210: aj = a->jnew;
1211: nz = ai[mbs];
1212: PetscCall(PetscMalloc1(nz, &aa));
1213: a2anew = a->a2anew;
1214: bval = a->a;
1215: for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1216: }
1218: /* initialization */
1219: /* il and jl record the first nonzero element in each row of the accessing
1220: window U(0:k, k:mbs-1).
1221: jl: list of rows to be added to uneliminated rows
1222: i>= k: jl(i) is the first row to be added to row i
1223: i< k: jl(i) is the row following row i in some list of rows
1224: jl(i) = mbs indicates the end of a list
1225: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1226: row i of U */
1227: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1229: do {
1230: sctx.newshift = PETSC_FALSE;
1231: il[0] = 0;
1232: for (i = 0; i < mbs; i++) {
1233: rtmp[i] = 0.0;
1234: jl[i] = mbs;
1235: }
1237: for (k = 0; k < mbs; k++) {
1238: /*initialize k-th row by the perm[k]-th row of A */
1239: jmin = ai[rip[k]];
1240: jmax = ai[rip[k] + 1];
1241: bval = ba + bi[k];
1242: for (j = jmin; j < jmax; j++) {
1243: col = rip[aj[j]];
1244: rtmp[col] = aa[j];
1245: *bval++ = 0.0; /* for in-place factorization */
1246: }
1248: /* shift the diagonal of the matrix */
1249: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1251: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1252: dk = rtmp[k];
1253: i = jl[k]; /* first row to be added to k_th row */
1255: while (i < k) {
1256: nexti = jl[i]; /* next row to be added to k_th row */
1258: /* compute multiplier, update diag(k) and U(i,k) */
1259: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1260: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1261: dk += uikdi * ba[ili];
1262: ba[ili] = uikdi; /* -U(i,k) */
1264: /* add multiple of row i to k-th row */
1265: jmin = ili + 1;
1266: jmax = bi[i + 1];
1267: if (jmin < jmax) {
1268: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1269: PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1271: /* update il and jl for row i */
1272: il[i] = jmin;
1273: j = bj[jmin];
1274: jl[i] = jl[j];
1275: jl[j] = i;
1276: }
1277: i = nexti;
1278: }
1280: /* shift the diagonals when zero pivot is detected */
1281: /* compute rs=sum of abs(off-diagonal) */
1282: rs = 0.0;
1283: jmin = bi[k] + 1;
1284: nz = bi[k + 1] - jmin;
1285: if (nz) {
1286: bcol = bj + jmin;
1287: while (nz--) {
1288: rs += PetscAbsScalar(rtmp[*bcol]);
1289: bcol++;
1290: }
1291: }
1293: sctx.rs = rs;
1294: sctx.pv = dk;
1295: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1296: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1297: dk = sctx.pv;
1299: /* copy data into U(k,:) */
1300: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1301: jmin = bi[k] + 1;
1302: jmax = bi[k + 1];
1303: if (jmin < jmax) {
1304: for (j = jmin; j < jmax; j++) {
1305: col = bj[j];
1306: ba[j] = rtmp[col];
1307: rtmp[col] = 0.0;
1308: }
1309: /* add the k-th row into il and jl */
1310: il[k] = jmin;
1311: i = bj[jmin];
1312: jl[k] = jl[i];
1313: jl[i] = k;
1314: }
1315: }
1316: } while (sctx.newshift);
1317: PetscCall(PetscFree3(rtmp, il, jl));
1318: if (a->permute) PetscCall(PetscFree(aa));
1320: PetscCall(ISRestoreIndices(ip, &rip));
1322: C->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
1323: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1324: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1325: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
1326: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
1327: C->assembled = PETSC_TRUE;
1328: C->preallocated = PETSC_TRUE;
1330: PetscCall(PetscLogFlops(C->rmap->N));
1331: if (sctx.nshift) {
1332: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1333: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1334: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1335: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1336: }
1337: }
1338: PetscFunctionReturn(PETSC_SUCCESS);
1339: }
1341: /*
1342: Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1343: Modified from MatCholeskyFactorNumeric_SeqAIJ()
1344: */
1345: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1346: {
1347: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1348: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data;
1349: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1350: PetscInt *ai = a->i, *aj = a->j, *ajtmp;
1351: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1352: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1353: FactorShiftCtx sctx;
1354: PetscReal rs;
1355: MatScalar d, *v;
1357: PetscFunctionBegin;
1358: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1360: /* MatPivotSetUp(): initialize shift context sctx */
1361: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1363: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1364: sctx.shift_top = info->zeropivot;
1366: PetscCall(PetscArrayzero(rtmp, mbs));
1368: for (i = 0; i < mbs; i++) {
1369: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1370: d = (aa)[a->diag[i]];
1371: rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1372: ajtmp = aj + ai[i] + 1; /* exclude diagonal */
1373: v = aa + ai[i] + 1;
1374: nz = ai[i + 1] - ai[i] - 1;
1375: for (j = 0; j < nz; j++) {
1376: rtmp[i] += PetscAbsScalar(v[j]);
1377: rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1378: }
1379: if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1380: }
1381: sctx.shift_top *= 1.1;
1382: sctx.nshift_max = 5;
1383: sctx.shift_lo = 0.;
1384: sctx.shift_hi = 1.;
1385: }
1387: /* allocate working arrays
1388: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1389: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1390: */
1391: do {
1392: sctx.newshift = PETSC_FALSE;
1394: for (i = 0; i < mbs; i++) c2r[i] = mbs;
1395: if (mbs) il[0] = 0;
1397: for (k = 0; k < mbs; k++) {
1398: /* zero rtmp */
1399: nz = bi[k + 1] - bi[k];
1400: bjtmp = bj + bi[k];
1401: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1403: /* load in initial unfactored row */
1404: bval = ba + bi[k];
1405: jmin = ai[k];
1406: jmax = ai[k + 1];
1407: for (j = jmin; j < jmax; j++) {
1408: col = aj[j];
1409: rtmp[col] = aa[j];
1410: *bval++ = 0.0; /* for in-place factorization */
1411: }
1412: /* shift the diagonal of the matrix: ZeropivotApply() */
1413: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1415: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1416: dk = rtmp[k];
1417: i = c2r[k]; /* first row to be added to k_th row */
1419: while (i < k) {
1420: nexti = c2r[i]; /* next row to be added to k_th row */
1422: /* compute multiplier, update diag(k) and U(i,k) */
1423: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1424: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1425: dk += uikdi * ba[ili]; /* update diag[k] */
1426: ba[ili] = uikdi; /* -U(i,k) */
1428: /* add multiple of row i to k-th row */
1429: jmin = ili + 1;
1430: jmax = bi[i + 1];
1431: if (jmin < jmax) {
1432: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1433: /* update il and c2r for row i */
1434: il[i] = jmin;
1435: j = bj[jmin];
1436: c2r[i] = c2r[j];
1437: c2r[j] = i;
1438: }
1439: i = nexti;
1440: }
1442: /* copy data into U(k,:) */
1443: rs = 0.0;
1444: jmin = bi[k];
1445: jmax = bi[k + 1] - 1;
1446: if (jmin < jmax) {
1447: for (j = jmin; j < jmax; j++) {
1448: col = bj[j];
1449: ba[j] = rtmp[col];
1450: rs += PetscAbsScalar(ba[j]);
1451: }
1452: /* add the k-th row into il and c2r */
1453: il[k] = jmin;
1454: i = bj[jmin];
1455: c2r[k] = c2r[i];
1456: c2r[i] = k;
1457: }
1459: sctx.rs = rs;
1460: sctx.pv = dk;
1461: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1462: if (sctx.newshift) break;
1463: dk = sctx.pv;
1465: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1466: }
1467: } while (sctx.newshift);
1469: PetscCall(PetscFree3(rtmp, il, c2r));
1471: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1472: B->ops->solves = MatSolves_SeqSBAIJ_1;
1473: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1474: B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1475: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1476: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1478: B->assembled = PETSC_TRUE;
1479: B->preallocated = PETSC_TRUE;
1481: PetscCall(PetscLogFlops(B->rmap->n));
1483: /* MatPivotView() */
1484: if (sctx.nshift) {
1485: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1486: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1487: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1488: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1489: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1490: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1491: }
1492: }
1493: PetscFunctionReturn(PETSC_SUCCESS);
1494: }
1496: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1497: {
1498: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1499: PetscInt i, j, mbs = a->mbs;
1500: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1501: PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1502: MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1503: PetscReal rs;
1504: FactorShiftCtx sctx;
1506: PetscFunctionBegin;
1507: /* MatPivotSetUp(): initialize shift context sctx */
1508: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1510: /* initialization */
1511: /* il and jl record the first nonzero element in each row of the accessing
1512: window U(0:k, k:mbs-1).
1513: jl: list of rows to be added to uneliminated rows
1514: i>= k: jl(i) is the first row to be added to row i
1515: i< k: jl(i) is the row following row i in some list of rows
1516: jl(i) = mbs indicates the end of a list
1517: il(i): points to the first nonzero element in U(i,k:mbs-1)
1518: */
1519: PetscCall(PetscMalloc1(mbs, &rtmp));
1520: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1522: do {
1523: sctx.newshift = PETSC_FALSE;
1524: il[0] = 0;
1525: for (i = 0; i < mbs; i++) {
1526: rtmp[i] = 0.0;
1527: jl[i] = mbs;
1528: }
1530: for (k = 0; k < mbs; k++) {
1531: /*initialize k-th row with elements nonzero in row perm(k) of A */
1532: nz = ai[k + 1] - ai[k];
1533: acol = aj + ai[k];
1534: aval = aa + ai[k];
1535: bval = ba + bi[k];
1536: while (nz--) {
1537: rtmp[*acol++] = *aval++;
1538: *bval++ = 0.0; /* for in-place factorization */
1539: }
1541: /* shift the diagonal of the matrix */
1542: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1544: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1545: dk = rtmp[k];
1546: i = jl[k]; /* first row to be added to k_th row */
1548: while (i < k) {
1549: nexti = jl[i]; /* next row to be added to k_th row */
1550: /* compute multiplier, update D(k) and U(i,k) */
1551: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1552: uikdi = -ba[ili] * ba[bi[i]];
1553: dk += uikdi * ba[ili];
1554: ba[ili] = uikdi; /* -U(i,k) */
1556: /* add multiple of row i to k-th row ... */
1557: jmin = ili + 1;
1558: nz = bi[i + 1] - jmin;
1559: if (nz > 0) {
1560: bcol = bj + jmin;
1561: bval = ba + jmin;
1562: PetscCall(PetscLogFlops(2.0 * nz));
1563: while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1565: /* update il and jl for i-th row */
1566: il[i] = jmin;
1567: j = bj[jmin];
1568: jl[i] = jl[j];
1569: jl[j] = i;
1570: }
1571: i = nexti;
1572: }
1574: /* shift the diagonals when zero pivot is detected */
1575: /* compute rs=sum of abs(off-diagonal) */
1576: rs = 0.0;
1577: jmin = bi[k] + 1;
1578: nz = bi[k + 1] - jmin;
1579: if (nz) {
1580: bcol = bj + jmin;
1581: while (nz--) {
1582: rs += PetscAbsScalar(rtmp[*bcol]);
1583: bcol++;
1584: }
1585: }
1587: sctx.rs = rs;
1588: sctx.pv = dk;
1589: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1590: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1591: dk = sctx.pv;
1593: /* copy data into U(k,:) */
1594: ba[bi[k]] = 1.0 / dk;
1595: jmin = bi[k] + 1;
1596: nz = bi[k + 1] - jmin;
1597: if (nz) {
1598: bcol = bj + jmin;
1599: bval = ba + jmin;
1600: while (nz--) {
1601: *bval++ = rtmp[*bcol];
1602: rtmp[*bcol++] = 0.0;
1603: }
1604: /* add k-th row into il and jl */
1605: il[k] = jmin;
1606: i = bj[jmin];
1607: jl[k] = jl[i];
1608: jl[i] = k;
1609: }
1610: } /* end of for (k = 0; k<mbs; k++) */
1611: } while (sctx.newshift);
1612: PetscCall(PetscFree(rtmp));
1613: PetscCall(PetscFree2(il, jl));
1615: C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1616: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1617: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1618: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1619: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1621: C->assembled = PETSC_TRUE;
1622: C->preallocated = PETSC_TRUE;
1624: PetscCall(PetscLogFlops(C->rmap->N));
1625: if (sctx.nshift) {
1626: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1627: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1628: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1629: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1630: }
1631: }
1632: PetscFunctionReturn(PETSC_SUCCESS);
1633: }
1635: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1636: {
1637: Mat C;
1639: PetscFunctionBegin;
1640: PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1641: PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1642: PetscCall(MatCholeskyFactorNumeric(C, A, info));
1644: A->ops->solve = C->ops->solve;
1645: A->ops->solvetranspose = C->ops->solvetranspose;
1647: PetscCall(MatHeaderMerge(A, &C));
1648: PetscFunctionReturn(PETSC_SUCCESS);
1649: }