Actual source code: sbaijfact.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
3: #include <petsc/private/kernels/blockinvert.h>
4: #include <petscis.h>
6: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
7: {
8: Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
9: MatScalar *dd = fact->a;
10: PetscInt mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;
12: PetscFunctionBegin;
13: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);
15: nneg_tmp = 0;
16: npos_tmp = 0;
17: if (fi) {
18: for (i = 0; i < mbs; i++) {
19: if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
20: else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
21: fi++;
22: }
23: } else {
24: for (i = 0; i < mbs; i++) {
25: if (PetscRealPart(dd[fact->i[i]]) > 0.0) npos_tmp++;
26: else if (PetscRealPart(dd[fact->i[i]]) < 0.0) nneg_tmp++;
27: }
28: }
29: if (nneg) *nneg = nneg_tmp;
30: if (npos) *npos = npos_tmp;
31: if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
32: PetscFunctionReturn(PETSC_SUCCESS);
33: }
35: /*
36: Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
37: Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
38: */
39: static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
40: {
41: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b;
42: const PetscInt *rip, *ai, *aj;
43: PetscInt i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
44: PetscInt m, reallocs = 0, prow;
45: PetscInt *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
46: PetscReal f = info->fill;
47: PetscBool perm_identity;
49: PetscFunctionBegin;
50: /* check whether perm is the identity mapping */
51: PetscCall(ISIdentity(perm, &perm_identity));
52: PetscCall(ISGetIndices(perm, &rip));
54: if (perm_identity) { /* without permutation */
55: a->permute = PETSC_FALSE;
57: ai = a->i;
58: aj = a->j;
59: } else { /* non-trivial permutation */
60: a->permute = PETSC_TRUE;
62: PetscCall(MatReorderingSeqSBAIJ(A, perm));
64: ai = a->inew;
65: aj = a->jnew;
66: }
68: /* initialization */
69: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&iu));
70: umax = (PetscInt)(f * ai[mbs] + 1);
71: umax += mbs + 1;
72: PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&ju));
73: iu[0] = mbs + 1;
74: juidx = mbs + 1; /* index for ju */
75: /* jl linked list for pivot row -- linked list for col index */
76: PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
77: for (i = 0; i < mbs; i++) {
78: jl[i] = mbs;
79: q[i] = 0;
80: }
82: /* for each row k */
83: for (k = 0; k < mbs; k++) {
84: for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
85: nzk = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
86: q[k] = mbs;
87: /* initialize nonzero structure of k-th row to row rip[k] of A */
88: jmin = ai[rip[k]] + 1; /* exclude diag[k] */
89: jmax = ai[rip[k] + 1];
90: for (j = jmin; j < jmax; j++) {
91: vj = rip[aj[j]]; /* col. value */
92: if (vj > k) {
93: qm = k;
94: do {
95: m = qm;
96: qm = q[m];
97: } while (qm < vj);
98: PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
99: nzk++;
100: q[m] = vj;
101: q[vj] = qm;
102: } /* if (vj > k) */
103: } /* for (j=jmin; j<jmax; j++) */
105: /* modify nonzero structure of k-th row by computing fill-in
106: for each row i to be merged in */
107: prow = k;
108: prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */
110: while (prow < k) {
111: /* merge row prow into k-th row */
112: jmin = iu[prow] + 1;
113: jmax = iu[prow + 1];
114: qm = k;
115: for (j = jmin; j < jmax; j++) {
116: vj = ju[j];
117: do {
118: m = qm;
119: qm = q[m];
120: } while (qm < vj);
121: if (qm != vj) {
122: nzk++;
123: q[m] = vj;
124: q[vj] = qm;
125: qm = vj;
126: }
127: }
128: prow = jl[prow]; /* next pivot row */
129: }
131: /* add k to row list for first nonzero element in k-th row */
132: if (nzk > 0) {
133: i = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
134: jl[k] = jl[i];
135: jl[i] = k;
136: }
137: iu[k + 1] = iu[k] + nzk;
139: /* allocate more space to ju if needed */
140: if (iu[k + 1] > umax) {
141: /* estimate how much additional space we will need */
142: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
143: /* just double the memory each time */
144: maxadd = umax;
145: if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
146: umax += maxadd;
148: /* allocate a longer ju */
149: PetscCall(PetscShmgetAllocateArray(umax, sizeof(PetscInt), (void **)&jutmp));
150: PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
151: PetscCall(PetscShmgetDeallocateArray((void **)&ju));
152: ju = jutmp;
153: reallocs++; /* count how many times we realloc */
154: }
156: /* save nonzero structure of k-th row in ju */
157: i = k;
158: while (nzk--) {
159: i = q[i];
160: ju[juidx++] = i;
161: }
162: }
164: #if defined(PETSC_USE_INFO)
165: if (ai[mbs] != 0) {
166: PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
167: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
168: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
169: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
170: PetscCall(PetscInfo(A, "for best performance.\n"));
171: } else {
172: PetscCall(PetscInfo(A, "Empty matrix\n"));
173: }
174: #endif
176: PetscCall(ISRestoreIndices(perm, &rip));
177: PetscCall(PetscFree2(jl, q));
179: /* put together the new matrix */
180: PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));
182: b = (Mat_SeqSBAIJ *)F->data;
183: b->free_ij = PETSC_TRUE;
184: PetscCall(PetscShmgetAllocateArray((iu[mbs] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
185: b->free_a = PETSC_TRUE;
186: b->j = ju;
187: b->i = iu;
188: b->diag = NULL;
189: b->ilen = NULL;
190: b->imax = NULL;
191: b->row = perm;
193: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
195: PetscCall(PetscObjectReference((PetscObject)perm));
197: b->icol = perm;
198: PetscCall(PetscObjectReference((PetscObject)perm));
199: PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
200: /* In b structure: Free imax, ilen, old a, old j.
201: Allocate idnew, solve_work, new a, new j */
202: b->maxnz = b->nz = iu[mbs];
204: F->info.factor_mallocs = reallocs;
205: F->info.fill_ratio_given = f;
206: if (ai[mbs] != 0) {
207: F->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
208: } else {
209: F->info.fill_ratio_needed = 0.0;
210: }
211: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
214: /*
215: Symbolic U^T*D*U factorization for SBAIJ format.
216: See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
217: */
218: #include <petscbt.h>
219: #include <../src/mat/utils/freespace.h>
220: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
221: {
222: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
223: Mat_SeqSBAIJ *b;
224: PetscBool perm_identity;
225: PetscReal fill = info->fill;
226: const PetscInt *rip, *ai = a->i, *aj = a->j;
227: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
228: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
229: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
230: PetscFreeSpaceList free_space = NULL, current_space = NULL;
231: PetscBT lnkbt;
232: PetscBool diagDense;
234: PetscFunctionBegin;
235: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
236: PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, NULL, &diagDense));
237: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
238: if (bs > 1) {
239: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
240: PetscFunctionReturn(PETSC_SUCCESS);
241: }
243: /* check whether perm is the identity mapping */
244: PetscCall(ISIdentity(perm, &perm_identity));
245: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
246: a->permute = PETSC_FALSE;
247: PetscCall(ISGetIndices(perm, &rip));
249: /* initialization */
250: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
251: PetscCall(PetscMalloc1(mbs + 1, &udiag));
252: ui[0] = 0;
254: /* jl: linked list for storing indices of the pivot rows
255: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
256: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
257: for (i = 0; i < mbs; i++) {
258: jl[i] = mbs;
259: il[i] = 0;
260: }
262: /* create and initialize a linked list for storing column indices of the active row k */
263: nlnk = mbs + 1;
264: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
266: /* initial FreeSpace size is fill*(ai[mbs]+1) */
267: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
268: current_space = free_space;
270: for (k = 0; k < mbs; k++) { /* for each active row k */
271: /* initialize lnk by the column indices of row rip[k] of A */
272: nzk = 0;
273: ncols = ai[k + 1] - ai[k];
274: PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
275: for (j = 0; j < ncols; j++) {
276: i = *(aj + ai[k] + j);
277: cols[j] = i;
278: }
279: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
280: nzk += nlnk;
282: /* update lnk by computing fill-in for each pivot row to be merged in */
283: prow = jl[k]; /* 1st pivot row */
285: while (prow < k) {
286: nextprow = jl[prow];
287: /* merge prow into k-th row */
288: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
289: jmax = ui[prow + 1];
290: ncols = jmax - jmin;
291: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
292: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
293: nzk += nlnk;
295: /* update il and jl for prow */
296: if (jmin < jmax) {
297: il[prow] = jmin;
298: j = *uj_ptr;
299: jl[prow] = jl[j];
300: jl[j] = prow;
301: }
302: prow = nextprow;
303: }
305: /* if free space is not available, make more free space */
306: if (current_space->local_remaining < nzk) {
307: i = mbs - k + 1; /* num of unfactored rows */
308: i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
309: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
310: reallocs++;
311: }
313: /* copy data into free space, then initialize lnk */
314: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
316: /* add the k-th row into il and jl */
317: if (nzk > 1) {
318: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
319: jl[k] = jl[i];
320: jl[i] = k;
321: il[k] = ui[k] + 1;
322: }
323: ui_ptr[k] = current_space->array;
325: current_space->array += nzk;
326: current_space->local_used += nzk;
327: current_space->local_remaining -= nzk;
329: ui[k + 1] = ui[k] + nzk;
330: }
332: PetscCall(ISRestoreIndices(perm, &rip));
333: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
335: /* destroy list of free space and other temporary array(s) */
336: PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscInt), (void **)&uj));
337: PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
338: PetscCall(PetscLLDestroy(lnk, lnkbt));
340: /* put together the new matrix in MATSEQSBAIJ format */
341: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
343: b = (Mat_SeqSBAIJ *)fact->data;
344: b->free_ij = PETSC_TRUE;
345: PetscCall(PetscShmgetAllocateArray(ui[mbs], sizeof(PetscScalar), (void **)&b->a));
346: b->free_a = PETSC_TRUE;
347: b->j = uj;
348: b->i = ui;
349: b->diag = udiag;
350: b->ilen = NULL;
351: b->imax = NULL;
352: b->row = perm;
353: b->icol = perm;
355: PetscCall(PetscObjectReference((PetscObject)perm));
356: PetscCall(PetscObjectReference((PetscObject)perm));
358: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
360: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
362: b->maxnz = b->nz = ui[mbs];
364: fact->info.factor_mallocs = reallocs;
365: fact->info.fill_ratio_given = fill;
366: if (ai[mbs] != 0) {
367: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
368: } else {
369: fact->info.fill_ratio_needed = 0.0;
370: }
371: #if defined(PETSC_USE_INFO)
372: if (ai[mbs] != 0) {
373: PetscReal af = fact->info.fill_ratio_needed;
374: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
375: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
376: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
377: } else {
378: PetscCall(PetscInfo(A, "Empty matrix\n"));
379: }
380: #endif
381: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
382: PetscFunctionReturn(PETSC_SUCCESS);
383: }
385: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
386: {
387: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
388: Mat_SeqSBAIJ *b;
389: PetscBool perm_identity;
390: PetscReal fill = info->fill;
391: const PetscInt *rip, *ai, *aj;
392: PetscInt i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
393: PetscInt *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
394: PetscInt nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
395: PetscFreeSpaceList free_space = NULL, current_space = NULL;
396: PetscBT lnkbt;
397: PetscBool diagDense;
399: PetscFunctionBegin;
400: PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, NULL, &diagDense));
401: PetscCheck(diagDense, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry");
403: /*
404: This code originally uses Modified Sparse Row (MSR) storage
405: (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
406: Then it is rewritten so the factor B takes seqsbaij format. However the associated
407: MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
408: thus the original code in MSR format is still used for these cases.
409: The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
410: MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
411: */
412: if (bs > 1) {
413: PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
414: PetscFunctionReturn(PETSC_SUCCESS);
415: }
417: /* check whether perm is the identity mapping */
418: PetscCall(ISIdentity(perm, &perm_identity));
419: PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
420: a->permute = PETSC_FALSE;
421: ai = a->i;
422: aj = a->j;
423: PetscCall(ISGetIndices(perm, &rip));
425: /* initialization */
426: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&ui));
427: ui[0] = 0;
429: /* jl: linked list for storing indices of the pivot rows
430: il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
431: PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
432: for (i = 0; i < mbs; i++) {
433: jl[i] = mbs;
434: il[i] = 0;
435: }
437: /* create and initialize a linked list for storing column indices of the active row k */
438: nlnk = mbs + 1;
439: PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));
441: /* initial FreeSpace size is fill*(ai[mbs]+1) */
442: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
443: current_space = free_space;
445: for (k = 0; k < mbs; k++) { /* for each active row k */
446: /* initialize lnk by the column indices of row rip[k] of A */
447: nzk = 0;
448: ncols = ai[rip[k] + 1] - ai[rip[k]];
449: for (j = 0; j < ncols; j++) {
450: i = *(aj + ai[rip[k]] + j);
451: cols[j] = rip[i];
452: }
453: PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
454: nzk += nlnk;
456: /* update lnk by computing fill-in for each pivot row to be merged in */
457: prow = jl[k]; /* 1st pivot row */
459: while (prow < k) {
460: nextprow = jl[prow];
461: /* merge prow into k-th row */
462: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
463: jmax = ui[prow + 1];
464: ncols = jmax - jmin;
465: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
466: PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
467: nzk += nlnk;
469: /* update il and jl for prow */
470: if (jmin < jmax) {
471: il[prow] = jmin;
473: j = *uj_ptr;
474: jl[prow] = jl[j];
475: jl[j] = prow;
476: }
477: prow = nextprow;
478: }
480: /* if free space is not available, make more free space */
481: if (current_space->local_remaining < nzk) {
482: i = mbs - k + 1; /* num of unfactored rows */
483: i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
484: PetscCall(PetscFreeSpaceGet(i, ¤t_space));
485: reallocs++;
486: }
488: /* copy data into free space, then initialize lnk */
489: PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));
491: /* add the k-th row into il and jl */
492: if (nzk - 1 > 0) {
493: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
494: jl[k] = jl[i];
495: jl[i] = k;
496: il[k] = ui[k] + 1;
497: }
498: ui_ptr[k] = current_space->array;
500: current_space->array += nzk;
501: current_space->local_used += nzk;
502: current_space->local_remaining -= nzk;
504: ui[k + 1] = ui[k] + nzk;
505: }
507: PetscCall(ISRestoreIndices(perm, &rip));
508: PetscCall(PetscFree4(ui_ptr, il, jl, cols));
510: /* destroy list of free space and other temporary array(s) */
511: PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscInt), (void **)&uj));
512: PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
513: PetscCall(PetscLLDestroy(lnk, lnkbt));
515: /* put together the new matrix in MATSEQSBAIJ format */
516: PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));
518: b = (Mat_SeqSBAIJ *)fact->data;
519: PetscCall(PetscShmgetAllocateArray(ui[mbs] + 1, sizeof(PetscScalar), (void **)&b->a));
520: b->free_a = PETSC_TRUE;
521: b->free_ij = PETSC_TRUE;
522: b->j = uj;
523: b->i = ui;
524: b->diag = NULL;
525: b->ilen = NULL;
526: b->imax = NULL;
527: b->row = perm;
529: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
531: PetscCall(PetscObjectReference((PetscObject)perm));
532: b->icol = perm;
533: PetscCall(PetscObjectReference((PetscObject)perm));
534: PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
535: b->maxnz = b->nz = ui[mbs];
537: fact->info.factor_mallocs = reallocs;
538: fact->info.fill_ratio_given = fill;
539: if (ai[mbs] != 0) {
540: fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
541: } else {
542: fact->info.fill_ratio_needed = 0.0;
543: }
544: #if defined(PETSC_USE_INFO)
545: if (ai[mbs] != 0) {
546: PetscReal af = fact->info.fill_ratio_needed;
547: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
548: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
549: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
550: } else {
551: PetscCall(PetscInfo(A, "Empty matrix\n"));
552: }
553: #endif
554: PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
555: PetscFunctionReturn(PETSC_SUCCESS);
556: }
558: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
559: {
560: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
561: IS perm = b->row;
562: const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
563: PetscInt i, j;
564: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
565: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
566: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
567: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
568: MatScalar *work;
569: PetscInt *pivots;
570: PetscBool allowzeropivot, zeropivotdetected;
572: PetscFunctionBegin;
573: /* initialization */
574: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
575: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
576: allowzeropivot = PetscNot(A->erroriffailure);
578: il[0] = 0;
579: for (i = 0; i < mbs; i++) jl[i] = mbs;
581: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
582: PetscCall(PetscMalloc1(bs, &pivots));
584: PetscCall(ISGetIndices(perm, &perm_ptr));
586: /* check permutation */
587: if (!a->permute) {
588: ai = a->i;
589: aj = a->j;
590: aa = a->a;
591: } else {
592: ai = a->inew;
593: aj = a->jnew;
594: PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
595: PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
596: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
597: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
599: for (i = 0; i < mbs; i++) {
600: jmin = ai[i];
601: jmax = ai[i + 1];
602: for (j = jmin; j < jmax; j++) {
603: while (a2anew[j] != j) {
604: k = a2anew[j];
605: a2anew[j] = a2anew[k];
606: a2anew[k] = k;
607: for (k1 = 0; k1 < bs2; k1++) {
608: dk[k1] = aa[k * bs2 + k1];
609: aa[k * bs2 + k1] = aa[j * bs2 + k1];
610: aa[j * bs2 + k1] = dk[k1];
611: }
612: }
613: /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
614: if (i > aj[j]) {
615: ap = aa + j * bs2; /* ptr to the beginning of j-th block of aa */
616: for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
617: for (k = 0; k < bs; k++) { /* j-th block of aa <- dk^T */
618: for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
619: }
620: }
621: }
622: }
623: PetscCall(PetscFree(a2anew));
624: }
626: /* for each row k */
627: for (k = 0; k < mbs; k++) {
628: /*initialize k-th row with elements nonzero in row perm(k) of A */
629: jmin = ai[perm_ptr[k]];
630: jmax = ai[perm_ptr[k] + 1];
632: ap = aa + jmin * bs2;
633: for (j = jmin; j < jmax; j++) {
634: vj = perm_ptr[aj[j]]; /* block col. index */
635: rtmp_ptr = rtmp + vj * bs2;
636: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
637: }
639: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
640: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
641: i = jl[k]; /* first row to be added to k_th row */
643: while (i < k) {
644: nexti = jl[i]; /* next row to be added to k_th row */
646: /* compute multiplier */
647: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
649: /* uik = -inv(Di)*U_bar(i,k) */
650: diag = ba + i * bs2;
651: u = ba + ili * bs2;
652: PetscCall(PetscArrayzero(uik, bs2));
653: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
655: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
656: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
657: PetscCall(PetscLogFlops(4.0 * bs * bs2));
659: /* update -U(i,k) */
660: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
662: /* add multiple of row i to k-th row ... */
663: jmin = ili + 1;
664: jmax = bi[i + 1];
665: if (jmin < jmax) {
666: for (j = jmin; j < jmax; j++) {
667: /* rtmp += -U(i,k)^T * U_bar(i,j) */
668: rtmp_ptr = rtmp + bj[j] * bs2;
669: u = ba + j * bs2;
670: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
671: }
672: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
674: /* ... add i to row list for next nonzero entry */
675: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
676: j = bj[jmin];
677: jl[i] = jl[j];
678: jl[j] = i; /* update jl */
679: }
680: i = nexti;
681: }
683: /* save nonzero entries in k-th row of U ... */
685: /* invert diagonal block */
686: diag = ba + k * bs2;
687: PetscCall(PetscArraycpy(diag, dk, bs2));
689: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
690: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
692: jmin = bi[k];
693: jmax = bi[k + 1];
694: if (jmin < jmax) {
695: for (j = jmin; j < jmax; j++) {
696: vj = bj[j]; /* block col. index of U */
697: u = ba + j * bs2;
698: rtmp_ptr = rtmp + vj * bs2;
699: for (k1 = 0; k1 < bs2; k1++) {
700: *u++ = *rtmp_ptr;
701: *rtmp_ptr++ = 0.0;
702: }
703: }
705: /* ... add k to row list for first nonzero entry in k-th row */
706: il[k] = jmin;
707: i = bj[jmin];
708: jl[k] = jl[i];
709: jl[i] = k;
710: }
711: }
713: PetscCall(PetscFree(rtmp));
714: PetscCall(PetscFree2(il, jl));
715: PetscCall(PetscFree3(dk, uik, work));
716: PetscCall(PetscFree(pivots));
717: if (a->permute) PetscCall(PetscFree(aa));
719: PetscCall(ISRestoreIndices(perm, &perm_ptr));
721: C->ops->solve = MatSolve_SeqSBAIJ_N_inplace;
722: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
723: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_inplace;
724: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_inplace;
726: C->assembled = PETSC_TRUE;
727: C->preallocated = PETSC_TRUE;
729: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
730: PetscFunctionReturn(PETSC_SUCCESS);
731: }
733: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
734: {
735: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
736: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
737: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
738: PetscInt bs = A->rmap->bs, bs2 = a->bs2;
739: MatScalar *ba = b->a, *aa, *ap, *dk, *uik;
740: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
741: MatScalar *work;
742: PetscInt *pivots;
743: PetscBool allowzeropivot, zeropivotdetected;
745: PetscFunctionBegin;
746: PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
747: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
748: il[0] = 0;
749: for (i = 0; i < mbs; i++) jl[i] = mbs;
751: PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
752: PetscCall(PetscMalloc1(bs, &pivots));
753: allowzeropivot = PetscNot(A->erroriffailure);
755: ai = a->i;
756: aj = a->j;
757: aa = a->a;
759: /* for each row k */
760: for (k = 0; k < mbs; k++) {
761: /*initialize k-th row with elements nonzero in row k of A */
762: jmin = ai[k];
763: jmax = ai[k + 1];
764: ap = aa + jmin * bs2;
765: for (j = jmin; j < jmax; j++) {
766: vj = aj[j]; /* block col. index */
767: rtmp_ptr = rtmp + vj * bs2;
768: for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
769: }
771: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
772: PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
773: i = jl[k]; /* first row to be added to k_th row */
775: while (i < k) {
776: nexti = jl[i]; /* next row to be added to k_th row */
778: /* compute multiplier */
779: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
781: /* uik = -inv(Di)*U_bar(i,k) */
782: diag = ba + i * bs2;
783: u = ba + ili * bs2;
784: PetscCall(PetscArrayzero(uik, bs2));
785: PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);
787: /* update D(k) += -U(i,k)^T * U_bar(i,k) */
788: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
789: PetscCall(PetscLogFlops(2.0 * bs * bs2));
791: /* update -U(i,k) */
792: PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));
794: /* add multiple of row i to k-th row ... */
795: jmin = ili + 1;
796: jmax = bi[i + 1];
797: if (jmin < jmax) {
798: for (j = jmin; j < jmax; j++) {
799: /* rtmp += -U(i,k)^T * U_bar(i,j) */
800: rtmp_ptr = rtmp + bj[j] * bs2;
801: u = ba + j * bs2;
802: PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
803: }
804: PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));
806: /* ... add i to row list for next nonzero entry */
807: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
808: j = bj[jmin];
809: jl[i] = jl[j];
810: jl[j] = i; /* update jl */
811: }
812: i = nexti;
813: }
815: /* save nonzero entries in k-th row of U ... */
817: /* invert diagonal block */
818: diag = ba + k * bs2;
819: PetscCall(PetscArraycpy(diag, dk, bs2));
821: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
822: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
824: jmin = bi[k];
825: jmax = bi[k + 1];
826: if (jmin < jmax) {
827: for (j = jmin; j < jmax; j++) {
828: vj = bj[j]; /* block col. index of U */
829: u = ba + j * bs2;
830: rtmp_ptr = rtmp + vj * bs2;
831: for (k1 = 0; k1 < bs2; k1++) {
832: *u++ = *rtmp_ptr;
833: *rtmp_ptr++ = 0.0;
834: }
835: }
837: /* ... add k to row list for first nonzero entry in k-th row */
838: il[k] = jmin;
839: i = bj[jmin];
840: jl[k] = jl[i];
841: jl[i] = k;
842: }
843: }
845: PetscCall(PetscFree(rtmp));
846: PetscCall(PetscFree2(il, jl));
847: PetscCall(PetscFree3(dk, uik, work));
848: PetscCall(PetscFree(pivots));
850: C->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851: C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854: C->assembled = PETSC_TRUE;
855: C->preallocated = PETSC_TRUE;
857: PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
858: PetscFunctionReturn(PETSC_SUCCESS);
859: }
861: /*
862: Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
863: Version for blocks 2 by 2.
864: */
865: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
866: {
867: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
868: IS perm = b->row;
869: const PetscInt *ai, *aj, *perm_ptr;
870: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
871: PetscInt *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
872: MatScalar *ba = b->a, *aa, *ap;
873: MatScalar *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
874: PetscReal shift = info->shiftamount;
875: PetscBool allowzeropivot, zeropivotdetected;
877: PetscFunctionBegin;
878: allowzeropivot = PetscNot(A->erroriffailure);
880: /* initialization */
881: /* il and jl record the first nonzero element in each row of the accessing
882: window U(0:k, k:mbs-1).
883: jl: list of rows to be added to uneliminated rows
884: i>= k: jl(i) is the first row to be added to row i
885: i< k: jl(i) is the row following row i in some list of rows
886: jl(i) = mbs indicates the end of a list
887: il(i): points to the first nonzero element in columns k,...,mbs-1 of
888: row i of U */
889: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
890: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
891: il[0] = 0;
892: for (i = 0; i < mbs; i++) jl[i] = mbs;
894: PetscCall(ISGetIndices(perm, &perm_ptr));
896: /* check permutation */
897: if (!a->permute) {
898: ai = a->i;
899: aj = a->j;
900: aa = a->a;
901: } else {
902: ai = a->inew;
903: aj = a->jnew;
904: PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
905: PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
906: PetscCall(PetscMalloc1(ai[mbs], &a2anew));
907: PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));
909: for (i = 0; i < mbs; i++) {
910: jmin = ai[i];
911: jmax = ai[i + 1];
912: for (j = jmin; j < jmax; j++) {
913: while (a2anew[j] != j) {
914: k = a2anew[j];
915: a2anew[j] = a2anew[k];
916: a2anew[k] = k;
917: for (k1 = 0; k1 < 4; k1++) {
918: dk[k1] = aa[k * 4 + k1];
919: aa[k * 4 + k1] = aa[j * 4 + k1];
920: aa[j * 4 + k1] = dk[k1];
921: }
922: }
923: /* transform column-oriented blocks that lie in the lower triangle to row-oriented blocks */
924: if (i > aj[j]) {
925: ap = aa + j * 4; /* ptr to the beginning of the block */
926: dk[1] = ap[1]; /* swap ap[1] and ap[2] */
927: ap[1] = ap[2];
928: ap[2] = dk[1];
929: }
930: }
931: }
932: PetscCall(PetscFree(a2anew));
933: }
935: /* for each row k */
936: for (k = 0; k < mbs; k++) {
937: /*initialize k-th row with elements nonzero in row perm(k) of A */
938: jmin = ai[perm_ptr[k]];
939: jmax = ai[perm_ptr[k] + 1];
940: ap = aa + jmin * 4;
941: for (j = jmin; j < jmax; j++) {
942: vj = perm_ptr[aj[j]]; /* block col. index */
943: rtmp_ptr = rtmp + vj * 4;
944: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
945: }
947: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
948: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
949: i = jl[k]; /* first row to be added to k_th row */
951: while (i < k) {
952: nexti = jl[i]; /* next row to be added to k_th row */
954: /* compute multiplier */
955: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
957: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
958: diag = ba + i * 4;
959: u = ba + ili * 4;
960: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
961: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
962: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
963: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
965: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
966: dk[0] += uik[0] * u[0] + uik[1] * u[1];
967: dk[1] += uik[2] * u[0] + uik[3] * u[1];
968: dk[2] += uik[0] * u[2] + uik[1] * u[3];
969: dk[3] += uik[2] * u[2] + uik[3] * u[3];
971: PetscCall(PetscLogFlops(16.0 * 2.0));
973: /* update -U(i,k): ba[ili] = uik */
974: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
976: /* add multiple of row i to k-th row ... */
977: jmin = ili + 1;
978: jmax = bi[i + 1];
979: if (jmin < jmax) {
980: for (j = jmin; j < jmax; j++) {
981: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
982: rtmp_ptr = rtmp + bj[j] * 4;
983: u = ba + j * 4;
984: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
985: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
986: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
987: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
988: }
989: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
991: /* ... add i to row list for next nonzero entry */
992: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
993: j = bj[jmin];
994: jl[i] = jl[j];
995: jl[j] = i; /* update jl */
996: }
997: i = nexti;
998: }
1000: /* save nonzero entries in k-th row of U ... */
1002: /* invert diagonal block */
1003: diag = ba + k * 4;
1004: PetscCall(PetscArraycpy(diag, dk, 4));
1005: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1006: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1008: jmin = bi[k];
1009: jmax = bi[k + 1];
1010: if (jmin < jmax) {
1011: for (j = jmin; j < jmax; j++) {
1012: vj = bj[j]; /* block col. index of U */
1013: u = ba + j * 4;
1014: rtmp_ptr = rtmp + vj * 4;
1015: for (k1 = 0; k1 < 4; k1++) {
1016: *u++ = *rtmp_ptr;
1017: *rtmp_ptr++ = 0.0;
1018: }
1019: }
1021: /* ... add k to row list for first nonzero entry in k-th row */
1022: il[k] = jmin;
1023: i = bj[jmin];
1024: jl[k] = jl[i];
1025: jl[i] = k;
1026: }
1027: }
1029: PetscCall(PetscFree(rtmp));
1030: PetscCall(PetscFree2(il, jl));
1031: if (a->permute) PetscCall(PetscFree(aa));
1032: PetscCall(ISRestoreIndices(perm, &perm_ptr));
1034: C->ops->solve = MatSolve_SeqSBAIJ_2_inplace;
1035: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1036: C->assembled = PETSC_TRUE;
1037: C->preallocated = PETSC_TRUE;
1039: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1040: PetscFunctionReturn(PETSC_SUCCESS);
1041: }
1043: /*
1044: Version for when blocks are 2 by 2 Using natural ordering
1045: */
1046: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1047: {
1048: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1049: PetscInt i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1050: PetscInt *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1051: MatScalar *ba = b->a, *aa, *ap, dk[8], uik[8];
1052: MatScalar *u, *diag, *rtmp, *rtmp_ptr;
1053: PetscReal shift = info->shiftamount;
1054: PetscBool allowzeropivot, zeropivotdetected;
1056: PetscFunctionBegin;
1057: allowzeropivot = PetscNot(A->erroriffailure);
1059: /* initialization */
1060: /* il and jl record the first nonzero element in each row of the accessing
1061: window U(0:k, k:mbs-1).
1062: jl: list of rows to be added to uneliminated rows
1063: i>= k: jl(i) is the first row to be added to row i
1064: i< k: jl(i) is the row following row i in some list of rows
1065: jl(i) = mbs indicates the end of a list
1066: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1067: row i of U */
1068: PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1069: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1070: il[0] = 0;
1071: for (i = 0; i < mbs; i++) jl[i] = mbs;
1073: ai = a->i;
1074: aj = a->j;
1075: aa = a->a;
1077: /* for each row k */
1078: for (k = 0; k < mbs; k++) {
1079: /*initialize k-th row with elements nonzero in row k of A */
1080: jmin = ai[k];
1081: jmax = ai[k + 1];
1082: ap = aa + jmin * 4;
1083: for (j = jmin; j < jmax; j++) {
1084: vj = aj[j]; /* block col. index */
1085: rtmp_ptr = rtmp + vj * 4;
1086: for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1087: }
1089: /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1090: PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1091: i = jl[k]; /* first row to be added to k_th row */
1093: while (i < k) {
1094: nexti = jl[i]; /* next row to be added to k_th row */
1096: /* compute multiplier */
1097: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1099: /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1100: diag = ba + i * 4;
1101: u = ba + ili * 4;
1102: uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1103: uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1104: uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1105: uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);
1107: /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1108: dk[0] += uik[0] * u[0] + uik[1] * u[1];
1109: dk[1] += uik[2] * u[0] + uik[3] * u[1];
1110: dk[2] += uik[0] * u[2] + uik[1] * u[3];
1111: dk[3] += uik[2] * u[2] + uik[3] * u[3];
1113: PetscCall(PetscLogFlops(16.0 * 2.0));
1115: /* update -U(i,k): ba[ili] = uik */
1116: PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));
1118: /* add multiple of row i to k-th row ... */
1119: jmin = ili + 1;
1120: jmax = bi[i + 1];
1121: if (jmin < jmax) {
1122: for (j = jmin; j < jmax; j++) {
1123: /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1124: rtmp_ptr = rtmp + bj[j] * 4;
1125: u = ba + j * 4;
1126: rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1127: rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1128: rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1129: rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1130: }
1131: PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));
1133: /* ... add i to row list for next nonzero entry */
1134: il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1135: j = bj[jmin];
1136: jl[i] = jl[j];
1137: jl[j] = i; /* update jl */
1138: }
1139: i = nexti;
1140: }
1142: /* save nonzero entries in k-th row of U ... */
1144: /* invert diagonal block */
1145: diag = ba + k * 4;
1146: PetscCall(PetscArraycpy(diag, dk, 4));
1147: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1148: if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1150: jmin = bi[k];
1151: jmax = bi[k + 1];
1152: if (jmin < jmax) {
1153: for (j = jmin; j < jmax; j++) {
1154: vj = bj[j]; /* block col. index of U */
1155: u = ba + j * 4;
1156: rtmp_ptr = rtmp + vj * 4;
1157: for (k1 = 0; k1 < 4; k1++) {
1158: *u++ = *rtmp_ptr;
1159: *rtmp_ptr++ = 0.0;
1160: }
1161: }
1163: /* ... add k to row list for first nonzero entry in k-th row */
1164: il[k] = jmin;
1165: i = bj[jmin];
1166: jl[k] = jl[i];
1167: jl[i] = k;
1168: }
1169: }
1171: PetscCall(PetscFree(rtmp));
1172: PetscCall(PetscFree2(il, jl));
1174: C->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175: C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178: C->assembled = PETSC_TRUE;
1179: C->preallocated = PETSC_TRUE;
1181: PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1182: PetscFunctionReturn(PETSC_SUCCESS);
1183: }
1185: /*
1186: Numeric U^T*D*U factorization for SBAIJ format.
1187: Version for blocks are 1 by 1.
1188: */
1189: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1190: {
1191: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1192: IS ip = b->row;
1193: const PetscInt *ai, *aj, *rip;
1194: PetscInt *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1195: PetscInt k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1196: MatScalar *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1197: PetscReal rs;
1198: FactorShiftCtx sctx;
1200: PetscFunctionBegin;
1201: /* MatPivotSetUp(): initialize shift context sctx */
1202: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1204: PetscCall(ISGetIndices(ip, &rip));
1205: if (!a->permute) {
1206: ai = a->i;
1207: aj = a->j;
1208: aa = a->a;
1209: } else {
1210: ai = a->inew;
1211: aj = a->jnew;
1212: nz = ai[mbs];
1213: PetscCall(PetscMalloc1(nz, &aa));
1214: a2anew = a->a2anew;
1215: bval = a->a;
1216: for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1217: }
1219: /* initialization */
1220: /* il and jl record the first nonzero element in each row of the accessing
1221: window U(0:k, k:mbs-1).
1222: jl: list of rows to be added to uneliminated rows
1223: i>= k: jl(i) is the first row to be added to row i
1224: i< k: jl(i) is the row following row i in some list of rows
1225: jl(i) = mbs indicates the end of a list
1226: il(i): points to the first nonzero element in columns k,...,mbs-1 of
1227: row i of U */
1228: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));
1230: do {
1231: sctx.newshift = PETSC_FALSE;
1232: il[0] = 0;
1233: for (i = 0; i < mbs; i++) {
1234: rtmp[i] = 0.0;
1235: jl[i] = mbs;
1236: }
1238: for (k = 0; k < mbs; k++) {
1239: /*initialize k-th row by the perm[k]-th row of A */
1240: jmin = ai[rip[k]];
1241: jmax = ai[rip[k] + 1];
1242: bval = ba + bi[k];
1243: for (j = jmin; j < jmax; j++) {
1244: col = rip[aj[j]];
1245: rtmp[col] = aa[j];
1246: *bval++ = 0.0; /* for in-place factorization */
1247: }
1249: /* shift the diagonal of the matrix */
1250: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1252: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1253: dk = rtmp[k];
1254: i = jl[k]; /* first row to be added to k_th row */
1256: while (i < k) {
1257: nexti = jl[i]; /* next row to be added to k_th row */
1259: /* compute multiplier, update diag(k) and U(i,k) */
1260: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1261: uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1262: dk += uikdi * ba[ili];
1263: ba[ili] = uikdi; /* -U(i,k) */
1265: /* add multiple of row i to k-th row */
1266: jmin = ili + 1;
1267: jmax = bi[i + 1];
1268: if (jmin < jmax) {
1269: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1270: PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));
1272: /* update il and jl for row i */
1273: il[i] = jmin;
1274: j = bj[jmin];
1275: jl[i] = jl[j];
1276: jl[j] = i;
1277: }
1278: i = nexti;
1279: }
1281: /* shift the diagonals when zero pivot is detected */
1282: /* compute rs=sum of abs(off-diagonal) */
1283: rs = 0.0;
1284: jmin = bi[k] + 1;
1285: nz = bi[k + 1] - jmin;
1286: if (nz) {
1287: bcol = bj + jmin;
1288: while (nz--) {
1289: rs += PetscAbsScalar(rtmp[*bcol]);
1290: bcol++;
1291: }
1292: }
1294: sctx.rs = rs;
1295: sctx.pv = dk;
1296: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1297: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1298: dk = sctx.pv;
1300: /* copy data into U(k,:) */
1301: ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1302: jmin = bi[k] + 1;
1303: jmax = bi[k + 1];
1304: if (jmin < jmax) {
1305: for (j = jmin; j < jmax; j++) {
1306: col = bj[j];
1307: ba[j] = rtmp[col];
1308: rtmp[col] = 0.0;
1309: }
1310: /* add the k-th row into il and jl */
1311: il[k] = jmin;
1312: i = bj[jmin];
1313: jl[k] = jl[i];
1314: jl[i] = k;
1315: }
1316: }
1317: } while (sctx.newshift);
1318: PetscCall(PetscFree3(rtmp, il, jl));
1319: if (a->permute) PetscCall(PetscFree(aa));
1321: PetscCall(ISRestoreIndices(ip, &rip));
1323: C->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
1324: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1325: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1326: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
1327: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
1328: C->assembled = PETSC_TRUE;
1329: C->preallocated = PETSC_TRUE;
1331: PetscCall(PetscLogFlops(C->rmap->N));
1332: if (sctx.nshift) {
1333: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1334: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1335: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1336: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1337: }
1338: }
1339: PetscFunctionReturn(PETSC_SUCCESS);
1340: }
1342: /*
1343: Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1344: Modified from MatCholeskyFactorNumeric_SeqAIJ()
1345: */
1346: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1347: {
1348: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1349: Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ *)B->data;
1350: PetscInt i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1351: PetscInt *ai = a->i, *aj = a->j, *ajtmp;
1352: PetscInt k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1353: MatScalar *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1354: FactorShiftCtx sctx;
1355: PetscReal rs;
1356: MatScalar d, *v;
1357: const PetscInt *adiag;
1359: PetscFunctionBegin;
1360: PetscCall(MatGetDiagonalMarkers_SeqSBAIJ(A, &adiag, NULL));
1361: PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));
1363: /* MatPivotSetUp(): initialize shift context sctx */
1364: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1366: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1367: sctx.shift_top = info->zeropivot;
1369: PetscCall(PetscArrayzero(rtmp, mbs));
1371: for (i = 0; i < mbs; i++) {
1372: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1373: d = aa[adiag[i]];
1374: rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1375: ajtmp = aj + ai[i] + 1; /* exclude diagonal */
1376: v = aa + ai[i] + 1;
1377: nz = ai[i + 1] - ai[i] - 1;
1378: for (j = 0; j < nz; j++) {
1379: rtmp[i] += PetscAbsScalar(v[j]);
1380: rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1381: }
1382: if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1383: }
1384: sctx.shift_top *= 1.1;
1385: sctx.nshift_max = 5;
1386: sctx.shift_lo = 0.;
1387: sctx.shift_hi = 1.;
1388: }
1390: /* allocate working arrays
1391: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1392: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1393: */
1394: do {
1395: sctx.newshift = PETSC_FALSE;
1397: for (i = 0; i < mbs; i++) c2r[i] = mbs;
1398: if (mbs) il[0] = 0;
1400: for (k = 0; k < mbs; k++) {
1401: /* zero rtmp */
1402: nz = bi[k + 1] - bi[k];
1403: bjtmp = bj + bi[k];
1404: for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;
1406: /* load in initial unfactored row */
1407: bval = ba + bi[k];
1408: jmin = ai[k];
1409: jmax = ai[k + 1];
1410: for (j = jmin; j < jmax; j++) {
1411: col = aj[j];
1412: rtmp[col] = aa[j];
1413: *bval++ = 0.0; /* for in-place factorization */
1414: }
1415: /* shift the diagonal of the matrix: ZeropivotApply() */
1416: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
1418: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1419: dk = rtmp[k];
1420: i = c2r[k]; /* first row to be added to k_th row */
1422: while (i < k) {
1423: nexti = c2r[i]; /* next row to be added to k_th row */
1425: /* compute multiplier, update diag(k) and U(i,k) */
1426: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1427: uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1428: dk += uikdi * ba[ili]; /* update diag[k] */
1429: ba[ili] = uikdi; /* -U(i,k) */
1431: /* add multiple of row i to k-th row */
1432: jmin = ili + 1;
1433: jmax = bi[i + 1];
1434: if (jmin < jmax) {
1435: for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1436: /* update il and c2r for row i */
1437: il[i] = jmin;
1438: j = bj[jmin];
1439: c2r[i] = c2r[j];
1440: c2r[j] = i;
1441: }
1442: i = nexti;
1443: }
1445: /* copy data into U(k,:) */
1446: rs = 0.0;
1447: jmin = bi[k];
1448: jmax = bi[k + 1] - 1;
1449: if (jmin < jmax) {
1450: for (j = jmin; j < jmax; j++) {
1451: col = bj[j];
1452: ba[j] = rtmp[col];
1453: rs += PetscAbsScalar(ba[j]);
1454: }
1455: /* add the k-th row into il and c2r */
1456: il[k] = jmin;
1457: i = bj[jmin];
1458: c2r[k] = c2r[i];
1459: c2r[i] = k;
1460: }
1462: sctx.rs = rs;
1463: sctx.pv = dk;
1464: PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1465: if (sctx.newshift) break;
1466: dk = sctx.pv;
1468: ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1469: }
1470: } while (sctx.newshift);
1472: PetscCall(PetscFree3(rtmp, il, c2r));
1474: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1475: B->ops->solves = MatSolves_SeqSBAIJ_1;
1476: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1477: B->ops->matsolve = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1478: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1479: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1481: B->assembled = PETSC_TRUE;
1482: B->preallocated = PETSC_TRUE;
1484: PetscCall(PetscLogFlops(B->rmap->n));
1486: /* MatPivotView() */
1487: if (sctx.nshift) {
1488: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1489: PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1490: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1491: PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1492: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1493: PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1494: }
1495: }
1496: PetscFunctionReturn(PETSC_SUCCESS);
1497: }
1499: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1500: {
1501: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1502: PetscInt i, j, mbs = a->mbs;
1503: PetscInt *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1504: PetscInt k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1505: MatScalar *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1506: PetscReal rs;
1507: FactorShiftCtx sctx;
1509: PetscFunctionBegin;
1510: /* MatPivotSetUp(): initialize shift context sctx */
1511: PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));
1513: /* initialization */
1514: /* il and jl record the first nonzero element in each row of the accessing
1515: window U(0:k, k:mbs-1).
1516: jl: list of rows to be added to uneliminated rows
1517: i>= k: jl(i) is the first row to be added to row i
1518: i< k: jl(i) is the row following row i in some list of rows
1519: jl(i) = mbs indicates the end of a list
1520: il(i): points to the first nonzero element in U(i,k:mbs-1)
1521: */
1522: PetscCall(PetscMalloc1(mbs, &rtmp));
1523: PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1525: do {
1526: sctx.newshift = PETSC_FALSE;
1527: il[0] = 0;
1528: for (i = 0; i < mbs; i++) {
1529: rtmp[i] = 0.0;
1530: jl[i] = mbs;
1531: }
1533: for (k = 0; k < mbs; k++) {
1534: /*initialize k-th row with elements nonzero in row perm(k) of A */
1535: nz = ai[k + 1] - ai[k];
1536: acol = aj + ai[k];
1537: aval = aa + ai[k];
1538: bval = ba + bi[k];
1539: while (nz--) {
1540: rtmp[*acol++] = *aval++;
1541: *bval++ = 0.0; /* for in-place factorization */
1542: }
1544: /* shift the diagonal of the matrix */
1545: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1547: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1548: dk = rtmp[k];
1549: i = jl[k]; /* first row to be added to k_th row */
1551: while (i < k) {
1552: nexti = jl[i]; /* next row to be added to k_th row */
1553: /* compute multiplier, update D(k) and U(i,k) */
1554: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1555: uikdi = -ba[ili] * ba[bi[i]];
1556: dk += uikdi * ba[ili];
1557: ba[ili] = uikdi; /* -U(i,k) */
1559: /* add multiple of row i to k-th row ... */
1560: jmin = ili + 1;
1561: nz = bi[i + 1] - jmin;
1562: if (nz > 0) {
1563: bcol = bj + jmin;
1564: bval = ba + jmin;
1565: PetscCall(PetscLogFlops(2.0 * nz));
1566: while (nz--) rtmp[*bcol++] += uikdi * (*bval++);
1568: /* update il and jl for i-th row */
1569: il[i] = jmin;
1570: j = bj[jmin];
1571: jl[i] = jl[j];
1572: jl[j] = i;
1573: }
1574: i = nexti;
1575: }
1577: /* shift the diagonals when zero pivot is detected */
1578: /* compute rs=sum of abs(off-diagonal) */
1579: rs = 0.0;
1580: jmin = bi[k] + 1;
1581: nz = bi[k + 1] - jmin;
1582: if (nz) {
1583: bcol = bj + jmin;
1584: while (nz--) {
1585: rs += PetscAbsScalar(rtmp[*bcol]);
1586: bcol++;
1587: }
1588: }
1590: sctx.rs = rs;
1591: sctx.pv = dk;
1592: PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1593: if (sctx.newshift) break; /* sctx.shift_amount is updated */
1594: dk = sctx.pv;
1596: /* copy data into U(k,:) */
1597: ba[bi[k]] = 1.0 / dk;
1598: jmin = bi[k] + 1;
1599: nz = bi[k + 1] - jmin;
1600: if (nz) {
1601: bcol = bj + jmin;
1602: bval = ba + jmin;
1603: while (nz--) {
1604: *bval++ = rtmp[*bcol];
1605: rtmp[*bcol++] = 0.0;
1606: }
1607: /* add k-th row into il and jl */
1608: il[k] = jmin;
1609: i = bj[jmin];
1610: jl[k] = jl[i];
1611: jl[i] = k;
1612: }
1613: } /* end of for (k = 0; k<mbs; k++) */
1614: } while (sctx.newshift);
1615: PetscCall(PetscFree(rtmp));
1616: PetscCall(PetscFree2(il, jl));
1618: C->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1619: C->ops->solves = MatSolves_SeqSBAIJ_1_inplace;
1620: C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1621: C->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1622: C->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1624: C->assembled = PETSC_TRUE;
1625: C->preallocated = PETSC_TRUE;
1627: PetscCall(PetscLogFlops(C->rmap->N));
1628: if (sctx.nshift) {
1629: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1630: PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1631: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1632: PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1633: }
1634: }
1635: PetscFunctionReturn(PETSC_SUCCESS);
1636: }
1638: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1639: {
1640: Mat C;
1642: PetscFunctionBegin;
1643: PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1644: PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1645: PetscCall(MatCholeskyFactorNumeric(C, A, info));
1647: A->ops->solve = C->ops->solve;
1648: A->ops->solvetranspose = C->ops->solvetranspose;
1650: PetscCall(MatHeaderMerge(A, &C));
1651: PetscFunctionReturn(PETSC_SUCCESS);
1652: }