Actual source code: baijfact3.c
1: /*
2: Factorization code for BAIJ format.
3: */
4: #include <../src/mat/impls/baij/seq/baij.h>
5: #include <petsc/private/kernels/blockinvert.h>
7: /*
8: This is used to set the numeric factorization for both LU and ILU symbolic factorization
9: */
10: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11: {
12: PetscFunctionBegin;
13: if (natural) {
14: switch (fact->rmap->bs) {
15: case 1:
16: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17: break;
18: case 2:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20: break;
21: case 3:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23: break;
24: case 4:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26: break;
27: case 5:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29: break;
30: case 6:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32: break;
33: case 7:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35: break;
36: case 9:
37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39: #else
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41: #endif
42: break;
43: case 15:
44: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45: break;
46: default:
47: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48: break;
49: }
50: } else {
51: switch (fact->rmap->bs) {
52: case 1:
53: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54: break;
55: case 2:
56: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57: break;
58: case 3:
59: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60: break;
61: case 4:
62: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63: break;
64: case 5:
65: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66: break;
67: case 6:
68: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69: break;
70: case 7:
71: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72: break;
73: default:
74: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75: break;
76: }
77: }
78: PetscFunctionReturn(PETSC_SUCCESS);
79: }
81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82: {
83: PetscFunctionBegin;
84: if (natural) {
85: switch (inA->rmap->bs) {
86: case 1:
87: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
88: break;
89: case 2:
90: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91: break;
92: case 3:
93: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94: break;
95: case 4:
96: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
97: break;
98: case 5:
99: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
100: break;
101: case 6:
102: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
103: break;
104: case 7:
105: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
106: break;
107: default:
108: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
109: break;
110: }
111: } else {
112: switch (inA->rmap->bs) {
113: case 1:
114: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
115: break;
116: case 2:
117: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
118: break;
119: case 3:
120: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
121: break;
122: case 4:
123: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
124: break;
125: case 5:
126: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
127: break;
128: case 6:
129: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
130: break;
131: case 7:
132: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
133: break;
134: default:
135: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
136: break;
137: }
138: }
139: PetscFunctionReturn(PETSC_SUCCESS);
140: }
142: /*
143: The symbolic factorization code is identical to that for AIJ format,
144: except for very small changes since this is now a SeqBAIJ datastructure.
145: NOT good code reuse.
146: */
147: #include <petscbt.h>
148: #include <../src/mat/utils/freespace.h>
150: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
151: {
152: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
153: PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
154: PetscBool row_identity, col_identity, both_identity;
155: IS isicol;
156: const PetscInt *r, *ic;
157: PetscInt i, *ai = a->i, *aj = a->j;
158: PetscInt *bi, *bj, *ajtmp;
159: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
160: PetscReal f;
161: PetscInt nlnk, *lnk, k, **bi_ptr;
162: PetscFreeSpaceList free_space = NULL, current_space = NULL;
163: PetscBT lnkbt;
164: PetscBool missing;
166: PetscFunctionBegin;
167: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
168: PetscCall(MatMissingDiagonal(A, &missing, &i));
169: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
171: if (bs > 1) { /* check shifttype */
172: PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
173: }
175: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
176: PetscCall(ISGetIndices(isrow, &r));
177: PetscCall(ISGetIndices(isicol, &ic));
179: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
180: PetscCall(PetscMalloc1(n + 1, &bi));
181: PetscCall(PetscMalloc1(n + 1, &bdiag));
182: bi[0] = bdiag[0] = 0;
184: /* linked list for storing column indices of the active row */
185: nlnk = n + 1;
186: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
188: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
190: /* initial FreeSpace size is f*(ai[n]+1) */
191: f = info->fill;
192: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
194: current_space = free_space;
196: for (i = 0; i < n; i++) {
197: /* copy previous fill into linked list */
198: nzi = 0;
199: nnz = ai[r[i] + 1] - ai[r[i]];
200: ajtmp = aj + ai[r[i]];
201: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
202: nzi += nlnk;
204: /* add pivot rows into linked list */
205: row = lnk[n];
206: while (row < i) {
207: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
208: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
209: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
210: nzi += nlnk;
211: row = lnk[row];
212: }
213: bi[i + 1] = bi[i] + nzi;
214: im[i] = nzi;
216: /* mark bdiag */
217: nzbd = 0;
218: nnz = nzi;
219: k = lnk[n];
220: while (nnz-- && k < i) {
221: nzbd++;
222: k = lnk[k];
223: }
224: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
226: /* if free space is not available, make more free space */
227: if (current_space->local_remaining < nzi) {
228: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
229: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
230: reallocs++;
231: }
233: /* copy data into free space, then initialize lnk */
234: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
236: bi_ptr[i] = current_space->array;
237: current_space->array += nzi;
238: current_space->local_used += nzi;
239: current_space->local_remaining -= nzi;
240: }
242: PetscCall(ISRestoreIndices(isrow, &r));
243: PetscCall(ISRestoreIndices(isicol, &ic));
245: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
246: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
247: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
248: PetscCall(PetscLLDestroy(lnk, lnkbt));
249: PetscCall(PetscFree2(bi_ptr, im));
251: /* put together the new matrix */
252: PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
253: b = (Mat_SeqBAIJ *)B->data;
255: b->free_ij = PETSC_TRUE;
256: PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
257: b->free_a = PETSC_TRUE;
258: b->j = bj;
259: b->i = bi;
260: b->diag = bdiag;
261: b->free_diag = PETSC_TRUE;
262: b->ilen = NULL;
263: b->imax = NULL;
264: b->row = isrow;
265: b->col = iscol;
266: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
268: PetscCall(PetscObjectReference((PetscObject)isrow));
269: PetscCall(PetscObjectReference((PetscObject)iscol));
270: b->icol = isicol;
271: PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
273: b->maxnz = b->nz = bdiag[0] + 1;
275: B->factortype = MAT_FACTOR_LU;
276: B->info.factor_mallocs = reallocs;
277: B->info.fill_ratio_given = f;
279: if (ai[n] != 0) {
280: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
281: } else {
282: B->info.fill_ratio_needed = 0.0;
283: }
284: #if defined(PETSC_USE_INFO)
285: if (ai[n] != 0) {
286: PetscReal af = B->info.fill_ratio_needed;
287: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
288: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
289: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
290: PetscCall(PetscInfo(A, "for best performance.\n"));
291: } else {
292: PetscCall(PetscInfo(A, "Empty matrix\n"));
293: }
294: #endif
296: PetscCall(ISIdentity(isrow, &row_identity));
297: PetscCall(ISIdentity(iscol, &col_identity));
299: both_identity = (PetscBool)(row_identity && col_identity);
301: PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
302: PetscFunctionReturn(PETSC_SUCCESS);
303: }
305: #if 0
306: // unused
307: static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
308: {
309: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
310: PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
311: PetscBool row_identity, col_identity, both_identity;
312: IS isicol;
313: const PetscInt *r, *ic;
314: PetscInt i, *ai = a->i, *aj = a->j;
315: PetscInt *bi, *bj, *ajtmp;
316: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
317: PetscReal f;
318: PetscInt nlnk, *lnk, k, **bi_ptr;
319: PetscFreeSpaceList free_space = NULL, current_space = NULL;
320: PetscBT lnkbt;
321: PetscBool missing;
323: PetscFunctionBegin;
324: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
325: PetscCall(MatMissingDiagonal(A, &missing, &i));
326: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
328: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
329: PetscCall(ISGetIndices(isrow, &r));
330: PetscCall(ISGetIndices(isicol, &ic));
332: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
333: PetscCall(PetscMalloc1(n + 1, &bi));
334: PetscCall(PetscMalloc1(n + 1, &bdiag));
336: bi[0] = bdiag[0] = 0;
338: /* linked list for storing column indices of the active row */
339: nlnk = n + 1;
340: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
342: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
344: /* initial FreeSpace size is f*(ai[n]+1) */
345: f = info->fill;
346: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
347: current_space = free_space;
349: for (i = 0; i < n; i++) {
350: /* copy previous fill into linked list */
351: nzi = 0;
352: nnz = ai[r[i] + 1] - ai[r[i]];
353: ajtmp = aj + ai[r[i]];
354: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
355: nzi += nlnk;
357: /* add pivot rows into linked list */
358: row = lnk[n];
359: while (row < i) {
360: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
361: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
362: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
363: nzi += nlnk;
364: row = lnk[row];
365: }
366: bi[i + 1] = bi[i] + nzi;
367: im[i] = nzi;
369: /* mark bdiag */
370: nzbd = 0;
371: nnz = nzi;
372: k = lnk[n];
373: while (nnz-- && k < i) {
374: nzbd++;
375: k = lnk[k];
376: }
377: bdiag[i] = bi[i] + nzbd;
379: /* if free space is not available, make more free space */
380: if (current_space->local_remaining < nzi) {
381: nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
382: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
383: reallocs++;
384: }
386: /* copy data into free space, then initialize lnk */
387: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
389: bi_ptr[i] = current_space->array;
390: current_space->array += nzi;
391: current_space->local_used += nzi;
392: current_space->local_remaining -= nzi;
393: }
394: #if defined(PETSC_USE_INFO)
395: if (ai[n] != 0) {
396: PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
397: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
398: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
399: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
400: PetscCall(PetscInfo(A, "for best performance.\n"));
401: } else {
402: PetscCall(PetscInfo(A, "Empty matrix\n"));
403: }
404: #endif
406: PetscCall(ISRestoreIndices(isrow, &r));
407: PetscCall(ISRestoreIndices(isicol, &ic));
409: /* destroy list of free space and other temporary array(s) */
410: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
411: PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
412: PetscCall(PetscLLDestroy(lnk, lnkbt));
413: PetscCall(PetscFree2(bi_ptr, im));
415: /* put together the new matrix */
416: PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
417: b = (Mat_SeqBAIJ *)B->data;
418: b->free_ij = PETSC_TRUE;
419: PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2,,sizeof(PetscScalar),(void **)&b->a));
420: b->free_a = PETSC_TRUE;
421: b->j = bj;
422: b->i = bi;
423: b->diag = bdiag;
424: b->free_diag = PETSC_TRUE;
425: b->ilen = NULL;
426: b->imax = NULL;
427: b->row = isrow;
428: b->col = iscol;
429: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
431: PetscCall(PetscObjectReference((PetscObject)isrow));
432: PetscCall(PetscObjectReference((PetscObject)iscol));
433: b->icol = isicol;
435: PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
437: b->maxnz = b->nz = bi[n];
439: B->factortype = MAT_FACTOR_LU;
440: B->info.factor_mallocs = reallocs;
441: B->info.fill_ratio_given = f;
443: if (ai[n] != 0) {
444: B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
445: } else {
446: B->info.fill_ratio_needed = 0.0;
447: }
449: PetscCall(ISIdentity(isrow, &row_identity));
450: PetscCall(ISIdentity(iscol, &col_identity));
452: both_identity = (PetscBool)(row_identity && col_identity);
454: PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
455: PetscFunctionReturn(PETSC_SUCCESS);
456: }
457: #endif