Actual source code: baijfact3.c
1: /*
2: Factorization code for BAIJ format.
3: */
4: #include <../src/mat/impls/baij/seq/baij.h>
5: #include <petsc/private/kernels/blockinvert.h>
7: /*
8: This is used to set the numeric factorization for both LU and ILU symbolic factorization
9: */
10: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11: {
12: PetscFunctionBegin;
13: if (natural) {
14: switch (fact->rmap->bs) {
15: case 1:
16: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17: break;
18: case 2:
19: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20: break;
21: case 3:
22: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23: break;
24: case 4:
25: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26: break;
27: case 5:
28: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29: break;
30: case 6:
31: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32: break;
33: case 7:
34: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35: break;
36: case 9:
37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
38: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39: #else
40: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41: #endif
42: break;
43: case 15:
44: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45: break;
46: default:
47: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48: break;
49: }
50: } else {
51: switch (fact->rmap->bs) {
52: case 1:
53: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54: break;
55: case 2:
56: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57: break;
58: case 3:
59: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60: break;
61: case 4:
62: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63: break;
64: case 5:
65: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66: break;
67: case 6:
68: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69: break;
70: case 7:
71: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72: break;
73: default:
74: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75: break;
76: }
77: }
78: PetscFunctionReturn(PETSC_SUCCESS);
79: }
81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82: {
83: PetscFunctionBegin;
84: if (natural) {
85: switch (inA->rmap->bs) {
86: case 1:
87: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
88: break;
89: case 2:
90: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91: break;
92: case 3:
93: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94: break;
95: case 4:
96: #if defined(PETSC_USE_REAL_MAT_SINGLE)
97: {
98: PetscBool sse_enabled_local;
99: PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL));
100: if (sse_enabled_local) {
101: #if defined(PETSC_HAVE_SSE)
102: int i, *AJ = a->j, nz = a->nz, n = a->mbs;
103: if (n == (unsigned short)n) {
104: unsigned short *aj = (unsigned short *)AJ;
105: for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];
107: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
108: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
110: PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n"));
111: } else {
112: /* Scale the column indices for easier indexing in MatSolve. */
113: /* for (i=0;i<nz;i++) { */
114: /* AJ[i] = AJ[i]*4; */
115: /* } */
116: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
117: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
119: PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n"));
120: }
121: #else
122: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
123: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
124: #endif
125: } else {
126: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
127: }
128: }
129: #else
130: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
131: #endif
132: break;
133: case 5:
134: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
135: break;
136: case 6:
137: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
138: break;
139: case 7:
140: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
141: break;
142: default:
143: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
144: break;
145: }
146: } else {
147: switch (inA->rmap->bs) {
148: case 1:
149: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
150: break;
151: case 2:
152: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
153: break;
154: case 3:
155: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
156: break;
157: case 4:
158: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
159: break;
160: case 5:
161: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
162: break;
163: case 6:
164: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
165: break;
166: case 7:
167: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
168: break;
169: default:
170: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
171: break;
172: }
173: }
174: PetscFunctionReturn(PETSC_SUCCESS);
175: }
177: /*
178: The symbolic factorization code is identical to that for AIJ format,
179: except for very small changes since this is now a SeqBAIJ datastructure.
180: NOT good code reuse.
181: */
182: #include <petscbt.h>
183: #include <../src/mat/utils/freespace.h>
185: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
186: {
187: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b;
188: PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
189: PetscBool row_identity, col_identity, both_identity;
190: IS isicol;
191: const PetscInt *r, *ic;
192: PetscInt i, *ai = a->i, *aj = a->j;
193: PetscInt *bi, *bj, *ajtmp;
194: PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
195: PetscReal f;
196: PetscInt nlnk, *lnk, k, **bi_ptr;
197: PetscFreeSpaceList free_space = NULL, current_space = NULL;
198: PetscBT lnkbt;
199: PetscBool missing;
201: PetscFunctionBegin;
202: PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
203: PetscCall(MatMissingDiagonal(A, &missing, &i));
204: PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
206: if (bs > 1) { /* check shifttype */
207: PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
208: }
210: PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
211: PetscCall(ISGetIndices(isrow, &r));
212: PetscCall(ISGetIndices(isicol, &ic));
214: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
215: PetscCall(PetscMalloc1(n + 1, &bi));
216: PetscCall(PetscMalloc1(n + 1, &bdiag));
217: bi[0] = bdiag[0] = 0;
219: /* linked list for storing column indices of the active row */
220: nlnk = n + 1;
221: PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
223: PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
225: /* initial FreeSpace size is f*(ai[n]+1) */
226: f = info->fill;
227: PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
229: current_space = free_space;
231: for (i = 0; i < n; i++) {
232: /* copy previous fill into linked list */
233: nzi = 0;
234: nnz = ai[r[i] + 1] - ai[r[i]];
235: ajtmp = aj + ai[r[i]];
236: PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
237: nzi += nlnk;
239: /* add pivot rows into linked list */
240: row = lnk[n];
241: while (row < i) {
242: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
243: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
244: PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
245: nzi += nlnk;
246: row = lnk[row];
247: }
248: bi[i + 1] = bi[i] + nzi;
249: im[i] = nzi;
251: /* mark bdiag */
252: nzbd = 0;
253: nnz = nzi;
254: k = lnk[n];
255: while (nnz-- && k < i) {
256: nzbd++;
257: k = lnk[k];
258: }
259: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
261: /* if free space is not available, make more free space */
262: if (current_space->local_remaining < nzi) {
263: nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
264: PetscCall(PetscFreeSpaceGet(nnz, ¤t_space));
265: reallocs++;
266: }
268: /* copy data into free space, then initialize lnk */
269: PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
271: bi_ptr[i] = current_space->array;
272: current_space->array += nzi;
273: current_space->local_used += nzi;
274: current_space->local_remaining -= nzi;
275: }
277: PetscCall(ISRestoreIndices(isrow, &r));
278: PetscCall(ISRestoreIndices(isicol, &ic));
280: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
281: PetscCall(PetscMalloc1(bi[n] + 1, &bj));
282: PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
283: PetscCall(PetscLLDestroy(lnk, lnkbt));
284: PetscCall(PetscFree2(bi_ptr, im));
286: /* put together the new matrix */
287: PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
288: b = (Mat_SeqBAIJ *)B->data;
290: b->free_ij = PETSC_TRUE;
291: PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
292: b->free_a = PETSC_TRUE;
293: b->j = bj;
294: b->i = bi;
295: b->diag = bdiag;
296: b->free_diag = PETSC_TRUE;
297: b->ilen = NULL;
298: b->imax = NULL;
299: b->row = isrow;
300: b->col = iscol;
301: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
303: PetscCall(PetscObjectReference((PetscObject)isrow));
304: PetscCall(PetscObjectReference((PetscObject)iscol));
305: b->icol = isicol;
306: PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
308: b->maxnz = b->nz = bdiag[0] + 1;
310: B->factortype = MAT_FACTOR_LU;
311: B->info.factor_mallocs = reallocs;
312: B->info.fill_ratio_given = f;
314: if (ai[n] != 0) {
315: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
316: } else {
317: B->info.fill_ratio_needed = 0.0;
318: }
319: #if defined(PETSC_USE_INFO)
320: if (ai[n] != 0) {
321: PetscReal af = B->info.fill_ratio_needed;
322: PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
323: PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
324: PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
325: PetscCall(PetscInfo(A, "for best performance.\n"));
326: } else {
327: PetscCall(PetscInfo(A, "Empty matrix\n"));
328: }
329: #endif
331: PetscCall(ISIdentity(isrow, &row_identity));
332: PetscCall(ISIdentity(iscol, &col_identity));
334: both_identity = (PetscBool)(row_identity && col_identity);
336: PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
337: PetscFunctionReturn(PETSC_SUCCESS);
338: }
#if 0
// unused
/*
   MatLUFactorSymbolic_SeqBAIJ_inplace - Disabled (compiled out) symbolic LU factorization that
   installs the in-place numeric routines.

   It follows the same steps as MatLUFactorSymbolic_SeqBAIJ but differs in bookkeeping: bdiag[i]
   holds bi[i] plus the count of entries left of the diagonal, the free space is flattened with
   PetscFreeSpaceContiguous(), the number of stored blocks is bi[n], and no shifttype check is made.

   Input Parameters:
+  A     - matrix to factor; must be square and must not be missing a diagonal entry
.  isrow - row ordering
.  iscol - column ordering
-  info  - factorization options; fill and pivotinblocks are read

   Output Parameter:
.  B - factor matrix that receives the computed structure
*/
static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
  PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
  PetscBool          row_identity, col_identity, both_identity;
  IS                 isicol;
  const PetscInt    *r, *ic;
  PetscInt           i, *ai = a->i, *aj = a->j;
  PetscInt          *bi, *bj, *ajtmp;
  PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
  PetscReal          f;
  PetscInt           nlnk, *lnk, k, **bi_ptr;
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  PetscBT            lnkbt;
  PetscBool          missing;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
  PetscCall(MatMissingDiagonal(A, &missing, &i));
  PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);

  PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
  PetscCall(ISGetIndices(isrow, &r));
  PetscCall(ISGetIndices(isicol, &ic));

  /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqBAIJ and freed separately */
  PetscCall(PetscMalloc1(n + 1, &bi));
  PetscCall(PetscMalloc1(n + 1, &bdiag));

  bi[0] = bdiag[0] = 0;

  /* linked list for storing column indices of the active row */
  nlnk = n + 1;
  PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));

  PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));

  /* initial FreeSpace size is f*(ai[n]+1) */
  f = info->fill;
  PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
  current_space = free_space;

  for (i = 0; i < n; i++) {
    /* copy previous fill into linked list */
    nzi   = 0;
    nnz   = ai[r[i] + 1] - ai[r[i]];
    ajtmp = aj + ai[r[i]];
    PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
    nzi += nlnk;

    /* add pivot rows into linked list */
    row = lnk[n];
    while (row < i) {
      nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
      ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
      PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
      nzi += nlnk;
      row = lnk[row];
    }
    bi[i + 1] = bi[i] + nzi;
    im[i]     = nzi;

    /* mark bdiag: count the entries of row i whose column index is < i */
    nzbd = 0;
    nnz  = nzi;
    k    = lnk[n];
    while (nnz-- && k < i) {
      nzbd++;
      k = lnk[k];
    }
    bdiag[i] = bi[i] + nzbd;

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < nzi) {
      nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
      PetscCall(PetscFreeSpaceGet(nnz, &current_space));
      reallocs++;
    }

    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));

    bi_ptr[i] = current_space->array;
    current_space->array += nzi;
    current_space->local_used += nzi;
    current_space->local_remaining -= nzi;
  }
  #if defined(PETSC_USE_INFO)
  if (ai[n] != 0) {
    PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
    PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
    PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
    PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
    PetscCall(PetscInfo(A, "for best performance.\n"));
  } else {
    PetscCall(PetscInfo(A, "Empty matrix\n"));
  }
  #endif

  PetscCall(ISRestoreIndices(isrow, &r));
  PetscCall(ISRestoreIndices(isicol, &ic));

  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscMalloc1(bi[n] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));
  PetscCall(PetscFree2(bi_ptr, im));

  /* put together the new matrix */
  PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
  b = (Mat_SeqBAIJ *)B->data;
  b->free_ij = PETSC_TRUE;
  PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
  b->free_a        = PETSC_TRUE;
  b->j             = bj;
  b->i             = bi;
  b->diag          = bdiag;
  b->free_diag     = PETSC_TRUE;
  b->ilen          = NULL;
  b->imax          = NULL;
  b->row           = isrow;
  b->col           = iscol;
  b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

  /* B keeps isrow and iscol, so take a reference on each; isicol is handed over to B */
  PetscCall(PetscObjectReference((PetscObject)isrow));
  PetscCall(PetscObjectReference((PetscObject)iscol));
  b->icol = isicol;

  PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));

  b->maxnz = b->nz = bi[n];

  B->factortype            = MAT_FACTOR_LU;
  B->info.factor_mallocs   = reallocs;
  B->info.fill_ratio_given = f;

  if (ai[n] != 0) {
    B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
  } else {
    B->info.fill_ratio_needed = 0.0;
  }

  PetscCall(ISIdentity(isrow, &row_identity));
  PetscCall(ISIdentity(iscol, &col_identity));

  both_identity = (PetscBool)(row_identity && col_identity);

  PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif