Actual source code: baij.c
1: /*
2: Defines the basic matrix operations for the BAIJ (compressed row)
3: matrix storage format.
4: */
5: #include <../src/mat/impls/baij/seq/baij.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/kernels/blockinvert.h>
8: #include <petsc/private/kernels/blockmatmult.h>
10: /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */
11: #define TYPE BAIJ
12: #define TYPE_BS
13: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
14: #undef TYPE_BS
15: #define TYPE_BS _BS
16: #define TYPE_BS_ON
17: #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h"
18: #undef TYPE_BS
19: #include "../src/mat/impls/aij/seq/seqhashmat.h"
20: #undef TYPE
21: #undef TYPE_BS_ON
23: #if defined(PETSC_HAVE_HYPRE)
24: PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
25: #endif
27: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
28: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
29: #endif
30: PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
32: static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
33: {
34: Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
35: PetscInt m, n, ib, jb, bs = A->rmap->bs;
36: MatScalar *a_val = a_aij->a;
38: PetscFunctionBegin;
39: PetscCall(MatGetSize(A, &m, &n));
40: PetscCall(PetscArrayzero(reductions, n));
41: if (type == NORM_2) {
42: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
43: for (jb = 0; jb < bs; jb++) {
44: for (ib = 0; ib < bs; ib++) {
45: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
46: a_val++;
47: }
48: }
49: }
50: } else if (type == NORM_1) {
51: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
52: for (jb = 0; jb < bs; jb++) {
53: for (ib = 0; ib < bs; ib++) {
54: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
55: a_val++;
56: }
57: }
58: }
59: } else if (type == NORM_INFINITY) {
60: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
61: for (jb = 0; jb < bs; jb++) {
62: for (ib = 0; ib < bs; ib++) {
63: PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
64: reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
65: a_val++;
66: }
67: }
68: }
69: } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
70: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
71: for (jb = 0; jb < bs; jb++) {
72: for (ib = 0; ib < bs; ib++) {
73: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
74: a_val++;
75: }
76: }
77: }
78: } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
79: for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
80: for (jb = 0; jb < bs; jb++) {
81: for (ib = 0; ib < bs; ib++) {
82: reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
83: a_val++;
84: }
85: }
86: }
87: } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
88: if (type == NORM_2) {
89: for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
90: } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
91: for (PetscInt i = 0; i < n; i++) reductions[i] /= m;
92: }
93: PetscFunctionReturn(PETSC_SUCCESS);
94: }
96: static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
97: {
98: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
99: PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
100: MatScalar *v = a->a, *odiag, *diag, work[25], *v_work;
101: PetscReal shift = 0.0;
102: PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE;
104: PetscFunctionBegin;
105: allowzeropivot = PetscNot(A->erroriffailure);
107: if (a->idiagvalid) {
108: if (values) *values = a->idiag;
109: PetscFunctionReturn(PETSC_SUCCESS);
110: }
111: PetscCall(MatMarkDiagonal_SeqBAIJ(A));
112: diag_offset = a->diag;
113: if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag));
114: diag = a->idiag;
115: if (values) *values = a->idiag;
116: /* factor and invert each block */
117: switch (bs) {
118: case 1:
119: for (i = 0; i < mbs; i++) {
120: odiag = v + 1 * diag_offset[i];
121: diag[0] = odiag[0];
123: if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
124: PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
125: A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
126: A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
127: A->factorerror_zeropivot_row = i;
128: PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
129: }
131: diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
132: diag += 1;
133: }
134: break;
135: case 2:
136: for (i = 0; i < mbs; i++) {
137: odiag = v + 4 * diag_offset[i];
138: diag[0] = odiag[0];
139: diag[1] = odiag[1];
140: diag[2] = odiag[2];
141: diag[3] = odiag[3];
142: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
143: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
144: diag += 4;
145: }
146: break;
147: case 3:
148: for (i = 0; i < mbs; i++) {
149: odiag = v + 9 * diag_offset[i];
150: diag[0] = odiag[0];
151: diag[1] = odiag[1];
152: diag[2] = odiag[2];
153: diag[3] = odiag[3];
154: diag[4] = odiag[4];
155: diag[5] = odiag[5];
156: diag[6] = odiag[6];
157: diag[7] = odiag[7];
158: diag[8] = odiag[8];
159: PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
160: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
161: diag += 9;
162: }
163: break;
164: case 4:
165: for (i = 0; i < mbs; i++) {
166: odiag = v + 16 * diag_offset[i];
167: PetscCall(PetscArraycpy(diag, odiag, 16));
168: PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
169: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
170: diag += 16;
171: }
172: break;
173: case 5:
174: for (i = 0; i < mbs; i++) {
175: odiag = v + 25 * diag_offset[i];
176: PetscCall(PetscArraycpy(diag, odiag, 25));
177: PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
178: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
179: diag += 25;
180: }
181: break;
182: case 6:
183: for (i = 0; i < mbs; i++) {
184: odiag = v + 36 * diag_offset[i];
185: PetscCall(PetscArraycpy(diag, odiag, 36));
186: PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
187: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
188: diag += 36;
189: }
190: break;
191: case 7:
192: for (i = 0; i < mbs; i++) {
193: odiag = v + 49 * diag_offset[i];
194: PetscCall(PetscArraycpy(diag, odiag, 49));
195: PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
196: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
197: diag += 49;
198: }
199: break;
200: default:
201: PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
202: for (i = 0; i < mbs; i++) {
203: odiag = v + bs2 * diag_offset[i];
204: PetscCall(PetscArraycpy(diag, odiag, bs2));
205: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
206: if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
207: diag += bs2;
208: }
209: PetscCall(PetscFree2(v_work, v_pivots));
210: }
211: a->idiagvalid = PETSC_TRUE;
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }
215: static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
216: {
217: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
218: PetscScalar *x, *work, *w, *workt, *t;
219: const MatScalar *v, *aa = a->a, *idiag;
220: const PetscScalar *b, *xb;
221: PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
222: PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
223: const PetscInt *diag, *ai = a->i, *aj = a->j, *vi;
225: PetscFunctionBegin;
226: its = its * lits;
227: PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
228: PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
229: PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
230: PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
231: PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
233: if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
235: if (!m) PetscFunctionReturn(PETSC_SUCCESS);
236: diag = a->diag;
237: idiag = a->idiag;
238: k = PetscMax(A->rmap->n, A->cmap->n);
239: if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
240: if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
241: if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
242: work = a->mult_work;
243: t = a->sor_workt;
244: w = a->sor_work;
246: PetscCall(VecGetArray(xx, &x));
247: PetscCall(VecGetArrayRead(bb, &b));
249: if (flag & SOR_ZERO_INITIAL_GUESS) {
250: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
251: switch (bs) {
252: case 1:
253: PetscKernel_v_gets_A_times_w_1(x, idiag, b);
254: t[0] = b[0];
255: i2 = 1;
256: idiag += 1;
257: for (i = 1; i < m; i++) {
258: v = aa + ai[i];
259: vi = aj + ai[i];
260: nz = diag[i] - ai[i];
261: s[0] = b[i2];
262: for (j = 0; j < nz; j++) {
263: xw[0] = x[vi[j]];
264: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
265: }
266: t[i2] = s[0];
267: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
268: x[i2] = xw[0];
269: idiag += 1;
270: i2 += 1;
271: }
272: break;
273: case 2:
274: PetscKernel_v_gets_A_times_w_2(x, idiag, b);
275: t[0] = b[0];
276: t[1] = b[1];
277: i2 = 2;
278: idiag += 4;
279: for (i = 1; i < m; i++) {
280: v = aa + 4 * ai[i];
281: vi = aj + ai[i];
282: nz = diag[i] - ai[i];
283: s[0] = b[i2];
284: s[1] = b[i2 + 1];
285: for (j = 0; j < nz; j++) {
286: idx = 2 * vi[j];
287: it = 4 * j;
288: xw[0] = x[idx];
289: xw[1] = x[1 + idx];
290: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
291: }
292: t[i2] = s[0];
293: t[i2 + 1] = s[1];
294: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
295: x[i2] = xw[0];
296: x[i2 + 1] = xw[1];
297: idiag += 4;
298: i2 += 2;
299: }
300: break;
301: case 3:
302: PetscKernel_v_gets_A_times_w_3(x, idiag, b);
303: t[0] = b[0];
304: t[1] = b[1];
305: t[2] = b[2];
306: i2 = 3;
307: idiag += 9;
308: for (i = 1; i < m; i++) {
309: v = aa + 9 * ai[i];
310: vi = aj + ai[i];
311: nz = diag[i] - ai[i];
312: s[0] = b[i2];
313: s[1] = b[i2 + 1];
314: s[2] = b[i2 + 2];
315: while (nz--) {
316: idx = 3 * (*vi++);
317: xw[0] = x[idx];
318: xw[1] = x[1 + idx];
319: xw[2] = x[2 + idx];
320: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
321: v += 9;
322: }
323: t[i2] = s[0];
324: t[i2 + 1] = s[1];
325: t[i2 + 2] = s[2];
326: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
327: x[i2] = xw[0];
328: x[i2 + 1] = xw[1];
329: x[i2 + 2] = xw[2];
330: idiag += 9;
331: i2 += 3;
332: }
333: break;
334: case 4:
335: PetscKernel_v_gets_A_times_w_4(x, idiag, b);
336: t[0] = b[0];
337: t[1] = b[1];
338: t[2] = b[2];
339: t[3] = b[3];
340: i2 = 4;
341: idiag += 16;
342: for (i = 1; i < m; i++) {
343: v = aa + 16 * ai[i];
344: vi = aj + ai[i];
345: nz = diag[i] - ai[i];
346: s[0] = b[i2];
347: s[1] = b[i2 + 1];
348: s[2] = b[i2 + 2];
349: s[3] = b[i2 + 3];
350: while (nz--) {
351: idx = 4 * (*vi++);
352: xw[0] = x[idx];
353: xw[1] = x[1 + idx];
354: xw[2] = x[2 + idx];
355: xw[3] = x[3 + idx];
356: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
357: v += 16;
358: }
359: t[i2] = s[0];
360: t[i2 + 1] = s[1];
361: t[i2 + 2] = s[2];
362: t[i2 + 3] = s[3];
363: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
364: x[i2] = xw[0];
365: x[i2 + 1] = xw[1];
366: x[i2 + 2] = xw[2];
367: x[i2 + 3] = xw[3];
368: idiag += 16;
369: i2 += 4;
370: }
371: break;
372: case 5:
373: PetscKernel_v_gets_A_times_w_5(x, idiag, b);
374: t[0] = b[0];
375: t[1] = b[1];
376: t[2] = b[2];
377: t[3] = b[3];
378: t[4] = b[4];
379: i2 = 5;
380: idiag += 25;
381: for (i = 1; i < m; i++) {
382: v = aa + 25 * ai[i];
383: vi = aj + ai[i];
384: nz = diag[i] - ai[i];
385: s[0] = b[i2];
386: s[1] = b[i2 + 1];
387: s[2] = b[i2 + 2];
388: s[3] = b[i2 + 3];
389: s[4] = b[i2 + 4];
390: while (nz--) {
391: idx = 5 * (*vi++);
392: xw[0] = x[idx];
393: xw[1] = x[1 + idx];
394: xw[2] = x[2 + idx];
395: xw[3] = x[3 + idx];
396: xw[4] = x[4 + idx];
397: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
398: v += 25;
399: }
400: t[i2] = s[0];
401: t[i2 + 1] = s[1];
402: t[i2 + 2] = s[2];
403: t[i2 + 3] = s[3];
404: t[i2 + 4] = s[4];
405: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
406: x[i2] = xw[0];
407: x[i2 + 1] = xw[1];
408: x[i2 + 2] = xw[2];
409: x[i2 + 3] = xw[3];
410: x[i2 + 4] = xw[4];
411: idiag += 25;
412: i2 += 5;
413: }
414: break;
415: case 6:
416: PetscKernel_v_gets_A_times_w_6(x, idiag, b);
417: t[0] = b[0];
418: t[1] = b[1];
419: t[2] = b[2];
420: t[3] = b[3];
421: t[4] = b[4];
422: t[5] = b[5];
423: i2 = 6;
424: idiag += 36;
425: for (i = 1; i < m; i++) {
426: v = aa + 36 * ai[i];
427: vi = aj + ai[i];
428: nz = diag[i] - ai[i];
429: s[0] = b[i2];
430: s[1] = b[i2 + 1];
431: s[2] = b[i2 + 2];
432: s[3] = b[i2 + 3];
433: s[4] = b[i2 + 4];
434: s[5] = b[i2 + 5];
435: while (nz--) {
436: idx = 6 * (*vi++);
437: xw[0] = x[idx];
438: xw[1] = x[1 + idx];
439: xw[2] = x[2 + idx];
440: xw[3] = x[3 + idx];
441: xw[4] = x[4 + idx];
442: xw[5] = x[5 + idx];
443: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
444: v += 36;
445: }
446: t[i2] = s[0];
447: t[i2 + 1] = s[1];
448: t[i2 + 2] = s[2];
449: t[i2 + 3] = s[3];
450: t[i2 + 4] = s[4];
451: t[i2 + 5] = s[5];
452: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
453: x[i2] = xw[0];
454: x[i2 + 1] = xw[1];
455: x[i2 + 2] = xw[2];
456: x[i2 + 3] = xw[3];
457: x[i2 + 4] = xw[4];
458: x[i2 + 5] = xw[5];
459: idiag += 36;
460: i2 += 6;
461: }
462: break;
463: case 7:
464: PetscKernel_v_gets_A_times_w_7(x, idiag, b);
465: t[0] = b[0];
466: t[1] = b[1];
467: t[2] = b[2];
468: t[3] = b[3];
469: t[4] = b[4];
470: t[5] = b[5];
471: t[6] = b[6];
472: i2 = 7;
473: idiag += 49;
474: for (i = 1; i < m; i++) {
475: v = aa + 49 * ai[i];
476: vi = aj + ai[i];
477: nz = diag[i] - ai[i];
478: s[0] = b[i2];
479: s[1] = b[i2 + 1];
480: s[2] = b[i2 + 2];
481: s[3] = b[i2 + 3];
482: s[4] = b[i2 + 4];
483: s[5] = b[i2 + 5];
484: s[6] = b[i2 + 6];
485: while (nz--) {
486: idx = 7 * (*vi++);
487: xw[0] = x[idx];
488: xw[1] = x[1 + idx];
489: xw[2] = x[2 + idx];
490: xw[3] = x[3 + idx];
491: xw[4] = x[4 + idx];
492: xw[5] = x[5 + idx];
493: xw[6] = x[6 + idx];
494: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
495: v += 49;
496: }
497: t[i2] = s[0];
498: t[i2 + 1] = s[1];
499: t[i2 + 2] = s[2];
500: t[i2 + 3] = s[3];
501: t[i2 + 4] = s[4];
502: t[i2 + 5] = s[5];
503: t[i2 + 6] = s[6];
504: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
505: x[i2] = xw[0];
506: x[i2 + 1] = xw[1];
507: x[i2 + 2] = xw[2];
508: x[i2 + 3] = xw[3];
509: x[i2 + 4] = xw[4];
510: x[i2 + 5] = xw[5];
511: x[i2 + 6] = xw[6];
512: idiag += 49;
513: i2 += 7;
514: }
515: break;
516: default:
517: PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
518: PetscCall(PetscArraycpy(t, b, bs));
519: i2 = bs;
520: idiag += bs2;
521: for (i = 1; i < m; i++) {
522: v = aa + bs2 * ai[i];
523: vi = aj + ai[i];
524: nz = diag[i] - ai[i];
526: PetscCall(PetscArraycpy(w, b + i2, bs));
527: /* copy all rows of x that are needed into contiguous space */
528: workt = work;
529: for (j = 0; j < nz; j++) {
530: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
531: workt += bs;
532: }
533: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
534: PetscCall(PetscArraycpy(t + i2, w, bs));
535: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
537: idiag += bs2;
538: i2 += bs;
539: }
540: break;
541: }
542: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
543: PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
544: xb = t;
545: } else xb = b;
546: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
547: idiag = a->idiag + bs2 * (a->mbs - 1);
548: i2 = bs * (m - 1);
549: switch (bs) {
550: case 1:
551: s[0] = xb[i2];
552: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
553: x[i2] = xw[0];
554: i2 -= 1;
555: for (i = m - 2; i >= 0; i--) {
556: v = aa + (diag[i] + 1);
557: vi = aj + diag[i] + 1;
558: nz = ai[i + 1] - diag[i] - 1;
559: s[0] = xb[i2];
560: for (j = 0; j < nz; j++) {
561: xw[0] = x[vi[j]];
562: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
563: }
564: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
565: x[i2] = xw[0];
566: idiag -= 1;
567: i2 -= 1;
568: }
569: break;
570: case 2:
571: s[0] = xb[i2];
572: s[1] = xb[i2 + 1];
573: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
574: x[i2] = xw[0];
575: x[i2 + 1] = xw[1];
576: i2 -= 2;
577: idiag -= 4;
578: for (i = m - 2; i >= 0; i--) {
579: v = aa + 4 * (diag[i] + 1);
580: vi = aj + diag[i] + 1;
581: nz = ai[i + 1] - diag[i] - 1;
582: s[0] = xb[i2];
583: s[1] = xb[i2 + 1];
584: for (j = 0; j < nz; j++) {
585: idx = 2 * vi[j];
586: it = 4 * j;
587: xw[0] = x[idx];
588: xw[1] = x[1 + idx];
589: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
590: }
591: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
592: x[i2] = xw[0];
593: x[i2 + 1] = xw[1];
594: idiag -= 4;
595: i2 -= 2;
596: }
597: break;
598: case 3:
599: s[0] = xb[i2];
600: s[1] = xb[i2 + 1];
601: s[2] = xb[i2 + 2];
602: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
603: x[i2] = xw[0];
604: x[i2 + 1] = xw[1];
605: x[i2 + 2] = xw[2];
606: i2 -= 3;
607: idiag -= 9;
608: for (i = m - 2; i >= 0; i--) {
609: v = aa + 9 * (diag[i] + 1);
610: vi = aj + diag[i] + 1;
611: nz = ai[i + 1] - diag[i] - 1;
612: s[0] = xb[i2];
613: s[1] = xb[i2 + 1];
614: s[2] = xb[i2 + 2];
615: while (nz--) {
616: idx = 3 * (*vi++);
617: xw[0] = x[idx];
618: xw[1] = x[1 + idx];
619: xw[2] = x[2 + idx];
620: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
621: v += 9;
622: }
623: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
624: x[i2] = xw[0];
625: x[i2 + 1] = xw[1];
626: x[i2 + 2] = xw[2];
627: idiag -= 9;
628: i2 -= 3;
629: }
630: break;
631: case 4:
632: s[0] = xb[i2];
633: s[1] = xb[i2 + 1];
634: s[2] = xb[i2 + 2];
635: s[3] = xb[i2 + 3];
636: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
637: x[i2] = xw[0];
638: x[i2 + 1] = xw[1];
639: x[i2 + 2] = xw[2];
640: x[i2 + 3] = xw[3];
641: i2 -= 4;
642: idiag -= 16;
643: for (i = m - 2; i >= 0; i--) {
644: v = aa + 16 * (diag[i] + 1);
645: vi = aj + diag[i] + 1;
646: nz = ai[i + 1] - diag[i] - 1;
647: s[0] = xb[i2];
648: s[1] = xb[i2 + 1];
649: s[2] = xb[i2 + 2];
650: s[3] = xb[i2 + 3];
651: while (nz--) {
652: idx = 4 * (*vi++);
653: xw[0] = x[idx];
654: xw[1] = x[1 + idx];
655: xw[2] = x[2 + idx];
656: xw[3] = x[3 + idx];
657: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
658: v += 16;
659: }
660: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
661: x[i2] = xw[0];
662: x[i2 + 1] = xw[1];
663: x[i2 + 2] = xw[2];
664: x[i2 + 3] = xw[3];
665: idiag -= 16;
666: i2 -= 4;
667: }
668: break;
669: case 5:
670: s[0] = xb[i2];
671: s[1] = xb[i2 + 1];
672: s[2] = xb[i2 + 2];
673: s[3] = xb[i2 + 3];
674: s[4] = xb[i2 + 4];
675: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
676: x[i2] = xw[0];
677: x[i2 + 1] = xw[1];
678: x[i2 + 2] = xw[2];
679: x[i2 + 3] = xw[3];
680: x[i2 + 4] = xw[4];
681: i2 -= 5;
682: idiag -= 25;
683: for (i = m - 2; i >= 0; i--) {
684: v = aa + 25 * (diag[i] + 1);
685: vi = aj + diag[i] + 1;
686: nz = ai[i + 1] - diag[i] - 1;
687: s[0] = xb[i2];
688: s[1] = xb[i2 + 1];
689: s[2] = xb[i2 + 2];
690: s[3] = xb[i2 + 3];
691: s[4] = xb[i2 + 4];
692: while (nz--) {
693: idx = 5 * (*vi++);
694: xw[0] = x[idx];
695: xw[1] = x[1 + idx];
696: xw[2] = x[2 + idx];
697: xw[3] = x[3 + idx];
698: xw[4] = x[4 + idx];
699: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
700: v += 25;
701: }
702: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
703: x[i2] = xw[0];
704: x[i2 + 1] = xw[1];
705: x[i2 + 2] = xw[2];
706: x[i2 + 3] = xw[3];
707: x[i2 + 4] = xw[4];
708: idiag -= 25;
709: i2 -= 5;
710: }
711: break;
712: case 6:
713: s[0] = xb[i2];
714: s[1] = xb[i2 + 1];
715: s[2] = xb[i2 + 2];
716: s[3] = xb[i2 + 3];
717: s[4] = xb[i2 + 4];
718: s[5] = xb[i2 + 5];
719: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
720: x[i2] = xw[0];
721: x[i2 + 1] = xw[1];
722: x[i2 + 2] = xw[2];
723: x[i2 + 3] = xw[3];
724: x[i2 + 4] = xw[4];
725: x[i2 + 5] = xw[5];
726: i2 -= 6;
727: idiag -= 36;
728: for (i = m - 2; i >= 0; i--) {
729: v = aa + 36 * (diag[i] + 1);
730: vi = aj + diag[i] + 1;
731: nz = ai[i + 1] - diag[i] - 1;
732: s[0] = xb[i2];
733: s[1] = xb[i2 + 1];
734: s[2] = xb[i2 + 2];
735: s[3] = xb[i2 + 3];
736: s[4] = xb[i2 + 4];
737: s[5] = xb[i2 + 5];
738: while (nz--) {
739: idx = 6 * (*vi++);
740: xw[0] = x[idx];
741: xw[1] = x[1 + idx];
742: xw[2] = x[2 + idx];
743: xw[3] = x[3 + idx];
744: xw[4] = x[4 + idx];
745: xw[5] = x[5 + idx];
746: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
747: v += 36;
748: }
749: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
750: x[i2] = xw[0];
751: x[i2 + 1] = xw[1];
752: x[i2 + 2] = xw[2];
753: x[i2 + 3] = xw[3];
754: x[i2 + 4] = xw[4];
755: x[i2 + 5] = xw[5];
756: idiag -= 36;
757: i2 -= 6;
758: }
759: break;
760: case 7:
761: s[0] = xb[i2];
762: s[1] = xb[i2 + 1];
763: s[2] = xb[i2 + 2];
764: s[3] = xb[i2 + 3];
765: s[4] = xb[i2 + 4];
766: s[5] = xb[i2 + 5];
767: s[6] = xb[i2 + 6];
768: PetscKernel_v_gets_A_times_w_7(x, idiag, b);
769: x[i2] = xw[0];
770: x[i2 + 1] = xw[1];
771: x[i2 + 2] = xw[2];
772: x[i2 + 3] = xw[3];
773: x[i2 + 4] = xw[4];
774: x[i2 + 5] = xw[5];
775: x[i2 + 6] = xw[6];
776: i2 -= 7;
777: idiag -= 49;
778: for (i = m - 2; i >= 0; i--) {
779: v = aa + 49 * (diag[i] + 1);
780: vi = aj + diag[i] + 1;
781: nz = ai[i + 1] - diag[i] - 1;
782: s[0] = xb[i2];
783: s[1] = xb[i2 + 1];
784: s[2] = xb[i2 + 2];
785: s[3] = xb[i2 + 3];
786: s[4] = xb[i2 + 4];
787: s[5] = xb[i2 + 5];
788: s[6] = xb[i2 + 6];
789: while (nz--) {
790: idx = 7 * (*vi++);
791: xw[0] = x[idx];
792: xw[1] = x[1 + idx];
793: xw[2] = x[2 + idx];
794: xw[3] = x[3 + idx];
795: xw[4] = x[4 + idx];
796: xw[5] = x[5 + idx];
797: xw[6] = x[6 + idx];
798: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
799: v += 49;
800: }
801: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
802: x[i2] = xw[0];
803: x[i2 + 1] = xw[1];
804: x[i2 + 2] = xw[2];
805: x[i2 + 3] = xw[3];
806: x[i2 + 4] = xw[4];
807: x[i2 + 5] = xw[5];
808: x[i2 + 6] = xw[6];
809: idiag -= 49;
810: i2 -= 7;
811: }
812: break;
813: default:
814: PetscCall(PetscArraycpy(w, xb + i2, bs));
815: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
816: i2 -= bs;
817: idiag -= bs2;
818: for (i = m - 2; i >= 0; i--) {
819: v = aa + bs2 * (diag[i] + 1);
820: vi = aj + diag[i] + 1;
821: nz = ai[i + 1] - diag[i] - 1;
823: PetscCall(PetscArraycpy(w, xb + i2, bs));
824: /* copy all rows of x that are needed into contiguous space */
825: workt = work;
826: for (j = 0; j < nz; j++) {
827: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
828: workt += bs;
829: }
830: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
831: PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
833: idiag -= bs2;
834: i2 -= bs;
835: }
836: break;
837: }
838: PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
839: }
840: its--;
841: }
842: while (its--) {
843: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
844: idiag = a->idiag;
845: i2 = 0;
846: switch (bs) {
847: case 1:
848: for (i = 0; i < m; i++) {
849: v = aa + ai[i];
850: vi = aj + ai[i];
851: nz = ai[i + 1] - ai[i];
852: s[0] = b[i2];
853: for (j = 0; j < nz; j++) {
854: xw[0] = x[vi[j]];
855: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
856: }
857: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
858: x[i2] += xw[0];
859: idiag += 1;
860: i2 += 1;
861: }
862: break;
863: case 2:
864: for (i = 0; i < m; i++) {
865: v = aa + 4 * ai[i];
866: vi = aj + ai[i];
867: nz = ai[i + 1] - ai[i];
868: s[0] = b[i2];
869: s[1] = b[i2 + 1];
870: for (j = 0; j < nz; j++) {
871: idx = 2 * vi[j];
872: it = 4 * j;
873: xw[0] = x[idx];
874: xw[1] = x[1 + idx];
875: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
876: }
877: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
878: x[i2] += xw[0];
879: x[i2 + 1] += xw[1];
880: idiag += 4;
881: i2 += 2;
882: }
883: break;
884: case 3:
885: for (i = 0; i < m; i++) {
886: v = aa + 9 * ai[i];
887: vi = aj + ai[i];
888: nz = ai[i + 1] - ai[i];
889: s[0] = b[i2];
890: s[1] = b[i2 + 1];
891: s[2] = b[i2 + 2];
892: while (nz--) {
893: idx = 3 * (*vi++);
894: xw[0] = x[idx];
895: xw[1] = x[1 + idx];
896: xw[2] = x[2 + idx];
897: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
898: v += 9;
899: }
900: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
901: x[i2] += xw[0];
902: x[i2 + 1] += xw[1];
903: x[i2 + 2] += xw[2];
904: idiag += 9;
905: i2 += 3;
906: }
907: break;
908: case 4:
909: for (i = 0; i < m; i++) {
910: v = aa + 16 * ai[i];
911: vi = aj + ai[i];
912: nz = ai[i + 1] - ai[i];
913: s[0] = b[i2];
914: s[1] = b[i2 + 1];
915: s[2] = b[i2 + 2];
916: s[3] = b[i2 + 3];
917: while (nz--) {
918: idx = 4 * (*vi++);
919: xw[0] = x[idx];
920: xw[1] = x[1 + idx];
921: xw[2] = x[2 + idx];
922: xw[3] = x[3 + idx];
923: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
924: v += 16;
925: }
926: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
927: x[i2] += xw[0];
928: x[i2 + 1] += xw[1];
929: x[i2 + 2] += xw[2];
930: x[i2 + 3] += xw[3];
931: idiag += 16;
932: i2 += 4;
933: }
934: break;
935: case 5:
936: for (i = 0; i < m; i++) {
937: v = aa + 25 * ai[i];
938: vi = aj + ai[i];
939: nz = ai[i + 1] - ai[i];
940: s[0] = b[i2];
941: s[1] = b[i2 + 1];
942: s[2] = b[i2 + 2];
943: s[3] = b[i2 + 3];
944: s[4] = b[i2 + 4];
945: while (nz--) {
946: idx = 5 * (*vi++);
947: xw[0] = x[idx];
948: xw[1] = x[1 + idx];
949: xw[2] = x[2 + idx];
950: xw[3] = x[3 + idx];
951: xw[4] = x[4 + idx];
952: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
953: v += 25;
954: }
955: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
956: x[i2] += xw[0];
957: x[i2 + 1] += xw[1];
958: x[i2 + 2] += xw[2];
959: x[i2 + 3] += xw[3];
960: x[i2 + 4] += xw[4];
961: idiag += 25;
962: i2 += 5;
963: }
964: break;
965: case 6:
966: for (i = 0; i < m; i++) {
967: v = aa + 36 * ai[i];
968: vi = aj + ai[i];
969: nz = ai[i + 1] - ai[i];
970: s[0] = b[i2];
971: s[1] = b[i2 + 1];
972: s[2] = b[i2 + 2];
973: s[3] = b[i2 + 3];
974: s[4] = b[i2 + 4];
975: s[5] = b[i2 + 5];
976: while (nz--) {
977: idx = 6 * (*vi++);
978: xw[0] = x[idx];
979: xw[1] = x[1 + idx];
980: xw[2] = x[2 + idx];
981: xw[3] = x[3 + idx];
982: xw[4] = x[4 + idx];
983: xw[5] = x[5 + idx];
984: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
985: v += 36;
986: }
987: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
988: x[i2] += xw[0];
989: x[i2 + 1] += xw[1];
990: x[i2 + 2] += xw[2];
991: x[i2 + 3] += xw[3];
992: x[i2 + 4] += xw[4];
993: x[i2 + 5] += xw[5];
994: idiag += 36;
995: i2 += 6;
996: }
997: break;
998: case 7:
999: for (i = 0; i < m; i++) {
1000: v = aa + 49 * ai[i];
1001: vi = aj + ai[i];
1002: nz = ai[i + 1] - ai[i];
1003: s[0] = b[i2];
1004: s[1] = b[i2 + 1];
1005: s[2] = b[i2 + 2];
1006: s[3] = b[i2 + 3];
1007: s[4] = b[i2 + 4];
1008: s[5] = b[i2 + 5];
1009: s[6] = b[i2 + 6];
1010: while (nz--) {
1011: idx = 7 * (*vi++);
1012: xw[0] = x[idx];
1013: xw[1] = x[1 + idx];
1014: xw[2] = x[2 + idx];
1015: xw[3] = x[3 + idx];
1016: xw[4] = x[4 + idx];
1017: xw[5] = x[5 + idx];
1018: xw[6] = x[6 + idx];
1019: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1020: v += 49;
1021: }
1022: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1023: x[i2] += xw[0];
1024: x[i2 + 1] += xw[1];
1025: x[i2 + 2] += xw[2];
1026: x[i2 + 3] += xw[3];
1027: x[i2 + 4] += xw[4];
1028: x[i2 + 5] += xw[5];
1029: x[i2 + 6] += xw[6];
1030: idiag += 49;
1031: i2 += 7;
1032: }
1033: break;
1034: default:
1035: for (i = 0; i < m; i++) {
1036: v = aa + bs2 * ai[i];
1037: vi = aj + ai[i];
1038: nz = ai[i + 1] - ai[i];
1040: PetscCall(PetscArraycpy(w, b + i2, bs));
1041: /* copy all rows of x that are needed into contiguous space */
1042: workt = work;
1043: for (j = 0; j < nz; j++) {
1044: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1045: workt += bs;
1046: }
1047: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1048: PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1050: idiag += bs2;
1051: i2 += bs;
1052: }
1053: break;
1054: }
1055: PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1056: }
1057: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1058: idiag = a->idiag + bs2 * (a->mbs - 1);
1059: i2 = bs * (m - 1);
1060: switch (bs) {
1061: case 1:
1062: for (i = m - 1; i >= 0; i--) {
1063: v = aa + ai[i];
1064: vi = aj + ai[i];
1065: nz = ai[i + 1] - ai[i];
1066: s[0] = b[i2];
1067: for (j = 0; j < nz; j++) {
1068: xw[0] = x[vi[j]];
1069: PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1070: }
1071: PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1072: x[i2] += xw[0];
1073: idiag -= 1;
1074: i2 -= 1;
1075: }
1076: break;
1077: case 2:
1078: for (i = m - 1; i >= 0; i--) {
1079: v = aa + 4 * ai[i];
1080: vi = aj + ai[i];
1081: nz = ai[i + 1] - ai[i];
1082: s[0] = b[i2];
1083: s[1] = b[i2 + 1];
1084: for (j = 0; j < nz; j++) {
1085: idx = 2 * vi[j];
1086: it = 4 * j;
1087: xw[0] = x[idx];
1088: xw[1] = x[1 + idx];
1089: PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1090: }
1091: PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
1092: x[i2] += xw[0];
1093: x[i2 + 1] += xw[1];
1094: idiag -= 4;
1095: i2 -= 2;
1096: }
1097: break;
1098: case 3:
1099: for (i = m - 1; i >= 0; i--) {
1100: v = aa + 9 * ai[i];
1101: vi = aj + ai[i];
1102: nz = ai[i + 1] - ai[i];
1103: s[0] = b[i2];
1104: s[1] = b[i2 + 1];
1105: s[2] = b[i2 + 2];
1106: while (nz--) {
1107: idx = 3 * (*vi++);
1108: xw[0] = x[idx];
1109: xw[1] = x[1 + idx];
1110: xw[2] = x[2 + idx];
1111: PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1112: v += 9;
1113: }
1114: PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
1115: x[i2] += xw[0];
1116: x[i2 + 1] += xw[1];
1117: x[i2 + 2] += xw[2];
1118: idiag -= 9;
1119: i2 -= 3;
1120: }
1121: break;
1122: case 4:
1123: for (i = m - 1; i >= 0; i--) {
1124: v = aa + 16 * ai[i];
1125: vi = aj + ai[i];
1126: nz = ai[i + 1] - ai[i];
1127: s[0] = b[i2];
1128: s[1] = b[i2 + 1];
1129: s[2] = b[i2 + 2];
1130: s[3] = b[i2 + 3];
1131: while (nz--) {
1132: idx = 4 * (*vi++);
1133: xw[0] = x[idx];
1134: xw[1] = x[1 + idx];
1135: xw[2] = x[2 + idx];
1136: xw[3] = x[3 + idx];
1137: PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1138: v += 16;
1139: }
1140: PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
1141: x[i2] += xw[0];
1142: x[i2 + 1] += xw[1];
1143: x[i2 + 2] += xw[2];
1144: x[i2 + 3] += xw[3];
1145: idiag -= 16;
1146: i2 -= 4;
1147: }
1148: break;
1149: case 5:
1150: for (i = m - 1; i >= 0; i--) {
1151: v = aa + 25 * ai[i];
1152: vi = aj + ai[i];
1153: nz = ai[i + 1] - ai[i];
1154: s[0] = b[i2];
1155: s[1] = b[i2 + 1];
1156: s[2] = b[i2 + 2];
1157: s[3] = b[i2 + 3];
1158: s[4] = b[i2 + 4];
1159: while (nz--) {
1160: idx = 5 * (*vi++);
1161: xw[0] = x[idx];
1162: xw[1] = x[1 + idx];
1163: xw[2] = x[2 + idx];
1164: xw[3] = x[3 + idx];
1165: xw[4] = x[4 + idx];
1166: PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1167: v += 25;
1168: }
1169: PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
1170: x[i2] += xw[0];
1171: x[i2 + 1] += xw[1];
1172: x[i2 + 2] += xw[2];
1173: x[i2 + 3] += xw[3];
1174: x[i2 + 4] += xw[4];
1175: idiag -= 25;
1176: i2 -= 5;
1177: }
1178: break;
1179: case 6:
1180: for (i = m - 1; i >= 0; i--) {
1181: v = aa + 36 * ai[i];
1182: vi = aj + ai[i];
1183: nz = ai[i + 1] - ai[i];
1184: s[0] = b[i2];
1185: s[1] = b[i2 + 1];
1186: s[2] = b[i2 + 2];
1187: s[3] = b[i2 + 3];
1188: s[4] = b[i2 + 4];
1189: s[5] = b[i2 + 5];
1190: while (nz--) {
1191: idx = 6 * (*vi++);
1192: xw[0] = x[idx];
1193: xw[1] = x[1 + idx];
1194: xw[2] = x[2 + idx];
1195: xw[3] = x[3 + idx];
1196: xw[4] = x[4 + idx];
1197: xw[5] = x[5 + idx];
1198: PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1199: v += 36;
1200: }
1201: PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
1202: x[i2] += xw[0];
1203: x[i2 + 1] += xw[1];
1204: x[i2 + 2] += xw[2];
1205: x[i2 + 3] += xw[3];
1206: x[i2 + 4] += xw[4];
1207: x[i2 + 5] += xw[5];
1208: idiag -= 36;
1209: i2 -= 6;
1210: }
1211: break;
1212: case 7:
1213: for (i = m - 1; i >= 0; i--) {
1214: v = aa + 49 * ai[i];
1215: vi = aj + ai[i];
1216: nz = ai[i + 1] - ai[i];
1217: s[0] = b[i2];
1218: s[1] = b[i2 + 1];
1219: s[2] = b[i2 + 2];
1220: s[3] = b[i2 + 3];
1221: s[4] = b[i2 + 4];
1222: s[5] = b[i2 + 5];
1223: s[6] = b[i2 + 6];
1224: while (nz--) {
1225: idx = 7 * (*vi++);
1226: xw[0] = x[idx];
1227: xw[1] = x[1 + idx];
1228: xw[2] = x[2 + idx];
1229: xw[3] = x[3 + idx];
1230: xw[4] = x[4 + idx];
1231: xw[5] = x[5 + idx];
1232: xw[6] = x[6 + idx];
1233: PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1234: v += 49;
1235: }
1236: PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
1237: x[i2] += xw[0];
1238: x[i2 + 1] += xw[1];
1239: x[i2 + 2] += xw[2];
1240: x[i2 + 3] += xw[3];
1241: x[i2 + 4] += xw[4];
1242: x[i2 + 5] += xw[5];
1243: x[i2 + 6] += xw[6];
1244: idiag -= 49;
1245: i2 -= 7;
1246: }
1247: break;
1248: default:
1249: for (i = m - 1; i >= 0; i--) {
1250: v = aa + bs2 * ai[i];
1251: vi = aj + ai[i];
1252: nz = ai[i + 1] - ai[i];
1254: PetscCall(PetscArraycpy(w, b + i2, bs));
1255: /* copy all rows of x that are needed into contiguous space */
1256: workt = work;
1257: for (j = 0; j < nz; j++) {
1258: PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1259: workt += bs;
1260: }
1261: PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1262: PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1264: idiag -= bs2;
1265: i2 -= bs;
1266: }
1267: break;
1268: }
1269: PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1270: }
1271: }
1272: PetscCall(VecRestoreArray(xx, &x));
1273: PetscCall(VecRestoreArrayRead(bb, &b));
1274: PetscFunctionReturn(PETSC_SUCCESS);
1275: }
1277: /*
1278: Special version for direct calls from Fortran (Used in PETSc-fun3d)
1279: */
1280: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1281: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1282: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1283: #define matsetvaluesblocked4_ matsetvaluesblocked4
1284: #endif
1286: PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1287: {
1288: Mat A = *AA;
1289: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1290: PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1291: PetscInt *ai = a->i, *ailen = a->ilen;
1292: PetscInt *aj = a->j, stepval, lastcol = -1;
1293: const PetscScalar *value = v;
1294: MatScalar *ap, *aa = a->a, *bap;
1296: PetscFunctionBegin;
1297: if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1298: stepval = (n - 1) * 4;
1299: for (k = 0; k < m; k++) { /* loop over added rows */
1300: row = im[k];
1301: rp = aj + ai[row];
1302: ap = aa + 16 * ai[row];
1303: nrow = ailen[row];
1304: low = 0;
1305: high = nrow;
1306: for (l = 0; l < n; l++) { /* loop over added columns */
1307: col = in[l];
1308: if (col <= lastcol) low = 0;
1309: else high = nrow;
1310: lastcol = col;
1311: value = v + k * (stepval + 4 + l) * 4;
1312: while (high - low > 7) {
1313: t = (low + high) / 2;
1314: if (rp[t] > col) high = t;
1315: else low = t;
1316: }
1317: for (i = low; i < high; i++) {
1318: if (rp[i] > col) break;
1319: if (rp[i] == col) {
1320: bap = ap + 16 * i;
1321: for (ii = 0; ii < 4; ii++, value += stepval) {
1322: for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1323: }
1324: goto noinsert2;
1325: }
1326: }
1327: N = nrow++ - 1;
1328: high++; /* added new column index thus must search to one higher than before */
1329: /* shift up all the later entries in this row */
1330: for (ii = N; ii >= i; ii--) {
1331: rp[ii + 1] = rp[ii];
1332: PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1333: }
1334: if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1335: rp[i] = col;
1336: bap = ap + 16 * i;
1337: for (ii = 0; ii < 4; ii++, value += stepval) {
1338: for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1339: }
1340: noinsert2:;
1341: low = i;
1342: }
1343: ailen[row] = nrow;
1344: }
1345: PetscFunctionReturnVoid();
1346: }
1348: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1349: #define matsetvalues4_ MATSETVALUES4
1350: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1351: #define matsetvalues4_ matsetvalues4
1352: #endif
1354: PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1355: {
1356: Mat A = *AA;
1357: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1358: PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1359: PetscInt *ai = a->i, *ailen = a->ilen;
1360: PetscInt *aj = a->j, brow, bcol;
1361: PetscInt ridx, cidx, lastcol = -1;
1362: MatScalar *ap, value, *aa = a->a, *bap;
1364: PetscFunctionBegin;
1365: for (k = 0; k < m; k++) { /* loop over added rows */
1366: row = im[k];
1367: brow = row / 4;
1368: rp = aj + ai[brow];
1369: ap = aa + 16 * ai[brow];
1370: nrow = ailen[brow];
1371: low = 0;
1372: high = nrow;
1373: for (l = 0; l < n; l++) { /* loop over added columns */
1374: col = in[l];
1375: bcol = col / 4;
1376: ridx = row % 4;
1377: cidx = col % 4;
1378: value = v[l + k * n];
1379: if (col <= lastcol) low = 0;
1380: else high = nrow;
1381: lastcol = col;
1382: while (high - low > 7) {
1383: t = (low + high) / 2;
1384: if (rp[t] > bcol) high = t;
1385: else low = t;
1386: }
1387: for (i = low; i < high; i++) {
1388: if (rp[i] > bcol) break;
1389: if (rp[i] == bcol) {
1390: bap = ap + 16 * i + 4 * cidx + ridx;
1391: *bap += value;
1392: goto noinsert1;
1393: }
1394: }
1395: N = nrow++ - 1;
1396: high++; /* added new column thus must search to one higher than before */
1397: /* shift up all the later entries in this row */
1398: PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
1399: PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
1400: PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1401: rp[i] = bcol;
1402: ap[16 * i + 4 * cidx + ridx] = value;
1403: noinsert1:;
1404: low = i;
1405: }
1406: ailen[brow] = nrow;
1407: }
1408: PetscFunctionReturnVoid();
1409: }
1411: /*
1412: Checks for missing diagonals
1413: */
1414: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1415: {
1416: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1417: PetscInt *diag, *ii = a->i, i;
1419: PetscFunctionBegin;
1420: PetscCall(MatMarkDiagonal_SeqBAIJ(A));
1421: *missing = PETSC_FALSE;
1422: if (A->rmap->n > 0 && !ii) {
1423: *missing = PETSC_TRUE;
1424: if (d) *d = 0;
1425: PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
1426: } else {
1427: PetscInt n;
1428: n = PetscMin(a->mbs, a->nbs);
1429: diag = a->diag;
1430: for (i = 0; i < n; i++) {
1431: if (diag[i] >= ii[i + 1]) {
1432: *missing = PETSC_TRUE;
1433: if (d) *d = i;
1434: PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1435: break;
1436: }
1437: }
1438: }
1439: PetscFunctionReturn(PETSC_SUCCESS);
1440: }
1442: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1443: {
1444: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1445: PetscInt i, j, m = a->mbs;
1447: PetscFunctionBegin;
1448: if (!a->diag) {
1449: PetscCall(PetscMalloc1(m, &a->diag));
1450: a->free_diag = PETSC_TRUE;
1451: }
1452: for (i = 0; i < m; i++) {
1453: a->diag[i] = a->i[i + 1];
1454: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1455: if (a->j[j] == i) {
1456: a->diag[i] = j;
1457: break;
1458: }
1459: }
1460: }
1461: PetscFunctionReturn(PETSC_SUCCESS);
1462: }
1464: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1465: {
1466: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1467: PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
1468: PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
1470: PetscFunctionBegin;
1471: *nn = n;
1472: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1473: if (symmetric) {
1474: PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1475: nz = tia[n];
1476: } else {
1477: tia = a->i;
1478: tja = a->j;
1479: }
1481: if (!blockcompressed && bs > 1) {
1482: (*nn) *= bs;
1483: /* malloc & create the natural set of indices */
1484: PetscCall(PetscMalloc1((n + 1) * bs, ia));
1485: if (n) {
1486: (*ia)[0] = oshift;
1487: for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
1488: }
1490: for (i = 1; i < n; i++) {
1491: (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1492: for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
1493: }
1494: if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1496: if (inja) {
1497: PetscCall(PetscMalloc1(nz * bs * bs, ja));
1498: cnt = 0;
1499: for (i = 0; i < n; i++) {
1500: for (j = 0; j < bs; j++) {
1501: for (k = tia[i]; k < tia[i + 1]; k++) {
1502: for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
1503: }
1504: }
1505: }
1506: }
1508: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1509: PetscCall(PetscFree(tia));
1510: PetscCall(PetscFree(tja));
1511: }
1512: } else if (oshift == 1) {
1513: if (symmetric) {
1514: nz = tia[A->rmap->n / bs];
1515: /* add 1 to i and j indices */
1516: for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1517: *ia = tia;
1518: if (ja) {
1519: for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1520: *ja = tja;
1521: }
1522: } else {
1523: nz = a->i[A->rmap->n / bs];
1524: /* malloc space and add 1 to i and j indices */
1525: PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1526: for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1527: if (ja) {
1528: PetscCall(PetscMalloc1(nz, ja));
1529: for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1530: }
1531: }
1532: } else {
1533: *ia = tia;
1534: if (ja) *ja = tja;
1535: }
1536: PetscFunctionReturn(PETSC_SUCCESS);
1537: }
1539: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1540: {
1541: PetscFunctionBegin;
1542: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
1543: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1544: PetscCall(PetscFree(*ia));
1545: if (ja) PetscCall(PetscFree(*ja));
1546: }
1547: PetscFunctionReturn(PETSC_SUCCESS);
1548: }
1550: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1551: {
1552: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1554: PetscFunctionBegin;
1555: if (A->hash_active) {
1556: PetscInt bs;
1557: A->ops[0] = a->cops;
1558: PetscCall(PetscHMapIJVDestroy(&a->ht));
1559: PetscCall(MatGetBlockSize(A, &bs));
1560: if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht));
1561: PetscCall(PetscFree(a->dnz));
1562: PetscCall(PetscFree(a->bdnz));
1563: A->hash_active = PETSC_FALSE;
1564: }
1565: PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz));
1566: PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
1567: PetscCall(ISDestroy(&a->row));
1568: PetscCall(ISDestroy(&a->col));
1569: if (a->free_diag) PetscCall(PetscFree(a->diag));
1570: PetscCall(PetscFree(a->idiag));
1571: if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
1572: PetscCall(PetscFree(a->solve_work));
1573: PetscCall(PetscFree(a->mult_work));
1574: PetscCall(PetscFree(a->sor_workt));
1575: PetscCall(PetscFree(a->sor_work));
1576: PetscCall(ISDestroy(&a->icol));
1577: PetscCall(PetscFree(a->saved_values));
1578: PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1580: PetscCall(MatDestroy(&a->sbaijMat));
1581: PetscCall(MatDestroy(&a->parent));
1582: PetscCall(PetscFree(A->data));
1584: PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
1585: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
1586: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
1587: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
1588: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
1589: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
1590: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
1591: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
1592: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
1593: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
1594: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
1595: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
1596: #if defined(PETSC_HAVE_HYPRE)
1597: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
1598: #endif
1599: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
1600: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1601: PetscFunctionReturn(PETSC_SUCCESS);
1602: }
1604: static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1605: {
1606: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1608: PetscFunctionBegin;
1609: switch (op) {
1610: case MAT_ROW_ORIENTED:
1611: a->roworiented = flg;
1612: break;
1613: case MAT_KEEP_NONZERO_PATTERN:
1614: a->keepnonzeropattern = flg;
1615: break;
1616: case MAT_NEW_NONZERO_LOCATIONS:
1617: a->nonew = (flg ? 0 : 1);
1618: break;
1619: case MAT_NEW_NONZERO_LOCATION_ERR:
1620: a->nonew = (flg ? -1 : 0);
1621: break;
1622: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1623: a->nonew = (flg ? -2 : 0);
1624: break;
1625: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1626: a->nounused = (flg ? -1 : 0);
1627: break;
1628: default:
1629: break;
1630: }
1631: PetscFunctionReturn(PETSC_SUCCESS);
1632: }
1634: /* used for both SeqBAIJ and SeqSBAIJ matrices */
1635: PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1636: {
1637: PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
1638: MatScalar *aa_i;
1639: PetscScalar *v_i;
1641: PetscFunctionBegin;
1642: bs = A->rmap->bs;
1643: bs2 = bs * bs;
1644: PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
1646: bn = row / bs; /* Block number */
1647: bp = row % bs; /* Block Position */
1648: M = ai[bn + 1] - ai[bn];
1649: *nz = bs * M;
1651: if (v) {
1652: *v = NULL;
1653: if (*nz) {
1654: PetscCall(PetscMalloc1(*nz, v));
1655: for (i = 0; i < M; i++) { /* for each block in the block row */
1656: v_i = *v + i * bs;
1657: aa_i = aa + bs2 * (ai[bn] + i);
1658: for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
1659: }
1660: }
1661: }
1663: if (idx) {
1664: *idx = NULL;
1665: if (*nz) {
1666: PetscCall(PetscMalloc1(*nz, idx));
1667: for (i = 0; i < M; i++) { /* for each block in the block row */
1668: idx_i = *idx + i * bs;
1669: itmp = bs * aj[ai[bn] + i];
1670: for (j = 0; j < bs; j++) idx_i[j] = itmp++;
1671: }
1672: }
1673: }
1674: PetscFunctionReturn(PETSC_SUCCESS);
1675: }
1677: PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1678: {
1679: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1681: PetscFunctionBegin;
1682: PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
1683: PetscFunctionReturn(PETSC_SUCCESS);
1684: }
1686: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1687: {
1688: PetscFunctionBegin;
1689: if (idx) PetscCall(PetscFree(*idx));
1690: if (v) PetscCall(PetscFree(*v));
1691: PetscFunctionReturn(PETSC_SUCCESS);
1692: }
1694: static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1695: {
1696: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
1697: Mat C;
1698: PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
1699: PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr;
1700: MatScalar *ata, *aa = a->a;
1702: PetscFunctionBegin;
1703: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1704: PetscCall(PetscCalloc1(1 + nbs, &atfill));
1705: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1706: for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1708: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1709: PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
1710: PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
1711: PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
1713: at = (Mat_SeqBAIJ *)C->data;
1714: ati = at->i;
1715: for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1716: } else {
1717: C = *B;
1718: at = (Mat_SeqBAIJ *)C->data;
1719: ati = at->i;
1720: }
1722: atj = at->j;
1723: ata = at->a;
1725: /* Copy ati into atfill so we have locations of the next free space in atj */
1726: PetscCall(PetscArraycpy(atfill, ati, nbs));
1728: /* Walk through A row-wise and mark nonzero entries of A^T. */
1729: for (i = 0; i < mbs; i++) {
1730: anzj = ai[i + 1] - ai[i];
1731: for (j = 0; j < anzj; j++) {
1732: atj[atfill[*aj]] = i;
1733: for (kr = 0; kr < bs; kr++) {
1734: for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
1735: }
1736: atfill[*aj++] += 1;
1737: }
1738: }
1739: PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
1740: PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
1742: /* Clean up temporary space and complete requests. */
1743: PetscCall(PetscFree(atfill));
1745: if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1746: PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs));
1747: *B = C;
1748: } else {
1749: PetscCall(MatHeaderMerge(A, &C));
1750: }
1751: PetscFunctionReturn(PETSC_SUCCESS);
1752: }
1754: static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1755: {
1756: Mat Btrans;
1758: PetscFunctionBegin;
1759: *f = PETSC_FALSE;
1760: PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
1761: PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
1762: PetscCall(MatDestroy(&Btrans));
1763: PetscFunctionReturn(PETSC_SUCCESS);
1764: }
1766: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1767: PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1768: {
1769: Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1770: PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1771: PetscInt *rowlens, *colidxs;
1772: PetscScalar *matvals;
1774: PetscFunctionBegin;
1775: PetscCall(PetscViewerSetUp(viewer));
1777: M = mat->rmap->N;
1778: N = mat->cmap->N;
1779: m = mat->rmap->n;
1780: bs = mat->rmap->bs;
1781: nz = bs * bs * A->nz;
1783: /* write matrix header */
1784: header[0] = MAT_FILE_CLASSID;
1785: header[1] = M;
1786: header[2] = N;
1787: header[3] = nz;
1788: PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1790: /* store row lengths */
1791: PetscCall(PetscMalloc1(m, &rowlens));
1792: for (cnt = 0, i = 0; i < A->mbs; i++)
1793: for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
1794: PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
1795: PetscCall(PetscFree(rowlens));
1797: /* store column indices */
1798: PetscCall(PetscMalloc1(nz, &colidxs));
1799: for (cnt = 0, i = 0; i < A->mbs; i++)
1800: for (k = 0; k < bs; k++)
1801: for (j = A->i[i]; j < A->i[i + 1]; j++)
1802: for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
1803: PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1804: PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
1805: PetscCall(PetscFree(colidxs));
1807: /* store nonzero values */
1808: PetscCall(PetscMalloc1(nz, &matvals));
1809: for (cnt = 0, i = 0; i < A->mbs; i++)
1810: for (k = 0; k < bs; k++)
1811: for (j = A->i[i]; j < A->i[i + 1]; j++)
1812: for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
1813: PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1814: PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
1815: PetscCall(PetscFree(matvals));
1817: /* write block size option to the viewer's .info file */
1818: PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1819: PetscFunctionReturn(PETSC_SUCCESS);
1820: }
1822: static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1823: {
1824: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1825: PetscInt i, bs = A->rmap->bs, k;
1827: PetscFunctionBegin;
1828: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1829: for (i = 0; i < a->mbs; i++) {
1830: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
1831: for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
1832: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1833: }
1834: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1835: PetscFunctionReturn(PETSC_SUCCESS);
1836: }
1838: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1839: {
1840: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1841: PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1842: PetscViewerFormat format;
1844: PetscFunctionBegin;
1845: if (A->structure_only) {
1846: PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
1847: PetscFunctionReturn(PETSC_SUCCESS);
1848: }
1850: PetscCall(PetscViewerGetFormat(viewer, &format));
1851: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1852: PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
1853: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1854: const char *matname;
1855: Mat aij;
1856: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
1857: PetscCall(PetscObjectGetName((PetscObject)A, &matname));
1858: PetscCall(PetscObjectSetName((PetscObject)aij, matname));
1859: PetscCall(MatView(aij, viewer));
1860: PetscCall(MatDestroy(&aij));
1861: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1862: PetscFunctionReturn(PETSC_SUCCESS);
1863: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1864: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1865: for (i = 0; i < a->mbs; i++) {
1866: for (j = 0; j < bs; j++) {
1867: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1868: for (k = a->i[i]; k < a->i[i + 1]; k++) {
1869: for (l = 0; l < bs; l++) {
1870: #if defined(PETSC_USE_COMPLEX)
1871: if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1872: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1873: } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1874: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1875: } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
1876: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1877: }
1878: #else
1879: if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1880: #endif
1881: }
1882: }
1883: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1884: }
1885: }
1886: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1887: } else {
1888: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1889: for (i = 0; i < a->mbs; i++) {
1890: for (j = 0; j < bs; j++) {
1891: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1892: for (k = a->i[i]; k < a->i[i + 1]; k++) {
1893: for (l = 0; l < bs; l++) {
1894: #if defined(PETSC_USE_COMPLEX)
1895: if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
1896: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1897: } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
1898: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
1899: } else {
1900: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
1901: }
1902: #else
1903: PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
1904: #endif
1905: }
1906: }
1907: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
1908: }
1909: }
1910: PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1911: }
1912: PetscCall(PetscViewerFlush(viewer));
1913: PetscFunctionReturn(PETSC_SUCCESS);
1914: }
1916: #include <petscdraw.h>
1917: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1918: {
1919: Mat A = (Mat)Aa;
1920: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1921: PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
1922: PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r;
1923: MatScalar *aa;
1924: PetscViewer viewer;
1925: PetscViewerFormat format;
1926: int color;
1928: PetscFunctionBegin;
1929: PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
1930: PetscCall(PetscViewerGetFormat(viewer, &format));
1931: PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
1933: /* loop over matrix elements drawing boxes */
1935: if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1936: PetscDrawCollectiveBegin(draw);
1937: /* Blue for negative, Cyan for zero and Red for positive */
1938: color = PETSC_DRAW_BLUE;
1939: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1940: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1941: y_l = A->rmap->N - row - 1.0;
1942: y_r = y_l + 1.0;
1943: x_l = a->j[j] * bs;
1944: x_r = x_l + 1.0;
1945: aa = a->a + j * bs2;
1946: for (k = 0; k < bs; k++) {
1947: for (l = 0; l < bs; l++) {
1948: if (PetscRealPart(*aa++) >= 0.) continue;
1949: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1950: }
1951: }
1952: }
1953: }
1954: color = PETSC_DRAW_CYAN;
1955: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1956: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1957: y_l = A->rmap->N - row - 1.0;
1958: y_r = y_l + 1.0;
1959: x_l = a->j[j] * bs;
1960: x_r = x_l + 1.0;
1961: aa = a->a + j * bs2;
1962: for (k = 0; k < bs; k++) {
1963: for (l = 0; l < bs; l++) {
1964: if (PetscRealPart(*aa++) != 0.) continue;
1965: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1966: }
1967: }
1968: }
1969: }
1970: color = PETSC_DRAW_RED;
1971: for (i = 0, row = 0; i < mbs; i++, row += bs) {
1972: for (j = a->i[i]; j < a->i[i + 1]; j++) {
1973: y_l = A->rmap->N - row - 1.0;
1974: y_r = y_l + 1.0;
1975: x_l = a->j[j] * bs;
1976: x_r = x_l + 1.0;
1977: aa = a->a + j * bs2;
1978: for (k = 0; k < bs; k++) {
1979: for (l = 0; l < bs; l++) {
1980: if (PetscRealPart(*aa++) <= 0.) continue;
1981: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
1982: }
1983: }
1984: }
1985: }
1986: PetscDrawCollectiveEnd(draw);
1987: } else {
1988: /* use contour shading to indicate magnitude of values */
1989: /* first determine max of all nonzero values */
1990: PetscReal minv = 0.0, maxv = 0.0;
1991: PetscDraw popup;
1993: for (i = 0; i < a->nz * a->bs2; i++) {
1994: if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1995: }
1996: if (minv >= maxv) maxv = minv + PETSC_SMALL;
1997: PetscCall(PetscDrawGetPopup(draw, &popup));
1998: PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
2000: PetscDrawCollectiveBegin(draw);
2001: for (i = 0, row = 0; i < mbs; i++, row += bs) {
2002: for (j = a->i[i]; j < a->i[i + 1]; j++) {
2003: y_l = A->rmap->N - row - 1.0;
2004: y_r = y_l + 1.0;
2005: x_l = a->j[j] * bs;
2006: x_r = x_l + 1.0;
2007: aa = a->a + j * bs2;
2008: for (k = 0; k < bs; k++) {
2009: for (l = 0; l < bs; l++) {
2010: MatScalar v = *aa++;
2011: color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
2012: PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2013: }
2014: }
2015: }
2016: }
2017: PetscDrawCollectiveEnd(draw);
2018: }
2019: PetscFunctionReturn(PETSC_SUCCESS);
2020: }
2022: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2023: {
2024: PetscReal xl, yl, xr, yr, w, h;
2025: PetscDraw draw;
2026: PetscBool isnull;
2028: PetscFunctionBegin;
2029: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
2030: PetscCall(PetscDrawIsNull(draw, &isnull));
2031: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
2033: xr = A->cmap->n;
2034: yr = A->rmap->N;
2035: h = yr / 10.0;
2036: w = xr / 10.0;
2037: xr += w;
2038: yr += h;
2039: xl = -w;
2040: yl = -h;
2041: PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
2042: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
2043: PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
2044: PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
2045: PetscCall(PetscDrawSave(draw));
2046: PetscFunctionReturn(PETSC_SUCCESS);
2047: }
2049: PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2050: {
2051: PetscBool isascii, isbinary, isdraw;
2053: PetscFunctionBegin;
2054: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
2055: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2056: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
2057: if (isascii) {
2058: PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
2059: } else if (isbinary) {
2060: PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
2061: } else if (isdraw) {
2062: PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
2063: } else {
2064: Mat B;
2065: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2066: PetscCall(MatView(B, viewer));
2067: PetscCall(MatDestroy(&B));
2068: }
2069: PetscFunctionReturn(PETSC_SUCCESS);
2070: }
2072: PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2073: {
2074: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2075: PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2076: PetscInt *ai = a->i, *ailen = a->ilen;
2077: PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
2078: MatScalar *ap, *aa = a->a;
2080: PetscFunctionBegin;
2081: for (k = 0; k < m; k++) { /* loop over rows */
2082: row = im[k];
2083: brow = row / bs;
2084: if (row < 0) {
2085: v += n;
2086: continue;
2087: } /* negative row */
2088: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2089: rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2090: ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2091: nrow = ailen[brow];
2092: for (l = 0; l < n; l++) { /* loop over columns */
2093: if (in[l] < 0) {
2094: v++;
2095: continue;
2096: } /* negative column */
2097: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
2098: col = in[l];
2099: bcol = col / bs;
2100: cidx = col % bs;
2101: ridx = row % bs;
2102: high = nrow;
2103: low = 0; /* assume unsorted */
2104: while (high - low > 5) {
2105: t = (low + high) / 2;
2106: if (rp[t] > bcol) high = t;
2107: else low = t;
2108: }
2109: for (i = low; i < high; i++) {
2110: if (rp[i] > bcol) break;
2111: if (rp[i] == bcol) {
2112: *v++ = ap[bs2 * i + bs * cidx + ridx];
2113: goto finished;
2114: }
2115: }
2116: *v++ = 0.0;
2117: finished:;
2118: }
2119: }
2120: PetscFunctionReturn(PETSC_SUCCESS);
2121: }
2123: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2124: {
2125: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2126: PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2127: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2128: PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2129: PetscBool roworiented = a->roworiented;
2130: const PetscScalar *value = v;
2131: MatScalar *ap = NULL, *aa = a->a, *bap;
2133: PetscFunctionBegin;
2134: if (roworiented) {
2135: stepval = (n - 1) * bs;
2136: } else {
2137: stepval = (m - 1) * bs;
2138: }
2139: for (k = 0; k < m; k++) { /* loop over added rows */
2140: row = im[k];
2141: if (row < 0) continue;
2142: PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
2143: rp = aj + ai[row];
2144: if (!A->structure_only) ap = aa + bs2 * ai[row];
2145: rmax = imax[row];
2146: nrow = ailen[row];
2147: low = 0;
2148: high = nrow;
2149: for (l = 0; l < n; l++) { /* loop over added columns */
2150: if (in[l] < 0) continue;
2151: PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
2152: col = in[l];
2153: if (!A->structure_only) {
2154: if (roworiented) {
2155: value = v + (k * (stepval + bs) + l) * bs;
2156: } else {
2157: value = v + (l * (stepval + bs) + k) * bs;
2158: }
2159: }
2160: if (col <= lastcol) low = 0;
2161: else high = nrow;
2162: lastcol = col;
2163: while (high - low > 7) {
2164: t = (low + high) / 2;
2165: if (rp[t] > col) high = t;
2166: else low = t;
2167: }
2168: for (i = low; i < high; i++) {
2169: if (rp[i] > col) break;
2170: if (rp[i] == col) {
2171: if (A->structure_only) goto noinsert2;
2172: bap = ap + bs2 * i;
2173: if (roworiented) {
2174: if (is == ADD_VALUES) {
2175: for (ii = 0; ii < bs; ii++, value += stepval) {
2176: for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2177: }
2178: } else {
2179: for (ii = 0; ii < bs; ii++, value += stepval) {
2180: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2181: }
2182: }
2183: } else {
2184: if (is == ADD_VALUES) {
2185: for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2186: for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
2187: bap += bs;
2188: }
2189: } else {
2190: for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2191: for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
2192: bap += bs;
2193: }
2194: }
2195: }
2196: goto noinsert2;
2197: }
2198: }
2199: if (nonew == 1) goto noinsert2;
2200: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2201: if (A->structure_only) {
2202: MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
2203: } else {
2204: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2205: }
2206: N = nrow++ - 1;
2207: high++;
2208: /* shift up all the later entries in this row */
2209: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2210: rp[i] = col;
2211: if (!A->structure_only) {
2212: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2213: bap = ap + bs2 * i;
2214: if (roworiented) {
2215: for (ii = 0; ii < bs; ii++, value += stepval) {
2216: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2217: }
2218: } else {
2219: for (ii = 0; ii < bs; ii++, value += stepval) {
2220: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2221: }
2222: }
2223: }
2224: noinsert2:;
2225: low = i;
2226: }
2227: ailen[row] = nrow;
2228: }
2229: PetscFunctionReturn(PETSC_SUCCESS);
2230: }
2232: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2233: {
2234: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2235: PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2236: PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen;
2237: PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2238: MatScalar *aa = a->a, *ap;
2239: PetscReal ratio = 0.6;
2241: PetscFunctionBegin;
2242: if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS);
2244: if (m) rmax = ailen[0];
2245: for (i = 1; i < mbs; i++) {
2246: /* move each row back by the amount of empty slots (fshift) before it*/
2247: fshift += imax[i - 1] - ailen[i - 1];
2248: rmax = PetscMax(rmax, ailen[i]);
2249: if (fshift) {
2250: ip = aj + ai[i];
2251: ap = aa + bs2 * ai[i];
2252: N = ailen[i];
2253: PetscCall(PetscArraymove(ip - fshift, ip, N));
2254: if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2255: }
2256: ai[i] = ai[i - 1] + ailen[i - 1];
2257: }
2258: if (mbs) {
2259: fshift += imax[mbs - 1] - ailen[mbs - 1];
2260: ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2261: }
2263: /* reset ilen and imax for each row */
2264: a->nonzerorowcnt = 0;
2265: if (A->structure_only) {
2266: PetscCall(PetscFree2(a->imax, a->ilen));
2267: } else { /* !A->structure_only */
2268: for (i = 0; i < mbs; i++) {
2269: ailen[i] = imax[i] = ai[i + 1] - ai[i];
2270: a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2271: }
2272: }
2273: a->nz = ai[mbs];
2275: /* diagonals may have moved, so kill the diagonal pointers */
2276: a->idiagvalid = PETSC_FALSE;
2277: if (fshift && a->diag) PetscCall(PetscFree(a->diag));
2278: if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
2279: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
2280: PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
2281: PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
2283: A->info.mallocs += a->reallocs;
2284: a->reallocs = 0;
2285: A->info.nz_unneeded = (PetscReal)fshift * bs2;
2286: a->rmax = rmax;
2288: if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
2289: PetscFunctionReturn(PETSC_SUCCESS);
2290: }
2292: /*
2293: This function returns an array of flags which indicate the locations of contiguous
2294: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
2295: then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2296: Assume: sizes should be long enough to hold all the values.
2297: */
2298: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2299: {
2300: PetscInt j = 0;
2302: PetscFunctionBegin;
2303: for (PetscInt i = 0; i < n; j++) {
2304: PetscInt row = idx[i];
2305: if (row % bs != 0) { /* Not the beginning of a block */
2306: sizes[j] = 1;
2307: i++;
2308: } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2309: sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */
2310: i++;
2311: } else { /* Beginning of the block, so check if the complete block exists */
2312: PetscBool flg = PETSC_TRUE;
2313: for (PetscInt k = 1; k < bs; k++) {
2314: if (row + k != idx[i + k]) { /* break in the block */
2315: flg = PETSC_FALSE;
2316: break;
2317: }
2318: }
2319: if (flg) { /* No break in the bs */
2320: sizes[j] = bs;
2321: i += bs;
2322: } else {
2323: sizes[j] = 1;
2324: i++;
2325: }
2326: }
2327: }
2328: *bs_max = j;
2329: PetscFunctionReturn(PETSC_SUCCESS);
2330: }
2332: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2333: {
2334: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2335: PetscInt i, j, k, count, *rows;
2336: PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
2337: PetscScalar zero = 0.0;
2338: MatScalar *aa;
2339: const PetscScalar *xx;
2340: PetscScalar *bb;
2342: PetscFunctionBegin;
2343: /* fix right-hand side if needed */
2344: if (x && b) {
2345: PetscCall(VecGetArrayRead(x, &xx));
2346: PetscCall(VecGetArray(b, &bb));
2347: for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
2348: PetscCall(VecRestoreArrayRead(x, &xx));
2349: PetscCall(VecRestoreArray(b, &bb));
2350: }
2352: /* Make a copy of the IS and sort it */
2353: /* allocate memory for rows,sizes */
2354: PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2356: /* copy IS values to rows, and sort them */
2357: for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
2358: PetscCall(PetscSortInt(is_n, rows));
2360: if (baij->keepnonzeropattern) {
2361: for (i = 0; i < is_n; i++) sizes[i] = 1;
2362: bs_max = is_n;
2363: } else {
2364: PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2365: A->nonzerostate++;
2366: }
2368: for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2369: row = rows[j];
2370: PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2371: count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2372: aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2373: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2374: if (diag != (PetscScalar)0.0) {
2375: if (baij->ilen[row / bs] > 0) {
2376: baij->ilen[row / bs] = 1;
2377: baij->j[baij->i[row / bs]] = row / bs;
2379: PetscCall(PetscArrayzero(aa, count * bs));
2380: }
2381: /* Now insert all the diagonal values for this bs */
2382: for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES);
2383: } else { /* (diag == 0.0) */
2384: baij->ilen[row / bs] = 0;
2385: } /* end (diag == 0.0) */
2386: } else { /* (sizes[i] != bs) */
2387: PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2388: for (k = 0; k < count; k++) {
2389: aa[0] = zero;
2390: aa += bs;
2391: }
2392: if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES);
2393: }
2394: }
2396: PetscCall(PetscFree2(rows, sizes));
2397: PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2398: PetscFunctionReturn(PETSC_SUCCESS);
2399: }
2401: static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2402: {
2403: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data;
2404: PetscInt i, j, k, count;
2405: PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col;
2406: PetscScalar zero = 0.0;
2407: MatScalar *aa;
2408: const PetscScalar *xx;
2409: PetscScalar *bb;
2410: PetscBool *zeroed, vecs = PETSC_FALSE;
2412: PetscFunctionBegin;
2413: /* fix right-hand side if needed */
2414: if (x && b) {
2415: PetscCall(VecGetArrayRead(x, &xx));
2416: PetscCall(VecGetArray(b, &bb));
2417: vecs = PETSC_TRUE;
2418: }
2420: /* zero the columns */
2421: PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
2422: for (i = 0; i < is_n; i++) {
2423: PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
2424: zeroed[is_idx[i]] = PETSC_TRUE;
2425: }
2426: for (i = 0; i < A->rmap->N; i++) {
2427: if (!zeroed[i]) {
2428: row = i / bs;
2429: for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
2430: for (k = 0; k < bs; k++) {
2431: col = bs * baij->j[j] + k;
2432: if (zeroed[col]) {
2433: aa = baij->a + j * bs2 + (i % bs) + bs * k;
2434: if (vecs) bb[i] -= aa[0] * xx[col];
2435: aa[0] = 0.0;
2436: }
2437: }
2438: }
2439: } else if (vecs) bb[i] = diag * xx[i];
2440: }
2441: PetscCall(PetscFree(zeroed));
2442: if (vecs) {
2443: PetscCall(VecRestoreArrayRead(x, &xx));
2444: PetscCall(VecRestoreArray(b, &bb));
2445: }
2447: /* zero the rows */
2448: for (i = 0; i < is_n; i++) {
2449: row = is_idx[i];
2450: count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2451: aa = baij->a + baij->i[row / bs] * bs2 + (row % bs);
2452: for (k = 0; k < count; k++) {
2453: aa[0] = zero;
2454: aa += bs;
2455: }
2456: if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
2457: }
2458: PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
2459: PetscFunctionReturn(PETSC_SUCCESS);
2460: }
2462: PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2463: {
2464: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2465: PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2466: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2467: PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2468: PetscInt ridx, cidx, bs2 = a->bs2;
2469: PetscBool roworiented = a->roworiented;
2470: MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap;
2472: PetscFunctionBegin;
2473: for (k = 0; k < m; k++) { /* loop over added rows */
2474: row = im[k];
2475: brow = row / bs;
2476: if (row < 0) continue;
2477: PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
2478: rp = PetscSafePointerPlusOffset(aj, ai[brow]);
2479: if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]);
2480: rmax = imax[brow];
2481: nrow = ailen[brow];
2482: low = 0;
2483: high = nrow;
2484: for (l = 0; l < n; l++) { /* loop over added columns */
2485: if (in[l] < 0) continue;
2486: PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
2487: col = in[l];
2488: bcol = col / bs;
2489: ridx = row % bs;
2490: cidx = col % bs;
2491: if (!A->structure_only) {
2492: if (roworiented) {
2493: value = v[l + k * n];
2494: } else {
2495: value = v[k + l * m];
2496: }
2497: }
2498: if (col <= lastcol) low = 0;
2499: else high = nrow;
2500: lastcol = col;
2501: while (high - low > 7) {
2502: t = (low + high) / 2;
2503: if (rp[t] > bcol) high = t;
2504: else low = t;
2505: }
2506: for (i = low; i < high; i++) {
2507: if (rp[i] > bcol) break;
2508: if (rp[i] == bcol) {
2509: bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx);
2510: if (!A->structure_only) {
2511: if (is == ADD_VALUES) *bap += value;
2512: else *bap = value;
2513: }
2514: goto noinsert1;
2515: }
2516: }
2517: if (nonew == 1) goto noinsert1;
2518: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2519: if (A->structure_only) {
2520: MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2521: } else {
2522: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2523: }
2524: N = nrow++ - 1;
2525: high++;
2526: /* shift up all the later entries in this row */
2527: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
2528: rp[i] = bcol;
2529: if (!A->structure_only) {
2530: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
2531: PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2532: ap[bs2 * i + bs * cidx + ridx] = value;
2533: }
2534: a->nz++;
2535: noinsert1:;
2536: low = i;
2537: }
2538: ailen[brow] = nrow;
2539: }
2540: PetscFunctionReturn(PETSC_SUCCESS);
2541: }
2543: static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2544: {
2545: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
2546: Mat outA;
2547: PetscBool row_identity, col_identity;
2549: PetscFunctionBegin;
2550: PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
2551: PetscCall(ISIdentity(row, &row_identity));
2552: PetscCall(ISIdentity(col, &col_identity));
2553: PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
2555: outA = inA;
2556: inA->factortype = MAT_FACTOR_LU;
2557: PetscCall(PetscFree(inA->solvertype));
2558: PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
2560: PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2562: PetscCall(PetscObjectReference((PetscObject)row));
2563: PetscCall(ISDestroy(&a->row));
2564: a->row = row;
2565: PetscCall(PetscObjectReference((PetscObject)col));
2566: PetscCall(ISDestroy(&a->col));
2567: a->col = col;
2569: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2570: PetscCall(ISDestroy(&a->icol));
2571: PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2573: PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
2574: if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work));
2575: PetscCall(MatLUFactorNumeric(outA, inA, info));
2576: PetscFunctionReturn(PETSC_SUCCESS);
2577: }
2579: static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices)
2580: {
2581: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2583: PetscFunctionBegin;
2584: baij->nz = baij->maxnz;
2585: PetscCall(PetscArraycpy(baij->j, indices, baij->nz));
2586: PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs));
2587: PetscFunctionReturn(PETSC_SUCCESS);
2588: }
2590: /*@
2591: MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix.
2593: Input Parameters:
2594: + mat - the `MATSEQBAIJ` matrix
2595: - indices - the block column indices
2597: Level: advanced
2599: Notes:
2600: This can be called if you have precomputed the nonzero structure of the
2601: matrix and want to provide it to the matrix object to improve the performance
2602: of the `MatSetValues()` operation.
2604: You MUST have set the correct numbers of nonzeros per row in the call to
2605: `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
2607: MUST be called before any calls to `MatSetValues()`
2609: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()`
2610: @*/
2611: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2612: {
2613: PetscFunctionBegin;
2615: PetscAssertPointer(indices, 2);
2616: PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices));
2617: PetscFunctionReturn(PETSC_SUCCESS);
2618: }
2620: static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2621: {
2622: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2623: PetscInt i, j, n, row, bs, *ai, *aj, mbs;
2624: PetscReal atmp;
2625: PetscScalar *x, zero = 0.0;
2626: MatScalar *aa;
2627: PetscInt ncols, brow, krow, kcol;
2629: PetscFunctionBegin;
2630: PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2631: bs = A->rmap->bs;
2632: aa = a->a;
2633: ai = a->i;
2634: aj = a->j;
2635: mbs = a->mbs;
2637: PetscCall(VecSet(v, zero));
2638: PetscCall(VecGetArray(v, &x));
2639: PetscCall(VecGetLocalSize(v, &n));
2640: PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2641: for (i = 0; i < mbs; i++) {
2642: ncols = ai[1] - ai[0];
2643: ai++;
2644: brow = bs * i;
2645: for (j = 0; j < ncols; j++) {
2646: for (kcol = 0; kcol < bs; kcol++) {
2647: for (krow = 0; krow < bs; krow++) {
2648: atmp = PetscAbsScalar(*aa);
2649: aa++;
2650: row = brow + krow; /* row index */
2651: if (PetscAbsScalar(x[row]) < atmp) {
2652: x[row] = atmp;
2653: if (idx) idx[row] = bs * (*aj) + kcol;
2654: }
2655: }
2656: }
2657: aj++;
2658: }
2659: }
2660: PetscCall(VecRestoreArray(v, &x));
2661: PetscFunctionReturn(PETSC_SUCCESS);
2662: }
2664: static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v)
2665: {
2666: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2667: PetscInt i, j, n, row, bs, *ai, mbs;
2668: PetscReal atmp;
2669: PetscScalar *x, zero = 0.0;
2670: MatScalar *aa;
2671: PetscInt ncols, brow, krow, kcol;
2673: PetscFunctionBegin;
2674: PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2675: bs = A->rmap->bs;
2676: aa = a->a;
2677: ai = a->i;
2678: mbs = a->mbs;
2680: PetscCall(VecSet(v, zero));
2681: PetscCall(VecGetArrayWrite(v, &x));
2682: PetscCall(VecGetLocalSize(v, &n));
2683: PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2684: for (i = 0; i < mbs; i++) {
2685: ncols = ai[1] - ai[0];
2686: ai++;
2687: brow = bs * i;
2688: for (j = 0; j < ncols; j++) {
2689: for (kcol = 0; kcol < bs; kcol++) {
2690: for (krow = 0; krow < bs; krow++) {
2691: atmp = PetscAbsScalar(*aa);
2692: aa++;
2693: row = brow + krow; /* row index */
2694: x[row] += atmp;
2695: }
2696: }
2697: }
2698: }
2699: PetscCall(VecRestoreArrayWrite(v, &x));
2700: PetscFunctionReturn(PETSC_SUCCESS);
2701: }
2703: static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2704: {
2705: PetscFunctionBegin;
2706: /* If the two matrices have the same copy implementation, use fast copy. */
2707: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2708: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2709: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
2710: PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
2712: PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
2713: PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
2714: PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
2715: PetscCall(PetscObjectStateIncrease((PetscObject)B));
2716: } else {
2717: PetscCall(MatCopy_Basic(A, B, str));
2718: }
2719: PetscFunctionReturn(PETSC_SUCCESS);
2720: }
2722: static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2723: {
2724: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2726: PetscFunctionBegin;
2727: *array = a->a;
2728: PetscFunctionReturn(PETSC_SUCCESS);
2729: }
2731: static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2732: {
2733: PetscFunctionBegin;
2734: *array = NULL;
2735: PetscFunctionReturn(PETSC_SUCCESS);
2736: }
2738: PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2739: {
2740: PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
2741: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
2742: Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
2744: PetscFunctionBegin;
2745: /* Set the number of nonzeros in the new matrix */
2746: PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
2747: PetscFunctionReturn(PETSC_SUCCESS);
2748: }
2750: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2751: {
2752: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
2753: PetscInt bs = Y->rmap->bs, bs2 = bs * bs;
2754: PetscBLASInt one = 1;
2756: PetscFunctionBegin;
2757: if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2758: PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2759: if (e) {
2760: PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2761: if (e) {
2762: PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2763: if (e) str = SAME_NONZERO_PATTERN;
2764: }
2765: }
2766: if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2767: }
2768: if (str == SAME_NONZERO_PATTERN) {
2769: PetscScalar alpha = a;
2770: PetscBLASInt bnz;
2771: PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2772: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
2773: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2774: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2775: PetscCall(MatAXPY_Basic(Y, a, X, str));
2776: } else {
2777: Mat B;
2778: PetscInt *nnz;
2779: PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
2780: PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
2781: PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2782: PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2783: PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
2784: PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
2785: PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
2786: PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
2787: PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
2788: PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2789: PetscCall(MatHeaderMerge(Y, &B));
2790: PetscCall(PetscFree(nnz));
2791: }
2792: PetscFunctionReturn(PETSC_SUCCESS);
2793: }
2795: PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2796: {
2797: #if PetscDefined(USE_COMPLEX)
2798: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2799: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2800: MatScalar *aa = a->a;
2802: PetscFunctionBegin;
2803: for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
2804: PetscFunctionReturn(PETSC_SUCCESS);
2805: #else
2806: (void)A;
2807: return PETSC_SUCCESS;
2808: #endif
2809: }
2811: static PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2812: {
2813: #if PetscDefined(USE_COMPLEX)
2814: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2815: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2816: MatScalar *aa = a->a;
2818: PetscFunctionBegin;
2819: for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
2820: PetscFunctionReturn(PETSC_SUCCESS);
2821: #else
2822: (void)A;
2823: return PETSC_SUCCESS;
2824: #endif
2825: }
2827: static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2828: {
2829: #if PetscDefined(USE_COMPLEX)
2830: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2831: PetscInt i, nz = a->bs2 * a->i[a->mbs];
2832: MatScalar *aa = a->a;
2834: PetscFunctionBegin;
2835: for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2836: PetscFunctionReturn(PETSC_SUCCESS);
2837: #else
2838: (void)A;
2839: return PETSC_SUCCESS;
2840: #endif
2841: }
2843: /*
2844: Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2845: */
2846: static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2847: {
2848: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2849: PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
2850: PetscInt nz = a->i[m], row, *jj, mr, col;
2852: PetscFunctionBegin;
2853: *nn = n;
2854: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2855: PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
2856: PetscCall(PetscCalloc1(n, &collengths));
2857: PetscCall(PetscMalloc1(n + 1, &cia));
2858: PetscCall(PetscMalloc1(nz, &cja));
2859: jj = a->j;
2860: for (i = 0; i < nz; i++) collengths[jj[i]]++;
2861: cia[0] = oshift;
2862: for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2863: PetscCall(PetscArrayzero(collengths, n));
2864: jj = a->j;
2865: for (row = 0; row < m; row++) {
2866: mr = a->i[row + 1] - a->i[row];
2867: for (i = 0; i < mr; i++) {
2868: col = *jj++;
2870: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2871: }
2872: }
2873: PetscCall(PetscFree(collengths));
2874: *ia = cia;
2875: *ja = cja;
2876: PetscFunctionReturn(PETSC_SUCCESS);
2877: }
2879: static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2880: {
2881: PetscFunctionBegin;
2882: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2883: PetscCall(PetscFree(*ia));
2884: PetscCall(PetscFree(*ja));
2885: PetscFunctionReturn(PETSC_SUCCESS);
2886: }
2888: /*
2889: MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2890: MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2891: spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2892: */
2893: PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2894: {
2895: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2896: PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2897: PetscInt nz = a->i[m], row, *jj, mr, col;
2898: PetscInt *cspidx;
2900: PetscFunctionBegin;
2901: *nn = n;
2902: if (!ia) PetscFunctionReturn(PETSC_SUCCESS);
2904: PetscCall(PetscCalloc1(n, &collengths));
2905: PetscCall(PetscMalloc1(n + 1, &cia));
2906: PetscCall(PetscMalloc1(nz, &cja));
2907: PetscCall(PetscMalloc1(nz, &cspidx));
2908: jj = a->j;
2909: for (i = 0; i < nz; i++) collengths[jj[i]]++;
2910: cia[0] = oshift;
2911: for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
2912: PetscCall(PetscArrayzero(collengths, n));
2913: jj = a->j;
2914: for (row = 0; row < m; row++) {
2915: mr = a->i[row + 1] - a->i[row];
2916: for (i = 0; i < mr; i++) {
2917: col = *jj++;
2918: cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2919: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2920: }
2921: }
2922: PetscCall(PetscFree(collengths));
2923: *ia = cia;
2924: *ja = cja;
2925: *spidx = cspidx;
2926: PetscFunctionReturn(PETSC_SUCCESS);
2927: }
2929: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2930: {
2931: PetscFunctionBegin;
2932: PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
2933: PetscCall(PetscFree(*spidx));
2934: PetscFunctionReturn(PETSC_SUCCESS);
2935: }
2937: static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2938: {
2939: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
2941: PetscFunctionBegin;
2942: if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
2943: PetscCall(MatShift_Basic(Y, a));
2944: PetscFunctionReturn(PETSC_SUCCESS);
2945: }
2947: PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep)
2948: {
2949: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2950: PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k;
2951: PetscInt m = A->rmap->N, *ailen = a->ilen;
2952: PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0;
2953: MatScalar *aa = a->a, *ap;
2954: PetscBool zero;
2956: PetscFunctionBegin;
2957: PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix");
2958: if (m) rmax = ailen[0];
2959: for (i = 1; i <= mbs; i++) {
2960: for (k = ai[i - 1]; k < ai[i]; k++) {
2961: zero = PETSC_TRUE;
2962: ap = aa + bs2 * k;
2963: for (j = 0; j < bs2 && zero; j++) {
2964: if (ap[j] != 0.0) zero = PETSC_FALSE;
2965: }
2966: if (zero && (aj[k] != i - 1 || !keep)) fshift++;
2967: else {
2968: if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1));
2969: aj[k - fshift] = aj[k];
2970: PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2));
2971: }
2972: }
2973: ai[i - 1] -= fshift_prev;
2974: fshift_prev = fshift;
2975: ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1];
2976: a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0);
2977: rmax = PetscMax(rmax, ailen[i - 1]);
2978: }
2979: if (fshift) {
2980: if (mbs) {
2981: ai[mbs] -= fshift;
2982: a->nz = ai[mbs];
2983: }
2984: PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz));
2985: A->nonzerostate++;
2986: A->info.nz_unneeded += (PetscReal)fshift;
2987: a->rmax = rmax;
2988: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
2989: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
2990: }
2991: PetscFunctionReturn(PETSC_SUCCESS);
2992: }
2994: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2995: MatGetRow_SeqBAIJ,
2996: MatRestoreRow_SeqBAIJ,
2997: MatMult_SeqBAIJ_N,
2998: /* 4*/ MatMultAdd_SeqBAIJ_N,
2999: MatMultTranspose_SeqBAIJ,
3000: MatMultTransposeAdd_SeqBAIJ,
3001: NULL,
3002: NULL,
3003: NULL,
3004: /* 10*/ NULL,
3005: MatLUFactor_SeqBAIJ,
3006: NULL,
3007: NULL,
3008: MatTranspose_SeqBAIJ,
3009: /* 15*/ MatGetInfo_SeqBAIJ,
3010: MatEqual_SeqBAIJ,
3011: MatGetDiagonal_SeqBAIJ,
3012: MatDiagonalScale_SeqBAIJ,
3013: MatNorm_SeqBAIJ,
3014: /* 20*/ NULL,
3015: MatAssemblyEnd_SeqBAIJ,
3016: MatSetOption_SeqBAIJ,
3017: MatZeroEntries_SeqBAIJ,
3018: /* 24*/ MatZeroRows_SeqBAIJ,
3019: NULL,
3020: NULL,
3021: NULL,
3022: NULL,
3023: /* 29*/ MatSetUp_Seq_Hash,
3024: NULL,
3025: NULL,
3026: NULL,
3027: NULL,
3028: /* 34*/ MatDuplicate_SeqBAIJ,
3029: NULL,
3030: NULL,
3031: MatILUFactor_SeqBAIJ,
3032: NULL,
3033: /* 39*/ MatAXPY_SeqBAIJ,
3034: MatCreateSubMatrices_SeqBAIJ,
3035: MatIncreaseOverlap_SeqBAIJ,
3036: MatGetValues_SeqBAIJ,
3037: MatCopy_SeqBAIJ,
3038: /* 44*/ NULL,
3039: MatScale_SeqBAIJ,
3040: MatShift_SeqBAIJ,
3041: NULL,
3042: MatZeroRowsColumns_SeqBAIJ,
3043: /* 49*/ NULL,
3044: MatGetRowIJ_SeqBAIJ,
3045: MatRestoreRowIJ_SeqBAIJ,
3046: MatGetColumnIJ_SeqBAIJ,
3047: MatRestoreColumnIJ_SeqBAIJ,
3048: /* 54*/ MatFDColoringCreate_SeqXAIJ,
3049: NULL,
3050: NULL,
3051: NULL,
3052: MatSetValuesBlocked_SeqBAIJ,
3053: /* 59*/ MatCreateSubMatrix_SeqBAIJ,
3054: MatDestroy_SeqBAIJ,
3055: MatView_SeqBAIJ,
3056: NULL,
3057: NULL,
3058: /* 64*/ NULL,
3059: NULL,
3060: NULL,
3061: NULL,
3062: MatGetRowMaxAbs_SeqBAIJ,
3063: /* 69*/ NULL,
3064: MatConvert_Basic,
3065: NULL,
3066: MatFDColoringApply_BAIJ,
3067: NULL,
3068: /* 74*/ NULL,
3069: NULL,
3070: NULL,
3071: NULL,
3072: MatLoad_SeqBAIJ,
3073: /* 79*/ NULL,
3074: NULL,
3075: NULL,
3076: NULL,
3077: NULL,
3078: /* 84*/ NULL,
3079: NULL,
3080: NULL,
3081: NULL,
3082: NULL,
3083: /* 89*/ NULL,
3084: NULL,
3085: NULL,
3086: NULL,
3087: MatConjugate_SeqBAIJ,
3088: /* 94*/ NULL,
3089: NULL,
3090: MatRealPart_SeqBAIJ,
3091: MatImaginaryPart_SeqBAIJ,
3092: NULL,
3093: /* 99*/ NULL,
3094: NULL,
3095: NULL,
3096: NULL,
3097: NULL,
3098: /*104*/ MatMissingDiagonal_SeqBAIJ,
3099: NULL,
3100: NULL,
3101: NULL,
3102: NULL,
3103: /*109*/ NULL,
3104: NULL,
3105: NULL,
3106: MatMultHermitianTranspose_SeqBAIJ,
3107: MatMultHermitianTransposeAdd_SeqBAIJ,
3108: /*114*/ NULL,
3109: NULL,
3110: MatGetColumnReductions_SeqBAIJ,
3111: MatInvertBlockDiagonal_SeqBAIJ,
3112: NULL,
3113: /*119*/ NULL,
3114: NULL,
3115: NULL,
3116: NULL,
3117: NULL,
3118: /*124*/ NULL,
3119: NULL,
3120: NULL,
3121: MatSetBlockSizes_Default,
3122: NULL,
3123: /*129*/ MatFDColoringSetUp_SeqXAIJ,
3124: NULL,
3125: MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3126: MatDestroySubMatrices_SeqBAIJ,
3127: NULL,
3128: /*134*/ NULL,
3129: NULL,
3130: NULL,
3131: MatEliminateZeros_SeqBAIJ,
3132: MatGetRowSumAbs_SeqBAIJ,
3133: /*139*/ NULL,
3134: NULL,
3135: NULL,
3136: MatCopyHashToXAIJ_Seq_Hash,
3137: NULL};
3139: static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3140: {
3141: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3142: PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3144: PetscFunctionBegin;
3145: PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3147: /* allocate space for values if not already there */
3148: if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values));
3150: /* copy values over */
3151: PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
3152: PetscFunctionReturn(PETSC_SUCCESS);
3153: }
3155: static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3156: {
3157: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
3158: PetscInt nz = aij->i[aij->mbs] * aij->bs2;
3160: PetscFunctionBegin;
3161: PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3162: PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
3164: /* copy values over */
3165: PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
3166: PetscFunctionReturn(PETSC_SUCCESS);
3167: }
3169: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3170: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3172: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3173: {
3174: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
3175: PetscInt i, mbs, nbs, bs2;
3176: PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3178: PetscFunctionBegin;
3179: if (B->hash_active) {
3180: PetscInt bs;
3181: B->ops[0] = b->cops;
3182: PetscCall(PetscHMapIJVDestroy(&b->ht));
3183: PetscCall(MatGetBlockSize(B, &bs));
3184: if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht));
3185: PetscCall(PetscFree(b->dnz));
3186: PetscCall(PetscFree(b->bdnz));
3187: B->hash_active = PETSC_FALSE;
3188: }
3189: if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3190: if (nz == MAT_SKIP_ALLOCATION) {
3191: skipallocation = PETSC_TRUE;
3192: nz = 0;
3193: }
3195: PetscCall(MatSetBlockSize(B, bs));
3196: PetscCall(PetscLayoutSetUp(B->rmap));
3197: PetscCall(PetscLayoutSetUp(B->cmap));
3198: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3200: B->preallocated = PETSC_TRUE;
3202: mbs = B->rmap->n / bs;
3203: nbs = B->cmap->n / bs;
3204: bs2 = bs * bs;
3206: PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3208: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3209: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3210: if (nnz) {
3211: for (i = 0; i < mbs; i++) {
3212: PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
3213: PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3214: }
3215: }
3217: PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
3218: PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3219: PetscOptionsEnd();
3221: if (!flg) {
3222: switch (bs) {
3223: case 1:
3224: B->ops->mult = MatMult_SeqBAIJ_1;
3225: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3226: break;
3227: case 2:
3228: B->ops->mult = MatMult_SeqBAIJ_2;
3229: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3230: break;
3231: case 3:
3232: B->ops->mult = MatMult_SeqBAIJ_3;
3233: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3234: break;
3235: case 4:
3236: B->ops->mult = MatMult_SeqBAIJ_4;
3237: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3238: break;
3239: case 5:
3240: B->ops->mult = MatMult_SeqBAIJ_5;
3241: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3242: break;
3243: case 6:
3244: B->ops->mult = MatMult_SeqBAIJ_6;
3245: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3246: break;
3247: case 7:
3248: B->ops->mult = MatMult_SeqBAIJ_7;
3249: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3250: break;
3251: case 9: {
3252: PetscInt version = 1;
3253: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3254: switch (version) {
3255: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3256: case 1:
3257: B->ops->mult = MatMult_SeqBAIJ_9_AVX2;
3258: B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
3259: PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3260: break;
3261: #endif
3262: default:
3263: B->ops->mult = MatMult_SeqBAIJ_N;
3264: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3265: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3266: break;
3267: }
3268: break;
3269: }
3270: case 11:
3271: B->ops->mult = MatMult_SeqBAIJ_11;
3272: B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3273: break;
3274: case 12: {
3275: PetscInt version = 1;
3276: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3277: switch (version) {
3278: case 1:
3279: B->ops->mult = MatMult_SeqBAIJ_12_ver1;
3280: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3281: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3282: break;
3283: case 2:
3284: B->ops->mult = MatMult_SeqBAIJ_12_ver2;
3285: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
3286: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3287: break;
3288: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3289: case 3:
3290: B->ops->mult = MatMult_SeqBAIJ_12_AVX2;
3291: B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
3292: PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3293: break;
3294: #endif
3295: default:
3296: B->ops->mult = MatMult_SeqBAIJ_N;
3297: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3298: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3299: break;
3300: }
3301: break;
3302: }
3303: case 15: {
3304: PetscInt version = 1;
3305: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
3306: switch (version) {
3307: case 1:
3308: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
3309: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3310: break;
3311: case 2:
3312: B->ops->mult = MatMult_SeqBAIJ_15_ver2;
3313: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3314: break;
3315: case 3:
3316: B->ops->mult = MatMult_SeqBAIJ_15_ver3;
3317: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3318: break;
3319: case 4:
3320: B->ops->mult = MatMult_SeqBAIJ_15_ver4;
3321: PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
3322: break;
3323: default:
3324: B->ops->mult = MatMult_SeqBAIJ_N;
3325: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3326: break;
3327: }
3328: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3329: break;
3330: }
3331: default:
3332: B->ops->mult = MatMult_SeqBAIJ_N;
3333: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
3334: PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3335: break;
3336: }
3337: }
3338: B->ops->sor = MatSOR_SeqBAIJ;
3339: b->mbs = mbs;
3340: b->nbs = nbs;
3341: if (!skipallocation) {
3342: if (!b->imax) {
3343: PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
3345: b->free_imax_ilen = PETSC_TRUE;
3346: }
3347: /* b->ilen will count nonzeros in each block row so far. */
3348: for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3349: if (!nnz) {
3350: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3351: else if (nz < 0) nz = 1;
3352: nz = PetscMin(nz, nbs);
3353: for (i = 0; i < mbs; i++) b->imax[i] = nz;
3354: PetscCall(PetscIntMultError(nz, mbs, &nz));
3355: } else {
3356: PetscInt64 nz64 = 0;
3357: for (i = 0; i < mbs; i++) {
3358: b->imax[i] = nnz[i];
3359: nz64 += nnz[i];
3360: }
3361: PetscCall(PetscIntCast(nz64, &nz));
3362: }
3364: /* allocate the matrix space */
3365: PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3366: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j));
3367: PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i));
3368: if (B->structure_only) {
3369: b->free_a = PETSC_FALSE;
3370: } else {
3371: PetscInt nzbs2 = 0;
3372: PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
3373: PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a));
3374: b->free_a = PETSC_TRUE;
3375: PetscCall(PetscArrayzero(b->a, nz * bs2));
3376: }
3377: b->free_ij = PETSC_TRUE;
3378: PetscCall(PetscArrayzero(b->j, nz));
3380: b->i[0] = 0;
3381: for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3382: } else {
3383: b->free_a = PETSC_FALSE;
3384: b->free_ij = PETSC_FALSE;
3385: }
3387: b->bs2 = bs2;
3388: b->mbs = mbs;
3389: b->nz = 0;
3390: b->maxnz = nz;
3391: B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3392: B->was_assembled = PETSC_FALSE;
3393: B->assembled = PETSC_FALSE;
3394: if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3395: PetscFunctionReturn(PETSC_SUCCESS);
3396: }
3398: static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3399: {
3400: PetscInt i, m, nz, nz_max = 0, *nnz;
3401: PetscScalar *values = NULL;
3402: PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3404: PetscFunctionBegin;
3405: PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
3406: PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
3407: PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
3408: PetscCall(PetscLayoutSetUp(B->rmap));
3409: PetscCall(PetscLayoutSetUp(B->cmap));
3410: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3411: m = B->rmap->n / bs;
3413: PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
3414: PetscCall(PetscMalloc1(m + 1, &nnz));
3415: for (i = 0; i < m; i++) {
3416: nz = ii[i + 1] - ii[i];
3417: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3418: nz_max = PetscMax(nz_max, nz);
3419: nnz[i] = nz;
3420: }
3421: PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
3422: PetscCall(PetscFree(nnz));
3424: values = (PetscScalar *)V;
3425: if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3426: for (i = 0; i < m; i++) {
3427: PetscInt ncols = ii[i + 1] - ii[i];
3428: const PetscInt *icols = jj + ii[i];
3429: if (bs == 1 || !roworiented) {
3430: const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
3431: PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
3432: } else {
3433: PetscInt j;
3434: for (j = 0; j < ncols; j++) {
3435: const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
3436: PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
3437: }
3438: }
3439: }
3440: if (!V) PetscCall(PetscFree(values));
3441: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3442: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3443: PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3444: PetscFunctionReturn(PETSC_SUCCESS);
3445: }
3447: /*@C
3448: MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3450: Not Collective
3452: Input Parameter:
3453: . A - a `MATSEQBAIJ` matrix
3455: Output Parameter:
3456: . array - pointer to the data
3458: Level: intermediate
3460: .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3461: @*/
3462: PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[])
3463: {
3464: PetscFunctionBegin;
3465: PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3466: PetscFunctionReturn(PETSC_SUCCESS);
3467: }
3469: /*@C
3470: MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3472: Not Collective
3474: Input Parameters:
3475: + A - a `MATSEQBAIJ` matrix
3476: - array - pointer to the data
3478: Level: intermediate
3480: .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3481: @*/
3482: PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[])
3483: {
3484: PetscFunctionBegin;
3485: PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3486: PetscFunctionReturn(PETSC_SUCCESS);
3487: }
3489: /*MC
3490: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3491: block sparse compressed row format.
3493: Options Database Keys:
3494: + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()`
3495: - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3497: Level: beginner
3499: Notes:
3500: `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
3501: space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
3503: Run with `-info` to see what version of the matrix-vector product is being used
3505: .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()`
3506: M*/
3508: PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3510: PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3511: {
3512: PetscMPIInt size;
3513: Mat_SeqBAIJ *b;
3515: PetscFunctionBegin;
3516: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
3517: PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3519: PetscCall(PetscNew(&b));
3520: B->data = (void *)b;
3521: B->ops[0] = MatOps_Values;
3523: b->row = NULL;
3524: b->col = NULL;
3525: b->icol = NULL;
3526: b->reallocs = 0;
3527: b->saved_values = NULL;
3529: b->roworiented = PETSC_TRUE;
3530: b->nonew = 0;
3531: b->diag = NULL;
3532: B->spptr = NULL;
3533: B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2;
3534: b->keepnonzeropattern = PETSC_FALSE;
3536: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
3537: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
3538: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
3539: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
3540: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
3541: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
3542: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
3543: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
3544: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
3545: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
3546: #if defined(PETSC_HAVE_HYPRE)
3547: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
3548: #endif
3549: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
3550: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
3551: PetscFunctionReturn(PETSC_SUCCESS);
3552: }
3554: PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3555: {
3556: Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3557: PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3559: PetscFunctionBegin;
3560: PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix");
3561: PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
3563: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3564: c->imax = a->imax;
3565: c->ilen = a->ilen;
3566: c->free_imax_ilen = PETSC_FALSE;
3567: } else {
3568: PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3569: for (i = 0; i < mbs; i++) {
3570: c->imax[i] = a->imax[i];
3571: c->ilen[i] = a->ilen[i];
3572: }
3573: c->free_imax_ilen = PETSC_TRUE;
3574: }
3576: /* allocate the matrix space */
3577: if (mallocmatspace) {
3578: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3579: PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3580: PetscCall(PetscArrayzero(c->a, bs2 * nz));
3581: c->free_a = PETSC_TRUE;
3582: c->i = a->i;
3583: c->j = a->j;
3584: c->free_ij = PETSC_FALSE;
3585: c->parent = A;
3586: C->preallocated = PETSC_TRUE;
3587: C->assembled = PETSC_TRUE;
3589: PetscCall(PetscObjectReference((PetscObject)A));
3590: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3591: PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3592: } else {
3593: PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a));
3594: PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j));
3595: PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i));
3596: c->free_a = PETSC_TRUE;
3597: c->free_ij = PETSC_TRUE;
3599: PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3600: if (mbs > 0) {
3601: PetscCall(PetscArraycpy(c->j, a->j, nz));
3602: if (cpvalues == MAT_COPY_VALUES) {
3603: PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
3604: } else {
3605: PetscCall(PetscArrayzero(c->a, bs2 * nz));
3606: }
3607: }
3608: C->preallocated = PETSC_TRUE;
3609: C->assembled = PETSC_TRUE;
3610: }
3611: }
3613: c->roworiented = a->roworiented;
3614: c->nonew = a->nonew;
3616: PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
3617: PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
3619: c->bs2 = a->bs2;
3620: c->mbs = a->mbs;
3621: c->nbs = a->nbs;
3623: if (a->diag) {
3624: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3625: c->diag = a->diag;
3626: c->free_diag = PETSC_FALSE;
3627: } else {
3628: PetscCall(PetscMalloc1(mbs + 1, &c->diag));
3629: for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
3630: c->free_diag = PETSC_TRUE;
3631: }
3632: } else c->diag = NULL;
3634: c->nz = a->nz;
3635: c->maxnz = a->nz; /* Since we allocate exactly the right amount */
3636: c->solve_work = NULL;
3637: c->mult_work = NULL;
3638: c->sor_workt = NULL;
3639: c->sor_work = NULL;
3641: c->compressedrow.use = a->compressedrow.use;
3642: c->compressedrow.nrows = a->compressedrow.nrows;
3643: if (a->compressedrow.use) {
3644: i = a->compressedrow.nrows;
3645: PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
3646: PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
3647: PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
3648: } else {
3649: c->compressedrow.use = PETSC_FALSE;
3650: c->compressedrow.i = NULL;
3651: c->compressedrow.rindex = NULL;
3652: }
3653: c->nonzerorowcnt = a->nonzerorowcnt;
3654: C->nonzerostate = A->nonzerostate;
3656: PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
3657: PetscFunctionReturn(PETSC_SUCCESS);
3658: }
3660: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3661: {
3662: PetscFunctionBegin;
3663: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
3664: PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
3665: PetscCall(MatSetType(*B, MATSEQBAIJ));
3666: PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3667: PetscFunctionReturn(PETSC_SUCCESS);
3668: }
3670: /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3671: PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3672: {
3673: PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3674: PetscInt *rowidxs, *colidxs;
3675: PetscScalar *matvals;
3677: PetscFunctionBegin;
3678: PetscCall(PetscViewerSetUp(viewer));
3680: /* read matrix header */
3681: PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3682: PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3683: M = header[1];
3684: N = header[2];
3685: nz = header[3];
3686: PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3687: PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3688: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3690: /* set block sizes from the viewer's .info file */
3691: PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3692: /* set local and global sizes if not set already */
3693: if (mat->rmap->n < 0) mat->rmap->n = M;
3694: if (mat->cmap->n < 0) mat->cmap->n = N;
3695: if (mat->rmap->N < 0) mat->rmap->N = M;
3696: if (mat->cmap->N < 0) mat->cmap->N = N;
3697: PetscCall(PetscLayoutSetUp(mat->rmap));
3698: PetscCall(PetscLayoutSetUp(mat->cmap));
3700: /* check if the matrix sizes are correct */
3701: PetscCall(MatGetSize(mat, &rows, &cols));
3702: PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3703: PetscCall(MatGetBlockSize(mat, &bs));
3704: PetscCall(MatGetLocalSize(mat, &m, &n));
3705: mbs = m / bs;
3706: nbs = n / bs;
3708: /* read in row lengths, column indices and nonzero values */
3709: PetscCall(PetscMalloc1(m + 1, &rowidxs));
3710: PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
3711: rowidxs[0] = 0;
3712: for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3713: sum = rowidxs[m];
3714: PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3716: /* read in column indices and nonzero values */
3717: PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
3718: PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
3719: PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3721: { /* preallocate matrix storage */
3722: PetscBT bt; /* helper bit set to count nonzeros */
3723: PetscInt *nnz;
3724: PetscBool sbaij;
3726: PetscCall(PetscBTCreate(nbs, &bt));
3727: PetscCall(PetscCalloc1(mbs, &nnz));
3728: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3729: for (i = 0; i < mbs; i++) {
3730: PetscCall(PetscBTMemzero(nbs, bt));
3731: for (k = 0; k < bs; k++) {
3732: PetscInt row = bs * i + k;
3733: for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3734: PetscInt col = colidxs[j];
3735: if (!sbaij || col >= row)
3736: if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3737: }
3738: }
3739: }
3740: PetscCall(PetscBTDestroy(&bt));
3741: PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
3742: PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
3743: PetscCall(PetscFree(nnz));
3744: }
3746: /* store matrix values */
3747: for (i = 0; i < m; i++) {
3748: PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
3749: PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
3750: }
3752: PetscCall(PetscFree(rowidxs));
3753: PetscCall(PetscFree2(colidxs, matvals));
3754: PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3755: PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3756: PetscFunctionReturn(PETSC_SUCCESS);
3757: }
3759: PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3760: {
3761: PetscBool isbinary;
3763: PetscFunctionBegin;
3764: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3765: PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
3766: PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3767: PetscFunctionReturn(PETSC_SUCCESS);
3768: }
3770: /*@
3771: MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3772: compressed row) format. For good matrix assembly performance the
3773: user should preallocate the matrix storage by setting the parameter `nz`
3774: (or the array `nnz`).
3776: Collective
3778: Input Parameters:
3779: + comm - MPI communicator, set to `PETSC_COMM_SELF`
3780: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3781: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3782: . m - number of rows
3783: . n - number of columns
3784: . nz - number of nonzero blocks per block row (same for all rows)
3785: - nnz - array containing the number of nonzero blocks in the various block rows
3786: (possibly different for each block row) or `NULL`
3788: Output Parameter:
3789: . A - the matrix
3791: Options Database Keys:
3792: + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3793: - -mat_block_size - size of the blocks to use
3795: Level: intermediate
3797: Notes:
3798: It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3799: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3800: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3802: The number of rows and columns must be divisible by blocksize.
3804: If the `nnz` parameter is given then the `nz` parameter is ignored
3806: A nonzero block is any block that as 1 or more nonzeros in it
3808: The `MATSEQBAIJ` format is fully compatible with standard Fortran
3809: storage. That is, the stored row and column indices can begin at
3810: either one (as in Fortran) or zero.
3812: Specify the preallocated storage with either `nz` or `nnz` (not both).
3813: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3814: allocation. See [Sparse Matrices](sec_matsparse) for details.
3815: matrices.
3817: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3818: @*/
3819: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3820: {
3821: PetscFunctionBegin;
3822: PetscCall(MatCreate(comm, A));
3823: PetscCall(MatSetSizes(*A, m, n, m, n));
3824: PetscCall(MatSetType(*A, MATSEQBAIJ));
3825: PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3826: PetscFunctionReturn(PETSC_SUCCESS);
3827: }
3829: /*@
3830: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3831: per row in the matrix. For good matrix assembly performance the
3832: user should preallocate the matrix storage by setting the parameter `nz`
3833: (or the array `nnz`).
3835: Collective
3837: Input Parameters:
3838: + B - the matrix
3839: . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
3840: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3841: . nz - number of block nonzeros per block row (same for all rows)
3842: - nnz - array containing the number of block nonzeros in the various block rows
3843: (possibly different for each block row) or `NULL`
3845: Options Database Keys:
3846: + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3847: - -mat_block_size - size of the blocks to use
3849: Level: intermediate
3851: Notes:
3852: If the `nnz` parameter is given then the `nz` parameter is ignored
3854: You can call `MatGetInfo()` to get information on how effective the preallocation was;
3855: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3856: You can also run with the option `-info` and look for messages with the string
3857: malloc in them to see if additional memory allocation was needed.
3859: The `MATSEQBAIJ` format is fully compatible with standard Fortran
3860: storage. That is, the stored row and column indices can begin at
3861: either one (as in Fortran) or zero.
3863: Specify the preallocated storage with either `nz` or `nnz` (not both).
3864: Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory
3865: allocation. See [Sparse Matrices](sec_matsparse) for details.
3867: .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3868: @*/
3869: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3870: {
3871: PetscFunctionBegin;
3875: PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3876: PetscFunctionReturn(PETSC_SUCCESS);
3877: }
3879: /*@C
3880: MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3882: Collective
3884: Input Parameters:
3885: + B - the matrix
3886: . bs - the blocksize
3887: . i - the indices into `j` for the start of each local row (indices start with zero)
3888: . j - the column indices for each local row (indices start with zero) these must be sorted for each row
3889: - v - optional values in the matrix, use `NULL` if not provided
3891: Level: advanced
3893: Notes:
3894: The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()`
3896: The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
3897: may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
3898: over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
3899: `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3900: block column and the second index is over columns within a block.
3902: Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3904: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3905: @*/
3906: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3907: {
3908: PetscFunctionBegin;
3912: PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3913: PetscFunctionReturn(PETSC_SUCCESS);
3914: }
3916: /*@
3917: MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3919: Collective
3921: Input Parameters:
3922: + comm - must be an MPI communicator of size 1
3923: . bs - size of block
3924: . m - number of rows
3925: . n - number of columns
3926: . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3927: . j - column indices
3928: - a - matrix values
3930: Output Parameter:
3931: . mat - the matrix
3933: Level: advanced
3935: Notes:
3936: The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays
3937: once the matrix is destroyed
3939: You cannot set new nonzero locations into this matrix, that will generate an error.
3941: The `i` and `j` indices are 0 based
3943: When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3945: The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3946: the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3947: block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3948: with column-major ordering within blocks.
3950: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3951: @*/
3952: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3953: {
3954: Mat_SeqBAIJ *baij;
3956: PetscFunctionBegin;
3957: PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
3958: if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3960: PetscCall(MatCreate(comm, mat));
3961: PetscCall(MatSetSizes(*mat, m, n, m, n));
3962: PetscCall(MatSetType(*mat, MATSEQBAIJ));
3963: PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3964: baij = (Mat_SeqBAIJ *)(*mat)->data;
3965: PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3967: baij->i = i;
3968: baij->j = j;
3969: baij->a = a;
3971: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3972: baij->free_a = PETSC_FALSE;
3973: baij->free_ij = PETSC_FALSE;
3974: baij->free_imax_ilen = PETSC_TRUE;
3976: for (PetscInt ii = 0; ii < m; ii++) {
3977: const PetscInt row_len = i[ii + 1] - i[ii];
3979: baij->ilen[ii] = baij->imax[ii] = row_len;
3980: PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len);
3981: }
3982: if (PetscDefined(USE_DEBUG)) {
3983: for (PetscInt ii = 0; ii < baij->i[m]; ii++) {
3984: PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3985: PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3986: }
3987: }
3989: PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3990: PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3991: PetscFunctionReturn(PETSC_SUCCESS);
3992: }
3994: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3995: {
3996: PetscFunctionBegin;
3997: PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3998: PetscFunctionReturn(PETSC_SUCCESS);
3999: }